vctrs/0000755000176200001440000000000015157552632011426 5ustar liggesusersvctrs/tests/0000755000176200001440000000000014276722575012576 5ustar liggesusersvctrs/tests/testthat/0000755000176200001440000000000015157552632014430 5ustar liggesusersvctrs/tests/testthat/helper-c.R0000644000176200001440000000007114712761114016241 0ustar liggesusersclass_type <- function(x) { .Call(ffi_class_type, x) } vctrs/tests/testthat/helper-conditions.R0000644000176200001440000000343615065005761020201 0ustar liggesuserswith_subscript_data <- function( expr, subscript_arg, subscript_elt = NULL, subscript_action = NULL ) { local_options(rlang_force_unhandled_error = TRUE) tryCatch( expr, vctrs_error_subscript = function(cnd) { cnd$subscript_arg <- subscript_arg cnd$subscript_elt <- subscript_elt cnd$subscript_action <- subscript_action cnd_signal(cnd) } ) } with_tibble_cols <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "rename" ) } with_tibble_rows <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "row", subscript_action = "remove" ) } with_dm_tables <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "table", subscript_action = "extract" ) } with_tidyselect_select <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "select" ) } with_tidyselect_relocate <- function(expr) { with_subscript_data( expr, subscript_arg = quote(foo(bar)), subscript_elt = "column", subscript_action = "relocate" ) } my_vec_rep <- function(my_x, my_times) { vec_rep( my_x, my_times, error_call = current_env(), x_arg = "my_x", times_arg = "my_times" ) } my_vec_rep_each <- function(my_x, my_times) { vec_rep_each( my_x, my_times, error_call = current_env(), x_arg = "my_x", times_arg = "my_times" ) } my_vec_as_names <- function( my_names, ..., my_repair = "minimal", my_quiet = FALSE ) { vec_as_names( my_names, repair = my_repair, repair_arg = "my_repair", quiet = my_quiet ) } vctrs/tests/testthat/helper-names.R0000644000176200001440000000041414315060310017110 0ustar liggesuserslocal_name_repair_quiet <- function(frame = caller_env()) { local_options(rlib_name_repair_verbosity = "quiet", .frame = frame) } local_name_repair_verbose <- function(frame = caller_env()) { local_options(rlib_name_repair_verbosity = "verbose", .frame = frame) } vctrs/tests/testthat/helper-s4.R0000644000176200001440000000323315113325071016342 0ustar liggesusers.rando <- setClass( "vctrs_rando", contains = "numeric", slots = list(.Data = "numeric") ) rando <- function(n = 0) { .rando(as.numeric(seq_len(n))) } as_rando <- function(x) { rando(length(x)) } setMethod("[", "vctrs_rando", function(x, i, j, ..., drop = TRUE) { new_n <- length(vec_as_location(i, length(x@.Data), names(x@.Data))) rando(new_n) }) .Counts <- methods::setClass( "vctrs_Counts", contains = "integer", slots = c(name = "character") ) local_c_counts <- function(frame = caller_env()) { c_counts <- function(x, ...) { xs <- list(x, ...) xs_data <- lapply(xs, function(x) x@.Data) new_data <- do.call(c, xs_data) .Counts(new_data, name = "Dispatched") } local_s4_method( frame = frame, "c", methods::signature(x = "vctrs_Counts"), c_counts ) } local_s4_method <- function(generic, signature, method, frame = caller_env()) { methods::setMethod(generic, signature, method) exit_expr <- call2( methods::removeMethod, generic, signature, where = topenv(frame) ) local_exit(exit_expr, frame = frame) } with_s4_method <- function(generic, signature, method, expr) { local_s4_method(generic, signature, method) expr } local_exit <- function(expr, frame = caller_env()) { # We are at top-level when only one frame refers to the global environment if (is_reference(frame, global_env())) { is_global_frame <- sys.parents() == 0 if (sum(is_global_frame) == 1) { abort("Can't add an exit event at top-level") } } # Inline everything so the call will succeed in any environment expr <- call2(on.exit, expr, add = TRUE) eval_bare(expr, frame) invisible(expr) } vctrs/tests/testthat/test-compare.R0000644000176200001440000002134115120515501017137 0ustar liggesuserstest_that("inputs must be vectors", { expect_error(vec_compare(NULL, 1), class = "vctrs_error_scalar_type") expect_error(vec_compare(1, NULL), class = "vctrs_error_scalar_type") }) test_that("matches R ordering", { expect_same <- function(x, y) { expect_equal(vec_compare(!!x, !!y), cmp(!!x, !!y)) } expect_same(c(NA, FALSE, TRUE), FALSE) expect_same(c(NA, -100L, 0L, 100L), 0L) expect_same(c(NA, -Inf, -100, 100, Inf), 0L) expect_same(c(NA, NaN, 0), NA) expect_same(c(NA, "a", "b", "c"), "b") expect_same(as.raw(2:5), as.raw(4)) }) test_that("NAs equal when requested", { expect_value <- function(x, y, val, .ptype = NULL) { expect_equal(vec_compare(!!x, !!y, .ptype = .ptype, na_equal = TRUE), !!val) } expect_value(NA, NA, 0L) expect_value(NA, FALSE, -1L) expect_value(FALSE, NA, 1L) expect_value(NA_integer_, NA_integer_, 0L) expect_value(NA_integer_, 0L, -1L) expect_value(0L, NA_integer_, 1L) expect_value(NA_character_, NA_character_, 0L) expect_value(NA_character_, "", -1L) expect_value("", NA_character_, 1L) expect_value(0, NA_real_, 1L) expect_value(0, NaN, 1L) expect_value(0, 0, 0L) expect_value(NA_real_, NA_real_, 0L) expect_value(NA_real_, NaN, 1L) expect_value(NA_real_, 0, -1L) expect_value(NaN, NA_real_, -1L) expect_value(NaN, NaN, 0L) expect_value(NaN, 0, -1L) }) test_that("data frames are compared column by column", { df1 <- data.frame(x = c(1, 1, 1), y = c(-1, 0, 1)) expect_equal(vec_compare(df1, df1[2, ]), c(-1, 0, 1)) expect_equal(vec_compare(df1[1], df1[2, 1, drop = FALSE]), c(0, 0, 0)) expect_equal(vec_compare(df1[2], df1[2, 2, drop = FALSE]), c(-1, 0, 1)) expect_equal(vec_compare(df1[2:1], df1[2, 2:1]), c(-1, 0, 1)) }) test_that("can compare data frames with various types of columns", { x1 <- data_frame(x = 1, y = 2) y1 <- data_frame(x = 2, y = 1) x2 <- data_frame(x = "a") y2 <- data_frame(x = "b") x3 <- data_frame(x = FALSE) y3 <- data_frame(x = TRUE) x4 <- data_frame(x = 1L) y4 <- data_frame(x = 2L) expect_equal(vec_compare(x1, y1), -1) expect_equal(vec_compare(x2, y2), -1) expect_equal(vec_compare(x3, y3), -1) expect_equal(vec_compare(x4, y4), -1) }) test_that("can compare data frames with data frame columns", { df1 <- data_frame(x = data_frame(a = 1)) df2 <- data_frame(x = data_frame(a = 2)) expect_equal(vec_compare(df1, df1), 0) expect_equal(vec_compare(df1, df2), -1) }) test_that("can compare data frames with 0 columns", { x <- new_data_frame(n = 2L) expect_identical(vec_compare(x, x), c(0L, 0L)) }) test_that("C code doesn't crash with bad inputs", { df <- data.frame(x = c(1, 1, 1), y = c(-1, 0, 1)) expect_error(.Call(ffi_vec_compare, df, df[1], TRUE), "not comparable") # Names are not checked, as `vec_cast_common()` should take care of the type. # So if `vec_cast_common()` is not called, or is improperly specified, then # this could result in false equality. expect_equal( .Call(ffi_vec_compare, df, setNames(df, c("x", "z")), TRUE), c(0, 0, 0) ) df1 <- new_data_frame(list(x = 1:3, y = c(1, 1, 1))) df2 <- new_data_frame(list(y = 1:2, x = 1:2)) expect_error( .Call(ffi_vec_compare, df1, df2, TRUE), "must have the same types and lengths" ) }) test_that("xtfrm.vctrs_vctr works for variety of base classes", { df <- data.frame(x = c(NA, 1, 1), y = c(1, 2, 1)) # Internally uses `vec_rank()`, which propagates rows if not "complete" expect_equal(xtfrm.vctrs_vctr(df), c(NA, 2, 1)) x <- c(2, 3, 1) expect_equal(xtfrm.vctrs_vctr(x), x) expect_equal(xtfrm.vctrs_vctr(letters[x]), x) }) test_that("vec_proxy_order() orders list using order of appearance", { x <- 1:2 y <- 2:4 z <- "a" lst <- list(x, y, x, y, z) expect_identical(vec_proxy_order(lst), c(1L, 2L, 1L, 2L, 5L)) }) test_that("vec_compare() calls vec_proxy_compare()", { local_methods( vec_proxy_compare.vctrs_foobar = function(x, ...) rev(x), vec_ptype2.integer.vctrs_foobar = function(...) foobar(int()), vec_ptype2.vctrs_foobar = function(...) foobar(int()), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.integer = function(x, ...) x, ) expect_identical(vec_compare(1:3, 1:3), int(0, 0, 0)) expect_identical(vec_compare(1:3, foobar(1:3)), int(-1, 0, 1)) }) test_that("vec_proxy_compare() preserves data frames and vectors", { df <- data_frame(x = 1:2, y = c("a", "b")) expect_identical(vec_proxy_compare(df), df) x <- c(NA, "a", "b", "c") expect_identical(vec_proxy_compare(x), x) }) test_that("vec_proxy_compare() handles data frame with a POSIXlt column", { df <- data.frame(times = 1:5, x = 1:5) df$times <- as.POSIXlt(seq.Date( as.Date("2019-12-30"), as.Date("2020-01-03"), by = "day" )) df2 <- df df2$times <- vec_proxy_compare(df$times) expect_identical( vec_proxy_compare(df), vec_proxy_compare(df2) ) }) test_that("vec_proxy_compare.POSIXlt() correctly orders (#720)", { dates <- as.POSIXlt(seq.Date( as.Date("2019-12-30"), as.Date("2020-01-03"), by = "day" )) expect_equal(vec_order(dates), 1:5) }) test_that("vec_proxy_compare.POSIXlt() correctly orders around DST", { # 1am in EDT x <- as.POSIXlt("2020-11-01 01:00:00", tz = "America/New_York") # "falls back" to 1am again, but in EST y <- as.POSIXlt(x + 3600) expect_equal(vec_order(c(y, x)), c(2, 1)) }) test_that("vec_proxy_compare() flattens df-cols", { df_col <- data_frame(z = 3:4, w = 4:5) df <- data_frame(x = 1:2, y = df_col) expect <- data_frame(x = 1:2, z = 3:4, w = 4:5) expect_identical(vec_proxy_compare(df), expect) }) test_that("vec_proxy_compare() unwraps 1 col dfs", { df <- data_frame(x = 1:2) expect_identical(vec_proxy_compare(df), 1:2) df_col <- data_frame(y = 1:2) df <- data_frame(x = df_col) expect_identical(vec_proxy_compare(df), 1:2) }) test_that("vec_proxy_order() works on deeply nested lists", { df_col <- data_frame(z = list("b", "a", "b")) # Relaxed and unwrapped df1 <- data_frame(x = df_col) expect_identical(vec_proxy_order(df1), c(1L, 2L, 1L)) df2 <- data_frame(x = df_col, y = 1:3) expect_identical(vec_proxy_order(df2), data_frame(x = c(1L, 2L, 1L), y = 1:3)) }) test_that("error is thrown when comparing complexes (#1655)", { expect_snapshot({ (expect_error(vec_compare(complex(), complex()))) }) }) test_that("error is thrown when comparing lists", { expect_error(vec_compare(list(), list()), class = "vctrs_error_unsupported") expect_error( .Call(ffi_vec_compare, list(), list(), FALSE), "Can't compare lists" ) }) test_that("error is thrown when comparing data frames with list columns", { df <- data_frame(x = list()) expect_error(vec_compare(df, df), class = "vctrs_error_unsupported") expect_error(.Call(ffi_vec_compare, df, df, FALSE), "Can't compare lists") }) test_that("error is thrown when comparing scalars", { x <- new_sclr(x = 1) expect_error(vec_compare(x, x), class = "vctrs_error_scalar_type") expect_error( .Call(ffi_vec_compare, x, x, FALSE), class = "vctrs_error_scalar_type" ) }) test_that("`na_equal` is validated", { expect_snapshot({ (expect_error(vec_compare(1, 1, na_equal = 1))) (expect_error(vec_compare(1, 1, na_equal = c(TRUE, FALSE)))) }) }) test_that("can compare equal strings with different encodings", { for (x_encoding in encodings()) { for (y_encoding in encodings()) { expect_equal(vec_compare(x_encoding, y_encoding), 0L) } } }) test_that("can compare non-equal strings with different encodings", { x <- "x" y <- encodings()$latin1 expect_equal(vec_compare(x, y), -1L) }) test_that("comparison can be determined when strings have identical encodings", { encs <- encodings() for (enc in encs) { expect_equal(vec_compare(enc, enc), 0L) } }) test_that("comparison is known to always fail with bytes", { enc <- encoding_bytes() error <- "translating strings with \"bytes\" encoding" expect_error(vec_compare(enc, enc), error) }) test_that("comparison is known to fail when comparing bytes to other encodings", { error <- "translating strings with \"bytes\" encoding" for (enc in encodings()) { expect_error(vec_compare(encoding_bytes(), enc), error) expect_error(vec_compare(enc, encoding_bytes()), error) } }) test_that("can compare unspecified", { expect_equal(vec_compare(NA, NA), NA_integer_) expect_equal(vec_compare(NA, NA, na_equal = TRUE), 0) expect_equal( vec_compare(c(NA, NA), unspecified(2)), c(NA_integer_, NA_integer_) ) }) test_that("can't supply NA as `na_equal`", { expect_snapshot(error = TRUE, { vec_compare(NA, NA, na_equal = NA) }) }) test_that("vec_compare() silently falls back to base data frame", { expect_silent(expect_identical( vec_compare(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(0L, 32) )) }) vctrs/tests/testthat/helper-s3.R0000644000176200001440000001315015065005761016347 0ustar liggesusersnew_ctor <- function(base_class) { function(x = list(), ..., class = NULL) { if (inherits(x, "tbl_df")) { tibble::new_tibble(x, class = c(class, base_class), nrow = nrow(x)) } else if (is.data.frame(x)) { structure(x, class = c(class, base_class, "data.frame"), ...) } else { structure(x, class = c(class, base_class), ...) } } } foobar <- new_ctor("vctrs_foobar") foobar_c <- new_ctor("vctrs_foobar_c") foobaz <- new_ctor("vctrs_foobaz") quux <- new_ctor("vctrs_quux") expect_foobar <- function(x) expect_s3_class({{ x }}, "vctrs_foobar") expect_foobar_c <- function(x) expect_s3_class({{ x }}, "vctrs_foobar_c") expect_foobaz <- function(x) expect_s3_class({{ x }}, "vctrs_foobaz") expect_quux <- function(x) expect_s3_class({{ x }}, "vctrs_quux") local_c_foobar <- function(frame = caller_env()) { local_methods( c.vctrs_foobar = function(...) foobar_c(NextMethod()), .frame = frame ) } with_c_foobar <- function(expr) { local_c_foobar() expr } unrownames <- function(x) { row.names(x) <- NULL x } local_methods <- function(..., .frame = caller_env()) { local_bindings(..., .env = global_env(), .frame = .frame) } with_methods <- function(.expr, ...) { local_methods(...) .expr } local_proxy <- function(frame = caller_env()) { local_methods( .frame = frame, vec_proxy.vctrs_proxy = function(x, ...) proxy_deref(x), vec_restore.vctrs_proxy = function(x, to, ...) new_proxy(x), vec_ptype2.vctrs_proxy = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_proxy") }, vec_ptype2.vctrs_proxy.vctrs_proxy = function(x, y, ...) { new_proxy(vec_ptype(proxy_deref(x))) }, vec_cast.vctrs_proxy = function(x, to, ...) { UseMethod("vec_cast.vctrs_proxy") }, vec_cast.vctrs_proxy.vctrs_proxy = function(x, to, ...) x ) } new_proxy <- function(x) { structure(list(env(x = x)), class = "vctrs_proxy") } proxy_deref <- function(x, ...) { x[[1]]$x } local_env_proxy <- function(frame = caller_env()) { local_methods( .frame = frame, vec_proxy.vctrs_proxy = proxy_deref, vec_restore.vctrs_proxy = function(x, ...) new_proxy(x), vec_cast.vctrs_proxy = function(x, to, ...) { UseMethod("vec_cast.vctrs_proxy") }, vec_cast.vctrs_proxy.vctrs_proxy = function(x, to, ...) x, vec_ptype2.vctrs_proxy = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_proxy") }, vec_ptype2.vctrs_proxy.vctrs_proxy = function(x, y, ...) { new_proxy(proxy_deref(x)[0]) } ) } local_no_stringsAsFactors <- function(frame = caller_env()) { local_options(.frame = frame, stringsAsFactors = FALSE) } tibble <- function(...) { tibble::tibble(...) } local_foobar_proxy <- function(frame = caller_env()) { local_methods(.frame = frame, vec_proxy.vctrs_foobar = function(x, ...) x) } subclass <- function(x) { class(x) <- c("vctrs_foo", "vctrs_foobar", class(x)) x } # Subclass promoted to logical new_lgl_subtype <- function(x) { stopifnot(is_logical(x)) structure(x, class = "vctrs_lgl_subtype") } local_lgl_subtype <- function(frame = caller_env()) { local_methods( .frame = frame, vec_ptype2.vctrs_lgl_subtype = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_lgl_subtype") }, vec_ptype2.vctrs_lgl_subtype.vctrs_lgl_subtype = function(x, y, ...) x, vec_ptype2.vctrs_lgl_subtype.logical = function(x, y, ...) y, vec_ptype2.logical.vctrs_lgl_subtype = function(x, y, ...) x, vec_cast.vctrs_lgl_subtype = function(x, to, ...) { UseMethod("vec_cast.vctrs_lgl_subtype") }, vec_cast.vctrs_lgl_subtype.vctrs_lgl_subtype = function(x, to, ...) x, vec_cast.vctrs_lgl_subtype.logical = function(x, to, ...) { new_lgl_subtype(x) }, vec_cast.logical.vctrs_lgl_subtype = function(x, to, ...) unstructure(x) ) } with_lgl_subtype <- function(expr) { local_lgl_subtype() expr } # Logical promoted to subclass new_lgl_supertype <- function(x) { stopifnot(is_logical(x)) structure(x, class = "vctrs_lgl_supertype") } local_lgl_supertype <- function(frame = caller_env()) { local_methods( .frame = frame, vec_ptype2.vctrs_lgl_supertype = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_lgl_supertype") }, vec_ptype2.vctrs_lgl_supertype.vctrs_lgl_supertype = function(x, y, ...) x, vec_ptype2.vctrs_lgl_supertype.logical = function(x, y, ...) x, vec_ptype2.logical.vctrs_lgl_supertype = function(x, y, ...) y, vec_cast.vctrs_lgl_supertype = function(x, to, ...) { UseMethod("vec_cast.vctrs_lgl_supertype") }, vec_cast.vctrs_lgl_supertype.vctrs_lgl_supertype = function(x, to, ...) x, vec_cast.vctrs_lgl_supertype.logical = function(x, to, ...) { new_lgl_subtype(x) }, vec_cast.logical.vctrs_lgl_supertype = function(x, to, ...) unstructure(x) ) } with_lgl_supertype <- function(expr) { local_lgl_supertype() expr } foobar_df_ptype2 <- function(x, y, ...) { foobar(df_ptype2(x, y, ...)) } foobar_df_cast <- function(x, y, ...) { foobar(df_cast(x, y, ...)) } local_foobar_df_methods <- function(expr, frame = caller_env()) { local_methods( .frame = frame, vec_ptype2.vctrs_foobar.vctrs_foobar = foobar_df_ptype2, vec_ptype2.data.frame.vctrs_foobar = foobar_df_ptype2, vec_ptype2.vctrs_foobar.data.frame = foobar_df_ptype2, vec_cast.vctrs_foobar.vctrs_foobar = foobar_df_cast, vec_cast.data.frame.vctrs_foobar = foobar_df_cast, vec_cast.vctrs_foobar.data.frame = foobar_df_cast ) } with_foobar_df_methods <- function(expr) { local_foobar_df_methods() expr } set_tibble <- function(x) { base <- class(x)[-length(class(x))] class(x) <- c(base, "tbl_df", "tbl", "data.frame") x } vctrs/tests/testthat/helper-size.R0000644000176200001440000000014015065005761016767 0ustar liggesusersexpect_size <- function(object, n) { expect_identical(vec_size(object), vec_cast(n, int())) } vctrs/tests/testthat/test-utils.R0000644000176200001440000000433415065005761016666 0ustar liggesuserstest_that("names preserved if outer name is missing", { x <- c("a", "z", "") expect_equal(outer_names(x, NULL, 3), x) expect_equal(outer_names(x, "", 3), x) expect_equal(outer_names(x, na_chr, 3), x) }) test_that("outer name vectorised if needed", { expect_equal(outer_names(NULL, "x", 1L), c("x")) expect_equal(outer_names(NULL, "x", 2L), c("x1", "x2")) }) test_that("outer and inner names are combined", { expect_equal(outer_names("x", "y", 1), c("y..x")) }) test_that("options are created", { expect_identical( unclass(new_opts(c("a", "c"), letters[1:4])), c(a = TRUE, b = FALSE, c = TRUE, d = FALSE) ) }) test_that("can't supply unknown option", { expect_error( new_opts(c("a", "foo"), letters[1:4]), "Argument must be one of \"a\", \"b\", \"c\" or \"d\"" ) expect_error( new_opts(c("a", "foo"), letters[1:4], arg = "foo"), "`foo` must be one of \"a\", \"b\", \"c\" or \"d\"" ) }) test_that("`has_dim()` doesn't partial match on the `dim` attribute (#948)", { x <- structure(1, dimB = 1) expect_false(has_dim(x)) }) test_that("df_has_base_subset() detects `[` methods", { expect_true(df_has_base_subset(foobar(mtcars))) out <- with_methods( `[.vctrs_foobar` = function(x, i, ...) { structure(NextMethod(), dispatched = TRUE) }, df_has_base_subset(foobar(mtcars)) ) expect_false(out) }) test_that("vec_common_suffix() finds common suffix", { x <- c("foo", "bar", "baz") y <- c("quux", "foo", "hop", "baz") expect_identical(vec_common_suffix(x, y), "baz") x <- c("foo", "bar", "baz") y <- c("quux", "foo", "bar", "baz") expect_identical(vec_common_suffix(x, y), x) x <- letters y <- chr() expect_identical(vec_common_suffix(x, y), chr()) x <- data.frame(x = 1:3, y = c("foo", "bar", "baz")) y <- data.frame(x = 0:3, y = c("foo", "hop", "bar", "baz")) exp <- data.frame(x = 2:3, y = c("bar", "baz")) expect_identical(vec_common_suffix(x, y), exp) }) test_that("fast_c() concatenates", { expect_identical(fast_c(character(), "foo"), "foo") expect_identical(fast_c("foo", character()), "foo") expect_identical(fast_c("foo", c("bar", "baz")), c("foo", "bar", "baz")) expect_identical(fast_c(c("bar", "baz"), "foo"), c("bar", "baz", "foo")) }) vctrs/tests/testthat/test-type-integer64.R0000644000176200001440000001703115065005761020312 0ustar liggesuserstest_that("casting of integer64 works", { x <- bit64::as.integer64(1:10) expect_equal(vec_cast(x, bit64::integer64()), x) expect_equal(vec_cast(x, integer()), 1:10) expect_equal(vec_cast(1:10, bit64::integer64()), x) expect_equal(vec_cast(x, double()), as.double(x)) expect_equal(vec_cast(as.numeric(1:10), bit64::integer64()), x) expect_equal(vec_cast(x, logical()), rep(TRUE, 10L)) expect_equal( vec_cast(c(TRUE, FALSE), bit64::integer64()), bit64::as.integer64(c(1, 0)) ) expect_equal(vec_cast(NA, bit64::integer64()), bit64::as.integer64(NA)) expect_equal( vec_cast(unspecified(2), bit64::integer64()), bit64::as.integer64(c(NA, NA)) ) expect_error(vec_cast(x, factor()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor(), x), class = "vctrs_error_incompatible_type") # These used to be allowed expect_error( vec_cast(x, character()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(as.character(1:10), bit64::integer64()), class = "vctrs_error_incompatible_type" ) }) test_that("vec_ptype2 for integer64 works", { x <- bit64::as.integer64(1:10) expect_equal(vec_ptype2(x, x), bit64::integer64()) expect_equal(vec_ptype2(x, 1L), bit64::integer64()) expect_equal(vec_ptype2(1L, x), bit64::integer64()) expect_equal(vec_ptype2(x, TRUE), bit64::integer64()) expect_equal(vec_ptype2(TRUE, x), bit64::integer64()) expect_equal(vec_ptype2(x, NA), bit64::integer64()) expect_equal(vec_ptype2(NA, x), bit64::integer64()) expect_equal(vec_ptype2(unspecified(), x), bit64::integer64()) expect_equal(vec_ptype2(x, unspecified()), bit64::integer64()) expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, ""), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("", x), class = "vctrs_error_incompatible_type") expect_error( vec_ptype2(data.frame(), x), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(x, data.frame()), class = "vctrs_error_incompatible_type" ) }) test_that("vec_ptype_abbr.integer64", { expect_equal(vec_ptype_abbr(bit64::as.integer64(1:10)), "int64") expect_equal(vec_ptype_full(bit64::as.integer64(1:10)), "integer64") }) test_that("can sort integer64", { x <- bit64::as.integer64(c(-1, -3, -2, 1)) expect_identical(vec_order(x), int(2, 3, 1, 4)) expect_identical(x[vec_order(x)], bit64::as.integer64(c(-3, -2, -1, 1))) }) test_that("can slice integer64 objects of all dimensions", { x <- bit64::as.integer64(1:8) expect <- bit64::as.integer64(c(1, 3)) expect_identical(vec_slice(x, c(1, 3)), expect) dim(x) <- c(4, 2) expect <- bit64::as.integer64(c(1, 3, 5, 7)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, c(1, 3)), expect) dim(x) <- c(2, 2, 2) expect <- bit64::as.integer64(c(2, 4, 6, 8)) dim(expect) <- c(1, 2, 2) expect_identical(vec_slice(x, 2), expect) }) test_that("can slice integer64 objects with `NA_integer_`", { idx <- c(NA_integer_, 1) x <- bit64::as.integer64(1:8) expect <- bit64::as.integer64(c(NA, 1)) expect_identical(vec_slice(x, idx), expect) dim(x) <- c(4, 2) expect <- bit64::as.integer64(c(NA, 1, NA, 5)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, idx), expect) dim(x) <- c(2, 2, 2) expect <- bit64::as.integer64(c(NA, 1, NA, 3, NA, 5, NA, 7)) dim(expect) <- c(2, 2, 2) expect_identical(vec_slice(x, idx), expect) }) test_that("can init integer64 objects", { idx <- c(NA_integer_, NA_integer_) x <- bit64::as.integer64(1:8) expect_identical(vec_init(x, 2), vec_slice(x, idx)) dim(x) <- c(4, 2) expect_identical(vec_init(x, 2), vec_slice(x, idx)) dim(x) <- c(2, 2, 2) expect_identical(vec_init(x, 2), vec_slice(x, idx)) }) test_that("can chop integer64 objects with `NA_integer_` indices", { idx <- list(NA_integer_, 1) x <- bit64::as.integer64(1:8) expect <- list( bit64::as.integer64(NA), bit64::as.integer64(1) ) expect_identical(vec_chop(x, indices = idx), expect) dim(x) <- c(4, 2) expect <- list( bit64::as.integer64(c(NA, NA)), bit64::as.integer64(c(1, 5)) ) dim(expect[[1]]) <- c(1, 2) dim(expect[[2]]) <- c(1, 2) expect_identical(vec_chop(x, indices = idx), expect) dim(x) <- c(2, 2, 2) expect <- list( bit64::as.integer64(c(NA, NA, NA, NA)), bit64::as.integer64(c(1, 3, 5, 7)) ) dim(expect[[1]]) <- c(1, 2, 2) dim(expect[[2]]) <- c(1, 2, 2) expect_identical(vec_chop(x, indices = idx), expect) }) test_that("equality proxy converts atomic input to data frames of doubles", { x <- bit64::as.integer64(1) expect_identical( vec_proxy_equal(x), data_frame(left = 2147483648, right = 1) ) }) test_that("equality proxy works with 1-D arrays", { x <- bit64::as.integer64(1:6) y <- x dim(y) <- 6 expect_identical( vec_proxy_equal(x), vec_proxy_equal(y) ) }) test_that("equality proxy on >=2-D input converts to data frame and proxies each column", { x <- bit64::as.integer64(1:8) dim(x) <- c(2, 2, 2) proxy1 <- integer64_proxy(x[1:2, 1, 1]) proxy2 <- integer64_proxy(x[1:2, 2, 1]) proxy3 <- integer64_proxy(x[1:2, 1, 2]) proxy4 <- integer64_proxy(x[1:2, 2, 2]) expect_identical( vec_proxy_equal(x), vec_cbind(proxy1, proxy2, proxy3, proxy4, .name_repair = "minimal") ) }) test_that("can detect missing values with integer64 (#1304)", { x <- bit64::as.integer64(c(NA, NA, 2, NA, 2, 2)) expect_identical( vec_detect_missing(x), c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE) ) dim(x) <- c(3, 2) expect_identical(vec_detect_missing(x), c(TRUE, FALSE, FALSE)) }) test_that("can fill missing values with integer64", { x <- bit64::as.integer64(c(NA, NA, 2, NA, 2, 2)) expect <- bit64::as.integer64(c(NA, NA, 2, 2, 2, 2)) expect_identical(vec_fill_missing(x, "down"), expect) dim(x) <- c(3, 2) expect <- bit64::as.integer64(c(NA, NA, 2, 2, 2, 2)) dim(expect) <- c(3, 2) expect_identical(vec_fill_missing(x, "up"), expect) }) test_that("can compare values with integer64", { x <- bit64::as.integer64(c(1, NA, 2)) y <- bit64::as.integer64(c(0, 2, 3)) expect_identical(vec_compare(x, y), c(1L, NA, -1L)) x <- bit64::as.integer64(1:8) y <- bit64::as.integer64(c(1, 2, 1, 5, 1, 5, 1, 5)) dim(x) <- c(2, 2, 2) dim(y) <- c(2, 2, 2) expect_identical(vec_compare(x, y), c(1L, -1L)) }) test_that("integer64 <-> data frame works as expected", { x <- bit64::as.integer64(c(-2, -1, 0, 1)) proxy <- integer64_proxy(x) expect_identical( proxy$left, c(2147483647, 2147483647, 2147483648, 2147483648) ) expect_identical(proxy$right, c(4294967294, 4294967295, 0, 1)) expect_identical(integer64_restore(proxy), x) x <- bit64::as.integer64("9223372036854775807") + -1:0 proxy <- integer64_proxy(x) expect_identical(proxy$left, c(4294967295, 4294967295)) expect_identical(proxy$right, c(4294967294, 4294967295)) expect_identical(integer64_restore(proxy), x) x <- bit64::as.integer64("-9223372036854775807") + 0:1 proxy <- integer64_proxy(x) expect_identical(proxy$left, c(0, 0)) expect_identical(proxy$right, c(1, 2)) expect_identical(integer64_restore(proxy), x) x <- bit64::as.integer64(NA) proxy <- integer64_proxy(x) expect_identical(proxy$left, NA_real_) expect_identical(proxy$right, NA_real_) expect_identical(integer64_restore(proxy), x) }) test_that("`integer64_proxy()` doesn't allow arrays", { x <- bit64::as.integer64(1:6) dim(x) <- c(3, 2) expect_error(integer64_proxy(x), "should not have a `dim` attribute") }) vctrs/tests/testthat/test-vctrs.R0000644000176200001440000000155415065005761016670 0ustar liggesuserstest_that("generics are extensible", { expect_error(vec_cast(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_restore(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_equal(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_proxy_compare(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype2(NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_abbr(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_full(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_arith(NA, NA, NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_ptype_finalise(NA, NA), class = "rlib_error_dots_nonempty") expect_error(vec_assign(NA, NA, NA, NA), class = "rlib_error_dots_nonempty") }) vctrs/tests/testthat/test-list-combine.R0000644000176200001440000020423715132161317020112 0ustar liggesuserstest_that("basic `list_combine()` works", { values <- list(1:2, 3:4) indices <- list(c(4, 1), c(3, 2)) size <- 4 expect_identical_list_combine( x = values, indices = indices, size = size, expect = c(2L, 4L, 3L, 1L) ) }) test_that("`list_combine()` works with homogeneous fallback in `default`", { # This errors expect_snapshot(error = TRUE, { list_combine( list(foobar(1), 1), indices = list(1, 2), size = 2 ) }) # So this should too expect_snapshot(error = TRUE, { list_combine( list(foobar(1)), indices = list(1), size = 2, default = 1 ) }) }) test_that("`list_combine()` returns `unspecified` to retain `size` invariant", { expect_identical( list_combine(list(), indices = list(), size = 0), unspecified() ) expect_identical( list_combine(list(), indices = list(), size = 1), unspecified(1) ) expect_identical( list_combine(list(), indices = list(), size = 2), unspecified(2) ) # With only `NULL` elements, still no known `ptype` expect_identical( list_combine(list(NULL), indices = list(integer()), size = 0), unspecified(0) ) expect_identical( list_combine(list(NULL), indices = list(integer()), size = 2), unspecified(2) ) expect_identical( list_combine(list(NULL), indices = list(1:2), size = 2), unspecified(2) ) # Now ptype is known expect_identical( list_combine( list(NULL), indices = list(integer()), ptype = numeric(), size = 1 ), vec_init(numeric(), n = 1) ) }) test_that("`NULL`s are removed after computing `default`'s index", { expect_identical_list_combine( x = list(1:2, NULL, 7:8), indices = list(1:2, 5:6, 7:8), size = 8, default = 0L, expect = int(1, 2, 0, 0, NA, NA, 7, 8) ) }) test_that("can combine factors", { fctr1 <- factor("z") fctr2 <- factor(c("x", "y")) x <- list(fctr1, fctr2) indices <- list(2, c(3, 1)) # levels are in the order they are seen! expect <- factor(c("y", "z", "x"), levels = c("z", "x", "y")) expect_identical(list_combine(x, indices = indices, size = 3), expect) # With `default`, the `default` is added in as the last item default <- factor("w") expect <- factor(c("y", "z", "x"), levels = c("z", "x", "y", "w")) expect_identical( list_combine(x, indices = indices, size = 3, default = default), expect ) }) test_that("preserves names when inputs are cast to a common type (#1689)", { expect_named( list_combine( list(c(a = 1)), indices = list(1), ptype = integer(), size = 1 ), "a" ) expect_named( list_combine( list(c(a = 1)), indices = list(TRUE), ptype = integer(), size = 1 ), "a" ) # With `default` expect_named( list_combine( list(c(a = 1)), indices = list(1), default = c(b = 0), ptype = integer(), size = 2 ), c("a", "b") ) expect_named( list_combine( list(c(a = 1)), indices = list(c(TRUE, FALSE)), default = c(b = 0), ptype = integer(), size = 2 ), c("a", "b") ) # With name spec name_spec <- "{outer}_{inner}" expect_named( list_combine( list(foo = c(a = 1)), indices = list(1), size = 1, ptype = integer(), name_spec = name_spec ), "foo_a" ) expect_named( list_combine( list(foo = c(a = 1)), indices = list(1), default = c(b = 0), ptype = integer(), name_spec = name_spec, size = 2 ), c("foo_a", "b") ) expect_named( list_combine( list(foo = c(a = 1)), indices = list(c(TRUE, FALSE)), default = c(b = 0), ptype = integer(), name_spec = name_spec, size = 2 ), c("foo_a", "b") ) # When `x` elements are recycled, names are also recycled x <- list(c(a = 1), c(b = 2)) expect_named( list_combine( x, indices = list(1:2, 3:4), size = 4, ptype = integer() ), c("a", "a", "b", "b") ) expect_named( list_combine( x, indices = list( c(TRUE, TRUE, FALSE, FALSE), c(FALSE, FALSE, TRUE, TRUE) ), size = 4, ptype = integer() ), c("a", "a", "b", "b") ) # When `default` elements are recycled, names are also recycled expect_named( list_combine( list(c(a = 1), c(b = 2)), indices = list(1, 3), default = c(c = 0), ptype = integer(), size = 4 ), c("a", "c", "b", "c") ) expect_named( list_combine( list(c(a = 1), c(b = 2)), indices = list( c(TRUE, FALSE, FALSE, FALSE), c(FALSE, FALSE, TRUE, FALSE) ), default = c(c = 0), ptype = integer(), size = 4 ), c("a", "c", "b", "c") ) }) test_that("not all inputs have to be named", { x <- list(c(a = 1), 2, c(c = 3)) indices <- list(2, 1, 3) expect_identical_list_combine( x = x, indices = indices, size = 3, expect = set_names(c(2, 1, 3), c("", "a", "c")) ) expect_identical_list_combine( x = x, indices = indices, size = 4, default = 0, expect = set_names(c(2, 1, 3, 0), c("", "a", "c", "")) ) }) test_that("list_combine() keeps data frame row names", { df1 <- data.frame(x = 1:2, row.names = c("r1", "r2")) df2 <- data.frame(x = 3:4, row.names = c("r3", "r4")) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) result <- list_combine(x, indices = indices, size = sum(lengths(indices))) expect <- c("r2", "r3", "r1", "r4") expect_identical(vec_names(result), expect) default <- data.frame(x = 0L, row.names = "d") result <- list_combine(x, indices = indices, size = 5, default = default) expect <- c("r2", "r3", "r1", "r4", "d") expect_identical(vec_names(result), expect) # With casting ptype <- data.frame(x = double()) default <- data.frame(x = 0L, row.names = "d") result <- list_combine( x = x, indices = indices, size = 5, default = default, ptype = ptype ) expect <- c("r2", "r3", "r1", "r4", "d") expect_identical(vec_names(result), expect) expect_type(result$x, "double") }) test_that("df-col row names are repaired silently", { df1 <- data_frame(x = new_data_frame(list(a = 1), row.names = "inner")) df2 <- data_frame(x = new_data_frame(list(a = 2), row.names = "inner")) x <- list(df1, df2) indices <- list(1, 2) expect_silent({ result <- list_combine(x, indices = indices, size = 2) }) expect_identical(vec_names(result$x), c("inner...1", "inner...2")) default <- data_frame(x = new_data_frame(list(a = 0), row.names = "inner")) expect_silent({ result <- list_combine(x, indices = indices, size = 3, default = default) }) expect_identical( vec_names(result$x), c("inner...1", "inner...2", "inner...3") ) }) test_that("name repair is respected and happens after ordering according to `indices`", { local_name_repair_quiet() x <- list(c(a = 1), c(a = 2)) indices <- list(2, 1) expect_named(list_combine(x, indices = indices, size = 2), c("a", "a")) expect_named( list_combine(x, indices = indices, size = 2, name_repair = "unique"), c("a...1", "a...2") ) # With `default` indices <- list(3, 1) default <- c(a = 0) expect_named( list_combine(x, indices = indices, size = 3, default = default), c("a", "a", "a") ) expect_named( list_combine( x, indices = indices, size = 3, default = default, name_repair = "unique" ), c("a...1", "a...2", "a...3") ) }) test_that("list_combine() works with simple homogeneous foreign S3 classes", { expect_identical( list_combine( list(foobar(1), foobar(2)), indices = list(1, 2), size = 2 ), vec_c(foobar(c(1, 2))) ) # With `default` expect_identical( list_combine( list(foobar(1), foobar(2)), indices = list(3, 1), size = 4, default = foobar(0) ), vec_c(foobar(c(2, 0, 1, 0))) ) }) test_that("list_combine() fails with complex foreign S3 classes", { expect_snapshot(error = TRUE, { x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") list_combine(list(x, y), indices = list(1, 2), size = 2) }) expect_snapshot(error = TRUE, { x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") list_combine( list(x, y), indices = list(1, 2), size = 2, error_call = call("foo"), x_arg = "arg" ) }) # Want this to mention `default` by name expect_snapshot(error = TRUE, { x <- structure(foobar(1), attr_foo = "foo") default <- structure(foobar(2), attr_foo = "bar") list_combine( list(x), indices = list(1), size = 2, default = default ) }) expect_snapshot(error = TRUE, { x <- structure(foobar(1), attr_foo = "foo") default <- structure(foobar(2), attr_foo = "bar") list_combine( list(x), indices = list(1), size = 2, default = default, default_arg = "d" ) }) }) test_that("list_combine() fails with complex foreign S4 classes", { expect_snapshot({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error( list_combine(list(joe, jane), indices = list(1:2, 3), size = 3), class = "vctrs_error_incompatible_type" )) (expect_error( list_combine( list(joe, jane), indices = list(1:2, 3), size = 3, error_call = call("foo"), x_arg = "arg" ), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_combine() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( list_combine( list(foobar(1), "", foobar(2)), indices = list(1, 2, 3), size = 3 ), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` method_foobar <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) foobar(res) } local_methods( c.vctrs_foobar = method_foobar ) expect_identical( list_combine(list(foobar(1), foobar(2)), indices = list(1, 2), size = 2), foobar(c(1, 2)) ) expect_identical( list_combine(list(foobar(1), foobar(2)), indices = list(2, 1), size = 2), foobar(c(2, 1)) ) expect_identical( list_combine( list(NULL, foobar(1), NULL, foobar(2)), indices = list(integer(), 1, integer(), 2), size = 2 ), foobar(c(1, 2)) ) # OOB error is respected expect_error( list_combine( list(foobar(1), foobar(2)), indices = list(1, 3), size = 2 ), class = "vctrs_error_subscript_oob" ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( list_combine( list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1), size = 3 ), foobar(c(3, NA, 2)) ) expect_identical( list_combine( list(foobar(c(1, 2)), foobar(3)), indices = list(c(2, NA), NA), size = 3 ), foobar(c(NA, 1, NA)) ) # Respects `default` expect_identical( list_combine( list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1), default = foobar(0), size = 4 ), foobar(c(3, 0, 2, 0)) ) expect_identical( list_combine( list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1), default = foobar(c(4, 5, 6, 7)), size = 4 ), foobar(c(3, 5, 2, 7)) ) # Names are kept expect_identical( list_combine( list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), indices = list(c(2, 1), 3), size = 3 ), foobar(c(y = 2, x = 1, x = 1)) ) expect_identical( list_combine( list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), indices = list(c(2, 1), 3), size = 5, default = foobar(c(default = 0)) ), foobar(c(y = 2, x = 1, x = 1, default = 0, default = 0)) ) # Recycles to the size of index expect_identical( list_combine( list(foobar(1), foobar(2)), indices = list(c(1, 3), 2), size = 3 ), foobar(c(1, 2, 1)) ) expect_identical( list_combine( list(foobar(1), foobar(2)), indices = list(c(1, 2), integer()), size = 2 ), foobar(c(1, 1)) ) expect_snapshot({ (expect_error( list_combine( list(foobar(1), foobar(2)), indices = list(c(1, 3), integer()), size = 2 ), class = "vctrs_error_subscript_oob" )) }) expect_snapshot({ x <- list(foobar(1:2)) indices <- list(1:3) (expect_error(list_combine(x, indices = indices, size = 3))) (expect_error(list_combine( x, indices = indices, size = 3, x_arg = "arg", error_call = call("foo") ))) }) method_vctrs_c_fallback <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) structure(res, class = "vctrs_c_fallback") } # Registered fallback s3_register("base::c", "vctrs_c_fallback", method_vctrs_c_fallback) expect_identical( list_combine( list( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), indices = list(2, 1), size = 2 ), structure(c(2, 1), class = "vctrs_c_fallback") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( list_combine( list(foobar(list(1)), foobar(list(2))), indices = list(1, 2), size = 2 ), class = "vctrs_error_scalar_type" ) }) test_that("list_combine() falls back to c() if S3 method is available and respects `default`", { # Fallback when the class implements `c()` method_foobar <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) foobar(res) } local_methods( c.vctrs_foobar = method_foobar ) # Respects `default` expect_identical( list_combine( list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1), default = foobar(0), size = 4 ), foobar(c(3, 0, 2, 0)) ) expect_identical( list_combine( list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1), default = foobar(c(4, 5, 6, 7)), size = 4 ), foobar(c(3, 5, 2, 7)) ) # Names are kept expect_identical( list_combine( list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), indices = list(c(2, 1), 3), size = 3 ), foobar(c(y = 2, x = 1, x = 1)) ) expect_identical( list_combine( list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), indices = list(c(2, 1), 3), size = 5, default = foobar(c(default = 0)) ), foobar(c(y = 2, x = 1, x = 1, default = 0, default = 0)) ) }) test_that("list_combine() falls back for S4 classes with a registered c() method", { joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") expect_snapshot({ (expect_error( list_combine(list(joe, 1, jane), indices = list(c(1, 2), 3, 4), size = 4), class = "vctrs_error_incompatible_type" )) }) local_c_counts() expect_identical( list_combine(list(joe, jane), indices = list(c(1, 3), 2), size = 3), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) expect_identical( list_combine( list(NULL, joe, jane), indices = list(integer(), c(1, 3), 2), size = 3 ), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( list_combine(list(joe, jane), indices = list(c(1, 3), 1), size = 3), .Counts(c(3L, NA, 2L), name = "Dispatched") ) expect_identical( list_combine(list(joe, jane), indices = list(c(2, NA), NA), size = 3), .Counts(c(NA, 1L, NA), name = "Dispatched") ) # `default` is respected default <- .Counts(0L, name = "Unknown") expect_identical( list_combine( list(joe, jane), indices = list(c(1, 3), 1), default = default, size = 5 ), .Counts(c(3L, 0L, 2L, 0L, 0L), name = "Dispatched") ) }) test_that("can ignore names in `list_combine()` by providing a `zap()` name-spec (#232)", { expect_snapshot({ (expect_error( list_combine( list(a = c(b = 1:2)), indices = list(1:2), size = 2 ) )) (expect_error( list_combine( list(a = c(b = 1:2)), indices = list(1:2), size = 2, error_call = call("foo") ) )) }) expect_identical( list_combine( list(a = c(b = 1:2), b = 3L), indices = list(1:2, 3), size = 3, name_spec = zap() ), 1:3 ) expect_identical( list_combine( list(a = c(foo = 1:2), b = c(bar = 3L)), indices = list(2:1, 3), size = 3, name_spec = zap() ), c(2L, 1L, 3L) ) expect_identical( list_combine( list(a = c(foo = 1:2), b = c(bar = 3L)), indices = list(2:1, 3), default = c(baz = 0L), size = 4, name_spec = zap() ), c(2L, 1L, 3L, 0L) ) expect_snapshot({ x <- list(a = c(b = c("a", "b")), b = 3L) (expect_error( list_combine( x, indices = list(1:2, 3), size = 3, name_spec = zap() ), class = "vctrs_error_incompatible_type" )) x <- list(a = c(foo = 1:2), b = c(bar = "")) (expect_error( list_combine( x, indices = list(2:1, 3), size = 3, name_spec = zap() ), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_combine() falls back to c() methods (#1120)", { expect_error( list_combine( list(foobar(1), foobar(2, class = "foo")), indices = list(1, 2), size = 2 ), class = "vctrs_error_incompatible_type" ) local_methods( c.vctrs_foobar = function(...) { out <- NextMethod() paste0(rep_along(out, "dispatched"), seq_along(out)) } ) # Homogeneous subclasses xs <- list(foobar(1), foobar(2, class = "foo")) expect_identical( list_combine(xs, indices = list(2, 1), size = 2), c("dispatched2", "dispatched1") ) expect_identical( list_combine(xs, indices = list(3, 1), size = 4, default = foobar(0)), c("dispatched2", "dispatched3", "dispatched1", "dispatched4") ) # Different subclasses xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_identical( list_combine(xs, indices = list(c(2, 1), 3), size = 3), c("dispatched2", "dispatched1", "dispatched3") ) expect_identical( list_combine(xs, indices = list(c(4, 1), 3), size = 5, default = foobar(0)), c("dispatched2", "dispatched4", "dispatched3", "dispatched1", "dispatched5") ) }) test_that("list_combine() fails if foreign classes are not homogeneous and there is no c() method", { xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_error( list_combine(xs, indices = list(c(2, 1), 3), size = 3), class = "vctrs_error_incompatible_type" ) x <- foobar(c(x = 1, y = 2), class = "foo") default <- foobar(c(x = 1), foo = 1) expect_snapshot(error = TRUE, { list_combine( list(x), indices = list(c(1, 2)), size = 3, default = default ) }) }) test_that("recycling error indices are correct even with `NULL` removal", { # Should be element 3! expect_snapshot_list_combine( error = TRUE, x = list(1:2, NULL, 7:8), indices = list(1:2, 5:6, 7:9), size = 9, default = 0L ) }) test_that("size 1 `default` is recycled correctly", { expect_identical_list_combine( x = list(1:2, 5L, 7:8), indices = list(1:2, 5, 7:8), size = 9, default = 0L, expect = int(1, 2, 0, 0, 5, 0, 7, 8, 0) ) }) test_that("`indices` is required!", { expect_error(list_combine(list(1, 2), size = 2)) }) test_that("`size` is required!", { expect_error(list_combine(list(1, 2), indices = list(1, 2))) }) test_that("`x_arg` works", { expect_snapshot(error = TRUE, { list_combine(list(1, "2"), indices = list(1, 2), size = 2, x_arg = "xs") }) expect_snapshot(error = TRUE, { list_combine(list(1, 2), indices = list(1, 2, 3), size = 2, x_arg = "xs") }) }) test_that("`indices_arg` works", { expect_snapshot(error = TRUE, { list_combine(list(1, 2), indices = 1, size = 2, indices_arg = "i") }) expect_snapshot(error = TRUE, { list_combine( list(1, 2), indices = list(1, 2, 3), size = 2, indices_arg = "i" ) }) }) test_that("`...` must be empty", { expect_snapshot(error = TRUE, { list_combine(list(1, 2), indices = list(1, 2), size = 2, "foo") }) }) test_that("list_combine() `default` is inserted correctly", { xs <- list("a", "b") indices <- list(1, 3) expect_identical_list_combine( x = xs, indices = indices, size = 4, default = "c", expect = c("a", "c", "b", "c") ) }) test_that("list_combine() `default` is inserted correctly with data frames", { xs <- list( data.frame(a = 1:2, b = 1:2), data.frame(a = 3L, b = 3L) ) indices <- list(1:2, 5) expect_identical( list_combine( x = xs, indices = indices, size = 6, default = data.frame(a = 0L, b = NA_integer_) ), data.frame( a = int(1, 2, 0, 0, 3, 0), b = int(1, 2, NA, NA, 3, NA) ) ) }) test_that("list_combine() `default` is inserted correctly when there are duplicate indices", { xs <- list("a", "b", "c") indices <- list(1, 1, 3) expect_identical_list_combine( x = xs, indices = indices, size = 4, default = "d", expect = c("b", "d", "c", "d") ) }) test_that("list_combine() `default` is inserted correctly when it is the size of `size`", { xs <- list("2", "4") indices <- list(2, 4) default <- letters[1:5] expect_identical_list_combine( x = xs, indices = indices, size = 5, default = default, expect = c("a", "2", "c", "4", "e") ) }) test_that("list_combine() `default` is correctly not used when all spots are filled", { xs <- list("a", "b", "c") indices <- list(1, 2, 3) expect_identical_list_combine( x = xs, indices = indices, size = 3, default = "d", expect = c("a", "b", "c") ) }) test_that("list_combine() `default` names work correctly with `name_spec`", { xs <- list(x = c(a = "a"), y = c(b = "b"), z = c(c = "c")) indices <- list(1, 3, NA) expect_identical( list_combine( xs, indices = indices, size = 5, default = c(d = "d"), name_spec = "{outer}_{inner}" ), c(x_a = "a", d = "d", y_b = "b", d = "d", d = "d") ) }) test_that("list_combine() `size` type is validated", { expect_snapshot(error = TRUE, { list_combine(list(1), indices = list(1), size = "x") }) }) test_that("list_combine() `indices` are validated against `size`", { # TODO: Not the best error message here expect_snapshot_list_combine( error = TRUE, x = list(1), indices = list(3), size = 2 ) }) test_that("list_combine() `default` vector check is done", { expect_snapshot_list_combine( error = TRUE, x = list(1), indices = list(1), size = 1, default = lm(1 ~ 1), default_arg = "d" ) }) test_that("list_combine() `default` size check is done", { # Must be size 1, or same size as `size` expect_identical_list_combine( x = list(1), indices = list(2), size = 3, default = 0, default_arg = "d", expect = c(0, 1, 0) ) expect_snapshot_list_combine( error = TRUE, x = list(1L), indices = list(1), size = 1, default = 1:2, default_arg = "d" ) }) test_that("list_combine() `default` is taken into account when computing `ptype`", { expect_identical( list_combine( list(1L), indices = list(1), size = 2, default = 1.5, default_arg = "d" ), c(1, 1.5) ) # `default` is not in output, but helps determine output type! expect_identical( list_combine( list(1L), indices = list(1), size = 1, default = 1.5, default_arg = "d" ), 1 ) # Empty `xs` and `indices`, but `default` is provided and determines type, # which would otherwise be expect_identical_list_combine( x = list(), indices = list(), size = 2, default = 0, default_arg = "d", expect = c(0, 0) ) # Computed `ptype` among `xs` isn't compatible with `default` expect_snapshot(error = TRUE, { list_combine( list(x = 1), indices = list(1), size = 2, default = "a", default_arg = "d" ) }) # Provided `ptype` isn't compatible with `default` expect_snapshot(error = TRUE, { list_combine( list(x = 1L), indices = list(1), size = 2, default = 1.5, default_arg = "d", ptype = integer() ) }) }) test_that("list_combine() works with data frame `default` with fallback columns", { x <- list( data_frame(a = foobar(1:2), b = 3:4, c = foobar(5:6)), data_frame(a = foobar(3L), b = 5L, c = foobar(7L)) ) indices <- list( c(3, NA), 2 ) default <- data_frame(a = foobar(0L), b = 0L, c = foobar(-1L)) expect_identical( list_combine( x = x, indices = indices, size = 5, default = default ), data_frame( a = foobar(int(0, 3, 1, 0, 0)), b = int(0, 5, 3, 0, 0), c = foobar(int(-1, 7, 5, -1, -1)) ) ) with_c_foobar({ expect_identical( list_combine( x = x, indices = indices, size = 5, default = default ), data_frame( a = foobar_c(int(0, 3, 1, 0, 0)), b = int(0, 5, 3, 0, 0), c = foobar_c(int(-1, 7, 5, -1, -1)) ) ) }) }) test_that("list_combine() `unmatched = 'error'` doesn't error when all locations are covered", { expect_identical_list_combine( x = list(1:3), indices = list(1:3), size = 3, unmatched = "error", expect = 1:3 ) expect_identical_list_combine( x = list(1:3), indices = list(c(TRUE, TRUE, TRUE)), size = 3, unmatched = "error", expect = 1:3 ) }) test_that("list_combine() `unmatched = 'error'` doesn't error in the empty case", { expect_identical( list_combine(list(), indices = list(), size = 0, unmatched = "error"), unspecified() ) }) test_that("list_combine() `unmatched = 'error'` errors with unmatched `indices` when `size` is used", { expect_snapshot(error = TRUE, { # Duplicates result in unmatched locations list_combine( list(1, 1), indices = list(1, 1), size = 2, unmatched = "error" ) }) expect_snapshot(error = TRUE, { # `NA` results in unmatched locations list_combine( list(1, 1), indices = list(1, NA), size = 2, unmatched = "error" ) }) expect_snapshot(error = TRUE, { # `NA` results in unmatched locations list_combine( list(1:9, 1:9), indices = list( c(TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, NA, NA, NA) ), size = 9, unmatched = "error" ) }) expect_snapshot(error = TRUE, { # Unused location list_combine( list(1, 3), indices = list(1, 3), size = 3, unmatched = "error" ) }) expect_snapshot(error = TRUE, { # Unused location list_combine( list(1, 1), indices = list(c(TRUE, FALSE), c(TRUE, FALSE)), size = 2, unmatched = "error" ) }) expect_snapshot(error = TRUE, { list_combine( list(), indices = list(), size = 2, unmatched = "error" ) }) }) test_that("list_combine() `unmatched = 'error'` errors pluralize correctly", { expect_snapshot(error = TRUE, { # One location list_combine( list(1, 3), indices = list(1, 3), size = 3, unmatched = "error" ) }) expect_snapshot(error = TRUE, { # Two locations list_combine( list(1, 3), indices = list(1, 3), size = 4, unmatched = "error" ) }) expect_snapshot(error = TRUE, { # Many locations list_combine( list(1, 3), indices = list(1, 3), size = 100, unmatched = "error" ) }) }) test_that("list_combine() `unmatched = 'error'` error classes are as expected", { cnd <- catch_cnd(list_combine( list(1, 3), indices = list(1, 3), size = 3, unmatched = "error" )) expect_true(inherits_all( cnd, c("vctrs_error_combine_unmatched", "vctrs_error_combine") )) }) test_that("list_combine() `unmatched = 'error'` can't be set when `default` is also set", { expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), default = 1, size = 1, unmatched = "error" ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), default = 1, size = 1, unmatched = "error", default_arg = ".default", error_call = quote(foo()) ) }) }) test_that("list_combine() `unmatched` is validated", { expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, unmatched = "e" ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, unmatched = c("a", "b") ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, unmatched = NA_character_ ) }) # With error call expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, unmatched = "e", error_call = quote(foo()) ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, unmatched = c("a", "b"), error_call = quote(foo()) ) }) }) test_that("list_combine() `multiple` is validated", { expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, multiple = "a" ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, multiple = c("a", "b") ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, multiple = NA_character_ ) }) # With error call expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, multiple = "a", error_call = quote(foo()) ) }) expect_snapshot(error = TRUE, { list_combine( list(1), indices = list(1), size = 1, multiple = c("a", "b"), error_call = quote(foo()) ) }) }) test_that("`NA` indices are considered unmatched locations", { # The `NA` index is considered "unmatched". It's the same as providing a # logical index with an `NA` in it, like indices in this case of: list(c(TRUE, # FALSE, FALSE), c(FALSE, NA, TRUE)) We consider that `NA` to be unmatched, # and running vec_as_location on that gives the same index as this. expect_identical_list_combine( x = list(1L, 2:3), indices = list(1, c(NA, 3)), size = 3, default = 0L, expect = c(1L, 0L, 3L) ) expect_identical_list_combine( x = list(1L, 2:3), indices = list(c(TRUE, FALSE, FALSE), c(FALSE, NA, TRUE)), size = 3, default = 0L, expect = c(1L, 0L, 3L) ) # Which means this errors expect_snapshot(error = TRUE, { list_combine( x = list(1, 2:3), indices = list(1, c(NA, 3)), size = 3, unmatched = "error" ) }) expect_snapshot(error = TRUE, { list_combine( x = list(1, 2:3), indices = list(c(TRUE, FALSE, FALSE), c(FALSE, NA, TRUE)), size = 3, unmatched = "error" ) }) }) test_that("`indices` corresponding to `NULL` values are considered matched", { # These are considered "matched" but there is nothing to put there, # so `NA` ends up in the slot expect_identical_list_combine( x = list(1, NULL), indices = list(1, c(2, 3)), size = 3, default = 0, expect = c(1, NA, NA) ) expect_identical_list_combine( x = list(1, NULL), indices = list(c(TRUE, FALSE, FALSE), c(FALSE, TRUE, TRUE)), size = 3, default = 0, expect = c(1, NA, NA) ) # Which means this is not an error expect_identical_list_combine( x = list(1, NULL), indices = list(1, c(2, 3)), size = 3, unmatched = "error", expect = c(1, NA, NA) ) expect_identical_list_combine( x = list(1, NULL), indices = list(c(TRUE, FALSE, FALSE), c(FALSE, TRUE, TRUE)), size = 3, unmatched = "error", expect = c(1, NA, NA) ) }) test_that("`x` must be a list", { expect_snapshot(error = TRUE, { list_combine(1, indices = list(1), size = 1) }) expect_snapshot(error = TRUE, { list_combine( 1, indices = list(1), size = 1, error_call = call("foo"), x_arg = "arg" ) }) expect_snapshot(error = TRUE, { list_combine(data.frame(x = 1), indices = list(1), size = 1) }) expect_snapshot(error = TRUE, { list_combine(array(list(1)), indices = list(1), size = 1) }) }) test_that("`indices` must be a list", { expect_snapshot(error = TRUE, { list_combine(list(1), indices = 1, size = 1) }) expect_snapshot(error = TRUE, { list_combine(list(1), indices = 1, size = 1, error_call = call("foo")) }) expect_snapshot(error = TRUE, { list_combine(list(1), indices = data.frame(x = 1), size = 1) }) }) test_that("`indices` has a restricted type", { expect_error( list_combine(list(1), indices = list("x"), size = 1), class = "vctrs_error_subscript_type" ) expect_error( list_combine(list(1), indices = list(quote(name)), size = 1), class = "vctrs_error_subscript_type" ) }) test_that("`x` and `indices` must be lists of the same size", { expect_snapshot(error = TRUE, { list_combine(list(1, 2), indices = list(1), size = 1) }) }) test_that("can combine with an AsIs list (#1463)", { x <- I(list(1, 2)) expect_identical(list_combine(x, indices = list(1, 2), size = 2), c(1, 2)) }) test_that("can combine empty vectors", { expect_identical( list_combine( list(), indices = list(), size = 0 ), unspecified() ) expect_identical( list_combine( list(), indices = list(), size = 0, multiple = "first" ), unspecified() ) expect_identical( list_combine( list(), indices = list(), size = 0, ptype = numeric() ), numeric() ) expect_identical( list_combine( list(), indices = list(), size = 0, ptype = numeric(), multiple = "first" ), numeric() ) }) test_that("can combine a list of NULL", { expect_identical( list_combine(list(NULL), indices = list(integer()), size = 0), unspecified(0) ) expect_identical( list_combine(list(NULL), indices = list(integer()), size = 1), unspecified(1) ) expect_identical( list_combine( list(NULL), indices = list(integer()), size = 0, ptype = numeric() ), numeric() ) expect_identical( list_combine( list(NULL), indices = list(integer()), ptype = numeric(), size = 1 ), vec_init(numeric(), n = 1) ) expect_identical( list_combine( list(NULL, NULL), indices = list(integer(), integer()), size = 0, ptype = numeric() ), numeric() ) }) test_that("NULLs are ignored when combined with other vectors", { expect_identical_list_combine( x = list("a", NULL, "b"), indices = list(2, integer(), 1), size = 2, expect = c("b", "a") ) }) test_that("can use a `NULL` element with a corresponding index", { # Because of `size = 2`, we must return something with `size = 2`. # Our sized identity element is `unspecified(2)`. expect_identical( list_combine(list(NULL), indices = list(1:2), size = 2L), unspecified(2) ) expect_identical( list_combine(list(NULL), indices = list(1:2), size = 2, ptype = integer()), c(NA_integer_, NA_integer_) ) x <- list("a", NULL, c("b", "c")) indices <- list(3L, c(1L, 4L), c(2L, 5L)) expect_identical_list_combine( x = x, indices = indices, size = 5, expect = c(NA, "b", "a", NA, "c") ) }) test_that("can combine atomic vectors", { expect_identical_list_combine( x = list(1, 2), indices = list(2, 1), size = 2, expect = c(2, 1) ) expect_identical_list_combine( x = list("a", "b"), indices = list(2, 1), size = 2, expect = c("b", "a") ) }) test_that("can combine lists", { x <- list(list("a", "b"), list("c")) indices <- list(c(2, 3), 1) expect_identical( list_combine(x, indices = indices, size = 3), list("c", "a", "b") ) }) test_that("can combine data frames", { df1 <- data_frame(x = 1:2) df2 <- data_frame(x = 3:4) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) expect <- vec_slice(vec_c(df1, df2), vec_order(vec_c(!!!indices))) expect_identical(list_combine(x, indices = indices, size = 4), expect) }) test_that("can fallback when combining matrices", { mat1 <- matrix(1:4, nrow = 2, ncol = 2) mat2 <- matrix(5:10, nrow = 3, ncol = 2) x <- list(mat1, mat2) indices <- list(c(4, 1), c(2, 3, 5)) expect <- vec_slice(vec_c(mat1, mat2), vec_order(vec_c(!!!indices))) expect_identical(list_combine(x, indices = indices, size = 5), expect) }) test_that("can fallback when combining matrices and using `default`", { mat1 <- matrix(1:4, nrow = 2, ncol = 2) mat2 <- matrix(5:10, nrow = 3, ncol = 2) x <- list(mat1, mat2) indices <- list(c(5, 1), c(2, 3, 7)) default <- matrix(11:24, nrow = 7, ncol = 2) size <- 7 expect <- vec_c( vec_slice(mat1, 2), vec_slice(mat2, 1), vec_slice(mat2, 2), vec_slice(default, 4), vec_slice(mat1, 1), vec_slice(default, 6), vec_slice(mat2, 3) ) expect_identical( list_combine(x, indices = indices, size = size, default = default), expect ) }) test_that("can fallback when combining arrays of >2D", { arr1 <- array(1:8, c(2, 2, 2)) arr2 <- matrix(9:10, c(1, 2)) x <- list(arr1, arr2) indices <- list(c(3, 1), 2) expect <- vec_slice(vec_c(arr1, arr2), vec_order(vec_c(!!!indices))) expect_identical(list_combine(x, indices = indices, size = 3), expect) }) test_that("common type failure after common class fallback reports the original class (#1981)", { int <- foobar(int(1)) dbl <- foobar(dbl(1)) # Works expect_identical( list_combine( list(int, int), indices = list(1, 2), size = 2 ), foobar(int(1, 1)) ) # Failure with 1 and 2 expect_snapshot(error = TRUE, { list_combine( list(int, dbl), indices = list(1, 2), size = 2 ) }) # Failure with 1 and 3 expect_snapshot(error = TRUE, { list_combine( list(int, int, dbl), indices = list(1, 2, 3), size = 3 ) }) # Failure with 1 and `default` expect_snapshot(error = TRUE, { list_combine( list(int, int), indices = list(1, 2), size = 2, default = dbl ) }) }) test_that("can combine with all size 0 elements and get the right ptype", { x <- list(integer(), integer()) indices <- list(integer(), integer()) expect_identical_list_combine( x = x, indices = indices, size = 0, expect = integer() ) }) test_that("can combine with some size 0 elements", { x <- list(integer(), 1:2, integer()) indices <- list(integer(), 2:1, integer()) expect_identical_list_combine( x = x, indices = indices, size = 2, expect = 2:1 ) }) test_that("NULL is a valid index", { expect_identical_list_combine( x = list(1, 2), indices = list(NULL, 1), size = 1, expect = 2 ) expect_snapshot_list_combine( error = TRUE, x = list(1, 2), indices = list(NULL, 2), size = 1 ) }) test_that("combining recycles elements of x to the size of the index", { x <- list(1, 2) expect_identical_list_combine( x = x, indices = list(c(3, 4, 5), c(2, 1)), size = 5, expect = c(2, 2, 1, 1, 1) ) expect_identical_list_combine( x = x, indices = list( c(FALSE, FALSE, TRUE, TRUE, TRUE), c(TRUE, TRUE, FALSE, FALSE, FALSE) ), size = 5, expect = c(2, 2, 1, 1, 1) ) expect_identical_list_combine( x = x, indices = list(c(3, 4, 5), c(2, 1)), size = 5, slice_x = TRUE, expect = c(2, 2, 1, 1, 1) ) expect_identical_list_combine( x = x, indices = list( c(FALSE, FALSE, TRUE, TRUE, TRUE), c(TRUE, TRUE, FALSE, FALSE, FALSE) ), size = 5, slice_x = TRUE, expect = c(2, 2, 1, 1, 1) ) x <- list(1:2) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(1:3), size = 4 ) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(1:3), size = 4, x_arg = "arg" ) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(1:3), size = 4, slice_x = TRUE ) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(1:3), size = 4, slice_x = TRUE, x_arg = "arg" ) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(c(TRUE, TRUE, TRUE, TRUE)), size = 4 ) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(c(TRUE, TRUE, TRUE, TRUE)), size = 4, x_arg = "arg" ) expect_snapshot_list_combine( error = TRUE, x = x, indices = list(c(TRUE, TRUE, TRUE, TRUE)), size = 4, slice_x = TRUE ) }) test_that("combining takes the common type", { x <- list(1, "a") indices <- list(1, 2) expect_snapshot_list_combine( error = TRUE, x = x, indices = indices, size = 2 ) x <- list(1, 2L) expect_identical(list_combine(x, indices = indices, size = 2), c(1, 2)) }) test_that("common type failure uses positional errors", { expect_snapshot({ x <- list(1, a = "x", 2) # Looking for `x[[1]]` and `x$a` (expect_error(list_combine(x, indices = list(2, 1, 3), size = 3))) # Directed cast should also produce directional errors (#1690) (expect_error(list_combine( x, indices = list(2, 1, 3), size = 3, ptype = double() ))) # Lossy cast y <- list(1, a = 2.5) (expect_error(list_combine( y, indices = list(2, 1), size = 2, ptype = integer() ))) }) }) test_that("can specify a ptype to override common type", { indices <- list(1, 2) x <- list(1, 2L) expect_identical( list_combine(x, indices = indices, size = 2, ptype = integer()), c(1L, 2L) ) x <- list(1.5, 2) expect_snapshot(error = TRUE, cnd_class = TRUE, { list_combine(x, indices = indices, size = 2, ptype = integer()) }) expect_snapshot(error = TRUE, cnd_class = TRUE, { list_combine( x, indices = indices, size = 2, ptype = integer(), error_call = call("foo"), x_arg = "arg" ) }) }) test_that("common type is correctly computed with unspecified values and a `default` (#2094)", { expect_identical( list_combine( x = list(NA), indices = list(1), size = 1, default = "a" ), NA_character_ ) expect_identical( list_combine( x = list(NA), indices = list(1), size = 2, default = "a" ), c(NA, "a") ) }) test_that("outer names are kept", { x <- list(x = 1, y = 2) expect_named_list_combine( x = x, indices = list(2, 1), size = 2, expect = c("y", "x") ) }) test_that("outer names are recycled in the right order", { x <- list(x = 1, y = 2) expect_snapshot(error = TRUE, { list_combine(x, indices = list(c(1, 2), 3), size = 3) }) expect_named( list_combine( x, indices = list(c(1, 3), 2), size = 3, name_spec = "{outer}_{inner}" ), c("x_1", "y", "x_2") ) expect_named( list_combine( x, indices = list(c(3, 1), 2), size = 3, name_spec = "{outer}_{inner}" ), c("x_2", "y", "x_1") ) }) test_that("outer names can be merged with inner names", { x <- list(x = c(a = 1), y = c(b = 2)) expect_named( list_combine( x, indices = list(2, 1), size = 2, name_spec = "{outer}_{inner}" ), c("y_b", "x_a") ) }) test_that("preserves names when inputs are cast to a common type (#1689)", { expect_named( list_combine( list(c(a = 1)), indices = list(1), size = 1, ptype = integer() ), "a" ) # With `default` expect_named( list_combine( list(c(a = 1)), indices = list(1), default = c(b = 0), ptype = integer(), size = 2 ), c("a", "b") ) # With name spec name_spec <- "{outer}_{inner}" expect_named( list_combine( list(foo = c(a = 1)), ptype = integer(), name_spec = name_spec, indices = list(1), size = 1 ), "foo_a" ) expect_named( list_combine( list(foo = c(a = 1)), indices = list(1), default = c(b = 0), ptype = integer(), name_spec = name_spec, size = 2 ), c("foo_a", "b") ) # When `x` elements are recycled, names are also recycled x <- list(c(a = 1), c(b = 2)) indices <- list(1:2, 3:4) expect_named( list_combine(x, indices = indices, size = 4, ptype = integer()), c("a", "a", "b", "b") ) # When `default` elements are recycled, names are also recycled expect_named( list_combine( list(c(a = 1), c(b = 2)), indices = list(1, 3), default = c(c = 0), ptype = integer(), size = 4 ), c("a", "c", "b", "c") ) }) test_that("individual data frame columns retain vector names", { df1 <- data_frame(x = c(a = 1, b = 2)) df2 <- data_frame(x = c(c = 3)) x <- list(df1, df2) indices <- list(c(1, 2), 3) result <- list_combine(x, indices = indices, size = 3) expect_named(result$x, c("a", "b", "c")) # Names should be identical to equivalent `vec_c()` call expect_identical( list_combine(x, indices = indices, size = 3), vec_c(!!!x) ) }) test_that("assigning to the same location twice means last wins", { x <- list(1:2, 3L) indices <- list(1:2, 1L) expect_identical_list_combine( x = x, indices = indices, size = 3, expect = c(3L, 2L, NA) ) expect_identical_list_combine( x = x, indices = indices, size = 2, expect = c(3L, 2L) ) }) test_that("when assigning to the same location, names are continually overwritten (#2019)", { expect_identical_list_combine( x = list( c(a = 1, b = 2), c(c = 3, d = 4) ), indices = list( c(1, 2), c(1, 3) ), size = 3, multiple = "last", expect = c(c = 3, b = 2, d = 4) ) expect_identical_list_combine( x = list( c(a = 1, b = 2), c(c = 3, d = 4) ), indices = list( c(1, 2), c(1, 3) ), size = 3, multiple = "first", expect = c(a = 1, b = 2, d = 4) ) }) test_that("df-cols: when assigning to the same location, names are continually overwritten (#2019)", { expect_identical( list_combine( x = list( data_frame(x = c(a = 1, b = 2), y = c("foo", "bar")), data_frame(x = c(c = 0), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "last" ), data_frame( x = c(c = 0, b = 2), y = c("baz", "bar") ) ) expect_identical( list_combine( x = list( data_frame(x = c(a = 1, b = 2), y = c("foo", "bar")), data_frame(x = c(c = 0), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "first" ), data_frame( x = c(a = 1, b = 2), y = c("foo", "bar") ) ) # Homogenous fallback expect_identical( list_combine( x = list( data_frame(x = foobar(c(a = 1, b = 2)), y = c("foo", "bar")), data_frame(x = foobar(c(c = 0)), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "last" ), data_frame( x = foobar(c(c = 0, b = 2)), y = c("baz", "bar") ) ) expect_identical( list_combine( x = list( data_frame(x = foobar(c(a = 1, b = 2)), y = c("foo", "bar")), data_frame(x = foobar(c(c = 0)), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "first" ), data_frame( x = foobar(c(a = 1, b = 2)), y = c("foo", "bar") ) ) # `c()` fallback with_c_foobar({ expect_identical( list_combine( x = list( data_frame(x = foobar(c(a = 1, b = 2)), y = c("foo", "bar")), data_frame(x = foobar(c(c = 0)), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "last" ), data_frame( x = foobar_c(c(c = 0, b = 2)), y = c("baz", "bar") ) ) }) with_c_foobar({ expect_identical( list_combine( x = list( data_frame(x = foobar(c(a = 1, b = 2)), y = c("foo", "bar")), data_frame(x = foobar(c(c = 0)), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "first" ), data_frame( x = foobar_c(c(a = 1, b = 2)), y = c("foo", "bar") ) ) }) }) test_that("when assigning to the same location, names are cleared as needed (#2019)", { expect_identical_list_combine( x = list( c(a = 1, b = 2), c(3, 4) ), indices = list( c(1, 2), c(1, 3) ), size = 3, multiple = "last", expect = c(3, b = 2, 4) ) expect_identical_list_combine( x = list( c(1, 2), c(c = 3, d = 4) ), indices = list( c(1, 2), c(1, 3) ), size = 3, multiple = "first", expect = c(1, 2, d = 4) ) }) test_that("df-cols: when assigning to the same location, names are cleared as needed (#2019)", { # - 1st element has names # - 2nd element doesn't have names, so `""` is used as the name to overwrite # the names written when inserting the 1st element # - Reversed for `multiple = "first"` expect_identical( list_combine( x = list( data_frame(x = c(a = 1, b = 2), y = c("foo", "bar")), data_frame(x = 0, y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "last" ), data_frame( x = set_names(c(0, 2), c("", "b")), y = c("baz", "bar") ) ) expect_identical( list_combine( x = list( data_frame(x = c(1, 2), y = c("foo", "bar")), data_frame(x = c(c = 0), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "first" ), data_frame( x = set_names(c(1, 2), c("", "")), y = c("foo", "bar") ) ) # Homogenous fallback expect_identical( list_combine( x = list( data_frame(x = foobar(c(a = 1, b = 2)), y = c("foo", "bar")), data_frame(x = foobar(0), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "last" ), data_frame( x = foobar(set_names(c(0, 2), c("", "b"))), y = c("baz", "bar") ) ) expect_identical( list_combine( x = list( data_frame(x = foobar(c(1, 2)), y = c("foo", "bar")), data_frame(x = foobar(c(c = 0)), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "first" ), data_frame( x = foobar(set_names(c(1, 2), c("", ""))), y = c("foo", "bar") ) ) # `c()` fallback with_c_foobar({ expect_identical( list_combine( x = list( data_frame(x = foobar(c(a = 1, b = 2)), y = c("foo", "bar")), data_frame(x = foobar(0), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "last" ), data_frame( x = foobar_c(set_names(c(0, 2), c("", "b"))), y = c("baz", "bar") ) ) }) with_c_foobar({ expect_identical( list_combine( x = list( data_frame(x = foobar(c(1, 2)), y = c("foo", "bar")), data_frame(x = foobar(c(c = 0)), y = "baz") ), indices = list( c(1, 2), 1 ), size = 2, multiple = "first" ), data_frame( x = foobar_c(set_names(c(1, 2), c("", ""))), y = c("foo", "bar") ) ) }) }) test_that("index values are validated", { x <- list(1, 2) indices1 <- list(4, 1) indices2 <- list(c(1, 4), 2) indices3 <- list(c(1, 3, 4), 2) expect_error( list_combine(x, indices = indices1, size = 2), class = "vctrs_error_subscript_oob" ) expect_error( list_combine(x, indices = indices2, size = 3), class = "vctrs_error_subscript_oob" ) expect_identical( list_combine(x, indices = indices3, size = 4), c(1, 2, 1, 1) ) }) test_that("name repair is respected and happens after ordering according to `indices`", { local_name_repair_quiet() x <- list(c(a = 1), c(a = 2)) indices <- list(2, 1) expect_named(list_combine(x, indices = indices, size = 2), c("a", "a")) expect_named( list_combine(x, indices = indices, size = 2, name_repair = "unique"), c("a...1", "a...2") ) # With `default` indices <- list(3, 1) default <- c(a = 0) expect_named( list_combine(x, indices = indices, size = 3, default = default), c("a", "a", "a") ) expect_named( list_combine( x, indices = indices, size = 3, default = default, name_repair = "unique" ), c("a...1", "a...2", "a...3") ) }) test_that("list_combine() can repair names quietly", { local_name_repair_verbose() x <- c(x = "a", x = "b", x = "c") indices <- list(2, c(3, 1)) expect_snapshot({ res <- list_combine( vec_chop(x, indices = indices), indices = indices, size = 3, name_repair = "unique_quiet" ) }) expect_named(res, c("x...1", "x...2", "x...3")) x <- c("if" = "a", "in" = "b", "for" = "c") indices <- list(2, c(3, 1)) expect_snapshot({ res <- list_combine( vec_chop(x, indices = indices), indices = indices, size = 3, name_repair = "universal_quiet" ) }) expect_named(res, c(".if", ".in", ".for")) }) test_that("list_combine() errors on unsupported location values", { expect_snapshot_list_combine( error = TRUE, x = list(1, 2), indices = list(c(1, 2), 0), size = 3 ) expect_snapshot_list_combine( error = TRUE, x = list(1), indices = list(-1), size = 1 ) }) test_that("missing values propagate", { expect_identical_list_combine( x = list(1, 2), indices = list(c(NA_integer_, NA_integer_), c(NA_integer_, 3)), size = 4, expect = c(NA, NA, 2, NA) ) }) test_that("list_combine() fallback doesn't support `name_spec` or `ptype`", { expect_snapshot({ foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") (expect_error( with_c_foobar(list_combine( list(foo, bar), indices = list(1, 2), size = 2, name_spec = "{outer}_{inner}" )), "name specification" )) # With error call (expect_error( with_c_foobar(list_combine( list(foo, bar), indices = list(1, 2), size = 2, name_spec = "{outer}_{inner}", error_call = call("foo") )), "name specification" )) # Used to be an error about `ptype` x <- list(foobar(1)) (expect_error( with_c_foobar(list_combine(x, indices = list(1), size = 1, ptype = "")), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_combine() supports numeric S3 indices", { local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_foobar") }, vec_ptype2.vctrs_foobar.integer = function(x, y, ...) foobar(integer()), vec_cast.integer.vctrs_foobar = function(x, to, ...) vec_data(x) ) expect_identical( list_combine(list(1), indices = list(foobar(1L)), size = 1), 1 ) }) test_that("list_combine() does not support non-numeric S3 indices", { expect_snapshot({ (expect_error( list_combine(list(1), indices = list(factor("x")), size = 1), class = "vctrs_error_subscript_type" )) (expect_error( list_combine(list(1), indices = list(foobar(1L)), size = 1), class = "vctrs_error_subscript_type" )) }) }) test_that("list_combine() supports named `indices` (#2095)", { # Particularly in the fallback case where we have to `vec_c()` the `indices` # With outer names on `indices`: expect_identical_list_combine( x = list(c("a", "b")), indices = list(a = c(1, 2)), size = 2, expect = c("a", "b") ) expect_identical_list_combine( x = list(c("a", "b", "c", "d")), indices = list(a = c(FALSE, TRUE, FALSE, TRUE)), size = 4, slice_x = TRUE, expect = c(NA, "b", NA, "d") ) # With outer and inner names on `indices`: expect_identical_list_combine( x = list(c("a", "b"), c("c", "d")), indices = list(a = c(x = 1, y = 2), b = c(x = 3, y = 4)), size = 4, expect = c("a", "b", "c", "d") ) expect_identical_list_combine( x = list(c("a", "b", "c", "d"), c("e", "f", "g", "h")), indices = list( a = c(w = FALSE, x = TRUE, y = FALSE, z = TRUE), b = c(w = FALSE, x = FALSE, y = TRUE, z = FALSE) ), size = 4, slice_x = TRUE, expect = c(NA, "b", "g", "d") ) }) test_that("`list_combine()` with `slice_x = FALSE`", { values <- list(1:2, 3:4) size <- 4 indices <- list( c(TRUE, FALSE, FALSE, TRUE), c(FALSE, TRUE, TRUE, FALSE) ) expect_identical_list_combine( x = values, indices = indices, size = size, expect = int(1, 3, 4, 2) ) indices <- list( c(1, 4), c(2, 3) ) expect_identical_list_combine( x = values, indices = indices, size = size, expect = int(1, 3, 4, 2) ) }) test_that("`list_combine()` with `slice_x = FALSE` / recycling", { values <- list(1L, 2L) size <- 4 indices <- list( c(TRUE, FALSE, FALSE, TRUE), c(FALSE, TRUE, TRUE, FALSE) ) expect_identical_list_combine( x = values, indices = indices, size = size, expect = int(1, 2, 2, 1) ) indices <- list( c(1, 4), c(2, 3) ) expect_identical_list_combine( x = values, indices = indices, size = size, expect = int(1, 2, 2, 1) ) }) test_that("`list_combine()` with `slice_x = TRUE`", { values <- list(1:4, 5:8) size <- 4 indices <- list( c(TRUE, FALSE, FALSE, TRUE), c(FALSE, TRUE, TRUE, FALSE) ) expect_identical_list_combine( x = values, indices = indices, size = size, slice_x = TRUE, expect = int(1, 6, 7, 4) ) indices <- list( c(1, 4), c(2, 3) ) expect_identical_list_combine( x = values, indices = indices, size = size, slice_x = TRUE, expect = int(1, 6, 7, 4) ) }) test_that("`list_combine()` with `slice_x = TRUE` / recycling", { values <- list(1L, 2L) size <- 4 indices <- list( c(TRUE, FALSE, FALSE, TRUE), c(FALSE, TRUE, TRUE, FALSE) ) expect_identical_list_combine( x = values, indices = indices, size = size, slice_x = TRUE, expect = int(1, 2, 2, 1) ) indices <- list( c(1, 4), c(2, 3) ) expect_identical_list_combine( x = values, indices = indices, size = size, slice_x = TRUE, expect = int(1, 2, 2, 1) ) }) test_that("`list_combine()` with logical `indices` checks `indices` size", { values <- list(1L, 2L) indices <- list( c(TRUE, FALSE, FALSE, TRUE), c(FALSE, TRUE, TRUE, FALSE) ) size <- 5 # This isn't the most obvious error but it is hard to know how to do better. # Ideally it would report a size error for `indices`, right now it falls # through to `list_as_locations()` which doesn't allow logical indices. expect_snapshot_list_combine( error = TRUE, x = values, indices = indices, size = size ) }) test_that("`multiple` can let first index win", { x <- list( 1:3, 4:6 ) indices <- list( c(1, 2, 3), c(2, 3, 4) ) size <- 4 expect_identical_list_combine( x = x, indices = indices, size = size, multiple = "first", expect = int(1, 2, 3, 6) ) expect_identical_list_combine( x = x, indices = indices, size = size, multiple = "last", expect = int(1, 4, 5, 6) ) }) test_that("`multiple` works with data frames", { x <- list( data_frame(a = 1:3, b = foobar(4:6), c = foobar(c(7, 8, 9))), data_frame(a = 4:6, b = foobar(7:9), c = foobar(c(10, 11, 12))) ) indices <- list( c(1, 2, 3), c(2, 3, 4) ) size <- 4 # Normal and homogeneous fallback mixed expect_identical( list_combine( x = x, indices = indices, size = size, multiple = "first" ), data_frame( a = int(1, 2, 3, 6), b = foobar(int(4, 5, 6, 9)), c = foobar(dbl(7, 8, 9, 12)) ) ) # Normal and `c()` fallback mixed with_c_foobar({ expect_identical( list_combine( x = x, indices = indices, size = size, multiple = "first" ), data_frame( a = int(1, 2, 3, 6), b = foobar_c(int(4, 5, 6, 9)), c = foobar_c(dbl(7, 8, 9, 12)) ) ) }) }) test_that("`multiple` works with data frame columns", { x <- list( data_frame( a = 1:3, b = data_frame(x = c("a", "b", "c"), y = foobar(4:6)), c = foobar(c(7, 8, 9)) ), data_frame( a = 4:6, b = data_frame(x = c("d", "e", "f"), y = foobar(7:9)), c = foobar(c(10, 11, 12)) ) ) indices <- list( c(1, 2, 3), c(2, 3, 4) ) size <- 4 # Normal and homogeneous fallback mixed expect_identical( list_combine( x = x, indices = indices, size = size, multiple = "first" ), data_frame( a = int(1, 2, 3, 6), b = data_frame( x = c("a", "b", "c", "f"), y = foobar(int(4, 5, 6, 9)) ), c = foobar(dbl(7, 8, 9, 12)) ) ) # Normal and `c()` fallback mixed with_c_foobar({ expect_identical( list_combine( x = x, indices = indices, size = size, multiple = "first" ), data_frame( a = int(1, 2, 3, 6), b = data_frame( x = c("a", "b", "c", "f"), y = foobar_c(int(4, 5, 6, 9)) ), c = foobar_c(dbl(7, 8, 9, 12)) ) ) }) }) test_that("`multiple` shows correctly indexed errors", { # In fallback, reversal happens after recycling and slicing expect_snapshot_list_combine( error = TRUE, x = list(1:2, 3L), indices = list(1:3, 4), size = 4, multiple = "first" ) expect_snapshot_list_combine( error = TRUE, x = list(1:2, 3L), indices = list(1:3, 4), size = 4, multiple = "last" ) # If there is only 1 issue in `x` sizes, they report consistently expect_snapshot_list_combine( error = TRUE, x = list(1:4, 3:5), indices = list(1:3, 4), size = 4, slice_x = TRUE, multiple = "first" ) expect_snapshot_list_combine( error = TRUE, x = list(1:4, 3:5), indices = list(1:3, 4), size = 4, slice_x = TRUE, multiple = "last" ) # If there are multiple `x` issues, because we reverse the iteration # order in the main path in the `multiple = "first"` case, we end up # reporting the last problem first, while the fallback case still # reports the fist problem first. The indices in the error are correct # in both cases, so this inconsistency is allowed. expect_snapshot_list_combine( error = TRUE, x = list(1:2, 3:5), indices = list(1:3, 4), size = 4, slice_x = TRUE, multiple = "first" ) expect_snapshot_list_combine( error = TRUE, x = list(1:2, 3:5), indices = list(1:3, 4), size = 4, slice_x = TRUE, multiple = "last" ) }) test_that("`multiple` also applies to names", { expect_identical_list_combine( x = list(c(a = 1, b = 2), c(c = 3)), indices = list(1:2, 2), size = 2, multiple = "first", expect = c(a = 1, b = 2) ) expect_identical_list_combine( x = list(c(a = 1, b = 2), c(c = 3)), indices = list(1:2, 2), size = 2, multiple = "last", expect = c(a = 1, c = 3) ) }) test_that("`multiple` doesn't affect `default`", { expect_identical_list_combine( x = list(1:2, 4L), indices = list(c(1, 4), 4), size = 5, multiple = "first", default = 0L, expect = int(1, 0, 0, 2, 0) ) expect_identical_list_combine( x = list(1:2, 4L), indices = list(c(1, 4), 4), size = 5, multiple = "last", default = 0L, expect = int(1, 0, 0, 4, 0) ) }) test_that("`multiple` doesn't apply WITHIN a single index", { # You always get the last value within a single index vector. # That possibly makes `multiple = "first"` a little confusing, # but `multiple` is mostly useful with logical vectors anyways # (case_when() style approach), so it doesn't matter much. expect_identical_list_combine( x = list(1:2, 3:4), indices = list(c(1, 1), c(1, 1)), size = 1, multiple = "first", expect = 2L ) expect_identical_list_combine( x = list(1:2, 3:4), indices = list(c(1, 1), c(1, 1)), size = 1, multiple = "last", expect = 4L ) }) test_that("`compact_seq()` work as `indices`", { expect_identical_list_combine( x = list(1:3, 4:5), indices = list(compact_seq(2, 3), compact_seq(0, 2)), size = 5, expect = int(4:5, 1:3) ) expect_identical_list_combine( x = list(1:3, 4L), indices = list(compact_seq(2, 3), 1), size = 5, expect = int(4, NA, 1:3) ) }) test_that("`compact_seq()` `indices` work with `unmatched`", { expect_snapshot_list_combine( error = TRUE, x = list(1:2, 4:5), indices = list(compact_seq(3, 2), compact_seq(0, 2)), size = 5, unmatched = "error" ) expect_snapshot_list_combine( error = TRUE, x = list(1:2, 4L), indices = list(compact_seq(3, 2), 1), size = 5, unmatched = "error" ) }) test_that("`compact_seq()` `indices` work with `default`", { expect_identical_list_combine( x = list(1:2, 4:5), indices = list(compact_seq(3, 2), compact_seq(0, 2)), size = 5, default = 0L, expect = int(4:5, 0L, 1:2) ) expect_identical_list_combine( x = list(1:2, 4L), indices = list(compact_seq(3, 2), 1), size = 5, default = 0L, expect = int(4L, 0L, 0L, 1:2) ) }) vctrs/tests/testthat/test-type.R0000644000176200001440000002653515120272011016500 0ustar liggesuserstest_that("vec_ptype() is a no-op for NULL", { expect_null(vec_ptype(NULL)) }) test_that("vec_ptype() errors on scalars", { expect_error(vec_ptype(quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype(quote(fn())), class = "vctrs_error_scalar_type") }) test_that(".ptype argument overrides others", { expect_equal(vec_ptype_common(.ptype = 1:10), numeric()) }) test_that(".ptype required in strict mode", { old <- options(vctrs.no_guessing = TRUE) on.exit(options(old)) expect_error(vec_ptype_common(), "strict mode") }) test_that("can feed ptype into itself", { expect_equal(vec_ptype_common(vec_ptype_common(1:10)), numeric()) }) test_that("finalised prototypes created from under specified inputs", { expect_equal(vec_ptype_common(), NULL) expect_equal(vec_ptype_common(NULL), NULL) expect_equal(vec_ptype_common(NA), logical()) expect_equal(vec_ptype_common(NA, NULL), logical()) expect_equal(vec_ptype_common(NULL, NA), logical()) }) test_that("finalised prototypes created from under specified data frame cols", { df <- data.frame(x = NA) expect_equal(vec_ptype_common(df)$x, logical()) }) test_that("non-missing logical get correct type", { expect_equal(vec_ptype_common(TRUE), logical()) }) test_that("output tests", { expect_snapshot(vec_ptype_show()) expect_snapshot(vec_ptype_show(integer())) expect_snapshot(vec_ptype_show(integer(), double())) expect_snapshot(vec_ptype_show(logical(), integer(), double())) }) test_that("vec_ptype_common() handles matrices", { m <- matrix(1:4, nrow = 2) expect_identical(vec_ptype_common(m, m), matrix(int(), ncol = 2)) }) test_that("vec_ptype_common() doesn't mutate input", { x <- list(a = 1L, b = 2) expect_identical(vec_ptype_common(!!!x), numeric()) expect_identical(x, list(a = 1L, b = 2)) }) test_that("vec_ptype_common() includes index in argument tag", { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) # Create a column name too large for default buffer nm <- str_dup("foobarfoobar", 10) large_df1 <- set_names(df1, nm) large_df2 <- set_names(df2, nm) expect_snapshot(error = TRUE, vec_ptype_common(df1, df2)) expect_snapshot(error = TRUE, vec_ptype_common(df1, df1, df2)) expect_snapshot(error = TRUE, vec_ptype_common(large_df1, large_df2)) # Names expect_snapshot(error = TRUE, vec_ptype_common(foo = TRUE, bar = "foo")) expect_snapshot( error = TRUE, vec_ptype_common(foo = TRUE, baz = FALSE, bar = "foo") ) expect_snapshot(error = TRUE, vec_ptype_common(foo = df1, bar = df2)) expect_snapshot(error = TRUE, vec_ptype_common(df1, df1, bar = df2)) # One splice box expect_snapshot(error = TRUE, vec_ptype_common(TRUE, !!!list(1, "foo"))) expect_snapshot(error = TRUE, vec_ptype_common(TRUE, !!!list(1, 2), "foo")) expect_snapshot( error = TRUE, vec_ptype_common(1, !!!list(TRUE, FALSE), "foo") ) # One named splice box expect_snapshot( error = TRUE, vec_ptype_common(foo = TRUE, !!!list(FALSE, FALSE), bar = "foo") ) expect_snapshot( error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = 1, "foo")) ) expect_snapshot( error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = "foo")) ) expect_snapshot( error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), baz = "chr") ) # Two splice boxes in next and current expect_snapshot( error = TRUE, vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), !!!list(baz = "chr")) ) }) test_that("proxied types are have s3 bare type", { for (x in proxied_empty_types) { expect_identical(vec_typeof_bare(x), "s3") } }) test_that("vec_ptype() preserves attributes of unproxied structures", { expect_identical(vec_ptype(foobar(dbl(1))), foobar(dbl())) }) test_that("vec_ptype() errors on scalar lists", { expect_error(vec_ptype(foobar(list())), class = "vctrs_error_scalar_type") }) test_that("can retrieve type info", { exp <- list(type = "integer", had_proxy_method = FALSE) expect_identical(vec_type_info(1:3), exp) exp <- list(type = "s3", had_proxy_method = FALSE) expect_identical(vec_type_info(~foo), exp) x <- as.POSIXlt(new_datetime(0)) exp <- list(type = "s3", had_proxy_method = TRUE) expect_identical(vec_type_info(x), exp) }) test_that("can retrieve proxy info", { exp <- list(type = "integer", had_proxy_method = FALSE, proxy = 1:3) expect_identical(vec_proxy_info(1:3), exp) exp <- list(type = "scalar", had_proxy_method = FALSE, proxy = ~foo) expect_identical(vec_proxy_info(~foo), exp) x <- as.POSIXlt(new_datetime(0)) proxy <- new_data_frame(unclass(x)) exp <- list( type = "dataframe", had_proxy_method = TRUE, proxy = proxy ) expect_identical(vec_proxy_info(x), exp) }) test_that("class_type() detects classes", { expect_identical(class_type(list()), "none") expect_identical(class_type(foobar(list())), "unknown") expect_identical(class_type(structure(list(), class = "list")), "list") expect_identical( class_type(subclass(structure(list(), class = "list"))), "list" ) expect_identical( class_type(I(subclass(structure(list(), class = "list")))), "list" ) expect_identical(class_type(I(list())), "bare_asis") expect_identical(class_type(I(1)), "bare_asis") expect_identical(class_type(data.frame()), "bare_data_frame") expect_identical(class_type(tibble::tibble()), "bare_tibble") expect_identical(class_type(subclass(data.frame())), "data_frame") expect_identical(class_type(new_factor()), "bare_factor") expect_identical(class_type(new_ordered()), "bare_ordered") expect_identical(class_type(subclass(new_factor())), "unknown") expect_identical(class_type(subclass(new_ordered())), "unknown") expect_identical(class_type(new_date()), "bare_date") expect_identical(class_type(new_datetime()), "bare_posixct") expect_identical(class_type(as.POSIXlt(new_date())), "bare_posixlt") expect_identical(class_type(subclass(new_date())), "unknown") expect_identical(class_type(subclass(new_datetime())), "unknown") expect_identical(class_type(NA), "none") expect_identical(class_type(foobar()), "unknown") }) test_that("vec_ptype() handles class-less yet OBJECT gremlins", { gremlin <- stats::model.frame(freeny) expect_error(vec_ptype(gremlin), NA) expect_error(vec_c(gremlin), NA) expect_error(vec_init(gremlin), NA) expect_error(vec_slice(gremlin, 1), NA) }) test_that("explicit list subclasses are vectors", { list_subclass <- function(x) { structure(x, class = c("custom_subclass", "list")) } x <- list_subclass(list()) expect_true(vec_is(x)) df <- data.frame(x = 1:2) df$z <- list_subclass(list(1, 2)) expect_identical(vec_slice(df, 1)$z, list_subclass(list(1))) }) test_that("the type of a classed data frame with an unspecified column retains unspecifiedness", { df1 <- subclass(data_frame(x = 1, y = NA)) df2 <- subclass(data_frame(x = 1, y = unspecified(1))) expect <- subclass(data_frame(x = numeric(), y = unspecified())) expect_identical(vec_ptype(df1), expect) expect_identical(vec_ptype(df2), expect) }) test_that("vec_ptype() methods can be written", { local_methods( vec_ptype.vctrs_foobar = function(x, ...) "dispatch" ) expect_identical(vec_ptype(foobar()), "dispatch") }) test_that("vec_ptype_finalise() works with NULL", { expect_identical(vec_ptype_finalise(NULL), NULL) }) test_that("vec_ptype_finalise() works recursively over bare data frames", { df <- new_data_frame(list( x = numeric(), y = unspecified() )) expect <- data_frame(x = numeric(), y = logical()) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() works recursively over classed data frames", { df <- new_data_frame(list( x = numeric(), y = unspecified() )) df <- subclass(df) expect <- subclass(data_frame(x = numeric(), y = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() can handle data frame columns", { df <- data_frame(x = numeric(), y = data_frame(z = unspecified())) expect <- data_frame(x = numeric(), y = data_frame(z = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() requires vector types", { expect_error( vec_ptype_finalise(quote(name)), class = "vctrs_error_scalar_type" ) expect_error(vec_ptype_finalise(foobar()), class = "vctrs_error_scalar_type") }) # This might change in the future if we decide that prototypes don't # have names test_that("vec_ptype() preserves type of names and row names", { expect_identical(vec_ptype(c(foo = 1)), named(dbl())) expect_identical(vec_ptype(mtcars), mtcars[0, ]) expect_identical(vec_ptype(foobar(mtcars)), foobar(mtcars[0, ])) }) test_that("vec_ptype_common() handles spliced names consistently (#1570)", { args1 <- list(a = "foo", b = "bar") args2 <- list(y = NULL, z = 1) y_name <- "y" z_name <- "z" expect_snapshot(error = TRUE, { vec_ptype_common( a = "foo", b = "bar", y = NULL, z = 1 ) vec_ptype_common( !!!args1, !!!args2 ) vec_ptype_common( !!!args1, "{y_name}" := NULL, "{z_name}" := 1 ) }) }) test_that("vec_ptype() and vec_ptype2() don't finalize their output", { expect_identical(vec_ptype(NA), unspecified()) expect_identical(vec_ptype2(NA, NA), unspecified()) expect_identical(vec_ptype2(NA, NULL), unspecified()) expect_identical(vec_ptype2(NULL, NA), unspecified()) expect_identical(vec_ptype(unspecified()), unspecified()) expect_identical(vec_ptype2(unspecified(), unspecified()), unspecified()) expect_identical(vec_ptype2(unspecified(), NULL), unspecified()) expect_identical(vec_ptype2(NULL, unspecified()), unspecified()) }) test_that("vec_ptype_common() always finalizes its output (#2099)", { expect_identical(vec_ptype_common(NA), logical()) expect_identical(vec_ptype_common(NA, NA), logical()) expect_identical(vec_ptype_common(unspecified(1)), logical()) expect_identical(vec_ptype_common(unspecified(1), unspecified(1)), logical()) # Even explicit `.ptype`s expect_identical( vec_ptype_common(.ptype = NA), logical() ) expect_identical( vec_ptype_common(.ptype = unspecified(1)), logical() ) }) test_that("vec_ptype_common() lets you opt out of ptype finalization (#2094)", { expect_identical( vec_ptype_common(NA, .finalise = FALSE), unspecified() ) expect_identical( vec_ptype_common(NA, NA, .finalise = FALSE), unspecified() ) expect_identical( vec_ptype_common(unspecified(1), .finalise = FALSE), unspecified() ) expect_identical( vec_ptype_common(unspecified(1), unspecified(1), .finalise = FALSE), unspecified() ) # Works for explicit `.ptype` too expect_identical( vec_ptype_common(.ptype = NA, .finalise = FALSE), unspecified() ) expect_identical( vec_ptype_common(.ptype = unspecified(1), .finalise = FALSE), unspecified() ) }) test_that("vec_ptype_common_params() lets you opt out of ptype finalization", { expect_identical( vec_ptype_common_params(NA, .finalise = FALSE), unspecified() ) # Works for explicit `.ptype` too expect_identical( vec_ptype_common_params(.ptype = NA, .finalise = FALSE), unspecified() ) }) test_that("`.finalise` is validated", { expect_snapshot(error = TRUE, { vec_ptype_common(.finalise = 1) }) expect_snapshot(error = TRUE, { vec_ptype_common_params(.finalise = 1) }) }) vctrs/tests/testthat/test-interval.R0000644000176200001440000004343615065005761017360 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_interval_groups() test_that("can compute groups", { x <- data_frame( start = c(1L, 9L, 2L, 2L, 10L), end = c(5L, 11L, 6L, 8L, 12L) ) expect_identical( vec_interval_groups(x$start, x$end), data_frame(start = c(1L, 9L), end = c(8L, 12L)) ) }) test_that("can group with size one input", { x <- data_frame(start = 1L, end = 2L) expect_identical( vec_interval_groups(x$start, x$end), x ) }) test_that("can group with size zero input", { x <- data_frame(start = integer(), end = integer()) expect_identical( vec_interval_groups(x$start, x$end), x ) }) test_that("missing intervals are retained", { x <- data_frame(start = NA, end = NA) expect_identical( vec_interval_groups(x$start, x$end), x ) x <- data_frame(start = c(NA, NA), end = c(NA, NA)) expect_identical( vec_interval_groups(x$start, x$end), x[1, ] ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) expect_identical( vec_interval_groups(x$start, x$end), data_frame(start = c(2, NA), end = c(5, NA)) ) }) test_that("missing intervals can be dropped", { x <- data_frame(start = NA, end = NA) expect_identical( vec_interval_groups(x$start, x$end, missing = "drop"), data_frame(start = logical(), end = logical()) ) x <- data_frame(start = c(NA, NA), end = c(NA, NA)) expect_identical( vec_interval_groups(x$start, x$end, missing = "drop"), data_frame(start = logical(), end = logical()) ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) expect_identical( vec_interval_groups(x$start, x$end, missing = "drop"), data_frame(start = 2, end = 5) ) }) test_that("max endpoint is retained even if it isn't the last in the group", { # 10 is max end of first group, but 5 is last value in that group x <- data_frame(start = c(1L, 2L, 12L), end = c(10L, 5L, 15L)) expect_identical( vec_interval_groups(x$start, x$end), data_frame(start = c(1L, 12L), end = c(10L, 15L)) ) }) # ------------------------------------------------------------------------------ # vec_interval_locate_groups() test_that("can locate groups", { x <- data_frame( start = c(1L, 9L, 2L, 2L, 10L), end = c(5L, 11L, 6L, 8L, 12L) ) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = c(1L, 9L), end = c(8L, 12L)) ) expect_identical( out$loc, list(c(1L, 3L, 4L), c(2L, 5L)) ) }) test_that("can locate groups with size one input", { expect_identical( vec_interval_locate_groups(1L, 2L), data_frame( key = data_frame(start = 1L, end = 2L), loc = list(1L) ) ) }) test_that("can locate groups with size zero input", { expect_identical( vec_interval_locate_groups(integer(), integer()), data_frame( key = data_frame(start = integer(), end = integer()), loc = list() ) ) }) test_that("locations are ordered by both `start` and `end`", { x <- data_frame(start = c(4L, 4L, 1L), end = c(6L, 5L, 2L)) out <- vec_interval_locate_groups(x$start, x$end) # Ties of `start = 4` are broken by `end` values and reordered expect_identical( out$loc, list(3L, c(2L, 1L)) ) # So this orders `x` expect_identical( vec_slice(x, unlist(out$loc)), vec_sort(x) ) }) test_that("missing intervals are retained", { x <- data_frame(start = NA, end = NA) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = NA, end = NA) ) expect_identical( out$loc, list(1L) ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = c(2, NA), end = c(5, NA)) ) expect_identical( out$loc, list(c(3L, 1L), c(2L, 4L)), ) }) test_that("missing intervals can be dropped", { x <- data_frame(start = NA, end = NA) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical( out$key, data_frame(start = logical(), end = logical()) ) expect_identical( out$loc, list() ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 3, NA)) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical( out$key, data_frame(start = 2, end = 5) ) expect_identical( out$loc, list(c(3L, 1L)), ) }) test_that("treats NA and NaN as equivalent with doubles", { x <- data_frame(start = c(NA, NaN, NA, NaN), end = c(NA, NA, NaN, NaN)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = NA_real_, end = NaN) ) expect_identical( out$loc, list(1:4), ) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical( out$key, data_frame(start = double(), end = double()) ) expect_identical( out$loc, list(), ) }) test_that("recognizes missing rows in data frames", { start <- data_frame( year = c(2019, NA, NA, 2019, 2019), month = c(12, NA, NA, 12, 12) ) end <- data_frame( year = c(2020, NA, NA, 2020, 2020), month = c(2, NA, NA, 11, 12) ) x <- data_frame(start = start, end = end) out <- vec_interval_locate_groups(x$start, x$end) expect_start <- data_frame(year = c(2019, NA), month = c(12, NA)) expect_end <- data_frame(year = c(2020, NA), month = c(12, NA)) expect <- data_frame(start = expect_start, end = expect_end) expect_identical(out$key, expect) expect_identical(out$loc, list(c(1L, 4L, 5L), c(2L, 3L))) }) test_that("works on various types", { x <- data_frame(start = c(1.5, 2, 3.1, NA), end = c(1.7, 3.2, 4.5, NA)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = c(1.5, 2, NA), end = c(1.7, 4.5, NA)) ) expect_identical(out$loc, list(1L, 2:3, 4L)) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical(out$key, data_frame(start = c(1.5, 2), end = c(1.7, 4.5))) expect_identical(out$loc, list(1L, 2:3)) x <- data_frame(start = c("a", "c", "f", NA), end = c("b", "g", "h", NA)) out <- vec_interval_locate_groups(x$start, x$end) expect_identical( out$key, data_frame(start = c("a", "c", NA), end = c("b", "h", NA)) ) expect_identical(out$loc, list(1L, 2:3, 4L)) out <- vec_interval_locate_groups(x$start, x$end, missing = "drop") expect_identical(out$key, data_frame(start = c("a", "c"), end = c("b", "h"))) expect_identical(out$loc, list(1L, 2:3)) }) test_that("can keep abutting intervals separate", { # after x <- data_frame(start = c(1L, 2L, 0L), end = c(2L, 3L, 2L)) out <- vec_interval_locate_groups(x$start, x$end, abutting = FALSE) expect_identical(out$key, data_frame(start = c(0L, 2L), end = c(2L, 3L))) expect_identical(out$loc, list(c(3L, 1L), 2L)) # before x <- data_frame(start = c(1L, 0L), end = c(2L, 1L)) out <- vec_interval_locate_groups(x$start, x$end, abutting = FALSE) expect_identical(out$key, data_frame(start = c(0L, 1L), end = c(1L, 2L))) expect_identical(out$loc, list(2L, 1L)) # both x <- data_frame(start = c(1L, 0L, 2L), end = c(2L, 1L, 3L)) out <- vec_interval_locate_groups(x$start, x$end, abutting = FALSE) expect_identical( out$key, data_frame(start = c(0L, 1L, 2L), end = c(1L, 2L, 3L)) ) expect_identical(out$loc, list(2L, 1L, 3L)) }) test_that("`missing` is validated", { expect_snapshot( (expect_error(vec_interval_locate_groups(1, 2, missing = "s"))) ) expect_snapshot( (expect_error(vec_interval_locate_groups( 1, 2, missing = c("group", "drop") ))) ) }) test_that("common type is taken", { expect_snapshot((expect_error(vec_interval_locate_groups(1, "x")))) }) # ------------------------------------------------------------------------------ # vec_interval_complement() test_that("computes the complement", { x <- data_frame( start = c(6L, 1L, 2L, 12L), end = c(9L, 3L, 4L, 14L) ) expect_identical( vec_interval_complement(x$start, x$end), data_frame(start = c(4L, 9L), end = c(6L, 12L)) ) }) test_that("treats intervals as half-open like [a, b)", { x <- data_frame( start = c(1L, 5L), end = c(4L, 6L) ) expect_identical( vec_interval_complement(x$start, x$end), data_frame(start = 4L, end = 5L) ) }) test_that("`[a, b)` and `[b, c)` result in no complement values", { x <- data_frame( start = c(1L, 5L), end = c(5L, 6L) ) expect_identical( vec_interval_complement(x$start, x$end), data_frame(start = integer(), end = integer()) ) }) test_that("works with `lower == upper`", { x <- data_frame( start = c(1L, 2L, 12L, NA), end = c(10L, 5L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 10L, upper = 10L), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(x$start, x$end, lower = -1L, upper = -1L), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 20L, upper = 20L), data_frame(start = integer(), end = integer()) ) }) test_that("works with `lower` before any values", { x <- data_frame( start = c(1L, 2L, 12L, NA), end = c(10L, 5L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = -1L), data_frame(start = c(-1L, 10L), end = c(1L, 12L)) ) }) test_that("works if both `lower` and `upper` are before any values", { x <- data_frame( start = c(2L, 1L, 12L, NA), end = c(5L, 10L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = -5L, upper = -2L), data_frame(start = -5L, end = -2L) ) }) test_that("works with `upper` after any values", { x <- data_frame( start = c(2L, 1L, 13L, 12L, NA), end = c(5L, 10L, 17L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, upper = 20L), data_frame(start = c(10L, 17L), end = c(12L, 20L)) ) }) test_that("works if both `lower` and `upper` are after any values", { x <- data_frame( start = c(2L, 1L, 12L, NA), end = c(5L, 10L, 15L, NA) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 17L, upper = 19L), data_frame(start = 17L, end = 19L) ) }) test_that("works with only NA and `lower`", { x <- data_frame(start = NA_integer_, end = NA_integer_) expect_identical( vec_interval_complement(x$start, x$end, lower = 5L), data_frame(start = integer(), end = integer()) ) }) test_that("works with only NA and `upper`", { x <- data_frame(start = NA_integer_, end = NA_integer_) expect_identical( vec_interval_complement(x$start, x$end, upper = 5L), data_frame(start = integer(), end = integer()) ) }) test_that("works with only NA and both `lower` and `upper`", { x <- data_frame(start = NA_integer_, end = NA_integer_) expect_identical( vec_interval_complement(x$start, x$end, lower = 2L, upper = 5L), data_frame(start = 2L, end = 5L) ) }) test_that("works with `lower` that is on the max set value", { x <- data_frame( start = c(1L, 12L), end = c(9L, 13L) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 9L), data_frame(start = 9L, end = 12L) ) }) test_that("works with `upper` that is on the max set value", { x <- data_frame( start = c(-5L, 1L, 2L, 12L), end = c(0L, 10L, 5L, 15L) ) expect_identical( vec_interval_complement(x$start, x$end, upper = 10L), data_frame(start = 0L, end = 1L) ) expect_identical( vec_interval_complement(x$start, x$end, lower = 10L, upper = 10L), data_frame(start = integer(), end = integer()) ) }) test_that("size zero case generally returns nothing", { expect_identical( vec_interval_complement(integer(), integer()), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(integer(), integer(), lower = 5L), data_frame(start = integer(), end = integer()) ) expect_identical( vec_interval_complement(integer(), integer(), upper = 5L), data_frame(start = integer(), end = integer()) ) }) test_that("size zero case with both `lower` and `upper` returns an interval", { expect_identical( vec_interval_complement(integer(), integer(), lower = 5L, upper = 10L), data_frame(start = 5L, end = 10L) ) }) test_that("size zero case with `lower == upper` doesn't return anything", { expect_identical( vec_interval_complement(integer(), integer(), lower = 5L, upper = 5L), data_frame(start = integer(), end = integer()) ) }) test_that("works when `lower` is contained in an interval", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 3), data_frame(start = 5, end = 10) ) }) test_that("works when `lower` is in a gap between intervals", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 7), data_frame(start = 7, end = 10) ) }) test_that("works when `upper` is in a gap between intervals", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), upper = 7), data_frame(start = c(-3, 5), end = c(1, 7)) ) }) test_that("works when `lower` and `upper` are in a gap between intervals", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 6, upper = 7), data_frame(start = 6, end = 7) ) expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 7, upper = 7), data_frame(start = double(), end = double()) ) }) test_that("works when `lower` and `upper` have an interval between them", { expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = 0, upper = 7), data_frame(start = c(0, 5), end = c(1, 7)) ) expect_identical( vec_interval_complement(c(-5, 1, 10), c(-3, 5, 15), lower = -6, upper = 7), data_frame(start = c(-6, -3, 5), end = c(-5, 1, 7)) ) }) test_that("allow `lower > upper` which returns an empty interval", { x <- data_frame(start = c(1, 2), end = c(5, 12)) expect_identical( vec_interval_complement(x$start, x$end, lower = 10, upper = 9), data_frame(start = double(), end = double()) ) }) test_that("complement works when `lower` and `upper` are in the same interval", { x <- data_frame(start = 1, end = 5) expect_identical( vec_interval_complement(x$start, x$end, lower = 2, upper = 4), data_frame(start = double(), end = double()) ) }) test_that("`lower` and `upper` can't contain missing values", { expect_snapshot({ (expect_error(vec_interval_complement(1, 2, lower = NA))) (expect_error(vec_interval_complement(1, 2, upper = NA))) start <- data_frame(x = 1, y = 2) end <- data_frame(x = 1, y = 3) (expect_error(vec_interval_complement( start, end, lower = data_frame(x = 1, y = NA) ))) (expect_error(vec_interval_complement( start, end, upper = data_frame(x = 1, y = NA) ))) }) }) # ------------------------------------------------------------------------------ # vec_interval_locate_containers() test_that("can locate containers", { x <- data_frame( start = c(1L, 9L, 2L, 2L, 10L), end = c(5L, 12L, 6L, 8L, 12L) ) expect_identical( vec_interval_locate_containers(x$start, x$end), c(1L, 4L, 2L) ) }) test_that("can locate containers with size one input", { x <- data_frame(start = 1L, end = 2L) expect_identical( vec_interval_locate_containers(x$start, x$end), 1L ) }) test_that("can locate containers with size zero input", { x <- data_frame(start = integer(), end = integer()) expect_identical( vec_interval_locate_containers(x$start, x$end), integer() ) }) test_that("missing intervals are retained", { x <- data_frame(start = NA, end = NA) expect_identical( vec_interval_locate_containers(x$start, x$end), 1L ) x <- data_frame(start = c(NA, NA), end = c(NA, NA)) # Ties use first missing value seen expect_identical( vec_interval_locate_containers(x$start, x$end), 1L ) x <- data_frame(start = c(3, NA, 2, NA), end = c(5, NA, 5, NA)) # Missing intervals at the end expect_identical( vec_interval_locate_containers(x$start, x$end), c(3L, 2L) ) }) test_that("locations order the intervals", { x <- data_frame(start = c(4L, 4L, 1L, NA, 4L), end = c(5L, 6L, 2L, NA, 6L)) out <- vec_interval_locate_containers(x$start, x$end) expect_identical( out, c(3L, 2L, 4L) ) # This orders `x` expect_identical( vec_slice(x, out), vec_sort(vec_slice(x, out)) ) }) test_that("treats NA and NaN as equivalent with doubles", { x <- data_frame(start = c(NA, NaN, NA, NaN), end = c(NA, NA, NaN, NaN)) expect_identical(vec_interval_locate_containers(x$start, x$end), 1L) }) test_that("recognizes missing rows in data frames", { start <- data_frame( year = c(2019, NA, NA, 2019, 2019), month = c(12, NA, NA, 12, 12) ) end <- data_frame( year = c(2020, NA, NA, 2020, 2020), month = c(2, NA, NA, 11, 12) ) x <- data_frame(start = start, end = end) expect_identical( vec_interval_locate_containers(x$start, x$end), c(5L, 2L) ) }) test_that("duplicate containers return the first", { x <- data_frame(start = c(1, 1, 2, 1, 2), end = c(2, 2, 3, 2, 3)) expect_identical(vec_interval_locate_containers(x$start, x$end), c(1L, 3L)) }) test_that("works on various types", { x <- data_frame( start = c(1.5, 3, NA, 1.6, NA), end = c(1.7, 3.1, NA, 3.2, NA) ) out <- vec_interval_locate_containers(x$start, x$end) expect_identical(out, c(1L, 4L, 3L)) x <- data_frame( start = c("a", "a", NA, "f", NA), end = c("b", "g", NA, "h", NA) ) out <- vec_interval_locate_containers(x$start, x$end) expect_identical(out, c(2L, 4L, 3L)) }) test_that("common type is taken", { expect_snapshot((expect_error(vec_interval_locate_containers(1, "x")))) }) vctrs/tests/testthat/test-type-rational.R0000644000176200001440000000126415065005761020315 0ustar liggesusers# These tests check the rational type from the S3 vignette test_that("equality proxy is taken (#375)", { local_rational_class() x <- rational(c(1, 2, 1, 2, 6), c(1, 1, 2, 2, 2)) expect_identical(x == rational(3, 1), c(FALSE, FALSE, FALSE, FALSE, TRUE)) expect_identical(unique(x), rational(c(1, 2, 1, 6), c(1, 1, 2, 2))) }) test_that("order proxy is taken", { local_rational_class() x <- rational(c(1, 2, 1, 2, 6), c(1, 1, 2, 2, 2)) expect_identical(sort(x), rational(c(1, 1, 2, 2, 6), c(2, 1, 2, 1, 2))) }) test_that("can find common type and cast to rational", { local_rational_class() x <- rational(1:2, 2:1) expect_identical(vec_cast_common(x, x), list(x, x)) }) vctrs/tests/testthat/test-subscript-loc.R0000644000176200001440000007176415065005761020332 0ustar liggesuserstest_that("vec_as_location2() returns a position", { expect_identical(vec_as_location2(2, 2L), 2L) expect_identical(vec_as_location2("foo", 2L, c("bar", "foo")), 2L) expect_identical(vec_as_location2("0", 4L, as.character(-1:2)), 2L) }) test_that("vec_as_location2() requires integer or character inputs", { expect_snapshot({ (expect_error( vec_as_location2(TRUE, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(mtcars, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(env(), 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(foobar(), 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(2.5, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(Inf, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(-Inf, 10L), class = "vctrs_error_subscript_type" )) "Idem with custom `arg`" (expect_error( vec_as_location2(foobar(), 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_rows(vec_as_location2(TRUE)), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location() requires integer, character, or logical inputs", { expect_snapshot({ (expect_error( vec_as_location(mtcars, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(env(), 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(foobar(), 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(2.5, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(list(), 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(function() NULL, 10L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(Sys.Date(), 3L), class = "vctrs_error_subscript_type" )) "Idem with custom `arg`" (expect_error( vec_as_location(env(), 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(foobar(), 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location2() and vec_as_location() require integer- or character-like OO inputs", { expect_identical(vec_as_location2(factor("foo"), 2L, c("bar", "foo")), 2L) expect_identical(vec_as_location(factor("foo"), 2L, c("bar", "foo")), 2L) expect_error( vec_as_location2(foobar(1L), 10L), class = "vctrs_error_subscript_type" ) expect_error( vec_as_location(foobar(1L), 10L), class = "vctrs_error_subscript_type" ) # Define subtype of logical and integer local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_foobar") }, vec_ptype2.vctrs_foobar.logical = function(x, y, ...) logical(), vec_ptype2.vctrs_foobar.integer = function(x, y, ...) integer(), vec_ptype2.logical.vctrs_foobar = function(x, y, ...) logical(), vec_ptype2.integer.vctrs_foobar = function(x, y, ...) integer(), vec_cast.vctrs_foobar = function(x, to, ...) { UseMethod("vec_cast.vctrs_foobar") }, vec_cast.vctrs_foobar.integer = function(x, to, ...) foobar(x), vec_cast.integer.vctrs_foobar = function(x, to, ...) { vec_cast(unclass(x), int()) }, vec_cast.logical.vctrs_foobar = function(x, to, ...) { vec_cast(unclass(x), lgl()) } ) expect_error( vec_as_location2(foobar(TRUE), 10L), class = "vctrs_error_subscript_type" ) expect_identical(vec_as_location(foobar(TRUE), 10L), 1:10) expect_identical(vec_as_location(foobar(FALSE), 10L), int()) }) test_that("vec_as_location() and variants check for OOB elements (#1605)", { expect_snapshot({ "Numeric indexing" (expect_error( vec_as_location(10L, 2L), class = "vctrs_error_subscript_oob" )) (expect_error( vec_as_location(-10L, 2L), class = "vctrs_error_subscript_oob" )) (expect_error( vec_as_location2(10L, 2L), class = "vctrs_error_subscript_oob" )) "Character indexing" (expect_error( vec_as_location("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob" )) (expect_error( vec_as_location2("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob" )) (expect_error( vec_as_location2("foo", 1L, names = "bar", call = call("baz")), class = "vctrs_error_subscript_oob" )) }) expect_error(num_as_location(10L, 2L), class = "vctrs_error_subscript_oob") expect_error(num_as_location2(10L, 2L), class = "vctrs_error_subscript_oob") }) test_that("vec_as_location() doesn't require `n` for character indexing", { expect_identical(vec_as_location("b", NULL, names = letters), 2L) }) test_that("vec_as_location2() requires length 1 inputs", { expect_snapshot({ (expect_error( vec_as_location2(1:2, 2L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(c("foo", "bar"), 2L, c("foo", "bar")), class = "vctrs_error_subscript_type" )) "Idem with custom `arg`" (expect_error( vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(mtcars, 10L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location2() requires positive integers", { expect_snapshot({ (expect_error( vec_as_location2(0, 2L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(-1, 2L), class = "vctrs_error_subscript_type" )) "Idem with custom `arg`" (expect_error( vec_as_location2(0, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location2() fails with NA", { expect_snapshot({ (expect_error( vec_as_location2(na_int, 2L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(na_chr, 1L, names = "foo"), class = "vctrs_error_subscript_type" )) "Idem with custom `arg`" (expect_error( vec_as_location2(na_int, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location2() doesn't allow lossy casts", { expect_error(vec_as_location2(2^31, 3L), class = "vctrs_error_subscript_type") # Lossy casts generate missing values, which are disallowed expect_error( allow_lossy_cast(vec_as_location2(2^31, 3L)), class = "vctrs_error_subscript_type" ) }) test_that("all subscript errors inherit from `vctrs_error_subscript`", { expect_error(vec_as_location(100, 2L), class = "vctrs_error_subscript") expect_error( vec_as_location("foo", 2L, names = c("bar", "baz")), class = "vctrs_error_subscript" ) expect_error(vec_as_location(foobar(1L), 2L), class = "vctrs_error_subscript") expect_error(vec_as_location(1.5, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location2(TRUE, 2L), class = "vctrs_error_subscript") expect_error(vec_as_location2(1.5, 2L), class = "vctrs_error_subscript") }) test_that("all OOB errors inherit from `vctrs_error_subscript_oob`", { expect_error(vec_as_location(100, 2L), class = "vctrs_error_subscript_oob") expect_error( vec_as_location("foo", 2L, names = c("bar", "baz")), class = "vctrs_error_subscript_oob" ) }) test_that("vec_as_location() preserves names if possible", { expect_identical(vec_as_location(c(a = 1L, b = 3L), 3L), c(a = 1L, b = 3L)) expect_identical(vec_as_location(c(a = 1, b = 3), 3L), c(a = 1L, b = 3L)) expect_identical( vec_as_location(c(a = "z", b = "y"), 26L, letters), c(a = 26L, b = 25L) ) expect_identical( vec_as_location(c(foo = TRUE, bar = FALSE, baz = TRUE), 3L), c(foo = 1L, baz = 3L) ) expect_identical( vec_as_location(c(foo = TRUE), 3L), c(foo = 1L, foo = 2L, foo = 3L) ) expect_identical( vec_as_location(c(foo = NA), 3L), c(foo = na_int, foo = na_int, foo = na_int) ) # Names of negative selections are dropped expect_identical(vec_as_location(c(a = -1L, b = -3L), 3L), 2L) }) test_that("vec_as_location2() optionally allows missing values", { expect_identical(vec_as_location2(NA, 2L, missing = "propagate"), na_int) expect_error( vec_as_location2(NA, 2L, missing = "error"), class = "vctrs_error_subscript_type" ) }) test_that("num_as_location2() optionally allows missing and negative locations", { expect_identical(num_as_location2(na_dbl, 2L, missing = "propagate"), na_int) expect_identical(num_as_location2(-1, 2L, negative = "ignore"), -1L) expect_error( num_as_location2(-3, 2L, negative = "ignore"), class = "vctrs_error_subscript_oob" ) expect_error( num_as_location2(0, 2L, negative = "ignore"), class = "vctrs_error_subscript_type" ) }) test_that("num_as_location() optionally allows negative indices", { expect_identical( num_as_location(dbl(1, -1), 2L, negative = "ignore"), int(1L, -1L) ) expect_error( num_as_location(c(1, -10), 2L, negative = "ignore"), class = "vctrs_error_subscript_oob" ) }) test_that("num_as_location() optionally forbids negative indices", { expect_snapshot({ (expect_error( num_as_location(dbl(1, -1), 2L, negative = "error"), class = "vctrs_error_subscript_type" )) }) expect_error( num_as_location(c(1, -10), 2L, negative = "error"), class = "vctrs_error_subscript_type" ) }) test_that("num_as_location() optionally ignores zero indices", { expect_identical(num_as_location(c(1, 0), 2L, zero = "ignore"), c(1L, 0L)) }) test_that("num_as_location() optionally forbids zero indices", { expect_snapshot({ (expect_error( num_as_location(0L, 1L, zero = "error"), class = "vctrs_error_subscript_type" )) (expect_error( num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error"), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location() handles NULL", { expect_identical( vec_as_location(NULL, 10), vec_as_location(int(), 10), ) }) test_that("vec_as_location() checks for mix of negative and missing locations", { expect_snapshot({ (expect_error( vec_as_location(-c(1L, NA), 30), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(-c(1L, rep(NA, 10)), 30), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location() checks for mix of negative and positive locations", { expect_snapshot({ (expect_error( vec_as_location(c(-1L, 1L), 30), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(c(-1L, rep(1L, 10)), 30), class = "vctrs_error_subscript_type" )) }) }) test_that("logical subscripts must match size of indexed vector", { expect_snapshot({ (expect_error( vec_as_location(c(TRUE, FALSE), 3), class = "vctrs_error_subscript_size" )) }) }) test_that("character subscripts require named vectors", { expect_snapshot({ (expect_error(vec_as_location(letters[1], 3), "unnamed vector")) }) }) test_that("arg is evaluated lazily (#1150)", { expect_silent(vec_as_location(1, 1, arg = { writeLines("oof") "boo" })) }) test_that("arg works for complex expressions (#1150)", { expect_error(vec_as_location(mean, 1, arg = paste0("foo", "bar")), "foobar") }) test_that("can optionally extend beyond the end", { expect_error(num_as_location(1:5, 3), class = "vctrs_error_subscript_oob") expect_identical(num_as_location(1:5, 3, oob = "extend"), 1:5) expect_identical(num_as_location(4:5, 3, oob = "extend"), 4:5) expect_snapshot({ (expect_error( num_as_location(3, 1, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1:5, 7), 3, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1:5, 7, 1), 3, oob = "extend"), class = "vctrs_error_subscript_oob" )) (expect_error( class = "vctrs_error_subscript_oob", num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend") )) }) }) test_that("can extend beyond the end consecutively but non-monotonically (#1166)", { expect_identical(num_as_location(6:4, 3, oob = "extend"), 6:4) expect_identical( num_as_location(c(1:5, 7, 6), 3, oob = "extend"), c(1:5, 7L, 6L) ) expect_identical( num_as_location(c(1, NA, 4, 3), 2, oob = "extend"), c(1L, NA, 4L, 3L) ) }) test_that("num_as_location() can optionally remove oob values (#1595)", { expect_identical(num_as_location(c(5, 3, 2, 4), 3, oob = "remove"), c(3L, 2L)) expect_identical( num_as_location(c(-4, 5, 2, -1), 3, oob = "remove", negative = "ignore"), c(2L, -1L) ) }) test_that("num_as_location() errors when inverting oob negatives unless `oob = 'remove'` (#1630)", { expect_snapshot(error = TRUE, { num_as_location(-4, 3, oob = "error", negative = "invert") }) expect_snapshot(error = TRUE, { num_as_location(c(-4, 4, 5), 3, oob = "extend", negative = "invert") }) expect_identical( num_as_location(-4, 3, oob = "remove", negative = "invert"), c(1L, 2L, 3L) ) expect_identical( num_as_location(c(-4, -2), 3, oob = "remove", negative = "invert"), c(1L, 3L) ) }) test_that("num_as_location() generally drops zeros when inverting negatives (#1612)", { expect_identical( num_as_location(c(-3, 0, -1), n = 5L, negative = "invert", zero = "remove"), c(2L, 4L, 5L) ) # Trying to "ignore" and retain the zeroes in the output doesn't make sense, # where would they be placed? Instead, think of the ignored zeros as being # inverted as well, they just don't correspond to any location after the # inversion so they aren't in the output. expect_identical( num_as_location( c(-3, 0, -1, 0), n = 5L, negative = "invert", zero = "ignore" ), c(2L, 4L, 5L) ) }) test_that("num_as_location() errors on disallowed zeros when inverting negatives (#1612)", { expect_snapshot(error = TRUE, { num_as_location(c(0, -1), n = 2L, negative = "invert", zero = "error") }) expect_snapshot(error = TRUE, { num_as_location(c(-1, 0), n = 2L, negative = "invert", zero = "error") }) }) test_that("num_as_location() with `oob = 'remove'` doesn't remove missings if they are being propagated", { expect_identical(num_as_location(NA_integer_, 1, oob = "remove"), NA_integer_) }) test_that("num_as_location() with `oob = 'remove'` doesn't remove zeros if they are being ignored", { expect_identical(num_as_location(0, 1, oob = "remove", zero = "ignore"), 0L) expect_identical(num_as_location(0, 0, oob = "remove", zero = "ignore"), 0L) }) test_that("num_as_location() with `oob = 'extend'` doesn't allow ignored oob negative values (#1614)", { # This is fine (ignored negative that is in bounds) expect_identical( num_as_location(c(-5L, 6L), 5L, oob = "extend", negative = "ignore"), c(-5L, 6L) ) expect_snapshot(error = TRUE, { # Ignored negatives aren't allowed to extend the vector num_as_location(-6L, 5L, oob = "extend", negative = "ignore") }) expect_snapshot(error = TRUE, { # Ensure error only reports negative indices num_as_location(c(-7L, 6L), 5L, oob = "extend", negative = "ignore") }) expect_snapshot(error = TRUE, { num_as_location(c(-7L, NA), 5L, oob = "extend", negative = "ignore") }) }) test_that("num_as_location() with `oob = 'error'` reports negative and positive oob values", { expect_snapshot(error = TRUE, { num_as_location(c(-6L, 7L), n = 5L, oob = "error", negative = "ignore") }) }) test_that("num_as_location() with `missing = 'remove'` retains names (#1633)", { x <- c(a = 1, b = NA, c = 2, d = NA) expect_named(num_as_location(x, n = 2, missing = "remove"), c("a", "c")) }) test_that("num_as_location() with `zero = 'remove'` retains names (#1633)", { x <- c(a = 1, b = 0, c = 2, d = 0) expect_named(num_as_location(x, n = 2, zero = "remove"), c("a", "c")) }) test_that("num_as_location() with `oob = 'remove'` retains names (#1633)", { x <- c(a = 1, b = 3, c = 2, d = 4) expect_named(num_as_location(x, n = 2, oob = "remove"), c("a", "c")) }) test_that("num_as_location() with `negative = 'invert'` drops names (#1633)", { # The inputs don't map 1:1 to outputs x <- c(a = -1, b = -3) expect_named(num_as_location(x, n = 5), NULL) }) test_that("missing values are supported in error formatters", { expect_snapshot({ (expect_error( num_as_location(c(1, NA, 2, 3), 1), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location(c(1, NA, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob" )) }) }) test_that("can disallow missing values", { expect_snapshot({ (expect_error( vec_as_location(c(1, NA), 2, missing = "error"), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location( c(1, NA, 2, NA), 2, missing = "error", arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(NA, 1, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(NA, 3, missing = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location( c(TRUE, NA, FALSE), 3, missing = "error" )), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location( NA_character_, 2, missing = "error", names = c("x", "y") )), class = "vctrs_error_subscript_type" )) }) }) test_that("can alter logical missing value handling (#1595)", { x <- c(a = TRUE, b = NA, c = FALSE, d = NA) expect_identical( vec_as_location(x, n = 4L, missing = "propagate"), c(a = 1L, b = NA, d = NA) ) expect_identical( vec_as_location(x, n = 4L, missing = "remove"), c(a = 1L) ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 4L, missing = "error") }) # Specifically test size 1 case, which has its own special path x <- c(a = NA) expect_identical( vec_as_location(x, n = 2L, missing = "propagate"), c(a = NA_integer_, a = NA_integer_) ) expect_identical( vec_as_location(x, n = 2L, missing = "remove"), named(integer()) ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 2L, missing = "error") }) }) test_that("can alter character missing value handling (#1595)", { x <- c(NA, "z", NA) names(x) <- c("a", "b", "c") names <- c("x", "z") expect_identical( vec_as_location(x, n = 2L, names = names, missing = "propagate"), set_names(c(NA, 2L, NA), names(x)) ) expect_identical( vec_as_location(x, n = 2L, names = names, missing = "remove"), set_names(2L, "b") ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 2L, names = names, missing = "error") }) }) test_that("can alter integer missing value handling (#1595)", { x <- c(NA, 1L, NA, 3L) names(x) <- c("a", "b", "c", "d") expect_identical( vec_as_location(x, n = 4L, missing = "propagate"), x ) expect_identical( vec_as_location(x, n = 4L, missing = "remove"), c(b = 1L, d = 3L) ) expect_snapshot(error = TRUE, { vec_as_location(x, n = 4L, missing = "error") }) }) test_that("can alter negative integer missing value handling (#1595)", { x <- c(-1L, NA, NA, -3L) expect_snapshot(error = TRUE, { num_as_location(x, n = 4L, missing = "propagate", negative = "invert") }) expect_identical( num_as_location(x, n = 4L, missing = "remove", negative = "invert"), c(2L, 4L) ) expect_snapshot(error = TRUE, { num_as_location(x, n = 4L, missing = "error", negative = "invert") }) }) test_that("missing value character indices never match missing value names (#1489)", { x <- NA_character_ names <- NA_character_ expect_identical( vec_as_location(x, n = 1L, names = names, missing = "propagate"), NA_integer_ ) expect_identical( vec_as_location(x, n = 1L, names = names, missing = "remove"), integer() ) }) test_that("empty string character indices never match empty string names (#1489)", { names <- c("", "y") expect_snapshot(error = TRUE, { vec_as_location("", n = 2L, names = names) }) expect_snapshot(error = TRUE, { vec_as_location(c("", "y", ""), n = 2L, names = names) }) }) test_that("scalar logical `FALSE` and `NA` cases don't modify a shared object (#1633)", { x <- vec_as_location(FALSE, n = 2) expect_identical(x, integer()) y <- vec_as_location(c(a = FALSE), n = 2) expect_identical(y, named(integer())) # Still unnamed expect_identical(x, integer()) x <- vec_as_location(NA, n = 2, missing = "remove") expect_identical(x, integer()) y <- vec_as_location(c(a = FALSE), n = 2, missing = "remove") expect_identical(y, named(integer())) # Still unnamed expect_identical(x, integer()) }) test_that("can customise subscript type errors", { expect_snapshot({ "With custom `arg`" (expect_error( num_as_location( -1, 2, negative = "error", arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_type" )) (expect_error( num_as_location2( -1, 2, negative = "error", arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(0, 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(na_dbl, 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location2(c(1, 2), 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location( c(TRUE, FALSE), 3, arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_size" )) (expect_error( vec_as_location(c(-1, NA), 3, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(c(-1, 1), 3, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type" )) (expect_error( num_as_location( c(1, 4), 2, oob = "extend", arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_oob" )) (expect_error( num_as_location( 0, 1, zero = "error", arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_type" )) "With tibble columns" (expect_error( with_tibble_cols(num_as_location(-1, 2, negative = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(num_as_location2(-1, 2, negative = "error")), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location2(0, 2)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location2(na_dbl, 2)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location2(c(1, 2), 2)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)), class = "vctrs_error_subscript_size" )) (expect_error( with_tibble_cols(vec_as_location(c(-1, NA), 3)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(vec_as_location(c(-1, 1), 3)), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_cols(num_as_location(0, 1, zero = "error")), class = "vctrs_error_subscript_type" )) }) }) test_that("can customise OOB errors", { expect_snapshot({ (expect_error( vec_slice(set_names(letters), "foo"), class = "vctrs_error_subscript_oob" )) "With custom `arg`" (expect_error( vec_as_location( 30, length(letters), arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_oob" )) (expect_error( vec_as_location( "foo", NULL, letters, arg = "foo", call = call("my_function") ), class = "vctrs_error_subscript_oob" )) "With tibble columns" (expect_error( with_tibble_cols(vec_slice(set_names(letters), "foo")), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_cols(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_cols(vec_slice(set_names(letters), -30)), class = "vctrs_error_subscript_oob" )) "With tibble rows" (expect_error( with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_rows(vec_slice(set_names(letters), 1:30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tibble_rows(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" )) "With tidyselect select" (expect_error( with_tidyselect_select(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_select(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_select(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" )) "With tidyselect relocate" (expect_error( with_tidyselect_relocate(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_relocate(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob" )) (expect_error( with_tidyselect_relocate(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob" )) }) }) test_that("num_as_location() requires non-S3 inputs", { expect_error(num_as_location(factor("foo"), 2), "must be a numeric vector") }) test_that("vec_as_location() checks dimensionality", { expect_snapshot({ (expect_error( vec_as_location(matrix(TRUE, nrow = 1), 3L), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_location() works with vectors of dimensionality 1", { expect_identical(vec_as_location(array(TRUE, dim = 1), 3L), 1:3) }) test_that("vec_as_location() UI", { expect_snapshot(error = TRUE, vec_as_location(1, 1L, missing = "bogus")) }) test_that("num_as_location() UI", { expect_snapshot(error = TRUE, num_as_location(1, 1L, missing = "bogus")) expect_snapshot(error = TRUE, num_as_location(1, 1L, negative = "bogus")) expect_snapshot(error = TRUE, num_as_location(1, 1L, oob = "bogus")) expect_snapshot(error = TRUE, num_as_location(1, 1L, zero = "bogus")) }) test_that("vec_as_location2() UI", { expect_snapshot(error = TRUE, vec_as_location2(1, 1L, missing = "bogus")) }) test_that("vec_as_location() evaluates arg lazily", { expect_silent(vec_as_location(1L, 1L, arg = print("oof"))) }) test_that("vec_as_location2() evaluates arg lazily", { expect_silent(vec_as_location2(1L, 1L, arg = print("oof"))) expect_silent(vec_as_location2_result( 1L, 1L, names = NULL, arg = print("oof"), missing = "error", negative = "error", call = NULL )) }) vctrs/tests/testthat/test-match.R0000644000176200001440000021106115113325071016610 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_locate_matches() - logicals test_that("isn't confused by unspecified logical vectors", { x <- vec_locate_matches(logical(), NA) expect_identical(x$needles, integer()) expect_identical(x$haystack, integer()) x <- vec_locate_matches(NA, logical()) expect_identical(x$needles, 1L) expect_identical(x$haystack, NA_integer_) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - integers test_that("can match in increasing order", { x <- vec_locate_matches(1:2, 1:3) expect_identical(x$needles, 1:2) expect_identical(x$haystack, 1:2) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - doubles test_that("can match doubles", { x <- vec_locate_matches(c(1, 2, 5), c(2, 2, 3, 1)) expect_identical(x$needles, c(1L, 2L, 2L, 3L)) expect_identical(x$haystack, c(4L, 1L, 2L, NA)) }) test_that("can match Inf and -Inf with all conditions", { x <- c(Inf, -Inf) y <- c(-Inf, 0, Inf) res <- vec_locate_matches(x, y, condition = "==") expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(3L, 1L)) res <- vec_locate_matches(x, y, condition = "<") expect_identical(res$needles, c(1L, 2L, 2L)) expect_identical(res$haystack, c(NA, 2L, 3L)) res <- vec_locate_matches(x, y, condition = "<=") expect_identical(res$needles, c(1L, 2L, 2L, 2L)) expect_identical(res$haystack, c(3L, 1L, 2L, 3L)) res <- vec_locate_matches(x, y, condition = ">") expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, NA)) res <- vec_locate_matches(x, y, condition = ">=") expect_identical(res$needles, c(1L, 1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, 3L, 1L)) }) test_that("NA and NaN don't match numbers with equality conditions", { expect_identical(vec_locate_matches(1, NA_real_)$haystack, NA_integer_) expect_identical(vec_locate_matches(1, NaN)$haystack, NA_integer_) expect_identical(vec_locate_matches(NA_real_, 1)$haystack, NA_integer_) expect_identical(vec_locate_matches(NaN, 1)$haystack, NA_integer_) }) test_that("NA and NaN are the same by default", { res <- vec_locate_matches(NA_real_, NaN) expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) res <- vec_locate_matches(NaN, NA_real_) expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) res <- vec_locate_matches(c(NaN, NA, NaN), c(NA, NaN, NA), condition = "==") expect_identical(res$needles, rep(c(1L, 2L, 3L), each = 3)) expect_identical(res$haystack, rep(c(1L, 2L, 3L), times = 3)) res <- vec_locate_matches( c(NaN, NA, NaN), c(NA, NaN, NA), condition = "==", multiple = "first" ) expect_identical(res$needles, c(1L, 2L, 3L)) expect_identical(res$haystack, c(1L, 1L, 1L)) }) test_that("NA and NaN are distinct if requested", { res <- vec_locate_matches( c(NaN, NA, NaN), c(NA, NaN, NA), condition = "==", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 2L, 2L, 3L)) expect_identical(res$haystack, c(2L, 1L, 3L, 2L)) res <- vec_locate_matches( c(NaN, NA, NaN), c(NA, NaN, NA), condition = "==", multiple = "first", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 2L, 3L)) expect_identical(res$haystack, c(2L, 1L, 2L)) }) test_that("NA and NaN match each other in non-equi conditions by default", { res <- vec_locate_matches( c(NaN, NA, NaN, 1), c(NA, NaN, NA), condition = ">=", nan_distinct = FALSE ) expect_identical(res$needles, c(rep(c(1L, 2L, 3L), each = 3), 4L)) expect_identical(res$haystack, c(rep(c(1L, 2L, 3L), times = 3), NA)) res <- vec_locate_matches( c(NaN, NA, NaN, 1), c(NA, NaN, NA), condition = "<=", nan_distinct = FALSE ) expect_identical(res$needles, c(rep(c(1L, 2L, 3L), each = 3), 4L)) expect_identical(res$haystack, c(rep(c(1L, 2L, 3L), times = 3), NA)) }) test_that("NA and NaN never match each other in non-equi conditions if treated as distinct", { res <- vec_locate_matches( c(NaN, NA, NaN), c(NA, NaN, NA), condition = ">=", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 2L, 2L, 3L)) expect_identical(res$haystack, c(2L, 1L, 3L, 2L)) res <- vec_locate_matches( c(NaN, NA, NaN), c(NA, NaN, NA), condition = "<=", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 2L, 2L, 3L)) expect_identical(res$haystack, c(2L, 1L, 3L, 2L)) }) test_that("NA and NaN are both considered incomplete no matter the value of `nan_distinct`", { res <- vec_locate_matches( c(NA, NaN), c(NA, NaN), incomplete = NA, nan_distinct = FALSE ) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) res <- vec_locate_matches( c(NA, NaN), c(NA, NaN), incomplete = NA, nan_distinct = TRUE ) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - complex test_that("complex can be matched", { x <- complex(real = 1, imaginary = c(1, 2)) y <- complex(real = 1, imaginary = c(1, 1, 3)) z <- complex(real = 2, imaginary = 1) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, NA)) res <- vec_locate_matches(x, z) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) }) test_that("complex order lexicographically", { x <- complex(real = 1, imaginary = c(1, 2, 5)) y <- complex(real = 1, imaginary = c(1, 4, 3)) res <- vec_locate_matches(x, y, condition = "<") expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L)) expect_identical(res$haystack, c(2L, 3L, 2L, 3L, NA)) }) test_that("complex incomplete values match correctly", { x <- complex(real = c(NA, NA, NaN, NaN), imaginary = c(NA, NaN, NA, NaN)) y <- complex(real = c(NA, NA, NaN, NaN), imaginary = c(NA, NaN, NA, NaN)) # Missings can match, and all missing values should be treated equally res <- vec_locate_matches( x, y, condition = "==", incomplete = "compare", nan_distinct = FALSE ) expect_identical(res$needles, rep(1:4, each = 4)) expect_identical(res$haystack, rep(1:4, times = 4)) res <- vec_locate_matches( x, y, condition = "==", incomplete = "match", nan_distinct = FALSE ) expect_identical(res$needles, rep(1:4, each = 4)) expect_identical(res$haystack, rep(1:4, times = 4)) # Missings can match, but all combinations are different res <- vec_locate_matches( x, y, condition = "==", incomplete = "compare", nan_distinct = TRUE ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, 1:4) res <- vec_locate_matches( x, y, condition = "==", incomplete = "match", nan_distinct = TRUE ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, 1:4) # Missings don't match res <- vec_locate_matches(x, y, condition = "==", incomplete = NA) expect_identical(res$needles, 1:4) expect_identical(res$haystack, rep(NA_integer_, 4)) # Missings don't match, but are never considered no-matches expect_identical( vec_locate_matches( x, y, condition = "==", incomplete = NA, no_match = "error" ), vec_locate_matches(x, y, condition = "==", incomplete = NA) ) }) test_that("complex missing values are always grouped together (#1403)", { # Unlike data frames and rcrd types, for complex vectors if either element # is missing then the whole observation is normalised to have both components # be missing. This means `1+NAi` matches `2+NAi`. It also matches `2+NaNi` # unless `nan_distinct = TRUE`. x <- complex(real = c(1, 1, 2, 2, 2), imaginary = c(NA, 1, NA, 2, NaN)) y <- x[-1] res <- vec_locate_matches(x, y, condition = ">=") expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L, 4L, 5L, 5L)) expect_identical(res$haystack, c(2L, 4L, 1L, 2L, 4L, 1L, 3L, 2L, 4L)) }) test_that("behavior with complex missing values matches base R", { x <- complex(real = c(1, 1, 2, 2, 2), imaginary = c(NA, 1, NA, 2, NaN)) expect_identical( vec_locate_matches(x, x, nan_distinct = TRUE, multiple = "first")$haystack, match(x, x) ) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - characters test_that("character ordering is done in the C locale", { x <- c("a", "A") y <- c("a", "A", "b", "B") # a < b, but a > A and a > B res <- vec_locate_matches(x, y, condition = "<") expect_identical(res$needles, c(1L, 2L, 2L, 2L)) expect_identical(res$haystack, c(3L, 1L, 3L, 4L)) }) test_that("`chr_proxy_collate` can affect the matching process", { x <- c("a", "A") y <- c("a", "A") res <- vec_locate_matches(x, y, condition = "==") expect_identical(res$needles, 1:2) expect_identical(res$haystack, 1:2) res <- vec_locate_matches(x, y, condition = "==", chr_proxy_collate = tolower) expect_identical(res$needles, c(1L, 1L, 2L, 2L)) expect_identical(res$haystack, c(1L, 2L, 1L, 2L)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - lists test_that("lists can be matched", { x <- list(1, 2, 1, NULL) y <- list(1, 1, 3, NULL) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(1L, 2L, NA, 1L, 2L, 4L)) }) test_that("list incompleteness is detected", { res <- vec_locate_matches(list(NULL), list(NULL), incomplete = NA) expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("list ordering is by first appearance in `needles` (so non-equi joins don't make much sense)", { x <- list(3, 2, 1, NULL) y <- list(1, 3, 1, 3) res <- vec_locate_matches(x, y, condition = ">") # x[1] appears first, so it isn't greater than anything # x[2] is greater than x[1] (when x[1] is in y) # and so on... # NULL still doesn't match anything expect_identical(res$needles, c(1L, 2L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(NA, 2L, 4L, 2L, 4L, NA)) # With data frame columns containing list-columns df1 <- data_frame(col = data_frame(x = x)) df2 <- data_frame(col = data_frame(x = y)) expect_identical(vec_locate_matches(x, y, condition = ">"), res) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - data frame test_that("can match with 1 column data frames", { df1 <- data_frame(x = c(1L, 3L, 1L, 3L)) df2 <- data_frame(x = c(1L, 3L, 1L)) expect_identical( vec_locate_matches(df1, df2), vec_locate_matches(df1$x, df2$x) ) }) test_that("can match with >1 column data frames", { df1 <- data_frame(x = c(1L, 3L, 1L, 3L), y = c(1L, 4L, 1L, 2L)) df2 <- data_frame(x = c(1L, 3L, 1L), y = c(1L, 2L, 1L)) res <- vec_locate_matches(df1, df2, condition = c("==", "==")) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(1L, 3L, NA, 1L, 3L, 2L)) }) test_that("can match with df-cols of varying types", { y <- c(1L, 1L) expect_needles <- c(1L, 2L) expect_haystack <- c(NA, 1L) df1 <- data_frame(x = data_frame(x = c(2L, 1L), y = y)) df2 <- data_frame(x = data_frame(x = c(1L, 3L), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = c(2, 1), y = y)) df2 <- data_frame(x = data_frame(x = c(1, 3), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = c(TRUE, FALSE), y = y)) df2 <- data_frame(x = data_frame(x = c(FALSE, NA), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame(x = data_frame(x = c("x", "y"), y = y)) df2 <- data_frame(x = data_frame(x = c("y", "z"), y = y)) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) df1 <- data_frame( x = data_frame(x = complex(real = c(1, 2), imaginary = c(2, 1)), y = y) ) df2 <- data_frame( x = data_frame(x = complex(real = c(2, 3), imaginary = c(1, 1)), y = y) ) res <- vec_locate_matches(df1, df2) expect_identical(res$needles, expect_needles) expect_identical(res$haystack, expect_haystack) }) test_that("ensure that matching works if outer runs are present (i.e. `==` comes before non-equi condition)", { df1 <- data_frame(x = c(1, 2, 1, 1), y = c(2, 2, 3, 2)) df2 <- data_frame(x = c(1, 1), y = c(2, 3)) res <- vec_locate_matches(df1, df2, condition = c("==", "<=")) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 4L, 4L)) expect_identical(res$haystack, c(1L, 2L, NA, 2L, 1L, 2L)) df1$z <- c(1L, 2L, 1L, 3L) df2$z <- c(5L, 2L) res <- vec_locate_matches(df1, df2, condition = c("==", "==", "<")) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(1L, NA, 2L, 1L)) res <- vec_locate_matches(df1, df2, condition = c("==", ">=", "<")) expect_identical(res$needles, c(1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(1L, NA, 1L, 2L, 1L)) }) test_that("df-cols propagate an NA if any columns are incomplete", { df <- data_frame(x = 1, y = data_frame(x = c(1, 1, NA), y = c(1, NA, 2))) res <- vec_locate_matches(df, df, incomplete = "compare") expect_identical(res$needles, 1:3) expect_identical(res$haystack, 1:3) res <- vec_locate_matches(df, df, incomplete = "match") expect_identical(res$needles, 1:3) expect_identical(res$haystack, 1:3) # 2nd and 3rd rows aren't fully complete res <- vec_locate_matches(df, df, incomplete = NA) expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(1L, NA, NA)) res <- vec_locate_matches(df, df, incomplete = "drop") expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) }) test_that("df-cols aren't flattened, so `condition` is applied jointly on the df-col columns", { x <- data_frame(a = 1L, b = data_frame(x = 3L, y = 4L)) y <- data_frame(a = 1L, b = data_frame(x = 2L, y = 5L)) # In particular `x$b[1,] > y$b[1,]` because `3 > 4` and that breaks the tie # before any values of the `x$b$y` column are checked res <- vec_locate_matches(x, y, condition = c("==", ">")) expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) }) test_that("must have at least 1 column to match", { expect_snapshot(error = TRUE, { vec_locate_matches(data_frame(), data_frame()) }) expect_snapshot(error = TRUE, { vec_locate_matches(data_frame(), data_frame(), error_call = call("foo")) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - rcrd test_that("rcrd types can be matched", { x <- new_rcrd(list(x = c(1L, 3L), y = c(1L, 4L))) y <- new_rcrd(list(x = c(1L, 2L), y = c(1L, 5L))) res <- vec_locate_matches(x, y, condition = "<=") expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, NA)) # In particular: `(3, 4) > (2, 5)` since the first elt breaks the tie res <- vec_locate_matches(x, y, condition = ">") expect_identical(res$needles, c(1L, 2L, 2L)) expect_identical(res$haystack, c(NA, 1L, 2L)) }) test_that("rcrd type matching works with rcrd-cols", { x <- data_frame( a = c(1L, 1L), b = new_rcrd(list(x = c(1L, 3L), y = c(1L, 4L))) ) y <- data_frame( a = c(1L, 1L), b = new_rcrd(list(x = c(1L, 2L), y = c(1L, 5L))) ) res <- vec_locate_matches(x, y, condition = c("==", "<=")) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 2L, NA)) res <- vec_locate_matches(x, y, condition = c("==", ">")) expect_identical(res$needles, c(1L, 2L, 2L)) expect_identical(res$haystack, c(NA, 1L, 2L)) }) test_that("rcrd type incompleteness is handled correctly", { x <- new_rcrd(list(x = c(1L, NA), y = c(NA_integer_, NA_integer_))) y <- new_rcrd(list(x = c(1L, 2L, NA), y = c(NA, 5L, NA))) # When `incomplete = "compare"`, the types of incompleteness still must # match exactly to have a match. i.e. (x=1L, y=NA) doesn't match (x=NA, y=1L). # This is the same as the rule for data frames. res <- vec_locate_matches(x, y, condition = "==", incomplete = "compare") expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(1L, 3L)) res <- vec_locate_matches(x, y, condition = "==", incomplete = "match") expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(1L, 3L)) # If any field contains NA, the entire observation is incomplete. res <- vec_locate_matches(x, y, condition = "==", incomplete = NA) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - S3 test_that("S3 types with order proxies that depend on the data are combined before the proxy is taken", { # i.e. `bignum:::vec_proxy_order.bignum_biginteger()` x <- structure(c(5L, 1L), class = "foo") y <- structure(c(8L, 5L), class = "foo") local_methods( vec_proxy_order.foo = function(x, ...) { rank(unclass(x)) } ) # Can't take the order proxies separately because they are the same! expect_identical(vec_proxy_order(x), vec_proxy_order(y)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, NA)) x_df <- data_frame(a = x, b = x) y_df <- data_frame(a = y, b = y) res <- vec_locate_matches(x_df, y_df) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, NA)) }) test_that("Works with base R S3 types we support natively", { x <- new_factor(c(1L, 2L), levels = c("x", "y")) y <- new_factor(c(3L, 1L, 1L), levels = c("x", "y", "z")) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) x <- new_ordered(c(1L, 2L), levels = c("x", "y")) y <- new_ordered(c(2L, 1L, 1L), levels = c("x", "y")) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, 1L)) x <- new_date(c(1, 2)) y <- new_date(c(3, 1, 1)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) x <- new_datetime(c(1, 2)) y <- new_datetime(c(3, 1, 1)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) x <- as.POSIXlt(new_datetime(c(1, 2))) y <- as.POSIXlt(new_datetime(c(3, 1, 1))) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) }) test_that("Works with classed data frame columns", { x_col <- new_data_frame(list(a = c(1L, 2L), b = c(2, 3)), class = "foo") y_col <- new_data_frame( list(a = c(1L, 1L, 1L), b = c(2, 4, 2)), class = "foo" ) x <- new_data_frame(list(c = c(1L, 1L), d = x_col)) y <- new_data_frame(list(c = c(1L, 1L, 1L), d = y_col)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(1L, 3L, NA)) }) test_that("AsIs types are combined before order proxies are taken (#1557)", { x <- I(list(5, 1)) y <- I(list(8, 5, 5)) res <- vec_locate_matches(x, y) expect_identical(res$needles, c(1L, 1L, 2L)) expect_identical(res$haystack, c(2L, 3L, NA)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - ptype2 / casting test_that("common type of `needles` and `haystack` is taken", { x <- 1 y <- "a" expect_snapshot(error = TRUE, { vec_locate_matches(x, y) }) expect_snapshot(error = TRUE, { vec_locate_matches(x, y, needles_arg = "x", error_call = call("foo")) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - missing values test_that("integer missing values match with `==`, `>=`, and `<=` when `incomplete = 'compare'", { res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "==") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<=") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">=") expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = "<") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) res <- vec_locate_matches(NA_integer_, c(1L, NA, 2L, NA), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("integer missing values can match with any condition when `incomplete = 'match'`", { res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "==", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<=", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = ">=", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = ">", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) }) test_that("integer missing values report all matches even with a `filter`", { res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<=", filter = "min" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<", filter = "min", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = ">=", filter = "max" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = ">", filter = "max", incomplete = "match" ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(2L, 4L)) }) test_that("integer missing value matches can be limited by `multiple`", { res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<=", multiple = "first" ) expect_identical(res$needles, 1L) expect_identical(res$haystack, 2L) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<=", multiple = "last" ) expect_identical(res$needles, 1L) expect_identical(res$haystack, 4L) res <- vec_locate_matches( NA_integer_, c(1L, NA, 2L, NA), condition = "<=", multiple = "any" ) expect_identical(res$needles, 1L) expect_identical(res$haystack, 2L) }) test_that("missing values match within columns", { df1 <- data_frame(x = c(1L, 2L, 1L), y = rep(NA_integer_, 3)) df2 <- data_frame(x = c(2L, 1L, 1L), y = c(1L, NA, NA)) res <- vec_locate_matches(df1, df2, condition = c("==", "==")) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA, 2L, 3L)) expect_identical( vec_locate_matches(df1, df2, condition = c("<=", ">=")), vec_locate_matches(df1, df2, condition = c("==", "==")) ) res <- vec_locate_matches(df1, df2, condition = c("<", ">")) expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) res <- vec_locate_matches( df1, df2, condition = c("<=", ">"), incomplete = "compare" ) expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) res <- vec_locate_matches( df1, df2, condition = c("<=", ">"), incomplete = "match" ) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA, 2L, 3L)) }) test_that("missing values being 'match'ed hands off correctly to next column", { df1 <- data_frame(x = c(NA, NA, 1L, 2L, NA), y = c(2, 3, 0, 1, NA)) df2 <- data_frame(x = c(NA, NA, NA, 3L), y = c(2, 1, NA, 0)) res <- vec_locate_matches( df1, df2, condition = c("<", ">"), incomplete = "match" ) expect_identical(res$needles, c(1L, 2L, 2L, 3L, 4L, 5L)) expect_identical(res$haystack, c(2L, 1L, 2L, NA, 4L, 3L)) }) test_that("integer needles can't match NAs in the haystack", { # At the C level, 1L > NA_integer_ (INT_MIN), # but we are careful to work around this res <- vec_locate_matches(1L, c(1L, NA, 2L, NA), condition = ">=") expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) res <- vec_locate_matches(1L, c(1L, NA, 2L, NA), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("double needles can't match NAs or NaNs in the haystack", { # At the C level, our helpers assumg NA and NaN are the smallest values, # so we are careful to avoid including them with >= and > conditions res <- vec_locate_matches(1, c(1, NA, 2, NaN), condition = ">=") expect_identical(res$needles, 1L) expect_identical(res$haystack, 1L) res <- vec_locate_matches(1, c(1, NA, 2, NaN), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("NA and NaN match correctly with non-equi conditions and `nan_distinct`", { res <- vec_locate_matches( c(NA, NaN), c(1L, NA, 2L, NaN), condition = "<=", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( c(NA, NaN), c(1L, NA, 2L, NaN), condition = "<", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(NA_integer_, NA_integer_)) res <- vec_locate_matches( c(NA, NaN), c(1L, NA, 2L, NaN), condition = "<", nan_distinct = TRUE, incomplete = "match" ) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 4L)) res <- vec_locate_matches( c(NA, NaN), c(1L, NA, 2L, NaN), condition = ">=", nan_distinct = FALSE ) expect_identical(res$needles, c(1L, 1L, 2L, 2L)) expect_identical(res$haystack, c(2L, 4L, 2L, 4L)) res <- vec_locate_matches( c(NA, NaN), c(1L, NA, 2L, NaN), condition = ">", nan_distinct = FALSE, incomplete = "match" ) expect_identical(res$needles, c(1L, 1L, 2L, 2L)) expect_identical(res$haystack, c(2L, 4L, 2L, 4L)) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `incomplete` test_that("can handle incomplete needles with `incomplete = `", { x <- c(1L, NA, 2L) y <- c(NA, 1L, 1L) res <- vec_locate_matches(x, y, condition = "==", incomplete = NA) expect_identical(res$needles, c(1L, 1L, 2L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA, NA)) res <- vec_locate_matches(x, y, condition = "<=", incomplete = 0L) expect_identical(res$needles, c(1L, 1L, 2L, 3L)) expect_identical(res$haystack, c(2L, 3L, 0L, NA)) res <- vec_locate_matches(x, y, condition = ">=", incomplete = -1L) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L)) expect_identical(res$haystack, c(2L, 3L, -1L, 2L, 3L)) }) test_that("can drop incomplete needle rows with `incomplete = 'drop'", { x <- c(1L, NA, 2L) y <- c(NA, 1L, 1L) res <- vec_locate_matches(x, y, condition = "==", incomplete = "drop") expect_identical(res$needles, c(1L, 1L, 3L)) expect_identical(res$haystack, c(2L, 3L, NA)) }) test_that("if `incomplete = `, an NA in any column results in the value", { df1 <- data_frame(x = c(1L, NA, 2L, 1L, 1L), y = c(2L, 2L, NA, 1L, 1L)) df2 <- data_frame(x = c(1L, 1L, 2L), y = c(1L, 1L, NA)) res <- vec_locate_matches( df1, df2, condition = c("==", "=="), incomplete = NA ) expect_identical(res$needles, c(1L, 2L, 3L, 4L, 4L, 5L, 5L)) expect_identical(res$haystack, c(NA, NA, NA, 1L, 2L, 1L, 2L)) res <- vec_locate_matches( df1, df2, condition = c(">=", ">="), incomplete = NA ) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 4L, 4L, 5L, 5L)) expect_identical(res$haystack, c(1L, 2L, NA, NA, 1L, 2L, 1L, 2L)) }) test_that("`incomplete = / 'drop'` still handles NAs in future columns when an earlier column has no matches", { df1 <- data_frame(x = c(1, 1, 2, 3), y = c(1, NA, NA, 4)) df2 <- data_frame(x = c(1, 3), y = c(1, 5)) # The 2 in row 3 of df1 has no match, but the NA in the 2nd column still propagates res <- vec_locate_matches(df1, df2, incomplete = NA, no_match = -1L) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(1L, NA, NA, -1L)) res <- vec_locate_matches(df1, df2, incomplete = "drop", no_match = -1L) expect_identical(res$needles, c(1L, 4L)) expect_identical(res$haystack, c(1L, -1L)) # The 1 in row 1 and 2 of df1 have no match, but the NA in row 2 of the 2nd column propagates res <- vec_locate_matches( df1, df2, incomplete = NA, no_match = -1L, condition = ">" ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(-1L, NA, NA, 1L)) res <- vec_locate_matches( df1, df2, incomplete = "drop", no_match = -1L, condition = ">" ) expect_identical(res$needles, c(1L, 4L)) expect_identical(res$haystack, c(-1L, 1L)) }) test_that("`incomplete` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(NA, 1, incomplete = "error"))) (expect_error(vec_locate_matches( NA, 1, incomplete = "error", needles_arg = "foo" ))) (expect_error(vec_locate_matches( NA, 1, incomplete = "error", needles_arg = "foo", error_call = call("fn") ))) }) }) test_that("`incomplete` error is classed", { expect_error( vec_locate_matches(NA, 1, incomplete = "error"), class = "vctrs_error_matches_incomplete" ) }) test_that("`incomplete` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, incomplete = 1.5))) (expect_error(vec_locate_matches(1, 2, incomplete = c("match", "drop")))) (expect_error(vec_locate_matches(1, 2, incomplete = "x"))) # Uses internal call (expect_error(vec_locate_matches( 1, 2, incomplete = "x", error_call = call("fn") ))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `condition` test_that("multiple matches from a non-equi condition are returned in first appearance order", { res <- vec_locate_matches(0L, c(1L, 0L, -1L, 0L), condition = "<=") expect_identical(res$needles, rep(1L, 3)) expect_identical(res$haystack, c(1L, 2L, 4L)) # Checking equi for good measure res <- vec_locate_matches(0L, c(1L, 0L, -1L, 0L), condition = "==") expect_identical(res$needles, rep(1L, 2)) expect_identical(res$haystack, c(2L, 4L)) }) test_that("multiple matches from a non-equi condition are returned in first appearance order when the matches are in different nesting containers", { df <- data_frame(x = 0, y = 0) df2 <- data_frame(x = 2:1, y = 1:2) res <- vec_locate_matches(df, df2, condition = c("<=", "<=")) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(1L, 2L)) }) test_that("`condition` is validated", { expect_error( vec_locate_matches(1, 2, condition = 1), "`condition` must be a character vector" ) expect_error( vec_locate_matches(1, 2, condition = "x"), 'must only contain "==", ">", ">=", "<", or "<="' ) expect_error( vec_locate_matches(1, 2, condition = c("==", "==")), "must be length 1, or the same length as the number of columns of the input" ) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `multiple` test_that("can get all matches", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "all") expect_identical(x$needles, c(1L, 1L, 2L, 2L)) expect_identical(x$haystack, c(1L, 3L, 2L, 4L)) }) test_that("can get first match", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "first") expect_identical(x$needles, 1:2) expect_identical(x$haystack, 1:2) }) test_that("can get last match", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "last") expect_identical(x$needles, 1:2) expect_identical(x$haystack, 3:4) }) test_that("can get any match", { x <- vec_locate_matches(c(1L, 3L), c(1L, 3L, 1L, 3L), multiple = "any") expect_identical(x$needles, 1:2) expect_identical(x$haystack, 1:2) }) test_that("duplicate needles match the same haystack locations", { x <- vec_locate_matches(c(1L, 3L, 1L, 3L), c(1L, 3L, 1L), multiple = "all") expect_identical(x$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(x$haystack, c(1L, 3L, 2L, 1L, 3L, 2L)) }) test_that("correctly gets all matches when they come from different nesting containers", { needles <- data_frame( a = c(1, 8), b = c(2, 9) ) haystack <- data_frame( a = c(6, 5), b = c(6, 7) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "all"), data_frame(needles = c(1L, 1L, 2L), haystack = c(1L, 2L, NA)) ) }) test_that("correctly gets first/last/any match when they come from different nesting containers", { needles <- data_frame( a = c(1, 8), b = c(2, 9) ) haystack <- data_frame( a = c(6, 5, 0), b = c(6, 7, 1) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "first"), data_frame(needles = c(1L, 2L), haystack = c(1L, NA)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "last"), data_frame(needles = c(1L, 2L), haystack = c(2L, NA)) ) expect_identical( vec_locate_matches(needles, haystack, condition = "<", multiple = "any"), data_frame(needles = c(1L, 2L), haystack = c(2L, NA)) ) expect_identical( vec_locate_matches( needles, haystack, condition = "<", multiple = "first", remaining = NA_integer_ ), data_frame(needles = c(1L, 2L, NA, NA), haystack = c(1L, NA, 2L, 3L)) ) expect_identical( vec_locate_matches( needles, haystack, condition = "<", multiple = "last", remaining = NA_integer_ ), data_frame(needles = c(1L, 2L, NA, NA), haystack = c(2L, NA, 1L, 3L)) ) expect_identical( vec_locate_matches( needles, haystack, condition = "<", multiple = "any", remaining = NA_integer_ ), data_frame(needles = c(1L, 2L, NA, NA), haystack = c(2L, NA, 1L, 3L)) ) }) test_that("`multiple` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, multiple = 1.5))) (expect_error(vec_locate_matches(1, 2, multiple = c("first", "last")))) (expect_error(vec_locate_matches(1, 2, multiple = "x"))) # Uses internal error (expect_error(vec_locate_matches( 1, 2, multiple = "x", error_call = call("fn") ))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `multiple` (deprecated) test_that("`multiple` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error"))) (expect_error(vec_locate_matches( 1L, c(1L, 1L), multiple = "error", needles_arg = "foo" ))) (expect_error(vec_locate_matches( 1L, c(1L, 1L), multiple = "error", needles_arg = "foo", error_call = call("fn") ))) (expect_error(vec_locate_matches( 1L, c(1L, 1L), multiple = "error", needles_arg = "foo", haystack_arg = "bar" ))) }) }) test_that("`multiple` can warn informatively", { expect_snapshot({ (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning"))) (expect_warning(vec_locate_matches( 1L, c(1L, 1L), multiple = "warning", needles_arg = "foo" ))) (expect_warning(vec_locate_matches( 1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", error_call = call("fn") ))) (expect_warning(vec_locate_matches( 1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", haystack_arg = "bar" ))) }) }) test_that("warning falls back to 'all'", { expect_warning( result <- vec_locate_matches( c(1L, 3L, 1L, 3L), c(1L, 3L, 1L), multiple = "warning" ), class = "vctrs_warning_matches_multiple" ) expect_identical( result, vec_locate_matches(c(1L, 3L, 1L, 3L), c(1L, 3L, 1L), multiple = "all") ) }) test_that("errors on multiple matches that come from different nesting containers", { df <- data_frame(x = 0, y = 0) df2 <- data_frame(x = 1:2, y = 2:1) expect_snapshot(error = TRUE, { vec_locate_matches(df, df2, condition = c("<=", "<="), multiple = "error") }) }) test_that("errors when a match from a different nesting container is processed early on", { # Row 1 has 2 matches # Row 2 has 0 matches needles <- data_frame( a = c(1, 8), b = c(2, 9) ) # Rows 1 and 2 end up in different nesting containers haystack <- data_frame( a = c(5, 6), b = c(7, 6) ) # needles[1,] records the haystack[1,] match first, which is in the 1st # value of `loc_first_match_o_haystack`, then records the haystack[3,] match # which is in the 3rd value of `loc_first_match_o_haystack` even though it # is processed 2nd (i.e. we need to use `loc` rather than `i` when detecting # multiple matches) expect_snapshot(error = TRUE, { vec_locate_matches(needles, haystack, condition = "<", multiple = "error") }) }) test_that("`multiple = 'error'` doesn't error errneously on the last observation", { expect_error(res <- vec_locate_matches(1:2, 1:2, multiple = "error"), NA) expect_identical(res$needles, 1:2) expect_identical(res$haystack, 1:2) }) test_that("`multiple = 'error' / 'warning'` throw correctly when combined with `relationship`", { x <- c(1, 2, 2) y <- c(2, 1, 2) # `multiple` error technically fires first expect_snapshot({ (expect_error(vec_locate_matches( x, y, relationship = "one-to-one", multiple = "error" ))) }) # Works when warning is also requested expect_snapshot({ (expect_error(vec_locate_matches( x, y, relationship = "warn-many-to-many", multiple = "error" ))) }) # Both warnings are thrown if applicable expect_snapshot({ vec_locate_matches( x, y, relationship = "warn-many-to-many", multiple = "warning" ) }) # Both warning and error are thrown if applicable expect_snapshot(error = TRUE, { vec_locate_matches(x, y, relationship = "one-to-one", multiple = "warning") }) x <- c(1, 2) y <- c(2, 1, 2) expect_snapshot({ (expect_error(vec_locate_matches( x, y, relationship = "warn-many-to-many", multiple = "error" ))) }) # Only `multiple` warning is applicable here expect_snapshot({ vec_locate_matches( x, y, relationship = "warn-many-to-many", multiple = "warning" ) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `relationship` test_that("`relationship` handles one-to-one case", { # No error expect_identical( vec_locate_matches(1:2, 2:1, relationship = "one-to-one"), vec_locate_matches(1:2, 2:1) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "one-to-one"), vec_locate_matches(1:2, 3:4) ) expect_snapshot({ (expect_error(vec_locate_matches( c(2, 1), c(1, 1), relationship = "one-to-one" ))) (expect_error(vec_locate_matches( c(1, 1), c(1, 2), relationship = "one-to-one" ))) }) }) test_that("`relationship` handles one-to-many case", { # No error expect_identical( vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "one-to-many"), vec_locate_matches(c(1, 2), c(1, 2, 2)) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "one-to-many"), vec_locate_matches(1:2, 3:4) ) expect_snapshot({ (expect_error(vec_locate_matches( c(1, 2, 2), c(2, 1), relationship = "one-to-many" ))) }) }) test_that("`relationship` handles many-to-one case", { # No error expect_identical( vec_locate_matches(c(1, 2, 2), c(1, 2), relationship = "many-to-one"), vec_locate_matches(c(1, 2, 2), c(1, 2)) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "many-to-one"), vec_locate_matches(1:2, 3:4) ) expect_snapshot({ (expect_error(vec_locate_matches( c(1, 2), c(1, 2, 2), relationship = "many-to-one" ))) }) }) test_that("`relationship` handles many-to-many case", { # No error expect_identical( vec_locate_matches(c(1, 2, 2), c(1, 2), relationship = "many-to-many"), vec_locate_matches(c(1, 2, 2), c(1, 2)) ) # No error expect_identical( vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "many-to-many"), vec_locate_matches(c(1, 2), c(1, 2, 2)) ) # No error expect_identical( vec_locate_matches(c(1, 1, 2), c(1, 2, 2), relationship = "many-to-many"), vec_locate_matches(c(1, 1, 2), c(1, 2, 2)) ) # Doesn't care about the zero match case expect_identical( vec_locate_matches(1:2, 3:4, relationship = "many-to-many"), vec_locate_matches(1:2, 3:4) ) }) test_that("`relationship` handles warn-many-to-many case", { # No warning expect_identical( expect_silent( vec_locate_matches( c(1, 2, 2), c(1, 2), relationship = "warn-many-to-many" ) ), vec_locate_matches(c(1, 2, 2), c(1, 2)) ) # No warning expect_identical( expect_silent( vec_locate_matches( c(1, 2), c(1, 2, 2), relationship = "warn-many-to-many" ) ), vec_locate_matches(c(1, 2), c(1, 2, 2)) ) # Doesn't care about the zero match case expect_identical( expect_silent( vec_locate_matches(1:2, 3:4, relationship = "warn-many-to-many") ), vec_locate_matches(1:2, 3:4) ) # Specifically designed to ensure we test both: # - Finding multiple `needles` matches before multiple `haystack` matches # - Finding multiple `haystack` matches before multiple `needles` matches expect_snapshot({ (expect_warning(vec_locate_matches( c(1, 2, 1), c(1, 2, 2), relationship = "warn-many-to-many" ))) (expect_warning(vec_locate_matches( c(1, 1, 2), c(2, 2, 1), relationship = "warn-many-to-many" ))) }) }) test_that("`relationship` considers `incomplete` matches as possible multiple matches", { x <- c(1, NA, NaN) y <- c(NA, 1) expect_snapshot({ (expect_error(vec_locate_matches(x, y, relationship = "one-to-many"))) }) # No error expect_identical( vec_locate_matches(x, y, relationship = "one-to-many", incomplete = NA), vec_locate_matches(x, y, incomplete = NA) ) # No error expect_identical( vec_locate_matches(x, y, relationship = "one-to-many", nan_distinct = TRUE), vec_locate_matches(x, y, nan_distinct = TRUE) ) }) test_that("`relationship` errors on multiple matches that come from different nesting containers", { df <- data_frame(x = 0, y = 0) df2 <- data_frame(x = 1:2, y = 2:1) expect_snapshot({ (expect_error(vec_locate_matches( df, df2, condition = c("<=", "<="), relationship = "many-to-one" ))) }) }) test_that("`relationship` errors when a match from a different nesting container is processed early on", { # Row 1 has 2 matches # Row 2 has 0 matches needles <- data_frame( a = c(1, 8), b = c(2, 9) ) # Rows 1 and 2 end up in different nesting containers haystack <- data_frame( a = c(5, 6), b = c(7, 6) ) # needles[1,] records the haystack[1,] match first, which is in the 1st # value of `loc_first_match_o_haystack`, then records the haystack[3,] match # which is in the 3rd value of `loc_first_match_o_haystack` even though it # is processed 2nd (i.e. we need to use `loc` rather than `i` when detecting # multiple matches) expect_snapshot({ (expect_error(vec_locate_matches( needles, haystack, condition = "<", relationship = "many-to-one" ))) }) }) test_that("`relationship` doesn't error errneously on the last observation", { expect_error( res <- vec_locate_matches(1:2, 1:2, relationship = "many-to-one"), NA ) expect_identical(res$needles, 1:2) expect_identical(res$haystack, 1:2) }) test_that("`relationship` doesn't error if `multiple` removes multiple matches", { out <- vec_locate_matches( c(1, 2), c(1, 1), multiple = "any", relationship = "one-to-one" ) expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(1L, NA)) out <- vec_locate_matches( c(1, 2), c(1, 1), multiple = "first", relationship = "one-to-one" ) expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(1L, NA)) out <- vec_locate_matches( c(1, 2), c(1, 1), multiple = "last", relationship = "one-to-one" ) expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(2L, NA)) }) test_that("`relationship` can still detect problematic `haystack` relationships when `multiple = first/last` are used", { expect_snapshot({ (expect_error(vec_locate_matches( c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-one" ))) (expect_error(vec_locate_matches( c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-many" ))) }) }) test_that("`relationship` and `remaining` work properly together", { expect_snapshot({ out <- vec_locate_matches( c(1, 2, 2), c(2, 3, 1, 1, 4), relationship = "warn-many-to-many", remaining = NA_integer_ ) }) expect_identical(out$needles, c(1L, 1L, 2L, 3L, NA, NA)) expect_identical(out$haystack, c(3L, 4L, 1L, 1L, 2L, 5L)) }) test_that("`relationship` errors if `condition` creates multiple matches", { expect_snapshot({ (expect_error(vec_locate_matches( 1, c(1, 2), condition = "<=", relationship = "many-to-one" ))) }) }) test_that("`relationship` doesn't error if `filter` removes multiple matches", { out <- vec_locate_matches( 1, c(1, 2), condition = "<=", filter = "min", relationship = "many-to-one" ) expect_identical(out$needles, 1L) expect_identical(out$haystack, 1L) out <- vec_locate_matches( 1, c(1, 2), condition = "<=", filter = "max", relationship = "many-to-one" ) expect_identical(out$needles, 1L) expect_identical(out$haystack, 2L) }) test_that("`relationship` still errors if `filter` hasn't removed all multiple matches", { expect_snapshot({ (expect_error(vec_locate_matches( 1, c(1, 2, 1), condition = "<=", filter = "min", relationship = "many-to-one" ))) }) # But not here out <- vec_locate_matches( c(1, 1), c(1, 2, 1), condition = "<=", filter = "max", relationship = "many-to-one" ) expect_identical(out$needles, c(1L, 2L)) expect_identical(out$haystack, c(2L, 2L)) }) test_that("`relationship` errors when we have >1 size 1 matches across containers (tidyverse/dplyr#6835)", { # Carefully designed to ensure we get 2 nested containment groups that split # up the rows of `y`, but each of the nested containment groups contain exactly # 1 match, so `size_match` in `expand_compact_indices()` won't ever be >1 x <- data_frame(a = 1L, b = 5L) y <- data_frame(a = c(1L, 2L), b = c(4L, 3L)) expect_snapshot(error = TRUE, { vec_locate_matches( x, y, condition = c("<=", ">="), filter = c("none", "none"), relationship = "one-to-one" ) }) }) test_that("`relationship` doesn't error when the first match from a different container gets filtered out (tidyverse/dplyr#6835)", { # Carefully designed to ensure we get 2 nested containment groups that split # up the rows of `y`. Row 1 (processed first) doesn't hold the minimum `b` # value, so it gets filtered out. Row 2 is in the "extra" matches section # but is actually the first (and only) real match, so we don't want to error # on it. x <- data_frame(a = 1L, b = 5L) y <- data_frame(a = c(1L, 2L), b = c(4L, 3L)) out <- vec_locate_matches( x, y, condition = c("<=", ">="), filter = c("none", "min"), relationship = "one-to-one" ) expect_identical(out$needles, 1L) expect_identical(out$haystack, 2L) # Similar to the above example, but with a `max` filter. Row 1 doesn't hold # the max `c` value so it is filtered out even though it is a `>=` match. x <- data_frame(a = 1L, b = 5L, c = 3L) y <- data_frame(a = c(1L, 2L), b = c(4L, 3L), c = c(1L, 2L)) out <- vec_locate_matches( x, y, condition = c("<=", ">=", ">="), filter = c("none", "none", "max"), relationship = "one-to-one" ) expect_identical(out$needles, 1L) expect_identical(out$haystack, 2L) }) test_that("`relationship` errors respect argument tags and error call", { expect_snapshot({ (expect_error(vec_locate_matches( 1L, c(1L, 1L), relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn") ))) (expect_error(vec_locate_matches( c(1L, 1L), 1L, relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn") ))) (expect_error(vec_locate_matches( c(1L, 1L), 1L, relationship = "one-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn") ))) (expect_error(vec_locate_matches( 1L, c(1L, 1L), relationship = "many-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn") ))) }) }) test_that("`relationship` warnings respect argument tags and error call", { expect_snapshot({ (expect_warning(vec_locate_matches( c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn") ))) (expect_warning(vec_locate_matches( c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", error_call = call("fn") ))) (expect_warning(vec_locate_matches( c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", haystack_arg = "bar", error_call = call("fn") ))) }) }) test_that("`relationship` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, relationship = 1.5))) (expect_error(vec_locate_matches( 1, 2, relationship = c("one-to-one", "one-to-many") ))) (expect_error(vec_locate_matches(1, 2, relationship = "x"))) # Uses internal error (expect_error(vec_locate_matches( 1, 2, relationship = "x", error_call = call("fn") ))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `no_match` test_that("can control `no_match`", { x <- vec_locate_matches(1:3, 1L) expect_identical(x$haystack, c(1L, NA, NA)) x <- vec_locate_matches(1:3, 1L, no_match = 0L) expect_identical(x$haystack, c(1L, 0L, 0L)) }) test_that("can drop unmatched needles", { x <- vec_locate_matches(1:3, 2L, no_match = "drop") expect_identical(x$needles, 2L) expect_identical(x$haystack, 1L) }) test_that("can drop unmatched missings when `incomplete = 'match'`", { x <- vec_locate_matches(c(NaN, 2, NA), 2, no_match = "drop") expect_identical(x$needles, 2L) expect_identical(x$haystack, 1L) x <- vec_locate_matches( c(NaN, 2, NA), NA, no_match = "drop", nan_distinct = FALSE ) expect_identical(x$needles, c(1L, 3L)) expect_identical(x$haystack, c(1L, 1L)) x <- vec_locate_matches( c(NaN, 2, NA), NA, no_match = "drop", nan_distinct = TRUE ) expect_identical(x$needles, 3L) expect_identical(x$haystack, 1L) }) test_that("can differentiate between `no_match` and `incomplete`", { res <- vec_locate_matches(c(1, NA), 2, incomplete = NA, no_match = -1L) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(-1L, NA)) }) test_that("`no_match` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, no_match = "error"))) (expect_error(vec_locate_matches( 1, 2, no_match = "error", needles_arg = "foo" ))) (expect_error(vec_locate_matches( 1, 2, no_match = "error", needles_arg = "foo", error_call = call("fn") ))) (expect_error(vec_locate_matches( 1, 2, no_match = "error", needles_arg = "foo", haystack_arg = "bar" ))) }) }) test_that("`no_match = 'error'` doesn't error on handled incomplete values", { res <- vec_locate_matches( c(NA, NaN, NA, 1), c(NA, 1), incomplete = NA, no_match = "error" ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(rep(NA, 3), 2L)) }) test_that("`no_match = 'drop'` doesn't drop handled incomplete values", { res <- vec_locate_matches( c(NA, NaN, NA, 1), c(NA, 1), incomplete = NA, no_match = "drop" ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(rep(NA, 3), 2L)) }) test_that("errors with the right location on unmatched needles when different nesting containers are present", { df <- data_frame(x = 2:1, y = 2:1) df2 <- data_frame(x = 1:2, y = 2:1) # i.e. should be location 2 expect_snapshot( (expect_error(vec_locate_matches( df, df2, condition = ">=", no_match = "error" ))) ) }) test_that("`no_match` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, no_match = 1.5))) (expect_error(vec_locate_matches(1, 2, no_match = c(1L, 2L)))) (expect_error(vec_locate_matches(1, 2, no_match = "x"))) # Uses internal call (expect_error(vec_locate_matches( 1, 2, no_match = "x", error_call = call("fn") ))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - `remaining` test_that("`remaining` can retain `haystack` values that `needles` didn't match", { res <- vec_locate_matches(1, 0:2, remaining = NA) expect_identical(res$needles, c(1L, NA, NA)) expect_identical(res$haystack, c(2L, 1L, 3L)) res <- vec_locate_matches(1, 0:2, remaining = NA, condition = ">=") expect_identical(res$needles, c(1L, 1L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) res <- vec_locate_matches(1, 0:2, remaining = NA, condition = "<") expect_identical(res$needles, c(1L, NA, NA)) expect_identical(res$haystack, c(3L, 1L, 2L)) }) test_that("`incomplete` affects `needles` but not `haystack`", { # Matches NA to NA, so nothing remaining res <- vec_locate_matches( c(1, NA), c(NA, 1), incomplete = "compare", remaining = NA ) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 1L)) # Matches NA to NA, so nothing remaining res <- vec_locate_matches( c(1, NA), c(NA, 1), incomplete = "match", remaining = NA ) expect_identical(res$needles, c(1L, 2L)) expect_identical(res$haystack, c(2L, 1L)) # Doesn't match NA to NA, so `haystack` is left with remaining values res <- vec_locate_matches( c(1, NA), c(NA, 1), condition = "<", incomplete = "compare", remaining = NA ) expect_identical(res$needles, c(1L, 2L, NA, NA)) expect_identical(res$haystack, c(NA, NA, 1L, 2L)) # Matches NA to NA, so only remaining value is for `1` res <- vec_locate_matches( c(1, NA), c(NA, 1), condition = "<", incomplete = "match", remaining = NA ) expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(NA, 1L, 2L)) # `needles` NA value is propagated, so `haystack` is left with a remaining value res <- vec_locate_matches(c(1, NA), c(NA, 1), incomplete = NA, remaining = NA) expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(2L, NA, 1L)) # `needles` NA value is dropped, so `haystack` is left with a remaining value res <- vec_locate_matches( c(1, NA), c(NA, 1), incomplete = "drop", remaining = NA ) expect_identical(res$needles, c(1L, NA)) expect_identical(res$haystack, c(2L, 1L)) }) test_that("`remaining` combined with `multiple = 'first/last'` treats non-first/last matches as remaining", { x <- c(1, 2) y <- c(1, 2, 2) res <- vec_locate_matches(x, y, remaining = NA, multiple = "first") expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) res <- vec_locate_matches(x, y, remaining = NA, multiple = "last") expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(1L, 3L, 2L)) res <- vec_locate_matches(x, y, remaining = NA, multiple = "any") expect_identical(res$needles, c(1L, 2L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) }) test_that("`remaining` combined with the haystack reordering retains appearance order", { x <- data_frame(a = 1, b = 4) y <- data_frame(a = c(2, 1, 0), b = c(2, 1, 0)) # Appearance order for the haystack locations res <- vec_locate_matches(x, y, condition = c("<=", ">=")) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(1L, 2L)) # Retain that appearance order of the matches, with remaining values appended res <- vec_locate_matches(x, y, condition = c("<=", ">="), remaining = NA) expect_identical(res$needles, c(1L, 1L, NA)) expect_identical(res$haystack, c(1L, 2L, 3L)) }) test_that("`remaining` can error informatively", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, remaining = "error"))) (expect_error(vec_locate_matches( 1, 2, remaining = "error", needles_arg = "foo" ))) (expect_error(vec_locate_matches( 1, 2, remaining = "error", needles_arg = "foo", error_call = call("fn") ))) (expect_error(vec_locate_matches( 1, 2, remaining = "error", needles_arg = "foo", haystack_arg = "bar" ))) }) }) test_that("`remaining` is validated", { expect_snapshot({ (expect_error(vec_locate_matches(1, 2, remaining = 1.5))) (expect_error(vec_locate_matches(1, 2, remaining = c(1L, 2L)))) (expect_error(vec_locate_matches(1, 2, remaining = "x"))) # Uses internal call (expect_error(vec_locate_matches( 1, 2, remaining = "x", error_call = call("fn") ))) }) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - filter test_that("simple `filter`s work", { needles <- c(1, 2, 4) haystack <- c(2, 1, 3, 0) res <- vec_locate_matches(needles, haystack, condition = "<", filter = "max") expect_identical(res$haystack, c(3L, 3L, NA)) res <- vec_locate_matches(needles, haystack, condition = "<", filter = "min") expect_identical(res$haystack, c(1L, 3L, NA)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max") expect_identical(res$haystack, c(2L, 1L, 3L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min") expect_identical(res$haystack, c(4L, 4L, 4L)) }) test_that("haystack duplicates are preserved", { needles <- c(1, 2, 4) haystack <- c(2, 1, 2, 3, 0, 1, 0) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max") expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L)) expect_identical(res$haystack, c(2L, 6L, 1L, 3L, 4L)) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min") expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L, 3L)) expect_identical(res$haystack, c(5L, 7L, 5L, 7L, 5L, 7L)) }) test_that("haystack duplicates can be controlled by `multiple`", { needles <- c(1, 2, 4) haystack <- c(2, 1, 2, 3, 0, 1, 0) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", multiple = "first" ) expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(2L, 1L, 4L)) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", multiple = "last" ) expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(6L, 3L, 4L)) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", multiple = "any" ) expect_identical(res$needles, 1:3) expect_identical(res$haystack, c(2L, 1L, 4L)) }) test_that("`filter` works when valid matches are in different nesting containers", { needles <- data_frame(x = 0L, y = 1L, z = 2L) haystack <- data_frame( x = c(1L, 2L, 1L, 0L), y = c(2L, 1L, 2L, 3L), z = c(3L, 3L, 2L, 2L) ) info <- compute_nesting_container_info(haystack, c("<=", "<=", "<=")) haystack_order <- info[[1]] container_ids <- info[[2]] # Rows 1 and 2 of haystack are in different nesting containers, but # both have the "max" filter value of `z=3` so both should be in the result. # Row 4 is in its own container, so it will be considered the "max" # of its group, but it is less than rows 1 and 2 so it will ultimately be # filtered out. expect_identical(container_ids, c(1L, 2L, 1L, 0L)) expect_identical(haystack_order, c(4L, 3L, 1L, 2L)) res <- vec_locate_matches( needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max") ) expect_identical(res$needles, c(1L, 1L)) expect_identical(res$haystack, c(1L, 2L)) res <- vec_locate_matches( needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max"), multiple = "first" ) expect_identical(res$haystack, 1L) res <- vec_locate_matches( needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max"), multiple = "last" ) expect_identical(res$haystack, 2L) res <- vec_locate_matches( needles, haystack, condition = c("<=", "<=", "<="), filter = c("none", "none", "max"), multiple = "any" ) expect_identical(res$haystack, 1L) }) test_that("single filter is applied to all columns", { needles <- data_frame(x = 5L, y = 8L, z = 4L) haystack <- data_frame( x = c(1L, 3L, 2L, 2L), y = c(1L, 3L, 2L, 3L), z = c(1L, 2L, 3L, 3L) ) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "max") expect_identical(res$haystack, 2L) res <- vec_locate_matches(needles, haystack, condition = ">=", filter = "min") expect_identical(res$haystack, 1L) }) test_that("different `filter`s can be used per column", { needles <- data_frame(x = c(0, 2, 1, 1), y = c(2, 0, 0, 4)) haystack <- data_frame(x = c(2, 2, 2, 1, 1), y = c(1, 1, 2, 2, 1)) res <- vec_locate_matches( needles, haystack, condition = c(">=", "<"), filter = c("max", "min") ) expect_identical(res$needles, c(1L, 2L, 2L, 3L, 4L)) expect_identical(res$haystack, c(NA, 1L, 2L, 5L, NA)) }) test_that("`filter` works with incomplete values", { needles <- c(1, NA, 4, NA) haystack <- c(NA, 1, NA, 1, 3) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", incomplete = "compare" ) expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L, 4L, 4L)) expect_identical(res$haystack, c(2L, 4L, 1L, 3L, 5L, 1L, 3L)) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", incomplete = "compare", multiple = "first" ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(2L, 1L, 5L, 1L)) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", incomplete = "compare", multiple = "any" ) expect_identical(res$needles, 1:4) expect_identical(res$haystack, c(2L, 1L, 5L, 1L)) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "max", incomplete = NA ) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 4L)) expect_identical(res$haystack, c(2L, 4L, NA, 5L, NA)) }) test_that("`filter` works with mixed NA and NaN", { needles <- c(1, NA, 4, NaN) haystack <- c(NA, 1, NaN, 1, 3) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "min", incomplete = "compare", nan_distinct = FALSE ) expect_identical(res$needles, c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L)) expect_identical(res$haystack, c(2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L)) res <- vec_locate_matches( needles, haystack, condition = ">=", filter = "min", incomplete = "compare", nan_distinct = TRUE ) expect_identical(res$needles, c(1L, 1L, 2L, 3L, 3L, 4L)) expect_identical(res$haystack, c(2L, 4L, 1L, 2L, 4L, 3L)) }) test_that("`filter` is validated", { expect_error(vec_locate_matches(1, 2, filter = 1.5), "character vector") expect_error( vec_locate_matches(1, 2, filter = "x"), 'one of "none", "min", or "max"' ) expect_error( vec_locate_matches(1, 2, filter = c("min", "max")), "length 1, or the same length as" ) }) # ------------------------------------------------------------------------------ # vec_locate_matches() - edge cases test_that("zero row `needles` results in zero row data frame output", { res <- vec_locate_matches(integer(), 1:3) expect_identical(res$needles, integer()) expect_identical(res$haystack, integer()) res <- vec_locate_matches(integer(), 1:3, condition = "<") expect_identical(res$needles, integer()) expect_identical(res$haystack, integer()) }) test_that("zero row `haystack` results in no-matches for all needles", { res <- vec_locate_matches(1:3, integer()) expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) res <- vec_locate_matches(1:3, integer(), condition = "<") expect_identical(res$needles, 1:3) expect_identical(res$haystack, rep(NA_integer_, 3)) }) test_that("zero row `haystack` still allows needle incomplete handling", { res <- vec_locate_matches(c(1, NA), integer(), incomplete = NA, no_match = 0L) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(0L, NA)) res <- vec_locate_matches( c(1, NA), integer(), incomplete = NA, no_match = 0L, condition = "<" ) expect_identical(res$needles, 1:2) expect_identical(res$haystack, c(0L, NA)) }) test_that("zero column data frames are not allowed", { expect_error( vec_locate_matches(data_frame(.size = 2L), data_frame(.size = 2L)), "at least 1 column" ) }) test_that("zero column input still checks `condition` correctness", { x <- data_frame(.size = 2) y <- data_frame(.size = 3) expect_error( vec_locate_matches(x, y, condition = c("==", "<=")), "length 1, or the same length as the number of columns" ) }) test_that("`multiple = 'first'/'last'` returns the first/last by appearance", { x <- c(1, 2, 3) y <- c(2, 1, 0) res <- vec_locate_matches(x, y, condition = ">=", multiple = "first") expect_identical(res$haystack, c(2L, 1L, 1L)) res <- vec_locate_matches(x, y, condition = ">=", multiple = "last") expect_identical(res$haystack, c(3L, 3L, 3L)) }) test_that("NA adjustment of `>` and `>=` conditions is protected from empty haystack", { res <- vec_locate_matches(1L, integer(), condition = ">") expect_identical(res$needles, 1L) expect_identical(res$haystack, NA_integer_) }) test_that("potential overflow on large output size is caught informatively", { # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") expect_snapshot({ (expect_error(vec_locate_matches(1:1e7, 1:1e7, condition = ">="))) }) }) vctrs/tests/testthat/test-error-call.R0000644000176200001440000001313315065005761017565 0ustar liggesuserstest_that("failing common type reports correct error call", { my_function <- function() vec_ptype2(2, chr()) expect_snapshot((expect_error(my_function()))) }) test_that("failing cast reports correct error call", { my_function <- function() vec_cast(2, chr()) expect_snapshot((expect_error(my_function()))) df1 <- data_frame(x = TRUE, y = TRUE) df2 <- data_frame(y = "1") my_function <- function(lhs, rhs) vec_cast(lhs, rhs) expect_snapshot((expect_error(my_function(df1, df2)))) df1 <- data_frame(y = TRUE) df2 <- data_frame(y = "1") expect_snapshot((expect_error(my_function(df1, df2)))) }) test_that("lossy cast reports correct error call", { my_function <- function() vec_cast(2, lgl()) expect_snapshot((expect_error(my_function()))) }) test_that("failing common size reports correct error call", { my_function <- function() vec_recycle(1:2, 10) expect_snapshot((expect_error(my_function()))) # FIXME my_function <- function() vec_size_common(1:2, 1:10) expect_snapshot((expect_error(my_function()))) }) test_that("unsupported error reports correct error call", { x <- new_vctr(1:2) my_function <- function() dim(x) <- 1:2 expect_snapshot((expect_error(my_function()))) my_function <- function() median(x) expect_snapshot((expect_error(my_function()))) }) test_that("scalar error reports correct error call", { my_function <- function() obj_check_vector(foobar()) expect_snapshot((expect_error(my_function()))) }) test_that("size error reports correct error call", { my_function <- function() vec_check_size(1:2, size = 1) expect_snapshot((expect_error(my_function()))) }) test_that("bare casts report correct error call", { my_function <- function() vec_cast(1.5, int()) expect_snapshot((expect_error(my_function()))) my_function <- function() vec_cast(1.5, lgl()) expect_snapshot((expect_error(my_function()))) my_function <- function() vec_cast(2L, lgl()) expect_snapshot((expect_error(my_function()))) # Passing call to `shape_broadcast()` my_function <- function() vec_cast(matrix(TRUE), dbl()) expect_snapshot((expect_error(my_function()))) }) test_that("base S3 casts report correct error call", { my_function <- function() vec_cast("a", factor("b")) expect_snapshot((expect_error(my_function()))) }) test_that("names validation reports correct error call", { my_function <- function() { vec_as_names(c("x", "", "y"), repair = "check_unique") } expect_snapshot((expect_error(my_function()))) my_function <- function() { vec_as_names(c("x", "x"), repair = "check_unique", repair_arg = "repair") } expect_snapshot((expect_error(my_function()))) my_function <- function() { vec_as_names("...", repair = "check_unique", repair_arg = "repair") } expect_snapshot((expect_error(my_function()))) }) test_that("subscript validation reports correct error calls", { my_function <- function() vctrs::num_as_location(1, 1L, missing = "bogus") expect_snapshot((expect_error(my_function()))) my_function <- function() vctrs::vec_as_location(10, 2) expect_snapshot((expect_error(my_function()))) my_function <- function(my_arg) vec_as_location(my_arg, 2) expect_snapshot((expect_error(my_function(1.5)))) my_function <- function(my_arg) vctrs::vec_as_subscript(my_arg) expect_snapshot((expect_error(my_function(1.5)))) my_function <- function(my_arg) vctrs::vec_as_location(my_arg, 2) expect_snapshot((expect_error(my_function(list())))) my_function <- function(my_arg) vec_as_location(1, my_arg) expect_snapshot((expect_error(my_function(1.5)))) my_function <- function(my_arg) vec_as_location(my_arg, 1, missing = "error") expect_snapshot((expect_error(my_function(NA)))) }) test_that("`vec_ptype()` reports correct error call", { my_function <- function(my_arg) vec_ptype(my_arg) expect_snapshot({ (expect_error(my_function(env()))) (expect_error(my_function(foobar(list())))) }) }) test_that("`vec_slice()` uses `error_call`", { my_function <- function(x, i) vec_slice(x, i, error_call = current_env()) expect_snapshot({ (expect_error(my_function(env(), 1))) (expect_error(my_function(1, 2))) }) }) test_that("vec_slice() reports self in error context", { expect_snapshot({ (expect_error(vec_slice(foobar(list()), 1))) (expect_error(vec_slice(list(), env()))) }) }) test_that("list_sizes() reports error context", { expect_snapshot({ (expect_error(list_sizes(foobar(list())))) (expect_error(list_sizes(list(env())))) (expect_error(list_sizes(list(1, 2, env())))) (expect_error(list_sizes(list(1, 2, foo = env())))) }) }) test_that("vec_size() reports error context", { expect_snapshot({ (expect_error(vec_size(env()))) }) }) test_that("vec_cast_common() reports error context", { my_function <- function(...) vec_cast_common(...) expect_snapshot((expect_error(my_function(my_arg = 1.5, .to = int())))) expect_snapshot( (expect_error(my_function(my_arg = 1.5, .to = int(), .arg = "my_arg"))) ) expect_snapshot( (expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg"))) ) expect_snapshot((expect_error(my_function(1, "foo", .arg = "my_arg")))) x <- data.frame(x = "a") y <- data.frame(x = 1, y = 2) expect_snapshot((expect_error(my_function(this_arg = x, that_arg = y)))) }) test_that("vec_ptype_common() reports error context", { my_function <- function(...) vec_ptype_common(...) expect_snapshot((expect_error(my_function(this_arg = 1, that_arg = "foo")))) expect_snapshot( (expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg"))) ) expect_snapshot((expect_error(my_function(1, "foo", .arg = "my_arg")))) }) vctrs/tests/testthat/helper-type-dplyr.R0000644000176200001440000000023315065005761020131 0ustar liggesusersexpect_drop <- function(x, value) { drop <- dplyr::group_by_drop_default(x) if (value) { expect_true(drop) } else { expect_false(drop) } } vctrs/tests/testthat/helper-performance.R0000644000176200001440000000232614751701606020330 0ustar liggesusersskip_if_not_testing_performance <- function(x) { opt <- Sys.getenv("VCTRS_TEST_PERFORMANCE", unset = "false") testing <- identical(opt, "true") if (testing) { return() } skip("Not testing performance") } expect_time_lt <- function(expr, expect) { time <- time_of({{ expr }}) expect_lt(time, expect) } time_of <- function(expr) { expr <- enquo(expr) time <- system.time(eval_tidy(expr)) unclass(time)[["elapsed"]] } # From r-lib/bench with_memory_prof <- function(expr) { f <- tempfile() on.exit(unlink(f)) tryCatch( utils::Rprofmem(f, threshold = 1), error = function(...) skip("Can't profile memory on this system.") ) on.exit(utils::Rprofmem(NULL), add = TRUE) res <- force(expr) utils::Rprofmem(NULL) bytes <- parse_allocations(f)$bytes bytes <- sum(bytes, na.rm = TRUE) new_vctrs_bytes(bytes) } parse_allocations <- function(filename) { if (!is_installed("profmem")) { testthat::skip("profmem must be installed.") } readRprofmem <- env_get(ns_env("profmem"), "readRprofmem") tryCatch( readRprofmem(filename), error = function(cnd) { testthat::skip(sprintf( "Memory profiling failed: %s", conditionMessage(cnd) )) } ) } vctrs/tests/testthat/test-c.R0000644000176200001440000005567615127057357015776 0ustar liggesuserslocal_name_repair_quiet() test_that("zero length input returns NULL", { expect_equal(vec_c(), NULL) expect_equal(vec_c(NULL), NULL) expect_equal(vec_c(NULL, ), NULL) expect_equal(vec_c(NULL, NULL), NULL) }) test_that("NULL is idempotent", { expect_equal(vec_c(NULL, 1L), 1L) expect_equal(vec_c(1L, NULL), 1L) }) test_that("NA is idempotent", { expect_equal(vec_c(NA, 1L), c(NA, 1L)) expect_equal(vec_c(NA, "x"), c(NA, "x")) expect_equal(vec_c(NA, factor("x")), factor(c(NA, "x"))) expect_equal(vec_c(NA, new_date(0)), new_date(c(NA, 0))) expect_equal(vec_c(NA, new_datetime(0)), new_datetime(c(NA, 0))) expect_equal(vec_c(NA, new_duration(0)), new_duration(c(NA, 0))) }) test_that("NA is logical if no other types intervene", { expect_equal(vec_c(logical()), logical()) expect_equal(vec_c(NA), NA) expect_equal(vec_c(NA, NA), c(NA, NA)) }) test_that("different types are coerced to common", { expect_equal(vec_c(TRUE, 1L, 1), c(1, 1, 1)) expect_equal(vec_c(TRUE, 2:4), 1:4) }) test_that("specified .ptypes do not allow more casts", { expect_error( vec_c(TRUE, .ptype = character()), class = "vctrs_error_incompatible_type" ) }) test_that("common type failure uses error call and error arg (#1641, #1692)", { expect_snapshot(error = TRUE, { vec_c("x", 1, .error_call = call("foo"), .error_arg = "arg") }) expect_snapshot(error = TRUE, { vec_c( "x", .ptype = integer(), .error_call = call("foo"), .error_arg = "arg" ) }) }) test_that("common type failure uses positional errors", { expect_snapshot({ # Looking for `..1` and `a` (expect_error(vec_c(1, a = "x", 2))) # Directed cast should also produce positional errors (#1690) (expect_error(vec_c(1, a = "x", 2, .ptype = double(), .error_arg = "arg"))) # Lossy cast (expect_error(vec_c(1, a = 2.5, .ptype = integer()))) }) }) test_that("combines outer an inner names", { expect_equal(vec_c(x = 1), c(x = 1)) expect_equal(vec_c(c(x = 1)), c(x = 1)) expect_equal(vec_c(c(x = 1:2)), c(x1 = 1, x2 = 2)) expect_error(vec_c(y = c(x = 1)), "Please supply") }) test_that("can bind data.frame columns", { df <- data.frame(x = NA, y = 1:2) df$x <- data.frame(a = 1:2) expected <- data.frame(x = NA, y = c(1:2, 1:2)) expected$x <- data.frame(a = c(1:2, 1:2)) expect_equal(vec_c(df, df), expected) }) test_that("vec_c() handles matrices", { m <- matrix(1:4, nrow = 2) dimnames(m) <- list(c("foo", "bar"), c("baz", "quux")) # FIXME: `vec_ptype_common(m, m)` doesn't return dimension names exp <- matrix(c(1:2, 1:2, 3:4, 3:4), nrow = 4) rownames(exp) <- c("foo", "bar", "foo", "bar") expect_identical(vec_c(m, m), exp) expect_error(vec_c(outer = m), "Please supply") }) test_that("vec_c() includes index in argument tag", { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) expect_snapshot(error = TRUE, vec_c(df1, df2)) expect_snapshot(error = TRUE, vec_c(df1, df1, df2)) expect_snapshot(error = TRUE, vec_c(foo = df1, bar = df2)) }) test_that("vec_c() handles record classes", { local_rational_class() out <- vec_c(rational(1, 2), 1L, NA) expect_true(vec_is(out, rational(1, 2))) expect_size(out, 3) expect_identical( vec_proxy(out), data.frame(n = c(1L, 1L, NA), d = c(2L, 1L, NA)) ) }) test_that("can mix named and unnamed vectors (#271)", { expect_identical(vec_c(c(a = 1), 2), c(a = 1, 2)) expect_identical(vec_c(0, c(a = 1), 2, b = 3), c(0, a = 1, 2, b = 3)) }) test_that("preserves names when inputs are cast to a common type (#1690)", { expect_named(vec_c(c(a = 1), .ptype = integer()), "a") expect_named( vec_c(foo = c(a = 1), .ptype = integer(), .name_spec = "{outer}_{inner}"), "foo_a" ) expect_named( vec_c(foo = c(a = 1), .ptype = integer(), .name_spec = "inner"), "a" ) }) test_that("vec_c() repairs names", { local_name_repair_quiet() # Default minimal repair expect_named(vec_c(a = 1, a = 2, `_` = 3), c("a", "a", "_")) out <- vec_c(!!!set_names(1, NA)) expect_named(out, "") expect_named( vec_c(a = 1, a = 2, `_` = 3, .name_repair = "unique"), c("a...1", "a...2", "_") ) expect_error( vec_c(a = 1, a = 2, `_` = 3, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique" ) expect_named( vec_c(a = 1, a = 2, `_` = 3, .name_repair = "universal"), c("a...1", "a...2", "._") ) expect_named(vec_c(a = 1, a = 2, .name_repair = ~ toupper(.)), c("A", "A")) }) test_that("vec_c() can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_c(x = TRUE, x = 0, .name_repair = "unique_quiet") res_universal <- vec_c( "if" = TRUE, "in" = 0, .name_repair = "universal_quiet" ) }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in")) }) test_that("vec_c() doesn't use outer names for data frames (#524)", { x <- data.frame(inner = 1) expect_equal(vec_c(outer = x), x) a <- data.frame(x = 1L) b <- data.frame(x = 2L) expect_equal(vec_c(foo = a, bar = b), data.frame(x = 1:2)) }) test_that("vec_c() preserves row names and inner names", { x <- data.frame(a = 1, row.names = "r1") y <- data.frame(a = 2, row.names = "r2") expect_equal(rownames(vec_c(x, y)), c("r1", "r2")) expect_equal(rownames(vec_c(x, x)), c("r1...1", "r1...2")) vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) oo_x <- set_names( as.POSIXlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3] ) oo_y <- as.POSIXlt(c(FOO = "2020-01-04")) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "d") mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("d")) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux") ) nested_out <- vec_c(nested_x, nested_y) expect_identical(row.names(nested_out), c("foo", "bar", "baz", "quux")) expect_identical(row.names(nested_out$df), c("a", "b", "c", "d")) expect_identical(row.names(nested_out$mat), c("a", "b", "c", "d")) expect_identical(names(nested_out$vec), c("a", "b", "c", "FOO")) expect_identical(names(nested_out$oo), c("a", "b", "c", "FOO")) }) test_that("vec_c() outer names work with proxied objects", { x <- as.POSIXlt(new_datetime(0)) exp <- set_names(x, "outer") expect_equal(vec_c(outer = x), exp) named_x <- set_names(x, "inner") exp <- set_names(named_x, "outer_inner") expect_error(vec_c(outer = named_x), "Please supply") expect_equal(vec_c(outer = named_x, .name_spec = "{outer}_{inner}"), exp) xs <- as.POSIXlt(new_datetime(c(0, 1))) exp <- set_names(xs, c("outer_1", "outer_2")) expect_error(vec_c(outer = xs), "Please supply") expect_equal(vec_c(outer = xs, .name_spec = "{outer}_{inner}"), exp) }) test_that("vec_c() works with simple homogeneous foreign S3 classes", { expect_identical(vec_c(foobar(1), foobar(2)), vec_c(foobar(c(1, 2)))) expect_identical(vec_c(NULL, foobar(1), foobar(2)), vec_c(foobar(c(1, 2)))) }) test_that("vec_c() works with simple homogeneous foreign S4 classes", { joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") expect_identical(vec_c(joe1, joe2), .Counts(1:3, name = "Joe")) }) test_that("vec_c() fails with complex foreign S3 classes", { expect_snapshot({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type")) (expect_error( vec_c(x, y, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_c() fails with complex foreign S4 classes", { expect_snapshot({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(vec_c(joe, jane), class = "vctrs_error_incompatible_type")) (expect_error( vec_c(joe, jane, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_c() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( vec_c(foobar(1), "", foobar(2)), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` method <- function(...) rep_along(list(...), "dispatched") local_methods( c.vctrs_foobar = method ) expect_identical( vec_c(foobar(1), foobar(2, class = "foo")), c("dispatched", "dispatched") ) expect_identical( vec_c(NULL, foobar(1), NULL, foobar(2, class = "foo")), c("dispatched", "dispatched") ) # Registered fallback s3_register("base::c", "vctrs_c_fallback", method) expect_identical( vec_c( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), c("dispatched", "dispatched") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( vec_c(foobar(list(1)), foobar(list(2))), class = "vctrs_error_scalar_type" ) }) test_that("c() fallback is consistent", { dispatched <- function(x) structure(x, class = "dispatched") c_method <- function(...) dispatched(NextMethod()) out <- with_methods( vec_ptype2.vctrs_foobaz.vctrs_foobaz = function(...) foobaz(df_ptype2(...)), vec_cast.vctrs_foobaz.vctrs_foobaz = function(...) foobaz(df_cast(...)), c.vctrs_foobar = c_method, list( direct = vec_c(foobar(1L), foobar(2L)), df = vec_c(data_frame(x = foobar(1L)), data_frame(x = foobar(2L))), tib = vec_c(tibble(x = foobar(1L)), tibble(x = foobar(2L))), foreign_df = vec_c( foobaz(data_frame(x = foobar(1L))), foobaz(data_frame(x = foobar(2L))) ) ) ) expect_equal(out$direct, dispatched(1:2)) expect_equal(out$df$x, dispatched(1:2)) expect_equal(out$tib$x, dispatched(1:2)) expect_equal(out$foreign_df$x, dispatched(1:2)) # Hard case: generic record vectors my_rec_record <- function(x) { new_rcrd(list(x = x), class = "my_rec_record") } out <- with_methods( c.vctrs_foobar = c_method, vec_ptype2.my_rec_record.my_rec_record = function(x, y, ...) { my_rec_record(vec_ptype2(field(x, "x"), field(y, "x"), ...)) }, vec_cast.my_rec_record.my_rec_record = function(x, to, ...) { x }, vec_c( data_frame(x = my_rec_record(foobar(1L))), data_frame(x = my_rec_record(foobar(2L))) ) ) expect_equal(field(out$x, "x"), dispatched(1:2)) }) test_that("vec_c() falls back to c() if S4 method is available", { joe1 <- .Counts(c(1L, 2L), name = "Joe") joe2 <- .Counts(3L, name = "Joe") c_counts <- function(x, ...) { xs <- list(x, ...) xs_data <- lapply(xs, function(x) x@.Data) new_data <- do.call(c, xs_data) .Counts(new_data, name = x@name) } local_s4_method("c", methods::signature(x = "vctrs_Counts"), c_counts) expect_identical( vec_c(joe1, joe2), .Counts(c(1L, 2L, 3L), name = "Joe") ) expect_identical( vec_c(NULL, joe1, joe2), .Counts(c(1L, 2L, 3L), name = "Joe") ) }) test_that("vec_c() fallback doesn't support (most) `name_spec` or `ptype`", { expect_snapshot({ (expect_error( with_c_foobar(vec_c( foobar(1), foobar(2), .name_spec = "{outer}_{inner}" )), "name specification" )) # Used to be an error about `ptype` (expect_error( with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")), class = "vctrs_error_incompatible_type" )) # Uses error call (#1641) (expect_error( with_c_foobar(vec_c( foobar(1), foobar(2), .error_call = call("foo"), .name_spec = "{outer}_{inner}" )) )) }) }) test_that("vec_c() fallback does support `.name_spec = 'inner'`", { # Because of how useful it is, and how easy it is to implement! # It's extremely useful to be able to hardcode `.name_spec = "inner"` # and not have to worry about hitting a fallback type that wouldn't support # `name_spec`. expect_identical( with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "inner")), foobar_c(c(1, 2)) ) expect_identical( with_c_foobar(vec_c(x = foobar(1), y = foobar(2), .name_spec = "inner")), foobar_c(c(1, 2)) ) expect_identical( with_c_foobar(vec_c( x = foobar(c(a = 1)), y = foobar(c(b = 2)), z = foobar(3), .name_spec = "inner" )), foobar_c(c(a = 1, b = 2, 3)) ) }) test_that("vec_c() doesn't fall back when ptype2 is implemented", { new_quux <- function(x) structure(x, class = "vctrs_quux") with_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) new_quux(int()), vec_cast.vctrs_quux.vctrs_foobar = function(x, to, ...) new_quux(x), vec_restore.vctrs_quux = function(x, ...) new_quux(x), c.vctrs_foobar = function(...) foobar(NextMethod()), { expect_s3_class(c(foobar(1:3), foobar(4L)), "vctrs_foobar") expect_s3_class(vec_c(foobar(1:3), foobar(4L)), "vctrs_quux") } ) }) test_that("vec_c() falls back even when ptype is supplied", { expect_foobar(vec_c(foobar(1), foobar(2), .ptype = foobar(dbl()))) with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), { expect_quux(vec_c(foobar(1), foobar(2), .ptype = foobar(dbl()))) expect_quux(vec_c( foobar(1, foo = TRUE), foobar(2, bar = TRUE), .ptype = foobar(dbl()) )) } ) }) test_that("vec_implements_ptype2() is FALSE for scalars", { expect_false(vec_implements_ptype2(quote(foo))) }) test_that("vec_implements_ptype2() and vec_c() fallback are compatible with old registration", { foo <- structure(NA, class = "vctrs_implements_ptype2_false") expect_false(vec_implements_ptype2(foo)) vec_ptype2.vctrs_implements_ptype2_true <- function(...) NULL s3_register( "vctrs::vec_ptype2", "vctrs_implements_ptype2_true", vec_ptype2.vctrs_implements_ptype2_true ) bar <- structure(NA, class = "vctrs_implements_ptype2_true") expect_true(vec_implements_ptype2(bar)) local_methods( `c.vctrs_implements_ptype2_true` = function(...) stop("never called") ) expect_identical(vec_c(bar), bar) }) test_that("can ignore names in `vec_c()` by providing a `zap()` name-spec (#232)", { expect_error(vec_c(a = c(b = 1:2))) expect_identical(vec_c(a = c(b = 1:2), b = 3L, .name_spec = zap()), 1:3) expect_snapshot({ (expect_error( vec_c(a = c(b = letters), b = 1, .name_spec = zap()), class = "vctrs_error_incompatible_type" )) }) }) test_that("can ignore outer names in `vec_c()` by providing an 'inner' name-spec (#1988)", { expect_identical( vec_c(x = c(a = 1, 2), y = c(3, b = 4), .name_spec = "inner"), c(a = 1, 2, 3, b = 4) ) # Importantly, outer names are still used in error messages! expect_snapshot(error = TRUE, { vec_c(x = c(a = 1), y = c(b = "2"), .name_spec = "inner") }) }) test_that("can concatenate subclasses of `vctrs_vctr` which don't have ptype2 methods", { x <- new_vctr(1, class = "vctrs_foo") expect_identical(vec_c(x, x), new_vctr(c(1, 1), class = "vctrs_foo")) }) test_that("base c() fallback handles unspecified chunks", { local_methods( c.vctrs_foobar = function(...) { x <- NextMethod() # Should not be passed any unspecified chunks if (anyNA(x)) { abort("tilt") } foobar(x) }, `[.vctrs_foobar` = function(x, i, ...) { # Return a quux to detect dispatch quux(NextMethod()) } ) out <- vec_c(foobar(1:2), rep(NA, 2)) expect_identical(out, quux(c(1:2, NA, NA))) out <- vec_c(rep(NA, 2), foobar(1:2), NA) expect_identical(out, quux(c(NA, NA, 1:2, NA))) }) test_that("can zap outer names from a name-spec (#1215)", { zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner expect_null( names(vec_c(a = 1:2, .name_spec = zap_outer_spec)) ) expect_identical( names(vec_c(a = 1:2, c(foo = 3L), .name_spec = zap_outer_spec)), c("", "", "foo") ) expect_null( names(list_unchop( list(a = 1:2), indices = list(1:2), name_spec = zap_outer_spec )) ) expect_identical( names(list_unchop( list(a = 1:2, c(foo = 3L)), indices = list(1:2, 3), name_spec = zap_outer_spec )), c("", "", "foo") ) # These days it is more efficient to use a name-spec of "inner" (#1988) expect_identical( vec_c(a = 1:2, .name_spec = zap_outer_spec), vec_c(a = 1:2, .name_spec = "inner") ) expect_identical( vec_c(a = 1:2, c(foo = 3L), .name_spec = zap_outer_spec), vec_c(a = 1:2, c(foo = 3L), .name_spec = "inner") ) expect_identical( list_unchop(list(a = 1:2), indices = list(1:2), name_spec = zap_outer_spec), list_unchop(list(a = 1:2), indices = list(1:2), name_spec = "inner") ) expect_identical( list_unchop( list(a = 1:2, c(foo = 3L)), indices = list(1:2, 3), name_spec = zap_outer_spec ), list_unchop( list(a = 1:2, c(foo = 3L)), indices = list(1:2, 3), name_spec = "inner" ) ) }) test_that("named empty vectors force named output (#1263)", { x <- set_names(int(), chr()) expect_named(vec_c(x), chr()) expect_named(vec_c(x, x), chr()) expect_named(vec_c(x, 1L), "") expect_named(vec_c(x, 1), "") expect_named(list_unchop(list(x), indices = list(int())), chr()) expect_named(list_unchop(list(x, x), indices = list(int(), int())), chr()) expect_named(list_unchop(list(x, 1L), indices = list(int(), 1)), "") expect_named(list_unchop(list(x, 1), indices = list(int(), 1)), "") }) test_that("calls cast method even with empty objects", { # https://github.com/paleolimbot/wk/issues/230 # There is a common type, but the cast method is intended # to fail here for this test local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_cast.vctrs_foobar.default = function(x, to, ...) { vec_default_cast(x, to) } ) expect_snapshot(error = TRUE, { vec_c( foobar(integer()), foobar(integer(), foo = "bar") ) }) }) test_that("can handle spliced lists (#1578)", { expect_identical( vec_c(!!!list(foo = 1), !!!list(bar = 2)), c(foo = 1, bar = 2) ) }) # Golden tests ------------------------------------------------------- test_that("concatenation performs expected allocations", { vec_c_list <- function(x, ptype = NULL) { vec_c(!!!x, .ptype = ptype) } expect_snapshot({ ints <- rep(list(1L), 1e2) dbls <- rep(list(1), 1e2) # Extra allocations from `list2()`, see r-lib/rlang#937 "# `vec_c()` " "Integers" with_memory_prof(vec_c_list(ints)) "Doubles" with_memory_prof(vec_c_list(dbls)) "Integers to integer" with_memory_prof(vec_c_list(ints, ptype = int())) "Doubles to integer" with_memory_prof(vec_c_list(dbls, ptype = int())) "# `list_unchop()` " "Integers" with_memory_prof(list_unchop(ints)) "Doubles" with_memory_prof(list_unchop(dbls)) "Integers to integer" with_memory_prof(list_unchop(ints, ptype = int())) "Doubles to integer" with_memory_prof(list_unchop(dbls, ptype = int())) "# Concatenation with names" "Named integers" ints <- rep(list(set_names(1:3, letters[1:3])), 1e2) with_memory_prof(list_unchop(ints)) "Named matrices" mat <- matrix(1:4, 2, dimnames = list(c("foo", "bar"))) mats <- rep(list(mat), 1e2) with_memory_prof(list_unchop(mats)) "Data frame with named columns" df <- data_frame( x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c("A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb"))) ) dfs <- rep(list(df), 1e2) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (non-repaired, non-recursive case)" df <- data_frame(x = 1:2) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (repaired, non-recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (non-repaired, recursive case) (#1217)" df <- data_frame( x = 1:2, y = data_frame(x = 1:2) ) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "Data frame with rownames (repaired, recursive case) (#1217)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) "list-ofs (#1496)" make_list_of <- function(n) { df <- tibble::tibble( x = new_list_of(vec_chop(1:n), ptype = integer()) ) vec_chop(df) } with_memory_prof(list_unchop(make_list_of(1e3))) with_memory_prof(list_unchop(make_list_of(2e3))) with_memory_prof(list_unchop(make_list_of(4e3))) }) }) test_that("can dispatch many times", { # This caused a crash when counters were not correctly protected foo <- structure( list(x.sorted = numeric(0), tp = numeric(0), fp = numeric(0)), row.names = integer(0), class = c("vctrs_foobar", "tbl_df", "tbl", "data.frame") ) x <- lapply(1:200, function(...) foo) expect_error(NA, object = vctrs::list_unchop(x)) }) test_that("dots splicing clones as appropriate", { x <- list(a = 1) vctrs::vec_cbind(!!!x) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_rbind(!!!x) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_c(!!!x) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_cbind(!!!x, 2) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_rbind(!!!x, 2) expect_equal(x, list(a = 1)) x <- list(a = 1) vctrs::vec_c(!!!x, 2) expect_equal(x, list(a = 1)) }) test_that("can combine records wrapped in data frames", { local_methods( vec_proxy.vctrs_foobar = function(x, ...) { data_frame(x = unclass(x), y = seq_along(x)) }, vec_restore.vctrs_foobar = function(x, to, ...) { foobar(x$x) } ) x <- foobar(1:2) y <- foobar(3:4) expect_equal( vec_c(x, y), foobar(1:4) ) expect_equal( list_unchop(list(x, y), indices = list(1:2, 3:4)), foobar(1:4) ) expect_equal( vec_rbind(data_frame(x = x), data_frame(x = y)), data_frame(x = foobar(1:4)) ) }) test_that("fallback works with subclasses of `vctrs_vctr`", { # Used to fail because of interaction between common class fallback # for `base::c()` and the `c()` method for `vctrs_vctr` that called # back into `vec_c()`. # Reprex for failure in the ricu package x <- new_rcrd(list(a = 1), class = "vctrs_foobar") expect_equal( vec_c(x, x, .name_spec = "{inner}"), new_rcrd(list(a = c(1, 1)), class = "vctrs_foobar") ) # Reprex for failure in the groupr package x <- new_rcrd(list(a = 1), class = "vctrs_foobar") df <- data_frame(x = x) expect_equal( vec_rbind(df, data.frame()), df ) expect_equal( vec_cast_common(df, data.frame()), list(df, data_frame(x = x[0])) ) }) vctrs/tests/testthat/test-rep.R0000644000176200001440000001215115065005761016310 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_rep() test_that("`vec_rep()` can repeat vectors", { expect_identical(vec_rep(1:2, 5), rep(1:2, 5)) expect_identical(vec_rep(list(1, "x"), 5), rep(list(1, "x"), 5)) }) test_that("`vec_rep()` repeats data frames row wise", { x <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_rep(x, 2), vec_slice(x, c(1:2, 1:2))) }) test_that("`vec_rep()` can repeat 0 `times`", { expect_identical(vec_rep(1, 0), numeric()) }) test_that("`vec_rep()` can repeat 1 `time`", { expect_identical(vec_rep(1:3, 1), 1:3) }) test_that("`vec_rep()` can repeat `x` of size 1", { expect_identical(vec_rep(1, 2), c(1, 1)) }) test_that("`vec_rep()` errors on long vector output", { # Exact error message may be platform specific expect_error( vec_rep(1:2, .Machine$integer.max), "output size must be less than" ) }) test_that("`vec_rep()` validates `times`", { expect_snapshot({ (expect_error(my_vec_rep(1, "x"), class = "vctrs_error_incompatible_type")) (expect_error(my_vec_rep(1, c(1, 2)))) (expect_error(my_vec_rep(1, -1))) (expect_error(my_vec_rep(1, NA_integer_))) }) }) # ------------------------------------------------------------------------------ # vec_rep_each() test_that("`vec_rep_each()` can repeat each element of vectors", { expect_identical(vec_rep_each(1:2, 5), rep(1:2, each = 5)) expect_identical(vec_rep_each(list(1, "x"), 5), rep(list(1, "x"), each = 5)) }) test_that("`vec_rep_each()` `times` is vectorized", { expect_identical(vec_rep_each(1:2, 1:2), rep(1:2, times = 1:2)) }) test_that("`vec_rep_each()` repeats data frames row wise", { x <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_rep_each(x, c(2, 1)), vec_slice(x, c(1, 1, 2))) }) test_that("`vec_rep_each()` can repeat 0 `times`", { expect_identical(vec_rep_each(1:2, 0), integer()) }) test_that("`vec_rep_each()` finalizes type when repeating 0 times (#1673)", { expect_identical(vec_rep_each(NA, 0), logical()) }) test_that("`vec_rep_each()` retains names when repeating 0 times (#1673)", { x <- c(a = 1, b = 2) expect_identical(vec_rep_each(x, 0), named(numeric())) }) test_that("`vec_rep_each()` can repeat 1 `time`", { expect_identical(vec_rep_each(1:2, 1), 1:2) }) test_that("`vec_rep_each()` errors on long vector output", { # Exact error message may be platform specific expect_error( vec_rep_each(1:2, .Machine$integer.max), "output size must be less than" ) }) test_that("`vec_rep_each()` validates `times`", { expect_snapshot({ (expect_error( my_vec_rep_each(1, "x"), class = "vctrs_error_incompatible_type" )) (expect_error(my_vec_rep_each(1, -1))) (expect_error(my_vec_rep_each(c(1, 2), c(1, -1)))) (expect_error(my_vec_rep_each(1, NA_integer_))) (expect_error(my_vec_rep_each(c(1, 2), c(1, NA_integer_)))) }) }) test_that("`vec_rep_each()` uses recyclying errors", { expect_snapshot({ (expect_error( my_vec_rep_each(1:2, 1:3), class = "vctrs_error_recycle_incompatible_size" )) }) }) # ------------------------------------------------------------------------------ test_that("`vec_rep()` validates `times`", { expect_snapshot(error = TRUE, my_vec_rep(1, "x")) expect_snapshot(error = TRUE, my_vec_rep(1, c(1, 2))) expect_snapshot(error = TRUE, my_vec_rep(1, -1)) expect_snapshot(error = TRUE, my_vec_rep(1, NA_integer_)) }) test_that("`vec_rep_each()` validates `times`", { expect_snapshot(error = TRUE, my_vec_rep_each(1, "x")) expect_snapshot(error = TRUE, my_vec_rep_each(1, -1)) expect_snapshot(error = TRUE, my_vec_rep_each(c(1, 2), c(1, -1))) expect_snapshot(error = TRUE, my_vec_rep_each(1, NA_integer_)) expect_snapshot(error = TRUE, my_vec_rep_each(c(1, 2), c(1, NA_integer_))) }) test_that("`vec_rep_each()` uses recyclying errors", { expect_snapshot(error = TRUE, my_vec_rep_each(1:2, 1:3)) }) # vec_unrep -------------------------------------------------------------------- test_that("can unrep a vector", { x <- c(1, 3, 3, 1, 5, 5, 6) expect <- data_frame( key = c(1, 3, 1, 5, 6), times = c(1L, 2L, 1L, 2L, 1L) ) expect_identical(vec_unrep(x), expect) }) test_that("can unrep a data frame", { df <- data_frame( x = c(1, 1, 2, 2, 2), y = c(1, 1, 1, 1, 2) ) expect <- data_frame( key = vec_slice(df, c(1, 3, 5)), times = c(2L, 2L, 1L) ) expect_identical(vec_unrep(df), expect) }) test_that("works with size zero input", { expect_identical( vec_unrep(integer()), data_frame(key = integer(), times = integer()) ) }) test_that("can roundtrip empty input", { x <- integer() compressed <- vec_unrep(x) expect_identical(vec_rep_each(compressed$key, compressed$times), x) x <- data_frame() compressed <- vec_unrep(x) expect_identical(vec_rep_each(compressed$key, compressed$times), x) }) test_that("works with data frames with rows but no columns", { x <- data_frame(.size = 5) expect <- data_frame(key = data_frame(.size = 1L), times = 5L) expect_identical(vec_unrep(x), expect) }) test_that("errors on scalars", { expect_snapshot(error = TRUE, { vec_unrep(environment()) }) }) vctrs/tests/testthat/test-set.R0000644000176200001440000003247215113325071016316 0ustar liggesusers# vec_set_intersect ------------------------------------------------------- test_that("retains names of `x` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 2) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_intersect(x, y), c(a = 1, e = 2) ) }) test_that("returns elements in order they first appear in `x`", { expect_identical(vec_set_intersect(c(3, 1, 2, 3), c(2, 3)), c(3, 2)) }) test_that("returns unique elements", { expect_identical(vec_set_intersect(c(1, 2, 1), c(2, 2, 1)), c(1, 2)) }) test_that("works with character vectors of different encodings", { encs <- encodings() # Always returns UTF-8 expect_identical(vec_set_intersect(encs$utf8, encs$latin1), encs$utf8) expect_identical(vec_set_intersect(encs$latin1, encs$utf8), encs$utf8) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_intersect(c(NA_real_, 1), NA_real_), NA_real_) expect_identical(vec_set_intersect(c(1, NA_real_), NA_real_), NA_real_) expect_identical(vec_set_intersect(c(NA_real_, NaN), NaN), NaN) expect_identical(vec_set_intersect(c(NaN, NA_real_), NaN), NaN) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_intersect(na, na), na) expect_identical(vec_set_intersect(na, na[2]), na[2]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_intersect(NA, NA), NA) }) test_that("returns a vector of the common type", { expect_identical(vec_set_intersect(1L, c(2, 1)), 1) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical(vec_set_intersect(x, y), vec_slice(x, c(2, 4))) expect_identical(vec_set_intersect(y, x), vec_slice(y, c(1, 3))) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical(vec_set_intersect(x, y), vec_slice(x, c(2, 4))) expect_identical(vec_set_intersect(y, x), vec_slice(y, c(1, 3))) }) # vec_set_difference ------------------------------------------------------ test_that("retains names of `x` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 5) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_difference(x, y), c(b = 4, e = 5) ) }) test_that("returns elements in order they first appear in `x`", { expect_identical(vec_set_difference(c(3, 1, 2, 3), 1), c(3, 2)) }) test_that("returns unique elements", { expect_identical(vec_set_difference(c(1, 2, 1, 4), c(4, 5)), c(1, 2)) }) test_that("works with character vectors of different encodings", { encs <- encodings() expect_identical(vec_set_difference(encs$utf8, encs$latin1), character()) expect_identical(vec_set_difference(encs$latin1, encs$utf8), character()) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_difference(c(NA_real_, 1), NA_real_), 1) expect_identical(vec_set_difference(c(1, NA_real_), NA_real_), 1) expect_identical(vec_set_difference(c(NA_real_, NaN), NaN), NA_real_) expect_identical(vec_set_difference(c(NaN, NA_real_), NaN), NA_real_) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_difference(na, na), complex()) expect_identical(vec_set_difference(na, na[2]), na[c(1, 3, 4)]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_difference(NA, NA), logical()) }) test_that("returns a vector of the common type", { expect_identical(vec_set_difference(c(3L, 1L), c(2, 1)), 3) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical(vec_set_difference(x, y), vec_slice(x, 1)) expect_identical(vec_set_difference(y, x), vec_slice(y, c(2, 4))) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical(vec_set_difference(x, y), vec_slice(x, 1)) expect_identical(vec_set_difference(y, x), vec_slice(y, c(2, 4))) }) # vec_set_union ----------------------------------------------------------- test_that("retains names of `x` and `y` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 5) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_union(x, y), c(a = 1, b = 4, e = 5, w = 3, x = 2) ) }) test_that("does minimal name repair on duplicate names", { x <- c(a = 1) y <- c(a = 2) expect_named(vec_set_union(x, y), c("a", "a")) }) test_that("returns elements in order they first appear in `x` and `y`", { expect_identical( vec_set_union(c(3, 1, 2, 3), c(4, 2, 5, 4)), c(3, 1, 2, 4, 5) ) }) test_that("returns unique elements", { expect_identical(vec_set_union(c(1, 2, 1, 4), c(4, 5, 5)), c(1, 2, 4, 5)) }) test_that("works with character vectors of different encodings", { encs <- encodings() # Always returns UTF-8 expect_identical(vec_set_union(encs$utf8, encs$latin1), encs$utf8) expect_identical(vec_set_union(encs$latin1, encs$utf8), encs$utf8) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_union(c(NA_real_, 1), NA_real_), c(NA_real_, 1)) expect_identical(vec_set_union(c(1, NA_real_), NA_real_), c(1, NA_real_)) expect_identical(vec_set_union(NA_real_, NaN), c(NA_real_, NaN)) expect_identical(vec_set_union(NaN, NA_real_), c(NaN, NA_real_)) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_union(na, na), na) expect_identical(vec_set_union(na[-2], na), na[c(1, 3, 4, 2)]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_union(NA, NA), NA) }) test_that("returns a vector of the common type", { expect_identical(vec_set_union(c(3L, 1L), c(2, 1)), c(3, 1, 2)) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical( vec_set_union(x, y), vec_c(vec_slice(x, c(1, 2, 4)), vec_slice(y, c(2, 4))) ) expect_identical( vec_set_union(y, x), vec_c(vec_slice(y, c(1, 2, 3, 4)), vec_slice(x, 1)) ) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical( vec_set_union(x, y), vec_c(vec_slice(x, c(1, 2, 4)), vec_slice(y, c(2, 4))) ) expect_identical( vec_set_union(y, x), vec_c(vec_slice(y, c(1, 2, 3, 4)), vec_slice(x, 1)) ) }) test_that("works on empty inputs", { expect_identical(vec_set_union(integer(), 12L), 12L) expect_identical(vec_set_union(12L, integer()), 12L) }) # vec_set_symmetric_difference -------------------------------------------- test_that("retains names of `x` and `y` elements", { x <- c(a = 1, b = 4, c = 1, d = 4, e = 5) y <- c(w = 3, x = 2, y = 1, z = 2) expect_identical( vec_set_symmetric_difference(x, y), c(b = 4, e = 5, w = 3, x = 2) ) }) test_that("returns elements in order they first appear in `x` and `y`", { expect_identical( vec_set_symmetric_difference(c(3, 1, 2, 3), c(4, 2, 5, 4)), c(3, 1, 4, 5) ) }) test_that("returns unique elements", { expect_identical( vec_set_symmetric_difference(c(1, 2, 1, 4), c(4, 5, 5)), c(1, 2, 5) ) }) test_that("works with character vectors of different encodings", { encs <- encodings() # Always returns UTF-8 expect_identical( vec_set_symmetric_difference(encs$utf8, encs$latin1), character() ) expect_identical( vec_set_symmetric_difference(encs$latin1, encs$utf8), character() ) }) test_that("has consistency with `NA` values", { expect_identical(vec_set_symmetric_difference(c(NA_real_, 1), NA_real_), 1) expect_identical(vec_set_symmetric_difference(c(1, NA_real_), NA_real_), 1) expect_identical(vec_set_symmetric_difference(c(NaN, 1), NaN), 1) expect_identical(vec_set_symmetric_difference(c(1, NaN), NaN), 1) expect_identical( vec_set_symmetric_difference(NA_real_, NaN), c(NA_real_, NaN) ) expect_identical( vec_set_symmetric_difference(NaN, NA_real_), c(NaN, NA_real_) ) }) test_that("works with complex missing values", { na <- complex( real = c(NA_real_, NA_real_, NaN, NaN), imaginary = c(NA_real_, NaN, NA_real_, NaN) ) expect_identical(vec_set_symmetric_difference(na, na), complex()) expect_identical(vec_set_symmetric_difference(na[-2], na[-4]), na[c(4, 2)]) }) test_that("works correctly with unspecified logical vectors", { expect_identical(vec_set_symmetric_difference(NA, NA), logical()) }) test_that("returns a vector of the common type", { expect_identical(vec_set_symmetric_difference(c(3L, 1L), c(2, 1)), c(3, 2)) }) test_that("works with data frames", { x <- data_frame( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") ) y <- data_frame( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") ) expect_identical( vec_set_symmetric_difference(x, y), vec_c(vec_slice(x, 1), vec_slice(y, c(2, 4))) ) expect_identical( vec_set_symmetric_difference(y, x), vec_c(vec_slice(y, c(2, 4)), vec_slice(x, 1)) ) }) test_that("works with rcrds", { x <- new_rcrd(list( a = c(1, 2, 1, 1), b = c("a", "b", "a", "d") )) y <- new_rcrd(list( a = c(2, 3, 1, 2), b = c("b", "b", "d", "d") )) expect_identical( vec_set_symmetric_difference(x, y), vec_c(vec_slice(x, 1), vec_slice(y, c(2, 4))) ) expect_identical( vec_set_symmetric_difference(y, x), vec_c(vec_slice(y, c(2, 4)), vec_slice(x, 1)) ) }) # common ------------------------------------------------------------------ test_that("works with package version columns of data frames (#1837)", { package_frame <- function(x) { data_frame(version = package_version(x)) } x <- package_frame(c("4.0", "2.0")) y <- package_frame(c("1.0", "3.0", "4.0")) expect_identical(vec_set_intersect(x, y), package_frame("4.0")) expect_identical(vec_set_difference(x, y), package_frame("2.0")) expect_identical( vec_set_union(x, y), package_frame(c("4.0", "2.0", "1.0", "3.0")) ) expect_identical( vec_set_symmetric_difference(x, y), package_frame(c("2.0", "1.0", "3.0")) ) }) test_that("errors nicely if common type can't be taken", { expect_snapshot(error = TRUE, { vec_set_intersect(1, "x") }) expect_snapshot(error = TRUE, { vec_set_difference(1, "x") }) expect_snapshot(error = TRUE, { vec_set_union(1, "x") }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, "x") }) }) test_that("dots must be empty", { expect_snapshot(error = TRUE, { vec_set_intersect(1, 2, 3) }) expect_snapshot(error = TRUE, { vec_set_difference(1, 2, 3) }) expect_snapshot(error = TRUE, { vec_set_union(1, 2, 3) }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, 2, 3) }) }) test_that("`ptype` is respected", { expect_identical(vec_set_intersect(1, 1, ptype = integer()), 1L) expect_identical(vec_set_difference(1, 1, ptype = integer()), integer()) expect_identical(vec_set_union(1, 2, ptype = integer()), c(1L, 2L)) expect_identical( vec_set_symmetric_difference(1, 2, ptype = integer()), c(1L, 2L) ) expect_snapshot(error = TRUE, { vec_set_intersect(1, 1.5, ptype = integer()) }) expect_snapshot(error = TRUE, { vec_set_difference(1, 1.5, ptype = integer()) }) expect_snapshot(error = TRUE, { vec_set_union(1, 1.5, ptype = integer()) }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, 1.5, ptype = integer()) }) }) test_that("`x_arg` and `y_arg` can be adjusted", { expect_snapshot(error = TRUE, { vec_set_intersect(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_difference(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_union(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_symmetric_difference(1, "2", x_arg = "foo", y_arg = "bar") }) expect_snapshot(error = TRUE, { vec_set_intersect(1, "2", x_arg = "", y_arg = "") }) }) test_that("`error_call` can be adjusted", { my_set_intersect <- function() { vec_set_intersect(1, "x", error_call = current_env()) } my_set_difference <- function() { vec_set_difference(1, "x", error_call = current_env()) } my_set_union <- function() { vec_set_union(1, "x", error_call = current_env()) } my_set_symmetric_difference <- function() { vec_set_symmetric_difference(1, "x", error_call = current_env()) } expect_snapshot(error = TRUE, { my_set_intersect() }) expect_snapshot(error = TRUE, { my_set_difference() }) expect_snapshot(error = TRUE, { my_set_union() }) expect_snapshot(error = TRUE, { my_set_symmetric_difference() }) }) vctrs/tests/testthat/test-type-bare.R0000644000176200001440000004212115113335375017413 0ustar liggesuserstest_that("ptype2 base methods are not inherited", { ptypes <- vec_remove(base_empty_types, c("null", "dataframe")) for (ptype in ptypes) { x <- new_vctr(ptype, class = "foobar", inherit_base_type = TRUE) expect_s3_class(vec_ptype2(x, x), "foobar") expect_error(vec_ptype2(x, ptype), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(ptype, x), class = "vctrs_error_incompatible_type") } }) test_that("cast base methods are not inherited", { ptypes <- vec_remove(base_empty_types, c("null", "dataframe")) for (ptype in ptypes) { x <- new_vctr(ptype, class = "foobar", inherit_base_type = TRUE) expect_s3_class(vec_cast(ptype, x), "foobar") expect_error(vec_cast(x, ptype), class = "vctrs_error_incompatible_type") } }) test_that("default cast allows objects with the same type", { x <- structure(1, class = c("foo", "double")) expect_equal(vec_cast(x, x), x) }) # vec_shaped_ptype ------------------------------------------------------- test_that("array dimensions are preserved", { mat1 <- matrix(lgl(), nrow = 1, ncol = 1) mat2 <- matrix(lgl(), nrow = 2, ncol = 2) mat3 <- matrix(lgl(), nrow = 2, ncol = 3) expect_equal(vec_ptype2(mat1, mat1), matrix(lgl(), nrow = 0, ncol = 1)) expect_equal(vec_ptype2(mat1, mat2), matrix(lgl(), nrow = 0, ncol = 2)) expect_error(vec_ptype2(mat2, mat3), class = "vctrs_error_incompatible_type") }) test_that("vec_shaped_ptype()", { int <- function(...) array(NA_integer_, c(...)) expect_identical( vec_shaped_ptype(integer(), int(5), int(10)), new_shape(integer()) ) expect_identical( vec_shaped_ptype(integer(), int(5, 1), int(10, 1)), new_shape(integer(), 1) ) expect_identical( vec_shaped_ptype(integer(), int(5, 1, 2), int(10, 1, 2)), new_shape(integer(), 1:2) ) }) test_that("vec_shaped_ptype() evaluates arg lazily", { expect_silent(vec_shaped_ptype( integer(), int(5), int(10), x_arg = print("oof") )) expect_silent(vec_shaped_ptype( integer(), int(5), int(10), y_arg = print("oof") )) }) # vec_cast() -------------------------------------------------------------- # NULL test_that("NULL is idempotent", { expect_equal(vec_cast(NULL, NULL), NULL) expect_equal(vec_cast(list(1:3), NULL), list(1:3)) }) # Logical test_that("safe casts work as expected", { exp <- lgl(TRUE, FALSE) expect_equal(vec_cast(NULL, logical()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), logical()), exp) expect_equal(vec_cast(int(1L, 0L), logical()), exp) expect_equal(vec_cast(dbl(1, 0), logical()), exp) # These used to be allowed expect_error( vec_cast(chr("T", "F"), logical()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(chr("TRUE", "FALSE"), logical()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(chr("true", "false"), logical()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(1, 0), logical()), class = "vctrs_error_incompatible_type" ) }) test_that("NA casts work as expected", { exp <- lgl(NA) to <- lgl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(lgl(NA)) to_mat <- matrix(lgl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error( vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type" ) }) test_that("lossy casts generate warning", { expect_lossy( vec_cast(int(2L, 1L), lgl()), lgl(TRUE, TRUE), x = int(), to = lgl() ) expect_lossy( vec_cast(dbl(2, 1), lgl()), lgl(TRUE, TRUE), x = dbl(), to = lgl() ) # These used to be allowed expect_error( vec_cast(chr("x", "TRUE"), lgl()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(chr("t", "T"), lgl()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(chr("f", "F"), lgl()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(c(TRUE, FALSE), TRUE), lgl()), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error( vec_cast(factor("a"), logical()), class = "vctrs_error_incompatible_type" ) }) test_that("dimensionality matches output", { x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) x <- matrix(1, nrow = 2, ncol = 2) expect_error(vec_cast(x, logical()), class = "vctrs_error_incompatible_type") }) test_that("the common type of two `NA` vectors is unspecified", { expect_equal(vec_ptype2(NA, NA), unspecified()) }) # Integer test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, integer()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), integer()), int(1L, 0L)) expect_equal(vec_cast(int(1L, 2L), integer()), int(1L, 2L)) expect_equal(vec_cast(dbl(1, 2), integer()), int(1L, 2L)) # These used to be allowed expect_error( vec_cast(chr("1", "2"), integer()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(1L, 2L), integer()), class = "vctrs_error_incompatible_type" ) }) test_that("NA casts work as expected", { exp <- int(NA) to <- int() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(int(NA)) to_mat <- matrix(int()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error( vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type" ) }) test_that("lossy casts generate error", { expect_lossy(vec_cast(c(2.5, 2), int()), int(2, 2), x = dbl(), to = int()) expect_lossy( vec_cast(c(.Machine$integer.max + 1, 1), int()), int(NA, 1L), x = dbl(), to = int() ) expect_lossy( vec_cast(c(-.Machine$integer.max - 1, 1), int()), int(NA, 1L), x = dbl(), to = int() ) # These used to be allowed expect_error( vec_cast(c("2.5", "2"), int()), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error( vec_cast(factor("a"), integer()), class = "vctrs_error_incompatible_type" ) }) # Double test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, double()), NULL) expect_equal(vec_cast(lgl(TRUE, FALSE), double()), dbl(1, 0)) expect_equal(vec_cast(int(1, 0), double()), dbl(1, 0)) expect_equal(vec_cast(dbl(1, 1.5), double()), dbl(1, 1.5)) # These used to be allowed expect_error( vec_cast(chr("1", "1.5"), double()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(1, 1.5), double()), class = "vctrs_error_incompatible_type" ) }) test_that("NA casts work as expected", { exp <- dbl(NA) to <- dbl() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(int(NA), to), exp) expect_equal(vec_cast(dbl(NA), to), exp) # These used to be allowed expect_error(vec_cast(chr(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(dbl(NA)) to_mat <- matrix(dbl()) expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) # These used to be allowed expect_error( vec_cast(mat(chr(NA)), to_mat), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error( vec_cast(factor("a"), double()), class = "vctrs_error_incompatible_type" ) }) # Complex test_that("safe casts to complex works", { expect_identical(vec_cast(NULL, cpl()), NULL) expect_identical(vec_cast(lgl(TRUE, FALSE), cpl()), cpl(1, 0)) expect_identical(vec_cast(int(1, 0), cpl()), cpl(1, 0)) expect_identical(vec_cast(dbl(1, 1.5), cpl()), cpl(1, 1.5)) # This used to be allowed expect_error( vec_cast(list(1, 1.5), cpl()), class = "vctrs_error_incompatible_type" ) }) test_that("NA casts work as expected", { # This goes through a special path for expect_equal(vec_cast(lgl(NA), cpl()), NA_complex_) # TODO: Use our own cast routines here? # It isn't great that this logical `NA` cast returns a different `NA` # than the one above with just `lgl(NA)` (which is seen as unspecified). i.e. # check the `Im()` slot between the two in R >=4.4.0. We can fix this with our # own cast routines rather than using `vec_coerce_bare()`. expect_type(vec_cast(lgl(NA, TRUE), cpl()), "complex") expect_identical(is.na(vec_cast(lgl(NA, TRUE), cpl())), c(TRUE, FALSE)) # TODO: Use our own cast routines here? # `as.complex(NA/NA_real_/NA_integer_)` and `Rf_CoerceVector(NA/NA_real_/NA_integer_)` # have gone back and forth about what they return in the `Im()` slot. In some # R versions they return `0` and in others they return `NA_real_`. # https://stat.ethz.ch/pipermail/r-devel/2023-April/082545.html # https://stat.ethz.ch/pipermail/r-devel/2023-September/082864.html # expect_equal(vec_cast(int(NA), cpl()), NA_complex_) expect_type(vec_cast(int(NA), cpl()), "complex") expect_identical(is.na(vec_cast(int(NA), cpl())), TRUE) # expect_equal(vec_cast(dbl(NA), cpl()), NA_complex_) expect_type(vec_cast(dbl(NA), cpl()), "complex") expect_identical(is.na(vec_cast(dbl(NA), cpl())), TRUE) # This used to be allowed expect_error( vec_cast(list(NA), cpl()), class = "vctrs_error_incompatible_type" ) }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(NA_complex_) to_mat <- matrix(cpl()) # TODO: Use our own cast routines here? # `as.complex(NA/NA_real_/NA_integer_)` and `Rf_CoerceVector(NA/NA_real_/NA_integer_)` # have gone back and forth about what they return in the `Im()` slot. In some # R versions they return `0` and in others they return `NA_real_`. # https://stat.ethz.ch/pipermail/r-devel/2023-April/082545.html # https://stat.ethz.ch/pipermail/r-devel/2023-September/082864.html # expect_equal(vec_cast(mat(lgl(NA)), to_mat), exp_mat) expect_type(vec_cast(mat(lgl(NA)), to_mat), "complex") expect_identical(is.na(vec_cast(mat(lgl(NA)), to_mat)), matrix(TRUE)) # expect_equal(vec_cast(mat(int(NA)), to_mat), exp_mat) expect_type(vec_cast(mat(int(NA)), to_mat), "complex") expect_identical(is.na(vec_cast(mat(int(NA)), to_mat)), matrix(TRUE)) # expect_equal(vec_cast(mat(dbl(NA)), to_mat), exp_mat) expect_type(vec_cast(mat(dbl(NA)), to_mat), "complex") expect_identical(is.na(vec_cast(mat(dbl(NA)), to_mat)), matrix(TRUE)) # This used to be allowed expect_error( vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type" ) }) test_that("complex is coercible to numeric types", { expect_identical(vec_ptype2(cpl(), NULL), cpl()) expect_identical(vec_ptype2(NULL, cpl()), cpl()) expect_identical(vec_ptype2(cpl(), int()), cpl()) expect_identical(vec_ptype2(int(), cpl()), cpl()) expect_identical(vec_ptype2(cpl(), dbl()), cpl()) expect_identical(vec_ptype2(dbl(), cpl()), cpl()) expect_identical(vec_ptype2(cpl(), cpl()), cpl()) expect_identical(vec_c(0, 1i), cpl(0i, 1i)) }) test_that("complex is not coercible to logical", { expect_error( vec_ptype2(cpl(), lgl()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(lgl(), cpl()), class = "vctrs_error_incompatible_type" ) }) # Character test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, character()), NULL) expect_equal(vec_cast(NA, character()), NA_character_) # These used to be allowed expect_error( vec_cast(lgl(TRUE, FALSE), character()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list("x", "y"), character()), class = "vctrs_error_incompatible_type" ) }) test_that("NA casts work as expected", { exp <- chr(NA) to <- chr() expect_equal(vec_cast(lgl(NA), to), exp) expect_equal(vec_cast(chr(NA), to), exp) # These used to be allowed expect_error(vec_cast(int(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(dbl(NA), to), class = "vctrs_error_incompatible_type") expect_error(vec_cast(list(NA), to), class = "vctrs_error_incompatible_type") }) test_that("Shaped NA casts work as expected", { mat <- matrix exp_mat <- mat(chr(NA)) to_mat <- matrix(chr()) expect_equal(vec_cast(mat(chr(NA)), to_mat), exp_mat) # These used to be allowed expect_error( vec_cast(mat(lgl(NA)), to_mat), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(mat(int(NA)), to_mat), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(mat(dbl(NA)), to_mat), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(mat(list(NA)), to_mat), class = "vctrs_error_incompatible_type" ) }) test_that("difftime does not get special treatment", { dt1 <- as.difftime(600, units = "secs") # This used to be allowed expect_error( vec_cast(dt1, character()), class = "vctrs_error_incompatible_type" ) }) # Raw test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, raw()), NULL) # This used to be allowed expect_error( vec_cast(list(raw(1)), raw()), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error( vec_cast(raw(1), double()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(double(1), raw()), class = "vctrs_error_incompatible_type" ) }) test_that("can sort raw", { x <- as.raw(c(3, 1, 2, 4)) expect_identical(vec_order(x), int(2, 3, 1, 4)) expect_identical(x[vec_order(x)], as.raw(1:4)) }) test_that("raw has informative type summaries", { expect_equal(vec_ptype_abbr(raw()), "raw") expect_equal(vec_ptype_full(raw()), "raw") }) test_that("can provide common type with raw", { local_methods( vec_ptype2.raw.vctrs_foobar = function(...) "dispatched-left", vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.raw = function(...) "dispatched-right" ) expect_identical(vec_ptype2(raw(), foobar("")), "dispatched-left") expect_identical(vec_ptype2(foobar(""), raw()), "dispatched-right") }) # Lists test_that("safe casts work as expected", { expect_equal(vec_cast(NULL, list()), NULL) expect_equal(vec_cast(NA, list()), list(NULL)) expect_equal(vec_cast(list(1L, 2L), list()), list(1L, 2L)) # This used to be allowed expect_error(vec_cast(1:2, list()), class = "vctrs_error_incompatible_type") }) test_that("dimensionality matches to", { x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1L, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) }) test_that("data frames are cast to list row wise (#639)", { x <- data.frame(x = 1:2, row.names = c("a", "b")) expect <- list(data.frame(x = 1L), data.frame(x = 2L)) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("data frames can be cast to shaped lists", { to <- array(list(), dim = c(0, 2, 1)) x <- data.frame(x = 1:2, y = 3:4) expect <- list(vec_slice(x, 1), vec_slice(x, 2)) expect <- array(expect, dim = c(2, 2, 1)) # This used to be allowed expect_error(vec_cast(x, to), class = "vctrs_error_incompatible_type") }) test_that("Casting atomic `NA` values to list results in a `NULL`", { x <- c(NA, 1) expect <- list(NULL, 1) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) test_that("Casting data frame `NA` rows to list results in a `NULL`", { x <- data.frame(x = c(NA, NA, 1), y = c(NA, 1, 2)) expect <- list(NULL, vec_slice(x, 2), vec_slice(x, 3)) # This used to be allowed expect_error(vec_cast(x, list()), class = "vctrs_error_incompatible_type") }) # Unspecified test_that("unspecified can be cast to bare methods", { for (x in vectors[-4]) { expect_identical(vec_cast(unspecified(3), x), vec_init(x, 3)) } }) vctrs/tests/testthat/test-bind.R0000644000176200001440000011410015127057357016442 0ustar liggesuserslocal_name_repair_quiet() # rows -------------------------------------------------------------------- test_that("empty inputs return an empty data frame", { expect_equal(vec_rbind(), data_frame()) expect_equal(vec_rbind(NULL, NULL), data_frame()) }) test_that("vec_rbind(): NULL is idempotent", { df <- data_frame(x = 1) expect_equal(vec_rbind(df, NULL), df) }) test_that("vec_rbind() output is tibble if any input is tibble", { df <- data_frame(x = 1) dt <- tibble::tibble(x = 1) expect_s3_class(vec_rbind(dt), "tbl_df") expect_s3_class(vec_rbind(dt, df), "tbl_df") expect_s3_class(vec_rbind(df, dt), "tbl_df") }) test_that("type of column is common type of individual columns", { x_int <- data_frame(x = 1L) x_dbl <- data_frame(x = 2.5) expect_equal(vec_rbind(x_int, x_int), data_frame(x = c(1L, 1L))) expect_equal(vec_rbind(x_int, x_dbl), data_frame(x = c(1, 2.5))) }) test_that("incompatible columns throws common type error", { x_int <- data_frame(x = 1L) x_chr <- data_frame(x = "a") expect_snapshot({ (expect_error( vec_rbind(x_int, x_chr), class = "vctrs_error_incompatible_type" )) (expect_error( vec_rbind(x_int, x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type" )) (expect_error( vec_rbind(x_int, x_chr, .ptype = x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type" )) }) }) test_that("result contains union of columns", { expect_named( vec_rbind( data_frame(x = 1), data_frame(y = 1) ), c("x", "y") ) expect_named( vec_rbind( data_frame(y = 1, x = 1), data_frame(y = 1, z = 2) ), c("y", "x", "z") ) }) test_that("all inputs coerced to data frames", { expect_equal( vec_rbind(data_frame(x = 1L), c(x = 1.5)), data_frame(x = c(1, 1.5)) ) }) test_that("names are supplied if needed", { local_name_repair_verbose() expect_snapshot(out <- vec_rbind(data_frame(...1 = 1), 1)) expect_equal(out, data_frame(...1 = c(1, 1))) }) test_that("matrix becomes data frame and has names properly repaired", { x <- matrix(1:4, nrow = 2) expect_equal(vec_rbind(x), data.frame(...1 = 1:2, ...2 = 3:4)) }) test_that("can bind data.frame columns", { df <- data.frame(x = NA, y = 1:2) df$x <- data.frame(a = 1:2) expected <- data.frame(x = NA, y = c(1:2, 1:2)) expected$x <- data.frame(a = c(1:2, 1:2)) expect_equal(vec_rbind(df, df), expected) }) test_that("can rbind unspecified vectors", { expect_identical(vec_rbind(NA), data_frame(...1 = NA)) expect_identical(vec_rbind(NA, NA), data_frame(...1 = lgl(NA, NA))) df <- data.frame(x = 1) expect_identical(vec_rbind(NA, df), data.frame(x = c(NA, 1))) expect_identical(vec_rbind(df, NA), data.frame(x = c(1, NA))) expect_identical(vec_rbind(NA, df, NA), data.frame(x = c(NA, 1, NA))) expect_identical( vec_rbind(c(x = NA), data.frame(x = 1)), data.frame(x = c(NA, 1)) ) expect_identical( vec_rbind(c(y = NA), df), data.frame(y = c(NA, NA), x = c(NA, 1)) ) out <- suppressMessages(vec_rbind(c(x = NA, x = NA), df)) exp <- data.frame(x...1 = c(NA, NA), x...2 = c(NA, NA), x = c(NA, 1)) expect_identical(out, exp) }) test_that("as_df_row() tidies the names of unspecified vectors", { expect_identical(as_df_row(c(NA, NA)), c(NA, NA)) expect_identical(as_df_row(unspecified(2)), unspecified(2)) expect_identical( as_df_row(c(a = NA, a = NA), quiet = TRUE), data.frame(a...1 = NA, a...2 = NA) ) expect_identical( as_df_row(c(a = TRUE, a = TRUE), quiet = TRUE), data.frame(a...1 = TRUE, a...2 = TRUE) ) }) test_that("can rbind spliced lists", { data <- list(c(a = 1, b = 2), c(a = TRUE, b = FALSE)) expect_identical(vec_rbind(!!!data), data_frame(a = c(1, 1), b = c(2, 0))) }) test_that("can rbind list columns", { out <- vec_rbind(data_frame(x = list(1, 2)), data_frame(x = list(3))) expect_identical(out, data_frame(x = list(1, 2, 3))) }) test_that("can rbind lists", { out <- vec_rbind(list(x = 1:2)) expect_identical(out, data_frame(x = list(c(1L, 2L)))) out <- vec_rbind(list(x = 1:2, y = 3L)) expect_identical(out, data_frame(x = list(c(1L, 2L)), y = list(3L))) out <- vec_rbind(list(x = 1, y = 2), list(y = "string")) expect_identical(out, data_frame(x = list(1, NULL), y = list(2, "string"))) }) test_that("can rbind factors", { fctr <- factor(c("a", "b")) expect_equal(vec_rbind(fctr), data_frame(...1 = fctr[1], ...2 = fctr[2])) fctr_named <- set_names(fctr) expect_equal(vec_rbind(fctr_named), data_frame(a = fctr[1], b = fctr[2])) }) test_that("can rbind dates", { date <- new_date(c(0, 1)) expect_equal(vec_rbind(date), data_frame(...1 = date[1], ...2 = date[2])) date_named <- set_names(date, c("a", "b")) expect_equal(vec_rbind(date_named), data_frame(a = date[1], b = date[2])) }) test_that("can rbind POSIXlt objects into POSIXct objects", { datetime <- as.POSIXlt(new_datetime(0)) expect_s3_class(vec_rbind(datetime, datetime)[[1]], "POSIXct") datetime_named <- set_names(datetime, "col") expect_named(vec_rbind(datetime_named, datetime_named), "col") }) test_that("can rbind table objects (#913)", { x <- new_table(1:4, c(2L, 2L)) y <- x colnames <- c("c1", "c2") rownames <- c("r1", "r2", "r3", "r4") dimnames(x) <- list(rownames[1:2], colnames) dimnames(y) <- list(rownames[3:4], colnames) expect <- data.frame(c1 = c(1:2, 1:2), c2 = c(3:4, 3:4), row.names = rownames) expect_identical(vec_rbind(x, y), expect) }) test_that("can rbind missing vectors", { expect_identical(vec_rbind(c(x = na_int)), data_frame(x = na_int)) expect_identical( vec_rbind(c(x = na_int), c(x = na_int)), data_frame(x = int(na_int, na_int)) ) }) test_that("vec_rbind() respects size invariants (#286)", { expect_identical(vec_rbind(), new_data_frame(n = 0L)) expect_identical(vec_rbind(int(), int()), new_data_frame(n = 2L)) expect_identical( vec_rbind(c(x = int()), c(x = TRUE)), new_data_frame(list(x = lgl(NA, TRUE))) ) expect_identical( vec_rbind(int(), new_data_frame(n = 2L), int()), new_data_frame(n = 4L) ) }) test_that("can repair names in `vec_rbind()` (#229)", { expect_snapshot({ (expect_error(vec_rbind(.name_repair = "none"), "can't be `\"none\"`")) (expect_error( vec_rbind(.name_repair = "minimal"), "can't be `\"minimal\"`" )) (expect_error( vec_rbind(list(a = 1, a = 2), .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) }) expect_named( vec_rbind(list(a = 1, a = 2), .name_repair = "unique"), c("a...1", "a...2") ) expect_named(vec_rbind(list(`_` = 1)), "_") expect_named(vec_rbind(list(`_` = 1), .name_repair = "universal"), c("._")) expect_named( vec_rbind(list(a = 1, a = 2), .name_repair = ~ toupper(.)), c("A", "A") ) }) test_that("can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_rbind( c(x = 1, x = 2), c(x = 3, x = 4), .name_repair = "unique_quiet" ) res_universal <- vec_rbind( c("if" = 1, "in" = 2), c("if" = 3, "for" = 4), .name_repair = "universal_quiet" ) }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in", ".for")) }) test_that("can construct an id column", { df <- data.frame(x = 1) expect_named(vec_rbind(df, df, .names_to = "id"), c("id", "x")) expect_equal(vec_rbind(df, df, .names_to = "id")$id, c(1L, 2L)) expect_equal(vec_rbind(a = df, b = df, .names_to = "id")$id, c("a", "b")) expect_equal(vec_rbind(a = df, df, .names_to = "id")$id, c("a", "")) }) test_that("vec_rbind() fails with arrays of dimensionality > 3", { expect_snapshot({ (expect_error(vec_rbind(array(NA, c(1, 1, 1))))) (expect_error(vec_rbind(array(NA, c(1, 1, 1)), .error_call = call("foo")))) }) }) test_that("row names are preserved by vec_rbind()", { df1 <- mtcars[1:3, ] df2 <- mtcars[4:5, ] expect_identical(vec_rbind(df1, df2), mtcars[1:5, ]) row.names(df2) <- NULL out <- mtcars[1:5, ] row.names(out) <- c(row.names(df1), "...4", "...5") expect_identical(vec_rbind(df1, df2), out) }) test_that("can assign row names in vec_rbind()", { df1 <- mtcars[1:3, ] df2 <- mtcars[4:5, ] expect_snapshot({ (expect_error( vec_rbind( foo = df1, df2, .names_to = NULL ), "specification" )) }) # Combination out <- vec_rbind( foo = df1, df2, .names_to = NULL, .name_spec = "{outer}_{inner}" ) exp <- mtcars[1:5, ] row.names(exp) <- c(paste0("foo_", row.names(df1)), row.names(df2)) expect_identical(out, exp) out <- vec_rbind(foo = df1, df2, .names_to = "id") exp <- mtcars[1:5, ] exp <- vec_cbind(id = c(rep("foo", 3), rep("", 2)), exp) expect_identical(out, exp) # Sequence out <- vec_rbind( foo = unrownames(df1), df2, bar = unrownames(mtcars[6, ]), .names_to = NULL, .name_spec = "{outer}_{inner}" ) exp <- mtcars[1:6, ] row.names(exp) <- c(paste0("foo_", 1:3), row.names(df2), "bar") expect_identical(out, exp) out <- vec_rbind( foo = unrownames(df1), df2, bar = unrownames(mtcars[6, ]), .names_to = "id" ) exp <- mtcars[1:6, ] exp <- vec_cbind(id = c(rep("foo", 3), rep("", 2), "bar"), exp) row.names(exp) <- c(paste0("...", 1:3), row.names(df2), "...6") expect_identical(out, exp) }) test_that("vec_rbind() takes the proxy and restores", { df <- foobar(data.frame(x = 1)) # This data frame subclass has an identity proxy and the restore # method falls back to a bare data frame if `$x` has any missing values. # In `vec_rbind()`, the `vec_init()` call will create a bare data frame, # but at the end it is `vec_restore()`d to the right class. local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) { x }, vec_proxy.vctrs_foobar = function(x, ...) { x }, vec_restore.vctrs_foobar = function(x, to, ...) { if (any(is.na(x$x))) { new_data_frame(x) } else { vec_restore_default(x, to) } } ) expect_identical( vec_rbind(df, df), foobar(data.frame(x = c(1, 1))) ) }) test_that("vec_rbind() proxies before initializing", { df <- foobar(data.frame(x = 1)) # This data frame subclass doesn't allow `NA`s in columns. # If initialization happened before proxying, it would try to # create `NA` rows with `vec_init()`. local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) { x }, vec_proxy.vctrs_foobar = function(x, ...) { new_data_frame(x) }, vec_restore.vctrs_foobar = function(x, to, ...) { if (any(is.na(x$x))) { abort("`x` can't have NA values.") } vec_restore_default(x, to) } ) expect_identical( vec_rbind(df, df), foobar(data.frame(x = c(1, 1))) ) }) test_that("vec_rbind() requires a data frame proxy for data frame ptypes", { df <- foobar(data.frame(x = 1)) local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x, vec_proxy.vctrs_foobar = function(x, ...) 1 ) expect_error( vec_rbind(df, df), "Can't fill a data frame that doesn't have a data frame proxy" ) }) test_that("names of `...` are used for type and cast errors even when zapped", { xs <- list(a = data_frame(x = 1), b = data_frame(x = "2")) expect_snapshot(error = TRUE, { vec_rbind(!!!xs) }) expect_snapshot(error = TRUE, { vec_rbind(!!!xs, .ptype = data_frame(x = double())) }) }) test_that("monitoring: name repair while rbinding doesn't modify in place", { df <- new_data_frame(list(x = 1, x = 1)) expect <- new_data_frame(list(x = 1, x = 1)) # Name repair occurs expect_named(vec_rbind(df), c("x...1", "x...2")) # No changes to `df` expect_identical(df, expect) }) test_that("performance: Row binding with S3 columns doesn't duplicate on every assignment (#1151)", { skip_if_not_testing_performance() x <- as.Date("2000-01-01") x <- rep(x, 100) df <- data.frame(x = x) lst <- rep_len(list(df), 10000) expect_time_lt(vec_rbind(!!!lst), 5) }) test_that("performance: Row binding with df-cols doesn't duplicate on every assignment (#1122)", { skip_if_not_testing_performance() df_col <- new_data_frame(list(x = 1:1000)) df <- new_data_frame(list(y = df_col)) lst <- rep_len(list(df), 10000) expect_time_lt(vec_rbind(!!!lst), 5) }) # cols -------------------------------------------------------------------- test_that("vec_cbind() reports error context", { expect_snapshot({ (expect_error(vec_cbind(foobar(list())))) (expect_error(vec_cbind(foobar(list()), .error_call = call("foo")))) (expect_error(vec_cbind(a = 1:2, b = int()))) (expect_error(vec_cbind(a = 1:2, b = int(), .error_call = call("foo")))) }) }) test_that("empty inputs give data frame", { expect_equal(vec_cbind(), data_frame()) expect_equal(vec_cbind(NULL), data_frame()) expect_equal(vec_cbind(data.frame(a = 1), NULL), data_frame(a = 1)) }) test_that("number of rows is preserved with zero column data frames (#1281)", { df <- new_data_frame(n = 2L) expect_size(vec_cbind(df, df), 2L) }) test_that("vec_cbind(): NULL is idempotent", { df <- data_frame(x = 1) expect_equal(vec_cbind(df, NULL), df) }) test_that("outer names are respected", { expect_named(vec_cbind(x = 1, y = 4), c("x", "y")) expect_named(vec_cbind(a = data.frame(x = 1)), "a") }) test_that("inner names are respected", { expect_named(vec_cbind(data_frame(x = 1), data_frame(y = 1)), c("x", "y")) }) test_that("nameless vectors get tidy defaults", { expect_named(vec_cbind(1:2, 1), c("...1", "...2")) }) test_that("matrix becomes data frame", { x <- matrix(1:4, nrow = 2) expect_equal(vec_cbind(x), data.frame(...1 = 1:2, ...2 = 3:4)) # Packed if named expect_equal(vec_cbind(x = x), data_frame(x = x)) }) test_that("duplicate names are de-deduplicated", { local_name_repair_verbose() expect_snapshot({ (expect_named(vec_cbind(x = 1, x = 1), c("x...1", "x...2"))) (expect_named( vec_cbind(data.frame(x = 1), data.frame(x = 1)), c("x...1", "x...2") )) }) }) test_that("rows recycled to longest", { df <- data.frame(x = 1:3) expect_dim(vec_cbind(df), c(3, 1)) expect_dim(vec_cbind(df, NULL), c(3, 1)) expect_dim(vec_cbind(df, y = 1), c(3, 2)) expect_dim(vec_cbind(data.frame(x = 1), y = 1:3), c(3, 2)) expect_dim( vec_cbind( data.frame(a = 1, b = 2), y = 1:3 ), c(3, 3) ) }) test_that("vec_cbind() output is tibble if any input is tibble", { df <- data.frame(x = 1) dt <- tibble::tibble(y = 2) expect_s3_class(vec_cbind(dt), "tbl_df") expect_s3_class(vec_cbind(df, dt), "tbl_df") expect_s3_class(vec_cbind(dt, df), "tbl_df") }) test_that("can override default .nrow", { expect_dim(vec_cbind(x = 1, .size = 3), c(3, 1)) }) test_that("can repair names in `vec_cbind()` (#227)", { expect_snapshot({ (expect_error( vec_cbind(a = 1, a = 2, .name_repair = "none"), "can't be `\"none\"`" )) (expect_error( vec_cbind(a = 1, a = 2, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) }) expect_named( vec_cbind(a = 1, a = 2, .name_repair = "unique"), c("a...1", "a...2") ) expect_named(vec_cbind(`_` = 1, .name_repair = "universal"), "._") expect_named(vec_cbind(a = 1, a = 2, .name_repair = "minimal"), c("a", "a")) expect_named(vec_cbind(a = 1, a = 2, .name_repair = toupper), c("A", "A")) }) test_that("can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_cbind(x = 1, x = 2, .name_repair = "unique_quiet") res_universal <- vec_cbind( "if" = 1, "in" = 2, .name_repair = "universal_quiet" ) }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in")) }) test_that("can supply `.names_to` to `vec_rbind()` (#229)", { expect_snapshot({ (expect_error(vec_rbind(data_frame(), .names_to = letters))) (expect_error(vec_rbind(data_frame(), .names_to = 10))) (expect_error(vec_rbind( data_frame(), .names_to = letters, .error_call = call("foo") ))) }) x <- data_frame(foo = 1:2, bar = 3:4) y <- data_frame(foo = 5L, bar = 6L) expect_identical( vec_rbind(a = x, b = y, .names_to = "quux"), data_frame( quux = c("a", "a", "b"), foo = c(1L, 2L, 5L), bar = c(3L, 4L, 6L) ) ) expect_identical( vec_rbind(a = x, b = y, .names_to = "foo"), data_frame(foo = c("a", "a", "b"), bar = c(3L, 4L, 6L)) ) # No names expect_identical( vec_rbind(x, y, .names_to = "quux"), data_frame(quux = c(1L, 1L, 2L), foo = c(1L, 2L, 5L), bar = c(3L, 4L, 6L)) ) expect_identical( vec_rbind(x, y, .names_to = "foo"), data_frame(foo = c(1L, 1L, 2L), bar = c(3L, 4L, 6L)) ) # Partial names expect_identical(vec_rbind(x, b = y, .names_to = "quux")$quux, c("", "", "b")) }) test_that("can supply existing `.names_to`", { x <- data.frame(a = 1, id = TRUE) expect_identical( vec_rbind(foo = x, bar = c(a = 2), .names_to = "id"), data_frame(a = c(1, 2), id = c("foo", "bar")) ) y <- data.frame(id = TRUE, a = 1) expect_identical( vec_rbind(foo = y, bar = c(a = 2), .names_to = "id"), data_frame(id = c("foo", "bar"), a = c(1, 2)) ) }) test_that("vec_cbind() returns visibly (#452)", { # Shouldn't be needed once `check_unique` is implemented in C expect_visible(vctrs::vec_cbind(x = 1, .name_repair = "check_unique")) }) test_that("vec_cbind() packs named data frames (#446)", { expect_identical(vec_cbind(data_frame(y = 1:3)), data_frame(y = 1:3)) expect_identical( vec_cbind(x = data_frame(y = 1:3)), data_frame(x = data_frame(y = 1:3)) ) }) test_that("vec_cbind() packs 1d arrays", { a <- array(1:2) expect_identical(vec_cbind(a), data_frame(...1 = 1:2)) expect_identical(vec_cbind(x = a), data_frame(x = a)) }) test_that("vec_cbind() packs named matrices", { m <- matrix(1:4, 2) expect_identical(vec_cbind(m), data_frame(...1 = 1:2, ...2 = 3:4)) expect_identical(vec_cbind(x = m), data_frame(x = m)) }) test_that("vec_cbind() never packs named vectors", { expect_identical(vec_cbind(1:2), data_frame(...1 = 1:2)) expect_identical(vec_cbind(x = 1:2), data_frame(x = 1:2)) }) test_that("names are repaired late if unpacked", { df <- data_frame(b = 2, b = 3, .name_repair = "minimal") out1 <- vec_cbind(a = 1, df) out2 <- vec_cbind(a = 1, as.matrix(df)) out3 <- vec_cbind(a = 1, matrix(1:2, nrow = 1)) expect_named(out1, c("a", "b...2", "b...3")) expect_named(out2, c("a", "b...2", "b...3")) expect_named(out3, c("a", "...2", "...3")) }) test_that("names are not repaired if packed", { df <- data_frame(b = 2, b = 3, .name_repair = "minimal") out1 <- vec_cbind(a = 1, packed = df) out2 <- vec_cbind(a = 1, packed = as.matrix(df)) out3 <- vec_cbind(a = 1, packed = matrix(1:2, nrow = 1)) expect_named(out1, c("a", "packed")) expect_named(out2, c("a", "packed")) expect_named(out3, c("a", "packed")) expect_named(out1$packed, c("b", "b")) expect_identical(colnames(out2$packed), c("b", "b")) expect_identical(colnames(out3$packed), NULL) }) test_that("vec_cbind() fails with arrays of dimensionality > 3", { a <- array(NA, c(1, 1, 1)) expect_snapshot({ (expect_error(vec_cbind(a))) (expect_error(vec_cbind(a, .error_call = call("foo")))) (expect_error(vec_cbind(x = a))) }) }) test_that("monitoring: name repair while cbinding doesn't modify in place", { df <- new_data_frame(list(x = 1, x = 1)) expect <- new_data_frame(list(x = 1, x = 1)) # Name repair occurs expect_named(vec_cbind(df), c("x...1", "x...2")) # No changes to `df` expect_identical(df, expect) }) test_that("vec_rbind() consistently handles unnamed outputs", { # Name repair of columns is a little weird but unclear we can do better expect_identical( vec_rbind(1, 2, .names_to = NULL), data.frame(...1 = c(1, 2)) ) expect_identical( vec_rbind(1, 2, ...10 = 3, .names_to = NULL), data.frame(...1 = c(1, 2, 3), row.names = c("...1", "...2", "...3")) ) expect_identical( vec_rbind(a = 1, b = 2, .names_to = NULL), data.frame(...1 = c(1, 2), row.names = c("a", "b")) ) expect_identical( vec_rbind(c(a = 1), c(b = 2), .names_to = NULL), data.frame(a = c(1, NA), b = c(NA, 2)) ) }) test_that("vec_rbind() ignores named inputs by default (#966)", { expect_identical( vec_rbind(foo = c(a = 1)), data.frame(a = 1) ) expect_identical( vec_rbind(foo = c(a = 1), .names_to = NULL), data.frame(a = 1, row.names = "foo") ) }) test_that("vec_cbind() consistently handles unnamed outputs", { expect_identical( vec_cbind(1, 2), data.frame(...1 = 1, ...2 = 2) ) expect_identical( vec_cbind(1, 2, ...10 = 3), data.frame(...1 = 1, ...2 = 2, ...3 = 3) ) expect_identical( vec_cbind(a = 1, b = 2), data.frame(a = 1, b = 2) ) expect_identical( vec_cbind(c(a = 1), c(b = 2)), new_data_frame(list(...1 = c(a = 1), ...2 = c(b = 2))) ) }) test_that("vec_rbind() name repair messages are useful", { local_name_repair_verbose() expect_snapshot({ vec_rbind(1, 2) vec_rbind(1, 2, .names_to = NULL) vec_rbind(1, 2, ...10 = 3) vec_rbind(1, 2, ...10 = 3, .names_to = NULL) vec_rbind(a = 1, b = 2) vec_rbind(a = 1, b = 2, .names_to = NULL) vec_rbind(c(a = 1), c(b = 2)) vec_rbind(c(a = 1), c(b = 2), .names_to = NULL) }) }) test_that("vec_rbind() is silent when assigning duplicate row names of df-cols", { df <- new_data_frame(list(x = mtcars[1:3, 1, drop = FALSE])) expect_snapshot(vec_rbind(df, df)) expect_snapshot(vec_rbind(mtcars[1:4, ], mtcars[1:3, ])) }) test_that("vec_cbind() name repair messages are useful", { local_name_repair_verbose() expect_snapshot({ vec_cbind(1, 2) vec_cbind(1, 2, ...10 = 3) vec_cbind(a = 1, b = 2) vec_cbind(c(a = 1), c(b = 2)) }) }) test_that("cbind() deals with row names", { expect_identical( vec_cbind(mtcars[1:3], foo = 1), cbind(mtcars[1:3], foo = 1) ) expect_identical( vec_cbind(mtcars[1:3], mtcars[4]), cbind(mtcars[1:3], mtcars[4]) ) out <- vec_cbind( mtcars[1, 1, drop = FALSE], unrownames(mtcars[1:3, 2, drop = FALSE]) ) exp <- mtcars[1:3, c(1, 2)] exp[[1]] <- exp[[1, 1]] row.names(exp) <- paste0(c("Mazda RX4..."), 1:3) expect_identical(out, exp) }) test_that("prefer row names of first named input (#1058)", { df0 <- unrownames(mtcars[1:5, 1:3]) df1 <- mtcars[1:5, 4:6] df2 <- mtcars[5:1, 7:9] expect_identical( row.names(vec_cbind(df0, df1, df2)), row.names(df1) ) expect_identical( row.names(vec_cbind(df0, df2, df1)), row.names(df2) ) }) test_that("can rbind data frames with matrix columns (#625)", { df <- tibble(x = 1:2, y = matrix(1:4, nrow = 2)) expect_identical(vec_rbind(df, df), vec_slice(df, c(1, 2, 1, 2))) }) test_that("rbind repairs names of data frames (#704)", { df <- data_frame(x = 1, x = 2, .name_repair = "minimal") df_repaired <- data_frame(x...1 = 1, x...2 = 2) expect_identical(vec_rbind(df), df_repaired) expect_identical(vec_rbind(df, df), vec_rbind(df_repaired, df_repaired)) expect_snapshot({ (expect_error( vec_rbind(df, df, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) (expect_error( vec_rbind( df, df, .name_repair = "check_unique", .error_call = call("foo") ), class = "vctrs_error_names_must_be_unique" )) }) }) test_that("vec_rbind() works with simple homogeneous foreign S3 classes", { expect_identical( vec_rbind(set_names(foobar(1), "x"), set_names(foobar(2), "x")), data_frame(x = foobar(c(1, 2))) ) }) test_that("vec_rbind() works with simple homogeneous foreign S4 classes", { joe1 <- .Counts(1L, name = "Joe") joe2 <- .Counts(2L, name = "Joe") expect_identical( vec_rbind(set_names(joe1, "x"), set_names(joe2, "x")), data_frame(x = .Counts(1:2, name = "Joe")) ) }) test_that("vec_rbind() fails with complex foreign S3 classes", { expect_snapshot({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error( vec_rbind(set_names(x, "x"), set_names(y, "x")), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_rbind() fails with complex foreign S4 classes", { expect_snapshot({ joe <- .Counts(1L, name = "Joe") jane <- .Counts(2L, name = "Jane") (expect_error( vec_rbind(set_names(joe, "x"), set_names(jane, "y")), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_rbind() falls back to c() if S3 method is available", { x <- foobar(1, foo = 1) y <- foobar(2, bar = 2) x_df <- data_frame(x = x) y_df <- data_frame(x = y) expect_error(vec_rbind(x_df, y_df), class = "vctrs_error_incompatible_type") expect_error(vec_c(x_df, y_df), class = "vctrs_error_incompatible_type") expect_error( list_unchop(list(x_df, y_df), indices = list(1, 2)), class = "vctrs_error_incompatible_type" ) with_c_method <- function(expr) { with_methods( c.vctrs_foobar = function(...) quux(NextMethod()), expr ) } out <- with_c_method(vec_rbind(x_df, y_df)) exp <- data_frame(x = quux(c(1, 2))) expect_identical(out, exp) expect_identical(with_c_method(vec_c(x_df, y_df)), exp) expect_identical( with_c_method(list_unchop(list(x_df, y_df), indices = list(1, 2))), exp ) # Fallback is used with data frame subclasses, with or without # ptype2 method foo_df <- foobaz(x_df) bar_df <- foobaz(y_df) out <- with_c_method(vec_rbind(foo_df, bar_df)) exp <- foobaz(data_frame(x = quux(c(1, 2)))) expect_identical(out, exp) expect_identical(with_c_method(vec_c(foo_df, bar_df)), exp) expect_identical( with_c_method(list_unchop(list(foo_df, bar_df), indices = list(1, 2))), exp ) with_hybrid_methods <- function(expr, cast = TRUE) { methods <- list( c.vctrs_foobar = function(...) quux(NextMethod()), vec_ptype2.vctrs_foobaz.vctrs_foobaz = function(...) { foobaz(df_ptype2(...)) }, vec_cast.vctrs_foobaz.vctrs_foobaz = if (cast) { function(...) foobaz(df_cast(...)) } ) with_methods(expr, !!!compact(methods)) } expect_equal( with_hybrid_methods( cast = FALSE, vec_rbind(foo_df, bar_df) ), foobaz(data_frame(x = quux(c(1, 2)))) ) # Falls back to data frame because there is no ptype2/cast methods out <- with_hybrid_methods(vec_rbind(foo_df, bar_df)) exp <- foobaz(data_frame(x = quux(c(1, 2)))) expect_identical(out, exp) expect_identical(with_hybrid_methods(vec_c(foo_df, bar_df)), exp) expect_identical( with_hybrid_methods(list_unchop( list(foo_df, bar_df), indices = list(1, 2) )), exp ) wrapper_x_df <- data_frame(x = x_df) wrapper_y_df <- data_frame(x = y_df) out <- with_c_method(vec_rbind(wrapper_x_df, wrapper_y_df)) exp <- data_frame(x = data_frame(x = quux(c(1, 2)))) expect_identical(out, exp) expect_identical(with_c_method(vec_c(wrapper_x_df, wrapper_y_df)), exp) expect_identical( with_c_method(list_unchop( list(wrapper_x_df, wrapper_y_df), indices = list(1, 2) )), exp ) }) test_that("c() fallback works with unspecified columns", { local_methods( c.vctrs_foobar = function(...) foobar(NextMethod()), `[.vctrs_foobar` = function(x, i, ...) { foobar(NextMethod(), dispatched = TRUE) } ) out <- vec_rbind( data_frame(x = foobar(1)), data_frame(y = foobar(2)) ) expect_identical( out, data_frame( x = foobar(c(1, NA), dispatched = TRUE), y = foobar(c(NA, 2), dispatched = TRUE) ) ) }) test_that("c() fallback works with vctrs-powered data frame subclass", { local_methods( c.vctrs_quux = function(...) quux(vec_paste0(NextMethod(), "-c")), `[.vctrs_quux` = function(x, i, ...) quux(vec_paste0(NextMethod(), "-[")) ) local_foobar_df_methods() ### Joint case df1 <- foobar(data_frame(x = quux(1:3))) df2 <- data_frame(x = quux(4:5)) out <- vctrs::vec_rbind(df1, df2) exp <- foobar(data_frame(x = quux(paste0(1:5, "-c")))) expect_identical(out, exp) out <- vctrs::vec_rbind(df2, df1) exp <- foobar(data_frame(x = quux(paste0(c(4:5, 1:3), "-c")))) expect_identical(out, exp) ### Disjoint case df1 <- foobar(data_frame(x = quux(1:3))) df2 <- data.frame(y = 4:5) out <- vctrs::vec_rbind(df1, df2) exp <- foobar(data_frame( x = quux(c(paste0(1:3, "-c-["), paste0(c(NA, NA), "-["))), y = c(rep(NA, 3), 4:5) )) expect_identical(out, exp) out <- vctrs::vec_rbind(df2, df1) exp <- foobar(data_frame( y = c(4:5, rep(NA, 3)), x = quux(c(paste0(c(NA, NA), "-["), paste0(1:3, "-c-["))) )) expect_identical(out, exp) }) test_that("vec_rbind() falls back to c() if S3 method is available for S4 class", { joe <- data_frame(x = .Counts(c(1L, 2L), name = "Joe")) jane <- data_frame(x = .Counts(3L, name = "Jane")) expect_error(vec_rbind(joe, jane), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_Counts = function(...) .Counts(NextMethod(), name = "dispatched"), vec_rbind(joe, jane) ) expect_identical(out$x, .Counts(1:3, name = "dispatched")) }) test_that("rbind supports names and inner names (#689)", { out <- vec_rbind( data_frame(x = list(a = 1, b = 2)), data_frame(x = list(3)), data_frame(x = list(d = 4)) ) expect_identical(out$x, list(a = 1, b = 2, 3, d = 4)) vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) oo_x <- set_names( as.POSIXlt(c("2020-01-01", "2020-01-02", "2020-01-03")), letters[1:3] ) oo_y <- c(FOO = as.POSIXlt(c("2020-01-04"))) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "d") mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("d")) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x, oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y, oo = oo_y), row.names = c("quux") ) nested_out <- vec_rbind(nested_x, nested_y) expect_identical(row.names(nested_out), c("foo", "bar", "baz", "quux")) expect_identical(row.names(nested_out$df), c("a", "b", "c", "d")) expect_identical(row.names(nested_out$mat), c("a", "b", "c", "d")) expect_identical(names(nested_out$vec), c("a", "b", "c", "FOO")) expect_identical(names(nested_out$oo), c("a", "b", "c", "FOO")) }) test_that("vec_rbind() doesn't fall back to c() with proxied classes (#1119)", { foobar_rcrd <- function(x, y) { new_rcrd(list(x = x, y = y), class = "vctrs_foobar") } x <- foobar_rcrd(x = 1:2, y = 3:4) out <- vec_rbind(x, x) exp <- data_frame( ...1 = foobar_rcrd(x = c(1L, 1L), y = c(3L, 3L)), ...2 = foobar_rcrd(x = c(2L, 2L), y = c(4L, 4L)) ) expect_identical(out, exp) out <- vec_rbind(data_frame(x = x), data_frame(x = x)) exp <- data_frame( x = foobar_rcrd(x = c(1L, 2L, 1L, 2L), y = c(3L, 4L, 3L, 4L)) ) expect_identical(out, exp) }) test_that("vec_rbind() fallback works with tibbles", { x <- foobar("foo") df <- data_frame(x = x) tib <- tibble(x = x) local_methods(c.vctrs_foobar = function(...) quux(NextMethod())) exp <- tibble(x = quux(c("foo", "foo"))) expect_identical(vec_rbind(tib, tib), exp) expect_identical(vec_rbind(df, tib), exp) expect_identical(vec_rbind(tib, df), exp) }) test_that("vec_rbind() zaps names when name-spec is zap() and names-to is NULL", { expect_identical( vec_rbind(foo = c(x = 1), .names_to = NULL, .name_spec = zap()), data.frame(x = 1) ) }) test_that("can zap names even when `.names_to` is supplied", { expect_identical( vec_rbind(foo = c(x = 1), .names_to = zap(), .name_spec = zap()), data.frame(x = 1) ) expect_identical( vec_rbind( foo = data.frame(x = 1, row.names = "row"), .names_to = zap(), .name_spec = zap() ), data.frame(x = 1) ) # We previously didn't allow `.name_spec = zap()` when `.names_to = "id"`, # but this does have a use case - zapping inner row names while also moving # outer names into a new column expect_identical( vec_rbind(foo = c(x = 1), .names_to = "id", .name_spec = zap()), data.frame(id = "foo", x = 1) ) expect_identical( vec_rbind( foo = data.frame(x = 1, row.names = "row"), .names_to = "id", .name_spec = zap() ), data.frame(id = "foo", x = 1) ) }) test_that("can request 'inner' names when `.names_to` is supplied", { # Note how it can be useful to lock `.name_spec` to `"inner"` in your API, but # still expose `.names_to` to your users and allow all of its options. # `purrr::list_rbind()` does this. expect_identical( vec_rbind( foo = data.frame(x = 1, row.names = "row"), .names_to = zap(), .name_spec = "inner" ), data.frame(x = 1, row.names = "row") ) expect_identical( vec_rbind( foo = data.frame(x = 1, row.names = "row"), .names_to = "id", .name_spec = "inner" ), data.frame(id = "foo", x = 1, row.names = "row") ) expect_identical( vec_rbind( foo = data.frame(x = 1, row.names = "row"), .names_to = NULL, .name_spec = "inner" ), data.frame(x = 1, row.names = "row") ) }) test_that("can zap outer names from a name-spec (#1215)", { zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner df <- data.frame(x = 1:2) df_named <- data.frame(x = 3L, row.names = "foo") expect_null( vec_names(vec_rbind(a = df, .names_to = NULL, .name_spec = zap_outer_spec)) ) expect_identical( vec_names(vec_rbind(a = df, df_named, .name_spec = zap_outer_spec)), c("...1", "...2", "foo") ) # These days it is more efficient to use a name-spec of "inner" (#1988) expect_identical( vec_rbind(a = df, .names_to = NULL, .name_spec = zap_outer_spec), vec_rbind(a = df, .names_to = NULL, .name_spec = "inner") ) expect_identical( vec_rbind(a = df, df_named, .name_spec = zap_outer_spec), vec_rbind(a = df, df_named, .name_spec = "inner") ) }) test_that("column names are treated consistently in vec_rbind()", { exp <- data.frame(a = c(1L, 1L), b = c(2L, 2L)) x <- c(a = 1L, b = 2L) expect_identical(vec_rbind(x, x), exp) x <- array(1:2, dimnames = list(c("a", "b"))) expect_identical(vec_rbind(x, x), exp) x <- matrix(1:2, nrow = 1, dimnames = list(NULL, c("a", "b"))) expect_identical(vec_rbind(x, x), exp) x <- array(1:6, c(1, 2, 1), dimnames = list(NULL, c("a", "b"), NULL)) expect_error(vec_rbind(x, x), "Can't bind arrays") }) test_that("can repair names of row-binded vectors (#1567)", { local_name_repair_verbose() expect_silent( expect_named( vec_rbind( x = 1:3, y = 4:6, .name_repair = function(x) c("a", "a", "a") ), c("a", "a", "a") ) ) }) test_that("can repair names of row-binded matrices", { local_name_repair_verbose() expect_silent({ expect_named( vec_rbind( x = matrix(1:3, 1), y = matrix(4:6, 1), .name_repair = function(x) c("a", "a", "a") ), c("a", "a", "a") ) expect_named( vec_rbind( x = matrix(1:3, 1), y = 4:6, .name_repair = function(x) c("a", "a", "a") ), c("a", "a", "a") ) }) }) test_that("vec_rbind() only restores one time", { restored <- list() local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x, vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, to, ...) { # Ignore proxying and restoration of ptypes if (length(x)) { restored <<- c(restored, list(x)) } foobar(x) } ) df <- data_frame(x = foobar(1:3)) vec_rbind(df, df) expect_equal( restored, list( rep(na_int, 6), # From `vec_init()` foobar(c(1:3, 1:3)) # Final restoration ) ) }) test_that("vec_rbind() applies `base::c()` fallback to df-cols (#1462, #1640)", { x <- structure(1, class = "myclass") df <- tibble(a = tibble(x = x)) df <- vec_rbind(df, df) expect_equal(df$a$x, structure(c(1, 1), class = "myclass")) }) # Golden tests ------------------------------------------------------- test_that("row-binding performs expected allocations", { vec_rbind_list <- function(x) { vec_rbind(!!!x) } expect_snapshot({ ints <- rep(list(1L), 1e2) named_ints <- rep(list(set_names(1:3, letters[1:3])), 1e2) "Integers as rows" suppressMessages(with_memory_prof(vec_rbind_list(ints))) suppressMessages(with_memory_prof(vec_rbind_list(named_ints))) "Data frame with named columns" df <- data_frame( x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c("A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb"))) ) dfs <- rep(list(df), 1e2) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (non-repaired, non-recursive case)" df <- data_frame(x = 1:2) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (repaired, non-recursive case)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (non-repaired, recursive case) (#1217)" df <- data_frame( x = 1:2, y = data_frame(x = 1:2) ) dfs <- rep(list(df), 1e2) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) "Data frame with rownames (repaired, recursive case) (#1217)" dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) }) }) test_that("`.names_to` is assigned after restoration (#1648)", { df <- data_frame(x = factor("foo")) expect_equal( vec_rbind(name = df, .names_to = "x"), data_frame(x = "name") ) # This used to fail with: #> Error in `vctrs::vec_rbind()`: #> ! adding class "factor" to an invalid object }) vctrs/tests/testthat/test-type-sclr.R0000644000176200001440000000536015065005761017450 0ustar liggesuserstest_that("sclr is a named list", { x <- new_sclr(x = 1, y = 2) expect_type(x, "list") expect_s3_class(x, "vctrs_sclr") expect_named(x, c("x", "y")) }) test_that("scalar must have unique names", { expect_error(new_sclr(x = 1, x = 2), "not TRUE") }) test_that("can get and set existing fields", { x <- new_sclr(x = 1, y = 2) x$x <- 3 expect_equal(x$x, 3) x[["y"]] <- 4 expect_equal(x[["y"]], 4) expect_error(x$z, "Invalid index") expect_error(x$z <- 1, "Invalid index") }) test_that("as.list strips attributes apart from names", { x <- new_sclr(x = 1, y = 2) y <- as.list(x) expect_type(y, "list") expect_equal(attributes(y), list(names = names(x))) }) test_that("as.data.frame works", { # #167: Not sure if this is the correct behavior. x <- new_sclr(x = 1, y = 2) expect_equal( as.data.frame(x, nm = "a"), new_data_frame(list(a = list(x))) ) }) test_that("putting in a data frame makes a list-col", { x <- new_sclr(x = 1, y = 2) df <- data.frame(x) expect_s3_class(df, "data.frame") expect_equal(df$x, list(x)) }) test_that("vector operations are unsupported", { x <- new_sclr(x = 1, y = 2) expect_error(x["a"], class = "vctrs_error_unsupported") expect_error(x["a"] <- 1, class = "vctrs_error_unsupported") expect_error(names(x) <- "x", class = "vctrs_error_unsupported") expect_error(dim(x) <- 1, class = "vctrs_error_unsupported") expect_error(dimnames(x) <- 1, class = "vctrs_error_unsupported") expect_error(levels(x) <- 1, class = "vctrs_error_unsupported") expect_error(is.na(x) <- 1, class = "vctrs_error_unsupported") expect_error(c(x), class = "vctrs_error_unsupported") expect_error(abs(x), class = "vctrs_error_unsupported") expect_error(x + 1, class = "vctrs_error_unsupported") expect_error(Arg(x), class = "vctrs_error_unsupported") expect_error(sum(x), class = "vctrs_error_unsupported") expect_error(order(x), class = "vctrs_error_unsupported") expect_error(levels(x), class = "vctrs_error_unsupported") expect_error(t(x), class = "vctrs_error_unsupported") expect_error(unique(x), class = "vctrs_error_unsupported") expect_error(duplicated(x), class = "vctrs_error_unsupported") expect_error(anyDuplicated(x), class = "vctrs_error_unsupported") expect_error(as.logical(x), class = "vctrs_error_unsupported") expect_error(as.integer(x), class = "vctrs_error_unsupported") expect_error(as.double(x), class = "vctrs_error_unsupported") expect_error(as.character(x), class = "vctrs_error_unsupported") expect_error(as.Date(x), class = "vctrs_error_unsupported") expect_error(as.POSIXct(x), class = "vctrs_error_unsupported") }) test_that("summary is unimplemented", { x <- new_sclr(x = 1, y = 2) expect_error(summary(x), class = "vctrs_error_unimplemented") }) vctrs/tests/testthat/test-type-factor.R0000644000176200001440000001606415065005761017766 0ustar liggesuserstest_that("ptype methods are descriptive", { f <- factor() o <- ordered(character()) expect_equal(vec_ptype_abbr(f), "fct") expect_equal(vec_ptype_abbr(o), "ord") expect_equal(vec_ptype_full(f), "factor<>") expect_equal(vec_ptype_full(o), "ordered<>") }) test_that("slicing factors uses a proxy to not go through `[.factor`", { x <- factor("x") y <- ordered("y") local_methods( `[.factor` = function(x, ...) abort("should not be called") ) expect_identical(vec_slice(x, 1), x) expect_identical(vec_slice(y, 1), y) }) test_that("`vec_c()` throws the right error with subclassed factors (#1015)", { skip("Factors now have a `c()` method") a <- subclass(factor("a")) b <- subclass(factor("b")) # We used to return a subclass expect_identical(vec_c(a, a), subclass(factor(c("a", "a")))) # We used to fail if attributes were incoPatible expect_error(vec_c(a, b), class = "vctrs_error_incompatible_type") }) # Coercion ---------------------------------------------------------------- test_that("factor/character coercions are symmetric and unchanging", { types <- list( ordered(character()), factor(), character() ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) local_options(width = 200) expect_snapshot(print(mat)) }) test_that("factors level are unioned", { # This is technically incorrect, but because of R's existing behaviour # anything else will cause substantial friction. fa <- factor(levels = "a") fb <- factor(levels = "b") expect_equal(vec_ptype_common(fa, fb), factor(levels = c("a", "b"))) expect_equal(vec_ptype_common(fb, fa), factor(levels = c("b", "a"))) }) test_that("coercion errors with factors", { f <- factor(levels = "a") expect_error( vec_ptype_common(f, logical()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype_common(logical(), f), class = "vctrs_error_incompatible_type" ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { fct <- new_factor() expect_identical( vec_ptype2(fct, NA), vec_ptype2(NA, fct) ) fct <- new_ordered() expect_identical( vec_ptype2(fct, NA), vec_ptype2(NA, fct) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { i64 <- bit64::integer64() expect_identical( vec_ptype2(i64, NA), vec_ptype2(NA, i64) ) }) test_that("vec_ptype2() errors with malformed factors", { x <- structure(1, class = "factor") y <- factor("x") expect_error(vec_ptype2(x, y, x_arg = "z"), "`z` is a corrupt factor") expect_error(vec_ptype2(y, x, y_arg = "z"), "`z` is a corrupt factor") }) test_that("vec_ptype2() errors with malformed ordered factors", { x <- structure(1, class = c("ordered", "factor")) y <- as.ordered(factor("x")) expect_error(vec_ptype2(x, y, x_arg = "z"), "`z` is a corrupt ordered factor") expect_error(vec_ptype2(y, x, y_arg = "z"), "`z` is a corrupt ordered factor") }) test_that("ordered factors with different levels are not compatible", { expect_error( vec_ptype2(ordered("a"), ordered("b")), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(ordered("a"), ordered(c("a", "b"))), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), ordered("b")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), ordered(c("a", "b"))), class = "vctrs_error_incompatible_type" ) }) test_that("factors and ordered factors are not compatible", { expect_error( vec_ptype2(factor("a"), ordered("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(ordered("a"), factor("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(factor("a"), ordered("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), factor("a")), class = "vctrs_error_incompatible_type" ) }) # Casting ----------------------------------------------------------------- test_that("safe casts work as expected", { fa <- factor("a") fab <- factor(c("a", "b")) expect_equal(vec_cast(NULL, fa), NULL) expect_equal(vec_cast(fa, fa), fa) expect_equal(vec_cast(fa, fab), fab[1]) expect_equal(vec_cast("a", fab), fab[1]) expect_equal(vec_cast("a", factor()), fa) expect_equal(vec_cast(fa, factor()), fa) # This used to be allowed expect_error( vec_cast(list("a", "b"), fab), class = "vctrs_error_incompatible_type" ) }) test_that("can cast to character", { expect_equal(vec_cast(factor("X"), character()), "X") expect_equal(vec_cast(ordered("X"), character()), "X") }) test_that("can cast NA and unspecified to factor", { expect_identical(vec_cast(NA, new_factor()), factor(NA)) expect_identical(vec_cast(NA, new_ordered()), ordered(NA)) expect_identical(vec_cast(unspecified(2), new_factor()), factor(c(NA, NA))) expect_identical(vec_cast(unspecified(2), new_ordered()), ordered(c(NA, NA))) }) test_that("lossy factor casts fail", { fa <- factor("a") fb <- factor("b") expect_lossy(vec_cast(fa, fb), factor(NA, levels = "b"), x = fa, to = fb) expect_lossy(vec_cast("a", fb), factor(NA, levels = "b"), x = chr(), to = fb) }) test_that("invalid casts generate error", { expect_error( vec_cast(double(), factor("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(factor("a"), logical()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(ordered("a"), logical()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(logical(), factor("a")), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(logical(), ordered("a")), class = "vctrs_error_incompatible_type" ) }) test_that("orderedness of factor is preserved", { ord <- ordered(c("a", "b"), levels = c("b", "a")) expect_equal(vec_cast("a", ord), ordered("a", levels = c("b", "a"))) }) test_that("NA are not considered lossy in factor cast (#109)", { f <- factor(c("itsy", "bitsy", NA, "spider", "spider")) expect_warning(vec_cast(f, f[1]), NA) }) test_that("Casting to a factor with explicit NA levels retains them", { f <- factor(c("x", NA), exclude = NULL) expect_identical(vec_cast(f, f), f) expect_identical(vec_cast(f, factor()), f) }) test_that("characters can be cast to ordered", { expect_identical(vec_cast("a", ordered("a")), ordered("a")) expect_error( vec_cast(c("a", "b"), ordered("a")), class = "vctrs_error_cast_lossy" ) }) # Proxy / restore --------------------------------------------------------- test_that("subclassed factors / ordered factors can be restored (#1015)", { x <- subclass(factor("a")) proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) y <- subclass(ordered("a")) proxy <- vec_proxy(y) expect_identical(vec_restore(proxy, y), y) }) # Arithmetic and factor --------------------------------------------------- test_that("factors don't support math or arthimetic", { f <- factor("x") expect_error(vec_math("sum", f), class = "vctrs_error_unsupported") expect_error(vec_arith("+", f, f), class = "vctrs_error_unsupported") }) vctrs/tests/testthat/helper-order.R0000644000176200001440000000075515113325071017135 0ustar liggesusers# Keep in sync with macros in `order.c` GROUP_DATA_SIZE_DEFAULT <- 100000L ORDER_INSERTION_BOUNDARY <- 128L INT_ORDER_COUNTING_RANGE_BOUNDARY <- 100000L # Force radix method for character comparisons base_order <- function(x, na.last = TRUE, decreasing = FALSE) { if (is.data.frame(x)) { x <- unname(x) } else { x <- list(x) } args <- list( na.last = na.last, decreasing = decreasing, method = "radix" ) args <- c(x, args) rlang::exec("order", !!!args) } vctrs/tests/testthat/helper-rational.R0000644000176200001440000000401015065005761017626 0ustar liggesusers# Rational record class from the S3 vector vignette new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer.") } if (!is_integer(d)) { abort("`d` must be an integer.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } rational <- function(n, d) { args <- vec_cast_common(n, d, .to = integer()) args <- vec_recycle_common(!!!args) new_rational(args[[1L]], args[[2L]]) } format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_proxy_equal.vctrs_rational <- function(x) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational_methods <- list( vec_ptype_abbr.vctrs_rational = function(x, ...) { "rtnl" }, vec_ptype_full.vctrs_rational = function(x, ...) { "rational" }, vec_ptype2.vctrs_rational = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_rational") }, vec_ptype2.vctrs_rational.vctrs_rational = function(x, y, ...) { new_rational() }, vec_ptype2.vctrs_rational.integer = function(x, y, ...) { new_rational() }, vec_ptype2.integer.vctrs_rational = function(x, y, ...) { new_rational() }, vec_cast.vctrs_rational = function(x, to, ...) { UseMethod("vec_cast.vctrs_rational") }, vec_cast.vctrs_rational.vctrs_rational = function(x, to, ...) { x }, vec_cast.double.vctrs_rational = function(x, to, ...) { field(x, "n") / field(x, "d") }, vec_cast.vctrs_rational.integer = function(x, to, ...) { rational(x, 1) }, vec_proxy_equal.vctrs_rational = vec_proxy_equal.vctrs_rational, vec_proxy_compare.vctrs_rational = vec_proxy_compare.vctrs_rational ) local_rational_class <- function(frame = caller_env()) { local_methods(.frame = frame, !!!rational_methods) } vctrs/tests/testthat/test-type-sf.R0000644000176200001440000001725315157322145017121 0ustar liggesusers# Never run on CRAN, even if they have sf, because we don't regularly # check these on CI and we don't want a change in sf to force a CRAN # failure for vctrs. skip_on_cran() # Avoids adding `sf` to Suggests. # These tests are only run on the devs' machines. testthat_import_from( "sf", c( "st_sf", "st_sfc", "st_point", "st_bbox", "st_precision", "st_crs", "st_linestring", "st_as_sf", "st_multipoint" ) ) # Need recent version to work around restore bug for sfc lists and changes # to `c.sfc()` skip_if_not_installed("sf", "1.0-11") test_that("sf has a ptype2 method", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) out = vctrs::vec_ptype2(sf1, sf2) exp = st_sf( x = double(), y = character(), geo1 = sfc1[0], geo2 = sfc2[0], stringsAsFactors = FALSE ) expect_identical(out, exp) out = vctrs::vec_ptype2(sf1, new_data_frame(sf2)) expect_identical(out, exp) out = vctrs::vec_ptype2(new_data_frame(sf1), sf2) exp_rhs = st_sf( x = double(), y = character(), geo1 = sfc1[0], geo2 = sfc2[0], stringsAsFactors = FALSE, sf_column_name = "geo2" ) expect_identical(out, exp_rhs) }) test_that("sf has a cast method", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) expect_error( vctrs::vec_cast(sf1, sf2), class = "vctrs_error_cast_lossy" ) expect_error( vctrs::vec_cast(sf2, sf1), class = "vctrs_error_cast_lossy" ) common = vec_ptype2(sf1, sf2) out = vctrs::vec_cast(sf1, common) exp = st_sf( x = c(1, 0), y = character(2)[NA], geo1 = sfc1, geo2 = sfc2[c(NA, NA) + 0L], stringsAsFactors = FALSE ) expect_identical(out, exp) out = vctrs::vec_cast(new_data_frame(sf1), common) expect_identical(out, exp) out = vctrs::vec_cast(sf1, new_data_frame(common)) expect_identical(out, new_data_frame(exp)) out = vctrs::vec_cast(sf2, common) exp = st_sf( x = 0, y = "", geo1 = sfc1[NA + 0L], geo2 = sfc2, stringsAsFactors = FALSE ) expect_identical(out, exp) }) # https://github.com/r-lib/vctrs/issues/1136 test_that("can combine sf data frames", { testthat_import_from("dplyr", "bind_rows") sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) exp = data_frame( x = c(1, 0, 0), geo1 = sfc1[c(1:2, NA)], y = c(NA, NA, ""), geo2 = sfc2[c(NA, NA, 1)] ) expect_identical(vctrs::vec_rbind(sf1, sf2), exp) expect_identical(bind_rows(sf1, sf2), st_as_sf(exp)) exp = data_frame( y = c("", NA, NA, ""), x = c(0, 1, 0, 0), geo2 = sfc2[c(1, NA, NA, 1)], geo1 = sfc1[c(NA, 1:2, NA)] ) expect_identical(vctrs::vec_rbind(sf2, sf1, sf2), exp) expect_identical(bind_rows(sf2, sf1, sf2), st_as_sf(exp)) }) test_that("can combine sf and tibble", { sfc1 = st_sfc(st_point(1:2), st_point(3:4)) sfc2 = st_sfc(st_linestring(matrix(1:4, 2))) sf1 = st_sf(x = c(TRUE, FALSE), geo1 = sfc1) sf2 = st_sf(y = "", geo2 = sfc2, x = 0, stringsAsFactors = FALSE) out = vctrs::vec_rbind(sf2, data.frame(x = 1)) exp = data_frame( y = c("", NA), x = c(0, 1), geo2 = sfc2[c(1L, NA)] ) expect_identical(out, exp) out = vctrs::vec_rbind(sf2, tibble::tibble(x = 1)) expect_identical(out, exp) out = vctrs::vec_rbind(tibble::tibble(x = 1), sf2) exp = data_frame( x = c(1, 0), y = c(NA, ""), geo2 = sfc2[c(NA, 1L)] ) expect_identical(out, exp) }) # https://github.com/r-spatial/sf/issues/1390 test_that("can combine sfc lists", { ls <- st_linestring(matrix(1:3, ncol = 3)) sfc <- st_sfc(ls) expect_identical(vec_c(sfc, sfc), c(sfc, sfc)) sf <- st_as_sf(data.frame(id = 1, geometry = sfc)) # Currently returns a bare data frame because of the workaround for # the `c()` fallback sentinels expect_identical(vec_rbind(sf, sf), new_data_frame(rbind(sf, sf))) expect_identical(vec_rbind(sf, sf, sf), new_data_frame(rbind(sf, sf, sf))) }) test_that("can combine sfc lists with unspecified chunks", { point <- st_point(1:2) out <- vec_c(c(NA, NA), st_sfc(point), NA) expect_identical(out, st_sfc(NA, NA, point, NA)) multipoint <- st_multipoint(matrix(1:4, 2)) x <- st_sfc(point) y <- st_sfc(multipoint, multipoint) out <- vec_rbind( data_frame(x = x), data_frame(y = y) ) expect_identical( out, data_frame( x = st_sfc(point, NA, NA), y = st_sfc(NA, multipoint, multipoint) ) ) }) test_that("`n_empty` attribute of `sfc` vectors is restored", { pt1 = st_sfc(st_point(c(NA_real_, NA_real_))) pt2 = st_sfc(st_point(0:1)) x = c(pt1, pt2) expect_identical(attr(vctrs::vec_slice(x, 1), "n_empty"), 1L) expect_identical(attr(vctrs::vec_slice(x, 2), "n_empty"), 0L) combined = vctrs::vec_c(pt1, pt2, pt1) expect_length(combined, 3) expect_identical(attr(combined, "n_empty"), 2L) }) test_that("bbox attributes of `sfc` vectors are restored", { pt1 = st_sfc(st_point(c(1L, 2L))) pt2 = st_sfc(st_point(c(10L, 20L))) x = c(pt1, pt2) expect_identical(st_bbox(vctrs::vec_slice(x, 1)), st_bbox(pt1)) expect_identical(st_bbox(vctrs::vec_slice(x, 2)), st_bbox(pt2)) combined = vctrs::vec_c(pt1, pt2) expect_identical(st_bbox(x), st_bbox(combined)) }) test_that("`precision` and `crs` attributes of `sfc` vectors are restored", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) out = vctrs::vec_slice(x, 1) expect_identical(st_precision(x), st_precision(out)) expect_identical(st_crs(x), st_crs(out)) }) test_that("`precision` attributes of `sfc` vectors are combined", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 3857) out = vctrs::vec_c(x, y) expect_identical(st_precision(x), st_precision(out)) # These used to be errors before we fell back to c() y = st_sfc(st_point(c(0, 0)), precision = 1e-2, crs = 3857) expect_identical(vctrs::vec_c(x, y), c(x, y)) # expect_error(vctrs::vec_c(x, y), "precisions not equal") }) test_that("`crs` attributes of `sfc` vectors must be the same", { x = st_sfc(st_point(c(pi, pi)), precision = 1e-4, crs = 3857) y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 3857) out = vctrs::vec_c(x, y) expect_identical(st_crs(x), st_crs(out)) # Error on different `crs` comes from sf as of 1.0-10 y = st_sfc(st_point(c(0, 0)), precision = 1e-4, crs = 4326) expect_snapshot(error = TRUE, { vctrs::vec_c(x, y) }) }) test_that("`vec_locate_matches()` works with `sfc` vectors", { x <- c( st_sfc(st_point(c(0, 0))), st_sfc(st_point(c(0, 1))), st_sfc(st_point(c(2, 1))), st_sfc(c(st_point(c(0, 1)), st_point(c(0, 1)))) ) y <- c( st_sfc(c(st_point(c(0, 1)), st_point(c(0, 1)))), st_sfc(st_point(c(0, 0))), st_sfc(st_point(c(0, 3))), st_sfc(st_point(c(0, 0))), st_sfc(st_point(c(0, 1))) ) out <- vec_locate_matches(x, y) expect_identical(out$needles, c(1L, 1L, 2L, 3L, 4L)) expect_identical(out$haystack, c(2L, 4L, 5L, NA, 1L)) }) test_that("`vec_rbind()` doesn't leak common type fallbacks (#1331)", { sf = st_sf(id = 1:2, geo = st_sfc(st_point(c(1, 1)), st_point(c(2, 2)))) expect_equal( vec_rbind(sf, sf), data_frame(id = rep(1:2, 2), geo = rep(sf$geo, 2)) ) expect_equal( vec_rbind(sf, sf, .names_to = "id"), data_frame(id = rep(1:2, each = 2), geo = rep(sf$geo, 2)) ) }) vctrs/tests/testthat/test-s4.R0000644000176200001440000000402215113335375016047 0ustar liggesuserstest_that("basics", { x <- rando(10) expect_true(vec_is(x)) expect_equal(vec_size(x), 10) expect_identical(vec_ptype_common(x, x), vec_ptype(x)) }) test_that("casting of rando works", { x <- as_rando(1:10) expect_equal(vec_cast(x, rando()), x) expect_equal(vec_cast(NA, rando()), as_rando(NA)) expect_equal(vec_cast(unspecified(2), rando()), as_rando(c(NA, NA))) expect_error(vec_cast(x, factor()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(factor(), x), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2 for rando works", { x <- as_rando(1:10) expect_equal(vec_ptype(vec_ptype2(x, x)), rando()) expect_equal(vec_ptype2(x, NA), rando()) expect_equal(vec_ptype2(NA, x), rando()) expect_equal(vec_ptype2(unspecified(), x), rando()) expect_equal(vec_ptype2(x, unspecified()), rando()) expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, ""), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2("", x), class = "vctrs_error_incompatible_type") expect_error( vec_ptype2(data.frame(), x), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(x, data.frame()), class = "vctrs_error_incompatible_type" ) }) test_that("vec_ptype_abbr.rando", { expect_equal(vec_ptype_abbr(as_rando(1:10)), "vctrs_rn") expect_equal(vec_ptype_full(as_rando(1:10)), "vctrs_rando") }) test_that("proxy and data", { x <- rando(10) expect_identical(vec_ptype(vec_proxy(x)), x[0]) expect_identical(vec_data(x), x@.Data) expect_false(isS4(vec_data(x))) expect_s4_class(vec_restore(vec_data(x), x), "vctrs_rando") expect_true(isS4(vec_restore(vec_data(x), x))) }) test_that("as_not_s4() copies and works", { # Initial condition x <- rando() expect_true(isS4(x)) # Unsetting has no side effect on x as_not_s4(x) expect_true(isS4(x)) # Unsetting actually works y <- as_not_s4(x) expect_false(isS4(y)) }) vctrs/tests/testthat/helper-list-of-transpose.R0000644000176200001440000000014615120272011021376 0ustar liggesusers# Infers both type and size list_of2 <- function(...) { list_of(..., .ptype = NULL, .size = NULL) } vctrs/tests/testthat/test-case-when.R0000644000176200001440000003240715120272011017364 0ustar liggesuserstest_that("works with data frames", { conditions <- list( c(FALSE, TRUE, FALSE, FALSE), c(TRUE, TRUE, FALSE, FALSE), c(FALSE, TRUE, FALSE, TRUE) ) values <- list( data_frame(x = 1, y = 2), data_frame(x = 3, y = 4), data_frame(x = 3:6, y = 4:7) ) out <- vec_case_when(conditions, values) expect_identical( out, data_frame( x = c(3, 1, NA, 6), y = c(4, 2, NA, 7) ) ) }) test_that("first `TRUE` case wins", { conditions <- list( c(TRUE, FALSE), c(TRUE, TRUE), c(TRUE, TRUE) ) values <- list( 1, 2, 3 ) expect_identical( vec_case_when(conditions, values), c(1, 2) ) }) test_that("can replace missing values by catching with `is.na()`", { x <- c(1:3, NA) conditions <- list( x <= 1, x <= 2, is.na(x) ) values <- list( 1, 2, 0 ) expect_identical( vec_case_when(conditions, values), c(1, 2, NA, 0) ) }) test_that("Unused logical `NA` can still be cast to `values` ptype", { # Requires that casting happen before recycling, because it recycles # to size zero, resulting in a logical rather than an unspecified. expect_identical( vec_case_when(list(TRUE, FALSE), list("x", NA)), "x" ) expect_identical( vec_case_when(list(FALSE, TRUE), list("x", NA)), NA_character_ ) }) test_that("`conditions` inputs can be size zero", { expect_identical( vec_case_when( list(logical(), logical()), list(1, 2) ), numeric() ) expect_snapshot(error = TRUE, { vec_case_when(list(logical()), list(1:2)) }) }) test_that("retains inner names of `values` inputs", { value1 <- c(x = 1, y = 2) value2 <- c(z = 3, w = 4) out <- vec_case_when( list(c(TRUE, FALSE), c(TRUE, TRUE)), list(a = value1, b = value2) ) expect_named(out, c("x", "w")) }) test_that("outer names have no affect over the output names", { value1 <- c(1, 2) value2 <- c(3, 4) out <- vec_case_when( list(c(TRUE, FALSE), c(FALSE, TRUE)), list(x = value1, y = value2) ) expect_named(out, NULL) }) test_that("`values` are cast to their common type", { expect_identical(vec_case_when(list(FALSE, TRUE), list(1, 2L)), 2) expect_identical(vec_case_when(list(FALSE, TRUE), list(1, NA)), NA_real_) expect_snapshot(error = TRUE, { vec_case_when(list(FALSE, TRUE), list(1, "x")) }) }) test_that("`values` must be size 1 or same size as the `conditions`", { expect_identical( vec_case_when( list(c(TRUE, TRUE)), list(1) ), c(1, 1) ) expect_identical( vec_case_when( list(c(TRUE, FALSE), c(TRUE, TRUE)), list(c(1, 2), c(3, 4)) ), c(1, 4) ) expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE, TRUE, TRUE)), list(1:3) ) }) }) test_that("Unhandled `NA` are given a value of `default`", { expect_identical( vec_case_when(list(NA), list(1)), NA_real_ ) expect_identical( vec_case_when(list(NA), list(1), default = 2), 2 ) expect_identical( vec_case_when( list( c(FALSE, NA, TRUE, FALSE), c(NA, FALSE, TRUE, FALSE) ), list( 2, 3 ), default = 4 ), c(4, 4, 2, 4) ) }) test_that("`NA` is overridden by any `TRUE` values", { x <- c(1, 2, NA, 3) expect <- c("one", "not_one", "missing", "not_one") # `TRUE` overriding before the `NA` conditions <- list( is.na(x), x == 1 ) values <- list( "missing", "one" ) expect_identical( vec_case_when( conditions, values, default = "not_one" ), expect ) # `TRUE` overriding after the `NA` conditions <- list( x == 1, is.na(x) ) values <- list( "one", "missing" ) expect_identical( vec_case_when( conditions, values, default = "not_one" ), expect ) }) test_that("works when there is a used `default` and no missing values", { expect_identical( vec_case_when(list(c(TRUE, FALSE)), list(1), default = 3:4), c(1, 4) ) }) test_that("works when there are missing values but no `default`", { expect_identical(vec_case_when(list(c(TRUE, NA)), list(1)), c(1, NA)) }) test_that("A `NULL` `default` fills in with missing values", { expect_identical( vec_case_when(list(c(TRUE, FALSE, FALSE)), list(1)), c(1, NA, NA) ) }) test_that("`default` fills in all unused slots", { expect_identical( vec_case_when(list(c(TRUE, FALSE, FALSE)), list(1), default = 2), c(1, 2, 2) ) }) test_that("`default` is initialized correctly in the logical / unspecified case", { # i.e. `vec_ptype(NA)` is unspecified but the result should be finalized to logical expect_identical(vec_case_when(list(FALSE), list(NA)), NA) }) test_that("`default` can be vectorized, and is sliced to fit as needed", { out <- vec_case_when( list( c(FALSE, TRUE, FALSE, TRUE, FALSE), c(FALSE, TRUE, FALSE, FALSE, TRUE) ), list( 1:5, 6:10 ), default = 11:15 ) expect_identical(out, c(11L, 2L, 13L, 4L, 10L)) }) test_that("`default` must be size 1 or same size as `conditions` (exact same as any other `values` input)", { expect_snapshot(error = TRUE, { vec_case_when(list(FALSE), list(1L), default = 2:3) }) }) test_that("`default` participates in common type determination (exact same as any other `values` input)", { expect_identical(vec_case_when(list(FALSE), list(1L), default = 2), 2) }) test_that("`default` that is an unused logical `NA` can still be cast to `values` ptype", { # Requires that casting happen before recycling, because it recycles # to size zero, resulting in a logical rather than an unspecified. expect_identical(vec_case_when(list(TRUE), list("x"), default = NA), "x") }) test_that("`default` type is used when all values are logical `NA` (#2094)", { expect_identical( vec_case_when(list(TRUE), list(NA), default = "a"), NA_character_ ) expect_identical( vec_case_when(list(c(TRUE, FALSE)), list(NA), default = "a"), c(NA_character_, "a") ) }) test_that("`default_arg` can be customized", { expect_snapshot(error = TRUE, { vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") }) }) test_that("`conditions_arg` is validated", { expect_snapshot(error = TRUE, { vec_case_when(list("x"), list(1), conditions_arg = 1) }) }) test_that("`values_arg` is validated", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(lm(1 ~ 1)), values_arg = 1) }) }) test_that("`default_arg` is validated", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), default = "x", default_arg = 1) }) }) test_that("`conditions` must all be the same size", { expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE), TRUE), list(1, 2) ) }) expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2) ) }) }) test_that("`conditions` must be logical (and aren't cast to logical!)", { expect_snapshot(error = TRUE, { vec_case_when(list(1), list(2)) }) # Make sure input numbering is right in the error message! expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, 3.5), list(2, 4)) }) # `vec_as_location()` would not allow this either x <- structure(c(FALSE, TRUE), class = "my_logical") expect_snapshot(error = TRUE, { vec_case_when(list(x), list(1), default = 2) }) }) test_that("`conditions` are allowed to have attributes", { x <- structure(c(FALSE, TRUE), label = "foo") expect_identical(vec_case_when(list(x), list(1), default = 2), c(2, 1)) }) test_that("`conditions` can't be arrays (#6862)", { x <- array(TRUE, dim = c(3, 3)) y <- c("a", "b", "c") expect_snapshot(error = TRUE, { vec_case_when(list(x), list(y)) }) expect_snapshot(error = TRUE, { vec_case_when(list(x), list(y), size = 3) }) # Not even 1D arrays x <- array(TRUE, dim = 3) expect_snapshot(error = TRUE, { vec_case_when(list(x), list(y)) }) }) test_that("`size` overrides the `conditions` sizes", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), size = 5) }) expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2), size = 2 ) }) }) test_that("0 `conditions` result depends on `size` and `default` and `ptype`", { expect_identical( vec_case_when( conditions = list(), values = list() ), unspecified() ) expect_identical( vec_case_when( conditions = list(), values = list(), size = 0 ), unspecified() ) expect_identical( vec_case_when( conditions = list(), values = list(), size = 2 ), unspecified(2) ) expect_identical( vec_case_when( conditions = list(), values = list(), default = integer() ), integer() ) expect_identical( vec_case_when( conditions = list(), values = list(), default = 1L ), integer() ) expect_identical( vec_case_when( conditions = list(), values = list(), ptype = integer() ), integer() ) expect_identical( vec_case_when( conditions = list(), values = list(), size = 2L, default = 1L ), c(1L, 1L) ) expect_identical( vec_case_when( conditions = list(), values = list(), size = 2L, default = 1:2 ), c(1L, 2L) ) expect_identical( vec_case_when( conditions = list(), values = list(), size = 2L, ptype = integer() ), c(NA_integer_, NA_integer_) ) }) test_that("`vec_replace_when()` with empty `conditions` is a no-op", { x <- 1:5 expect_identical( vec_replace_when(x, conditions = list(), values = list()), x ) }) test_that("`ptype` overrides the `values` types", { expect_identical( vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = integer()), 2L ) expect_snapshot(error = TRUE, { vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) }) }) test_that("number of `conditions` and `values` must be the same", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list()) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, TRUE), list(1)) }) }) test_that("dots must be empty", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), 2) }) }) test_that("`conditions` must be a list", { expect_snapshot(error = TRUE, { vec_case_when(1, list(2)) }) }) test_that("`values` must be a list", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), 1) }) }) test_that("named inputs show up in the error message", { expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1)) }) expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1), conditions_arg = "") }) expect_snapshot(error = TRUE, { vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) }) expect_snapshot(error = TRUE, { vec_case_when( list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo" ) }) expect_snapshot(error = TRUE, { vec_case_when( list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "" ) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y")) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL)) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL)) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL), values_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL), values_arg = "") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL), values_arg = "") }) }) test_that("proof that `ptype` finalization is important", { # Imagine you have an input logical vector you are remapping # and it happens to only have `NA`s x <- c(NA, NA) conditions <- list(x %in% NA) values <- list(FALSE) # If no `ptype` finalization happened, then `ptype = x` would result in # `unspecified` being the output type and these would error. `list_combine()` # now does `ptype` finalization when an explicit `ptype` is provided, so this # works. expect_identical( vec_case_when(conditions, values, default = x, ptype = x), c(FALSE, FALSE) ) expect_identical( vec_replace_when(x, conditions, values), c(FALSE, FALSE) ) }) test_that("`unmatched` errors are correct", { conditions <- list( c(TRUE, FALSE, TRUE, FALSE, FALSE, NA, NA, NA, TRUE), c(FALSE, TRUE, TRUE, FALSE, NA, FALSE, NA, TRUE, NA) ) values <- list( 1, 2 ) expect_snapshot(error = TRUE, { vec_case_when(conditions, values, unmatched = "error") }) }) vctrs/tests/testthat/test-conditions.R0000644000176200001440000001367715065005761017711 0ustar liggesuserstest_that("conditions inherit from `vctrs_error`", { expect_error(stop_incompatible(NULL, NULL), class = "vctrs_error") expect_error( stop_incompatible_type(NULL, NULL, x_arg = "x", y_arg = "y"), class = "vctrs_error" ) expect_error( stop_incompatible_cast(NULL, NULL, x_arg = "x", to_arg = "to"), class = "vctrs_error" ) expect_error(stop_incompatible_op("", NULL, NULL), class = "vctrs_error") expect_error( stop_incompatible_size(NULL, NULL, 0, 0, x_arg = "x", y_arg = "y"), class = "vctrs_error" ) expect_error( maybe_lossy_cast(NULL, NULL, NULL, TRUE, x_arg = "x", to_arg = "to"), class = "vctrs_error" ) expect_error(stop_unsupported("", ""), class = "vctrs_error") expect_error(stop_unimplemented("", ""), class = "vctrs_error") expect_error(stop_scalar_type(NULL), class = "vctrs_error") expect_error(stop_names(), class = "vctrs_error") expect_error(stop_names_cannot_be_empty(""), class = "vctrs_error") expect_error(stop_names_cannot_be_dot_dot("..1"), class = "vctrs_error") expect_error(stop_names_must_be_unique("x"), class = "vctrs_error") }) test_that("incompatible cast throws an incompatible type error", { err <- expect_error( stop_incompatible_cast(1, 2, x_arg = "x", to_arg = "to"), class = "vctrs_error_incompatible_type" ) expect_equal(err$x, 1) expect_equal(err$y, 2) expect_equal(err$x_arg, "x") expect_equal(err$y_arg, "to") # Convenience aliases expect_equal(err$to, err$y) expect_equal(err$to_arg, err$y_arg) }) test_that("incompatible type error validates `action`", { expect_snapshot({ (expect_error(stop_incompatible_type( 1, 1, x_arg = "", y_arg = "", action = "c" ))) (expect_error(stop_incompatible_type( 1, 1, x_arg = "", y_arg = "", action = 1 ))) }) }) test_that("can override arg in OOB conditions", { expect_snapshot({ (expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), NULL ), class = "vctrs_error_subscript_oob" )) (expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), quote(foo) ), class = "vctrs_error_subscript_oob" )) (expect_error( with_subscript_data( vec_slice(set_names(letters), "foo"), quote(foo(bar)) ), class = "vctrs_error_subscript_oob" )) }) }) test_that("scalar type errors are informative", { expect_snapshot({ (expect_error( vec_slice(foobar(list(1)), 1), class = "vctrs_error_scalar_type" )) (expect_error( stop_scalar_type(foobar(list(1)), arg = "foo"), class = "vctrs_error_scalar_type" )) }) }) test_that("empty names errors are informative", { expect_snapshot({ (expect_error( vec_as_names(c("x", "", "y"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" )) (expect_error( vec_as_names(c("x", "", "y", ""), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" )) (expect_error( vec_as_names(rep("", 10), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty" )) }) }) test_that("dot dot names errors are informative", { expect_snapshot({ (expect_error( vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot" )) (expect_error( vec_as_names( c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), repair = "check_unique" ), class = "vctrs_error_names_cannot_be_dot_dot" )) }) }) test_that("unique names errors are informative", { expect_snapshot({ (expect_error( vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique"), class = "vctrs_error_names_must_be_unique" )) (expect_error( vec_as_names( c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique" ), class = "vctrs_error_names_must_be_unique" )) }) }) test_that("can't supply both `message` and `details`", { expect_error( stop_incompatible_type( 1, 2, message = "my message", x_arg = "x", y_arg = "y" ), "my message", class = "vctrs_error_incompatible_type" ) expect_error( stop_incompatible_type( 1, 2, message = "my message", details = "my details", x_arg = "x", y_arg = "y" ), "Can't supply both `message` and `details`." ) }) test_that("lossy cast errors are internal", { # Should not trigger testthat warnings about untested class expect_error(vec_cast(mtcars, mtcars[1:3]), "convert") expect_error(vec_cast(1.5, int()), "convert") }) test_that("lossy cast from character to factor mentions loss of generality", { expect_snapshot({ (expect_error(vec_cast("a", factor("b")), class = "vctrs_error_cast_lossy")) }) }) test_that("lossy cast `conditionMessage()` result matches `cnd_message()` (#1592)", { cnd <- catch_cnd(vec_cast(1.5, to = integer())) expect_identical(conditionMessage(cnd), cnd_message(cnd)) expect_snapshot({ cat(conditionMessage(cnd)) }) }) test_that("ordered cast failures mention conversion", { expect_snapshot({ (expect_error( vec_cast(ordered("x"), ordered("y")), class = "vctrs_error_incompatible_type" )) }) }) test_that("incompatible size errors", { expect_snapshot({ (expect_error(stop_incompatible_size( 1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "" ))) (expect_error(stop_incompatible_size( 1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = "" ))) (expect_error(stop_incompatible_size( 1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar" ))) (expect_error(stop_incompatible_size( 1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar) ))) }) }) vctrs/tests/testthat/test-equal.R0000644000176200001440000003350115157004241016625 0ustar liggesusers# vectorised -------------------------------------------------------------- test_that("throws error for unsuported type", { expect_snapshot(error = TRUE, cnd_class = TRUE, { vec_equal(expression(x), expression(x)) }) }) test_that("correct behaviour for basic vectors", { expect_equal(vec_equal(c(TRUE, FALSE), TRUE), c(TRUE, FALSE)) expect_equal(vec_equal(c(1L, 2L), 1L), c(TRUE, FALSE)) expect_equal(vec_equal(c(1, 2), 1), c(TRUE, FALSE)) expect_equal(vec_equal(c("1", "2"), "1"), c(TRUE, FALSE)) expect_equal(vec_equal(as.raw(1:2), as.raw(1L)), c(TRUE, FALSE)) expect_equal(vec_equal(list(1:3, 1:2), list(1:3)), c(TRUE, FALSE)) expect_equal(vec_equal(list(1:3, 1.5), list(1:3)), c(TRUE, FALSE)) expect_equal( vec_equal(list(as.raw(1:3), as.raw(1.5)), list(as.raw(1:3))), c(TRUE, FALSE) ) expect_equal(vec_equal(list(1 + 1i, 1 + 0i), list(1 + 1i)), c(TRUE, FALSE)) expect_equal(vec_equal(c(1, 2) + 1i, 1 + 1i), c(TRUE, FALSE)) }) test_that("NAs are equal", { expect_true(vec_equal(NA, NA, na_equal = TRUE)) expect_true(vec_equal(NA_integer_, NA_integer_, na_equal = TRUE)) expect_true(vec_equal(NA_real_, NA_real_, na_equal = TRUE)) expect_true(vec_equal(NA_character_, NA_character_, na_equal = TRUE)) expect_true(vec_equal(list(NULL), list(NULL), na_equal = TRUE)) }) test_that("double special values", { expect_equal(vec_equal(c(NaN, NA), NaN, na_equal = TRUE), c(TRUE, FALSE)) expect_equal(vec_equal(c(NA, NaN), NA, na_equal = TRUE), c(TRUE, FALSE)) expect_true(vec_equal(Inf, Inf)) expect_true(vec_equal(-Inf, -Inf)) }) test_that("`list(NULL)` is considered a missing value (#653)", { expect_equal(vec_equal(list(NULL), list(NULL)), NA) expect_equal(vec_equal(list(NULL), list(1)), NA) }) test_that("can compare data frames", { df <- data.frame(x = 1:2, y = letters[2:1], stringsAsFactors = FALSE) expect_equal(vec_equal(df, df[1, ]), c(TRUE, FALSE)) }) test_that("can compare data frames with various types of columns", { x1 <- data_frame(x = 1, y = 2) y1 <- data_frame(x = 2, y = 1) x2 <- data_frame(x = "a") y2 <- data_frame(x = "b") x3 <- data_frame(x = FALSE) y3 <- data_frame(x = TRUE) x4 <- data_frame(x = 1L) y4 <- data_frame(x = 2L) x5 <- data_frame(x = as.raw(0)) y5 <- data_frame(x = as.raw(1)) x6 <- data_frame(x = 1 + 0i) y6 <- data_frame(x = 1 + 1i) expect_false(vec_equal(x1, y1)) expect_false(vec_equal(x2, y2)) expect_false(vec_equal(x3, y3)) expect_false(vec_equal(x4, y4)) expect_false(vec_equal(x5, y5)) expect_false(vec_equal(x6, y6)) }) test_that("can compare data frames with data frame columns", { df1 <- data_frame(x = data_frame(a = 1)) df2 <- data_frame(x = data_frame(a = 2)) expect_true(vec_equal(df1, df1)) expect_false(vec_equal(df1, df2)) }) test_that("can compare data frames with list columns", { df1 <- data_frame(x = list(a = 1, b = 2), y = c(1, 1)) df2 <- data_frame(x = list(a = 0, b = 2), y = c(1, 1)) expect_equal(vec_equal(df1, df2), c(FALSE, TRUE)) }) test_that("data frames are cast to common type", { expect_identical( vec_equal( data.frame(x = 1), data.frame(x = 1, y = 2), na_equal = TRUE ), FALSE ) expect_identical( vec_equal( data.frame(x = 1, y = 2, z = 2), data.frame(x = 1, y = 2), na_equal = TRUE ), FALSE ) expect_identical( vec_equal( data.frame(x = 1), data.frame(y = 1), na_equal = TRUE ), FALSE ) expect_identical( vec_equal( data.frame(x = 1), data.frame(x = 2) ), FALSE ) }) test_that("can compare data frames with 0 columns", { x <- new_data_frame(n = 1L) expect_true(vec_equal(x, x)) }) test_that("can compare lists of scalars (#643)", { lst <- list(new_sclr(x = 1)) expect_true(vec_equal(lst, lst)) # NA does not propagate lst <- list(new_sclr(y = NA)) expect_true(vec_equal(lst, lst)) df <- data.frame(x = c(1, 4, 3), y = c(2, 8, 9)) model <- lm(y ~ x, df) lst <- list(model) expect_true(vec_equal(lst, lst)) }) test_that("can determine equality of strings with different encodings (#553)", { for (x_encoding in encodings()) { for (y_encoding in encodings()) { expect_equal(vec_equal(x_encoding, y_encoding), TRUE) expect_equal(vec_equal(x_encoding, y_encoding), x_encoding == y_encoding) } } }) test_that("equality can be determined when strings have identical encodings", { encs <- encodings() for (enc in encs) { expect_true(vec_equal(enc, enc)) expect_equal(vec_equal(enc, enc), enc == enc) } }) test_that("equality is known to always fail with bytes", { enc <- encoding_bytes() error <- "translating strings with \"bytes\" encoding" expect_error(vec_equal(enc, enc), error) }) test_that("equality is known to fail when comparing bytes to other encodings", { error <- "translating strings with \"bytes\" encoding" for (enc in encodings()) { expect_error(vec_equal(encoding_bytes(), enc), error) expect_error(vec_equal(enc, encoding_bytes()), error) } }) test_that("`na_equal` is validated", { expect_snapshot(error = TRUE, { vec_equal(1, 1, na_equal = 1) }) expect_snapshot(error = TRUE, { vec_equal(1, 1, na_equal = c(TRUE, FALSE)) }) }) test_that("can compare lists of expressions", { x <- list(expression(x), expression(y)) y <- list(expression(x)) expect_equal(vec_equal(x, y), c(TRUE, FALSE)) }) test_that("vec_equal() silently falls back to base data frame", { expect_silent(expect_identical( vec_equal(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(TRUE, 32) )) }) test_that("recycling works in all cases", { # Both size 1 is its own path. Both "recycle". x <- 1 y <- 1 expect_identical(vec_equal(x, y), TRUE) x <- 1 y <- 1:2 expect_identical(vec_equal(x, y), c(TRUE, FALSE)) x <- 1:2 y <- 1 expect_identical(vec_equal(x, y), c(TRUE, FALSE)) x <- 1:2 y <- 1:2 expect_identical(vec_equal(x, y), c(TRUE, TRUE)) # Again, with data frames x <- data.frame(x = 1, y = 2) y <- data.frame(x = 1, y = 2) expect_identical(vec_equal(x, y), TRUE) x <- data.frame(x = 1, y = 2) y <- data.frame(x = 1:2, y = 2:3) expect_identical(vec_equal(x, y), c(TRUE, FALSE)) x <- data.frame(x = 1:2, y = 2:3) y <- data.frame(x = 1, y = 2) expect_identical(vec_equal(x, y), c(TRUE, FALSE)) x <- data.frame(x = 1:2, y = 2:3) y <- data.frame(x = 1:2, y = 2:3) expect_identical(vec_equal(x, y), c(TRUE, TRUE)) }) test_that("equal if attributes are in different order (treated like a map)", { x1 <- structure(1, foo = 1, bar = 2) x2 <- structure(1, bar = 2, foo = 1) expect_true(vec_equal(x1, x2)) x1 <- list(structure(expression(x = 1), foo = 1, bar = 2)) x2 <- list(structure(expression(x = 1), bar = 2, foo = 1)) expect_true(vec_equal(x1, x2)) x1 <- list(structure(pairlist(x = 1), foo = 1, bar = 2)) x2 <- list(structure(pairlist(x = 1), bar = 2, foo = 1)) expect_true(vec_equal(x1, x2)) # Want the same srcref x0 <- function() {} x1 <- list(structure(x0, foo = 1, bar = 2)) x2 <- list(structure(x0, bar = 2, foo = 1)) expect_true(vec_equal(x1, x2)) }) # object ------------------------------------------------------------------ test_that("can compare NULL", { expect_true(obj_equal(NULL, NULL)) }) test_that("can compare objects with reference semantics", { expect_true(obj_equal(globalenv(), globalenv())) expect_false(obj_equal(globalenv(), environment())) expect_true(obj_equal(quote(x), quote(x))) expect_false(obj_equal(quote(x), quote(y))) }) test_that("can compare pairlists", { expect_true(obj_equal(quote(x + y), quote(x + y))) expect_true(obj_equal(pairlist(x = 1, y = 2), pairlist(x = 1, y = 2))) }) test_that("can compare functions", { f1 <- function(x, y) x + y f2 <- function(x, y) x + y expect_false(obj_equal(f2, f1)) attr(f1, "srcref") <- NULL attr(f2, "srcref") <- NULL expect_true(obj_equal(f2, f1)) f3 <- f1 formals(f3) <- alist(x = 1) expect_false(obj_equal(f3, f1)) f4 <- f1 body(f4) <- quote(x) expect_false(obj_equal(f4, f2)) }) test_that("not equal if different types or lengths", { expect_false(obj_equal(1, 2)) expect_false(obj_equal(1:2, 1)) }) test_that("not equal if attributes not equal", { x1 <- structure(1:10, x = 1, y = 2) x2 <- structure(1:10, x = 1, y = 3) expect_false(obj_equal(x1, x2)) }) test_that("not equal if only one has attributes", { x1 <- structure(1, foo = 1) x2 <- 1 expect_false(obj_equal(x1, x2)) expect_false(obj_equal(x2, x1)) }) test_that("not equal if attribute tag names are different", { x1 <- structure(1, foo = 2) x2 <- structure(1, bar = 2) expect_false(obj_equal(x1, x2)) }) test_that("not equal if one has more attributes than the other", { x1 <- structure(1, foo = 1) x2 <- structure(1, foo = 1, bar = 2) expect_false(obj_equal(x1, x2)) expect_false(obj_equal(x2, x1)) }) test_that("equal if attributes are in different order (treated like a map)", { x1 <- structure(1, foo = 1, bar = 2) x2 <- structure(1, bar = 2, foo = 1) expect_true(obj_equal(x1, x2)) x1 <- structure(expression(x = 1), foo = 1, bar = 2) x2 <- structure(expression(x = 1), bar = 2, foo = 1) expect_true(obj_equal(x1, x2)) x1 <- structure(pairlist(x = 1), foo = 1, bar = 2) x2 <- structure(pairlist(x = 1), bar = 2, foo = 1) expect_true(obj_equal(x1, x2)) # Want the same srcref x0 <- function() {} x1 <- structure(x0, foo = 1, bar = 2) x2 <- structure(x0, bar = 2, foo = 1) expect_true(obj_equal(x1, x2)) }) test_that("can compare expressions", { expect_true(obj_equal(expression(x), expression(x))) expect_false(obj_equal(expression(x), expression(y))) # Attributes on the expression vectors expect_true(obj_equal( vec_set_attributes(expression(x), list(foo = 1)), vec_set_attributes(expression(x), list(foo = 1)) )) expect_false(obj_equal( vec_set_attributes(expression(x), list(foo = 1)), vec_set_attributes(expression(x), list(foo = 2)) )) # Length check expect_true(obj_equal(expression(x, y), expression(x, y))) expect_false(obj_equal(expression(x, y), expression(x))) }) test_that("language argument names are considered", { expect_false(obj_equal( call("fn", foo = 1), call("fn", bar = 1) )) }) test_that("pairlist tags are considered", { expect_false(obj_equal( pairlist(foo = 1), pairlist(bar = 1) )) }) test_that("attribute pairlist tags are considered", { expect_false(obj_equal( structure(1, foo = 1), structure(1, bar = 1) )) }) # na ---------------------------------------------------------------------- test_that("NA propagate symmetrically (#204)", { exp <- c(NA, NA) expect_identical(vec_equal(c(TRUE, FALSE), NA), exp) expect_identical(vec_equal(1:2, NA), exp) expect_identical(vec_equal(c(1, 2), NA), exp) expect_identical(vec_equal(letters[1:2], NA), exp) expect_identical(vec_equal(NA, c(TRUE, FALSE)), exp) expect_identical(vec_equal(NA, 1:2), exp) expect_identical(vec_equal(NA, c(1, 2)), exp) expect_identical(vec_equal(NA, letters[1:2]), exp) }) test_that("NA propagate from data frames columns", { x <- data.frame(x = 1:3) y <- data.frame(x = c(1L, NA, 2L)) expect_identical(vec_equal(x, y), c(TRUE, NA, FALSE)) expect_identical(vec_equal(y, x), c(TRUE, NA, FALSE)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(TRUE, FALSE, FALSE)) expect_identical(vec_equal(y, x, na_equal = TRUE), c(TRUE, FALSE, FALSE)) x <- data.frame(x = 1:3, y = 1:3) y <- data.frame(x = c(1L, NA, 2L), y = c(NA, 2L, 3L)) expect_identical(vec_equal(x, y), c(NA, NA, FALSE)) expect_identical(vec_equal(y, x), c(NA, NA, FALSE)) expect_identical(vec_equal(x, y, na_equal = TRUE), c(FALSE, FALSE, FALSE)) expect_identical(vec_equal(y, x, na_equal = TRUE), c(FALSE, FALSE, FALSE)) }) test_that("NA do not propagate from list components (#662)", { expect_true(obj_equal(NA, NA)) expect_true(vec_equal(list(NA), list(NA))) }) test_that("NA do not propagate from names when comparing objects", { x <- set_names(1:3, c("a", "b", NA)) y <- set_names(1:3, c("a", NA, NA)) expect_true(obj_equal(x, x)) expect_false(obj_equal(x, y)) expect_equal(vec_equal(list(x, x, y), list(x, y, y)), c(TRUE, FALSE, TRUE)) }) test_that("NA do not propagate from attributes", { x <- structure(1:3, foo = NA) y <- structure(1:3, foo = "") expect_true(obj_equal(x, x)) expect_false(obj_equal(x, y)) }) test_that("NA do not propagate from function bodies or formals", { fn <- other <- function() NA body(other) <- TRUE expect_true(vec_equal(list(fn), list(fn))) expect_false(vec_equal(list(fn), list(other))) expect_true(obj_equal(fn, fn)) expect_false(obj_equal(fn, other)) fn <- other <- function(x = NA) NULL formals(other) <- list(x = NULL) expect_true(vec_equal(list(fn), list(fn))) expect_false(vec_equal(list(fn), list(other))) }) test_that("can check equality of unspecified objects", { expect_equal(vec_equal(NA, NA), NA) expect_true(vec_equal(NA, NA, na_equal = TRUE)) expect_equal(vec_equal(unspecified(1), unspecified(1)), NA) expect_true(vec_equal(unspecified(1), unspecified(1), na_equal = TRUE)) expect_equal(vec_equal(NA, unspecified(1)), NA) expect_true(vec_equal(NA, unspecified(1), na_equal = TRUE)) }) test_that("can't supply NA as `na_equal`", { expect_snapshot(error = TRUE, { vec_equal(NA, NA, na_equal = NA) }) }) # proxy ------------------------------------------------------------------- test_that("vec_equal() takes vec_proxy() by default", { local_env_proxy() x <- new_proxy(1:3) y <- new_proxy(3:1) expect_identical(vec_equal(x, y), lgl(FALSE, TRUE, FALSE)) }) test_that("vec_equal() takes vec_proxy_equal() if implemented", { local_comparable_tuple() x <- tuple(1:3, 1:3) y <- tuple(1:3, 4:6) expect_identical(x == y, rep(TRUE, 3)) expect_identical(vec_equal(x, y), rep(TRUE, 3)) # Recursive case foo <- data_frame(x = x) bar <- data_frame(x = y) expect_identical(vec_equal(foo, bar), rep(TRUE, 3)) }) vctrs/tests/testthat/helper-memory.R0000644000176200001440000000022714276722575017347 0ustar liggesusersmaybe_shared_col <- function(x, i) { .Call(vctrs_maybe_shared_col, x, i) } new_df_unshared_col <- function() { .Call(vctrs_new_df_unshared_col) } vctrs/tests/testthat/test-assert.R0000644000176200001440000005456215120272011017021 0ustar liggesuserstest_that("basic assert is idempotent", { x <- new_vctr(1:4) expect_true(vec_is(x)) expect_identical(vec_assert(x), x) expect_identical(vec_assert(x), new_vctr(1:4)) expect_false(withVisible(vec_assert(x))$visible) expect_true(vec_is(1:4)) expect_identical(vec_assert(1:4), 1:4) }) test_that("asserting ptype", { x <- new_vctr(1:4) good <- new_vctr(integer()) expect_true(vec_is(x, good)) expect_error(vec_assert(x, good), NA) # Is this the correct error message? bad <- new_vctr(double()) expect_false(vec_is(x, bad)) expect_error(vec_assert(x, bad), class = "vctrs_error_assert_ptype") }) test_that("asserting size", { x <- new_vctr(1:4) expect_true(vec_is(x, size = 4)) expect_error(vec_assert(x, size = 4), NA) expect_false(vec_is(x, size = 5)) expect_error(vec_assert(x, size = 5), class = "vctrs_error_assert_size") }) test_that("vec_assert() labels input", { expect_error( vec_assert(new_vctr(1:4), size = 5), regexp = "`new_vctr\\(1:4\\)` must have", class = "vctrs_error_assert_size" ) expect_error( vec_assert(new_vctr(1:4), size = 5, arg = "foobar"), regexp = "`foobar` must have", class = "vctrs_error_assert_size" ) }) test_that("bare atomic vectors are vectors but not recursive", { expect_true(obj_is_vector(TRUE)) expect_true(obj_is_vector(1L)) expect_true(obj_is_vector(1)) expect_true(obj_is_vector(1i)) expect_true(obj_is_vector("foo")) expect_true(obj_is_vector(as.raw(1))) }) test_that("S3 atomic vectors are vectors", { expect_true(obj_is_vector(foobar(TRUE))) expect_true(obj_is_vector(foobar(1L))) expect_true(obj_is_vector(foobar(1))) expect_true(obj_is_vector(foobar(1i))) expect_true(obj_is_vector(foobar("foo"))) expect_true(obj_is_vector(foobar(as.raw(1)))) }) test_that("bare lists are vectors", { expect_true(obj_is_vector(list())) }) test_that("S3 lists are not vectors by default", { expect_false(obj_is_vector(foobar())) expect_false(obj_is_list(foobar())) local_foobar_proxy() # TODO: These seem inconsistent. # Should we require that S3 list proxies satisfy `obj_is_list()`? # (i.e. unclass themselves or explicitly inherit from `"list"`?) expect_true(obj_is_vector(foobar())) expect_false(obj_is_list(foobar())) }) test_that("data frames and records are vectors", { expect_true(obj_is_vector(mtcars)) expect_true(obj_is_vector(new_rcrd(list(x = 1, y = 2)))) }) test_that("non-vector base types are scalars", { expect_identical(vec_typeof(quote(foo)), "scalar") expect_identical(vec_typeof(pairlist("")), "scalar") expect_identical(vec_typeof(function() NULL), "scalar") expect_identical(vec_typeof(env()), "scalar") expect_identical(vec_typeof(quote(foo)), "scalar") expect_identical(vec_typeof(~foo), "scalar") expect_identical(vec_typeof(base::`{`), "scalar") expect_identical(vec_typeof(base::c), "scalar") expect_identical(vec_typeof(expression()), "scalar") expect_false(obj_is_vector(quote(foo))) expect_false(obj_is_vector(pairlist(""))) expect_false(obj_is_vector(function() NULL)) expect_false(obj_is_vector(env())) expect_false(obj_is_vector(~foo)) expect_false(obj_is_vector(base::`{`)) expect_false(obj_is_vector(base::c)) expect_false(obj_is_vector(expression())) expect_false(vec_is(quote(foo))) expect_false(vec_is(pairlist(""))) expect_false(vec_is(function() NULL)) expect_false(vec_is(env())) expect_false(vec_is(~foo)) expect_false(vec_is(base::`{`)) expect_false(vec_is(base::c)) expect_false(vec_is(expression())) expect_error(vec_assert(quote(foo)), class = "vctrs_error_scalar_type") expect_error(vec_assert(pairlist("")), class = "vctrs_error_scalar_type") expect_error(vec_assert(function() NULL), class = "vctrs_error_scalar_type") expect_error(vec_assert(env()), class = "vctrs_error_scalar_type") expect_error(vec_assert(~foo), class = "vctrs_error_scalar_type") expect_error(vec_assert(base::`{`), class = "vctrs_error_scalar_type") expect_error(vec_assert(base::c), class = "vctrs_error_scalar_type") expect_error(vec_assert(expression()), class = "vctrs_error_scalar_type") }) test_that("non-vector types can be proxied", { x <- new_proxy(1:3) expect_identical(vec_typeof(x), "scalar") expect_false(obj_is_vector(x)) expect_false(vec_is(x)) expect_error(vec_assert(x), class = "vctrs_error_scalar_type") local_env_proxy() expect_identical(vec_typeof(x), "integer") expect_true(obj_is_vector(x)) expect_true(vec_is(x)) expect_error(regexp = NA, vec_assert(x)) }) test_that("obj_check_vector() is silent on vectors", { expect_null(obj_check_vector(1)) expect_null(obj_check_vector(data_frame())) }) test_that("obj_check_vector() errors on scalars", { expect_snapshot(error = TRUE, { obj_check_vector(quote(foo)) }) expect_snapshot(error = TRUE, { obj_check_vector(foobar()) }) }) test_that("obj_check_vector() error respects `arg` and `call`", { my_check_vector <- function(foo) { obj_check_vector(foo) } expect_snapshot(error = TRUE, { my_check_vector(foobar()) }) }) test_that("obj_check_vector() error contains FAQ links and correct bullets", { # Expect to see: # - Link to general FAQ about scalar types x <- expression() expect_snapshot(error = TRUE, obj_check_vector(x)) # Expect to see: # - Bullet about incompatible S3 list # - Full class list # - Link to specific FAQ about creating vectors x <- structure(list(), class = "my_list") expect_snapshot(error = TRUE, obj_check_vector(x)) # Expect to see: # - Bullet about incompatible data frame # - Full class list # - Link to specific FAQ about creating vectors x <- data_frame(a = 1) class(x) <- c("data.frame", "my_df") expect_snapshot(error = TRUE, obj_check_vector(x)) }) test_that("vec_assert() uses friendly type in error messages", { # Friendly type will be generated in rlang in the future. Upstream # changes should not cause CRAN failures. skip_on_cran() expect_error(vec_assert(function() NULL), class = "vctrs_error_scalar_type") }) test_that("vec_typeof() handles all types", { for (i in seq_along(empty_types)) { expect_identical(vec_typeof(!!empty_types[[i]]), !!names(empty_types)[[i]]) } }) test_that("bare prototypes don't act as partial types", { expect_false(vec_is(foobar(1), dbl())) expect_error(vec_assert(foobar(1), dbl()), class = "vctrs_error_assert_ptype") }) test_that("data frames are always classified as such even when dispatch is off", { expect_identical(vec_typeof_bare(mtcars), "dataframe") }) test_that("assertion is not applied on proxy", { local_methods( vec_proxy.vctrs_foobar = function(x, ...) unclass(x), vec_restore.vctrs_foobar = function(x, ...) foobar(x), `[.vctrs_foobar` = function(x, i) vec_slice(x, i) ) x <- foobar(list()) expect_true(vec_is(x, x)) expect_false(vec_is(x, list())) expect_error(vec_assert(x, list()), class = "vctrs_error_assert_ptype") expect_error(vec_assert(x, x), regexp = NA) }) test_that("attributes of unclassed vectors are asserted", { x <- structure(FALSE, foo = "bar") y <- structure(TRUE, foo = "bar") expect_false(vec_is(x, FALSE)) expect_false(vec_is(FALSE, x)) expect_true(vec_is(y, x)) expect_true(vec_is(x, y)) }) test_that("unspecified is finalised before assertion", { expect_true(vec_is(NA, TRUE)) expect_true(vec_is(data.frame(x = NA), data.frame(x = lgl()))) expect_error(regexp = NA, vec_assert(NA, TRUE)) expect_error( regexp = NA, vec_assert(data.frame(x = NA), data.frame(x = lgl())) ) }) test_that("assertion failures are explained", { local_no_stringsAsFactors() local_options(rlang_backtrace_on_error = "none") expect_snapshot(error = TRUE, vec_assert(lgl(), chr())) expect_snapshot(error = TRUE, vec_assert(lgl(), factor())) expect_snapshot(error = TRUE, vec_assert(lgl(), factor(levels = "foo"))) expect_snapshot( error = TRUE, vec_assert(factor(levels = "bar"), factor(levels = "foo")) ) expect_snapshot(error = TRUE, vec_assert(factor(), chr())) expect_snapshot(error = TRUE, vec_assert(lgl(), data.frame())) expect_snapshot(error = TRUE, vec_assert(lgl(), data.frame(x = 1))) expect_snapshot(error = TRUE, vec_assert(lgl(), data.frame(x = 1, y = 2))) expect_snapshot(error = TRUE, vec_assert(data.frame(), chr())) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1), chr())) expect_snapshot( error = TRUE, vec_assert(data.frame(x = 1), data.frame(x = "foo")) ) expect_snapshot( error = TRUE, vec_assert(data.frame(x = 1), data.frame(x = "foo", y = 2)) ) expect_snapshot(error = TRUE, vec_assert(data.frame(x = 1, y = 2), chr())) expect_snapshot( error = TRUE, vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo")) ) expect_snapshot( error = TRUE, vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo", y = 2)) ) }) test_that("vec_assert() validates `size` (#1470)", { expect_snapshot({ (expect_error(vec_assert(1, size = c(2, 3)))) (expect_error(vec_assert(1, size = 1.5))) (expect_error(vec_assert(1, size = "x"))) }) }) test_that("NULL is not a vector", { expect_false(obj_is_vector(NULL)) expect_false(vec_is(NULL)) }) test_that("names and row names do not influence type identity (#707)", { expect_true(vec_is(c(a = TRUE), logical())) expect_true(vec_is(TRUE, c(a = TRUE))) expect_true(vec_is(structure(mtcars, row.names = 1:32), mtcars)) expect_true(vec_is(mtcars, structure(mtcars, row.names = 1:32))) }) # vec_check_size -------------------------------------------------------- test_that("vec_check_size() is silent if the size is right", { expect_null(vec_check_size(1:5, size = 5L)) expect_null(vec_check_size(data_frame(.size = 10L), size = 10L)) }) test_that("vec_check_size() errors on the wrong size", { expect_snapshot(error = TRUE, { vec_check_size(1:5, size = 1L) }) expect_snapshot(error = TRUE, { vec_check_size(1:5, size = 10L) }) }) test_that("vec_check_size() errors on scalars", { expect_snapshot(error = TRUE, { vec_check_size(quote(foo), size = 1L) }) expect_snapshot(error = TRUE, { vec_check_size(foobar(), size = 1L) }) }) test_that("vec_check_size() error respects `arg` and `call`", { my_check_size <- function(foo, size) { vec_check_size(foo, size) } expect_snapshot(error = TRUE, { my_check_size(1L, size = 5L) }) expect_snapshot(error = TRUE, { my_check_size(foobar(), size = 5L) }) }) test_that("vec_check_size() validates `size`", { expect_snapshot(error = TRUE, { vec_check_size(1, size = "x") }) expect_snapshot(error = TRUE, { vec_check_size(1, size = c(1L, 2L)) }) expect_snapshot(error = TRUE, { vec_check_size(1, size = 1.5) }) }) # vec_check_recyclable -------------------------------------------------------- test_that("vec_check_recyclable() is silent if the size is right", { expect_null(vec_check_recyclable(1:5, size = 5L)) expect_null(vec_check_recyclable(1, size = 5L)) expect_null(vec_check_recyclable(data_frame(.size = 10L), size = 10L)) expect_null(vec_check_recyclable(data_frame(.size = 1L), size = 10L)) }) test_that("vec_check_recyclable() errors on the wrong size", { expect_snapshot(error = TRUE, { vec_check_recyclable(1:5, size = 1L) }) expect_snapshot(error = TRUE, { vec_check_recyclable(1:5, size = 10L) }) }) test_that("vec_check_recyclable() errors on scalars", { expect_snapshot(error = TRUE, { vec_check_recyclable(quote(foo), size = 1L) }) expect_snapshot(error = TRUE, { vec_check_recyclable(foobar(), size = 1L) }) }) test_that("vec_check_recyclable() error respects `arg` and `call`", { my_check_recyclable <- function(foo, size) { vec_check_recyclable(foo, size) } expect_snapshot(error = TRUE, { my_check_recyclable(1:2, size = 5L) }) expect_snapshot(error = TRUE, { my_check_recyclable(foobar(), size = 5L) }) }) test_that("vec_check_recyclable() validates `size`", { expect_snapshot(error = TRUE, { vec_check_recyclable(1, size = "x") }) expect_snapshot(error = TRUE, { vec_check_recyclable(1, size = c(1L, 2L)) }) expect_snapshot(error = TRUE, { vec_check_recyclable(1, size = 1.5) }) }) # obj_is_list and friends ----------------------------------------------- test_that("bare lists are lists", { expect_true(obj_is_list(list())) }) test_that("AsIs lists are lists (#1463)", { expect_true(obj_is_list(I(list()))) expect_true(obj_is_list(I(list_of(1)))) expect_false(obj_is_list(I(double()))) }) test_that("list_of are lists", { expect_true(obj_is_list(new_list_of())) }) test_that("Vectors with a non-VECSXP type are not lists", { expect_false(obj_is_list(1)) expect_false(obj_is_list("a")) expect_false(obj_is_list(quote(name))) }) test_that("List arrays are not lists", { # Bare list arrays x <- array(list(1)) expect_false(obj_is_list(x)) # Classed list arrays x <- structure(list(1), class = "list", dim = 1) expect_false(obj_is_list(x)) }) test_that("explicitly classed lists are lists", { x <- structure(list(), class = "list") expect_true(obj_is_list(x)) expect_true(obj_is_list(subclass(x))) }) test_that("explicit inheritance must be in the base class", { x <- structure(1:2, class = c("list", "foobar")) expect_false(obj_is_list(x)) }) test_that("POSIXlt are not considered a list", { expect_false(obj_is_list(as.POSIXlt(new_datetime()))) }) test_that("rcrd types are not lists", { expect_false(obj_is_list(new_rcrd(list(x = 1)))) }) test_that("scalars are not lists", { expect_false(obj_is_list(foobar())) }) test_that("S3 types can't lie about their internal representation", { x <- structure(1:2, class = c("foobar", "list")) expect_false(obj_is_list(x)) }) test_that("data frames of all types are not lists", { expect_false(obj_is_list(data.frame())) expect_false(obj_is_list(subclass(data.frame()))) expect_false(obj_is_list(tibble::tibble())) }) test_that("S3 list with non-list proxy is still a list (#1208)", { x <- structure(list(), class = c("foobar", "list")) local_methods(vec_proxy.foobar = function(x) 1) # This used to be an error (#1003) # expect_error(obj_is_list(x), "`x` inherits") expect_true(obj_is_list(x)) }) test_that("list-rcrds with data frame proxies are considered lists (#1208)", { x <- structure( list(1:2, "x"), special = c("a", "b"), class = c("list_rcrd", "list") ) local_methods( vec_proxy.list_rcrd = function(x) { special <- attr(x, "special") data <- unstructure(x) new_data_frame(list(data = data, special = special)) } ) expect_true(obj_is_list(x)) }) test_that("list_all_vectors() works", { expect_true(list_all_vectors(list(1))) expect_true(list_all_vectors(list_of(1))) expect_false(list_all_vectors(list(1, env()))) expect_snapshot((expect_error(list_all_vectors(env())))) }) test_that("obj_check_list() works", { expect_null(obj_check_list(list(1))) expect_null(obj_check_list(list_of(1))) expect_snapshot({ my_function <- function(my_arg) obj_check_list(my_arg) (expect_error(my_function(env()))) }) }) test_that("obj_check_list() uses a special error when `arg` is the empty string (#1604)", { expect_snapshot(error = TRUE, { obj_check_list(1, arg = "") }) }) test_that("obj_check_list() and list_check_all_vectors() work", { expect_null(list_check_all_vectors(list())) expect_null(list_check_all_vectors(list(1, mtcars))) expect_snapshot({ my_function <- function(my_arg) list_check_all_vectors(my_arg) (expect_error(my_function(env()))) (expect_error(my_function(list(1, env())))) (expect_error(my_function(list(1, name = env())))) (expect_error(my_function(list(1, foo = env())))) }) }) test_that("list_all_size() works", { expect_true(list_all_size(list(), 2)) expect_true(list_all_size(list(integer()), 0)) expect_true(list_all_size(list(NULL), 0)) expect_true(list_all_size(list(1:2, 2:3), 2)) expect_false(list_all_size(list(1:2, 1:3), 2)) expect_false(list_all_size(list(NULL, 1:2), 2)) expect_true(list_all_size(list_of(1:3, 2:4), 3)) expect_false(list_all_size(list_of(1:3, 2:4), 4)) }) test_that("list_check_all_size() works", { expect_null(list_check_all_size(list(), 2)) expect_null(list_check_all_size(list(integer()), 0)) expect_null(list_check_all_size(list(NULL), 0)) expect_null(list_check_all_size(list(1:2, 2:3), 2)) expect_snapshot({ my_function <- function(my_arg, size) list_check_all_size(my_arg, size) # Validates sizes (expect_error(list_check_all_size(list(1:2, 1:3), 2))) (expect_error(my_function(list(1:2, 1:3), 2))) # `NULL` is not ignored (expect_error(my_function(list(NULL, 1:2), 2))) }) }) test_that("list_all_recyclable() works", { expect_true(list_all_recyclable(list(), 2)) expect_true(list_all_recyclable(list(integer()), 0)) expect_true(list_all_recyclable(list(NULL), 0)) expect_true(list_all_recyclable(list(1:2, 2:3), 2)) expect_true(list_all_recyclable(list(1, 2:3), 2)) expect_false(list_all_recyclable(list(1:2, 1:3), 2)) expect_false(list_all_recyclable(list(NULL, 1:2), 2)) expect_false(list_all_recyclable(list(1, 1:2), 1)) expect_true(list_all_recyclable(list_of(1:3, 2:4), 3)) expect_false(list_all_recyclable(list_of(1:3, 2:4), 4)) }) test_that("list_check_all_recyclable() works", { expect_null(list_check_all_recyclable(list(), 2)) expect_null(list_check_all_recyclable(list(integer()), 0)) expect_null(list_check_all_recyclable(list(NULL), 0)) expect_null(list_check_all_recyclable(list(1:2, 2:3), 2)) expect_null(list_check_all_recyclable(list(1, 2:3), 2)) expect_snapshot({ my_function <- function(my_arg, size) { list_check_all_recyclable(my_arg, size) } # Validates sizes (expect_error(list_check_all_recyclable(list(1:2, 1:3), 2))) (expect_error(my_function(list(1:2, 1:3), 2))) # `NULL` is not ignored (expect_error(my_function(list(NULL, 1:2), 2))) }) }) test_that("list_all_size() and list_check_all_size() error on scalars", { x <- list(env()) expect_snapshot({ # Error considered internal to `list_all_size()` (expect_error(list_all_size(x, 2))) my_function <- function(my_arg, size) list_check_all_size(my_arg, size) (expect_error(my_function(x, 2))) }) }) test_that("list_all_recyclable() and list_check_all_recyclable() error on scalars", { x <- list(env()) expect_snapshot({ # Error considered internal to `list_all_recyclable()` (expect_error(list_all_recyclable(x, 2))) my_function <- function(my_arg, size) { list_check_all_recyclable(my_arg, size) } (expect_error(my_function(x, 2))) }) }) test_that("list_all_size() and list_check_all_size() throw error using internal call on non-list input", { expect_snapshot({ (expect_error(list_all_size(1, 2))) # `arg` and `call` are ignored (expect_error(list_check_all_size(1, 2, arg = "arg", call = call("foo")))) }) }) test_that("list_all_size() and list_check_all_size() validate `size`", { expect_snapshot({ (expect_error(list_all_size(list(), size = "x"))) (expect_error(list_check_all_size(list(), size = "x"))) }) }) test_that("list_all_recyclable() and list_check_all_recyclable() validate `size`", { expect_snapshot({ (expect_error(list_all_recyclable(list(), size = "x"))) (expect_error(list_check_all_recyclable(list(), size = "x"))) }) }) test_that("list_all_size() works with `allow_null`", { x <- list(1, NULL, 2) expect_false(list_all_size(x, size = 1)) expect_true(list_all_size(x, size = 1, allow_null = TRUE)) x <- list(1, NULL, 2:3) expect_false(list_all_size(x, size = 1)) expect_false(list_all_size(x, size = 1, allow_null = TRUE)) # `NULL` size is 0 by default x <- list(integer(), NULL, integer()) expect_true(list_all_size(x, size = 0)) expect_true(list_all_size(x, size = 0, allow_null = TRUE)) }) test_that("list_check_all_size() works with `allow_null`", { x <- list(1, NULL, 2) expect_snapshot(error = TRUE, { list_check_all_size(x, size = 1) }) expect_null(list_check_all_size(x, size = 1, allow_null = TRUE)) # Index of 3rd element is reported correctly x <- list(1, NULL, 2:3) expect_snapshot(error = TRUE, { list_check_all_size(x, size = 1) }) expect_snapshot(error = TRUE, { list_check_all_size(x, size = 1, allow_null = TRUE) }) # `NULL` size is 0 by default x <- list(integer(), NULL, integer()) expect_null(list_check_all_size(x, size = 0)) expect_null(list_check_all_size(x, size = 0, allow_null = TRUE)) }) test_that("list_all_vectors() works with `allow_null`", { x <- list(1, NULL, 2) expect_false(list_all_vectors(x)) expect_true(list_all_vectors(x, allow_null = TRUE)) x <- list(1, NULL, environment()) expect_false(list_all_vectors(x)) expect_false(list_all_vectors(x, allow_null = TRUE)) }) test_that("list_check_all_vectors() works with `allow_null`", { x <- list(1, NULL, 2) expect_snapshot(error = TRUE, { list_check_all_vectors(x) }) expect_null(list_check_all_vectors(x, allow_null = TRUE)) # Index of 3rd element is reported correctly x <- list(1, NULL, environment()) expect_snapshot(error = TRUE, { list_check_all_vectors(x) }) expect_snapshot(error = TRUE, { list_check_all_vectors(x, allow_null = TRUE) }) }) test_that("list_all_recyclable() works with `allow_null`", { x <- list(1, NULL, 2:3) expect_false(list_all_recyclable(x, size = 2)) expect_true(list_all_recyclable(x, size = 2, allow_null = TRUE)) x <- list(1, NULL, 2:4) expect_false(list_all_recyclable(x, size = 2)) expect_false(list_all_recyclable(x, size = 2, allow_null = TRUE)) # `NULL` size is 0 by default x <- list(integer(), NULL, integer()) expect_true(list_all_recyclable(x, size = 0)) expect_true(list_all_recyclable(x, size = 0, allow_null = TRUE)) }) test_that("list_check_all_recyclable() works with `allow_null`", { x <- list(1, NULL, 2:3) expect_snapshot(error = TRUE, { list_check_all_recyclable(x, size = 2) }) expect_null(list_check_all_recyclable(x, size = 2, allow_null = TRUE)) # Index of 3rd element is reported correctly x <- list(1, NULL, 2:4) expect_snapshot(error = TRUE, { list_check_all_recyclable(x, size = 2) }) expect_snapshot(error = TRUE, { list_check_all_recyclable(x, size = 2, allow_null = TRUE) }) # `NULL` size is 0 by default x <- list(integer(), NULL, integer()) expect_null(list_check_all_recyclable(x, size = 0)) expect_null(list_check_all_recyclable(x, size = 0, allow_null = TRUE)) }) test_that("informative messages when 1d array doesn't match vector", { x <- array(1:3) expect_snapshot((expect_error(vec_assert(x, int())))) }) vctrs/tests/testthat/test-ptype-abbr-full.R0000644000176200001440000000606415065005761020535 0ustar liggesuserstest_that("input must be a vector", { expect_error(vec_ptype_abbr(sum), "Not a vector") expect_error(vec_ptype_full(sum), "Not a vector") }) test_that("NULL has method", { expect_equal(vec_ptype_abbr(NULL), "NULL") expect_equal(vec_ptype_full(NULL), "NULL") }) test_that("non objects default to type + shape", { expect_equal(vec_ptype_abbr(ones(10)), "dbl[1d]") expect_equal(vec_ptype_abbr(ones(0, 10)), "dbl[,10]") expect_equal(vec_ptype_abbr(ones(10, 0)), "dbl[,0]") expect_equal(vec_ptype_full(ones(10)), "double[1d]") expect_equal(vec_ptype_full(ones(0, 10)), "double[,10]") expect_equal(vec_ptype_full(ones(10, 0)), "double[,0]") }) test_that("non objects can omit shape", { expect_equal(vec_ptype_abbr(ones(10), suffix_shape = FALSE), "dbl") expect_equal(vec_ptype_abbr(ones(0, 10), suffix_shape = FALSE), "dbl") expect_equal(vec_ptype_abbr(ones(10, 0), suffix_shape = FALSE), "dbl") }) test_that("objects default to first class", { x <- structure(1, class = c("foofy", "goofy")) expect_equal(vec_ptype_full(x), "foofy") expect_equal(vec_ptype_abbr(x), "foofy") }) test_that("atomic vectors and arrays as expected", { expect_equal(vec_ptype_full(1:5), "integer") dbl_mat <- array(double(), c(0, 3)) expect_equal(vec_ptype_full(dbl_mat), "double[,3]") }) test_that("complex and factor as expected (#323)", { expect_equal(vec_ptype_abbr(0i), "cpl") expect_equal(vec_ptype_abbr(factor()), "fct") }) test_that("named lists are always tagged (#322)", { expect_identical(vec_ptype_abbr(list(x = 1, y = 2)), "named list") expect_identical( vec_ptype_abbr(list(x = 1, y = 2), prefix_named = TRUE), "named list" ) }) test_that("named atomics are tagged optionally (#781)", { expect_identical( vec_ptype_abbr(c(x = 1, y = 2), prefix_named = TRUE), "named dbl" ) expect_identical( vec_ptype_abbr(c(x = 1L, y = 2L), prefix_named = TRUE), "named int" ) }) test_that("vec_ptype_abbr() adds named tag in case of row names", { expect_equal( vec_ptype_abbr(mtcars, prefix_named = TRUE), "named df[,11]" ) mat <- matrix(1:4, 2) rownames(mat) <- c("foo", "bar") expect_equal( vec_ptype_abbr(mat, prefix_named = TRUE), "named int[,2]" ) }) test_that("vec_ptype_abbr() and vec_ptype_full() are not inherited (#1549)", { foobar <- foobar(class = c("vctrs_bar", "vctrs_foo")) local_methods( vec_ptype_abbr.vctrs_foo = function(...) "foo_abbr", vec_ptype_full.vctrs_foo = function(...) "foo_full" ) expect_equal( vec_ptype_abbr(foobar), vec_ptype_abbr.default(foobar) ) expect_equal( vec_ptype_full(foobar), "vctrs_bar" ) local_methods( vec_ptype_abbr.vctrs_bar = function(...) "bar_abbr", vec_ptype_full.vctrs_bar = function(...) "bar_full" ) expect_equal( vec_ptype_abbr(foobar), "bar_abbr" ) expect_equal( vec_ptype_full(foobar), "bar_full" ) }) test_that("data.frames have good default abbr and full methods", { expect_snapshot({ df <- foobar(data.frame(x = 1, y = "", z = TRUE)) vec_ptype_abbr(df) vec_ptype_full(df) }) }) vctrs/tests/testthat/test-arith.R0000644000176200001440000000146515134173404016634 0ustar liggesuserstest_that("logical/integer/numeric works", { expect_equal(vec_arith("+", TRUE, TRUE), 2L) expect_equal(vec_arith("+", TRUE, 1L), 2L) expect_equal(vec_arith("+", TRUE, 1), 2) expect_equal(vec_arith("+", 1L, TRUE), 2L) expect_equal(vec_arith("+", 1L, 1L), 2L) expect_equal(vec_arith("+", 1L, 1), 2) expect_equal(vec_arith("+", 1, TRUE), 2L) expect_equal(vec_arith("+", 1, 1L), 2L) expect_equal(vec_arith("+", 1, 1), 2) }) test_that("default is error", { f <- new_vctr(1:10, class = "foo") expect_error(vec_arith("+", f, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", TRUE, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", 1L, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", 1, f), class = "vctrs_error_incompatible_op") }) vctrs/tests/testthat/test-size.R0000644000176200001440000001340715075743736016515 0ustar liggesuserstest_that("vec_as_short_length() checks inputs", { expect_equal(vec_as_short_length(0), 0) expect_equal(vec_as_short_length(1L), 1) my_function <- function(my_arg) vec_as_short_length(my_arg) expect_snapshot({ (expect_error(my_function(-1))) (expect_error(my_function(1:2))) (expect_error(my_function(1.5))) (expect_error(my_function(NA))) (expect_error(my_function(na_int))) (expect_error(my_function("foo"))) (expect_error(my_function(foobar(1:2)))) (expect_error(my_function(.Machine$double.xmax))) }) }) test_that("vec_as_short_length() has a special error about long vector support", { # In particular, skips on 32-bit Windows where `r_ssize == int` skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") my_function <- function(my_arg) vec_as_short_length(my_arg) expect_snapshot({ (expect_error(my_function(.Machine$integer.max + 1))) }) }) # vec_size ----------------------------------------------------------------- test_that("vec_size must be called with vector", { expect_error(vec_size(mean), class = "vctrs_error_scalar_type") }) test_that("length is number of rows", { expect_equal(vec_size(integer()), 0) expect_equal(vec_size(array(integer())), 0) expect_equal(vec_size(1:2), 2) expect_equal(vec_size(array(dim = 2)), 2) expect_equal(vec_size(matrix(nrow = 2, ncol = 3)), 2) expect_equal(vec_size(array(dim = c(2, 1, 5))), 2) }) test_that("length of record is number of rows, not fields", { r <- new_rcrd(list(x = 1:10)) expect_equal(vec_size(r), 10) }) test_that("handles three types of data frame rownames", { df1 <- df2 <- df3 <- data.frame(x = 1:3) rownames(df1) <- NULL rownames(df2) <- 3:1 rownames(df3) <- letters[1:3] expect_equal(vec_size(df1), 3) expect_equal(vec_size(df2), 3) expect_equal(vec_size(df3), 3) }) test_that("handles positive short row names (#220)", { data <- structure(mtcars, row.names = c(NA, 32)) expect_identical(vec_size(data), 32L) }) test_that("size is proxied", { local_env_proxy() expect_size(new_proxy(1:3), 3) expect_size(new_proxy(list(1, 2, 3)), 3) expect_size(new_proxy(foobar(list(1, 2, 3))), 3) }) test_that("`NULL` has size zero", { expect_identical(vec_size(NULL), 0L) }) test_that("can take the size of unspecified objects", { expect_size(NA, 1) expect_size(c(NA, NA), 2) expect_size(unspecified(2), 2) }) # vec_size_common --------------------------------------------------------- test_that("vec_size_common() checks inputs", { expect_snapshot({ (expect_error(vec_size_common(.size = "foo"))) (expect_error(vec_size_common(.size = 1:2))) }) }) test_that("vec_size_common() mentions `arg` in errors", { my_function <- function(...) vec_size_common(..., .arg = "my_arg") expect_snapshot({ (expect_error(my_function(this_arg = 1:2, that_arg = int()))) }) }) test_that("vec_size_common with no input is 0L unless `.absent` is provided", { expect_identical(vec_size_common(), 0L) expect_identical(vec_size_common(NULL), 0L) expect_equal(vec_size_common(.absent = na_int), na_int) }) test_that("`.absent` must be supplied when `...` is empty", { expect_snapshot({ (expect_error(vec_size_common(.absent = NULL))) }) }) test_that("`.absent` must be a length 1 integer if provided", { expect_snapshot({ (expect_error(vec_size_common(.absent = 1), "must be a single integer")) (expect_error( vec_size_common(.absent = c(1L, 2L)), "must be a single integer" )) }) }) test_that("`NULL` is treated as the absence of input", { expect_equal(vec_size_common(1:5, NULL), vec_size_common(1:5)) }) test_that("size 1 is overshadowed by any other size", { expect_equal(vec_size_common(1, integer()), 0) expect_equal(vec_size_common(1, 1:5), 5) }) test_that("if not size 1, sizes must be identical", { expect_equal(vec_size_common(integer(), integer()), 0) expect_error( vec_size_common(1:2, integer()), class = "vctrs_error_incompatible_size" ) expect_error( vec_size_common(1:2, 1:3), class = "vctrs_error_incompatible_size" ) }) test_that("argument tags are forwarded", { expect_snapshot(error = TRUE, vec_size_common(1:2, 1, 1:4)) expect_snapshot(error = TRUE, vec_size_common(foo = 1:2, 1, bar = 1:4)) }) test_that("can pass size", { expect_identical(vec_size_common(1:2, 1:3, .size = 5L), 5L) }) test_that("provided size is cast to an integer", { expect_identical(vec_size_common(.size = 1), 1L) }) test_that("doesn't mutate the input", { x <- list(a = 1, b = 2:3) expect_identical(vec_size_common(!!!x), 2L) expect_identical(x, list(a = 1, b = 2:3)) }) # list_sizes -------------------------------------------------------------- test_that("only lists are allowed", { expect_error(list_sizes(mtcars), "must be a list") expect_error(list_sizes(1), "must be a list") }) test_that("computes element sizes", { expect_identical(list_sizes(list(1, 1:3, c("a", "b"))), c(1L, 3L, 2L)) }) test_that("retains list names", { x <- list(1, x = 2, a = 3) expect_named(list_sizes(x), c("", "x", "a")) x <- list_of(y = 1, x = 2, a = 3) expect_named(list_sizes(x), c("y", "x", "a")) }) test_that("retains names of empty lists", { x <- structure(list(), names = character()) expect_named(list_sizes(x), character()) }) # sequences --------------------------------------------------------------- test_that("vec_seq_along returns size-0 output for size-0 input", { expect_equal(vec_seq_along(character()), integer()) expect_equal(vec_seq_along(data.frame()), integer()) }) test_that("vec_init_along can be called with single argument", { expect_equal(vec_init_along(1:3), rep(NA_integer_, 3)) }) # %0% -------------------------------------------------------------------- test_that("uses y when x is empty", { expect_equal(1 %0% 2, 1) expect_equal(1[0] %0% 2, 2) }) vctrs/tests/testthat/test-proxy.R0000644000176200001440000001124015065005761016701 0ustar liggesuserstest_that("vec_data() preserves names (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_names(x), vec_names(vec_data(x))) x <- diag(2) rownames(x) <- letters[1:2] colnames(x) <- LETTERS[1:2] expect_identical(vec_names(x), vec_names(vec_data(x))) }) test_that("vec_data() preserves size (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_size(x), vec_size(vec_data(x))) x <- diag(2) expect_identical(vec_size(x), vec_size(vec_data(x))) }) test_that("vec_data() preserves dim and dimnames (#245)", { x <- set_names(letters, LETTERS) expect_identical(vec_dim(x), vec_dim(vec_data(x))) x <- diag(2) expect_identical(vec_dim(x), vec_dim(vec_data(x))) x <- diag(2) rownames(x) <- letters[1:2] colnames(x) <- LETTERS[1:2] expect_identical(dimnames(x), dimnames(vec_data(x))) }) test_that("strips vector attributes apart from names, dim and dimnames", { x <- new_vctr(1:10, a = 1, b = 2) expect_null(attributes(vec_data(x))) x <- new_vctr(c(x = 1, y = 2), a = 1, b = 2) expect_equal(names(attributes(vec_data(x))), "names") x <- new_vctr(1, a = 1, dim = c(1L, 1L)) expect_equal(names(attributes(vec_data(x))), "dim") x <- new_vctr(1, a = 1, dim = c(1L, 1L), dimnames = list("foo", "bar")) expect_equal(names(attributes(vec_data(x))), c("dim", "dimnames")) }) test_that("vec_proxy() is a no-op with data vectors", { for (x in vectors) { expect_identical(vec_proxy(!!x), !!x) } x <- structure(1:3, foo = "bar") expect_identical(vec_proxy(!!x), !!x) }) test_that("vec_proxy() transforms records to data frames", { for (x in records) { expect_identical(vec_proxy(x), new_data_frame(unclass(x))) } }) test_that("vec_proxy() is a no-op with non vectors", { x <- foobar(list()) expect_identical(vec_proxy(x), x) }) test_that("can take the proxy of non-vector objects", { local_env_proxy() expect_identical(vec_proxy(new_proxy(1:3)), 1:3) }) test_that("vec_data() asserts vectorness", { expect_error(vec_data(new_sclr()), class = "vctrs_error_scalar_type") expect_error(vec_data(~foo), class = "vctrs_error_scalar_type") }) test_that("vec_data() is proxied", { local_env_proxy() x <- new_proxy(mtcars) expect_identical(vec_data(x), vec_data(mtcars)) }) test_that("vec_proxy_equal() is recursive over data frames (#641)", { x <- new_data_frame(list(x = foobar(1:3), y = 41:43)) default <- vec_proxy_equal(x) expect_s3_class(default$x, "vctrs_foobar") local_methods(vec_proxy_equal.vctrs_foobar = function(...) c(0, 0, 0)) overridden <- vec_proxy_equal(x) expect_identical(overridden$x, c(0, 0, 0)) }) test_that("vec_proxy_equal() returns a POSIXct for POSIXlt objects (#901)", { x <- as.POSIXlt(new_date(0), tz = "UTC") expect_s3_class(vec_proxy_equal(x), "POSIXct") }) test_that("vec_proxy_equal() defaults to vec_proxy() and vec_proxy_compare() defaults to vec_proxy_equal() (#1140)", { foobar_proxy <- function(x, ...) data_frame(x = unclass(x), y = seq_along(x)) local_methods(vec_proxy.vctrs_foobar = foobar_proxy) x <- foobar(3:1) expect_identical(vec_proxy(x), foobar_proxy(x)) expect_identical(vec_proxy_equal(x), foobar_proxy(x)) expect_identical(vec_proxy_compare(x), foobar_proxy(x)) local_methods(vec_proxy_equal.vctrs_foobar = function(x, ...) { foobar_proxy(letters[x]) }) expect_identical(vec_proxy_equal(x), data_frame(x = letters[3:1], y = 1:3)) expect_identical(vec_proxy_compare(x), data_frame(x = letters[3:1], y = 1:3)) }) test_that("equal/compare/order proxy methods that return data frames are automatically flattened", { x <- new_vctr(1:2, class = "custom") equal <- data_frame(a = 1:2, b = 3:4) order <- data_frame(a = 3:4, b = 4:5) local_methods( vec_proxy_equal.custom = function(x, ...) data_frame(col = equal), vec_proxy_order.custom = function(x, ...) data_frame(col = order) ) expect_identical(vec_proxy_equal(x), equal) expect_identical(vec_proxy_compare(x), equal) expect_identical(vec_proxy_order(x), order) }) test_that("equal/compare/order proxy methods that return 1 column data frames are automatically unwrapped", { x <- new_vctr(1:2, class = "custom") equal <- 1:2 order <- 3:4 local_methods( vec_proxy_equal.custom = function(x, ...) data_frame(a = equal), vec_proxy_order.custom = function(x, ...) { data_frame(col = data_frame(a = order)) } ) expect_identical(vec_proxy_equal(x), equal) expect_identical(vec_proxy_compare(x), equal) expect_identical(vec_proxy_order(x), order) }) test_that("vec_data() preserves data frames", { expect_identical( vec_data(tibble(x = 1)), data_frame(x = 1) ) # Rownames are preserved expect_identical( vec_data(mtcars), mtcars ) }) vctrs/tests/testthat/test-type-dplyr.R0000644000176200001440000001154015110234412017620 0ustar liggesusers# `grouped_df` ------------------------------------------------------- bare_mtcars <- unrownames(mtcars) test_that("grouped-df is proxied and restored", { gdf <- dplyr::group_by(bare_mtcars, cyl) expect_identical(vec_proxy(gdf), gdf) expect_identical(vec_restore(bare_mtcars, gdf), gdf) expect_identical(vec_ptype(gdf), gdf[0, ]) gdf <- dplyr::group_by(bare_mtcars, cyl, am, vs) expect_identical(gdf[0, ], vec_ptype(gdf)) out <- vec_ptype(dplyr::group_by(bare_mtcars, cyl, .drop = FALSE)) expect_drop(out, FALSE) }) test_that("can take the common type of grouped tibbles and tibbles", { gdf <- dplyr::group_by(bare_mtcars, cyl) expect_identical(vec_ptype2(gdf, data.frame()), vec_ptype(gdf)) expect_identical(vec_ptype2(data.frame(), gdf), vec_ptype(gdf)) expect_identical(vec_ptype2(gdf, tibble()), vec_ptype(gdf)) expect_identical(vec_ptype2(tibble(), gdf), vec_ptype(gdf)) gdf_nodrop <- dplyr::group_by(bare_mtcars, cyl, .drop = FALSE) expect_drop(vec_ptype2(gdf, gdf_nodrop), FALSE) expect_drop(vec_ptype2(gdf_nodrop, gdf), FALSE) expect_drop(vec_ptype2(gdf_nodrop, bare_mtcars), FALSE) expect_drop(vec_ptype2(bare_mtcars, gdf_nodrop), FALSE) }) test_that("the common type of grouped tibbles includes the union of grouping variables", { gdf1 <- dplyr::group_by(bare_mtcars, cyl) gdf2 <- dplyr::group_by(bare_mtcars, am, vs) expect_identical( vec_ptype2(gdf1, gdf2), vec_ptype(dplyr::group_by(bare_mtcars, cyl, am, vs)) ) }) test_that("can cast to and from `grouped_df`", { gdf <- dplyr::group_by(unrownames(bare_mtcars), cyl) input <- bare_mtcars[10] cast_gdf <- dplyr::group_by(vec_cast(bare_mtcars[10], bare_mtcars), cyl) expect_error( vec_cast(input, dplyr::group_by(bare_mtcars["cyl"], cyl)), class = "vctrs_error_cast_lossy" ) expect_identical( vec_cast(input, gdf), cast_gdf ) expect_identical( vec_cast(gdf, bare_mtcars), unrownames(bare_mtcars) ) expect_identical( vec_cast(tibble::as_tibble(input), gdf), unrownames(cast_gdf) ) tib <- tibble::as_tibble(bare_mtcars) expect_identical( unrownames(vec_cast(gdf, tib)), tib ) }) test_that("casting to `grouped_df` doesn't require grouping variables", { expect_identical( vec_cast(bare_mtcars[10], dplyr::group_by(bare_mtcars, cyl)), dplyr::group_by(vec_cast(bare_mtcars[10], bare_mtcars), cyl) ) }) test_that("casting to `grouped_df` handles `drop`", { gdf_nodrop <- dplyr::group_by(bare_mtcars, cyl, .drop = FALSE) expect_identical(vec_cast(bare_mtcars, gdf_nodrop), gdf_nodrop) }) test_that("can cbind grouped data frames", { gdf <- dplyr::group_by(bare_mtcars[-10], cyl) df <- unrownames(bare_mtcars)[10] expect_identical( unrownames(vec_cbind(gdf, df)), tibble::as_tibble(bare_mtcars)[c(1:9, 11, 10)] ) gdf1 <- dplyr::group_by(bare_mtcars[2], cyl) gdf2 <- dplyr::group_by(bare_mtcars[8:9], vs, am) expect_identical( unrownames(vec_cbind(gdf1, gdf2)), tibble::as_tibble(bare_mtcars)[c(2, 8, 9)] ) }) # `rowwise` ---------------------------------------------------------- test_that("rowwise can be proxied and restored", { rww <- dplyr::rowwise(unrownames(bare_mtcars)) expect_identical(vec_proxy(rww), rww) expect_identical(vec_restore(unrownames(bare_mtcars), rww), rww) expect_identical(vec_ptype(rww), rww[0, ]) }) test_that("can take the common type of rowwise tibbles and tibbles", { rww <- dplyr::rowwise(bare_mtcars) expect_identical(vec_ptype2(rww, data.frame()), vec_ptype(rww)) expect_identical(vec_ptype2(data.frame(), rww), vec_ptype(rww)) expect_identical(vec_ptype2(rww, tibble()), vec_ptype(rww)) expect_identical(vec_ptype2(tibble(), rww), vec_ptype(rww)) }) test_that("can cast to and from `rowwise_df`", { rww <- unrownames(dplyr::rowwise(bare_mtcars)) input <- bare_mtcars[10] cast_rww <- dplyr::rowwise(vec_cast(bare_mtcars[10], bare_mtcars)) expect_error( vec_cast(input, dplyr::rowwise(bare_mtcars["cyl"])), class = "vctrs_error_cast_lossy" ) expect_identical( vec_cast(input, rww), cast_rww ) expect_identical( vec_cast(rww, bare_mtcars), unrownames(bare_mtcars) ) expect_identical( vec_cast(tibble::as_tibble(input), rww), unrownames(cast_rww) ) tib <- tibble::as_tibble(bare_mtcars) expect_identical( unrownames(vec_cast(rww, tib)), tib ) }) test_that("can cbind rowwise data frames", { df <- unrownames(bare_mtcars) rww <- dplyr::rowwise(df[-2]) gdf <- dplyr::group_by(df[2], cyl) exp <- dplyr::rowwise(df[c(1, 3:11, 2)]) expect_identical(vec_cbind(rww, df[2]), exp) # Suboptimal expect_identical(vec_cbind(rww, gdf), exp) }) test_that("common type between rowwise and grouped data frames is a bare df", { out <- vec_ptype_common( dplyr::rowwise(bare_mtcars), dplyr::group_by(bare_mtcars, cyl) ) expect_identical(out, tibble::as_tibble(bare_mtcars[0, ])) }) vctrs/tests/testthat/test-list-of-transpose.R0000644000176200001440000000605015120272011021076 0ustar liggesuserstest_that("transposes vectors", { expect_identical( list_of_transpose(list_of2(1:2, 3:4, 5:6)), list_of2(c(1L, 3L, 5L), c(2L, 4L, 6L)) ) }) test_that("transposes data frames", { expect_identical( list_of_transpose(list_of2( data_frame(a = 1:3, b = letters[1:3]), data_frame(a = 4:6, b = letters[4:6]) )), list_of2( data_frame(a = c(1L, 4L), b = letters[c(1L, 4L)]), data_frame(a = c(2L, 5L), b = letters[c(2L, 5L)]), data_frame(a = c(3L, 6L), b = letters[c(3L, 6L)]) ) ) }) test_that("empty `x` is fully reversible", { x <- list_of(.ptype = integer(), .size = 0) out <- list_of_transpose(x) expect_identical( out, list_of(.ptype = integer(), .size = 0) ) expect_identical(list_of_transpose(out), x) x <- list_of(.ptype = integer(), .size = 2) out <- list_of_transpose(x) expect_identical( out, list_of(integer(), integer(), .ptype = integer(), .size = 0) ) expect_identical(list_of_transpose(out), x) }) test_that("retains only inner names", { # I don't think we should expose `name_spec`, we've hard coded it to `"inner"` # for now. What would this even do with outer names? Exposing `name_spec` for # the interleave step would allow making names of `a_w` and `b_y` via a glue # spec, which feels weird and not useful. x <- list_of2(a = c(w = 1, x = 2), b = c(y = 3, z = 4)) expect_identical( list_of_transpose(x), list_of2( c(w = 1, y = 3), c(x = 2, z = 4) ) ) # Silent repair of duplicate data frame row names x <- list_of2( data.frame(a = 1, row.names = "x"), data.frame(a = 2, row.names = "x") ) expect_silent({ expect_identical( list_of_transpose(x), list_of2(data.frame(a = c(1, 2), row.names = c("x...1", "x...2"))) ) }) }) test_that("`x` is validated", { expect_snapshot(error = TRUE, { list_of_transpose(1) }) expect_snapshot(error = TRUE, { list_of_transpose(1, x_arg = "x", error_call = quote(foo())) }) }) test_that("`x` must be a fully specified list of", { expect_snapshot(error = TRUE, { x <- list_of(.ptype = integer(), .size = zap()) list_of_transpose(x) }) expect_snapshot(error = TRUE, { x <- list_of(.ptype = zap(), .size = 1) list_of_transpose(x) }) }) test_that("`...` must be empty", { expect_snapshot(error = TRUE, { list_of_transpose(list_of2(1), 2) }) }) test_that("doesn't allow `NULL` elements", { # These would break the invariants around the size of the output relative # to the size of the input if we just dropped them. We require that the user # drop them entirely or replace them up front. The `[<-` method for list-of # ensures that the type and size is maintained for whatever they replace with. expect_snapshot(error = TRUE, { list_of_transpose(list_of2(1:4, NULL, 5:8)) }) }) test_that("`x` being a list subclass can't affect the transposition", { x <- new_list_of( list(1, 2), ptype = double(), size = 1L, class = "my_list" ) expect_identical( list_of_transpose(x), list_of2(c(1, 2)) ) }) vctrs/tests/testthat/helper-types.R0000644000176200001440000000405014751701606017167 0ustar liggesusers# Don't call tibble::tibble() to avoid catch-22, because tibble now uses vctrs bare_tibble <- structure(data.frame(), class = c("tbl_df", "tbl", "data.frame")) base_empty_types <- list( null = NULL, logical = lgl(), integer = int(), double = dbl(), complex = cpl(), character = chr(), raw = bytes(), list = list(), dataframe = data.frame() ) base_s3_empty_types <- list( bare_factor = new_factor(), bare_ordered = new_ordered(), bare_date = new_date(), bare_posixct = new_datetime(tzone = "UTC"), bare_posixlt = as.POSIXlt(new_datetime(tzone = "UTC")), bare_tibble = bare_tibble ) proxied_empty_types <- list( double = new_hidden(), dataframe = bare_tibble, dataframe = structure(data.frame(), class = c("vctrs_foobar", "data.frame")) ) empty_types <- c( base_empty_types, proxied_empty_types, # Non proxied type scalar = foobar(list()), scalar = function() NULL ) atomics <- list(TRUE, 1L, 1.0, 1i, "foo", bytes(1)) vectors <- c(atomics, list(list())) records <- list( df = data.frame(x = 1), rcrd = new_rcrd(list(x = 1)), posixlt = as.POSIXlt("2020-01-01") ) tuple <- function(x = integer(), y = integer()) { fields <- vec_recycle_common( x = vec_cast(x, integer()), y = vec_cast(y, integer()) ) new_rcrd(fields, class = "vctrs_tuple") } tuple_methods <- list( format.vctrs_tuple = function(x, ...) { paste0("(", field(x, "x"), ",", field(x, "y"), ")") }, vec_ptype2.vctrs_tuple.vctrs_tuple = function(x, y, ...) x, vec_cast.vctrs_tuple.vctrs_tuple = function(x, to, ...) x ) local_tuple_methods <- function(frame = caller_env()) { local_methods(.frame = frame, !!!tuple_methods) } set_tuple_methods <- function(env = global_env()) { env_bind(env, !!!tuple_methods) } local_comparable_tuple <- function(frame = caller_env()) { local_tuple_methods(frame = frame) # Compare only on first field local_methods( .frame = frame, vec_proxy_equal.vctrs_tuple = function(x, ...) field(x, "x") ) } c_na <- function(...) { x <- c(...) names(x)[names(x) == ""] <- NA_character_ x } vctrs/tests/testthat/test-group.R0000644000176200001440000001342015065005761016656 0ustar liggesusers# group id ---------------------------------------------------------------- test_that("vec_group_id detects groups in order of appearance", { x <- c(2, 4, 2, 1, 4) expect <- structure(c(1L, 2L, 1L, 3L, 2L), n = 3L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works for size 0 input", { expect <- structure(integer(), n = 0L) expect_equal(vec_group_id(NULL), expect) expect_equal(vec_group_id(numeric()), expect) }) test_that("vec_group_id works on base S3 objects", { x <- factor(c("x", "y", "x")) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) x <- new_date(c(0, 1, 0)) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works row wise on data frames", { df <- data.frame(x = c(1, 2, 1, 1), y = c(2, 3, 2, 3)) expect <- structure(c(1L, 2L, 1L, 3L), n = 3L) expect_equal(vec_group_id(df), expect) }) test_that("vec_group_id works row wise on arrays", { x <- array(c(1, 1, 1, 2, 4, 2), c(3, 2)) expect <- structure(c(1L, 2L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id works with different encodings", { expect <- structure(c(1L, 1L, 1L), n = 1L) expect_equal(vec_group_id(encodings()), expect) }) test_that("vec_group_id takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), c(1, 1, 1, 2)) # Compares on only the first field expect <- structure(c(1L, 2L, 1L, 1L), n = 2L) expect_equal(vec_group_id(x), expect) }) test_that("vec_group_id takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- structure(c(1L, 2L, 1L, 1L), n = 2L) expect_equal(vec_group_id(df), expect) }) # group rle --------------------------------------------------------------- test_that("vec_group_rle returns a `vctrs_group_rle` object", { expect_s3_class(vec_group_rle(1), "vctrs_group_rle") }) test_that("vec_group_rle works with size 0 input", { expect <- new_group_rle(integer(), integer(), 0L) expect_equal(vec_group_rle(integer()), expect) expect_equal(vec_group_rle(NULL), expect) }) test_that("vec_group_rle detects groups in order of appearance", { x <- c(2, 2, 3, 1, 1) expect <- new_group_rle(1:3, c(2L, 1L, 2L), 3L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle can refer to groups it has already seen", { x <- c(2, 3, 2) expect <- new_group_rle(c(1L, 2L, 1L), rep(1L, 3), 2L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle works on base S3 objects", { expect <- new_group_rle(c(1L, 2L, 1L, 3L), c(1L, 2L, 1L, 1L), 3L) x <- factor(c("x", "y", "y", "x", "z")) expect_equal(vec_group_rle(x), expect) x <- new_date(c(0, 1, 1, 0, 2)) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), c(1, 1, 1, 2)) # Compares on only the first field expect <- new_group_rle(c(1L, 2L, 1L), c(1L, 1L, 2L), 2L) expect_equal(vec_group_rle(x), expect) }) test_that("vec_group_rle works row wise on data frames", { df <- data.frame(x = c(1, 1, 2, 1), y = c(2, 2, 3, 2)) expect <- new_group_rle(c(1L, 2L, 1L), c(2L, 1L, 1L), 2L) expect_equal(vec_group_rle(df), expect) }) test_that("vec_group_rle takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- new_group_rle(c(1L, 2L, 1L), c(1L, 1L, 2L), 2L) expect_equal(vec_group_rle(df), expect) }) test_that("can access fields", { x <- vec_group_rle(c(1, 1, 2)) expect_equal(fields(x), c("group", "length")) expect_identical(field(x, "group"), c(1L, 2L)) expect_identical(field(x, "length"), c(2L, 1L)) }) test_that("can access number of groups", { x <- vec_group_rle(c(1, 1, 2)) expect_identical(attr(x, "n"), 2L) }) test_that("print method is useful", { x <- new_group_rle(c(1L, 2L, 1L), c(3L, 2L, 1L), 2L) expect_snapshot(x) }) # group loc -------------------------------------------------------------- test_that("can locate unique groups of an empty vector", { out <- vec_group_loc(integer()) expect_s3_class(out, "data.frame") expect_equal(out$key, integer()) expect_equal(out$loc, list()) }) test_that("can locate unique groups of a data frame", { df <- data_frame(x = c(1, 1, 1, 2, 2), y = c("a", "a", "b", "a", "b")) out <- vec_group_loc(df) expect_equal(nrow(out), 4L) expect_equal(out$key, vec_unique(df)) }) test_that("can locate unique groups of a data frame with a list column", { df <- data_frame(x = list(1:2, 1:2, "a", 5.5, "a")) out <- vec_group_loc(df) expect_equal(nrow(out), 3L) expect_equal(out$key, vec_unique(df)) }) test_that("`x` must be a vector", { expect_error(vec_group_loc(environment()), class = "vctrs_error_scalar_type") }) test_that("`key` column retains full type information", { x <- factor(letters[c(1, 2, 1)], levels = letters[1:3]) out <- vec_group_loc(x) expect_equal(levels(out$key), levels(x)) }) test_that("vec_group_loc takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_equal(vec_group_loc(x)$key, x[1:2]) expect_equal(vec_group_loc(x)$loc, list(c(1L, 3L), 2L)) x <- as.POSIXlt(new_datetime(c(1, 2, 1))) expect_equal(vec_group_loc(x)$key, x[1:2]) expect_equal(vec_group_loc(x)$loc, list(c(1L, 3L), 2L)) }) test_that("vec_group_loc takes the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2, 1, 1), 1:4) df <- data_frame(x = x) expect <- data_frame( key = vec_slice(df, c(1, 2)), loc = list(c(1L, 3L, 4L), 2L) ) expect_equal(vec_group_loc(df), expect) }) test_that("vec_group_loc works with different encodings", { encs <- encodings() expect_identical(nrow(vec_group_loc(encs)), 1L) }) vctrs/tests/testthat/test-fields.R0000644000176200001440000000445215065005761016775 0ustar liggesuserstest_that("n_fields captures number of fields", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(n_fields(r), 2) }) # get --------------------------------------------------------------------- test_that("can extract valid field", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(field(r, "x"), 1) expect_equal(field(r, 1L), 1) }) test_that("can extract field even if encoding is different", { x1 <- "fa\u00e7ile" skip_if_not(Encoding(x1) == "UTF-8") x2 <- iconv(x1, from = "UTF-8", to = "latin1") skip_if_not(Encoding(x2) == "latin1") r <- new_rcrd(setNames(list(1), x1)) expect_equal(field(r, x1), 1) expect_equal(field(r, x2), 1) }) test_that("invalid indices throw error", { r <- new_rcrd(list(x = 1, y = 2)) expect_error(field(r, "z"), "Invalid index") expect_error(field(r, NA_character_), "Invalid index") expect_error(field(r, ""), "Invalid index") expect_error(field(r, letters), "Invalid index") expect_error(field(r, 0L), "Invalid index") expect_error(field(r, NA_integer_), "Invalid index") expect_error(field(r, 0), "Invalid index") expect_error(field(r, NA_real_), "Invalid index") expect_error(field(r, Inf), "Invalid index") expect_error(field(r, mean), "Invalid index") }) test_that("corrupt rcrd throws error", { r <- new_rcrd(list(x = 1, y = 2)) expect_error(field(1:10, 1L), "Corrupt rcrd") expect_error(field(list(), 1L), "Corrupt rcrd") expect_error(field(list(1), "x"), "Corrupt x") expect_error(field(setNames(list(1, 1), "y"), "x"), "Corrupt x") }) # set --------------------------------------------------------------------- test_that("field<- modifies a copy", { r1 <- new_rcrd(list(x = 1, y = 2)) r2 <- r1 field(r1, "x") <- 3 expect_equal(field(r1, "x"), 3) expect_equal(field(r2, "x"), 1) }) test_that("field<- checks inputs", { x <- list() expect_error(field(x, "x") <- 1, "Corrupt rcrd") r <- new_rcrd(list(x = 1)) expect_error(field(r, "x") <- 1:3, "Invalid value") expect_error(field(r, "x") <- environment(), "Invalid value") }) test_that("field<- respects size, not length (#450)", { r1 <- new_rcrd(list(df = new_data_frame(n = 2L))) new_df <- data.frame(x = 1:2) field(r1, 'df') <- new_df expect_equal(field(r1, "df"), new_df) expect_error(field(r1, 'df') <- new_data_frame(n = 3L), "Invalid value") }) vctrs/tests/testthat/test-type-rcrd.R0000644000176200001440000002347515065005761017446 0ustar liggesusers# constructor and accessors ----------------------------------------------- test_that("can construct and access components", { r <- new_rcrd(list(x = 1, y = 2)) expect_equal(length(r), 1) expect_equal(n_fields(r), 2) expect_equal(names(r), NULL) expect_equal(fields(r), c("x", "y")) expect_error(r$x, class = "vctrs_error_unsupported") expect_equal(field(r, "x"), 1) }) test_that("requires format method", { x <- new_rcrd(list(x = 1)) expect_error(format(x), class = "vctrs_error_unimplemented") }) test_that("vec_proxy() transforms records to data frames", { expect_identical( vec_proxy(new_rcrd(list(a = "1"))), new_data_frame(list(a = "1")) ) }) test_that("equality, comparison, and order proxies are recursive and fall through (#1503, #1664)", { base <- new_rcrd(list(a = 1), class = "custom") x <- new_rcrd(list(x = base)) expect_identical(vec_proxy_equal(x), 1) expect_identical(vec_proxy_compare(x), 1) expect_identical(vec_proxy_order(x), 1) local_methods(vec_proxy_equal.custom = function(x, ...) { rep("equal", length(x)) }) expect_identical(vec_proxy_equal(x), "equal") expect_identical(vec_proxy_compare(x), "equal") expect_identical(vec_proxy_order(x), "equal") local_methods(vec_proxy_compare.custom = function(x, ...) { rep("compare", length(x)) }) expect_identical(vec_proxy_equal(x), "equal") expect_identical(vec_proxy_compare(x), "compare") expect_identical(vec_proxy_order(x), "compare") local_methods(vec_proxy_order.custom = function(x, ...) { rep("order", length(x)) }) expect_identical(vec_proxy_equal(x), "equal") expect_identical(vec_proxy_compare(x), "compare") expect_identical(vec_proxy_order(x), "order") y <- new_rcrd(list(a = 1), class = "custom2") local_methods(vec_proxy_compare.custom2 = function(x, ...) { rep("compare2", length(x)) }) z <- data_frame(x = x, y = y) # Each column falls back independently expect_identical(vec_proxy_equal(z), data_frame(x = "equal", y = 1)) expect_identical( vec_proxy_compare(z), data_frame(x = "compare", y = "compare2") ) expect_identical(vec_proxy_order(z), data_frame(x = "order", y = "compare2")) }) # base methods ------------------------------------------------------------ test_that("has no names", { x <- new_rcrd(list(a = 1, b = 2L)) expect_null(names(x)) expect_null(vec_names(x)) }) test_that("removing names with `NULL` is a no-op (#1419)", { x <- new_rcrd(list(a = 1, b = 2L)) expect_identical(`names<-`(x, NULL), x) expect_identical(vec_set_names(x, NULL), x) }) test_that("setting character names is an error (#1419)", { x <- new_rcrd(list(a = 1, b = 2L)) expect_error(`names<-`(x, "x"), "Can't assign names") expect_error(vec_set_names(x, "x"), "Can't assign names") }) test_that("na.omit() works and retains metadata (#1413)", { x <- new_rcrd(list(a = c(1, 1, NA, NA), b = c(1, NA, 1, NA))) result <- na.omit(x) expect <- vec_slice(x, 1:3) attr(expect, "na.action") <- structure(4L, class = "omit") expect_identical(result, expect) }) test_that("na.fail() works", { # Only considered missing if all fields are missing x <- new_rcrd(list(a = c(1, 1, NA), b = c(1, NA, 1))) expect_identical(na.fail(x), x) x <- new_rcrd(list(a = c(1, 1, NA, NA), b = c(1, NA, 1, NA))) expect_snapshot(error = TRUE, na.fail(x)) }) # coercion ---------------------------------------------------------------- test_that("can't cast list to rcrd", { l <- list( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(b = "4", a = 2)) ) expect_error( vec_cast(l, new_rcrd(list(a = 1L, b = 2L))), class = "vctrs_error_incompatible_type" ) }) test_that("can recast rcrd from list", { r <- new_rcrd(list(x = integer(), y = numeric())) expect_equal( vec_restore(list(x = 1L, y = 1), r), new_rcrd(list(x = 1L, y = 1)) ) }) test_that("can't cast rcrd to list", { r <- new_rcrd(list(x = 1:2, y = 2:3)) expect_error(vec_cast(r, list()), class = "vctrs_error_incompatible_type") expect_error(vec_cast(r, list()), class = "vctrs_error_incompatible_type") }) test_that("default casts are implemented correctly", { r <- new_rcrd(list(x = 1, y = 1)) expect_error(vec_cast(1, r), class = "vctrs_error_incompatible_type") expect_equal(vec_cast(NULL, r), NULL) }) test_that("can't cast incompatible rcrd", { expect_error( vec_cast( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(a = "1")) ), class = "vctrs_error_cast_lossy" ) expect_error( vec_cast( new_rcrd(list(a = "1", b = 3L)), new_rcrd(list(a = "1", c = 3L)) ), class = "vctrs_error_cast_lossy" ) expect_error( vec_cast( new_rcrd(list(a = "a", b = 3L)), new_rcrd(list(a = 1, b = 3L)) ), class = "vctrs_error_incompatible_type" ) }) # input validation -------------------------------------------------------- test_that("must be list of equal length vectors", { expect_error(new_rcrd(list()), "list of length 1") expect_error( new_rcrd(list(x = environment())), class = "vctrs_error_scalar_type" ) expect_error(new_rcrd(list(x = 1:2, y = 1:3)), "same size") }) test_that("names must be unique", { expect_error( new_rcrd(list(1, 2)), class = "vctrs_error_names_cannot_be_empty" ) expect_error( new_rcrd(list(x = 1, 2)), class = "vctrs_error_names_cannot_be_empty" ) expect_error( new_rcrd(list(x = 1, x = 2)), class = "vctrs_error_names_must_be_unique" ) expect_error(new_rcrd(setNames(list(1, 2), "x")), "can't return `NA`") }) test_that("subset assignment throws error", { x <- new_rcrd(list(x = 1)) expect_error( x$y <- 2, class = "vctrs_error_unsupported" ) }) test_that("can supply data frame as fields", { expect_identical( new_rcrd(list(x = 1)), new_rcrd(tibble(x = 1)) ) }) test_that("fields are not recycled", { expect_error( new_rcrd(list(x = 1, y = 1:2)), "must be the same size" ) }) # tuple class ---------------------------------------------------------- # use simple class to test essential features of rcrds test_that("print and str use format", { local_tuple_methods() r <- tuple(1, 1:100) expect_snapshot(r) expect_snapshot(str(r[1:10])) expect_snapshot(str(list(list(list(r, 1:100))))) }) test_that("subsetting methods applied to each field", { local_tuple_methods() x <- tuple(1:2, 1) expect_equal(x[1], tuple(1, 1)) expect_equal(x[[1]], tuple(1, 1)) expect_equal(rep(tuple(1, 1), 2), tuple(c(1, 1), 1)) length(x) <- 1 expect_equal(x, tuple(1, 1)) }) test_that("subset assignment modifies each field", { local_tuple_methods() x <- tuple(c(1, 1), c(2, 2)) expect_error(x[[]] <- tuple(), "missing") x[[1]] <- tuple(3, 3) expect_equal(x, tuple(c(3, 1), c(3, 2))) x[1] <- tuple(4, 4) expect_equal(x, tuple(c(4, 1), c(4, 2))) }) test_that("subset assignment recycles", { local_tuple_methods() x <- tuple(c(1, 1), c(2, 2)) x[1:2] <- tuple(1, 1) expect_equal(x, tuple(c(1, 1), c(1, 1))) x[] <- tuple(2, 2) expect_equal(x, tuple(c(2, 2), c(2, 2))) }) test_that("can sort rcrd", { local_tuple_methods() x <- tuple(c(1, 2, 1), c(3, 1, 2)) expect_equal(xtfrm(x), c(2, 3, 1)) expect_equal(order(x), c(3, 1, 2)) expect_equal(sort(x), tuple(c(1, 1, 2), c(2, 3, 1))) }) test_that("can use dictionary methods on a rcrd", { local_tuple_methods() x <- tuple(c(1, 2, 1), c(3, 1, 3)) expect_equal(unique(x), x[1:2]) expect_equal(duplicated(x), c(FALSE, FALSE, TRUE)) expect_equal(anyDuplicated(x), TRUE) }) test_that("cannot round trip through list", { local_tuple_methods() t <- tuple(1:2, 3:4) # Used to be allowed expect_error(vec_cast(t, list()), class = "vctrs_error_incompatible_type") }) test_that("can convert to list using as.list() or vec_chop() (#1113)", { local_tuple_methods() t <- tuple(1:2, 3:4) expect <- list(tuple(1L, 3L), tuple(2L, 4L)) expect_identical(as.list(t), expect) expect_identical(vec_chop(t), expect) }) test_that("dangerous methods marked as unimplemented", { local_tuple_methods() t <- tuple() expect_error(mean(t), class = "vctrs_error_unsupported") expect_error(abs(t), class = "vctrs_error_unsupported") expect_error(is.finite(t), class = "vctrs_error_unsupported") expect_error(is.nan(t), class = "vctrs_error_unsupported") }) # slicing ----------------------------------------------------------------- test_that("dots are an error (#1295)", { foo <- new_rcrd(list(foo = "foo")) expect_snapshot(error = TRUE, foo[1, 2]) }) test_that("records are restored after slicing the proxy", { expect_identical(new_rcrd(list(x = 1:2))[1], new_rcrd(list(x = 1L))) }) test_that("can slice with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:2))) out <- vec_slice(x, 2) expect_identical( out, new_rcrd(data_frame(x = data_frame(y = 2L))) ) expect_identical( x[2], out ) expect_identical( x[[2]], out ) }) test_that("can rep with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:2))) expect_identical( rep(x, length.out = 4), vec_slice(x, c(1:2, 1:2)) ) }) test_that("can assign with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:3))) y <- new_rcrd(data_frame(x = data_frame(y = FALSE))) exp <- new_rcrd(data_frame(x = data_frame(y = c(1L, 2L, 0L)))) expect_identical(vec_assign(x, 3, y), exp) out <- x out[[3]] <- y expect_identical(out, exp) }) test_that("can resize with df-cols fields", { x <- new_rcrd(data_frame(x = data_frame(y = 1:3))) length(x) <- 2 expect_identical(x, new_rcrd(data_frame(x = data_frame(y = 1:2)))) length(x) <- 4 expect_identical(x, new_rcrd(data_frame(x = data_frame(y = c(1:2, NA, NA))))) }) test_that("`[[` preserves type of record fields (#1205)", { x <- new_rcrd(list(x = 1:3, a = list(1, 2:3, 4:6))) expect_identical(field(x[3], "a"), list(4:6)) expect_identical(field(x[[3]], "a"), list(4:6)) }) vctrs/tests/testthat/test-type-date-time.R0000644000176200001440000005607215132161317020357 0ustar liggesuserstest_that("date-times have informative types", { expect_identical(vec_ptype_abbr(Sys.Date()), "date") expect_identical(vec_ptype_full(Sys.Date()), "date") expect_identical(vec_ptype_abbr(Sys.time()), "dttm") expect_identical(vec_ptype_full(Sys.time()), "datetime") expect_identical(vec_ptype_abbr(new_duration(10)), "drtn") expect_identical(vec_ptype_full(new_duration(10)), "duration") }) test_that("vec_ptype() returns a double date for integer dates", { x <- structure(0L, class = "Date") expect_true(is.double(vec_ptype(x))) }) test_that("dates and times are vectors", { expect_true(vec_is(Sys.Date())) expect_true(vec_is(as.POSIXct("2020-01-01"))) expect_true(vec_is(as.POSIXlt("2020-01-01"))) }) test_that("vec_cast() converts POSIXct with int representation to double when converting zones", { x <- structure(integer(), class = c("POSIXct", "POSIXt"), tzone = "UTC") y <- structure( numeric(), class = c("POSIXct", "POSIXt"), tzone = "America/Los_Angeles" ) expect_true(is.double(vec_cast(x, y))) }) test_that("vec_c() converts POSIXct with int representation to double representation (#540)", { time1 <- seq( as.POSIXct("2015-12-01", tz = "UTC"), length.out = 2, by = "days" ) time2 <- vec_c(time1) expect_true(is.double(time2)) time3 <- vec_c(time1, time1) expect_true(is.double(time3)) }) test_that("vec_c() and vec_rbind() convert Dates with int representation to double representation (#396)", { x <- structure(0L, class = "Date") df <- data.frame(x = x) expect_true(is.double(vec_c(x))) expect_true(is.double(vec_c(x, x))) expect_true(is.double(vec_rbind(df)$x)) expect_true(is.double(vec_rbind(df, df)$x)) }) test_that("vec_c() and vec_ptype() standardize missing `tzone` attributes (#561)", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_ptype(x), "tzone"), "") expect_identical(attr(vec_c(x, x), "tzone"), "") }) # constructor ------------------------------------------------------------- test_that("can create a date", { expect_identical(new_date(), structure(double(), class = "Date")) expect_identical(new_date(0), structure(0, class = "Date")) }) test_that("retains input names", { expect_named(new_date(c(x = 0)), "x") }) test_that("drops attributes except names", { expect_identical(new_date(structure(1, foo = "bar")), new_date(1)) }) test_that("only allows doubles", { expect_error(new_date(1L), "must be a double vector") expect_error(new_date("x"), "must be a double vector") }) test_that("can create a datetime", { expect_identical( new_datetime(), structure(double(), class = c("POSIXct", "POSIXt"), tzone = "") ) expect_identical( new_datetime(0), structure(0, class = c("POSIXct", "POSIXt"), tzone = "") ) }) test_that("retains input names", { expect_named(new_datetime(c(x = 0)), "x") }) test_that("drops attributes except names", { expect_identical(new_datetime(structure(1, foo = "bar")), new_datetime(1)) }) test_that("only allows doubles", { expect_error(new_datetime(1L), "must be a double vector") expect_error(new_datetime("x"), "must be a double vector") }) test_that("tzone is allowed to be `NULL`", { expect_identical(new_datetime(tzone = NULL), new_datetime(tzone = "")) }) test_that("tzone must be character or `NULL`", { expect_error(new_datetime(tzone = 1), "character vector or `NULL`") }) # coerce ------------------------------------------------------------------ test_that("datetime coercions are symmetric and unchanging", { types <- list( new_date(), new_datetime(), new_datetime(tzone = "US/Central"), as.POSIXlt(character(), tz = "US/Central"), difftime(Sys.time() + 1000, Sys.time()), difftime(Sys.time() + 1, Sys.time()) ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) local_options(width = 200) expect_snapshot(print(mat)) }) test_that("tz comes from first non-empty", { # On the assumption that if you've set the time zone explicitly it # should win x <- as.POSIXct("2020-01-01") y <- as.POSIXct("2020-01-01", tz = "America/New_York") expect_identical(vec_ptype2(x, y), y[0]) expect_identical(vec_ptype2(y, x), y[0]) z <- as.POSIXct("2020-01-01", tz = "Pacific/Auckland") expect_identical(vec_ptype2(y, z), y[0]) expect_identical(vec_ptype2(z, y), z[0]) }) test_that("POSIXlt remains POSIXlt in vec_ptype()", { # See `vec_ptype.POSIXlt()` for details x <- as.POSIXlt("2020-01-01", tz = "UTC") exp <- x[0] expect_identical(vec_ptype(x), exp) x <- as.POSIXlt("2020-01-01", tz = "America/New_York") exp <- x[0] expect_identical(vec_ptype(x), exp) }) test_that("POSIXlt always steered towards POSIXct in vec_ptype2()", { dtc <- as.POSIXct("2020-01-01", tz = "UTC") dtl <- as.POSIXlt("2020-01-01", tz = "UTC") expect_identical(vec_ptype2(dtc, dtl), dtc[0]) expect_identical(vec_ptype2(dtl, dtc), dtc[0]) expect_identical(vec_ptype2(dtl, dtl), dtc[0]) }) test_that("vec_ptype2() on a POSIXlt with multiple time zones returns the first", { x <- as.POSIXlt(new_datetime(), tz = "Pacific/Auckland") expect_identical(attr(x, "tzone"), c("Pacific/Auckland", "NZST", "NZDT")) expect_identical(attr(vec_ptype2(x, new_date()), "tzone"), "Pacific/Auckland") }) test_that("vec_ptype2(, NA) is symmetric (#687)", { date <- new_date() expect_identical( vec_ptype2(date, NA), vec_ptype2(NA, date) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { time <- Sys.time() expect_identical( vec_ptype2(time, NA), vec_ptype2(NA, time) ) }) test_that("vec_ptype2(, NA) is symmetric (#687)", { dtime <- Sys.time() - Sys.time() expect_identical( vec_ptype2(dtime, NA), vec_ptype2(NA, dtime) ) }) test_that("vec_ptype2() standardizes duration storage type to double", { x <- structure(1L, units = "secs", class = "difftime") expect <- new_duration(double(), units = "secs") expect_identical(vec_ptype2(x, x), expect) }) test_that("vec_ptype_common() gives expected results with ", { # See `vec_ptype.POSIXlt()` for details x <- as.POSIXlt("2020-01-01", tz = "UTC") # `vec_ptype(x)` returns `` exp <- x[0] expect_identical(vec_ptype_common(x), exp) expect_identical(vec_ptype_common(.ptype = x), exp) # `vec_ptype2(x, x)` returns `` exp <- new_datetime(tzone = "UTC") expect_identical(vec_ptype_common(x, x), exp) # With input, still uses `vec_ptype(x)`, so returns `` exp <- x[0] expect_identical(vec_ptype_common(x, NA), exp) expect_identical(vec_ptype_common(NA, x), exp) expect_identical(vec_ptype_common(x, NULL), exp) expect_identical(vec_ptype_common(NULL, x), exp) }) # cast: dates --------------------------------------------------------------- test_that("safe casts work as expected", { date <- as.Date("2018-01-01") datetime_ct <- as.POSIXct(as.character(date)) datetime_lt <- as.POSIXlt(datetime_ct) expect_identical(vec_cast(NULL, date), NULL) expect_identical(vec_cast(date, date), date) expect_identical(vec_cast(datetime_ct, date), date) expect_identical(vec_cast(datetime_lt, date), date) missing_date <- new_date(NA_real_) expect_identical(vec_cast(missing_date, missing_date), missing_date) expect_identical( vec_cast(as.POSIXct(missing_date), missing_date), missing_date ) expect_identical( vec_cast(as.POSIXlt(missing_date), missing_date), missing_date ) # These used to be allowed expect_error(vec_cast(17532, date), class = "vctrs_error_incompatible_type") expect_error( vec_cast("2018-01-01", date), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(date), date), class = "vctrs_error_incompatible_type" ) }) test_that("date - datetime cast can be roundtripped", { date <- as.Date("2018-01-01") datetime <- as.POSIXct("2018-01-01", tz = "America/New_York") expect_identical(vec_cast(vec_cast(date, datetime), date), date) expect_identical(vec_cast(vec_cast(datetime, date), datetime), datetime) }) test_that("lossy casts generate error", { date <- as.Date("2018-01-01") datetime <- as.POSIXct(as.character(date)) + c(0, 3600) expect_lossy( vec_cast(datetime, date), vec_c(date, date), x = datetime, to = date ) }) test_that("invalid casts generate error", { date <- as.Date("2018-01-01") expect_error( vec_cast(integer(), date), class = "vctrs_error_incompatible_type" ) }) test_that("can cast NA and unspecified to Date", { expect_identical(vec_cast(NA, new_date()), new_date(NA_real_)) expect_identical(vec_cast(unspecified(2), new_date()), new_date(dbl(NA, NA))) }) test_that("casting an integer date to another date returns a double date", { x <- structure(0L, class = "Date") expect_true(is.double(vec_cast(x, x))) }) test_that("casting an integer POSIXct to a Date returns a double Date", { x <- .POSIXct(18000L, tz = "America/New_York") expect <- new_date(0) expect_identical(vec_cast(x, new_date()), expect) }) # cast: datetimes ----------------------------------------------------------- test_that("safe casts work as expected", { datetime_c <- as.POSIXct("1970-02-01", tz = "UTC") datetime_l <- as.POSIXlt("1970-02-01", tz = "UTC") expect_identical(vec_cast(NULL, datetime_c), NULL) expect_identical(vec_cast(datetime_c, datetime_c), datetime_c) expect_identical(vec_cast(datetime_l, datetime_c), datetime_c) expect_identical(vec_cast(as.Date(datetime_c), datetime_c), datetime_c) expect_identical(vec_cast(NULL, datetime_l), NULL) expect_identical(vec_cast(datetime_c, datetime_l), datetime_l) expect_identical(vec_cast(datetime_l, datetime_l), datetime_l) expect_identical(vec_cast(as.Date(datetime_l), datetime_l), datetime_l) expect_error( vec_cast(raw(), datetime_l), class = "vctrs_error_incompatible_type" ) missing_c <- new_datetime(NA_real_, tzone = "UTC") missing_l <- as.POSIXlt(missing_c) expect_identical(vec_cast(missing_c, missing_c), missing_c) expect_identical(vec_cast(missing_l, missing_c), missing_c) expect_identical(vec_cast(as.Date(missing_c), missing_c), missing_c) expect_identical(vec_cast(missing_l, missing_l), missing_l) expect_identical(vec_cast(missing_c, missing_l), missing_l) expect_identical(vec_cast(as.Date(missing_l), missing_l), missing_l) # These used to be allowed expect_error( vec_cast(2678400, datetime_c), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast("1970-02-01", datetime_c), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(datetime_c), datetime_c), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(2678400, datetime_l), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast("1970-02-01", datetime_l), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(list(datetime_l), datetime_l), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { datetime <- as.POSIXct("1970-02-01", tz = "UTC") expect_error( vec_cast(integer(), datetime), class = "vctrs_error_incompatible_type" ) }) test_that("dates become midnight in date-time tzone", { date1 <- as.Date("2010-01-01") datetime_c <- as.POSIXct(character(), tz = "Pacific/Auckland") datetime_l <- as.POSIXlt(character(), tz = "Pacific/Auckland") date2_c <- vec_cast(date1, datetime_c) expect_identical(tzone(date2_c), "Pacific/Auckland") expect_identical(format(date2_c, "%H:%M"), "00:00") date2_l <- vec_cast(date1, datetime_l) expect_identical(tzone(date2_l), "Pacific/Auckland") expect_identical(format(date2_l, "%H:%M"), "00:00") }) test_that("can cast NA and unspecified to POSIXct and POSIXlt", { dtc <- as.POSIXct("2020-01-01") dtl <- as.POSIXlt("2020-01-01") expect_identical(vec_cast(NA, dtc), vec_init(dtc)) expect_identical(vec_cast(NA, dtl), vec_init(dtl)) expect_identical(vec_cast(unspecified(2), dtc), vec_init(dtc, 2)) expect_identical(vec_cast(unspecified(2), dtl), vec_init(dtl, 2)) }) test_that("changing time zones retains the underlying moment in time", { x_ct <- as.POSIXct("2019-01-01", tz = "America/New_York") x_lt <- as.POSIXlt(x_ct) to_ct <- new_datetime(tzone = "America/Los_Angeles") to_lt <- as.POSIXlt(to_ct) expect_ct <- x_ct attr(expect_ct, "tzone") <- "America/Los_Angeles" expect_lt <- as.POSIXlt(expect_ct) expect_identical(vec_cast(x_ct, to_ct), expect_ct) expect_identical(vec_cast(x_ct, to_lt), expect_lt) expect_identical(vec_cast(x_lt, to_ct), expect_ct) expect_identical(vec_cast(x_lt, to_lt), expect_lt) }) test_that("casting to date always retains the zoned year-month-day value", { x <- as.POSIXct("2019-01-01", tz = "Asia/Shanghai") expect_identical(vec_cast(x, new_date()), as.Date("2019-01-01")) }) # cast: durations ------------------------------------------------------------ test_that("safe casts work as expected", { dt1 <- as.difftime(600, units = "secs") dt2 <- as.difftime(10, units = "mins") expect_identical(vec_cast(NULL, dt1), NULL) expect_identical(vec_cast(dt1, dt1), dt1) expect_identical(vec_cast(dt1, dt2), dt2) # These used to be allowed expect_error(vec_cast(600, dt1), class = "vctrs_error_incompatible_type") expect_error( vec_cast(list(dt1), dt1), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { dt <- as.difftime(600, units = "secs") expect_error(vec_cast(integer(), dt), class = "vctrs_error_incompatible_type") }) test_that("can cast NA and unspecified to duration", { expect_identical(vec_cast(NA, new_duration()), new_duration(na_dbl)) expect_identical( vec_cast(unspecified(2), new_duration()), new_duration(dbl(NA, NA)) ) }) test_that("casting coerces corrupt integer storage durations to double (#1602)", { x <- structure(1L, units = "secs", class = "difftime") expect <- new_duration(1, units = "secs") expect_identical(vec_cast(x, x), expect) # Names are retained through the coercion names(x) <- "a" expect_named(vec_cast(x, x), "a") }) # proxy/restore: dates --------------------------------------------------- test_that("restoring an integer to an integer Date converts to double", { x <- structure(0L, class = "Date") expect_true(is.double(vec_restore(x, x))) }) test_that("vec_proxy() returns a double for Dates with int representation", { x <- structure(0L, class = "Date") expect_true(is.double(vec_proxy(x))) }) # proxy/restore: datetimes ------------------------------------------------ test_that("restoring an integer to an integer POSIXct converts to double", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_true(is.double(vec_restore(x, x))) }) test_that("restoring to a POSIXct with no time zone standardizes to an empty string (#561)", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_restore(x, x), "tzone"), "") }) test_that("restoring to a POSIXlt with no time zone standardizes to an empty string", { # Manually create a POSIXlt without a `tzone` attribute. # This is just: # `x <- as.POSIXlt("1970-01-01")` # which usually won't add a `tzone` attribute, but platforms where the local # time is UTC attach a `tzone` attribute automatically. x <- structure( list( sec = 0, min = 0L, hour = 0L, mday = 1L, mon = 0L, year = 70L, wday = 4L, yday = 0L, isdst = 0L, zone = "EST", gmtoff = NA_integer_ ), class = c("POSIXlt", "POSIXt") ) proxy <- vec_proxy(x) expect_identical(attr(vec_restore(proxy, x), "tzone"), "") }) test_that("proxying a POSIXct with no time zone standardizes to an empty string", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_identical(attr(vec_proxy(x), "tzone"), "") }) test_that("vec_proxy() returns a double for POSIXct with int representation", { x <- structure(0L, class = c("POSIXct", "POSIXt")) expect_true(is.double(vec_proxy(x))) }) test_that("POSIXlt roundtrips through proxy and restore", { x <- as_posixlt("2020-01-03") out <- vec_restore(vec_proxy(x), x) expect_identical(out, x) }) test_that("subclassed Dates / POSIXct / POSIXlt can be restored (#1015)", { x <- subclass(new_date(0)) proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) y <- subclass(new_datetime(0)) proxy <- vec_proxy(y) expect_identical(vec_restore(proxy, y), y) z <- subclass(as.POSIXlt(new_datetime(0))) proxy <- vec_proxy(z) expect_identical(vec_restore(proxy, z), z) }) # arithmetic -------------------------------------------------------------- test_that("default is error", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-02 12:00") lt <- as.POSIXlt(dt) t <- as.difftime(12, units = "hours") f <- factor("x") expect_error(vec_arith("+", d, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", dt, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", lt, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("+", t, f), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", dt, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", lt, t), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", d, t), class = "vctrs_error_incompatible_op") expect_error( vec_arith("!", t, MISSING()), class = "vctrs_error_incompatible_op" ) }) test_that("date-time vs date-time", { d <- as.Date("2018-01-01") dt <- as.POSIXct(d) lt <- as.POSIXlt(dt) expect_error(vec_arith("+", d, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, d), d - d) expect_error(vec_arith("+", dt, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, dt), dt - dt) expect_error(vec_arith("+", lt, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, lt), lt - lt) expect_error(vec_arith("+", d, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, dt), difftime(d, dt)) expect_error(vec_arith("+", dt, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, d), difftime(dt, d)) expect_error(vec_arith("+", d, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", d, lt), difftime(d, lt)) expect_error(vec_arith("+", lt, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, d), difftime(lt, d)) expect_error(vec_arith("+", dt, lt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, lt), difftime(dt, lt)) expect_error(vec_arith("+", lt, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, dt), difftime(lt, dt)) }) test_that("date-time vs numeric", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-01", tz = "America/New_York") lt <- as.POSIXlt(dt) expect_identical(vec_arith("+", d, 1), d + 1) expect_identical(vec_arith("+", 1, d), d + 1) expect_identical(vec_arith("-", d, 1), d - 1) expect_error(vec_arith("-", 1, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("+", dt, 1), dt + 1) expect_identical(vec_arith("+", 1, dt), dt + 1) expect_identical(vec_arith("-", dt, 1), dt - 1) expect_error(vec_arith("-", 1, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("+", lt, 1), lt + 1) expect_identical(vec_arith("+", 1, lt), lt + 1) expect_identical(vec_arith("-", lt, 1), lt - 1) expect_error(vec_arith("-", 1, lt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, d), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", d, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, dt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", dt, 1), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", 1, lt), class = "vctrs_error_incompatible_op") expect_error(vec_arith("*", lt, 1), class = "vctrs_error_incompatible_op") }) test_that("POSIXlt + numeric = POSIXct", { lt <- as.POSIXlt("2018-01-01", tz = "America/New_York") expect_s3_class(vec_arith("+", lt, 1), "POSIXct") expect_s3_class(vec_arith("+", 1, lt), "POSIXct") }) test_that("vec_arith() standardizes the `tzone` attribute", { dt <- structure(0, class = c("POSIXct", "POSIXt")) x <- vec_arith("+", dt, 1) expect_identical(attr(x, "tzone"), "") }) test_that("date-time vs difftime", { d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-01", tz = "UTC") lt <- as.POSIXlt(dt) t <- as.difftime(1, units = "days") th <- as.difftime(c(1, 24), units = "hours") expect_identical(vec_arith("+", d, t), d + t) expect_identical(vec_arith("+", t, d), t + d) expect_identical(vec_arith("+", dt, t), dt + t) expect_identical(vec_arith("+", t, dt), t + dt) expect_identical(vec_arith("+", lt, t), lt + t) expect_identical(vec_arith("+", t, lt), t + lt) expect_lossy(vec_arith("+", d, th), d + th, x = t, to = d) expect_lossy(vec_arith("+", th, d), th + d, x = t, to = d) expect_identical(vec_arith("+", dt, th), dt + th) expect_identical(vec_arith("+", th, dt), th + dt) expect_identical(vec_arith("+", lt, th), lt + th) expect_identical(vec_arith("+", th, lt), th + lt) expect_identical(vec_arith("-", d, t), d - t) expect_error(vec_arith("-", t, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, t), dt - t) expect_error(vec_arith("-", t, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, t), lt - t) expect_error(vec_arith("-", t, lt), class = "vctrs_error_incompatible_op") expect_lossy(vec_arith("-", d, th), d - th, x = t, to = d) expect_error(vec_arith("-", th, d), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", dt, th), dt - th) expect_error(vec_arith("-", th, dt), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("-", lt, th), lt - th) expect_error(vec_arith("-", th, lt), class = "vctrs_error_incompatible_op") }) test_that("difftime vs difftime/numeric", { t <- as.difftime(12, units = "hours") expect_identical(vec_arith("-", t, MISSING()), -t) expect_identical(vec_arith("+", t, MISSING()), t) expect_identical(vec_arith("-", t, t), t - t) expect_identical(vec_arith("-", t, 1), t - 1) expect_identical(vec_arith("-", 1, t), 1 - t) expect_identical(vec_arith("+", t, t), 2 * t) expect_identical(vec_arith("+", t, 1), t + 1) expect_identical(vec_arith("+", 1, t), t + 1) expect_identical(vec_arith("*", 2, t), 2 * t) expect_identical(vec_arith("*", t, 2), 2 * t) expect_error(vec_arith("*", t, t), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("/", t, 2), t / 2) expect_error(vec_arith("/", 2, t), class = "vctrs_error_incompatible_op") expect_identical(vec_arith("/", t, t), 1) expect_identical(vec_arith("%/%", t, t), 1) expect_identical(vec_arith("%%", t, t), 0) }) # Math -------------------------------------------------------------------- test_that("date and date times don't support math", { expect_error(vec_math("sum", new_date()), class = "vctrs_error_unsupported") expect_error( vec_math("sum", new_datetime()), class = "vctrs_error_unsupported" ) expect_error( vec_math("sum", as.POSIXlt(new_datetime())), class = "vctrs_error_unsupported" ) }) vctrs/tests/testthat/test-missing.R0000644000176200001440000001253715065005761017203 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_detect_missing() test_that("can detect different types of NA", { expect_true(vec_detect_missing(NA)) expect_true(vec_detect_missing(NA_integer_)) expect_true(vec_detect_missing(NA_real_)) expect_true(vec_detect_missing(NA_complex_)) expect_true(vec_detect_missing(complex(real = NA, imaginary = 1))) expect_true(vec_detect_missing(NaN)) expect_true(vec_detect_missing(NA_character_)) expect_true(vec_detect_missing(list(NULL))) }) test_that("can detect different types of NA in data frames", { # using multiple columns to prevent proxy unwrapping expect_true(vec_detect_missing(data.frame(x = NA, y = NA))) expect_true(vec_detect_missing(data.frame(x = NA_integer_, y = NA_integer_))) expect_true(vec_detect_missing(data.frame(x = NA_real_, y = NaN))) expect_true(vec_detect_missing(data.frame(x = NA_complex_, y = NA_complex_))) expect_true(vec_detect_missing(data.frame( x = complex(real = NA, imaginary = 1), y = complex(real = 1, imaginary = NA) ))) expect_true(vec_detect_missing(data.frame( x = NA_character_, y = NA_character_ ))) expect_true(vec_detect_missing(new_data_frame(list( x = list(NULL), y = list(NULL) )))) }) test_that("raw vectors can never be NA", { expect_false(vec_detect_missing(raw(1))) expect_false(vec_detect_missing(data.frame(x = raw(1), y = raw(1)))) }) test_that("vectorised over rows of a data frame", { df <- data.frame(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA)) expect_equal(vec_detect_missing(df), c(FALSE, FALSE, FALSE, TRUE)) }) test_that("works recursively with data frame columns", { df <- data.frame(x = c(1, 1, NA, NA)) df$df <- data.frame(y = c(NA, 1, 1, NA), z = c(1, NA, 1, NA)) expect_equal(vec_detect_missing(df), c(FALSE, FALSE, FALSE, TRUE)) }) test_that("0 row, N col data frame always returns `logical()` (#1585)", { expect_identical(vec_detect_missing(data_frame()), logical()) expect_identical( vec_detect_missing(data_frame(x = integer(), y = double())), logical() ) }) test_that(">0 row, 0 col data frame always returns `TRUE` for each row (#1585)", { # `vec_detect_missing()` returns `TRUE` for each row because it (in theory) does # `all()` on each row, and since there are 0 columns we get # `all(logical()) == TRUE` for each row. expect_identical( vec_detect_missing(data_frame(.size = 2L)), c(TRUE, TRUE) ) }) test_that("works with `NULL` input (#1494)", { expect_identical(vec_detect_missing(NULL), logical()) }) # ------------------------------------------------------------------------------ # vec_any_missing() test_that("can check for any missing with all base vector types", { expect_false(vec_any_missing(TRUE)) expect_false(vec_any_missing(1L)) expect_false(vec_any_missing(1)) expect_false(vec_any_missing(complex(real = 1, imaginary = 1))) expect_false(vec_any_missing("1")) expect_false(vec_any_missing(list(1))) expect_true(vec_any_missing(c(TRUE, NA))) expect_true(vec_any_missing(c(1L, NA_integer_))) expect_true(vec_any_missing(c(1, NA_real_))) expect_true(vec_any_missing(complex(real = c(1, NA), imaginary = c(1, NA)))) expect_true(vec_any_missing(c("1", NA_character_))) expect_true(vec_any_missing(list(1, NULL))) }) test_that("raw vectors can never be missing", { expect_false(vec_any_missing(raw(1))) expect_false(vec_any_missing(data.frame(x = raw(1), y = raw(1)))) }) test_that("works with empty vectors", { # Like `any(logical())` expect_false(vec_any_missing(integer())) }) test_that("correctly detects complex missingness", { expect_false(vec_any_missing(complex(real = 1, imaginary = 1))) expect_true(vec_any_missing(complex(real = 1, imaginary = NA))) expect_true(vec_any_missing(complex(real = NA, imaginary = 1))) expect_true(vec_any_missing(complex(real = NA, imaginary = NA))) }) test_that("treats NaN as missing", { expect_true(vec_any_missing(NaN)) }) test_that("works with `NULL` input", { expect_false(vec_any_missing(NULL)) }) test_that("entire row of a data frame must be missing", { df <- data.frame(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA)) expect_true(vec_any_missing(df)) expect_false(vec_any_missing(df[-4, ])) }) test_that("works recursively with data frame columns", { df <- data.frame(x = c(1, 1, NA, NA)) df$df <- data.frame(y = c(NA, 1, 1, NA), z = c(1, NA, 1, NA)) expect_true(vec_any_missing(df)) expect_false(vec_any_missing(df[-4, ])) }) test_that("0 row, N col data frame always returns `FALSE` (#1585)", { df <- data_frame() expect_false(vec_any_missing(df)) expect_false(vec_any_missing(data_frame(x = integer(), y = double()))) # This is consistent with `vec_detect_missing()` returning `logical()` for 0 row # data frames. Then `any(logical()) == FALSE` to get `vec_any_missing()`. expect_identical( vec_any_missing(df), any(vec_detect_missing(df)) ) }) test_that(">0 row, 0 col data frame always returns `TRUE` (#1585)", { df <- data_frame(.size = 2L) expect_true(vec_any_missing(df)) # This is consistent with `vec_detect_missing()` returning `TRUE` for each row # because it (in theory) does `all()` on each row, and since there are 0 # columns we get `all(logical()) == TRUE` for each row. # Then `any(c(TRUE, TRUE)) == TRUE` to get `vec_any_missing()`. expect_identical( vec_any_missing(df), any(vec_detect_missing(df)) ) }) vctrs/tests/testthat/test-cast.R0000644000176200001440000002625515127057357016475 0ustar liggesuserstest_that("Casting to named argument mentions 'match type '", { expect_snapshot(error = TRUE, vec_cast(1, "", x_arg = "foo", to_arg = "bar")) expect_snapshot(error = TRUE, vec_cast(1, "", x_arg = "foo")) }) # vec_cast() --------------------------------------------------------------- test_that("new classes are uncoercible by default", { x <- structure(1:10, class = "vctrs_nonexistant") expect_error(vec_cast(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(x, 1), class = "vctrs_error_incompatible_type") }) test_that("casting requires vectors", { expect_error(vec_cast(NULL, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(NA, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(list(), quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), NULL), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), NA), class = "vctrs_error_scalar_type") expect_error(vec_cast(quote(name), list()), class = "vctrs_error_scalar_type") expect_error( vec_cast(quote(name), quote(name)), class = "vctrs_error_scalar_type" ) }) test_that("dimensionality matches output", { x1 <- matrix(TRUE, nrow = 1, ncol = 1) x2 <- matrix(1, nrow = 0, ncol = 2) expect_dim(vec_cast(x1, x2), c(1, 2)) expect_dim(vec_cast(TRUE, x2), c(1, 2)) x <- matrix(1, nrow = 2, ncol = 2) expect_error(vec_cast(x, logical()), class = "vctrs_error_incompatible_type") }) test_that("empty input to vec_cast_common() returns list()", { expect_equal(vec_cast_common(), list()) expect_equal(vec_cast_common(NULL, NULL), list(NULL, NULL)) }) test_that("identical structures can be cast to each other", { expect_identical(vec_cast(foobar("foo"), foobar("bar")), foobar("foo")) }) test_that("cast common preserves names", { expect_identical(vec_cast_common(foo = 1, bar = 2L), list(foo = 1, bar = 2)) }) test_that("cast errors create helpful messages (#57, #225)", { # Lossy cast expect_snapshot(error = TRUE, vec_cast(1.5, 10L)) # Incompatible cast expect_snapshot(error = TRUE, vec_cast(factor("foo"), 10)) # Nested data frames - Lossy cast expect_snapshot(error = TRUE, { x <- tibble(a = tibble(b = 1.5)) y <- tibble(a = tibble(b = 10L)) vec_cast(x, y) }) # Nested data frames - Incompatible cast expect_snapshot(error = TRUE, { x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast(x, y) }) # Nested data frames - Common cast error expect_snapshot(error = TRUE, { x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast_common(x, y) }) }) test_that("unspecified can be cast to shaped vectors", { x <- matrix(letters[1:4], 2) expect_identical(vec_cast(NA, x), matrix(chr(NA, NA), 1)) x <- foobar(c(1:4)) dim(x) <- c(2, 2) out <- vec_cast(NA, x) exp <- foobar(int(c(NA, NA))) dim(exp) <- c(1, 2) expect_identical(out, exp) }) test_that("vec_cast() falls back to base class even when casting to non-base type", { expect_equal(vec_cast(foobar(mtcars), mtcars), mtcars) expect_equal(vec_cast(mtcars, foobar(mtcars)), mtcars) }) test_that("vec_cast() only attempts to fall back if `to` is a data frame (#1568)", { expect_snapshot({ (expect_error( vec_cast(foobar(mtcars), 1), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_cast() evaluates x_arg and to_arg lazily", { expect_silent(vec_cast(TRUE, logical(), x_arg = print("oof"))) expect_silent(vec_cast(TRUE, logical(), to_arg = print("oof"))) }) # Conditions -------------------------------------------------------------- test_that("can suppress cast errors selectively", { f <- function() vec_cast(factor("a"), to = factor("b")) expect_error(regexp = NA, allow_lossy_cast(f())) expect_error(regexp = NA, allow_lossy_cast(f(), x_ptype = factor("a"))) expect_error(regexp = NA, allow_lossy_cast(f(), to_ptype = factor("b"))) expect_error( regexp = NA, allow_lossy_cast(f(), x_ptype = factor("a"), to_ptype = factor("b")) ) expect_error( allow_lossy_cast(f(), x_ptype = factor("c")), class = "vctrs_error_cast_lossy" ) expect_error( allow_lossy_cast(f(), x_ptype = factor("b"), to_ptype = factor("a")), class = "vctrs_error_cast_lossy" ) expect_error( allow_lossy_cast(f(), x_ptype = factor("a"), to_ptype = factor("c")), class = "vctrs_error_cast_lossy" ) }) test_that("can signal deprecation warnings for lossy casts", { local_options(lifecycle_verbosity = "warning") lossy_cast <- function() { maybe_lossy_cast( TRUE, factor("foo"), factor("bar"), lossy = TRUE, .deprecation = TRUE, x_arg = "x", to_arg = "to" ) } expect_snapshot({ (expect_warning(expect_true(lossy_cast()))) }) expect_warning(regexp = NA, expect_true(allow_lossy_cast(lossy_cast()))) expect_warning( regexp = NA, expect_true(allow_lossy_cast(lossy_cast(), factor("foo"), factor("bar"))) ) expect_warning(expect_true(allow_lossy_cast( lossy_cast(), factor("bar"), double() ))) }) # vec_cast_common() ------------------------------------------------------- test_that("vec_ptype_common() optionally falls back to base class", { x <- foobar(NA, foo = 1) y <- foobaz(NA, bar = 2) x_df <- data_frame(x = x) y_df <- data_frame(x = y) expect_error( vec_ptype_common_params(x, y, .fallback_opts = enabled_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype_common_params( x_df, y_df, .fallback_opts = enabled_fallback_opts() ), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype_common_params(x, y, .fallback_opts = enabled_fallback_opts()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype_common_params( x_df, y_df, .fallback_opts = enabled_fallback_opts() ), class = "vctrs_error_incompatible_type" ) class(y) <- c("foo", class(x)) y_df <- data_frame(x = y) common_sentinel <- vec_ptype_common_params( x, y, .fallback_opts = enabled_fallback_opts() ) expect_true(is_common_class_fallback(common_sentinel)) expect_identical(fallback_class(common_sentinel), "vctrs_foobar") common_sentinel <- vec_ptype_common_params( x_df, y_df, .fallback_opts = enabled_fallback_opts() ) expect_true(is_common_class_fallback(common_sentinel$x)) expect_identical(fallback_class(common_sentinel$x), "vctrs_foobar") common <- vec_cast_common_opts(x = x, y = y, .opts = enabled_fallback_opts()) expect_identical(common, list(x = x, y = y)) common <- vec_cast_common_opts( x = x_df, y = y_df, .opts = enabled_fallback_opts() ) expect_identical(common, list(x = x_df, y = y_df)) }) test_that("vec_ptype_common_fallback() collects common type", { x <- foobar(1, foo = 1, class = c("quux", "baz")) y <- foobar(2, bar = 2, class = "baz") x_df <- data_frame(x = x) y_df <- data_frame(x = y) out <- vec_ptype_common_fallback(x, y) expect_identical(typeof(out), "double") expect_true(is_common_class_fallback(out)) expect_identical(fallback_class(out), c("baz", "vctrs_foobar")) out <- vec_ptype_common_fallback(x_df, y_df) expect_identical(typeof(out$x), "double") expect_true(is_common_class_fallback(out$x)) expect_identical(fallback_class(out$x), c("baz", "vctrs_foobar")) # Different base types can't fall back to common class z <- foobar(3L, baz = 3) expect_error( vec_ptype_common_fallback(x, z), class = "vctrs_error_incompatible_type" ) z_df <- data_frame(x = z) expect_error( vec_ptype_common_fallback(x_df, z_df), class = "vctrs_error_incompatible_type" ) }) test_that("fallback sentinel is returned with unspecified inputs", { fallback <- vec_ptype_common_fallback(foobar(1), foobar(1)) expect_identical(vec_ptype_common_fallback(NA, foobar(1)), fallback) expect_identical(vec_ptype_common_fallback(foobar(1), NA), fallback) }) test_that("vec_ptype_common() supports subclasses of list", { x <- structure(list(1), class = c("vctrs_foo", "list")) y <- structure(list(2), class = c("bar", "vctrs_foo", "list")) expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type") out <- with_methods( c.vctrs_foo = function(...) quux(NextMethod()), vec_c(x, y) ) expect_identical(out, quux(list(1, 2))) }) test_that("vec_cast_common_fallback() works with tibbles", { x <- foobar("foo") df <- data_frame(x = x) tib <- tibble(x = x) exp <- list(tib, tib) expect_identical(vec_cast_common_fallback(tib, tib), exp) expect_identical(vec_cast_common_fallback(tib, df), exp) expect_identical(vec_cast_common_fallback(df, tib), exp) }) test_that("can call `vec_cast()` from C (#1666)", { fn <- inject(function(x, i) .Call(!!ffi_exp_vec_cast, x, i)) environment(fn) <- ns_env("utils") x <- array(1, dim = c(1, 1)) y <- array(2, dim = c(2, 2)) expect_equal(fn(x, y), vec_cast(x, y)) }) test_that("df-fallback for cast is not sensitive to attributes order", { x <- structure( list(col = ""), class = c("vctrs_foobar", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -1L), foo = "foo", bar = "bar" ) ptype <- structure( list(col = character(0)), foo = "foo", bar = "bar", row.names = integer(0), class = c("vctrs_foobar", "tbl_df", "tbl", "data.frame") ) expect_identical(vec_cast(x, ptype), x) }) test_that("bare-type fallback for df-cast works", { # NOTE: Not sure why this was necessary. The cubble and yamlet # packages fail without this. local_methods( c.vctrs_foobaz = function(...) quux(NextMethod()) ) df <- data_frame(x = 1, y = foobaz("foo")) gdf <- dplyr::new_grouped_df( df, data_frame(x = 1, .rows = list(1L)), class = "vctrs_foobar" ) expect_error(vec_rbind(gdf, gdf), NA) }) test_that("can cast to unspecified `NA` with `vec_cast()` and `vec_cast_common()` (#2099)", { # In the `vec_cast()` case no `vec_ptype()` call is made, which means that no # finalization step is required. In the `vec_cast_common()` case, the # underlying call to `vec_ptype_common()` calls `vec_ptype(NA)` but also # finalizes that to `logical()` on the way out, so this still works. expect_identical(vec_cast(TRUE, to = NA), TRUE) expect_identical(vec_cast_common(TRUE, .to = NA), list(TRUE)) # `vec_cast()` itself does not call `vec_ptype()` and does not finalize, # so this stays and the cast fails # (this behavior is questionable, but is very much an edge case) expect_snapshot(error = TRUE, { vec_cast(TRUE, to = unspecified(1)) }) # `vec_cast_common()` calls `vec_ptype_common()`, which always finalises, # so this technically works (but again, it is an edge case) expect_identical(vec_cast_common(TRUE, .to = unspecified(1)), list(TRUE)) }) # Golden tests ------------------------------------------------------- test_that("casting performs expected allocations", { expect_snapshot({ # No allocations when shape doesn't change (#2006) x <- matrix(rep(1L, 1e2), ncol = 2) with_memory_prof(vec_cast(x, x)) # One allocation when type changes x <- matrix(rep(1L, 1e2), ncol = 2) y <- matrix(rep(1, 1e2), ncol = 2) with_memory_prof(vec_cast(x, y)) }) }) vctrs/tests/testthat/test-dim.R0000644000176200001440000000126615065005761016300 0ustar liggesusers# vec_dim ----------------------------------------------------------------- test_that("dim is dimensions", { expect_equal(vec_dim(array(dim = c(1))), c(1)) expect_equal(vec_dim(array(dim = c(1, 1))), c(1, 1)) expect_equal(vec_dim(array(dim = c(1, 1, 1))), c(1, 1, 1)) }) test_that("dim_n is number of dimensions", { expect_equal(vec_dim_n(array(dim = c(1))), 1) expect_equal(vec_dim_n(array(dim = c(1, 1))), 2) expect_equal(vec_dim_n(array(dim = c(1, 1, 1))), 3) }) test_that("vector and 1-d array are equivalent", { x1 <- 1:5 x2 <- array(x1) expect_equal(vec_dim(x1), 5) expect_equal(vec_dim(x2), 5) expect_equal(vec_size(x1), 5) expect_equal(vec_size(x2), 5) }) vctrs/tests/testthat/test-type-data-table.R0000644000176200001440000000342014405105465020475 0ustar liggesusers# Never run on CRAN, even if they have data.table, because we don't regularly # check these on CI and we don't want a change in data.table to force a CRAN # failure for vctrs. skip_on_cran() # Avoids adding `data.table` to Suggests. # These tests are only run on the devs' machines. testthat_import_from("data.table", "data.table") test_that("common type of data.table and data.frame is data.table", { expect_identical( vec_ptype2(data.table(x = TRUE), data.table(y = 2)), data.table(x = lgl(), y = dbl()) ) expect_identical( vec_ptype2(data.table(x = TRUE), data.frame(y = 2)), data.table(x = lgl(), y = dbl()) ) expect_identical( vec_ptype2(data.frame(y = 2), data.table(x = TRUE)), data.table(y = dbl(), x = lgl()) ) expect_identical( vec_cast(data.table(y = 2), data.table(x = TRUE, y = 1L)), data.table(x = NA, y = 2L) ) expect_identical( vec_cast(data.frame(y = 2), data.table(x = TRUE, y = 1L)), data.table(x = NA, y = 2L) ) expect_identical( vec_cast(data.table(y = 2), data.frame(x = TRUE, y = 1L)), data.frame(x = NA, y = 2L) ) }) test_that("data.table and tibble do not have a common type", { expect_equal( vec_ptype_common(data.table(x = TRUE), tibble(y = 2)), tibble(x = lgl(), y = dbl()) ) expect_equal( vec_ptype_common(tibble(y = 2), data.table(x = TRUE)), tibble(y = dbl(), x = lgl()) ) expect_identical( vec_cast(data.table(y = 2), tibble(x = TRUE, y = 1L)), tibble(x = lgl(NA), y = 2L) ) expect_identical( vec_cast(tibble(y = 2), data.table(x = TRUE, y = 1L)), data_frame(x = lgl(NA), y = 2L) ) }) test_that("data table has formatting methods", { expect_snapshot({ dt <- data.table(x = 1, y = 2, z = 3) vec_ptype_abbr(dt) vec_ptype_full(dt) }) }) vctrs/tests/testthat/helper-expectations.R0000644000176200001440000000502215065005761020527 0ustar liggesusersexpect_dim <- function(x, shape) { dim <- dim2(x) expect_equal(dim, !!shape) } expect_lossy <- function(expr, result, x = NULL, to = NULL) { expr <- enquo(expr) expect_error(eval_tidy(expr), class = "vctrs_error_cast_lossy") out <- allow_lossy_cast(eval_tidy(expr), x_ptype = x, to_ptype = to) expect_identical(!!out, !!result) } expect_args <- function(x, y, x_arg, y_arg) { err <- catch_cnd( vec_ptype2(x, y, x_arg = x_arg, y_arg = y_arg), classes = "vctrs_error_incompatible_type" ) expect_true(!is_null(err)) expect_true(grepl(paste0("combine `", x_arg, "`"), err$message, fixed = TRUE)) expect_true(grepl(paste0("and `", y_arg, "`"), err$message, fixed = TRUE)) expect_identical(list(err$x_arg, err$y_arg), list(x_arg, y_arg)) } # Work around deparsing of !! on old versions of R as_label2 <- function(expr) { expr <- duplicate(expr, shallow = FALSE) label <- as_label(fix_bang(expr)) label <- gsub("+++", "!!!", label, fixed = TRUE) label <- gsub("++", "!!", label, fixed = TRUE) label } fix_bang <- function(expr) { curr <- expr while (!is_null(curr)) { car <- node_car(curr) if (is_triple_bang(car)) { replace_triple_bang(car) } else if (is_double_bang(car)) { replace_double_bang(car) } else if (is_call(car)) { node_poke_car(curr, fix_bang(car)) } curr <- node_cdr(curr) } expr } is_double_bang <- function(expr) { is_call(expr, "!") && is_call(node_cadr(expr), "!") } is_triple_bang <- function(expr) { is_double_bang(expr) && is_call(node_cadr(node_cadr(expr)), "!") } replace_double_bang <- function(expr) { node_poke_car(expr, sym("+")) node_poke_car(node_cadr(expr), sym("+")) } replace_triple_bang <- function(expr) { replace_double_bang(expr) node_poke_car(node_cadr(node_cadr(expr)), sym("+")) } expect_syntactic <- function(name, exp_syn_name) { expect_identical( syn_name <- make_syntactic(name), exp_syn_name ) expect_identical(syn_name, make.names(syn_name)) } expect_error_cnd <- function( object, class, message = NULL, ..., .fixed = TRUE ) { cnd <- expect_error( object, regexp = message, class = class[[1]], fixed = .fixed ) expect_true(inherits_all(cnd, class)) exp_fields <- list2(...) expect_true(is_empty(setdiff(!!names(exp_fields), names(cnd)))) expect_equal(cnd[names(exp_fields)], exp_fields) } scrub_internal_error_line_number <- function(x) { # Because it varies by OS sub( pattern = "at line [[:digit:]]+", replacement = "at line ", x = x ) } vctrs/tests/testthat/test-type-list-of.R0000644000176200001440000005425415120272011020052 0ustar liggesusers# ------------------------------------------------------------------------------ # new_list_of test_that("works without any arguments", { # Default is logical type, no size restriction expect_identical( new_list_of(), new_list_of(x = list(), ptype = logical(), size = NULL) ) }) test_that("constructor requires list input", { expect_snapshot(error = TRUE, { new_list_of(1) }) expect_snapshot(error = TRUE, { new_list_of(mtcars) }) }) test_that("must lock at least one of ptype or size", { expect_snapshot(error = TRUE, { new_list_of(ptype = NULL, size = NULL) }) }) test_that("can lock ptype, size, or both", { x <- new_list_of(ptype = integer(), size = NULL) expect_identical(list_of_ptype(x), integer()) expect_null(list_of_size(x)) x <- new_list_of(ptype = NULL, size = 5L) expect_null(list_of_ptype(x)) expect_identical(list_of_size(x), 5L) x <- new_list_of(ptype = integer(), size = 5L) expect_identical(list_of_ptype(x), integer()) expect_identical(list_of_size(x), 5L) }) test_that("validates `ptype`", { expect_snapshot(error = TRUE, { new_list_of(ptype = lm(1 ~ 1)) }) x <- new_list_of(ptype = 1:5) expect_identical(list_of_ptype(x), integer()) }) test_that("finalizes `ptype`", { x <- new_list_of(ptype = unspecified()) expect_identical(list_of_ptype(x), logical()) }) test_that("validates `size`", { expect_snapshot(error = TRUE, { new_list_of(size = 1.1) }) expect_snapshot(error = TRUE, { new_list_of(size = 1:2) }) expect_snapshot(error = TRUE, { new_list_of(size = -5) }) }) test_that("has vctrs classes", { expect_identical( class(new_list_of()), c("vctrs_list_of", "vctrs_vctr", "list") ) }) test_that("can add extra class", { expect_s3_class(new_list_of(class = "foo_list_of"), "foo_list_of") }) test_that("can add extra attributes", { x <- new_list_of(foo = "bar") expect_identical(attr(x, "foo"), "bar") }) # ------------------------------------------------------------------------------ # is_list_of test_that("is_list_of works as expected", { expect_false(is_list_of(list(1))) expect_true(is_list_of(list_of(1))) }) test_that("can check for list of", { expect_snapshot(error = TRUE, { check_list_of(1) }) }) # ------------------------------------------------------------------------------ # list_of test_that("default behavior infers ptype and doesn't lock size", { x <- list_of(1) expect_identical(list_of_ptype(x), double()) expect_null(list_of_size(x)) }) test_that("errors if can't determine type", { expect_snapshot(error = TRUE, { list_of(.ptype = NULL) }) expect_snapshot(error = TRUE, { list_of(1, "a") }) }) test_that("errors if can't determine size", { expect_snapshot(error = TRUE, { list_of(.ptype = zap(), .size = NULL) }) expect_snapshot(error = TRUE, { list_of(1:2, 3:5, .ptype = zap(), .size = NULL) }) }) test_that("can specify ptype, size, or both", { x <- list_of(.ptype = integer(), .size = zap()) expect_identical(list_of_ptype(x), integer()) expect_null(list_of_size(x)) x <- list_of(.ptype = zap(), .size = 5L) expect_null(list_of_ptype(x)) expect_identical(list_of_size(x), 5L) x <- list_of(.ptype = integer(), .size = 5L) expect_identical(list_of_ptype(x), integer()) expect_identical(list_of_size(x), 5L) }) # ------------------------------------------------------------------------------ # as_list_of test_that("can convert from list to list-of", { x <- list(1) expect_identical(as_list_of(x), list_of(1)) }) test_that("default behavior infers ptype and doesn't lock size", { x <- as_list_of(list(1)) expect_identical(list_of_ptype(x), double()) expect_null(list_of_size(x)) }) test_that("errors if can't determine type", { expect_snapshot(error = TRUE, { as_list_of(list()) }) }) test_that("errors if can't determine size", { expect_snapshot(error = TRUE, { as_list_of(list(), .ptype = integer(), .size = NULL) }) }) test_that("can specify ptype, size, or both", { x <- as_list_of(list(), .ptype = integer(), .size = zap()) expect_identical(list_of_ptype(x), integer()) expect_null(list_of_size(x)) x <- as_list_of(list(), .ptype = zap(), .size = 5L) expect_null(list_of_ptype(x)) expect_identical(list_of_size(x), 5L) x <- as_list_of(list(), .ptype = integer(), .size = 5L) expect_identical(list_of_ptype(x), integer()) expect_identical(list_of_size(x), 5L) }) test_that("as_list_of on list_of is a no-op", { x <- list_of(1) expect_identical(as_list_of(x), x) # Used to be able to do this in `as_list_of.vctrs_list_of`, but we determined # you should just go through `list_of()` again. expect_identical(as_list_of(x, .ptype = integer()), x) }) # ------------------------------------------------------------------------------ # list_of_size test_that("`list_of_size()` returns the `size`", { x <- list_of(1:2, .size = 2, .ptype = zap()) expect_identical(list_of_size(x), 2L) x <- list_of(1:2, .size = zap(), .ptype = integer()) expect_null(list_of_size(x)) }) test_that("`list_of_size()` validates `x`", { expect_snapshot(error = TRUE, { list_of_size(list(1)) }) }) # ------------------------------------------------------------------------------ # list_of_ptype test_that("`list_of_ptype()` returns the `size`", { x <- list_of(1:2, .ptype = integer(), .size = zap()) expect_identical(list_of_ptype(x), integer()) x <- list_of(1:2, .ptype = zap(), .size = 2L) expect_null(list_of_ptype(x)) }) test_that("`list_of_ptype()` validates `x`", { expect_snapshot(error = TRUE, { list_of_ptype(list(1)) }) }) # ------------------------------------------------------------------------------ # as.list() test_that("can convert to base list", { x <- list_of(1) expect_identical(as.list(x), list(1)) }) # ------------------------------------------------------------------------------ # as.character() test_that("list_of() has as.character() method (tidyverse/tidyr#654)", { exp <- rep(paste0("<", vec_ptype_abbr(mtcars), ">"), 2) expect_identical(as.character(list_of(mtcars, mtcars)), exp) }) # ------------------------------------------------------------------------------ # Formatting test_that("can print empty list-of", { expect_snapshot(list_of(.ptype = integer(), .size = 5L)) }) test_that("print method gives human friendly output", { # Just ptype x <- list_of(1, 2:3, .ptype = double(), .size = zap()) expect_snapshot(cat(vec_ptype_full(x))) expect_snapshot(cat(vec_ptype_abbr(x))) # Just size x <- list_of(1, 2:3, .ptype = zap(), .size = 2L) expect_snapshot(cat(vec_ptype_full(x))) expect_snapshot(cat(vec_ptype_abbr(x))) # Both ptype and size x <- list_of(1, 2:3, .ptype = double(), .size = 2L) expect_snapshot(cat(vec_ptype_full(x))) expect_snapshot(cat(vec_ptype_abbr(x))) }) test_that("print method gives human friendly output for multi line types", { ptype <- data_frame(x = integer(), y = double(), z = character()) # Just ptype x <- list_of(.ptype = ptype, .size = zap()) expect_snapshot(cat(vec_ptype_full(x))) expect_snapshot(cat(vec_ptype_abbr(x))) # Just size x <- list_of(.ptype = zap(), .size = 2L) expect_snapshot(cat(vec_ptype_full(x))) expect_snapshot(cat(vec_ptype_abbr(x))) # Both ptype and size x <- list_of(.ptype = ptype, .size = 2L) expect_snapshot(cat(vec_ptype_full(x))) expect_snapshot(cat(vec_ptype_abbr(x))) }) test_that("str method is reasonably correct", { x <- list_of(1, 2:3) expect_snapshot(str(x)) expect_snapshot(str(list(list(x, y = 2:1)))) expect_snapshot(str(x[0])) expect_snapshot(str(list(list(x[0], y = 2:1)))) }) # ------------------------------------------------------------------------------ # Subsetting test_that("[ preserves type", { x <- list_of(1) expect_identical(x[1], x) x <- list_of(1, .ptype = double(), .size = 1) expect_identical(x[1], x) }) test_that("[[ works", { x <- list_of(a = 1, b = 2) expect_identical(x[[1]], 1) expect_identical(x[["b"]], 2) expect_snapshot(error = TRUE, { x[[3]] }) expect_snapshot(error = TRUE, { x[["c"]] }) }) test_that("$ works", { x <- list_of(a = 1, b = 2) expect_identical(x$b, 2) expect_snapshot(error = TRUE, { x$c }) }) test_that("[<- coerces and recycles", { # Just type x <- list_of(a = 1, b = 2, .ptype = double(), .size = zap()) x[1] <- list(NULL) expect_identical( x, list_of(a = NULL, b = 2, .ptype = double(), .size = zap()) ) x[1] <- list(3) expect_identical(x[[1]], 3) # Casts automatically x[1] <- list(4:5) expect_identical(x[[1]], c(4, 5)) expect_snapshot(error = TRUE, { x[1] <- list("5") }) # Just size x <- list_of(a = 1:2, b = c("c", "d"), .ptype = zap(), .size = 2L) x[1] <- list(NULL) expect_identical( x, list_of(a = NULL, b = c("c", "d"), .ptype = zap(), .size = 2L) ) x[1] <- list(c(5, 6)) expect_identical(x[[1]], c(5, 6)) # Recycles automatically x[1] <- list(7) expect_identical(x[[1]], c(7, 7)) expect_snapshot(error = TRUE, { x[1] <- list(c(1, 2, 3)) }) }) test_that("[[<- coerces and recycles", { # Just type x <- list_of(a = 1, b = 2, .ptype = double(), .size = zap()) x[[1]] <- 3 expect_identical(x[[1]], 3) # Casts automatically x[[1]] <- 4:5 expect_identical(x[[1]], c(4, 5)) expect_snapshot(error = TRUE, { x[[1]] <- "5" }) # Just size x <- list_of(a = 1:2, b = c("c", "d"), .ptype = zap(), .size = 2L) x[[1]] <- c(5, 6) expect_identical(x[[1]], c(5, 6)) # Recycles automatically x[[1]] <- 7 expect_identical(x[[1]], c(7, 7)) expect_snapshot(error = TRUE, { x[[1]] <- c(1, 2, 3) }) }) test_that("$<- coerces and recycles", { # Just type x <- list_of(a = 1, b = 2, .ptype = double(), .size = zap()) x$a <- 3 expect_identical(x$a, 3) # Casts automatically x$a <- 4:5 expect_identical(x$a, c(4, 5)) expect_snapshot(error = TRUE, { x$a <- "5" }) # Just size x <- list_of(a = 1:2, b = c("c", "d"), .ptype = zap(), .size = 2L) x$a <- c(5, 6) expect_identical(x$a, c(5, 6)) # Recycles automatically x$a <- 7 expect_identical(x$a, c(7, 7)) expect_snapshot(error = TRUE, { x$a <- c(1, 2, 3) }) }) test_that("[<- chops non list `value`s", { # Chops into `list(4, 5)` before casting or recycling x <- list_of(a = 1, b = 2, c = 3) x[2:3] <- c(4L, 5L) expect_identical(x, list_of(a = 1, b = 4, c = 5)) # Chops into `list(7, 8)` before casting or recycling x <- list_of(a = 1:2, b = 2:3, c = 3:4, .size = 2L) x[2:3] <- c(7, 8) expect_identical( x, list_of(a = 1:2, b = c(7L, 7L), c = c(8L, 8L), .size = 2L) ) }) test_that("[<- can shorten with `NULL` (#2112)", { x <- list_of(a = 1, b = 2, c = 3) x[2] <- NULL expect_identical(x, list_of(a = 1, c = 3)) x <- list_of(a = 1, b = 2, c = 3) x[2:3] <- NULL expect_identical(x, list_of(a = 1)) # Like base lists x <- list(a = 1, b = 2, c = 3) x[2] <- NULL expect_identical(x, list(a = 1, c = 3)) }) test_that("[[<- can shorten with `NULL` (#2112)", { x <- list_of(a = 1, b = 2) x[[2]] <- NULL expect_identical(x, list_of(a = 1)) x <- list_of(a = 1, b = 2) x[["b"]] <- NULL expect_identical(x, list_of(a = 1)) # Like base lists x <- list(a = 1, b = 2) x[[2]] <- NULL expect_identical(x, list(a = 1)) }) test_that("$<- can shorten with `NULL` (#2112)", { x <- list_of(a = 1, b = 2) x$b <- NULL expect_identical(x, list_of(a = 1)) # Like base lists x <- list(a = 1, b = 2) x$b <- NULL expect_identical(x, list(a = 1)) }) test_that("assignment can increase size of vector", { x <- list_of(x = 1) x[[2]] <- 2 x$z <- 3 x[4:5] <- c(4, 5) expect_length(x, 5) }) # ------------------------------------------------------------------------------ # Type system test_that("list_of() are vectors", { expect_true(obj_is_vector(list_of(1))) expect_true(vec_is(list_of(1))) }) test_that("ptype2: list + list_of", { expect_identical(vec_ptype2(list(), new_list_of()), list()) expect_identical(vec_ptype2(new_list_of(), list()), list()) }) test_that("ptype2: list_of + list_of", { # Self-self x <- list_of(.ptype = integer(), .size = zap()) expect_identical(vec_ptype2(x, x), x) x <- list_of(.ptype = zap(), .size = 2L) expect_identical(vec_ptype2(x, x), x) x <- list_of(.ptype = integer(), .size = 2L) expect_identical(vec_ptype2(x, x), x) # All `size` variants with one fixed `ptype` x <- list_of(.ptype = integer(), .size = zap()) y <- list_of(.ptype = zap(), .size = 2L) expect_identical(vec_ptype2(x, y), list()) expect_identical(vec_ptype2(y, x), list()) x <- list_of(.ptype = integer(), .size = 2L) y <- list_of(.ptype = zap(), .size = 2L) expect_identical(vec_ptype2(x, y), list_of(.ptype = zap(), .size = 2L)) expect_identical(vec_ptype2(y, x), list_of(.ptype = zap(), .size = 2L)) x <- list_of(.ptype = integer(), .size = 1L) y <- list_of(.ptype = zap(), .size = 2L) expect_identical(vec_ptype2(x, y), list_of(.ptype = zap(), .size = 2L)) expect_identical(vec_ptype2(y, x), list_of(.ptype = zap(), .size = 2L)) x <- list_of(.ptype = integer(), .size = 2L) y <- list_of(.ptype = zap(), .size = 3L) expect_identical(vec_ptype2(x, y), list()) expect_identical(vec_ptype2(y, x), list()) # All `ptype` variants with one fixed `size` x <- list_of(.ptype = zap(), .size = 1L) y <- list_of(.ptype = integer(), .size = zap()) expect_identical(vec_ptype2(x, y), list()) expect_identical(vec_ptype2(y, x), list()) x <- list_of(.ptype = integer(), .size = 1L) y <- list_of(.ptype = integer(), .size = zap()) expect_identical(vec_ptype2(x, y), list_of(.ptype = integer(), .size = zap())) expect_identical(vec_ptype2(y, x), list_of(.ptype = integer(), .size = zap())) x <- list_of(.ptype = integer(), .size = 1L) y <- list_of(.ptype = double(), .size = zap()) expect_identical(vec_ptype2(x, y), list_of(.ptype = double(), .size = zap())) expect_identical(vec_ptype2(y, x), list_of(.ptype = double(), .size = zap())) x <- list_of(.ptype = integer(), .size = 1L) y <- list_of(.ptype = character(), .size = zap()) expect_identical(vec_ptype2(x, y), list()) expect_identical(vec_ptype2(y, x), list()) # All variants with both `size` and `ptype` fixed x <- list_of(.ptype = integer(), .size = 1L) y <- list_of(.ptype = integer(), .size = 1L) expect_identical(vec_ptype2(x, y), x) expect_identical(vec_ptype2(y, x), x) x <- list_of(.ptype = integer(), .size = 2L) y <- list_of(.ptype = double(), .size = 1L) expect_identical(vec_ptype2(x, y), list_of(.ptype = double(), .size = 2L)) expect_identical(vec_ptype2(y, x), list_of(.ptype = double(), .size = 2L)) x <- list_of(.ptype = integer(), .size = 2L) y <- list_of(.ptype = double(), .size = 3L) expect_identical(vec_ptype2(x, y), list_of(.ptype = double(), .size = zap())) expect_identical(vec_ptype2(y, x), list_of(.ptype = double(), .size = zap())) x <- list_of(.ptype = integer(), .size = 2L) y <- list_of(.ptype = character(), .size = 1L) expect_identical(vec_ptype2(x, y), list_of(.ptype = zap(), .size = 2L)) expect_identical(vec_ptype2(y, x), list_of(.ptype = zap(), .size = 2L)) }) test_that("cast: list_of to list", { x <- list_of(1, .ptype = double(), .size = zap()) expect_identical(vec_cast(x, list()), list(1)) x <- list_of(1, .ptype = zap(), .size = 1L) expect_identical(vec_cast(x, list()), list(1)) x <- list_of(1, .ptype = double(), .size = 1L) expect_identical(vec_cast(x, list()), list(1)) }) test_that("cast: list to list_of", { # Just type to <- list_of(1, .ptype = double(), .size = zap()) expect_identical( vec_cast(list(), to), list_of(.ptype = double(), .size = zap()) ) expect_snapshot(error = TRUE, { vec_cast(list("x"), to) }) # Just size to <- list_of(1:2, .ptype = zap(), .size = 2L) expect_identical( vec_cast(list(), to), list_of(.ptype = zap(), .size = 2L) ) expect_identical( vec_cast(list(1, "x"), to), list_of(c(1, 1), c("x", "x"), .ptype = zap(), .size = 2L) ) expect_snapshot(error = TRUE, { vec_cast(list(1:3), to) }) # Both type and size to <- list_of(1:2, .ptype = integer(), .size = 2L) expect_identical( vec_cast(list(), to), list_of(.ptype = integer(), .size = 2L) ) expect_identical( vec_cast(list(1, 2:3), to), list_of(c(1L, 1L), 2:3, .ptype = integer(), .size = 2L) ) expect_snapshot(error = TRUE, { vec_cast(list(1:3), to) }) expect_snapshot(error = TRUE, { vec_cast(list("x"), to) }) }) test_that("cast: list_of to list_of", { # Starts just type restricted x <- list_of(1, 2) # To a different type restricted to <- list_of(.ptype = integer(), .size = zap()) expect_identical(vec_cast(x, to), list_of(1L, 2L)) to <- list_of(.ptype = character(), .size = zap()) expect_snapshot(error = TRUE, { vec_cast(x, to) }) # To a size restricted instead to <- list_of(.ptype = zap(), .size = 1L) expect_identical(vec_cast(x, to), list_of(1, 2, .ptype = zap(), .size = 1L)) to <- list_of(.ptype = zap(), .size = 2L) expect_identical( vec_cast(x, to), list_of(c(1, 1), c(2, 2), .ptype = zap(), .size = 2L) ) y <- list_of(1:2, 2:3) to <- list_of(.ptype = zap(), .size = 3L) expect_snapshot(error = TRUE, { vec_cast(y, to) }) # To both type and size restricted to <- list_of(.ptype = integer(), .size = 1L) expect_identical( vec_cast(x, to), list_of(1L, 2L, .ptype = integer(), .size = 1L) ) to <- list_of(.ptype = integer(), .size = 2L) expect_identical( vec_cast(x, to), list_of(c(1L, 1L), c(2L, 2L), .ptype = integer(), .size = 2L) ) }) test_that("list coercions are symmetric and unchanging", { types <- list( list(), list_of(.ptype = integer()), list_of(.ptype = double()), list_of(.ptype = character()) ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) local_options(width = 200) expect_snapshot(print(mat)) }) test_that("max, list_of> is list_of>", { r_int <- list_of(.ptype = integer()) r_dbl <- list_of(.ptype = double()) expect_identical(vec_ptype_common(r_int, r_int), r_int) expect_identical(vec_ptype_common(r_dbl, r_dbl), r_dbl) expect_identical(vec_ptype_common(r_int, r_dbl), r_dbl) expect_identical(vec_ptype_common(r_dbl, r_int), r_dbl) r_one <- list_of(.ptype = zap(), .size = 1L) r_two <- list_of(.ptype = zap(), .size = 2L) expect_identical(vec_ptype_common(r_one, r_one), r_one) expect_identical(vec_ptype_common(r_two, r_two), r_two) expect_identical(vec_ptype_common(r_one, r_two), r_two) expect_identical(vec_ptype_common(r_two, r_one), r_two) }) test_that("can cast to self type", { x <- list_of(1, .ptype = double(), .size = zap()) expect_identical(vec_cast(x, x), x) x <- list_of(c(1, 2), .ptype = zap(), .size = 2L) expect_identical(vec_cast(x, x), x) x <- list_of(c(1, 2), .ptype = double(), .size = 2L) expect_identical(vec_cast(x, x), x) }) test_that("list_of casting retains outer names", { x <- list_of(x = 1, 2, z = 3) to <- list_of(.ptype = integer()) expect_named(vec_cast(x, to), c("x", "", "z")) to <- list_of(.ptype = zap(), .size = 2L) expect_named(vec_cast(x, to), c("x", "", "z")) }) test_that("safe casts work as expected", { x <- list_of(1) expect_equal(vec_cast(NULL, x), NULL) expect_equal(vec_cast(NA, x), list_of(NULL, .ptype = double())) expect_identical(vec_cast(list(1), x), list_of(1)) expect_identical(vec_cast(list(TRUE), x), list_of(1)) expect_identical(vec_cast(x, list()), list(1)) expect_identical(vec_cast(x, list()), list(1)) expect_error( vec_cast(list_of(1), list_of("")), class = "vctrs_error_incompatible_type" ) # These used to be allowed expect_error(vec_cast(1L, x), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1, x), class = "vctrs_error_incompatible_type") }) test_that("error call is passed to inner cast methods", { fn1 <- function() vec_cast(list_of(1), list_of("")) fn2 <- function() vec_cast(list(1), list_of("")) expect_snapshot({ (expect_error(fn1())) (expect_error(fn2())) }) }) test_that("lossy casts generate warning (no longer the case)", { # This used to be a lossy cast warning expect_error( vec_cast(list(c(1.5, 1), 1L), to = list_of(1L)), class = "vctrs_error_incompatible_type" ) }) test_that("invalid casts generate error", { expect_error( vec_cast(factor("a"), list_of(1)), class = "vctrs_error_incompatible_type" ) }) test_that("vec_ptype2(>, NA) is symmetric (#687)", { lof <- list_of(1, 2, 3, .ptype = double(), .size = zap()) expect_identical(vec_ptype2(lof, NA), vec_ptype(lof)) expect_identical(vec_ptype2(NA, lof), vec_ptype(lof)) lof <- list_of(1, 2, 3, .ptype = zap(), .size = 1L) expect_identical(vec_ptype2(lof, NA), vec_ptype(lof)) expect_identical(vec_ptype2(NA, lof), vec_ptype(lof)) }) test_that("list_of() coerces to list() and list_of() (#1701)", { expect_equal(vec_ptype_common(list_of(1), list()), list()) expect_equal(vec_cast_common(list_of(1), list()), list(list(1), list())) expect_equal(vec_ptype_common(list_of(1), list("")), list()) expect_equal(vec_cast_common(list_of(1), list("")), list(list(1), list(""))) # Fallback on common type failure expect_equal( vec_ptype_common(list_of(1), list_of("")), list() ) expect_equal( vec_ptype_common(list_of(1), list(), list_of("")), list() ) # Fallback on common size failure expect_equal( vec_ptype_common( list_of(1:2, .ptype = zap(), .size = 2L), list_of(1:3, .ptype = zap(), .size = 3L) ), list() ) expect_equal( vec_ptype_common( list_of(1:2, .ptype = zap(), .size = 2L), list(), list_of(1:3, .ptype = zap(), .size = 3L) ), list() ) }) test_that("can concatenate list and list-of (#1161)", { # With fixed type expect_equal( vec_c(list(1), list_of(2)), list(1, 2) ) expect_equal( vec_c(list(""), list_of(2)), list("", 2) ) # With fixed size expect_equal( vec_c(list(1), list_of(2:3, .ptype = zap(), .size = 2)), list(1, 2:3) ) expect_equal( vec_c(list(""), list_of(2:3, .ptype = zap(), .size = 2)), list("", 2:3) ) }) test_that("can combine a mix of named and unnamed list-ofs (#784)", { a <- new_list_of(list(x = 1L), ptype = integer()) b <- new_list_of(list(2L), ptype = integer()) expect <- new_list_of(list(x = 1L, 2L), ptype = integer()) expect_identical(vec_c(a, b), expect) }) vctrs/tests/testthat/test-subscript.R0000644000176200001440000001035715065005761017546 0ustar liggesuserstest_that("vec_as_subscript() coerces unspecified vectors", { expect_identical( vec_as_subscript(NA), NA ) expect_identical( vec_as_subscript(NA, logical = "error"), na_int ) expect_identical( vec_as_subscript(NA, logical = "error", numeric = "error"), na_chr ) }) test_that("vec_as_subscript() coerces subtypes and supertypes", { expect_identical(vec_as_subscript(factor("foo")), "foo") with_lgl_subtype({ expect_identical(vec_as_subscript(new_lgl_subtype(TRUE)), TRUE) }) with_lgl_supertype({ expect_identical(vec_as_subscript(new_lgl_supertype(TRUE)), TRUE) }) }) test_that("vec_as_subscript() handles NULL", { expect_identical(vec_as_subscript(NULL), int()) expect_error( vec_as_subscript(NULL, numeric = "error"), class = "vctrs_error_subscript_type" ) }) test_that("vec_as_subscript() handles symbols", { expect_identical(vec_as_subscript(quote(foo)), "foo") expect_identical(vec_as_subscript(quote(``)), "\u5e78") expect_error( vec_as_subscript(quote(foo), character = "error"), class = "vctrs_error_subscript_type" ) }) test_that("can customise subscript errors", { expect_snapshot({ (expect_error( with_tibble_cols(vec_as_subscript(env())), class = "vctrs_error_subscript_type" )) }) expect_snapshot({ (expect_error( with_dm_tables(vec_as_subscript(env())), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_subscript() checks dimensionality", { expect_snapshot({ (expect_error( vec_as_subscript(matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type" )) (expect_error( vec_as_subscript(array(TRUE, dim = c(1, 1, 1))), class = "vctrs_error_subscript_type" )) (expect_error( with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))), class = "vctrs_error_subscript_type" )) }) }) test_that("vec_as_subscript() works with vectors of dimensionality 1", { arr <- array(TRUE, dim = 1) expect_identical(vec_as_subscript(arr), arr) }) test_that("vec_as_subscript() forbids subscript types", { expect_snapshot( error = TRUE, vec_as_subscript(1L, logical = "error", numeric = "error") ) expect_snapshot( error = TRUE, vec_as_subscript("foo", logical = "error", character = "error") ) expect_snapshot(error = TRUE, vec_as_subscript(TRUE, logical = "error")) expect_snapshot(error = TRUE, vec_as_subscript("foo", character = "error")) expect_snapshot(error = TRUE, vec_as_subscript(NULL, numeric = "error")) expect_snapshot( error = TRUE, vec_as_subscript(quote(foo), character = "error") ) }) test_that("vec_as_subscript2() forbids subscript types", { expect_snapshot(error = TRUE, vec_as_subscript2(1L, numeric = "error")) expect_snapshot(error = TRUE, vec_as_subscript2("foo", character = "error")) expect_snapshot(error = TRUE, vec_as_subscript2(TRUE)) }) test_that("vec_as_subscript2() retains the call when throwing vec_as_subscript() errors (#1605)", { expect_snapshot( error = TRUE, vec_as_subscript2(1L, numeric = "error", call = call("foo")) ) expect_snapshot(error = TRUE, vec_as_subscript2(1.5, call = call("foo"))) }) test_that("vec_as_subscript2() retains the call when erroring on logical input (#1605)", { expect_snapshot(error = TRUE, vec_as_subscript2(TRUE, call = call("foo"))) }) test_that("vec_as_subscript() evaluates arg lazily", { expect_silent(vec_as_subscript(1L, arg = print("oof"))) expect_silent(vec_as_subscript_result( 1L, arg = print("oof"), NULL, logical = "error", numeric = "cast", character = "error" )) }) test_that("vec_as_subscript2() evaluates arg lazily", { expect_silent(vec_as_subscript2(1L, arg = print("oof"))) expect_silent(vec_as_subscript2_result( 1L, arg = print("oof"), NULL, numeric = "cast", character = "error" )) }) test_that("`logical = 'cast'` is deprecated", { expect_snapshot( error = TRUE, vec_as_subscript2(TRUE, logical = "cast") ) # `logical = "error"` still works expect_snapshot( error = TRUE, vec_as_subscript2(TRUE, logical = "error") ) }) test_that("lossy cast errors for scalar subscripts work (#1606)", { expect_snapshot( error = TRUE, vec_as_subscript2(1.5) ) }) vctrs/tests/testthat/test-slice-chop.R0000644000176200001440000003035715113325071017551 0ustar liggesuserstest_that("vec_chop() throws error with non-vector inputs", { expect_error(vec_chop(NULL), class = "vctrs_error_scalar_type") expect_error(vec_chop(environment()), class = "vctrs_error_scalar_type") }) test_that("atomics are split into a list", { x <- 1:5 expect_equal(vec_chop(x), as.list(x)) x <- letters[1:5] expect_equal(vec_chop(x), as.list(x)) }) test_that("atomic names are kept", { x <- set_names(1:5) result <- lapply(vec_chop(x), names) expect_equal(result, as.list(names(x))) }) test_that("base R classed objects are split into a list", { fctr <- factor(c("a", "b")) expect <- lapply(vec_seq_along(fctr), vec_slice, x = fctr) expect_equal(vec_chop(fctr), expect) date <- new_date(c(0, 1)) expect <- lapply(vec_seq_along(date), vec_slice, x = date) expect_equal(vec_chop(date), expect) }) test_that("base R classed object names are kept", { fctr <- set_names(factor(c("a", "b"))) result <- lapply(vec_chop(fctr), names) expect_equal(result, as.list(names(fctr))) }) test_that("list elements are split", { x <- list(1, 2) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("data frames are split rowwise", { x <- data_frame(x = 1:2, y = c("a", "b")) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("vec_chop() keeps data frame row names", { x <- data_frame(x = 1:2, y = c("a", "b")) rownames(x) <- c("r1", "r2") result <- lapply(vec_chop(x), rownames) expect_equal(result, list("r1", "r2")) }) test_that("vec_chop() keeps data frame row names for data frames with 0 columns (#1722)", { x <- data_frame(.size = 3) rownames(x) <- c("r1", "r2", "r3") out <- lapply(vec_chop(x), rownames) expect_identical(out, list("r1", "r2", "r3")) out <- vec_chop(x, indices = list(c(2, NA), 3)) out <- lapply(out, rownames) expect_identical(out, list(c("r2", "...2"), "r3")) out <- vec_chop(x, sizes = c(1, 2, 0)) out <- lapply(out, rownames) expect_identical(out, list("r1", c("r2", "r3"), character())) }) test_that("data frames with 0 columns retain the right number of rows (#1722)", { x <- data_frame(.size = 4) one <- data_frame(.size = 1L) expect_identical( vec_chop(x), list(one, one, one, one) ) expect_identical( vec_chop(x, indices = list(c(1, 3, 2), c(3, NA))), list( data_frame(.size = 3), data_frame(.size = 2) ) ) expect_identical( vec_chop(x, sizes = c(3, 1, 0)), list( data_frame(.size = 3), data_frame(.size = 1), data_frame(.size = 0) ) ) }) test_that("matrices / arrays are split rowwise", { x <- array(1:12, c(2, 2, 2)) result <- list(vec_slice(x, 1), vec_slice(x, 2)) expect_equal(vec_chop(x), result) }) test_that("matrix / array row names are kept", { x <- array(1:12, c(2, 2, 2), dimnames = list(c("r1", "r2"), c("c1", "c2"))) result <- lapply(vec_chop(x), rownames) expect_equal(result, list("r1", "r2")) }) test_that("matrices / arrays without row names have other dimension names kept", { x <- array(1:12, c(2, 2, 2), dimnames = list(NULL, c("c1", "c2"))) result <- lapply(vec_chop(x), colnames) expect_equal(result, list(c("c1", "c2"), c("c1", "c2"))) }) test_that("vec_chop() doesn't restore when attributes have already been restored", { local_methods( `[.vctrs_foobar` = function(x, i, ...) structure("dispatched", foo = "bar"), vec_restore.vctrs_foobar = function(...) { structure("dispatched-and-restored", foo = "bar") } ) result <- vec_chop(foobar(NA))[[1]] expect_equal(result, structure("dispatched", foo = "bar")) result <- vec_chop(foobar(NA), indices = list(1))[[1]] expect_equal(result, structure("dispatched", foo = "bar")) result <- vec_chop(foobar(NA), sizes = 1)[[1]] expect_equal(result, structure("dispatched", foo = "bar")) }) test_that("vec_chop() does not restore when attributes have not been restored by `[`", { local_methods( `[.vctrs_foobar` = function(x, i, ...) "dispatched", vec_restore.vctrs_foobar = function(...) "dispatched-and-restored" ) result <- vec_chop(foobar(NA))[[1]] expect_equal(result, "dispatched") result <- vec_chop(foobar(NA), indices = list(1))[[1]] expect_equal(result, "dispatched") result <- vec_chop(foobar(NA), sizes = 1)[[1]] expect_equal(result, "dispatched") }) test_that("vec_chop() falls back to `[` for shaped objects with no proxy", { x <- foobar(1) dim(x) <- c(1, 1) result <- vec_chop(x)[[1]] expect_equal(result, x) result <- vec_chop(x, indices = list(1))[[1]] expect_equal(result, x) result <- vec_chop(x, sizes = 1)[[1]] expect_equal(result, x) }) test_that("`indices` are validated", { expect_snapshot(error = TRUE, { vec_chop(1, indices = 1) }) expect_snapshot({ (expect_error( vec_chop(1, indices = list(1.5)), class = "vctrs_error_subscript_type" )) }) expect_snapshot({ (expect_error( vec_chop(1, indices = list(2)), class = "vctrs_error_subscript_oob" )) }) }) test_that("`sizes` are validated", { expect_snapshot(error = TRUE, { vec_chop("a", sizes = "a") }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = 2) }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = -1) }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = NA_integer_) }) expect_snapshot(error = TRUE, { vec_chop("a", sizes = c(1, 1)) }) }) test_that("can't use both `indices` and `sizes`", { expect_snapshot(error = TRUE, { vec_chop(1, indices = list(1), sizes = 1) }) }) test_that("`sizes` allows `0`", { expect_identical( vec_chop(c("a", "b"), sizes = c(1, 0, 0, 1, 0)), list("a", character(), character(), "b", character()) ) }) test_that("size 0 `indices` list is allowed", { expect_equal(vec_chop(1, indices = list()), list()) }) test_that("individual index values of size 0 are allowed", { expect_equal(vec_chop(1, indices = list(integer())), list(numeric())) df <- data.frame(a = 1, b = "1") expect_equal(vec_chop(df, indices = list(integer())), list(vec_ptype(df))) }) test_that("individual index values of `NULL` are allowed", { expect_equal(vec_chop(1, indices = list(NULL)), list(numeric())) df <- data.frame(a = 1, b = "1") expect_equal(vec_chop(df, indices = list(NULL)), list(vec_ptype(df))) }) test_that("data frame row names are kept when `indices` or `sizes` are used", { x <- data_frame(x = 1:2, y = c("a", "b")) rownames(x) <- c("r1", "r2") result <- lapply(vec_chop(x, indices = list(1, 1:2)), rownames) expect_equal(result, list("r1", c("r1", "r2"))) result <- lapply(vec_chop(x, sizes = c(1, 0, 1)), rownames) expect_equal(result, list("r1", character(), "r2")) }) test_that("vec_chop(, indices/sizes =) can be equivalent to the default", { x <- 1:5 indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices = indices), vec_chop(x)) sizes <- vec_rep(1L, times = vec_size(x)) expect_equal(vec_chop(x, sizes = sizes), vec_chop(x)) }) test_that("vec_chop(, indices/sizes =) can be equivalent to the default", { x <- data.frame(x = 1:5) indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices = indices), vec_chop(x)) sizes <- vec_rep(1L, times = vec_size(x)) expect_equal(vec_chop(x, sizes = sizes), vec_chop(x)) }) test_that("vec_chop(, indices/sizes =) can be equivalent to the default", { x <- array(1:8, c(2, 2, 2)) indices <- as.list(vec_seq_along(x)) expect_equal(vec_chop(x, indices = indices), vec_chop(x)) sizes <- vec_rep(1L, times = vec_size(x)) expect_equal(vec_chop(x, sizes = sizes), vec_chop(x)) }) test_that("`indices` cannot use names", { x <- set_names(1:3, c("a", "b", "c")) expect_error( vec_chop(x, indices = list("a", c("b", "c"))), class = "vctrs_error_subscript_type" ) x <- array(1:4, c(2, 2), dimnames = list(c("r1", "r2"))) expect_error( vec_chop(x, indices = list("r1")), class = "vctrs_error_subscript_type" ) x <- data.frame(x = 1, row.names = "r1") expect_error( vec_chop(x, indices = list("r1")), class = "vctrs_error_subscript_type" ) }) test_that("fallback method with `indices` and `sizes` works", { fctr <- factor(c("a", "b")) indices <- list(1, c(1, 2)) sizes <- c(1, 0, 1) expect_equal( vec_chop(fctr, indices = indices), map(indices, vec_slice, x = fctr) ) expect_equal( vec_chop(fctr, sizes = sizes), list(vec_slice(fctr, 1), vec_slice(fctr, 0), vec_slice(fctr, 2)) ) }) test_that("vec_chop() falls back to `[` for shaped objects with no proxy when `indices` or `sizes` are provided", { x <- foobar(1) dim(x) <- c(1, 1) result <- vec_chop(x, indices = list(1))[[1]] expect_equal(result, x) result <- vec_chop(x, sizes = 1)[[1]] expect_equal(result, x) }) test_that("vec_chop() with data frame proxies always uses the proxy's length info", { local_methods( vec_proxy.vctrs_proxy = function(x, ...) { x <- proxy_deref(x) new_data_frame(list(x = x$x, y = x$y)) }, vec_restore.vctrs_proxy = function(x, to, ...) { new_proxy(list(x = x$x, y = x$y)) } ) x <- new_proxy(list(x = 1:2, y = 3:4)) result <- vec_chop(x) result1 <- result[[1]] result2 <- result[[2]] expect1 <- new_proxy(list(x = 1L, y = 3L)) expect2 <- new_proxy(list(x = 2L, y = 4L)) expect_identical(proxy_deref(result1), proxy_deref(expect1)) expect_identical(proxy_deref(result2), proxy_deref(expect2)) }) test_that("ALTREP objects always generate materialized chops (#1450)", { x <- chr_rle(foo = 10L, bar = 5L) # `x` starts in compact form expect_false(chr_rle_is_materialized(x)) result <- vec_chop(x) # Chopping materializes `x` expect_true(chr_rle_is_materialized(x)) # And chopped elements are not ALTREP vectors expect_false(any(map_lgl(result, is_altrep))) expect <- vec_chop(c(rep("foo", 10), rep("bar", 5))) expect_identical(result, expect) }) test_that("`vec_chop(x, indices)` backwards compatible behavior works", { # No issues here expect_identical( vec_chop(1:2, list(1, 2)), vec_chop(1:2, indices = list(1, 2)) ) # Errors still talk about `indices` expect_snapshot(error = TRUE, { vec_chop(1:2, 1) }) expect_snapshot(error = TRUE, { vec_chop(1, list(1), sizes = 1) }) # These cases aren't allowed because they weren't possible previously either expect_snapshot(error = TRUE, { vec_chop(1, list(1), 2) }) expect_snapshot(error = TRUE, { vec_chop(1, list(1), indices = list(1)) }) }) # vec_chop + compact_seq -------------------------------------------------- # `start` is 0-based test_that("can chop base vectors with compact seqs", { start <- 1L size <- 2L expect_identical(vec_chop_seq(lgl(1, 0, 1), start, size), list(lgl(0, 1))) expect_identical(vec_chop_seq(int(1, 2, 3), start, size), list(int(2, 3))) expect_identical(vec_chop_seq(dbl(1, 2, 3), start, size), list(dbl(2, 3))) expect_identical(vec_chop_seq(cpl(1, 2, 3), start, size), list(cpl(2, 3))) expect_identical( vec_chop_seq(chr("1", "2", "3"), start, size), list(chr("2", "3")) ) expect_identical(vec_chop_seq(raw2(1, 2, 3), start, size), list(raw2(2, 3))) expect_identical(vec_chop_seq(list(1, 2, 3), start, size), list(list(2, 3))) }) test_that("can chop with a decreasing compact seq", { expect_equal(vec_chop_seq(int(1, 2, 3), 1L, 2L, FALSE), list(int(2, 1))) }) test_that("can chop with multiple compact seqs", { start <- c(1L, 0L) size <- c(1L, 3L) expect_equal( vec_chop_seq(int(1, 2, 3), start, size), list(int(2), int(1, 2, 3)) ) }) test_that("can chop S3 objects using the fallback method with compact seqs", { x <- factor(c("a", "b", "c", "d")) expect_equal(vec_chop_seq(x, 0L, 0L), list(vec_slice(x, integer()))) expect_equal(vec_chop_seq(x, 0L, 1L), list(vec_slice(x, 1L))) expect_equal(vec_chop_seq(x, 2L, 2L), list(vec_slice(x, 3:4))) }) test_that("data frames with 0 columns retain the right number of rows with compact seqs (#1722)", { x <- data_frame(.size = 4) out <- vec_chop_seq(x, starts = c(0L, 0L, 2L), sizes = c(0L, 2L, 1L)) out <- map_int(out, vec_size) expect_identical(out, c(0L, 2L, 1L)) }) test_that("`vec_chop()` can't take `compact_seq()` indices directly", { expect_snapshot(error = TRUE, transform = scrub_internal_error_line_number, { vec_chop(1:2, indices = list(compact_seq(1, 2))) }) }) vctrs/tests/testthat/helper-encoding.R0000644000176200001440000000174015156537555017626 0ustar liggesusersencodings <- function(bytes = FALSE) { string <- "\u00B0C" utf8 <- iconv(string, from = Encoding(string), to = "UTF-8") unknown <- iconv(string, from = Encoding(string), to = "", mark = FALSE) latin1 <- iconv(string, from = Encoding(string), to = "latin1") out <- list(utf8 = utf8, unknown = unknown, latin1 = latin1) if (bytes) { out <- list2(!!!out, bytes = encoding_bytes()) } out } encoding_bytes <- function() { string <- "\u00B0C" unknown <- iconv(string, from = Encoding(string), to = "", mark = FALSE) bytes <- unknown Encoding(bytes) <- "bytes" bytes } expect_equal_encoding <- function(object, expected) { args <- vec_recycle_common(object, expected) expect_identical(Encoding(args[[1L]]), Encoding(args[[2L]])) } expect_utf8_encoded <- function(object) { expect_identical(Encoding(object), rep("UTF-8", length(object))) } expect_latin1_encoded <- function(object) { expect_identical(Encoding(object), rep("latin1", length(object))) } vctrs/tests/testthat/helper-cast.R0000644000176200001440000000034615065005761016757 0ustar liggesusersexpect_lossy_cast <- function(expr) { cnd <- NULL out <- with_handlers( warning = calling(function(x) { cnd <<- x cnd_muffle(x) }), expr ) expect_s3_class(cnd, "vctrs_warning_cast_lossy") out } vctrs/tests/testthat/test-expand.R0000644000176200001440000001006215065005761017000 0ustar liggesuserstest_that("expands the first column slowest by default", { x <- 1:4 y <- 1:3 z <- 1:2 expect_identical( vec_expand_grid(x = x, y = y, z = z), data_frame( x = vec_rep(vec_rep_each(x, times = 6), times = 1), y = vec_rep(vec_rep_each(y, times = 2), times = 4), z = vec_rep(vec_rep_each(z, times = 1), times = 12) ) ) }) test_that("can expand the first column fastest with `.vary`", { x <- 1:4 y <- 1:3 z <- 1:2 expect_identical( vec_expand_grid(x = x, y = y, z = z, .vary = "fastest"), data_frame( x = vec_rep(vec_rep_each(x, times = 1), times = 6), y = vec_rep(vec_rep_each(y, times = 4), times = 2), z = vec_rep(vec_rep_each(z, times = 12), times = 1) ) ) }) test_that("size 0 elements force a size 0 result", { expect_identical( vec_expand_grid(x = 1:3, y = integer(), z = 1:2), data_frame(x = integer(), y = integer(), z = integer()) ) expect_identical( vec_expand_grid(x = integer()), data_frame(x = integer()) ) }) test_that("returns 1 row and 0 cols with no input", { # Because `prod(integer()) == 1L` expect_identical(vec_expand_grid(), data_frame(.size = 1L)) }) test_that("drops `NULL` values", { expect_identical( vec_expand_grid(NULL, NULL), vec_expand_grid() ) # And that happens before all names checks expect_identical( vec_expand_grid(x = 1:2, x = NULL, y = 1:3, NULL), vec_expand_grid(x = 1:2, y = 1:3) ) }) test_that("works with data frame inputs", { x <- data_frame(a = 1:2, b = 2:3) y <- 1:3 expect_identical( vec_expand_grid(x = x, y = y), data_frame( x = vec_rep(vec_rep_each(x, times = 3), times = 1), y = vec_rep(vec_rep_each(y, times = 1), times = 2), ) ) }) test_that("`.name_repair` isn't affected by `.vary`", { expect <- vec_as_names(c("a", "b", "a", "z"), repair = "unique_quiet") expect_named( vec_expand_grid( a = 1, b = 2, a = 3, z = 4, .vary = "slowest", .name_repair = "unique_quiet" ), expect ) expect_named( vec_expand_grid( a = 1, b = 2, a = 3, z = 4, .vary = "fastest", .name_repair = "unique_quiet" ), expect ) }) test_that("can use `.name_repair`", { expect_identical( vec_expand_grid(a = 1:2, a = 2:3, .name_repair = "minimal"), data_frame( a = c(1L, 1L, 2L, 2L), a = c(2L, 3L, 2L, 3L), .name_repair = "minimal" ) ) }) test_that("inputs must be named", { expect_snapshot(error = TRUE, { vec_expand_grid(1) }) expect_snapshot(error = TRUE, { vec_expand_grid(x = 1, 2, y = 3) }) }) test_that("catches duplicate names by default", { expect_snapshot(error = TRUE, { vec_expand_grid(a = 1, a = 2) }) }) test_that("errors on non vectors and mentions the element name", { expect_snapshot(error = TRUE, { vec_expand_grid(y = environment()) }) }) test_that("can adjust the `.error_call`", { my_expand_grid <- function() { vec_expand_grid(x = environment(), .error_call = current_env()) } expect_snapshot(error = TRUE, { my_expand_grid() }) }) test_that("errors nicely when expansion results in a size larger than `R_len_t`", { # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") x <- seq_len((2^31 - 1) / 2) y <- 1:3 expect_snapshot(error = TRUE, { vec_expand_grid(x = x, y = y) }) }) test_that("errors nicely when expansion results in a size larger than `R_xlen_t`", { # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") x <- seq_len(2^31 - 1) expect_snapshot(error = TRUE, transform = scrub_internal_error_line_number, { vec_expand_grid(x = x, y = x) }) }) test_that("validates `.vary`", { expect_snapshot(error = TRUE, { vec_expand_grid(.vary = 1) }) expect_snapshot(error = TRUE, { vec_expand_grid(.vary = "x") }) }) vctrs/tests/testthat/test-slice-assign.R0000644000176200001440000016145615113325071020111 0ustar liggesuserstest_that("slice-assign throws error with non-vector inputs", { x <- environment() expect_error(vec_slice(x, 1L) <- 1L, class = "vctrs_error_scalar_type") }) test_that("slice-assign throws error with non-vector `value`", { x <- 1L expect_error(vec_slice(x, 1L) <- NULL, class = "vctrs_error_scalar_type") expect_error( vec_slice(x, 1L) <- environment(), class = "vctrs_error_scalar_type" ) }) test_that("assign throws error with non-vector `value`", { x <- 1L expect_snapshot(error = TRUE, { vec_assign(x, 1L, NULL) }) expect_snapshot(error = TRUE, { vec_assign(x, 1L, NULL, slice_value = TRUE) }) expect_snapshot(error = TRUE, { vec_assign(x, 1L, NULL, value_arg = "foo") }) expect_snapshot(error = TRUE, { vec_assign(x, 1L, NULL, slice_value = TRUE, value_arg = "foo") }) expect_snapshot(error = TRUE, { vec_assign(x, 1L, environment(), value_arg = "foo") }) expect_snapshot(error = TRUE, { vec_assign(x, 1L, environment(), slice_value = TRUE, value_arg = "foo") }) }) test_that("can slice-assign NULL", { x <- NULL vec_slice(x, 1L) <- 1 expect_identical(x, NULL) }) test_that("can assign on NULL `x`", { x <- NULL expect_identical(vec_assign(x, TRUE, 1), NULL) expect_identical(vec_assign(x, TRUE, 1, slice_value = TRUE), NULL) }) test_that("can slice-assign base vectors", { x <- rep(FALSE, 3) vec_slice(x, 2) <- TRUE expect_identical(x, lgl(FALSE, TRUE, FALSE)) x <- rep(0L, 3) vec_slice(x, 2) <- 1L expect_identical(x, int(0L, 1L, 0L)) x <- rep(0., 3) vec_slice(x, 2) <- 1 expect_identical(x, dbl(0, 1, 0)) x <- rep(0i, 3) vec_slice(x, 2) <- 1i expect_identical(x, cpl(0i, 1i, 0i)) x <- rep("", 3) vec_slice(x, 2) <- "foo" expect_identical(x, chr("", "foo", "")) x <- as.raw(rep(0, 3)) vec_slice(x, 2) <- as.raw(1) expect_identical(x, as.raw(c(0, 1, 0))) x <- rep(list(NULL), 3) vec_slice(x, 2) <- list(NA) expect_identical(x, list(NULL, NA, NULL)) }) test_that("can assign base vectors", { x <- rep(FALSE, 3) expect_identical(vec_assign(x, 2, TRUE), lgl(FALSE, TRUE, FALSE)) expect_identical(x, rep(FALSE, 3)) x <- rep(0L, 3) expect_identical(vec_assign(x, 2, 1L), int(0L, 1L, 0L)) expect_identical(x, rep(0L, 3)) x <- rep(0., 3) expect_identical(vec_assign(x, 2, 1), dbl(0, 1, 0)) expect_identical(x, rep(0., 3)) x <- rep(0i, 3) expect_identical(vec_assign(x, 2, 1i), cpl(0i, 1i, 0i)) expect_identical(x, rep(0i, 3)) x <- rep("", 3) expect_identical(vec_assign(x, 2, "foo"), chr("", "foo", "")) expect_identical(x, rep("", 3)) x <- as.raw(rep(0, 3)) expect_identical(vec_assign(x, 2, as.raw(1)), as.raw(c(0, 1, 0))) expect_identical(x, as.raw(rep(0, 3))) x <- rep(list(NULL), 3) expect_identical(vec_assign(x, 2, list(NA)), list(NULL, NA, NULL)) expect_identical(x, rep(list(NULL), 3)) }) test_that("can assign base vectors with logical indices", { # Logical indices have their own optimized path so we test them specially condition <- c(TRUE, FALSE, NA, TRUE) x <- rep(FALSE, 4) value <- c(NA, TRUE, TRUE, TRUE) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, lgl(NA, FALSE, FALSE, TRUE)) x <- rep(0L, 4) value <- c(NA, 1L, 2L, 3L) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, int(NA, 0L, 0L, 3L)) x <- rep(0, 4) value <- c(NA, 1, 2, 3) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, dbl(NA, 0, 0, 3)) x <- rep(0i, 4) na <- complex(real = NA, imaginary = NA) value <- c(na, 1i, 2i, 3i) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, c(na, 0i, 0i, 3i)) x <- rep("", 4) value <- c(NA, "foo", "bar", "baz") y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, c(NA, "", "", "baz")) x <- as.raw(rep(0, 4)) value <- as.raw(c(1, 2, 3, 4)) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, as.raw(c(1, 0, 0, 4))) x <- rep(list(1), 4) value <- list(NA, 2, 3, 4) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, list(NA, 1, 1, 4)) }) test_that("can assign base vectors with recycling of `value`", { x <- rep(FALSE, 3) expect_identical(vec_assign(x, c(3, 1), TRUE), lgl(TRUE, FALSE, TRUE)) expect_identical(x, rep(FALSE, 3)) x <- rep(0L, 3) expect_identical(vec_assign(x, c(3, 1), 1L), int(1L, 0L, 1L)) expect_identical(x, rep(0L, 3)) x <- rep(0., 3) expect_identical(vec_assign(x, c(3, 1), 1), dbl(1, 0, 1)) expect_identical(x, rep(0., 3)) x <- rep(0i, 3) expect_identical(vec_assign(x, c(3, 1), 1i), cpl(1i, 0i, 1i)) expect_identical(x, rep(0i, 3)) x <- rep("", 3) expect_identical(vec_assign(x, c(3, 1), "foo"), chr("foo", "", "foo")) expect_identical(x, rep("", 3)) x <- as.raw(rep(0, 3)) expect_identical(vec_assign(x, c(3, 1), as.raw(1)), as.raw(c(1, 0, 1))) expect_identical(x, as.raw(rep(0, 3))) x <- rep(list(NULL), 3) expect_identical(vec_assign(x, c(3, 1), list(NA)), list(NA, NULL, NA)) expect_identical(x, rep(list(NULL), 3)) }) test_that("can assign base vectors with logical indices with recycled `value`", { # Logical indices have their own optimized path so we test them specially condition <- c(TRUE, FALSE, NA, TRUE) x <- rep(FALSE, 4) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, lgl(NA, FALSE, FALSE, NA)) x <- rep(0L, 4) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, int(NA, 0L, 0L, NA)) x <- rep(0, 4) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, dbl(NA, 0, 0, NA)) x <- rep(0i, 4) na <- complex(real = NA, imaginary = NA) value <- na y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, c(na, 0i, 0i, na)) x <- rep("", 4) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, c(NA, "", "", NA)) x <- as.raw(rep(0, 4)) value <- as.raw(1) y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, as.raw(c(1, 0, 0, 1))) x <- rep(list(1), 4) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, list(NULL, 1, 1, NULL)) }) test_that("can assign shaped base vectors", { mat <- as.matrix x <- mat(rep(FALSE, 3)) expect_identical(vec_assign(x, 2, TRUE), mat(lgl(FALSE, TRUE, FALSE))) expect_identical(x, mat(rep(FALSE, 3))) x <- mat(rep(0L, 3)) expect_identical(vec_assign(x, 2, 1L), mat(int(0L, 1L, 0L))) expect_identical(x, mat(rep(0L, 3))) x <- mat(rep(0, 3)) expect_identical(vec_assign(x, 2, 1), mat(dbl(0, 1, 0))) expect_identical(x, mat(rep(0, 3))) x <- mat(rep(0i, 3)) expect_identical(vec_assign(x, 2, 1i), mat(cpl(0i, 1i, 0i))) expect_identical(x, mat(rep(0i, 3))) x <- mat(rep("", 3)) expect_identical(vec_assign(x, 2, "foo"), mat(chr("", "foo", ""))) expect_identical(x, mat(rep("", 3))) x <- mat(as.raw(rep(0, 3))) expect_identical(vec_assign(x, 2, as.raw(1)), mat(as.raw(c(0, 1, 0)))) expect_identical(x, mat(as.raw(rep(0, 3)))) mat <- as.matrix x <- mat(rep(list(NULL), 3)) expect_identical(vec_assign(x, 2, list(NA)), mat(list(NULL, NA, NULL))) expect_identical(x, mat(rep(list(NULL), 3))) }) test_that("can assign shaped base vectors with logical indices", { # Logical indices have their own optimized path so we test them specially mat <- as.matrix condition <- c(TRUE, FALSE, NA, TRUE) x <- mat(rep(FALSE, 4)) value <- mat(c(NA, TRUE, TRUE, TRUE)) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, FALSE, FALSE, TRUE))) x <- mat(rep(0L, 4)) value <- mat(c(NA, 1L, 2L, 3L)) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, 0L, 0L, 3L))) x <- mat(rep(0, 4)) value <- mat(c(NA, 1, 2, 3)) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, 0, 0, 3))) x <- mat(rep(0i, 4)) value <- mat(c(1i, 2i, 3i, 4i)) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(1i, 0i, 0i, 4i))) x <- mat(rep("", 4)) value <- mat(c(NA, "foo", "bar", "baz")) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, "", "", "baz"))) x <- mat(as.raw(rep(0, 4))) value <- mat(as.raw(c(1, 2, 3, 4))) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(as.raw(c(1, 0, 0, 4)))) x <- mat(rep(list(1), 4)) value <- mat(list(NA, 2, 3, 4)) y <- vec_assign(x, condition, vec_slice(value, condition)) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(list(NA, 1, 1, 4))) }) test_that("can assign shaped base vectors with recycling of `value`", { mat <- as.matrix x <- mat(rep(FALSE, 3)) expect_identical(vec_assign(x, c(3, 1), TRUE), mat(lgl(TRUE, FALSE, TRUE))) expect_identical(x, mat(rep(FALSE, 3))) x <- mat(rep(0L, 3)) expect_identical(vec_assign(x, c(3, 1), 1L), mat(int(1L, 0L, 1L))) expect_identical(x, mat(rep(0L, 3))) x <- mat(rep(0, 3)) expect_identical(vec_assign(x, c(3, 1), 1), mat(dbl(1, 0, 1))) expect_identical(x, mat(rep(0, 3))) x <- mat(rep(0i, 3)) expect_identical(vec_assign(x, c(3, 1), 1i), mat(cpl(1i, 0i, 1i))) expect_identical(x, mat(rep(0i, 3))) x <- mat(rep("", 3)) expect_identical(vec_assign(x, c(3, 1), "foo"), mat(chr("foo", "", "foo"))) expect_identical(x, mat(rep("", 3))) x <- mat(as.raw(rep(0, 3))) expect_identical(vec_assign(x, c(3, 1), as.raw(1)), mat(as.raw(c(1, 0, 1)))) expect_identical(x, mat(as.raw(rep(0, 3)))) mat <- as.matrix x <- mat(rep(list(NULL), 3)) expect_identical(vec_assign(x, c(3, 1), list(NA)), mat(list(NA, NULL, NA))) expect_identical(x, mat(rep(list(NULL), 3))) }) test_that("can assign shaped base vectors with logical indices with recycling of `value`", { # Logical indices have their own optimized path so we test them specially mat <- as.matrix condition <- c(TRUE, FALSE, NA, TRUE) x <- mat(rep(FALSE, 4)) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, FALSE, FALSE, NA))) x <- mat(rep(0L, 4)) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, 0L, 0L, NA))) x <- mat(rep(0, 4)) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, 0, 0, NA))) x <- mat(rep(0i, 4)) value <- NA na <- complex(real = NA, imaginary = NA) y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(na, 0i, 0i, na))) x <- mat(rep("", 4)) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(c(NA, "", "", NA))) x <- mat(as.raw(rep(1, 4))) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(as.raw(c(0, 1, 1, 0)))) x <- mat(rep(list(1), 4)) value <- NA y <- vec_assign(x, condition, value) z <- vec_assign(x, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, mat(list(NULL, 1, 1, NULL))) }) test_that("can assign object of any dimensionality", { x1 <- ones(3) x2 <- ones(3, 4) x3 <- ones(3, 4, 5) x4 <- ones(3, 4, 5, 6) expect_identical(vec_assign(x1, 1L, 2L), array(rep(c(2, 1, 1), 1), dim = 3)) expect_identical( vec_assign(x2, 1L, 2L), array(rep(c(2, 1, 1), 3), dim = c(3, 4)) ) expect_identical( vec_assign(x3, 1L, 2L), array(rep(c(2, 1, 1), 12), dim = c(3, 4, 5)) ) expect_identical( vec_assign(x4, 1L, 2L), array(rep(c(2, 1, 1), 60), dim = c(3, 4, 5, 6)) ) # With recycling of `value` expect_identical( vec_assign(x1, c(3L, 2L), 2L), array(rep(c(1, 2, 2), 1), dim = 3) ) expect_identical( vec_assign(x2, c(3L, 2L), 2L), array(rep(c(1, 2, 2), 3), dim = c(3, 4)) ) expect_identical( vec_assign(x3, c(3L, 2L), 2L), array(rep(c(1, 2, 2), 12), dim = c(3, 4, 5)) ) expect_identical( vec_assign(x4, c(3L, 2L), 2L), array(rep(c(1, 2, 2), 60), dim = c(3, 4, 5, 6)) ) }) test_that("can assign object of any dimensionality with logical indices", { # Logical indices have their own optimized path so we test them specially # Non barrier (integer, double, logical, etc) x1 <- ones(4) x2 <- ones(4, 5) x3 <- ones(4, 5, 6) x4 <- ones(4, 5, 6, 7) # Barrier (list, character) x1_list <- ones_list(4) x2_list <- ones_list(4, 5) x3_list <- ones_list(4, 5, 6) x4_list <- ones_list(4, 5, 6, 7) condition <- c(TRUE, FALSE, NA, TRUE) # No recycling value <- c(2, 3, 4, 5) value_list <- as.list(value) y <- vec_assign(x1, condition, vec_slice(value, condition)) z <- vec_assign(x1, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 5), 1), dim = dim(x1))) y <- vec_assign(x1_list, condition, vec_slice(value_list, condition)) z <- vec_assign(x1_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 5), 1), dim = dim(x1_list))) y <- vec_assign(x2, condition, vec_slice(value, condition)) z <- vec_assign(x2, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 5), 4), dim = dim(x2))) y <- vec_assign(x2_list, condition, vec_slice(value_list, condition)) z <- vec_assign(x2_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 5), 4), dim = dim(x2_list))) y <- vec_assign(x3, condition, vec_slice(value, condition)) z <- vec_assign(x3, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 5), 20), dim = dim(x3))) y <- vec_assign(x3_list, condition, vec_slice(value_list, condition)) z <- vec_assign(x3_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 5), 20), dim = dim(x3_list))) y <- vec_assign(x4, condition, vec_slice(value, condition)) z <- vec_assign(x4, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 5), 120), dim = dim(x4))) y <- vec_assign(x4_list, condition, vec_slice(value_list, condition)) z <- vec_assign(x4_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 5), 120), dim = dim(x4_list))) # `value` recycling value <- 2 value_list <- as.list(value) y <- vec_assign(x1, condition, value) z <- vec_assign(x1, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 2), 1), dim = dim(x1))) y <- vec_assign(x1_list, condition, value_list) z <- vec_assign(x1_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 2), 1), dim = dim(x1_list))) y <- vec_assign(x2, condition, value) z <- vec_assign(x2, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 2), 4), dim = dim(x2))) y <- vec_assign(x2_list, condition, value_list) z <- vec_assign(x2_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 2), 4), dim = dim(x2_list))) y <- vec_assign(x3, condition, value) z <- vec_assign(x3, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 2), 20), dim = dim(x3))) y <- vec_assign(x3_list, condition, value_list) z <- vec_assign(x3_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 2), 20), dim = dim(x3_list))) y <- vec_assign(x4, condition, value) z <- vec_assign(x4, condition, value, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(c(2, 1, 1, 2), 120), dim = dim(x4))) y <- vec_assign(x4_list, condition, value_list) z <- vec_assign(x4_list, condition, value_list, slice_value = TRUE) expect_identical(y, z) expect_identical(y, array(rep(list(2, 1, 1, 2), 120), dim = dim(x4_list))) }) test_that("atomics can't be assigned in lists", { x <- list(NULL) expect_error(vec_slice(x, 1) <- 1, class = "vctrs_error_incompatible_type") expect_error(vec_assign(x, 1, 2), class = "vctrs_error_incompatible_type") expect_error( vec_slice(x, 1) <- "foo", class = "vctrs_error_incompatible_type" ) expect_error(vec_assign(x, 1, "foo"), class = "vctrs_error_incompatible_type") }) test_that("Unspecified `NA` vector can be assigned into lists", { x <- list(1, 2) vec_slice(x, 1) <- NA expect_identical(x, list(NULL, 2)) }) test_that("monitoring test - unspecified() can be assigned in lists", { x <- list(1, 2) expect_error(vec_slice(x, 1) <- unspecified(1), NA) expect_equal(x, list(NULL, 2)) }) test_that("can assign and slice-assign data frames", { df <- data.frame(x = 1:2) df$y <- data.frame(a = 2:1) orig <- duplicate(df, shallow = FALSE) other <- data.frame(x = 3) other$y <- data.frame(a = 3) exp <- data.frame(x = int(3, 2)) exp$y <- data.frame(a = int(3, 1)) expect_identical(vec_assign(df, 1, other), exp) expect_identical(df, orig) expect_identical(vec_assign(df, 1, other, slice_value = TRUE), exp) expect_identical(df, orig) vec_slice(df, 1) <- other expect_identical(df, exp) }) test_that("can assign using logical index", { x <- c(2, 1) vec_slice(x, TRUE) <- 3 expect_equal(x, c(3, 3)) vec_slice(x, c(TRUE, FALSE)) <- 4 expect_equal(x, c(4, 3)) expect_snapshot({ (expect_error( vec_assign(x, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size" )) }) expect_snapshot({ (expect_error( vec_assign(x, c(TRUE, FALSE, TRUE), 5, slice_value = TRUE), class = "vctrs_error_subscript_size" )) }) expect_snapshot({ (expect_error( vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size" )) }) expect_snapshot({ (expect_error( vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ], slice_value = TRUE), class = "vctrs_error_subscript_size" )) }) }) test_that("assign `value` size depends on `slice_value`", { x <- c(1, 2, 3) # `value` size depends on number of `NA` or `TRUE` values in `i` expect_snapshot(error = TRUE, { vec_assign(x, c(TRUE, NA, FALSE), c(1, 2, 3)) }) # `value` size depends on size of `x` expect_snapshot(error = TRUE, { vec_assign(x, c(TRUE, NA, FALSE), c(1, 2), slice_value = TRUE) }) }) test_that("assign ignores NA in logical subsetting", { x <- c(NA, 1, 2) expect_equal(vec_assign(x, x > 0, 1), c(NA, 1, 1)) expect_equal(vec_assign(x, x > 0, 1, slice_value = TRUE), c(NA, 1, 1)) expect_equal(vec_assign(x, x > 0, c(NA, 2:1)), c(NA, 2, 1)) expect_equal( vec_assign(x, x > 0, c(NA, 2:1), slice_value = TRUE), c(NA, 2, 1) ) }) test_that("assign with arrays ignores NA in logical subsetting", { mat <- as.matrix x <- c(NA, 1, 2) expect_equal(vec_assign(mat(x), x > 0, 1), mat(c(NA, 1, 1))) expect_equal(vec_assign(mat(x), x > 0, c(NA, 2:1)), mat(c(NA, 2, 1))) }) test_that("assign ignores NA in integer subsetting", { x <- 0:2 expect_equal(vec_assign(x, c(NA, 2:3), 1), c(0, 1, 1)) expect_equal(vec_assign(x, c(NA, 2:3), c(NA, 2:1)), c(0, 2, 1)) }) test_that("assign with arrays ignores NA in integer subsetting", { mat <- as.matrix x <- mat(0:2) expect_equal(vec_assign(x, c(NA, 2:3), 1), mat(c(0, 1, 1))) expect_equal(vec_assign(x, c(NA, 2:3), c(NA, 2:1)), mat(c(0, 2, 1))) }) test_that("can't modify subset with missing argument", { x <- 1:3 expect_error(vec_slice(x, ) <- 2L) }) test_that("can modify subset with recycled NA argument", { x <- 1:3 vec_slice(x, NA) <- 2L expect_identical(x, 1:3) }) test_that("can modify subset with recycled TRUE argument", { x <- 1:3 vec_slice(x, TRUE) <- 2L expect_identical(x, rep(2L, 3)) }) test_that("can modify subset with recycled FALSE argument", { x <- 1:3 vec_slice(x, FALSE) <- 2L expect_identical(x, 1:3) }) test_that("can modify subset with NULL argument", { x <- 1:3 vec_slice(x, NULL) <- 2L expect_identical(x, 1:3) x <- vec_assign(x, NULL, 2L) expect_identical(x, 1:3) x <- vec_assign(x, NULL, 2L, slice_value = TRUE) expect_identical(x, 1:3) }) test_that("can slice-assign with missing indices", { # Atomic case x <- 1:3 y <- 4:6 test <- c(NA, TRUE, FALSE) vec_slice(x, test) <- vec_slice(y, test) expect_identical(x, int(1, 5, 3)) # Barrier case x <- as.list(1:3) y <- as.list(4:6) test <- c(NA, TRUE, FALSE) vec_slice(x, test) <- vec_slice(y, test) expect_identical(x, as.list(int(1, 5, 3))) # Atomic array case x <- array(1:12, dim = c(3, 2, 2)) y <- array(13:24, dim = c(3, 2, 2)) test <- c(NA, TRUE, FALSE) vec_slice(x, test) <- vec_slice(y, test) expect <- array( int(1, 14, 3, 4, 17, 6, 7, 20, 9, 10, 23, 12), dim = c(3, 2, 2) ) expect_identical(x, expect) # Barrier array case x <- array(as.list(1:12), dim = c(3, 2, 2)) y <- array(as.list(13:24), dim = c(3, 2, 2)) test <- c(NA, TRUE, FALSE) vec_slice(x, test) <- vec_slice(y, test) expect <- array( as.list(int(1, 14, 3, 4, 17, 6, 7, 20, 9, 10, 23, 12)), dim = c(3, 2, 2) ) expect_identical(x, expect) }) test_that("slice-assign checks vectorness", { x <- foobar(list(1)) expect_error(vec_slice(x, 1) <- 10, class = "vctrs_error_scalar_type") }) test_that("a coercible RHS is cast to LHS before assignment (#140)", { x <- 1:2 expect_error(vec_slice(x, 1) <- "1", class = "vctrs_error_incompatible_type") x <- c("foo", "bar") expect_error(vec_slice(x, 1) <- 1, class = "vctrs_error_incompatible_type") x <- 1:2 expect_error(vec_slice(x, 1) <- 3.5, class = "vctrs_error_cast_lossy") allow_lossy_cast(vec_slice(x, 1) <- 3.5) expect_identical(x, int(3, 2)) x <- matrix(1:4, 2) vec_slice(x, 1) <- matrix(c(FALSE, FALSE), 1) expect_identical(x, matrix(int(0, 2, 0, 4), 2)) expect_error( vec_assign(x, 1, matrix(c("", ""), 1)), class = "vctrs_error_incompatible_type" ) }) test_that("slice-assign takes the proxy", { local_proxy() x <- new_proxy(1:3) y <- new_proxy(20:21) vec_slice(x, 2:3) <- y expect_identical(proxy_deref(x), int(1, 20, 21)) }) test_that("can use names to assign with a named object", { x0 <- c(a = 1, b = 2, c = 3) x1 <- c(a = 1, a = 2, a = 3) vec_slice(x0, "b") <- 4 expect_identical(x0, c(a = 1, b = 4, c = 3)) # Only first is changed vec_slice(x1, "a") <- 4 expect_identical(x1, c(a = 4, a = 2, a = 3)) x <- c(a = 1, b = 2, c = 3) expect_identical( vec_assign(x, c("c", "a"), c(4, 5)), c(a = 5, b = 2, c = 4) ) # Slices `value` by `i` after matching `i` against `x` names expect_identical( vec_assign(x, c("c", "a"), c(4, 5, 6), slice_value = TRUE), c(a = 4, b = 2, c = 6) ) expect_snapshot(error = TRUE, { vec_assign(x, c("c", "a"), c(4, 5, 6)) }) expect_snapshot(error = TRUE, { vec_assign(x, c("c", "a"), c(4, 5), slice_value = TRUE) }) }) test_that("can't use names to assign with an unnamed object", { x0 <- 1:3 expect_error( vec_slice(x0, letters[1]) <- 4L, "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_assign(x0, letters[1], 4L, slice_value = TRUE), "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_slice(x0, letters[25:27]) <- 5L, "Can't use character names to index an unnamed vector.", fixed = TRUE ) }) test_that("slice-assign falls back to `[<-` when proxy is not implemented", { obj <- foobar(c("foo", "bar", "baz")) expect_error( vec_slice(obj, 1:2) <- TRUE, class = "vctrs_error_incompatible_type" ) vec_slice(obj, 1:2) <- foobar("quux") vec_ptype2(foobar(""), foobar("")) vec_cast(foobar(""), foobar("")) #> Error: Can't cast to local_methods( `[<-.vctrs_foobar` = function(x, i, value) { x <- unclass(x) x[i] <- "dispatched" x }, vec_ptype2.logical.vctrs_foobar = function(...) foobar(""), vec_ptype2.vctrs_foobar = function(...) foobar(""), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.logical = function(x, to, ...) { foobar(rep("", length(x))) } ) obj <- foobar(c("foo", "bar", "baz")) obj2 <- vec_assign(obj, 1:2, TRUE) expect_identical(obj2, foobar(c("dispatched", "dispatched", "baz"))) obj2 <- vec_assign(obj, c(TRUE, FALSE, TRUE), TRUE) expect_identical(obj2, foobar(c("dispatched", "bar", "dispatched"))) # We handle `NA` for the end user obj2 <- vec_assign(obj, c(1, NA, 2), TRUE) expect_identical(obj2, foobar(c("dispatched", "dispatched", "baz"))) }) test_that("vec_assign() can always assign unspecified values into foreign vector types", { obj <- foobar(c("foo", "bar", "baz")) expect <- foobar(c(NA, "bar", "baz")) expect_identical(vec_assign(obj, 1, NA), expect) expect_identical(vec_assign(obj, 1, unspecified(1)), expect) expect_identical( vec_assign(obj, 1, c(NA, NA, NA), slice_value = TRUE), expect ) expect_identical( vec_assign(obj, 1, unspecified(3), slice_value = TRUE), expect ) }) test_that("slice-assign casts to `to` before falling back to `[<-` (#443)", { called <- FALSE local_methods( vec_proxy.vctrs_proxy = proxy_deref, vec_ptype2.vctrs_proxy = function(...) NULL, vec_ptype2.vctrs_proxy.vctrs_foobar = function(...) new_proxy(NA), vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_proxy = function(x, ...) foobar(proxy_deref(x)), `[<-.vctrs_foobar` = function(x, i, value) { called <<- TRUE expect_identical(value, foobar(10)) } ) x <- foobar(1) y <- new_proxy(10) vec_slice(x, 1) <- y expect_true(called) }) test_that("index and value are sliced before falling back", { # Works around a bug in base R `[<-` before we call it, in particular, related # to the fact that `[<-` doesn't allow `NA` in subassign indices, but we do. lhs <- foobar(int(1:5)) # With location vector expect_identical( vec_assign(lhs, c(NA, 1), foobar(int(6:7))), foobar(int(7, 2:5)) ) # With location vector and `slice_value = TRUE` expect_identical( vec_assign(lhs, c(NA, 1), foobar(int(6:10)), slice_value = TRUE), foobar(int(6, 2:5)) ) # With condition vector expect_identical( vec_assign(lhs, c(NA, FALSE, TRUE, NA, TRUE), foobar(int(6:9))), foobar(int(1, 2, 7, 4, 9)) ) # With condition vector and `slice_value = TRUE` expect_identical( vec_assign( lhs, c(NA, FALSE, TRUE, NA, TRUE), foobar(int(6:10)), slice_value = TRUE ), foobar(int(1, 2, 8, 4, 10)) ) }) test_that("size 1 value is expected to be handled by the `[<-` fallback", { # i.e., we don't pre recycle size 1 value to the size of the index because # we expect that most `[<-` fallbacks eventually call base `[<-`, which # recycles size 1 value efficiently. See `vec_assign_fallback()`. lhs <- foobar(1:4) rhs <- foobar(0L) exp <- foobar(c(1L, 0L, 3L, 0L)) expect_identical(vec_assign(lhs, c(2L, 4L), rhs), exp) expect_identical( vec_assign(lhs, c(FALSE, TRUE, FALSE, TRUE), rhs), exp ) expect_identical( vec_assign(lhs, c(FALSE, TRUE, FALSE, TRUE), rhs, slice_value = TRUE), exp ) }) test_that("can assign to data frame", { x <- data_frame(x = 1:3) y <- data_frame(x = 20) expect_identical(vec_assign(x, 2, y), data_frame(x = int(1, 20, 3))) }) test_that("can assign to data frame with `slice_value`", { x <- data_frame(x = 1:4) y <- data_frame(x = 21:24) i <- c(TRUE, FALSE, NA, TRUE) expect_identical( vec_assign(x, i, y, slice_value = TRUE), data_frame(x = int(21, 2, 3, 24)) ) }) test_that("can assign to a data frame with matrix columns (#625)", { df <- tibble(x = 1:2, y = matrix(1:4, nrow = 2)) expect_identical(vec_assign(df, 2L, df[1, ]), vec_slice(df, c(1, 1))) }) test_that("assigning to a factor doesn't produce corrupt levels (#853)", { x <- factor(c("a", NA), levels = c("a", "b")) value <- factor("b", levels = "b") res <- vec_assign(x, 2, value) expect_identical(res, factor(c("a", "b"))) res <- vec_assign(x, 1:2, value) expect_identical(res, factor(c("b", "b"), levels = c("a", "b"))) }) test_that("can slice-assign unspecified vectors with default type2 method", { local_rational_class() x <- rational(1:2, 2:3) x[[1]] <- NA expect_identical(x, rational(c(NA, 2L), c(NA, 3L))) }) test_that("`vec_assign()` evaluates arg lazily", { expect_silent(vec_assign(1L, 1L, 1L, x_arg = print("oof"))) expect_silent(vec_assign(1L, 1L, 1L, value_arg = print("oof"))) }) test_that("`vec_assign()` requires recyclable value", { expect_snapshot({ (expect_error( vec_assign(1:3, 1:2, 1:3), class = "vctrs_error_recycle_incompatible_size" )) }) expect_snapshot({ (expect_error( vec_assign(1:3, 1:2, 1:2, slice_value = TRUE), class = "vctrs_error_recycle_incompatible_size" )) }) }) test_that("logical subscripts must match size of indexed vector", { expect_snapshot({ (expect_error( vec_assign(1:2, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size" )) }) expect_snapshot( (expect_error( vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size" )) ) }) test_that("must assign existing elements", { expect_snapshot({ (expect_error( vec_assign(1:3, 5, 10), class = "vctrs_error_subscript_oob" )) (expect_error( vec_assign(1:3, "foo", 10), "unnamed vector" )) (expect_error( vec_slice(letters, -100) <- "foo", class = "vctrs_error_subscript_oob" )) (expect_error( vec_assign(set_names(letters), "foo", "bar"), class = "vctrs_error_subscript_oob" )) }) }) test_that("must assign with proper negative locations", { expect_snapshot({ (expect_error( vec_assign(1:3, c(-1, 1), 1:2), class = "vctrs_error_subscript_type" )) (expect_error( vec_assign(1:3, c(-1, NA), 1:2), class = "vctrs_error_subscript_type" )) }) }) test_that("`vec_assign()` error args can be overridden", { expect_snapshot({ (expect_error( vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar"), class = "vctrs_error_incompatible_type" )) (expect_error( vec_assign(1:2, 1L, 1:2, value_arg = "bar"), class = "vctrs_error_recycle_incompatible_size" )) }) }) test_that("names are not assigned by default", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) vec_out <- c(a = 1L, b = 4L, c = 3L) expect_identical( vec_assign(vec_x, 2, vec_y), vec_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "FOO") df_out <- new_data_frame(list(x = c(1L, 4L, 3L)), row.names = letters[1:3]) expect_identical( vec_assign(df_x, 2, df_y), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("FOO")) mat_out <- matrix(c(1L, 4L, 3L), dimnames = list(letters[1:3])) expect_identical( vec_assign(mat_x, 2, mat_y), mat_out ) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y), row.names = c("quux") ) nested_out <- new_data_frame( list(df = df_out, mat = mat_out, vec = vec_out), row.names = c("foo", "bar", "baz") ) expect_identical( vec_assign(nested_x, 2, nested_y), nested_out ) }) test_that("can optionally assign names", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L) vec_out <- c(a = 1L, FOO = 4L, c = 3L) expect_identical( vec_assign_params(vec_x, 2, vec_y, assign_names = TRUE), vec_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4L), row.names = "FOO") df_out <- new_data_frame( list(x = c(1L, 4L, 3L)), row.names = c("a", "FOO", "c") ) expect_identical( vec_assign_params(df_x, 2, df_y, assign_names = TRUE), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4L, 1, dimnames = list("FOO")) mat_out <- matrix(c(1L, 4L, 3L), dimnames = list(c("a", "FOO", "c"))) expect_identical( vec_assign_params(mat_x, 2, mat_y, assign_names = TRUE), mat_out ) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y), row.names = c("quux") ) nested_out <- new_data_frame( list(df = df_out, mat = mat_out, vec = vec_out), row.names = c("foo", "quux", "baz") ) expect_identical( vec_assign_params(nested_x, 2, nested_y, assign_names = TRUE), nested_out ) }) test_that("can optionally assign names with `slice_value`", { vec_x <- set_names(1:3, letters[1:3]) vec_y <- c(FOO = 4L, BAR = 5L, BAZ = 6L) vec_out <- c(a = 1L, BAR = 5L, BAZ = 6L) expect_identical( vec_assign_params( vec_x, c(FALSE, TRUE, TRUE), vec_y, assign_names = TRUE, slice_value = TRUE ), vec_out ) df_x <- new_data_frame(list(x = 1:3), row.names = letters[1:3]) df_y <- new_data_frame(list(x = 4:6), row.names = c("FOO", "BAR", "BAZ")) df_out <- new_data_frame( list(x = c(1L, 5L, 6L)), row.names = c("a", "BAR", "BAZ") ) expect_identical( vec_assign_params( df_x, c(FALSE, TRUE, TRUE), df_y, assign_names = TRUE, slice_value = TRUE ), df_out ) mat_x <- matrix(1:3, 3, dimnames = list(letters[1:3])) mat_y <- matrix(4:6, 3, dimnames = list(c("FOO", "BAR", "BAZ"))) mat_out <- matrix(c(1L, 5L, 6L), dimnames = list(c("a", "BAR", "BAZ"))) expect_identical( vec_assign_params( mat_x, c(FALSE, TRUE, TRUE), mat_y, assign_names = TRUE, slice_value = TRUE ), mat_out ) nested_x <- new_data_frame( list(df = df_x, mat = mat_x, vec = vec_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame( list(df = df_y, mat = mat_y, vec = vec_y), row.names = c(c("x", "y", "z")) ) nested_out <- new_data_frame( list(df = df_out, mat = mat_out, vec = vec_out), row.names = c("foo", "y", "z") ) expect_identical( vec_assign_params( nested_x, c(FALSE, TRUE, TRUE), nested_y, assign_names = TRUE, slice_value = TRUE ), nested_out ) }) test_that("can optionally assign names (OO case)", { # In case upstream attributes handling changes skip_on_cran() # `set_names()` must be on the inside, otherwise the POSIXlt object # gets a `balanced` attribute of `NA` oo_x <- as_posixlt(set_names( c("2020-01-01", "2020-01-02", "2020-01-03"), letters[1:3] )) oo_y <- as_posixlt(c(FOO = "2020-01-04")) oo_out <- as_posixlt(c( a = "2020-01-01", FOO = "2020-01-04", c = "2020-01-03" )) expect_identical( vec_assign_params(oo_x, 2, oo_y, assign_names = TRUE), oo_out ) nested_x <- new_data_frame( list(oo = oo_x), row.names = c("foo", "bar", "baz") ) nested_y <- new_data_frame(list(oo = oo_y), row.names = c("quux")) nested_out <- new_data_frame( list(oo = oo_out), row.names = c("foo", "quux", "baz") ) expect_identical( vec_assign_params(nested_x, 2, nested_y, assign_names = TRUE), nested_out ) }) test_that("assigning names clears existing names even if the new value doesn't have any (#2019)", { x <- c(a = 1, b = 2) # Keeps existing names expect_identical( vec_assign_params(x, 1L, 0), c(a = 0, b = 2) ) expect_identical( vec_assign_params(x, 1L, c(c = 0)), c(a = 0, b = 2) ) # Clears or replaces names expect_identical( vec_assign_params(x, 1L, 0, assign_names = TRUE), set_names(c(0, 2), c("", "b")) ) expect_identical( vec_assign_params(x, 1L, c(c = 0), assign_names = TRUE), set_names(c(0, 2), c("c", "b")) ) }) test_that("assignment requires that the value proxy is the same type as the output proxy", { x <- foobar(1) y <- foobar("a") local_foobar_proxy() local_methods( vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) x ) expect_error( vec_assign(x, 1, y), "`double` incompatible with `value` proxy of type `character`" ) }) test_that("assignment allows a df `value`'s column to be a different type than its proxy (#1082)", { x <- new_data_frame(list(x = foobar(1))) y <- new_data_frame(list(x = foobar(2))) local_methods( # proxying foobar wraps it in a 1 col df vec_proxy.vctrs_foobar = function(x, ...) { attributes(x) <- NULL new_data_frame(list(vec = x)) }, # restoring extracts the column vec_restore.vctrs_foobar = function(x, to, ...) { foobar(x$vec) }, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) x ) expect1 <- new_data_frame(list(x = foobar(c(1, 1)))) expect2 <- new_data_frame(list(x = foobar(2))) expect_identical(vec_rbind(x, x), expect1) expect_identical(vec_assign(x, 1, y), expect2) }) test_that("monitoring: assignment to a data frame with unshared columns doesn't overwrite (#986)", { # We now require R >= 4.0.0, so this test no longer needs the < 4.0.0 # branches, but we keep it around for historical reference x <- new_df_unshared_col() value <- new_data_frame(list(x = 2)) expect <- new_data_frame(list(x = 1L)) # - On R < 4.0.0, the NAMED value of the column is 0. # - On R >= 4.0.0, the refcnt of the column is 1 from the call to # `SET_VECTOR_ELT()` in `new_df_unshared_col()`. expect_false(maybe_shared_col(x, 1L)) new <- vec_assign(x, 1, value) # On R < 4.0.0, `vec_assign()` shallow duplicates `x`, which recursively # bumps the NAMED-ness of each column of `x` to the max value of 7 by # calling `ENSURE_NAMEDMAX()` on it. So the columns of `x` are all considered # shared from that. # On R >= 4.0.0, references are tracked more precisely. # - `new_df_unshared_col()` calls `SET_VECTOR_ELT()` when setting the # column into `x`, bumping the column's namedness to 1. # - Then, at the start of `df_assign()`, `x` is shallow duplicated and # assigned to `out`. This calls `ENSURE_NAMEDMAX()` on each column, # however this does nothing on R 4.0.0. The refcnt of each column is instead # incremented by 1 by calls to `SET_VECTOR_ELT()` in `duplicate1()`. # So now it is at 2. # - But then in `df_assign()` we use `SET_VECTOR_ELT()` on `out`, overwriting # each column. This actually decrements the refcnt on the value that was # in `out` before the column was overwritten. The column of `out` that it # decrements the refcnt for is the same SEXP as that column in `x`, so now # it is back to 1, and it is not considered shared. if (getRversion() >= "4.0.0") { expect_false(maybe_shared_col(x, 1L)) } else { expect_true(maybe_shared_col(x, 1L)) } # Expect no changes to `x`! expect_identical(x, expect) }) test_that("monitoring: assignment to atomic vectors doesn't modify by reference", { x <- c(1, 2, 3) expect <- c(1, 2, 3) vec_assign(x, 2, 3) expect_identical(x, expect) }) test_that("monitoring: assignment to POSIXlt doesn't modify by reference (#1951)", { original <- as.POSIXlt("2020-11-01") expect_original <- as.POSIXlt("2020-11-01") value <- as.POSIXlt("2020-12-02") expect <- as.POSIXlt("2020-12-02") actual <- vec_assign(original, 1, value) expect_identical(expect, actual) expect_identical(original, expect_original) }) test_that("monitoring: assignment to `vctrs_rcrd` doesn't modify by reference (#1951)", { original <- new_rcrd(list(x = 1:5)) expect_original <- new_rcrd(list(x = 1:5)) value <- new_rcrd(list(x = 0L)) expect <- new_rcrd(list(x = c(0L, 2:5))) actual <- vec_assign(original, 1, value) expect_identical(expect, actual) expect_identical(original, expect_original) }) # vec_assign + compact_seq ------------------------------------------------- test_that("can assign base vectors with compact seqs", { # `start` is 0-based start <- 1L size <- 2L increasing <- TRUE x <- c(FALSE, FALSE, FALSE) value <- c(TRUE, NA, TRUE) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, c(FALSE, NA, TRUE)) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- c(1L, 2L, 3L) value <- c(4L, 5L, 6L) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, c(1L, 5L, 6L)) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- c(1, 2, 3) value <- c(4, 5, 6) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, c(1, 5, 6)) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- c(1i, 2i, 3i) value <- c(4i, 5i, 6i) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, c(1i, 5i, 6i)) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- c("1", "2", "3") value <- c("4", "5", "6") value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, c("1", "5", "6")) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- raw2(1, 2, 3) value <- raw2(4, 5, 6) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, raw2(1, 5, 6)) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- list(1, 2, 3) value <- list(4, 5, 6) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, list(1, 5, 6)) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) }) test_that("can assign shaped base vectors with compact seqs", { # `start` is 0-based start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix x <- mat(c(FALSE, FALSE, FALSE)) value <- mat(c(TRUE, NA, TRUE)) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(c(FALSE, NA, TRUE))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- mat(c(1L, 2L, 3L)) value <- mat(c(4L, 5L, 6L)) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(c(1L, 5L, 6L))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- mat(c(1, 2, 3)) value <- mat(c(4, 5, 6)) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(c(1, 5, 6))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- mat(c(1i, 2i, 3i)) value <- mat(c(4i, 5i, 6i)) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(c(1i, 5i, 6i))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- mat(c("1", "2", "3")) value <- mat(c("4", "5", "6")) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(c("1", "5", "6"))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- mat(raw2(1, 2, 3)) value <- mat(raw2(4, 5, 6)) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(raw2(1, 5, 6))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) x <- mat(list(1, 2, 3)) value <- mat(list(4, 5, 6)) value_sliced <- vec_slice_seq(value, start, size, increasing) y <- vec_assign_seq(x, value_sliced, start, size, increasing) expect_identical(y, mat(list(1, 5, 6))) expect_identical( vec_assign_seq(x, value, start, size, increasing, slice_value = TRUE), y ) }) test_that("can assign shaped base vectors with compact seqs and recycled `value`", { # `start` is 0-based start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical( vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(lgl(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(cpl2(1, 2, 3)), NA, start, size, increasing), mat(cpl2(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", NA, NA)) ) expect_identical( vec_assign_seq(mat(raw2(1, 2, 3)), raw2(1), start, size, increasing), mat(raw2(1, 1, 1)) ) expect_identical( vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, NULL, NULL)) ) }) test_that("can assign shaped base vectors with decreasing compact seqs and recycled `value`", { # `start` is 0-based start <- 2L size <- 2L increasing <- FALSE mat <- as.matrix expect_identical( vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(lgl(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(cpl2(1, 2, 3)), NA, start, size, increasing), mat(cpl2(1, NA, NA)) ) expect_identical( vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", NA, NA)) ) expect_identical( vec_assign_seq(mat(raw2(1, 2, 3)), raw2(1), start, size, increasing), mat(raw2(1, 1, 1)) ) expect_identical( vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, NULL, NULL)) ) }) test_that("can assign shaped base vectors with size 0 compact seqs", { # `start` is 0-based start <- 1L size <- 0L increasing <- TRUE mat <- as.matrix expect_identical( vec_assign_seq(mat(lgl(1, 0, 1)), NA, start, size, increasing), mat(mat(lgl(1, 0, 1))) ) expect_identical( vec_assign_seq(mat(int(1, 2, 3)), NA, start, size, increasing), mat(int(1, 2, 3)) ) expect_identical( vec_assign_seq(mat(dbl(1, 2, 3)), NA, start, size, increasing), mat(dbl(1, 2, 3)) ) expect_identical( vec_assign_seq(mat(cpl(1, 2, 3)), NA, start, size, increasing), mat(cpl(1, 2, 3)) ) expect_identical( vec_assign_seq(mat(chr("1", "2", "3")), NA, start, size, increasing), mat(chr("1", "2", "3")) ) expect_identical( vec_assign_seq(mat(raw2(1, 2, 3)), raw2(1), start, size, increasing), mat(raw2(1, 2, 3)) ) expect_identical( vec_assign_seq(mat(list(1, 2, 3)), NA, start, size, increasing), mat(list(1, 2, 3)) ) }) test_that("can assign object of any dimensionality with compact seqs", { x1 <- ones(3) x2 <- ones(3, 4) x3 <- ones(3, 4, 5) x4 <- ones(3, 4, 5, 6) # `start` is 0-based start <- 0L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical( vec_assign_seq(x1, 2, start, size, increasing), array(rep(c(2, 2, 1), 1), dim = 3) ) expect_identical( vec_assign_seq(x2, 2, start, size, increasing), array(rep(c(2, 2, 1), 4), dim = c(3, 4)) ) expect_identical( vec_assign_seq(x3, 2, start, size, increasing), array(rep(c(2, 2, 1), 20), dim = c(3, 4, 5)) ) expect_identical( vec_assign_seq(x4, 2, start, size, increasing), array(rep(c(2, 2, 1), 120), dim = c(3, 4, 5, 6)) ) }) # vec_assign + compact_condition ----------------------------------------------- test_that("can assign base vectors with compact conditions", { i <- c(FALSE, TRUE, TRUE) i_compact <- as_compact_condition(i) x <- c(FALSE, FALSE, FALSE) value <- c(TRUE, NA, TRUE) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, c(FALSE, NA, TRUE)) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- c(1L, 2L, 3L) value <- c(4L, 5L, 6L) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, c(1L, 5L, 6L)) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- c(1, 2, 3) value <- c(4, 5, 6) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, c(1, 5, 6)) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- c(1i, 2i, 3i) value <- c(4i, 5i, 6i) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, c(1i, 5i, 6i)) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- c("1", "2", "3") value <- c("4", "5", "6") value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, c("1", "5", "6")) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- raw2(1, 2, 3) value <- raw2(4, 5, 6) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, raw2(1, 5, 6)) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- list(1, 2, 3) value <- list(4, 5, 6) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, list(1, 5, 6)) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) }) test_that("can assign shaped base vectors with compact conditions", { i <- c(FALSE, TRUE, TRUE) i_compact <- as_compact_condition(i) mat <- as.matrix x <- mat(c(FALSE, FALSE, FALSE)) value <- mat(c(TRUE, NA, TRUE)) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(c(FALSE, NA, TRUE))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- mat(c(1L, 2L, 3L)) value <- mat(c(4L, 5L, 6L)) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(c(1L, 5L, 6L))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- mat(c(1, 2, 3)) value <- mat(c(4, 5, 6)) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(c(1, 5, 6))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- mat(c(1i, 2i, 3i)) value <- mat(c(4i, 5i, 6i)) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(c(1i, 5i, 6i))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- mat(c("1", "2", "3")) value <- mat(c("4", "5", "6")) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(c("1", "5", "6"))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- mat(raw2(1, 2, 3)) value <- mat(raw2(4, 5, 6)) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(raw2(1, 5, 6))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) x <- mat(list(1, 2, 3)) value <- mat(list(4, 5, 6)) value_sliced <- vec_slice(value, i) y <- vec_assign_compact_condition(x, i_compact, value_sliced) expect_identical(y, mat(list(1, 5, 6))) expect_identical( vec_assign_compact_condition(x, i_compact, value, slice_value = TRUE), y ) }) test_that("can assign shaped base vectors with compact conditions and recycled `value`", { i <- as_compact_condition(c(FALSE, TRUE, TRUE)) mat <- as.matrix expect_identical( vec_assign_compact_condition(mat(lgl(1, 0, 1)), i, NA), mat(lgl(1, NA, NA)) ) expect_identical( vec_assign_compact_condition(mat(int(1, 2, 3)), i, NA), mat(int(1, NA, NA)) ) expect_identical( vec_assign_compact_condition(mat(dbl(1, 2, 3)), i, NA), mat(dbl(1, NA, NA)) ) expect_identical( vec_assign_compact_condition(mat(cpl2(1, 2, 3)), i, NA), mat(cpl2(1, NA, NA)) ) expect_identical( vec_assign_compact_condition(mat(chr("1", "2", "3")), i, NA), mat(chr("1", NA, NA)) ) expect_identical( vec_assign_compact_condition(mat(raw2(1, 2, 3)), i, raw2(1)), mat(raw2(1, 1, 1)) ) expect_identical( vec_assign_compact_condition(mat(list(1, 2, 3)), i, NA), mat(list(1, NULL, NULL)) ) }) test_that("can assign shaped base vectors with size 0 compact conditions", { i <- as_compact_condition(logical()) mat <- as.matrix expect_identical( vec_assign_compact_condition(mat(lgl(1, 0, 1)), i, NA), mat(mat(lgl(1, 0, 1))) ) expect_identical( vec_assign_compact_condition(mat(int(1, 2, 3)), i, NA), mat(int(1, 2, 3)) ) expect_identical( vec_assign_compact_condition(mat(dbl(1, 2, 3)), i, NA), mat(dbl(1, 2, 3)) ) expect_identical( vec_assign_compact_condition(mat(cpl(1, 2, 3)), i, NA), mat(cpl(1, 2, 3)) ) expect_identical( vec_assign_compact_condition(mat(chr("1", "2", "3")), i, NA), mat(chr("1", "2", "3")) ) expect_identical( vec_assign_compact_condition(mat(raw2(1, 2, 3)), i, raw2(1)), mat(raw2(1, 2, 3)) ) expect_identical( vec_assign_compact_condition(mat(list(1, 2, 3)), i, NA), mat(list(1, 2, 3)) ) }) test_that("can assign object of any dimensionality with compact conditions", { x1 <- ones(3) x2 <- ones(3, 4) x3 <- ones(3, 4, 5) x4 <- ones(3, 4, 5, 6) i <- as_compact_condition(c(TRUE, TRUE, FALSE)) expect_identical( vec_assign_compact_condition(x1, i, 2), array(rep(c(2, 2, 1), 1), dim = 3) ) expect_identical( vec_assign_compact_condition(x2, i, 2), array(rep(c(2, 2, 1), 4), dim = c(3, 4)) ) expect_identical( vec_assign_compact_condition(x3, i, 2), array(rep(c(2, 2, 1), 20), dim = c(3, 4, 5)) ) expect_identical( vec_assign_compact_condition(x4, i, 2), array(rep(c(2, 2, 1), 120), dim = c(3, 4, 5, 6)) ) }) vctrs/tests/testthat/test-recycle.R0000644000176200001440000001443615113325071017151 0ustar liggesuserstest_that("vec_recycle_common() reports error context", { my_function <- function(...) vec_recycle_common(...) expect_snapshot({ (expect_error(my_function(this_arg = 1:2, that_arg = int()))) (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2))) (expect_error(my_function( this_arg = 1:2, that_arg = int(), .arg = "my_arg" ))) (expect_error(my_function( this_arg = 1:2, that_arg = int(), .size = 2, .arg = "my_arg" ))) }) }) # vec_recycle() ------------------------------------------------------------- test_that("vec_recycle recycles size 1 to any other size", { x <- 1 x0 <- numeric() x2 <- c(x, x) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) }) test_that("vec_recycle(): incompatible lengths get error messages", { x2 <- c(1, 2) expect_snapshot({ (expect_error( vec_recycle(x2, 1), class = "vctrs_error_recycle_incompatible_size" )) }) expect_error( vec_recycle(x2, 0), class = "vctrs_error_recycle_incompatible_size" ) expect_error( vec_recycle(x2, 3), class = "vctrs_error_recycle_incompatible_size" ) }) test_that("can recycle arrays", { x <- matrix(1:2, 1) x2 <- matrix(1:2, 2, 2, byrow = TRUE) x0 <- matrix(integer(), 0, 2) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) # List arrays data <- c(list(1), list(2)) x <- matrix(data, 1) x2 <- matrix(data, 2, 2, byrow = TRUE) x0 <- matrix(list(), 0, 2) expect_equal(vec_recycle(x, 1), x) expect_equal(vec_recycle(x, 0), x0) expect_equal(vec_recycle(x, 2), x2) }) test_that("vec_recycle() evaluates x_arg lazily", { expect_silent(vec_recycle(1L, 1L, x_arg = print("oof"))) }) test_that("recycling to size 1 has informative error", { expect_snapshot({ (expect_error( vec_recycle(1:2, 1), class = "vctrs_error_recycle_incompatible_size" )) }) }) test_that("incompatible recycling size has informative error", { expect_snapshot(error = TRUE, vec_recycle(1:2, 4)) expect_snapshot(error = TRUE, vec_recycle(1:2, 4, x_arg = "foo")) }) # Empty ------------------------------------------------------------------- test_that("empty input returns empty list", { expect_equal(vec_recycle_common(), list()) }) # Vectors ----------------------------------------------------------------- test_that("NULL is idempotent", { expect_equal(vec_recycle_common(NULL, NULL), list(NULL, NULL)) expect_equal(vec_recycle_common(1:5, NULL), list(1:5, NULL)) expect_equal(vec_recycle_common(NULL, 1:5), list(NULL, 1:5)) }) test_that("equal lengths returned as is", { x <- 1:3 expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x[1], x[1]), list(x[1], x[1])) expect_equal(vec_recycle_common(x[0], x[0]), list(x[0], x[0])) }) test_that("vec_recycle_common recycles size 1 to any other size", { x1 <- 1 x3 <- rep(1, 3) x0 <- numeric() expect_equal(vec_recycle_common(x1, x3), list(x3, x3)) expect_equal(vec_recycle_common(x3, x1), list(x3, x3)) expect_equal(vec_recycle_common(x1, x0), list(x0, x0)) }) test_that("vec_recycle_common(): incompatible lengths get error messages", { expect_snapshot({ (expect_error( vec_recycle_common(1:2, 1:3), class = "vctrs_error_incompatible_size" )) }) expect_error( vec_recycle_common(1:3, 1:2), class = "vctrs_error_incompatible_size" ) expect_error( vec_recycle_common(numeric(), 1:2), class = "vctrs_error_incompatible_size" ) }) test_that("vec_recycle_common errors on scalars", { expect_snapshot(error = TRUE, { vec_recycle_common(1, lm(1 ~ 1)) }) # Index is correct with `NULL`s expect_snapshot(error = TRUE, { vec_recycle_common(1, NULL, lm(1 ~ 1)) }) }) test_that("vec_recycle_common doesn't mutate the input in place", { x <- list(a = 1, b = 2:3) expect_identical( vec_recycle_common(!!!x), list(a = c(1, 1), b = 2:3) ) expect_identical( x, list(a = 1, b = 2:3) ) }) test_that("vec_recycle_common retains names", { expect_named(vec_recycle_common(a = 1, b = 2), c("a", "b")) expect_named(vec_recycle_common(a = 1, b = 2:3), c("a", "b")) }) # Matrices ---------------------------------------------------------------- test_that("can vec_recycle_common matrices", { x <- matrix(nrow = 4, ncol = 4) x1 <- x[1, , drop = FALSE] expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x1, x), list(x, x)) }) test_that("recycling matrices respects incompatible sizes", { x <- matrix(nrow = 4, ncol = 4) x2 <- x[1:2, , drop = FALSE] x0 <- x[0, , drop = FALSE] expect_snapshot({ (expect_error( vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size" )) }) expect_error( vec_recycle_common(x0, x), class = "vctrs_error_incompatible_size" ) }) # Data frames ------------------------------------------------------------ test_that("can vec_recycle_common data frames", { x <- data.frame(a = rep(1, 3), b = rep(2, 3)) x1 <- vec_slice(x, 1L) expect_equal(vec_recycle_common(x, x), list(x, x)) expect_equal(vec_recycle_common(x1, x), list(x, x)) }) test_that("recycling data frames respects incompatible sizes", { x <- data.frame(a = rep(1, 3), b = rep(2, 3)) x2 <- vec_slice(x, 1:2) x0 <- vec_slice(x, integer()) expect_snapshot({ (expect_error( vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size" )) }) expect_error( vec_recycle_common(x0, x), class = "vctrs_error_incompatible_size" ) }) test_that("can vec_recycle_common matrix and data frame", { mt <- matrix(nrow = 2, ncol = 2) df <- data.frame(x = c(1, 1), y = c(2, 2)) expect_equal( vec_recycle_common(vec_slice(mt, 1L), df), list(mt, df) ) expect_equal( vec_recycle_common(mt, vec_slice(df, 1L)), list(mt, df) ) }) test_that("recycling data frames with matrices respects incompatible sizes", { mt <- matrix(nrow = 2, ncol = 2) df <- data.frame(x = c(1, 1), y = c(2, 2)) expect_error( vec_recycle_common(vec_slice(mt, integer()), df), class = "vctrs_error_incompatible_size" ) expect_error( vec_recycle_common(mt, vec_slice(df, 0L)), class = "vctrs_error_incompatible_size" ) }) vctrs/tests/testthat/helper-if-else.R0000644000176200001440000000545315065005761017355 0ustar liggesusers# Helpers for testing `vec_if_else()` # # These ensure that we have consistent output and errors across the # atomic and generic paths. We use them when possible. expect_identical_vec_if_else <- function( ..., condition, true, false, missing = NULL, ptype = NULL, condition_arg = "condition", true_arg = "true", false_arg = "false", missing_arg = "missing", expect ) { expect_something_vec_if_else( expect_fn = expect_identical, condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg, expect = expect ) } expect_something_vec_if_else <- function( expect_fn, ..., condition, true, false, missing = NULL, ptype = NULL, condition_arg = "condition", true_arg = "true", false_arg = "false", missing_arg = "missing", expect ) { check_dots_empty0(...) expect_fn( vec_if_else( condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg ), expected = expect ) true_vctr <- new_vctr(true) false_vctr <- new_vctr(false) if (is.null(missing)) { missing_vctr <- NULL } else { missing_vctr <- new_vctr(missing) } expect_vctr <- new_vctr(expect) expect_fn( vec_if_else( condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg ), expected = expect_vctr ) } expect_snapshot_vec_if_else <- function( ..., condition, true, false, missing = NULL, ptype = NULL, condition_arg = "condition", true_arg = "true", false_arg = "false", missing_arg = "missing", error = FALSE ) { check_dots_empty0(...) expect_snapshot(error = error, { vec_if_else( condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg ) }) true_vctr <- new_vctr(true) false_vctr <- new_vctr(false) if (is.null(missing)) { missing_vctr <- NULL } else { missing_vctr <- new_vctr(missing) } expect_snapshot(error = error, { vec_if_else( condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg ) }) } vctrs/tests/testthat/test-fill.R0000644000176200001440000001002515065005761016446 0ustar liggesuserstest_that("works with empty input", { x <- integer() expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("works with data frames with rows but no columns", { x <- new_data_frame(n = 2L) expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("vectors with all missing values are left unchanged", { x <- c(NA, NA, NA) expect_identical(vec_fill_missing(x, direction = "down"), x) expect_identical(vec_fill_missing(x, direction = "up"), x) expect_identical(vec_fill_missing(x, direction = "downup"), x) expect_identical(vec_fill_missing(x, direction = "updown"), x) expect_identical(vec_fill_missing(x, direction = "downup", max_fill = 1), x) expect_identical(vec_fill_missing(x, direction = "updown", max_fill = 1), x) }) test_that("`NA_real_` and `NaN` are both considered missing", { expect_identical( vec_fill_missing(c(1, NA_real_, NaN)), c(1, 1, 1) ) }) test_that("missings are filled correctly", { x <- c(NA, 1, NA, 2, NA, NA) expect_identical(vec_fill_missing(x, "down"), c(NA, 1, 1, 2, 2, 2)) expect_identical(vec_fill_missing(x, "up"), c(1, 1, 2, 2, NA, NA)) expect_identical(vec_fill_missing(x, "downup"), c(1, 1, 1, 2, 2, 2)) expect_identical(vec_fill_missing(x, "updown"), c(1, 1, 2, 2, 2, 2)) }) test_that("`max_fill` limits the sequential fill amount", { x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) expect_identical( vec_fill_missing(x, "down", max_fill = 1), c(NA, NA, 1, 1, NA, NA, 3, 3, NA) ) expect_identical( vec_fill_missing(x, "downup", max_fill = 1), c(NA, 1, 1, 1, NA, NA, 3, 3, NA) ) expect_identical( vec_fill_missing(x, "down", max_fill = 2), c(NA, NA, 1, 1, 1, NA, 3, 3, 3) ) expect_identical( vec_fill_missing(x, "downup", max_fill = 2), c(1, 1, 1, 1, 1, NA, 3, 3, 3) ) expect_identical( vec_fill_missing(x, "up", max_fill = 1), c(NA, 1, 1, NA, NA, 3, 3, NA, NA) ) expect_identical( vec_fill_missing(x, "updown", max_fill = 1), c(NA, 1, 1, NA, NA, 3, 3, 3, NA) ) expect_identical( vec_fill_missing(x, "up", max_fill = 2), c(1, 1, 1, NA, 3, 3, 3, NA, NA) ) expect_identical( vec_fill_missing(x, "updown", max_fill = 2), c(1, 1, 1, NA, 3, 3, 3, 3, 3) ) }) test_that("fills data frames", { df <- data_frame(x = c(NA, NA, NA, 2), y = c(NA, 1, NA, 3)) expect_identical(vec_fill_missing(df, "down"), vec_slice(df, c(1, 2, 2, 4))) expect_identical(vec_fill_missing(df, "up"), vec_slice(df, c(2, 2, 4, 4))) }) test_that("can fill rcrd types", { x <- new_rcrd(list(x = c(1, NA, NA), y = c(1, 2, NA))) expect_identical(vec_fill_missing(x, "down"), vec_slice(x, c(1, 2, 2))) expect_identical(vec_fill_missing(x, "up"), vec_slice(x, c(1, 2, 3))) expect_identical(vec_fill_missing(x, "updown"), vec_slice(x, c(1, 2, 2))) }) test_that("validates `direction`", { expect_error(vec_fill_missing(1, 1), "`direction` must be one of") expect_error(vec_fill_missing(1, "foo"), "`direction` must be one of") }) test_that("validates `max_fill`", { expect_error(vec_fill_missing(1, max_fill = -1), "`max_fill` must be") expect_error(vec_fill_missing(1, max_fill = c(1L, 2L)), "`max_fill` must be") expect_error( vec_fill_missing(1, max_fill = NA_integer_), "`max_fill` must be" ) expect_error( vec_fill_missing(1, max_fill = "x"), class = "vctrs_error_incompatible_type" ) }) vctrs/tests/testthat/test-parallel.R0000644000176200001440000001276615113325071017323 0ustar liggesuserstest_that("9 possible variations of each combination are right", { N <- NA expect_identical(vec_pall(T, T, .missing = NA), T) expect_identical(vec_pall(T, F, .missing = NA), F) expect_identical(vec_pall(T, N, .missing = NA), N) expect_identical(vec_pall(F, T, .missing = NA), F) expect_identical(vec_pall(F, F, .missing = NA), F) expect_identical(vec_pall(F, N, .missing = NA), F) expect_identical(vec_pall(N, T, .missing = NA), N) expect_identical(vec_pall(N, F, .missing = NA), F) expect_identical(vec_pall(N, N, .missing = NA), N) expect_identical(vec_pall(T, T, .missing = TRUE), T) expect_identical(vec_pall(T, F, .missing = TRUE), F) expect_identical(vec_pall(T, N, .missing = TRUE), T) expect_identical(vec_pall(F, T, .missing = TRUE), F) expect_identical(vec_pall(F, F, .missing = TRUE), F) expect_identical(vec_pall(F, N, .missing = TRUE), F) expect_identical(vec_pall(N, T, .missing = TRUE), T) expect_identical(vec_pall(N, F, .missing = TRUE), F) expect_identical(vec_pall(N, N, .missing = TRUE), T) expect_identical(vec_pall(T, T, .missing = FALSE), T) expect_identical(vec_pall(T, F, .missing = FALSE), F) expect_identical(vec_pall(T, N, .missing = FALSE), F) expect_identical(vec_pall(F, T, .missing = FALSE), F) expect_identical(vec_pall(F, F, .missing = FALSE), F) expect_identical(vec_pall(F, N, .missing = FALSE), F) expect_identical(vec_pall(N, T, .missing = FALSE), F) expect_identical(vec_pall(N, F, .missing = FALSE), F) expect_identical(vec_pall(N, N, .missing = FALSE), F) expect_identical(vec_pany(T, T, .missing = NA), T) expect_identical(vec_pany(T, F, .missing = NA), T) expect_identical(vec_pany(T, N, .missing = NA), T) expect_identical(vec_pany(F, T, .missing = NA), T) expect_identical(vec_pany(F, F, .missing = NA), F) expect_identical(vec_pany(F, N, .missing = NA), N) expect_identical(vec_pany(N, T, .missing = NA), T) expect_identical(vec_pany(N, F, .missing = NA), N) expect_identical(vec_pany(N, N, .missing = NA), N) expect_identical(vec_pany(T, T, .missing = TRUE), T) expect_identical(vec_pany(T, F, .missing = TRUE), T) expect_identical(vec_pany(T, N, .missing = TRUE), T) expect_identical(vec_pany(F, T, .missing = TRUE), T) expect_identical(vec_pany(F, F, .missing = TRUE), F) expect_identical(vec_pany(F, N, .missing = TRUE), T) expect_identical(vec_pany(N, T, .missing = TRUE), T) expect_identical(vec_pany(N, F, .missing = TRUE), T) expect_identical(vec_pany(N, N, .missing = TRUE), T) expect_identical(vec_pany(T, T, .missing = FALSE), T) expect_identical(vec_pany(T, F, .missing = FALSE), T) expect_identical(vec_pany(T, N, .missing = FALSE), T) expect_identical(vec_pany(F, T, .missing = FALSE), T) expect_identical(vec_pany(F, F, .missing = FALSE), F) expect_identical(vec_pany(F, N, .missing = FALSE), F) expect_identical(vec_pany(N, T, .missing = FALSE), T) expect_identical(vec_pany(N, F, .missing = FALSE), F) expect_identical(vec_pany(N, N, .missing = FALSE), F) }) test_that("works with empty inputs", { expect_identical(vec_pall(logical(), logical()), logical()) expect_identical(vec_pany(logical(), logical()), logical()) }) test_that("works with no inputs", { expect_identical(vec_pall(), logical()) expect_identical(vec_pany(), logical()) }) test_that("works with no inputs and specified `.size`", { expect_identical(vec_pall(.size = 3), c(TRUE, TRUE, TRUE)) expect_identical(vec_pany(.size = 3), c(FALSE, FALSE, FALSE)) }) test_that("no casting is done", { expect_snapshot(error = TRUE, { vec_pall(1) }) expect_snapshot(error = TRUE, { vec_pany(1) }) # Arrays expect_snapshot(error = TRUE, { vec_pall(array(TRUE)) }) expect_snapshot(error = TRUE, { vec_pany(array(TRUE)) }) # Class expect_snapshot(error = TRUE, { vec_pall(structure(TRUE, class = "foo")) }) expect_snapshot(error = TRUE, { vec_pany(structure(TRUE, class = "foo")) }) }) test_that("no recycling is done", { expect_snapshot(error = TRUE, { vec_pall(TRUE, c(TRUE, TRUE, TRUE)) }) expect_snapshot(error = TRUE, { vec_pany(TRUE, c(TRUE, TRUE, TRUE)) }) # With `.size` expect_snapshot(error = TRUE, { vec_pall(TRUE, c(TRUE, TRUE, TRUE), .size = 3L) }) expect_snapshot(error = TRUE, { vec_pany(TRUE, c(TRUE, TRUE, TRUE), .size = 3L) }) }) test_that("validates `.missing`", { expect_snapshot(error = TRUE, vec_pall(.missing = c(TRUE, FALSE))) expect_snapshot(error = TRUE, vec_pany(.missing = c(TRUE, FALSE))) expect_snapshot(error = TRUE, vec_pall(.missing = 1)) expect_snapshot(error = TRUE, vec_pany(.missing = 1)) expect_snapshot(error = TRUE, vec_pall(.missing = NULL)) expect_snapshot(error = TRUE, vec_pany(.missing = NULL)) }) test_that("validates `.size`", { expect_snapshot(error = TRUE, vec_pall(.size = c(1, 2))) expect_snapshot(error = TRUE, vec_pany(.size = c(1, 2))) expect_snapshot(error = TRUE, vec_pall(.size = 1.5)) expect_snapshot(error = TRUE, vec_pany(.size = 1.5)) expect_snapshot(error = TRUE, vec_pall(.size = NA_integer_)) expect_snapshot(error = TRUE, vec_pany(.size = NA_integer_)) }) test_that("names are used in errors", { expect_snapshot(error = TRUE, { vec_pall(1.5, .arg = "x") }) expect_snapshot(error = TRUE, { vec_pall(a = 1.5, .arg = "x") }) expect_snapshot(error = TRUE, { x <- c(TRUE, FALSE) y <- logical() vec_pany(x, y) }) expect_snapshot(error = TRUE, { x <- c(TRUE, FALSE) y <- logical() vec_pany(a = x, b = y, .arg = "x", .error_call = quote(foo())) }) }) vctrs/tests/testthat/test-names.R0000644000176200001440000007575515072256373016655 0ustar liggesuserslocal_name_repair_quiet() # vec_names() --------------------------------------------------------- test_that("vec_names() retrieves names", { expect_null(vec_names(letters)) expect_identical(vec_names(set_names(letters)), letters) expect_identical(vec_names(mtcars), row.names(mtcars)) expect_null(vec_names(unrownames(mtcars))) expect_identical(vec_names(Titanic), dimnames(Titanic)[[1]]) x <- matrix(1L, dimnames = list("row", "col")) expect_identical(vec_names(x), dimnames(x)[[1]]) }) test_that("vec_names() dispatches", { local_methods( names.vctrs_foobar = function(x) "dispatched!" ) expect_identical(vec_names(foobar()), "dispatched!") }) # vec_names2() ------------------------------------------------------------- test_that("vec_names2() repairs names", { expect_identical(vec_names2(1:2), c("", "")) expect_identical(vec_names2(1:2, repair = "unique"), c("...1", "...2")) expect_identical( vec_names2(set_names(1:2, c("_foo", "_bar")), repair = "universal"), c("._foo", "._bar") ) }) test_that("vec_names2() treats data frames and arrays as vectors", { expect_identical(vec_names2(mtcars), row.names(mtcars)) expect_identical(vec_names2(as.matrix(mtcars)), row.names(mtcars)) df <- unrownames(mtcars) exp <- rep_len("", nrow(mtcars)) expect_identical(vec_names2(df), exp) expect_identical(vec_names2(as.matrix(df)), exp) }) test_that("vec_names2() accepts and checks repair function", { expect_identical( vec_names2(1:2, repair = function(nms) rep_along(nms, "foo")), c("foo", "foo") ) expect_error( vec_names2(1:2, repair = function(nms) "foo"), "length 1 instead of length 2" ) }) test_that("vec_names2() repairs names before invoking repair function", { x <- set_names(1:2, c(NA, NA)) expect_identical(vec_names2(x, repair = identity), c("", "")) }) test_that("vec_names2() result is correct for *_quiet repair", { expect_identical( vec_names2(1:2, repair = "unique"), vec_names2(1:2, repair = "unique_quiet") ) expect_identical( vec_names2(1:2, repair = "universal"), vec_names2(1:2, repair = "universal_quiet") ) }) # vec_as_names() ----------------------------------------------------------- test_that("vec_as_names() requires character vector", { expect_error(vec_as_names(NULL), "`names` must be a character vector") }) test_that("vec_as_names() validates `repair`", { expect_snapshot({ (expect_error(my_vec_as_names("x", my_repair = "foo"), "can't be \"foo\"")) (expect_error(my_vec_as_names(1, my_repair = 1), "string or a function")) }) }) test_that("vec_as_names() repairs names", { expect_identical(vec_as_names(chr(NA, NA)), c("", "")) expect_identical( vec_as_names(chr(NA, NA), repair = "unique"), c("...1", "...2") ) expect_identical( vec_as_names(chr("_foo", "_bar"), repair = "universal"), c("._foo", "._bar") ) expect_identical( vec_as_names(chr("a", "b"), repair = "check_unique"), c("a", "b") ) }) test_that("vec_as_names() checks unique names", { expect_snapshot({ (expect_error(my_vec_as_names(chr(NA), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr(""), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr("a", "a"), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr("..1"), my_repair = "check_unique"))) (expect_error(my_vec_as_names(chr("..."), my_repair = "check_unique"))) }) }) test_that("vec_as_names() result is correct for *_quiet repair", { expect_identical( vec_as_names(chr("_foo", "_bar"), repair = "unique"), vec_as_names(chr("_foo", "_bar"), repair = "unique_quiet") ) expect_identical( vec_as_names(chr("_foo", "_bar"), repair = "universal"), vec_as_names(chr("_foo", "_bar"), repair = "universal_quiet") ) }) test_that("vec_as_names() keeps the names of a named vector", { x_unnamed <- c(NA, "", "..1", "...2") x_names <- letters[1:4] x <- set_names(x_unnamed, x_names) expect_identical( set_names(vec_as_names(x_unnamed, repair = "minimal"), x_names), vec_as_names(x, repair = "minimal") ) expect_identical( set_names(vec_as_names(x_unnamed, repair = "unique"), x_names), vec_as_names(x, repair = "unique") ) expect_identical( set_names(vec_as_names(x_unnamed, repair = "universal"), x_names), vec_as_names(x, repair = "universal") ) }) test_that("vec_as_names() accepts and checks repair function", { f <- local({ local_obj <- "foo" ~ rep_along(.x, local_obj) }) expect_identical(vec_as_names(c("", ""), repair = f), c("foo", "foo")) expect_snapshot( error = TRUE, my_vec_as_names(c("", ""), my_repair = function(nms) "foo") ) }) test_that("vec_as_names() repairs names before invoking repair function", { expect_identical(vec_as_names(chr(NA, NA), repair = identity), c("", "")) }) test_that("vec_as_names() is noisy by default", { local_name_repair_verbose() expect_snapshot({ # Noisy name repair vec_as_names(c("x", "x"), repair = "unique") # Quiet name repair vec_as_names(c("x", "x"), repair = "unique", quiet = TRUE) # Hint at repair argument, if known (expect_error( my_vec_as_names(c("x", "x"), my_repair = "check_unique") )) # request quiet via name repair string, don't specify `quiet` vec_as_names(c("1", "1"), repair = "unique_quiet") vec_as_names(c("1", "1"), repair = "universal_quiet") # request quiet via name repair string, specify `quiet` = TRUE vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = TRUE) vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = TRUE) # request quiet via name repair string, specify `quiet` = FALSE vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = FALSE) vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = FALSE) }) }) test_that("validate_minimal_names() checks names", { expect_snapshot({ (expect_error(validate_minimal_names(1), "must return a character vector")) (expect_error(validate_minimal_names(NULL), "can't return `NULL`")) (expect_error(validate_minimal_names(chr(NA)), "can't return `NA` values")) }) }) test_that("validate_unique() checks unique names", { expect_snapshot({ (expect_error(validate_unique(chr(NA)), "`NA`")) (expect_error( validate_unique(chr("")), class = "vctrs_error_names_cannot_be_empty" )) (expect_error( validate_unique(chr("a", "a")), class = "vctrs_error_names_must_be_unique" )) (expect_error( validate_unique(chr("..1")), class = "vctrs_error_names_cannot_be_dot_dot" )) (expect_error( validate_unique(chr("...")), class = "vctrs_error_names_cannot_be_dot_dot" )) }) }) test_that("vec_as_names_validate() validates repair arguments", { expect_identical( validate_name_repair_arg(c("unique", "check_unique")), "unique" ) expect_identical( validate_name_repair_arg(~ toupper(.))(letters), LETTERS ) }) test_that("vec_as_names() is quiet when function is supplied (#1018)", { expect_silent( vctrs::vec_as_names( c("a", "b"), repair = function(x) paste0(x, "a"), quiet = FALSE ) ) }) test_that("vec_as_names() evaluates repair_arg lazily", { expect_silent(vec_as_names(letters, repair_arg = print("oof"))) }) # vec_repair_names() ------------------------------------------------------- test_that("vec_repair_names() repairs names", { expect_identical(vec_repair_names(1:2), set_names(1:2, c("", ""))) expect_identical( vec_repair_names(1:2, "unique"), set_names(1:2, c("...1", "...2")) ) expect_identical( vec_repair_names(set_names(1:2, c("_foo", "_bar")), "universal"), set_names(1:2, c("._foo", "._bar")) ) }) test_that("vec_repair_names() handles data frames and arrays", { df <- data.frame(x = 1:2) expect_identical(vec_repair_names(df), df) expect_identical(row.names(vec_repair_names(as.matrix(df))), c("", "")) expect_identical( row.names(vec_repair_names(as.matrix(df), "unique")), c("...1", "...2") ) }) # vec_set_names() ----------------------------------------------------------- test_that("vec_set_names() sets atomic names", { x <- 1:2 names <- c("x1", "x2") exp <- set_names(x, names) expect_equal(vec_set_names(x, names), exp) }) test_that("vec_set_names() sets matrix/array names", { x <- matrix(1:2) names <- c("x1", "x2") exp <- x rownames(exp) <- names expect_equal(vec_set_names(x, names), exp) y <- array(1:4, dim = c(2, 1, 2)) exp <- y rownames(exp) <- names expect_equal(vec_set_names(y, names), exp) }) test_that("vec_set_names() doesn't alter names", { x <- matrix(1, dimnames = list(rows = "a", cols = "x")) vec_set_names(x, "y") expect_equal(vec_names2(x), "a") expect_equal(colnames(x), "x") vec_set_names(x, NULL) expect_equal(vec_names2(x), "a") expect_equal(colnames(x), "x") y <- array( 1:4, dim = c(1, 2, 2), dimnames = list(rows = "a", one = 1:2, two = 1:2) ) vec_set_names(y, "y") expect_equal(vec_names2(y), "a") vec_set_names(y, NULL) expect_equal(vec_names2(y), "a") }) test_that("vec_set_names() sets row names on data frames", { expect_identical( vec_set_names(data_frame(x = 1), "foo"), new_data_frame(list(x = 1), row.names = "foo") ) expect_identical( vec_set_names(data_frame(x = 1:2), c("foo", "foo")), new_data_frame(list(x = 1:2), row.names = c("foo...1", "foo...2")) ) }) test_that("vec_set_names() correctly sets names on POSIXlt objects", { x <- as.POSIXlt(new_datetime(0)) exp <- set_names(x, "a") expect_equal(vec_set_names(x, "a"), exp) }) test_that("vec_set_names() falls back to `names<-` with proxied objects", { x <- structure(1, class = "foobar") exp <- set_names(x, "a") expect_equal(vec_set_names(x, "a"), exp) local_methods(`names<-.foobar` = function(x, value) "fallback!") expect_equal(vec_set_names(x, "a"), "fallback!") }) test_that("vec_set_names() falls back to `rownames<-` with shaped proxied objects", { x <- structure(1:2, dim = c(2L, 1L), class = "foobar") names <- c("r1", "r2") exp <- x rownames(exp) <- names expect_equal(vec_set_names(x, names), exp) # `rownames<-` is not generic, but eventually calls `dimnames<-` which is local_methods(`dimnames<-.foobar` = function(x, value) "fallback!") expect_equal(vec_set_names(x, names), "fallback!") }) test_that("vec_set_names() can set NULL names", { x <- 1:2 expect_equal(vec_set_names(x, NULL), x) x_named <- set_names(x) expect_equal(vec_set_names(x_named, NULL), x) x_mat <- as.matrix(x) expect_equal(vec_set_names(x_mat, NULL), x_mat) x_mat_named <- x_mat rownames(x_mat_named) <- c("1", "2") exp <- matrix(x_mat, dimnames = list(NULL, NULL)) expect_equal(vec_set_names(x_mat_named, NULL), exp) }) test_that("vec_set_names() errors with bad `names`", { expect_snapshot({ (expect_error(vec_set_names(1, 1), "character vector, not a double")) (expect_error(vec_set_names(1, c("x", "y")), "The size of `names`, 2")) }) }) test_that("vec_names() and vec_set_names() work with 1-dimensional arrays", { x <- array(1:2, dimnames = list(c("a", "b"))) expect_identical(vec_names(x), c("a", "b")) expect_identical(vec_names(vec_set_names(x, c("A", "B"))), c("A", "B")) }) test_that("vec_set_names() is consistent with `names<-` regarding `NULL` inputs", { # See also https://github.com/tidyverse/purrr/pull/1224 # Can "clear" names on `NULL` expect_identical(`names<-`(NULL, NULL), NULL) expect_identical(vec_set_names(NULL, NULL), NULL) # But trying to make a "named `NULL`" is an error # (Don't capture the message, these are base R errors) expect_error(`names<-`(NULL, character())) expect_error(vec_set_names(NULL, character())) # This is more obviously an error, because the size of the names doesn't match expect_error(`names<-`(NULL, "x")) expect_error(vec_set_names(NULL, "x")) }) # minimal names ------------------------------------------------------------- test_that("minimal names are made from `n` when `name = NULL`", { expect_identical(minimal_names(1:2), c("", "")) }) test_that("as_minimal_names() checks input", { expect_error(as_minimal_names(1:3), "must be a character vector") }) test_that("minimal names have '' instead of NAs", { expect_identical(as_minimal_names(c("", NA, "", NA)), c("", "", "", "")) }) test_that("repairing minimal names copes with NULL input names", { x <- 1:3 x_named <- vec_repair_names(x) expect_equal(names(x_named), rep("", 3)) }) test_that("as_minimal_names() is idempotent", { x <- c("", "", NA) expect_identical(as_minimal_names(x), as_minimal_names(as_minimal_names(x))) }) test_that("minimal_names() treats data frames and arrays as vectors", { expect_identical(minimal_names(mtcars), row.names(mtcars)) expect_identical(minimal_names(as.matrix(mtcars)), row.names(mtcars)) df <- unrownames(mtcars) exp <- rep_len("", nrow(mtcars)) expect_identical(minimal_names(df), exp) expect_identical(minimal_names(as.matrix(df)), exp) }) test_that("as_minimal_names() copies on write", { nms <- chr(NA, NA) as_minimal_names(nms) expect_identical(nms, chr(NA, NA)) nms <- c("a", "b") out <- as_minimal_names(nms) expect_true(is_reference(nms, out)) }) # unique names ------------------------------------------------------------- test_that("unique_names() handles unnamed vectors", { expect_identical(unique_names(1:3), c("...1", "...2", "...3")) }) test_that("as_unique_names() is a no-op when no repairs are needed", { x <- c("x", "y") out <- as_unique_names(x) expect_true(is_reference(out, x)) expect_identical(out, c("x", "y")) }) test_that("as_unique_names() eliminates emptiness and duplication", { x <- c("", "x", "y", "x") expect_identical(as_unique_names(x), c("...1", "x...2", "y", "x...4")) }) test_that("as_unique_names(): solo empty or NA gets suffix", { expect_identical(as_unique_names(""), "...1") expect_identical(as_unique_names(NA_character_), "...1") }) test_that("as_unique_names() treats ellipsis like empty string", { expect_identical(as_unique_names("..."), as_unique_names("")) }) test_that("two_three_dots() does its job and no more", { x <- c(".", ".1", "...1", "..1a") expect_identical(two_to_three_dots(x), x) expect_identical(two_to_three_dots(c("..1", "..22")), c("...1", "...22")) }) test_that("two dots then number treated like three dots then number", { expect_identical(as_unique_names("..2"), as_unique_names("...5")) }) test_that("as_unique_names() strips positional suffixes, re-applies as needed", { x <- c("...20", "a...1", "b", "", "a...2...34") expect_identical(as_unique_names(x), c("...1", "a...2", "b", "...4", "a...5")) expect_identical(as_unique_names("a...1"), "a") expect_identical(as_unique_names(c("a...2", "a")), c("a...1", "a...2")) expect_identical( as_unique_names(c("a...3", "a", "a")), c("a...1", "a...2", "a...3") ) expect_identical( as_unique_names(c("a...2", "a", "a")), c("a...1", "a...2", "a...3") ) expect_identical( as_unique_names(c("a...2", "a...2", "a...2")), c("a...1", "a...2", "a...3") ) }) test_that("as_unique_names() is idempotent", { x <- c("...20", "a...1", "b", "", "a...2") expect_identical(as_unique_names(!!x), as_unique_names(as_unique_names(!!x))) }) test_that("unique-ification has an 'algebraic'-y property", { ## inspired by, but different from, this guarantee about base::make.unique() ## make.unique(c(A, B)) == make.unique(c(make.unique(A), B)) ## If A is already unique, then make.unique(c(A, B)) preserves A. ## I haven't formulated what we guarantee very well yet, but it's probably ## implicit in this test (?) x <- c("...20", "a...1", "b", "", "a...2", "d") y <- c("", "a...3", "b", "...3", "e") ## fix names on each, catenate, fix the whole z1 <- as_unique_names( c( as_unique_names(x), as_unique_names(y) ) ) ## fix names on x, catenate, fix the whole z2 <- as_unique_names( c( as_unique_names(x), y ) ) ## fix names on y, catenate, fix the whole z3 <- as_unique_names( c( x, as_unique_names(y) ) ) ## catenate, fix the whole z4 <- as_unique_names( c( x, y ) ) expect_identical(z1, z2) expect_identical(z1, z3) expect_identical(z1, z4) }) test_that("unique_names() and as_unique_names() are verbose or silent", { local_name_repair_verbose() expect_snapshot(unique_names(1:2)) expect_snapshot(as_unique_names(c("", ""))) expect_message(regexp = NA, unique_names(1:2, quiet = TRUE)) expect_message(regexp = NA, as_unique_names(c("", ""), quiet = TRUE)) }) test_that("names with only duplicates are repaired", { expect_identical(unique_names(list(x = NA, x = NA)), c("x...1", "x...2")) }) # Universal names ---------------------------------------------------------- test_that("zero-length input", { expect_equal(as_universal_names(character()), character()) }) test_that("universal names are not changed", { expect_equal(as_universal_names(letters), letters) }) test_that("as_universal_names() is idempotent", { x <- c(NA, "", "x", "x", "a1:", "_x_y}") expect_identical( as_universal_names(x), as_universal_names(as_universal_names(x)) ) }) test_that("dupes get a suffix", { expect_equal( as_universal_names(c("a", "b", "a", "c", "b")), c("a...1", "b...2", "a...3", "c", "b...5") ) }) test_that("as_universal_names(): solo empty or NA gets suffix", { expect_identical(as_universal_names(""), "...1") expect_identical(as_universal_names(NA_character_), "...1") }) test_that("as_universal_names() treats ellipsis like empty string", { expect_identical(as_universal_names("..."), as_universal_names("")) }) test_that("solo dot is unchanged", { expect_equal(as_universal_names("."), ".") }) test_that("dot, dot gets suffix", { expect_equal(as_universal_names(c(".", ".")), c("....1", "....2")) }) test_that("dot-dot, dot-dot gets suffix", { expect_equal(as_universal_names(c("..", "..")), c(".....1", ".....2")) }) test_that("empty, dot becomes suffix, dot", { expect_equal(as_universal_names(c("", ".")), c("...1", ".")) }) test_that("empty, empty, dot becomes suffix, suffix, dot", { expect_equal(as_universal_names(c("", "", ".")), c("...1", "...2", ".")) }) test_that("dot, dot, empty becomes suffix, suffix, suffix", { expect_equal(as_universal_names(c(".", ".", "")), c("....1", "....2", "...3")) }) test_that("dot, empty, dot becomes suffix, suffix, suffix", { expect_equal(as_universal_names(c(".", "", ".")), c("....1", "...2", "....3")) }) test_that("empty, dot, empty becomes suffix, dot, suffix", { expect_equal(as_universal_names(c("", ".", "")), c("...1", ".", "...3")) }) test_that("'...j' gets stripped then names are modified", { expect_equal(as_universal_names(c("...6", "...1...2")), c("...1", "...2")) expect_equal(as_universal_names("if...2"), ".if") }) test_that("complicated inputs", { expect_equal( as_universal_names(c( "", ".", NA, "if...4", "if", "if...8", "for", "if){]1" )), c("...1", ".", "...3", ".if...4", ".if...5", ".if...6", ".for", "if...1") ) }) test_that("message", { local_name_repair_verbose() expect_snapshot(as_universal_names(c("a b", "b c"))) }) test_that("quiet", { expect_message( as_universal_names("", quiet = TRUE), NA ) }) test_that("unique then universal is universal, with shuffling", { x <- c("", ".2", "..3", "...4", "....5", ".....6", "......7", "...") expect_identical( as_universal_names(as_unique_names(x)), as_universal_names(x) ) x2 <- x[c(7L, 4L, 3L, 6L, 5L, 1L, 2L, 8L)] expect_identical( as_universal_names(as_unique_names(x2)), as_universal_names(x2) ) x3 <- x[c(3L, 2L, 4L, 6L, 8L, 1L, 5L, 7L)] expect_identical( as_universal_names(as_unique_names(x3)), as_universal_names(x3) ) }) test_that("zero-length inputs given character names", { out <- vec_repair_names(character(), "universal") expect_equal(names(out), character()) }) test_that("unnamed input gives uniquely named output", { out <- vec_repair_names(1:3, "universal") expect_equal(names(out), c("...1", "...2", "...3")) }) test_that("messages by default", { local_name_repair_verbose() expect_snapshot(vec_repair_names(set_names(1, "a:b"), "universal")) expect_snapshot(vec_repair_names(set_names(1, "a:b"), ~ make.names(.))) }) test_that("quiet = TRUE", { expect_message( vec_repair_names(set_names(1, ""), "universal", quiet = TRUE), NA ) }) test_that("non-universal names", { out <- vec_repair_names(set_names(1, "a b"), "universal") expect_equal(names(out), "a.b") expect_equal(as_universal_names("a b"), "a.b") }) # make_syntactic() --------------------------------------------------------- test_that("make_syntactic(): empty or NA", { expect_syntactic( c("", NA_character_), c(".", ".") ) }) test_that("make_syntactic(): reserved words", { expect_syntactic( c("if", "TRUE", "Inf", "NA_real_", "normal"), c(".if", ".TRUE", ".Inf", ".NA_real_", "normal") ) }) test_that("make_syntactic(): underscore", { expect_syntactic( c("_", "_1", "_a}"), c("._", "._1", "._a.") ) }) test_that("make_syntactic(): dots", { expect_syntactic( c(".", "..", "...", "...."), c(".", "..", "....", "....") ) }) test_that("make_syntactic(): number", { expect_syntactic( c("0", "1", "22", "333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): number then character", { expect_syntactic( c("0a", "1b", "22c", "333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): number then non-character", { expect_syntactic( c("0)", "1&", "22*", "333@"), c("..0.", "..1.", "..22.", "..333.") ) }) test_that("make_syntactic(): dot then number", { expect_syntactic( c(".0", ".1", ".22", ".333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot then number then character", { expect_syntactic( c(".0a", ".1b", ".22c", ".333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): dot then number then non-character", { expect_syntactic( c(".0)", ".1&", ".22*", ".333@"), c("..0.", "..1.", "..22.", "..333.") ) }) test_that("make_syntactic(): dot dot then number", { expect_syntactic( c("..0", "..1", "..22", "..333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot dot dot then number", { expect_syntactic( c("...0", "...1", "...22", "...333"), c("...0", "...1", "...22", "...333") ) }) test_that("make_syntactic(): dot dot dot dot then number", { expect_syntactic( c("....0", "....1", "....22", "....333"), c("....0", "....1", "....22", "....333") ) }) test_that("make_syntactic(): dot dot dot dot dot then number", { expect_syntactic( c(".....0", ".....1", ".....22", ".....333"), c(".....0", ".....1", ".....22", ".....333") ) }) test_that("make_syntactic(): dot dot then number then character", { expect_syntactic( c("..0a", "..1b", "..22c", "..333d"), c("..0a", "..1b", "..22c", "..333d") ) }) test_that("make_syntactic(): dot dot then number then non-character", { expect_syntactic( c("..0)", "..1&", "..22*", "..333@"), c("..0.", "..1.", "..22.", "..333.") ) }) # Duplication -------------------------------------------------------------- test_that("Minimal name repair duplicates if needed", { x1 <- NA_character_ x3 <- c(x1, x1) # Called to check absence of side effect vec_as_names(x3, repair = "minimal") expect_identical(x3, c(NA_character_, NA_character_)) }) test_that("Unique name repair duplicates if needed", { x1 <- "fa\u00e7ile" x3 <- c(x1, x1) # Called to check absence of side effect vec_as_names(x3, repair = "unique") expect_identical(x3, c("fa\u00e7ile", "fa\u00e7ile")) }) # Encoding ------------------------------------------------------------- test_that("Name repair works with non-UTF-8 names", { x1 <- "fa\u00e7ile" skip_if_not(Encoding(x1) == "UTF-8") x2 <- iconv(x1, from = "UTF-8", to = "latin1") skip_if_not(Encoding(x2) == "latin1") x3 <- c(x2, x2) expect_equal(vec_as_names(x3, repair = "unique"), paste0(x3, "...", 1:2)) }) # Conditions ----------------------------------------------------------- test_that("names cannot be empty", { expect_error_cnd( stop_names_cannot_be_empty(c("", "")), class = c( "vctrs_error_names_cannot_be_empty", "vctrs_error_names", "vctrs_error" ), message = "Names can't be empty.", names = c("", "") ) }) test_that("names cannot be dot dot", { expect_error_cnd( stop_names_cannot_be_dot_dot(c("..1", "..2")), class = c( "vctrs_error_names_cannot_be_dot_dot", "vctrs_error_names", "vctrs_error" ), message = "Names can't be of the form `...` or `..j`.", names = c("..1", "..2") ) }) test_that("names must be unique", { expect_error_cnd( stop_names_must_be_unique(c("x", "y", "y", "x")), class = c( "vctrs_error_names_must_be_unique", "vctrs_error_names", "vctrs_error" ), message = "Names must be unique.", names = c("x", "y", "y", "x") ) }) # Legacy repair -------------------------------------------------------- test_that("vec_as_names_legacy() works", { expect_identical(vec_as_names_legacy(chr()), chr()) expect_identical( vec_as_names_legacy(c("a", "a", "", "")), c("a", "a1", "V1", "V2") ) expect_identical( vec_as_names_legacy(c("a", "a", "", ""), sep = "_"), c("a", "a_1", "V_1", "V_2") ) expect_identical( vec_as_names_legacy(c("a", "a", "", ""), prefix = "foo"), c("a", "a1", "foo1", "foo2") ) expect_identical( vec_as_names_legacy(c("a", "a", "", ""), prefix = "foo", sep = "_"), c("a", "a_1", "foo_1", "foo_2") ) # From tibble expect_identical(vec_as_names_legacy(c("x", "x")), c("x", "x1")) expect_identical(vec_as_names_legacy(c("", "")), c("V1", "V2")) expect_identical(vec_as_names_legacy(c("", "V1")), c("V2", "V1")) expect_identical(vec_as_names_legacy(c("", "V", "V")), c("V2", "V", "V1")) }) # Name specification --------------------------------------------------- test_that("NULL name specs works with scalars", { expect_identical(apply_name_spec(NULL, "foo", NULL, 1L), "foo") expect_named(vec_c(foo = 1), "foo") expect_identical(apply_name_spec(NULL, "foo", chr(), 0L), chr()) expect_equal(vec_c(foo = dbl()), set_names(dbl(), "")) expect_named(vec_c(foo = set_names(dbl())), chr()) expect_named(vec_c(foo = set_names(dbl()), bar = set_names(dbl())), chr()) expect_error( apply_name_spec(NULL, "foo", c("a", "b")), "vector of length > 1" ) expect_error(apply_name_spec(NULL, "foo", NULL, 2L), "vector of length > 1") expect_snapshot({ (expect_error(vec_c(foo = c(a = 1, b = 2)), "vector of length > 1")) (expect_error(vec_c(foo = 1:2), "vector of length > 1")) (expect_error(vec_c(x = c(xx = 1)), "named vector")) }) }) test_that("function name spec is applied", { spec <- function(outer, inner) { sep <- if (is_character(inner)) "_" else ":" paste0(outer, sep, inner) } expect_identical(apply_name_spec(spec, "foo", NULL, 1L), "foo") expect_named(vec_c(foo = 1, .name_spec = spec), "foo") expect_identical( apply_name_spec(spec, "foo", c("a", "b")), c("foo_a", "foo_b") ) expect_named( vec_c(foo = c(a = 1, b = 2), .name_spec = spec), c("foo_a", "foo_b") ) expect_identical(apply_name_spec(spec, "foo", NULL, 2L), c("foo:1", "foo:2")) expect_named(vec_c(foo = 1:2, .name_spec = spec), c("foo:1", "foo:2")) }) test_that("can pass lambda formula as name spec", { expect_named( vec_c(foo = c(a = 1, b = 2), .name_spec = ~ paste(.x, .y, sep = "_")), c("foo_a", "foo_b") ) expect_error( vec_c(foo = c(a = 1, b = 2), .name_spec = env()), "Can't convert `.name_spec`", fixed = TRUE ) }) test_that("can pass glue string as name spec", { expect_named( vec_c(foo = c(a = 1, b = 2), .name_spec = "{outer}_{inner}"), c("foo_a", "foo_b") ) expect_named( vec_c(foo = 1:2, .name_spec = "{outer}_{inner}"), c("foo_1", "foo_2") ) expect_error( vec_c(foo = c(a = 1, b = 2), .name_spec = c("a", "b")), "single string" ) }) test_that("can pass 'inner' string as name spec", { expect_named(vec_c(foo = c(a = 1, b = 2), .name_spec = "inner"), c("a", "b")) expect_named(vec_c(foo = 1:2, .name_spec = "inner"), NULL) expect_named( list_unchop( list(outer = c(a = 1)), indices = list(1:2), name_spec = "inner" ), c("a", "a") ) }) test_that("`outer` is recycled after name spec is invoked in functions that use `apply_name_spec()`", { expect_identical( vec_c(outer = 1:2, .name_spec = "{outer}"), c(outer = 1L, outer = 2L) ) expect_identical( vec_rbind( outer = data_frame(x = 1:2), .names_to = NULL, .name_spec = "{outer}" ), new_data_frame(list(x = 1:2), row.names = c("outer...1", "outer...2")) ) expect_identical( list_unchop( list(outer = c(a = 1)), indices = list(1:2), name_spec = "{outer}" ), c(outer = 1, outer = 1) ) expect_identical( list_unchop( list(outer = c(a = 1)), indices = list(1:2), name_spec = "{outer}_{inner}" ), c(outer_a = 1, outer_a = 1) ) # Note that `apply_name_spec()` itself doesn't recycle, it expects the caller # to do so expect_identical( unstructure(apply_name_spec("{outer}", "outer", NULL, n = 2L)), "outer" ) }) test_that("apply_name_spec() doesn't recycle inputs (#1099)", { # We used to recycle the output for the caller, but now we check that the # output is recyclable and just return it even if it is size 1, expecting the # caller to be able to handle it, possibly efficiently with `chr_assign()`. out <- unstructure(apply_name_spec("foo", "outer", c("a", "b", "c"))) expect_identical(out, "foo") inner <- NULL outer <- NULL spec <- function(outer, inner) { inner <<- inner outer <<- outer } apply_name_spec(spec, "outer", c("a", "b", "c")) expect_identical(inner, c("a", "b", "c")) expect_identical(outer, "outer") apply_name_spec(spec, "outer", "a", n = 3L) expect_identical(inner, "a") expect_identical(outer, "outer") }) test_that("apply_name_spec() checks recyclability of output", { # These are recyclable expect_identical( apply_name_spec(function(...) c("a", "b"), "outer", "inner", n = 2L), c("a", "b") ) expect_identical( apply_name_spec(function(...) "a", "outer", "inner", n = 2L), "a" ) # This is not expect_snapshot(error = TRUE, { apply_name_spec(function(...) c("a", "b", "c"), "outer", "inner", n = 2L) }) }) test_that("r_chr_paste_prefix() works", { nms <- c("foo", "bar") expect_equal( .Call(ffi_chr_paste_prefix, nms, "baz", "."), c("baz.foo", "baz.bar") ) # Greater than `VCTRS_PASTE_BUFFER_MAX_SIZE` long_prefix <- strrep("a", 5000) expect_equal( .Call(ffi_chr_paste_prefix, nms, long_prefix, "."), paste0(long_prefix, ".", nms) ) }) test_that("vec_as_names() uses internal error if `repair_arg` is not supplied", { expect_snapshot({ (expect_error(vec_as_names("", repair = "foobar", call = quote(tilt())))) (expect_error(vec_as_names("", repair = env(), call = quote(tilt())))) }) }) vctrs/tests/testthat/test-shape.R0000644000176200001440000000772515065005761016635 0ustar liggesusers# common shape ------------------------------------------------------------ test_that("vec_shape2() applies recycling rules", { expect_equal(vec_shape2(shaped_int(1, 5, 5), shaped_int(1)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal(vec_shape2(shaped_int(1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L)) expect_equal( vec_shape2(shaped_int(1, 1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L) ) expect_equal( vec_shape2(shaped_int(1, 1, 5), shaped_int(1, 5, 1)), c(0L, 5L, 5L) ) expect_equal( vec_shape2(shaped_int(1, 5, 1), shaped_int(1, 1, 5)), c(0L, 5L, 5L) ) expect_equal( vec_shape2(shaped_int(1, 1, 1), shaped_int(1, 5, 5)), c(0L, 5L, 5L) ) expect_equal( vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 1, 1)), c(0L, 0L, 5L) ) }) test_that("incompatible shapes throw errors", { expect_snapshot({ (expect_error( vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)), class = "vctrs_error_incompatible_type" )) (expect_error( vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)), class = "vctrs_error_incompatible_type" )) }) }) test_that("can override error args", { expect_snapshot({ (expect_error( vec_shape2( shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar" ), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_shape2() evaluates arg lazily", { expect_silent(vec_shape2( shaped_int(1, 5, 5), shaped_int(1), x_arg = print("oof") )) expect_silent(vec_shape2( shaped_int(1, 5, 5), shaped_int(1), y_arg = print("oof") )) }) # broadcasting ------------------------------------------------------------- test_that("can broadcast to higher dimension, but not lower", { expect_identical(shape_broadcast_(1, NULL), 1) expect_null(shape_broadcast_(NULL, 1)) expect_equal( shape_broadcast_(1, shaped_int(0, 4)), array(1, c(1, 4)) ) expect_error( shape_broadcast_(shaped_int(1, 1, 1), shaped_int(4, 4)), class = "vctrs_error_incompatible_type" ) expect_error( shape_broadcast_(shaped_int(3, 2), shaped_int(3, 3)), class = "vctrs_error_incompatible_type" ) }) test_that("shape_broadcast_() applies recycling rules", { expect_equal( shape_broadcast_(array(1:4, c(1, 1, 4)), shaped_int(0, 4, 4))[1, , ], matrix(1:4, 4, 4, byrow = TRUE) ) expect_equal( shape_broadcast_(array(1:4, c(1, 4, 1)), shaped_int(0, 4, 4))[1, , ], matrix(1:4, 4, 4) ) expect_equal( shape_broadcast_(array(1L, c(1, 1)), shaped_int(1, 0)), matrix(integer(), nrow = 1) ) expect_error( shape_broadcast_(array(1L, c(1, 2)), shaped_int(1, 0)), "Non-recyclable dimensions", class = "vctrs_error_incompatible_type" ) expect_error( shape_broadcast_(array(1L, c(1, 0)), shaped_int(1, 1)), "Non-recyclable dimensions", class = "vctrs_error_incompatible_type" ) }) test_that("can combine shaped native classes (#1290, #1329)", { x <- new_datetime(c(1, 1e6)) dim(x) <- c(1, 2) out <- vec_c(x, x) expect_s3_class(out, c("POSIXct", "POSIXt")) expect_dim(out, c(2, 2)) y <- new_datetime(1:3 + 0.0) dim(y) <- c(1, 3) expect_snapshot(error = TRUE, vec_c(x, y)) d <- structure(Sys.Date(), dim = 1) expect_equal( vec_rbind(data.frame(d), data.frame(d)), data.frame(d = structure(rep(Sys.Date(), 2), dim = 2)) ) }) test_that("factor casts support shape", { x <- factor(c("x", "y", "z")) dim(x) <- c(3, 1) dimnames(x) <- list(c("r1", "r2", "r3"), "c1") y <- factor(c("w", "x", "y", "z")) dim(y) <- c(2, 2) exp <- factor( c("x", "y", "z", "x", "y", "z"), levels = c("w", "x", "y", "z") ) dim(exp) <- c(3, 2) dimnames(exp) <- list(c("r1", "r2", "r3"), c("c1", "c1")) expect_equal(vec_cast(x, y), exp) x <- factor(c("x", "y", "z")) dim(x) <- c(3, 1) y <- factor(c("x", "y", "z")) expect_snapshot(error = TRUE, vec_cast(x, y)) }) vctrs/tests/testthat/test-lifecycle-deprecated.R0000644000176200001440000000275515065005761021570 0ustar liggesuserstest_that("vec_as_index() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical(vec_as_index(-2, 10), vec_as_location(-2, 10)) expect_identical( vec_as_index("cyl", length(mtcars), names(mtcars)), vec_as_location("cyl", length(mtcars), names(mtcars)) ) }) test_that("vec_repeat() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical(vec_repeat(1:2, times = 2), vec_rep(1:2, 2)) expect_identical(vec_repeat(1:2, each = 2), vec_rep_each(1:2, 2)) }) test_that("vec_unchop() is soft-deprecated", { local_options(lifecycle_verbosity = "warning") expect_snapshot(vec_unchop(list(1), indices = list(1))) }) test_that("vec_unchop() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical( vec_unchop(list(1L, 2:3), indices = list(2, c(3, 1))), c(3L, 1L, 2L) ) }) test_that("vec_equal_na() is soft-deprecated", { local_options(lifecycle_verbosity = "warning") expect_snapshot(vec_equal_na(c(1, NA))) }) test_that("vec_equal_na() still works", { local_options(lifecycle_verbosity = "quiet") expect_identical( vec_equal_na(c(1, NA, 2, NA)), c(FALSE, TRUE, FALSE, TRUE) ) }) test_that("vec_is_list() still works", { expect_false(vec_is_list(1)) expect_true(vec_is_list(list())) }) test_that("vec_check_list() still works", { my_check <- function(x) vec_check_list(x) expect_snapshot(error = TRUE, { vec_check_list(1) }) expect_snapshot(error = TRUE, { my_check(1) }) }) vctrs/tests/testthat/test-dictionary.R0000644000176200001440000002674115120273010017663 0ustar liggesusers# counting ---------------------------------------------------------------- test_that("vec_count counts number observations", { x <- vec_count(rep(1:3, 1:3), sort = "key") expect_equal(x, data.frame(key = 1:3, count = 1:3)) }) test_that("vec_count(sort = 'count') uses a stable sort when there are ties (#1588)", { x <- c("a", "b", "b", "a", "d") expect_identical( vec_count(x, sort = "count"), data_frame(key = c("a", "b", "d"), count = c(2L, 2L, 1L)) ) }) test_that("vec_count works with matrices", { x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) out <- vec_count(x) exp <- data_frame(key = c(NA, NA), count = int(2L, 1L)) exp$key <- vec_slice(x, c(1, 3)) expect_identical(out, exp) }) test_that("vec_count works with arrays", { x <- array(c(rep(1, 3), rep(2, 3)), dim = c(3, 2, 1)) expect <- data.frame(key = NA, count = 3) expect$key <- vec_slice(x, 1L) expect_equal(vec_count(x), expect) }) test_that("vec_count works for zero-length input", { x <- vec_count(integer(), sort = "none") expect_equal(x, data.frame(key = integer(), count = integer())) }) test_that("vec_count works with different encodings", { x <- vec_count(encodings()) expect_equal(x, new_data_frame(list(key = encodings()[1], count = 3L))) }) test_that("vec_count recursively takes the equality proxy", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect <- data_frame(key = vec_slice(df, c(1, 3)), count = c(2L, 1L)) expect_equal(vec_count(df), expect) }) # duplicates and uniques -------------------------------------------------- test_that("vec_duplicated reports on duplicates regardless of position", { x <- c(1, 1, 2, 3, 4, 4) expect_equal(vec_duplicate_detect(x), c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE)) }) test_that("vec_duplicate_any returns single TRUE/FALSE", { expect_false(vec_duplicate_any(c(1:10))) expect_true(vec_duplicate_any(c(1:10, 1))) }) test_that("vec_duplicate_id gives position of first found", { x <- c(1, 2, 3, 1, 4) expect_equal(vec_duplicate_id(x), c(1, 2, 3, 1, 5)) }) test_that("vec_unique matches unique", { x <- sample(100, 1000, replace = TRUE) expect_equal(vec_unique(x), unique(x)) expect_equal(vec_unique(c("x", "x")), "x") }) test_that("vec_unique matches unique for matrices", { x <- matrix(c(1, 1, 2, 2), ncol = 2) expect_equal(vec_unique(x), unique(x)) }) test_that("vec_unique_count matches length + unique", { x <- sample(100, 1000, replace = TRUE) expect_equal(vec_unique_count(x), length(unique(x))) }) test_that("also works for data frames", { df <- data.frame(x = 1:3, y = letters[3:1], stringsAsFactors = FALSE) idx <- c(1L, 1L, 1L, 2L, 2L, 3L) df2 <- df[idx, , drop = FALSE] rownames(df2) <- NULL expect_equal(vec_duplicate_detect(df2), vec_duplicate_detect(idx)) expect_equal(vec_unique(df2), vec_slice(df, vec_unique(idx))) count <- vec_count(df2, sort = "key") expect_equal(count$key, df) expect_equal(count$count, vec_count(idx)$count) exp <- tibble(x = c(1, 1, 2), y = c(1, 2, 3)) expect_identical(vec_unique(vec_slice(exp, c(1, 1, 2, 3))), exp) }) test_that("vec_unique() handles matrices (#327)", { x <- matrix(c(1, 2, 3, 4), c(2, 2)) y <- matrix(c(1, 2, 3, 5), c(2, 2)) expect_identical(vec_unique(list(x, x)), list(x)) expect_identical(vec_unique(list(x, y)), list(x, y)) x <- matrix(c(1, 2, 1, 1, 2, 1), nrow = 3) expect_identical(vec_unique(x), vec_slice(x, 1:2)) }) test_that("vec_unique() works with 1D arrays", { # 1D arrays are dispatched to `as.data.frame.vector()` which # currently does not strip dimensions. This caused an infinite # recursion. expect_identical(vec_unique(array(1:2)), array(1:2)) x <- new_vctr(c(1, 1, 1, 2, 1, 2), dim = c(3, 2)) expect_identical(vec_unique(x), new_vctr(c(1, 1, 2, 1), dim = c(2, 2))) }) test_that("unique functions take the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_true(vec_in(tuple(2, 100), x)) expect_identical(vec_match(tuple(2, 100), x), 2L) }) test_that("unique functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect_equal(vec_unique(df), vec_slice(df, c(1, 3))) expect_equal(vec_unique_count(df), 2L) expect_equal(vec_unique_loc(df), c(1, 3)) }) test_that("duplicate functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 1, 2), 1:3) df <- data_frame(x = x) expect_equal(vec_duplicate_any(df), TRUE) expect_equal(vec_duplicate_detect(df), c(TRUE, TRUE, FALSE)) expect_equal(vec_duplicate_id(df), c(1, 1, 3)) }) test_that("unique functions treat positive and negative 0 as equivalent (#637)", { expect_equal(vec_unique(c(0, -0)), 0) expect_equal(vec_unique_count(c(0, -0)), 1) expect_equal(vec_unique_loc(c(0, -0)), 1) }) test_that("unique functions work with different encodings", { encs <- encodings() expect_equal(vec_unique(encs), encs[1]) expect_equal(vec_unique_count(encs), 1L) expect_equal(vec_unique_loc(encs), 1L) }) test_that("unique functions can handle scalar types in lists", { x <- list(a ~ b, a ~ b, a ~ c) expect_equal(vec_unique(x), vec_slice(x, c(1, 3))) x <- list(call("x"), call("y"), call("x")) expect_equal(vec_unique(x), vec_slice(x, c(1, 2))) }) test_that("duplicate functions works with different encodings", { encs <- encodings() expect_equal(vec_duplicate_id(encs), rep(1, 3)) expect_equal(vec_duplicate_detect(encs), rep(TRUE, 3)) expect_equal(vec_duplicate_any(encs), TRUE) }) test_that("vec_unique() returns differently encoded strings in the order they appear", { encs <- encodings() x <- c(encs$unknown, encs$utf8) y <- c(encs$utf8, encs$unknown) expect_equal_encoding(vec_unique(x), encs$unknown) expect_equal_encoding(vec_unique(y), encs$utf8) }) test_that("vec_unique() works on lists containing expressions", { x <- list(expression(x), expression(y), expression(x)) expect_equal(vec_unique(x), x[1:2]) }) test_that("vec_unique() works with glm objects (#643)", { # class(model$family$initialize) == "expression" model <- glm(mpg ~ wt, data = mtcars) expect_equal(vec_unique(list(model, model)), list(model)) }) test_that("can take the unique locations of dfs with list-cols", { df <- tibble(x = list(1, 2, 1, 3), y = list(1, 2, 1, 3)) expect_identical(vec_unique_loc(df), c(1L, 2L, 4L)) }) # matching ---------------------------------------------------------------- test_that("vec_match() matches match()", { n <- c(1:3, NA) h <- c(4, 2, 1, NA) expect_equal(vec_match(n, h), match(n, h)) expect_equal(vec_match(1.5, c(2, 1.5, NA)), match(1.5, c(2, 1.5, NA))) expect_equal(vec_match("x", "x"), match("x", "x")) }) test_that("vec_match() and vec_in() check types", { expect_snapshot({ df1 <- data_frame(x = data_frame(foo = 1)) df2 <- data_frame(x = data_frame(foo = "")) (expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type")) (expect_error( vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type" )) (expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type")) (expect_error( vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type" )) }) }) test_that("vec_in() matches %in%", { n <- c(1:3, NA) h <- c(4, 2, 1, NA) expect_equal(vec_in(n, h), n %in% h) }) test_that("can opt out of NA matching", { n <- c(1, NA) h <- c(1:3, NA) expect_equal(vec_in(n, h, na_equal = FALSE), c(TRUE, NA)) }) test_that("vec_match works with empty data frame", { out <- vec_match( new_data_frame(n = 3L), new_data_frame(n = 0L) ) expect_equal(out, vec_init(integer(), 3)) }) test_that("matching functions take the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_identical(vec_unique_loc(x), 1:2) expect_identical(unique(x), tuple(c(1, 2), 1:2)) expect_true(vec_duplicate_any(x)) expect_identical(vec_duplicate_id(x), c(1L, 2L, 1L)) expect_identical(vec_unique_count(x), 2L) expect_identical(vec_duplicate_detect(x), c(TRUE, FALSE, TRUE)) }) test_that("can take the unique loc of 1d arrays (#461)", { x <- array(c(1, 1, 2, 2, 3)) y <- array(c(1, 1, 2, 2, 3), dimnames = list(NULL)) expect_identical(vctrs::vec_unique_loc(x), int(1, 3, 5)) expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5)) z <- array(c(1, 1, 2, 2, 3, 4), c(3, 2)) expect_silent(expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5))) }) test_that("matching functions work with different encodings", { encs <- encodings() expect_equal(vec_match(encs, encs[1]), rep(1, 3)) expect_equal(vec_in(encs, encs[1]), rep(TRUE, 3)) }) test_that("matching functions take the equality proxy recursively", { local_comparable_tuple() x <- tuple(c(1, 2), 1:2) df <- data_frame(x = x) y <- tuple(c(2, 3), c(3, 3)) df2 <- data_frame(x = y) expect_equal(vec_match(df, df2), c(NA, 1)) expect_equal(vec_in(df, df2), c(FALSE, TRUE)) }) test_that("can propagate missing values while matching", { exp <- c(NA, 3L, NA, 1L) expect_identical( vec_match(lgl(NA, TRUE, NA, FALSE), lgl(FALSE, NA, TRUE), na_equal = FALSE), exp ) expect_identical( vec_match(int(NA, 1L, NA, 2L), int(2L, NA, 1L), na_equal = FALSE), exp ) expect_identical( vec_match(dbl(NA, 1, NA, 2), dbl(2, NA, 1), na_equal = FALSE), exp ) expect_identical( vec_match(cpl(NA, 1, NA, 2), cpl(2, NA, 1), na_equal = FALSE), exp ) expect_identical( vec_match(chr(NA, "1", NA, "2"), chr("2", NA, "1"), na_equal = FALSE), exp ) expect_identical( vec_match(list(NULL, 1, NULL, 2), list(2, NULL, 1), na_equal = FALSE), exp ) # No missing values for raw vectors expect_identical( vec_match(raw2(0, 1, 0, 2), raw2(2, 0, 1), na_equal = FALSE), c(2L, 3L, 2L, 1L) ) }) test_that("can propagate missingness of incomplete rcrd observations (#1386)", { x <- new_rcrd(list(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA))) expect_identical(vec_match(x, x, na_equal = FALSE), c(1L, NA, NA, NA)) # Matches `vec_detect_complete()` results expect_identical(vec_detect_complete(x), c(TRUE, FALSE, FALSE, FALSE)) }) test_that("can propagate NaN as a missing value (#1252)", { expect_identical( vec_match(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), int(NA, NA) ) expect_identical( vec_in(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), lgl(NA, NA) ) }) test_that("missing values are propagated across columns", { for (na_value in list(NA, na_int, na_dbl, na_cpl, na_chr, list(NULL))) { df <- data_frame(x = 1, y = data_frame(foo = 2, bar = na_value), z = 3) expect_identical(vec_match(df, df), 1L) expect_identical(vec_match(df, df, na_equal = FALSE), na_int) } }) test_that("can't supply NA as `na_equal`", { expect_error(vec_match(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`") }) test_that("vec_match() and vec_in() silently fall back to base data frame", { expect_silent(expect_identical( vec_match(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), 1:32 )) expect_silent(expect_identical( vec_in(foobar(mtcars), foobar(tibble::as_tibble(mtcars))), rep(TRUE, 32) )) }) test_that("vec_in() evaluates arg lazily", { expect_silent(vec_in(1L, 1L, needles_arg = print("oof"))) expect_silent(vec_in(1L, 1L, haystack_arg = print("oof"))) }) test_that("vec_match() evaluates arg lazily", { expect_silent(vec_match(1L, 1L, needles_arg = print("oof"))) expect_silent(vec_match(1L, 1L, haystack_arg = print("oof"))) }) vctrs/tests/testthat/test-type-misc.R0000644000176200001440000001725215113335375017444 0ustar liggesuserstest_that("`numeric_version` is a vector (#723)", { x <- numeric_version("0.1.0") y <- numeric_version("0.2.0") z <- c(x, y) expect_true(vec_is(x)) expect_true(vec_equal(x, x)) expect_false(vec_equal(x, y)) expect_identical(vec_equal(y, z), c(FALSE, TRUE)) expect_identical(vec_unique(z), z) expect_identical(vec_unique(c(y, z, x)), z[2:1]) }) test_that("`numeric_version` falls back to base methods", { x <- utils::packageVersion("rlang") y <- utils::packageVersion("vctrs") z <- c(x, y) # `z` is a `list-of`-like type but slicing 1 element returns the # atomic type. To implement this in vctrs we'd need to provide a way # of customising the "wrapper" type for size > 1 vectors. expect_identical(vec_slice(z, 1:2), z) expect_identical(vec_slice(z, 1), x) expect_identical(vec_slice(z, 2), y) expect_identical(vec_c(x, y), z) }) test_that("`numeric_version` has an equality, comparison, and order proxy", { numeric_row <- function(...) { out <- list2(...) out <- map(out, as.integer) names(out) <- paste0("...", seq_len(8L)) new_data_frame(out, n = 1L) } x <- numeric_version(c("1.2-3", "1.21.1", "3", "2.21.0.9000", "0.5.01")) expect <- vec_rbind( numeric_row(1, 2, 3, 0, 0, 0, 0, 0), numeric_row(1, 21, 1, 0, 0, 0, 0, 0), numeric_row(3, 0, 0, 0, 0, 0, 0, 0), numeric_row(2, 21, 0, 9000, 0, 0, 0, 0), numeric_row(0, 5, 1, 0, 0, 0, 0, 0) ) expect_identical(vec_proxy_equal(x), expect) expect_identical(vec_proxy_compare(x), expect) expect_identical(vec_proxy_order(x), expect) }) test_that("`numeric_version` proxy works with empty vectors", { x <- numeric_version(character()) expect <- vec_rep(list(integer()), times = 8L) names(expect) <- paste0("...", seq_len(8L)) expect <- new_data_frame(expect, n = 0L) expect_identical(vec_proxy_equal(x), expect) }) test_that("`numeric_version` proxy handles pseudo-`NA`", { numeric_row <- function(...) { out <- list2(...) out <- map(out, as.integer) names(out) <- paste0("...", seq_len(8L)) new_data_frame(out, n = 1L) } x <- numeric_version(c("1_1", "1.2", NA), strict = FALSE) expect <- vec_rbind( numeric_row(NA, NA, NA, NA, NA, NA, NA, NA), numeric_row(1, 2, 0, 0, 0, 0, 0, 0), numeric_row(NA, NA, NA, NA, NA, NA, NA, NA) ) expect_identical(vec_proxy_equal(x), expect) expect_identical(vec_proxy_compare(x), expect) expect_identical(vec_proxy_order(x), expect) }) test_that("`numeric_version` works with functions using the equality proxy", { x <- numeric_version( c("1.2-3", "1.21.1", "1_1", "0.5", "1.3"), strict = FALSE ) y <- numeric_version( c("1.21.1", "1.21.1", "1_2", "0.05", "1_3"), strict = FALSE ) expect_identical(vec_unique(x), x) expect_identical(vec_unique(y), y[c(1, 3, 4)]) expect_identical(vec_detect_missing(y), c(FALSE, FALSE, TRUE, FALSE, TRUE)) expect_identical(vec_equal(x, y), c(FALSE, TRUE, NA, TRUE, NA)) expect_identical( vec_equal(x, y, na_equal = TRUE), c(FALSE, TRUE, TRUE, TRUE, FALSE) ) }) test_that("`numeric_version` works with functions using the comparison proxy", { x <- numeric_version( c("1.2-3", "1.21.1", "1_1", "0.5", "1.3"), strict = FALSE ) y <- numeric_version( c("1.21.1", "1.21.1", "1_2", "0.05", "1_3"), strict = FALSE ) expect_identical(vec_compare(x, y), c(-1L, 0L, NA, 0L, NA)) expect_identical(vec_compare(x, y, na_equal = TRUE), c(-1L, 0L, 0L, 0L, 1L)) # Specifically related to base R taking a joint proxy in `Ops.numeric_version` x <- numeric_version("3.3") y <- numeric_version("3.21") # `.encode_numeric_version(x) < .encode_numeric_version(y)` == FALSE # `x < y` == TRUE expect_identical(vec_compare(x, y), -1L) }) test_that("`numeric_version` works with functions using the order proxy (tidyverse/dplyr#6680)", { x <- numeric_version( c("1.2-3", "1.21.1", "1_1", "0.5", "1.30"), strict = FALSE ) y <- numeric_version( c("1.21.1", "1.21.1", "1_2", "0.05", "1_3"), strict = FALSE ) expect_identical(vec_order(y), c(4L, 1L, 2L, 3L, 5L)) expect_identical(vec_order_radix(y), c(4L, 1L, 2L, 3L, 5L)) expect_identical(vec_order(y, na_value = "smallest"), c(3L, 5L, 4L, 1L, 2L)) expect_identical( vec_order_radix(y, na_value = "smallest"), c(3L, 5L, 4L, 1L, 2L) ) expect_identical( vec_locate_matches(x, y), data_frame( needles = c(1L, 2L, 2L, 3L, 3L, 4L, 5L), haystack = c(NA, 1L, 2L, 3L, 5L, 4L, NA) ) ) expect_identical( vec_locate_matches(x, y, condition = "<"), data_frame( needles = c(1L, 1L, 2L, 3L, 4L, 4L, 5L), haystack = c(1L, 2L, NA, NA, 1L, 2L, NA) ) ) }) test_that("`numeric_version` proxy can handle at most 8 components", { x <- numeric_version("1.2.3.4.5.6.7.8") expect_silent(vec_proxy_equal(x)) x <- numeric_version("1.2.3.4.5.6.7.8.9") expect_snapshot(error = TRUE, { vec_proxy_equal(x) }) }) test_that("`numeric_version` can compare against components with 8 components", { x <- numeric_version("2.3.4.5.6.7.8.9") y <- c(x, numeric_version(c("1.1", "11.2", "2.1"))) expect_identical(vec_compare(x, y), c(0L, 1L, -1L, 1L)) }) test_that("`package_version` and `R_system_version` use the `numeric_version` proxy", { x <- numeric_version("1.5.6") y <- package_version("1.5.6") z <- R_system_version("1.5.6") expect_identical(vec_proxy_equal(y), vec_proxy_equal(x)) expect_identical(vec_proxy_equal(z), vec_proxy_equal(x)) }) test_that("can slice `ts` vectors", { x <- ts(1:3) expect_identical(vec_ptype(x), x[0]) expect_identical(vec_slice(x, 2), x[2]) }) test_that("can concatenate `ts` vectors", { x <- ts(1:3) expect_identical(vec_c(x, x), c(x, x)) df <- data_frame(x = x) expect_identical(vec_rbind(df, df), data_frame(x = c(x, x))) }) test_that("`omit` class is numeric (#1160)", { x <- c(NA, 1:3, NA) omit <- attr(na.omit(x), "na.action") expect_identical( vec_ptype_common(omit, omit), structure(int(), class = "omit") ) expect_identical(vec_ptype_common(1.5, omit), dbl()) expect_identical(vec_ptype_common(omit, 1L), int()) expect_identical(vec_cast_common(omit, omit), list(omit, omit)) expect_identical(vec_cast_common(omit, 1L), list(unstructure(omit), 1L)) expect_identical( vec_cast_common(1.5, omit), list(1.5, unstructure(as.double(omit))) ) expect_error(vec_cast(1L, omit), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1.0, omit), class = "vctrs_error_incompatible_type") expect_identical(vec_slice(omit, 1), structure(1L, class = "omit")) expect_identical( vec_c(omit, omit), structure(c(1L, 5L, 1L, 5L), class = "omit") ) expect_identical(vec_c(omit, omit, 10L), c(1L, 5L, 1L, 5L, 10L)) expect_identical(vec_slice(x, omit), x[omit]) }) test_that("`exclude` class is numeric (#1160)", { x <- c(NA, 1:3, NA) exc <- attr(na.exclude(x), "na.action") expect_identical( vec_ptype_common(exc, exc), structure(int(), class = "exclude") ) expect_identical(vec_ptype_common(1.5, exc), dbl()) expect_identical(vec_ptype_common(exc, 1L), int()) expect_identical(vec_cast_common(exc, exc), list(exc, exc)) expect_identical(vec_cast_common(exc, 1L), list(unstructure(exc), 1L)) expect_identical( vec_cast_common(1.5, exc), list(1.5, unstructure(as.double(exc))) ) expect_error(vec_cast(1L, exc), class = "vctrs_error_incompatible_type") expect_error(vec_cast(1.0, exc), class = "vctrs_error_incompatible_type") expect_identical(vec_slice(exc, 1), structure(1L, class = "exclude")) expect_identical( vec_c(exc, exc), structure(c(1L, 5L, 1L, 5L), class = "exclude") ) expect_identical(vec_c(exc, exc, 10L), c(1L, 5L, 1L, 5L, 10L)) expect_identical(vec_slice(x, exc), x[exc]) }) vctrs/tests/testthat/test-type-tibble.R0000644000176200001440000000646215065005761017752 0ustar liggesuserstest_that("tibble beats data frame", { df <- new_data_frame() dt <- tibble::tibble() expect_s3_class(vec_ptype_common(dt, df), "tbl_df") expect_s3_class(vec_ptype_common(df, dt), "tbl_df") }) test_that("can cast tibble to df and vice versa", { df <- new_data_frame() tib <- tibble::tibble() expect_identical(vec_cast(df, tib), tib) expect_identical(vec_cast(tib, df), df) expect_identical(vec_cast(tib, tib), tib) }) test_that("can't cast vector to tibble", { dt <- tibble::tibble() v <- logical() expect_snapshot({ local_error_call(call("my_function")) (expect_error(vec_ptype2(v, dt), class = "vctrs_error_incompatible_type")) (expect_error(vec_ptype2(dt, v), class = "vctrs_error_incompatible_type")) (expect_error(vec_cast(v, dt), class = "vctrs_error_incompatible_type")) }) }) test_that("casting to and from tibble preserves row names", { out <- vec_cast(mtcars, tibble::as_tibble(mtcars)) expect_identical(row.names(out), row.names(mtcars)) out <- vec_cast(out, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) }) test_that("no common type between list and tibble", { dt <- tibble::tibble() l <- list() expect_error(vec_ptype2(l, dt), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(dt, l), class = "vctrs_error_incompatible_type") }) test_that("vec_restore restores tibbles", { df1 <- tibble::tibble(x = 1:4) df2 <- vec_restore(vec_data(df1), df1) expect_s3_class(df2, "tbl_df") }) test_that("the type of a tibble with an unspecified column retains unspecifiedness", { df1 <- tibble::tibble(x = 1, y = NA) df2 <- tibble::tibble(x = 1, y = unspecified(1)) expect <- tibble::tibble(x = numeric(), y = unspecified()) expect_identical(vec_ptype(df1), expect) expect_identical(vec_ptype(df2), expect) }) test_that("vec_ptype_finalise() works recursively over tibbles", { df <- tibble(x = numeric(), y = unspecified()) expect <- tibble(x = numeric(), y = logical()) expect_identical(vec_ptype_finalise(df), expect) }) test_that("vec_ptype_finalise() can handle tibble df columns", { df <- tibble(x = numeric(), y = tibble(z = unspecified())) expect <- tibble(x = numeric(), y = tibble(z = logical())) expect_identical(vec_ptype_finalise(df), expect) }) test_that("can use ptype2 and cast with tibble that has incorrect class vector", { tib1 <- structure(data.frame(x = 1), class = c("tbl_df", "data.frame")) tib2 <- structure(data.frame(y = 2), class = c("tbl_df", "data.frame")) exp <- structure( data.frame(x = dbl(), y = dbl()), class = c("tbl_df", "data.frame") ) requireNamespace("tibble") expect_identical( vec_ptype_common(tib1, tib2), exp ) expect_identical( vec_ptype_common(tib1, data.frame(y = 2)), tibble::new_tibble(exp, nrow = nrow(exp)) ) expect_identical( vec_ptype_common(data.frame(x = 1), tib2), tibble::new_tibble(exp, nrow = nrow(exp)) ) expect_identical( vec_cast(tib1, tib1), tib1 ) expect_snapshot({ local_error_call(call("my_function")) (expect_error( vec_cast(tib1, tib2), class = "vctrs_error_cast" )) (expect_error( vec_cast(tib1, data.frame(y = 2)), class = "vctrs_error_cast" )) (expect_error( vec_cast(data.frame(x = 1), tib2), class = "vctrs_error_cast" )) }) }) vctrs/tests/testthat/helper-list-combine.R0000644000176200001440000001364115057550670020421 0ustar liggesusers# Helper for `list_combine()` tests that tests a `list_combine()` call across # 3 variants: # # - Normal case # - Homogenous fallback case (all S3 objects with the same class but no ptype2 # method and no `c()` method. these end up using the "main" loop due to the # lack of `c()` method.) # - `c()` fallback case (all S3 objects with the same class and a `c()` method. # these end up using the `base_c_invoke()` fallback path) # # It's important that all variants are fairly consistent expect_identical_list_combine <- function( ..., x, indices, size, default = NULL, unmatched = "default", multiple = "last", slice_x = FALSE, ptype = NULL, name_spec = NULL, name_repair = "minimal", x_arg = "x", indices_arg = "indices", default_arg = "default", expect ) { expect_something_list_combine( expect_identical, ..., x = x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg, expect = expect, foobar_expect = TRUE ) } expect_named_list_combine <- function( ..., x, indices, size, default = NULL, unmatched = "default", multiple = "last", slice_x = FALSE, ptype = NULL, name_spec = NULL, name_repair = "minimal", x_arg = "x", indices_arg = "indices", default_arg = "default", expect ) { expect_something_list_combine( expect_named, ..., x = x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg, expect = expect, foobar_expect = FALSE ) } expect_snapshot_list_combine <- function( ..., x, indices, size, default = NULL, unmatched = "default", multiple = "last", slice_x = FALSE, ptype = NULL, name_spec = NULL, name_repair = "minimal", x_arg = "x", indices_arg = "indices", default_arg = "default", error = FALSE ) { check_dots_empty0(...) expect_snapshot(error = error, { list_combine( x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg ) }) x_foobar <- lapply(x, function(elt) { if (is.null(elt)) { elt } else { foobar(elt) } }) if (is.null(default)) { default_foobar <- NULL } else { default_foobar <- foobar(default) } # Homogeneous fallback expect_snapshot(error = error, { list_combine( x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg ) }) # So they show up differently in the snapshot x_foobar_c <- x_foobar default_foobar_c <- default_foobar # `c()` fallback with_c_foobar({ expect_snapshot(error = error, { list_combine( x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg ) }) }) } expect_something_list_combine <- function( expect_fn, ..., x, indices, size, default = NULL, unmatched = "default", multiple = "last", slice_x = FALSE, ptype = NULL, name_spec = NULL, name_repair = "minimal", x_arg = "x", indices_arg = "indices", default_arg = "default", expect, foobar_expect = TRUE ) { check_dots_empty0(...) expect_fn( list_combine( x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg ), expected = expect ) x_foobar <- lapply(x, function(elt) { if (is.null(elt)) { elt } else { foobar(elt) } }) if (is.null(default)) { default_foobar <- NULL } else { default_foobar <- foobar(default) } if (foobar_expect) { expect_foobar <- foobar(expect) } else { expect_foobar <- expect } # Homogeneous fallback expect_fn( list_combine( x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg ), expected = expect_foobar ) if (foobar_expect) { expect_foobar_c <- foobar_c(expect) } else { expect_foobar_c <- expect } # `c()` fallback with_c_foobar({ expect_fn( list_combine( x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg ), expected = expect_foobar_c ) }) } vctrs/tests/testthat/test-encoding.R0000644000176200001440000001145115156537555017326 0ustar liggesuserstest_that("get expected re-encoded-ness of various encodings", { x <- unlist(encodings(bytes = TRUE), use.names = FALSE) # For the `°C` strings: # - `TRUE` for marked as `CE_UTF8` # - `FALSE` for marked as `CE_NATIVE`, even if `utf8locale = true` # - `FALSE` for marked as `CE_LATIN1` # - `FALSE` for marked as `CE_BYTES` expect_identical( chr_is_ascii_or_utf8(x), c(TRUE, FALSE, FALSE, FALSE) ) }) test_that("can UTF-8 encode a character vector of various encodings (#553)", { x <- unlist(encodings(), use.names = FALSE) results <- obj_encode_utf8(x) expect_equal_encoding(results, encodings()$utf8) }) test_that("UTF-8 encodes all encodings", { encs <- encodings() for (enc in encs) { expect_equal_encoding(obj_encode_utf8(enc), encodings()$utf8) } }) test_that("can UTF-8 encode a list containing character vectors with different encodings", { results <- obj_encode_utf8(encodings()) results <- unlist(results) expect_equal_encoding(results, encodings()$utf8) }) test_that("UTF-8 encoding fails purposefully with any bytes", { expect_error( obj_encode_utf8(encoding_bytes()), "translating strings with \"bytes\" encoding" ) }) test_that("UTF-8 encoding fails purposefully when mixing with bytes with other encodings", { for (enc in encodings()) { x <- c(encoding_bytes(), enc) expect_error( obj_encode_utf8(x), "translating strings with \"bytes\" encoding" ) } }) test_that("attributes are kept through UTF-8 encoding (#599)", { encs <- encodings() x <- c(encs$utf8, encs$latin1) x <- structure(x, names = c("a", "b"), extra = 1) expect_equal(attributes(obj_encode_utf8(x)), attributes(x)) }) test_that("UTF-8 encoding is robust against scalar types contained in lists (#633)", { x <- list(a = z ~ y, b = z ~ z) expect_equal(obj_encode_utf8(x), x) }) test_that("UTF-8 encoding can still occur even if a scalar type is in a list", { encs <- encodings() x <- list(a = z ~ y, b = encs$latin1) result <- obj_encode_utf8(x) expect_equal_encoding(result$b, encs$utf8) }) test_that("UTF-8 encoding occurs inside scalars contained in a list", { encs <- encodings() scalar <- structure(list(x = encs$latin1), class = "scalar_list") lst <- list(scalar) result <- obj_encode_utf8(lst) expect_equal_encoding(result[[1]]$x, encs$utf8) }) test_that("UTF-8 encoding treats data frames elements of lists as lists (#1233)", { encs <- encodings() field <- c(encs$utf8, encs$latin1) a <- new_rcrd(list(field = field)) df <- data.frame(a = a, b = 1:2) x <- list(df) # Recursive proxy won't proxy list elements, # so the rcrd column in the data frame won't get proxied proxy <- vec_proxy_equal(x) result <- obj_encode_utf8(proxy) expect_identical(result, x) result_field <- field(result[[1]]$a, "field") expect_field <- c(encs$utf8, encs$utf8) expect_equal_encoding(result_field, expect_field) }) test_that("attributes are UTF-8 encoded", { utf8 <- encodings()$utf8 latin1 <- encodings()$latin1 a <- structure(1, enc = utf8) b <- structure(1, enc = latin1) c <- structure(1, enc1 = utf8, enc2 = list(latin1), enc3 = latin1) x <- list(a, b, c) result <- obj_encode_utf8(x) a_enc <- attr(result[[1]], "enc") b_enc <- attr(result[[2]], "enc") c_enc1 <- attr(result[[3]], "enc1") c_enc2 <- attr(result[[3]], "enc2")[[1]] c_enc3 <- attr(result[[3]], "enc3") expect_utf8_encoded(a_enc) expect_utf8_encoded(b_enc) expect_utf8_encoded(c_enc1) expect_utf8_encoded(c_enc2) expect_utf8_encoded(c_enc3) expect <- list( structure(1, enc = utf8), structure(1, enc1 = utf8, enc2 = list(utf8), enc3 = utf8) ) expect_identical(vec_unique(x), expect) # Ensure nothing has changed in the original objects a_enc <- attr(a, "enc") b_enc <- attr(b, "enc") c_enc1 <- attr(c, "enc1") c_enc2 <- attr(c, "enc2")[[1]] c_enc3 <- attr(c, "enc3") expect_utf8_encoded(a_enc) expect_latin1_encoded(b_enc) expect_utf8_encoded(c_enc1) expect_latin1_encoded(c_enc2) expect_latin1_encoded(c_enc3) }) test_that("attributes are UTF-8 encoded recursively", { utf8 <- encodings()$utf8 latin1 <- encodings()$latin1 nested <- structure(1, latin1 = latin1) x <- structure(2, nested = nested, foo = 1, latin1 = latin1) result <- obj_encode_utf8(x) attrib <- attributes(result) attrib_nested <- attributes(attrib$nested) expect_equal_encoding(attrib$latin1, utf8) expect_equal_encoding(attrib_nested$latin1, utf8) }) test_that("NAs aren't converted to 'NA' (#1291)", { utf8 <- c(NA, encodings()$utf8) latin1 <- c(NA, encodings()$latin1) result1 <- obj_encode_utf8(utf8) result2 <- obj_encode_utf8(latin1) expect_equal_encoding(result1, utf8) expect_equal_encoding(result2, utf8) expect_identical(result1[[1]], NA_character_) expect_identical(result2[[1]], NA_character_) }) vctrs/tests/testthat/helper-restart.R0000644000176200001440000000300214362266120017476 0ustar liggesusers# Example usage of ptype2 and cast restart. This handler treats any # input that inherits from as a . In other words, it # allows incompatible inputs to benefit from all # coercion methods. with_ordered_restart <- function(expr) { withCallingHandlers( expr, vctrs_error_incompatible_type = function(cnd) { x <- cnd[["x"]] y <- cnd[["y"]] restart <- FALSE if (is.ordered(x)) { restart <- TRUE x <- factor(as.character(x), levels = levels(x)) } if (is.ordered(y)) { restart <- TRUE y <- factor(as.character(y), levels = levels(y)) } # Don't recurse and let ptype2 error keep its course if (!restart) { return(zap()) } x_arg <- cnd[["x_arg"]] y_arg <- cnd[["y_arg"]] call <- cnd[["call"]] # Recurse with factor methods and restart with the result if (inherits(cnd, "vctrs_error_ptype2")) { out <- vec_ptype2(x, y, x_arg = x_arg, y_arg = y_arg, call = call) restart <- "vctrs_restart_ptype2" } else if (inherits(cnd, "vctrs_error_cast")) { out <- vec_cast(x, y, x_arg = x_arg, to_arg = y_arg, call = call) restart <- "vctrs_restart_cast" } else { return(zap()) } # Old-R compat for `tryInvokeRestart()` try_restart <- function(restart, ...) { if (!is_null(findRestart(restart))) { invokeRestart(restart, ...) } } try_restart(restart, out) } ) } vctrs/tests/testthat/test-type-vctr.R0000644000176200001440000005303415065005761017464 0ustar liggesuserstest_that("constructor sets attributes", { x <- new_vctr(1:4, class = "x", x = 1) expect_equal(x, structure(1:4, class = c("x", "vctrs_vctr"), x = 1)) x <- new_vctr(1:4, class = "x", x = 1, inherit_base_type = TRUE) expect_equal( x, structure(1:4, class = c("x", "vctrs_vctr", "integer"), x = 1) ) }) test_that(".data must be a vector", { expect_error(new_vctr(mean), "vector type") }) test_that("attributes other than names are ignored", { out <- new_vctr(structure(1, a = 1)) expect_null(attributes(out)$a) }) test_that("default format method is internal", { x <- new_vctr(1, class = "x") expect_equal(format(x), format(x)) }) test_that("vctr class is proxied", { expect_identical(vec_proxy(new_vctr(1:3)), new_vctr(1:3)) expect_identical( vec_proxy(new_vctr(as.list(1:3))), unclass(new_vctr(as.list(1:3))) ) expect_true(vec_is(new_vctr(as.list(1:3)))) }) test_that("Can opt out of base type", { x <- new_vctr(1, class = "x", inherit_base_type = FALSE) expect_s3_class(x, c("x", "vctrs_vctr"), exact = TRUE) }) test_that("base type is always set for lists", { expect_s3_class(new_vctr(list()), "list") }) test_that("cannot opt out of the base type with lists", { expect_error( new_vctr(list(), inherit_base_type = FALSE), "must inherit from the base type" ) }) test_that("data frames are not allowed", { expect_error(new_vctr(mtcars), "can't be a data frame") }) test_that("attributes must be named", { expect_error(vec_set_attributes(1, list(1)), "must be named") expect_error(vec_set_attributes(1, list(y = 1, 2)), "2 does not") }) test_that("can strip all attributes without adding new ones", { expect_equal(vec_set_attributes(structure(1, a = 1), NULL), 1) }) test_that("`c.vctrs_vctr()` checks for default method arguments (#791)", { expect_error(c(new_vctr(1), recursive = TRUE), "`recursive` must be") expect_error(c(new_vctr(1), use.names = FALSE), "`use.names` must be") }) # Cast/restore ------------------------------------------------------------ test_that("cast to NULL returns x", { x <- new_vctr(1, class = "x") expect_equal(vec_cast(NULL, x), NULL) }) test_that("cast succeeds if attributes equal", { x1 <- new_vctr(1, class = "x", a = 1, b = 2) x2 <- new_vctr(2, class = "x", a = 1, b = 2) expect_equal(vec_cast(x1, x2), x1) expect_equal(vec_cast(x2, x1), x2) }) test_that("and fails if attributes are different", { x1 <- new_vctr(1, class = "x", a = 1, b = 2) x2 <- new_vctr(2, class = "x", a = 2, b = 2) expect_error(vec_cast(x1, x2), class = "vctrs_error_incompatible_type") }) test_that("restoring to atomic vector of same type preserves attributes", { x1 <- new_vctr(1, class = "x") x2 <- new_vctr(2, class = "x") expect_equal(vec_restore(2, x1), x2) }) test_that("restoring to atomic vector of different type throws error", { x1 <- new_vctr(1, class = "x") expect_error(vec_restore("x", x1), class = "vctrs_error_incompatible_type") }) test_that("base coercion methods mapped to vec_cast", { x <- new_vctr(1, inherit_base_type = FALSE) expect_error(as.logical(x), class = "vctrs_error_incompatible_type") expect_error(as.integer(x), class = "vctrs_error_incompatible_type") expect_error(as.logical(x), class = "vctrs_error_incompatible_type") expect_error(as.double(x), class = "vctrs_error_incompatible_type") expect_error(as.character(x), class = "vctrs_error_incompatible_type") expect_error(as.Date(x), class = "vctrs_error_incompatible_type") expect_error(as.POSIXct(x), class = "vctrs_error_incompatible_type") expect_error(as.POSIXlt(x), class = "vctrs_error_incompatible_type") expect_equal(as.list(x), list(x)) }) test_that("as.data.frame creates data frame", { x <- new_vctr(1:3) df <- as.data.frame(x) expect_s3_class(df, "data.frame") expect_equal(nrow(df), 3) expect_named(df, "x") }) test_that("as.data.frame on shaped vctrs doesn't bring along extra attributes", { x <- new_vctr(1:3, foo = "bar", dim = c(3L, 1L)) df <- as.data.frame(x) expect_null(attr(df, "foo", exact = TRUE)) }) test_that("as.data.frame2() unclasses input to avoid dispatch on as.data.frame()", { x <- structure( 1:2, dim = c(1L, 2L), dimnames = list("r1", c("c1", "c2")), class = "foo" ) expect <- data.frame(c1 = 1L, c2 = 2L, row.names = "r1") local_methods(as.data.frame.foo = function(x, ...) "dispatched!") expect_identical(as.data.frame2(x), expect) }) test_that("as.list() chops vectors", { expect_identical( as.list(new_vctr(1:3)), list(new_vctr(1L), new_vctr(2L), new_vctr(3L)) ) x <- new_vctr(as.list(1:3)) expect_identical(as.list(x), as.list(1:3)) }) # equality + comparison + arith + math --------------------------------------- test_that("equality functions remapped", { x <- new_vctr(c(1, 1, NA), inherit_base_type = FALSE) expect_error(x == 1, class = "vctrs_error_incompatible_type") expect_error(x != 1, class = "vctrs_error_incompatible_type") expect_equal(is.na(x), c(FALSE, FALSE, TRUE)) expect_true(anyNA(x)) expect_equal(unique(x), new_vctr(c(1, NA), inherit_base_type = FALSE)) expect_equal(duplicated(x), c(FALSE, TRUE, FALSE)) expect_true(anyDuplicated(x)) }) test_that("is.na<-() supported", { x <- new_vctr(1:4) is.na(x) <- c(FALSE, FALSE, TRUE, NA) expect_identical(x, new_vctr(c(1:2, NA, 4L))) x <- new_vctr(1:4) is.na(x) <- TRUE expect_identical(x, new_vctr(rep(NA_integer_, 4))) x <- new_vctr(1:4) is.na(x) <- c(2, 3) expect_identical(x, new_vctr(c(1L, NA, NA, 4L))) names <- c("a", "b", "c", "d") x <- set_names(new_vctr(1:4), names) is.na(x) <- c("d", "b", "b") expect_identical(x, set_names(new_vctr(c(1L, NA, 3L, NA)), names)) x <- new_vctr(1:4) expect_error(is.na(x) <- "x", "character names to index an unnamed vector") expect_error(is.na(x) <- 5, class = "vctrs_error_subscript_oob") }) test_that("comparison functions remapped", { local_methods( vec_proxy_compare.bizzaro = function(x, ...) -vec_data(x) ) x1 <- new_vctr(c(1, 2), class = "bizzaro") x2 <- new_vctr(2, class = "bizzaro") expect_equal(order(x1), c(2L, 1L)) expect_equal(x1 < x2, c(FALSE, FALSE)) expect_equal(x1 <= x2, c(FALSE, TRUE)) expect_equal(x1 > x2, c(TRUE, FALSE)) expect_equal(x1 >= x2, c(TRUE, TRUE)) }) test_that("operators remapped", { local_methods( vec_arith.bizzaro = function(op, x, y) 1L ) x <- new_vctr(c(1, 2), class = "bizzaro") expect_equal(x + 1, 1L) expect_equal(x - 1, 1L) expect_equal(x * 1, 1L) expect_equal(x / 1, 1L) expect_equal(x^1, 1L) expect_equal(x %% 1, 1L) expect_equal(x %/% 1, 1L) expect_equal(x & 1, 1L) expect_equal(x | 1, 1L) expect_equal(!x, 1L) expect_equal(+x, 1L) expect_equal(-x, 1L) }) test_that("math functions overridden", { local_methods( vec_math.bizzaro = function(fn, x, ...) vec_math_base(fn, 2L) ) x <- new_vctr(c(1, NA), class = "bizzaro") expect_equal(mean(x), 2L) expect_equal(sum(x), 2L) expect_equal(is.finite(x), TRUE) expect_equal(is.infinite(x), FALSE) expect_equal(is.nan(x), FALSE) }) test_that("diff matches base R", { local_methods( vec_arith.vctrs_minus = function(op, x, y) vec_arith_base(op, x, y) ) x1 <- cumsum(cumsum(1:10)) x2 <- new_vctr(x1, class = "vctrs_minus") expect_equal(diff(x2), diff(x1)) expect_equal(diff(x2, lag = 2L), diff(x1, lag = 2L)) expect_equal(diff(x2, differences = 2L), diff(x1, differences = 2L)) expect_equal(diff(x2, lag = 11), x2[0L]) expect_equal(diff(x2, differences = 11), x2[0L]) }) test_that("na.omit() works and retains metadata", { x <- new_vctr(c(a = 1, b = NA, c = 2)) result <- na.omit(x) expect <- vec_slice(x, c(1, 3)) attr(expect, "na.action") <- structure(c(b = 2L), class = "omit") expect_identical(result, expect) }) test_that("na.omit() returns its input unchanged if there are no missing values", { x <- new_vctr(c(a = 1, b = 2)) expect_identical(na.omit(x), x) }) test_that("na.exclude() works and retains metadata", { x <- new_vctr(c(a = 1, b = NA, c = 2)) result <- na.exclude(x) expect <- vec_slice(x, c(1, 3)) attr(expect, "na.action") <- structure(c(b = 2L), class = "exclude") expect_identical(result, expect) }) test_that("na.fail() works", { x <- new_vctr(c(a = 1, b = 2)) expect_identical(na.fail(x), x) x <- new_vctr(c(a = 1, b = NA, c = 2)) expect_snapshot(error = TRUE, na.fail(x)) }) # names ------------------------------------------------------------------- test_that("`NA_character_` names are repaired to the empty string (#784)", { expect_named(new_vctr(set_names(1, NA_character_)), "") expect_named(new_vctr(set_names(1:2, c("a", NA))), c("a", "")) }) test_that("the empty string is an allowed name (#784)", { expect_named(new_vctr(set_names(1, "")), "") expect_named(new_vctr(set_names(1:2, c("", "x"))), c("", "x")) }) test_that("can not provide invalid names", { x <- new_vctr(c(a = 1, b = 2)) expect_error(names(x) <- "x", "length") expect_error(names(x) <- c("x", "y", "z"), "length") expect_error(names(x) <- NULL, NA) }) test_that("can set names to the empty string (#784)", { x <- new_vctr(c(a = 1, b = 2)) names(x) <- c("", "") expect_named(x, c("", "")) names(x) <- c("", "x") expect_named(x, c("", "x")) }) test_that("setting names to `NA_character_` repairs to the empty string (#784)", { x <- new_vctr(1:2) names(x) <- c(NA_character_, NA_character_) expect_named(x, c("", "")) names(x) <- c("x", NA_character_) expect_named(x, c("x", "")) }) test_that("can use [ and [[ with names", { local_methods( vec_ptype2.vctrs_vctr.double = function(...) dbl(), vec_ptype2.double.vctrs_vctr = function(...) dbl() ) x <- new_vctr(c(a = 1, b = 2)) expect_equal(x["b"], new_vctr(c(b = 2))) expect_equal(x[["b"]], new_vctr(2)) # [[ drops names x[["c"]] <- 3 expect_equal(x[["c"]], new_vctr(3)) x["d"] <- 4 expect_equal(x[["d"]], new_vctr(4)) }) test_that("can use [ and [[ with names - list vctr", { local_methods( vec_ptype2.vctrs_vctr = function(...) list(), vec_ptype2.list.vctrs_vctr = function(...) list() ) y <- new_vctr(list(a = 1, b = 2)) y[["c"]] <- 3 expect_equal(y[["c"]], 3) y["d"] <- list(4) expect_equal(y[["d"]], 4) }) test_that("can use [[<- to replace n-dimensional elements", { local_methods( vec_restore.vctrs_mtrx = function(x, to, ...) x, vec_ptype2.double.vctrs_mtrx = function(...) dbl(), vec_ptype2.vctrs_mtrx = function(...) dbl() ) x <- new_vctr(rep(1, times = 4), dim = c(2, 2), class = "vctrs_mtrx") x[[2, 2]] <- 4 expect_equal(x[[2, 2]], 4) }) test_that("subsetting preserves attributes", { x <- new_vctr(c(a = 1, b = 2)) attr(x, "extra") <- TRUE y <- x[1] expect_equal(attr(x, "extra"), TRUE) }) test_that("$ inherits from underlying vector", { x1 <- new_vctr(c(a = 1, b = 2)) expect_error(x1$a, "atomic vectors") expect_error(x1$a <- 2, "atomic vectors") x2 <- new_vctr(list(a = 1, b = 2)) expect_equal(x2$a, 1) x2$a <- 10 expect_equal(x2$a, 10) }) # unsupported/unimplemented operations -------------------------------------- test_that("can't touch protected attributes", { x <- new_vctr(1:4) expect_error(dim(x) <- c(2, 2), class = "vctrs_error_unsupported") expect_error(dimnames(x) <- list("x"), class = "vctrs_error_unsupported") expect_error(levels(x) <- "x", class = "vctrs_error_unsupported") # It is expected that unimplemented `levels()` returns `NULL` expect_null(levels(x)) # But it's ok to set names to NULL; this happens at least in vec_c # and maybe elsewhere. We may need to back off on this level of # strictness in the future expect_error(names(x) <- NULL, NA) }) test_that("summary is unimplemented", { x <- new_vctr(1:4) expect_error(summary(x), class = "vctrs_error_unimplemented") }) # hidden class ------------------------------------------------------------ # We can't construct classes in test because the methods are not found # when vctr generics call other generics. Instead we rely on a very simple # class implemented in vctr.R test_that("class preserved when subsetting", { h <- new_hidden(1:4) expect_s3_class(h, "hidden") expect_s3_class(h[1], "hidden") expect_s3_class(h[[1]], "hidden") expect_s3_class(rep(h[1], 2), "hidden") expect_s3_class(as.list(h)[[1]], "hidden") length(h) <- 3 expect_s3_class(h, "hidden") }) test_that("RHS cast when using subset assign", { local_hidden() h <- new_hidden(1) expect_error(h[[1]] <- "x", class = "vctrs_error_incompatible_type") expect_error(h[1] <- "x", class = "vctrs_error_incompatible_type") h[2] <- 1 expect_equal(h, new_hidden(c(1, 1))) h[[2]] <- 2 expect_equal(h, new_hidden(c(1, 2))) }) test_that("c passes on to vec_c", { local_hidden() h <- new_hidden(1) expect_equal(c(h), h) expect_equal(c(h, NULL), h) expect_equal(c(h, 1), rep(h, 2)) expect_equal(c(h, h), rep(h, 2)) }) test_that("rbind does not fail with an unclear message (#1186)", { # In general, vec_rbind() should be preferred. In many cases rbind() does # the right thing, this test exists to alert us if this changes in the future. skip_on_cran() local_hidden() h <- new_hidden(1) # A failure in levels() for vctrs_vctr classes was the underlying issue. expect_null(levels(h)) df <- data_frame(h = h) expect_equal(rbind(df), df) expect_equal(rbind(df, NULL), df) expect_equal( rbind(df, data_frame(h = 1)), unrownames(df[c(1, 1), , drop = FALSE]) ) expect_equal(rbind(df, df), unrownames(df[c(1, 1), , drop = FALSE])) # An example where the result differs, to alert us if the rbind() contract # changes expect_equal(rbind(data_frame(h = 1), df), data_frame(h = c(1, 1))) }) test_that("summaries preserve class", { h <- new_hidden(c(1, 2)) expect_equal(sum(h), new_hidden(3)) expect_equal(mean(h), new_hidden(1.5)) }) test_that("methods using vec_proxy_compare agree with base", { h <- new_hidden(c(1:10)) h_na <- new_hidden(c(NA, 1:10)) expect_agree <- function(f, x, na.rm = FALSE) { f <- enexpr(f) expect_equal( vec_data((!!f)(x, na.rm = na.rm)), (!!f)(vec_data(x), na.rm = na.rm) ) } expect_agree(min, h) expect_agree(max, h) expect_agree(range, h) expect_agree(min, h_na) expect_agree(max, h_na) expect_agree(range, h_na) expect_agree(min, h_na, na.rm = TRUE) expect_agree(max, h_na, na.rm = TRUE) expect_agree(range, h_na, na.rm = TRUE) }) test_that("can put in data frame", { h <- new_hidden(1:4) expect_named(as.data.frame(h), "h") expect_named(data.frame(x = h), "x") }) test_that("base coercions default to vec_cast", { local_hidden() h <- new_hidden(1) expect_error(as.character(h), class = "vctrs_error_incompatible_type") expect_error(as.integer(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.factor(h), class = "vctrs_error_incompatible_type") expect_error(generics::as.ordered(h), class = "vctrs_error_incompatible_type") expect_error( generics::as.difftime(h), class = "vctrs_error_incompatible_type" ) expect_equal(as.logical(h), TRUE) expect_equal(as.double(h), 1) }) test_that("default print and str methods are useful", { h <- new_hidden(1:4) expect_snapshot(h) expect_snapshot(h[0]) expect_snapshot(str(h)) }) test_that("default print method shows names", { h <- new_hidden(c(A = 1, B = 2, C = 3)) expect_snapshot(h) }) test_that("can't transpose", { h <- new_hidden(1:4) expect_error(t(h), class = "vctrs_error_unsupported") }) test_that("shaped vctrs can be cast to data frames", { x <- new_vctr(1:4, dim = 4) expect_identical(as.data.frame(x), data.frame(V1 = 1:4)) x <- new_vctr(1:4, dim = c(2, 2)) expect_identical(as.data.frame(x), data.frame(V1 = 1:2, V2 = 3:4)) }) # slicing ----------------------------------------------------------------- test_that("additional subscripts are handled (#269)", { new_2d <- function(.data, dim) { vctrs::new_vctr(.data, dim = dim, class = "vctrs_2d") } x <- new_2d(c(1, 2), dim = c(2L, 1L)) expect_identical(x[1], new_2d(1, dim = c(1, 1))) expect_identical(x[1, 1], new_2d(1, dim = c(1, 1))) expect_identical(x[, 1], new_2d(c(1, 2), dim = c(2, 1))) }) # summary generics -------------------------------------------------------- test_that("na.rm is forwarded to summary generics", { x <- new_vctr(dbl(1, 2, NA)) expect_identical(mean(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(mean(x, na.rm = TRUE), new_vctr(1.5)) expect_identical(min(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(min(x, na.rm = TRUE), new_vctr(1)) expect_identical(max(x, na.rm = FALSE), new_vctr(dbl(NA))) expect_identical(max(x, na.rm = TRUE), new_vctr(2)) x <- new_vctr(lgl(TRUE, NA)) expect_identical(all(x, na.rm = FALSE), lgl(NA)) expect_identical(all(x, na.rm = TRUE), TRUE) }) test_that("Summary generics behave identically to base for empty vctrs (#88)", { expect_warning( expect_identical( new_vctr(max(numeric())), max(new_vctr(numeric())) ) ) expect_warning( expect_identical( new_vctr(min(numeric())), min(new_vctr(numeric())) ) ) expect_identical( new_vctr(range(1)), range(new_vctr(1)) ) expect_identical( new_vctr(prod(numeric())), prod(new_vctr(numeric())) ) expect_identical( new_vctr(sum(numeric())), sum(new_vctr(numeric())) ) expect_identical( new_vctr(cummax(numeric())), cummax(new_vctr(numeric())) ) expect_identical( new_vctr(cummin(numeric())), cummin(new_vctr(numeric())) ) expect_identical( new_vctr(cumprod(numeric())), cumprod(new_vctr(numeric())) ) expect_identical( new_vctr(cumsum(numeric())), cumsum(new_vctr(numeric())) ) expect_identical( new_vctr(mean(numeric())), mean(new_vctr(numeric())) ) }) test_that("generic predicates return logical vectors (#251)", { x <- new_vctr(c(1, 2)) expect_identical(is.finite(x), c(TRUE, TRUE)) expect_identical(is.infinite(x), c(FALSE, FALSE)) expect_identical(is.nan(x), c(FALSE, FALSE)) x <- new_vctr(TRUE) expect_identical(any(x), TRUE) expect_identical(all(x), TRUE) }) test_that("xtfrm() converts logical types to integer", { expect_identical( xtfrm(new_vctr(c(TRUE, FALSE, NA), foo = "bar")), c(1L, 0L, NA) ) }) test_that("xtfrm() unwraps integer and double atomic types", { expect_identical(xtfrm(new_vctr(1:3, foo = "bar")), 1:3) expect_identical(xtfrm(new_vctr(1:3 + 0, foo = "bar")), 1:3 + 0) }) test_that("xtfrm() works with character subclass", { expect_identical(xtfrm(new_vctr(chr())), int()) }) test_that("xtfrm() maintains ties when falling through to vec_rank() (#1354)", { x <- new_vctr(c("F", "F", "M", "A", "M", "A")) expect_identical(xtfrm(x), c(2L, 2L, 3L, 1L, 3L, 1L)) }) test_that("xtfrm() propagates NAs when falling through to vec_rank()", { x <- new_vctr(c("F", NA)) expect_identical(xtfrm(x), c(1L, NA)) }) test_that("xtfrm() uses C locale ordering with character proxies", { x <- new_vctr(c("A", "a", "B")) expect_identical(xtfrm(x), c(1L, 3L, 2L)) }) test_that("xtfrm() works on rcrd types", { x <- new_rcrd(list(x = c(1, 2, 1, NA), y = c(2, 1, 1, NA))) expect_identical(xtfrm(x), c(2L, 3L, 1L, NA)) }) test_that("Summary generics behave as expected if na.rm = TRUE and all values are NA (#1357)", { expect_identical(min(new_vctr(NA_real_), na.rm = TRUE), new_vctr(Inf)) expect_identical(max(new_vctr(NA_real_), na.rm = TRUE), new_vctr(-Inf)) expect_identical( range(new_vctr(NA_real_), na.rm = TRUE), new_vctr(c(Inf, -Inf)) ) expect_identical( min(new_vctr(NA_integer_), na.rm = TRUE), new_vctr(NA_integer_) ) expect_identical( max(new_vctr(NA_integer_), na.rm = TRUE), new_vctr(NA_integer_) ) expect_identical( range(new_vctr(NA_integer_), na.rm = TRUE), new_vctr(c(NA_integer_, NA_integer_)) ) expect_identical( min(new_vctr(NA_character_), na.rm = TRUE), new_vctr(NA_character_) ) expect_identical( max(new_vctr(NA_character_), na.rm = TRUE), new_vctr(NA_character_) ) expect_identical( range(new_vctr(NA_character_), na.rm = TRUE), new_vctr(c(NA_character_, NA_character_)) ) expect_identical(min(new_vctr(NA), na.rm = TRUE), new_vctr(NA)) expect_identical(max(new_vctr(NA), na.rm = TRUE), new_vctr(NA)) expect_identical(range(new_vctr(NA), na.rm = TRUE), new_vctr(c(NA, NA))) }) test_that("Summary generics behave as expected for empty vctrs (#1357)", { expect_identical(min(new_vctr(numeric()), na.rm = TRUE), new_vctr(Inf)) expect_identical(max(new_vctr(numeric()), na.rm = TRUE), new_vctr(-Inf)) expect_identical( range(new_vctr(numeric()), na.rm = TRUE), new_vctr(c(Inf, -Inf)) ) expect_identical( min(new_vctr(integer()), na.rm = TRUE), new_vctr(NA_integer_) ) expect_identical( max(new_vctr(integer()), na.rm = TRUE), new_vctr(NA_integer_) ) expect_identical( range(new_vctr(integer()), na.rm = TRUE), new_vctr(c(NA_integer_, NA_integer_)) ) expect_identical( min(new_vctr(character()), na.rm = TRUE), new_vctr(NA_character_) ) expect_identical( max(new_vctr(character()), na.rm = TRUE), new_vctr(NA_character_) ) expect_identical( range(new_vctr(character()), na.rm = TRUE), new_vctr(c(NA_character_, NA_character_)) ) expect_identical(min(new_vctr(logical()), na.rm = TRUE), new_vctr(NA)) expect_identical(max(new_vctr(logical()), na.rm = TRUE), new_vctr(NA)) expect_identical( range(new_vctr(logical()), na.rm = TRUE), new_vctr(c(NA, NA)) ) }) test_that("anyNA(recursive = TRUE) works with lists (#1278)", { x <- list_of(1:4, c(2, NA, 5)) expect_false(anyNA(x)) expect_true(anyNA(x, recursive = TRUE)) x <- new_vctr(list(1:4, list(c(2, NA, 5)))) expect_false(anyNA(x)) expect_true(anyNA(x, recursive = TRUE)) }) vctrs/tests/testthat/_snaps/0000755000176200001440000000000015157321032015677 5ustar liggesusersvctrs/tests/testthat/_snaps/type2.md0000644000176200001440000001275515157321022017275 0ustar liggesusers# base coercions are symmetric and unchanging Code mat Output logical integer double character raw list logical "logical" "integer" "double" NA NA NA integer "integer" "integer" "double" NA NA NA double "double" "double" "double" NA NA NA character NA NA NA "character" NA NA raw NA NA NA NA "raw" NA list NA NA NA NA NA "list" # vec_ptype2() data frame methods builds argument tags Code vec_ptype2("foo", 10) Condition Error: ! Can't combine `"foo"` and `10` . --- Code df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) vec_ptype2(df1, df2) Condition Error: ! Can't combine `df1$x$y$z` and `df2$x$y$z` . # can override scalar vector error message for base scalar types Code (expect_error(vec_ptype2(NULL, quote(x), y_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a symbol. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(vec_ptype2(quote(x), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a symbol. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # can override scalar vector error message for S3 types Code (expect_error(vec_ptype2(NULL, foobar(), y_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. Code (expect_error(vec_ptype2(foobar(), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # ptype2 and cast errors when same class fallback is impossible are informative Code (expect_error(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `foobar(1, bar = TRUE)` to . Code (expect_error(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine `foobar(1, bar = TRUE)` and `foobar(2, baz = TRUE)` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # Incompatible attributes bullets are not show when methods are implemented Code with_foobar_cast <- (function(expr) { with_methods(vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) { vec_default_cast(x, to, ...) }, expr) }) with_foobar_ptype2 <- (function(expr) { with_methods(vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { vec_default_ptype2(x, y, ...) }, expr) }) (expect_error(with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type")) Output Error: ! Can't convert `foobar(1, bar = TRUE)` to . Code (expect_error(with_foobar_ptype2(vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type")) Output Error: ! Can't combine `foobar(1, bar = TRUE)` and `foobar(2, baz = TRUE)` . # error indexing is correct with unspecifieds Code vec_ptype_common(1, NA, "x") Condition Error: ! Can't combine `..1` and `..3` . --- Code vec_ptype_common(NA, 1, "x") Condition Error: ! Can't combine `..2` and `..3` . --- Code vec_ptype_common(NA, NA, 1, "x") Condition Error: ! Can't combine `..3` and `..4` . --- Code vec_ptype_common(1, NA, NA, 1, "x") Condition Error: ! Can't combine `..1` and `..5` . vctrs/tests/testthat/_snaps/compare.md0000644000176200001440000000151315157320776017665 0ustar liggesusers# error is thrown when comparing complexes (#1655) Code (expect_error(vec_compare(complex(), complex()))) Output Error in `vec_compare()`: ! Can't compare complexes. # `na_equal` is validated Code (expect_error(vec_compare(1, 1, na_equal = 1))) Output Error in `vec_compare()`: ! `na_equal` must be `TRUE` or `FALSE`, not the number 1. Code (expect_error(vec_compare(1, 1, na_equal = c(TRUE, FALSE)))) Output Error in `vec_compare()`: ! `na_equal` must be `TRUE` or `FALSE`, not a logical vector. # can't supply NA as `na_equal` Code vec_compare(NA, NA, na_equal = NA) Condition Error in `vec_compare()`: ! `na_equal` must be `TRUE` or `FALSE`, not `NA`. vctrs/tests/testthat/_snaps/bind.md0000644000176200001440000003276715157320776017172 0ustar liggesusers# incompatible columns throws common type error Code (expect_error(vec_rbind(x_int, x_chr), class = "vctrs_error_incompatible_type")) Output Error in `vec_rbind()`: ! Can't combine `..1$x` and `..2$x` . Code (expect_error(vec_rbind(x_int, x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type") ) Output Error in `foo()`: ! Can't combine `..1$x` and `..2$x` . Code (expect_error(vec_rbind(x_int, x_chr, .ptype = x_chr, .error_call = call("foo")), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't convert `..1$x` to match type of `x` . # names are supplied if needed Code out <- vec_rbind(data_frame(...1 = 1), 1) Message New names: * `` -> `...1` # can repair names in `vec_rbind()` (#229) Code (expect_error(vec_rbind(.name_repair = "none"), "can't be `\"none\"`")) Output Error: ! `.name_repair` can't be `"none"`. It must be one of `"unique"`, `"universal"`, or `"check_unique"`. Code (expect_error(vec_rbind(.name_repair = "minimal"), "can't be `\"minimal\"`")) Output Error: ! `.name_repair` can't be `"minimal"`. It must be one of `"unique"`, `"universal"`, or `"check_unique"`. Code (expect_error(vec_rbind(list(a = 1, a = 2), .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) Output Error in `vec_rbind()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. # can repair names quietly Code res_unique <- vec_rbind(c(x = 1, x = 2), c(x = 3, x = 4), .name_repair = "unique_quiet") res_universal <- vec_rbind(c(`if` = 1, `in` = 2), c(`if` = 3, `for` = 4), .name_repair = "universal_quiet") --- Code res_unique <- vec_cbind(x = 1, x = 2, .name_repair = "unique_quiet") res_universal <- vec_cbind(`if` = 1, `in` = 2, .name_repair = "universal_quiet") # vec_rbind() fails with arrays of dimensionality > 3 Code (expect_error(vec_rbind(array(NA, c(1, 1, 1))))) Output Error in `vec_rbind()`: ! Can't bind arrays. Code (expect_error(vec_rbind(array(NA, c(1, 1, 1)), .error_call = call("foo")))) Output Error in `foo()`: ! Can't bind arrays. # can assign row names in vec_rbind() Code (expect_error(vec_rbind(foo = df1, df2, .names_to = NULL), "specification")) Output Error in `vec_rbind()`: ! Can't merge the outer name `foo` with a vector of length > 1. Please supply a `.name_spec` specification. # names of `...` are used for type and cast errors even when zapped Code vec_rbind(!!!xs) Condition Error in `vec_rbind()`: ! Can't combine `a$x` and `b$x` . --- Code vec_rbind(!!!xs, .ptype = data_frame(x = double())) Condition Error in `vec_rbind()`: ! Can't convert `b$x` to match type of `x` . # vec_cbind() reports error context Code (expect_error(vec_cbind(foobar(list())))) Output Error in `vec_cbind()`: ! `..1` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. Code (expect_error(vec_cbind(foobar(list()), .error_call = call("foo")))) Output Error in `foo()`: ! `..1` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. Code (expect_error(vec_cbind(a = 1:2, b = int()))) Output Error in `vec_cbind()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(vec_cbind(a = 1:2, b = int(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `a` (size 2) to match `b` (size 0). # duplicate names are de-deduplicated Code (expect_named(vec_cbind(x = 1, x = 1), c("x...1", "x...2"))) Message New names: * `x` -> `x...1` * `x` -> `x...2` Output x...1 x...2 1 1 1 Code (expect_named(vec_cbind(data.frame(x = 1), data.frame(x = 1)), c("x...1", "x...2"))) Message New names: * `x` -> `x...1` * `x` -> `x...2` Output x...1 x...2 1 1 1 # can repair names in `vec_cbind()` (#227) Code (expect_error(vec_cbind(a = 1, a = 2, .name_repair = "none"), "can't be `\"none\"`")) Output Error: ! `.name_repair` can't be `"none"`. It must be one of `"unique"`, `"universal"`, `"check_unique"`, or `"minimal"`. Code (expect_error(vec_cbind(a = 1, a = 2, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") ) Output Error in `vec_cbind()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. # can supply `.names_to` to `vec_rbind()` (#229) Code (expect_error(vec_rbind(data_frame(), .names_to = letters))) Output Error in `vec_rbind()`: ! `.names_to` must be `NULL`, a string, or an `rlang::zap()` object. Code (expect_error(vec_rbind(data_frame(), .names_to = 10))) Output Error in `vec_rbind()`: ! `.names_to` must be `NULL`, a string, or an `rlang::zap()` object. Code (expect_error(vec_rbind(data_frame(), .names_to = letters, .error_call = call( "foo")))) Output Error in `foo()`: ! `.names_to` must be `NULL`, a string, or an `rlang::zap()` object. # vec_cbind() fails with arrays of dimensionality > 3 Code (expect_error(vec_cbind(a))) Output Error in `vec_cbind()`: ! Can't bind arrays. Code (expect_error(vec_cbind(a, .error_call = call("foo")))) Output Error in `foo()`: ! Can't bind arrays. Code (expect_error(vec_cbind(x = a))) Output Error in `vec_cbind()`: ! Can't bind arrays. # vec_rbind() name repair messages are useful Code vec_rbind(1, 2) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 Code vec_rbind(1, 2, .names_to = NULL) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 Code vec_rbind(1, 2, ...10 = 3) Message New names: * `` -> `...1` New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 3 3 Code vec_rbind(1, 2, ...10 = 3, .names_to = NULL) Message New names: * `` -> `...1` New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 ...1 1 ...2 2 ...3 3 Code vec_rbind(a = 1, b = 2) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 1 1 2 2 Code vec_rbind(a = 1, b = 2, .names_to = NULL) Message New names: * `` -> `...1` New names: * `` -> `...1` Output ...1 a 1 b 2 Code vec_rbind(c(a = 1), c(b = 2)) Output a b 1 1 NA 2 NA 2 Code vec_rbind(c(a = 1), c(b = 2), .names_to = NULL) Output a b 1 1 NA 2 NA 2 # vec_rbind() is silent when assigning duplicate row names of df-cols Code vec_rbind(df, df) Output mpg 1 21.0 2 21.0 3 22.8 4 21.0 5 21.0 6 22.8 --- Code vec_rbind(mtcars[1:4, ], mtcars[1:3, ]) Output mpg cyl disp hp drat wt qsec vs am gear carb Mazda RX4...1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag...2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710...3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 Mazda RX4...5 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4 Wag...6 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 Datsun 710...7 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 # vec_cbind() name repair messages are useful Code vec_cbind(1, 2) Message New names: * `` -> `...1` * `` -> `...2` Output ...1 ...2 1 1 2 Code vec_cbind(1, 2, ...10 = 3) Message New names: * `` -> `...1` * `` -> `...2` * `...10` -> `...3` Output ...1 ...2 ...3 1 1 2 3 Code vec_cbind(a = 1, b = 2) Output a b 1 1 2 Code vec_cbind(c(a = 1), c(b = 2)) Message New names: * `` -> `...1` * `` -> `...2` Output ...1 ...2 1 1 2 # rbind repairs names of data frames (#704) Code (expect_error(vec_rbind(df, df, .name_repair = "check_unique"), class = "vctrs_error_names_must_be_unique") ) Output Error in `vec_rbind()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. Code (expect_error(vec_rbind(df, df, .name_repair = "check_unique", .error_call = call( "foo")), class = "vctrs_error_names_must_be_unique")) Output Error in `foo()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. # vec_rbind() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(vec_rbind(set_names(x, "x"), set_names(y, "x")), class = "vctrs_error_incompatible_type") ) Output Error in `vec_rbind()`: ! Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # vec_rbind() fails with complex foreign S4 classes Code joe <- .Counts(1L, name = "Joe") jane <- .Counts(2L, name = "Jane") (expect_error(vec_rbind(set_names(joe, "x"), set_names(jane, "y")), class = "vctrs_error_incompatible_type") ) Output Error in `vec_rbind()`: ! Can't combine `..1` and `..2` . # row-binding performs expected allocations Code ints <- rep(list(1L), 100) named_ints <- rep(list(set_names(1:3, letters[1:3])), 100) # Integers as rows suppressMessages(with_memory_prof(vec_rbind_list(ints))) Output [1] 2.74KB Code suppressMessages(with_memory_prof(vec_rbind_list(named_ints))) Output [1] 3.62KB Code # Data frame with named columns df <- data_frame(x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c( "A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb")))) dfs <- rep(list(df), 100) with_memory_prof(vec_rbind_list(dfs)) Output [1] 10.4KB Code # Data frame with rownames (non-repaired, non-recursive case) df <- data_frame(x = 1:2) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 7.63KB Code # Data frame with rownames (repaired, non-recursive case) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 13.8KB Code # Data frame with rownames (non-repaired, recursive case) (#1217) df <- data_frame(x = 1:2, y = data_frame(x = 1:2)) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 13KB Code # Data frame with rownames (repaired, recursive case) (#1217) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(vec_rbind_list(dfs)) Output [1] 25.3KB vctrs/tests/testthat/_snaps/dictionary.md0000644000176200001440000000217315157320776020407 0ustar liggesusers# vec_match() and vec_in() check types Code df1 <- data_frame(x = data_frame(foo = 1)) df2 <- data_frame(x = data_frame(foo = "")) (expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type")) Output Error in `vec_match()`: ! Can't combine `x$foo` and `x$foo` . Code (expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type")) Output Error in `vec_match()`: ! Can't combine `n$x$foo` and `h$x$foo` . Code (expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type")) Output Error in `vec_in()`: ! Can't combine `x$foo` and `x$foo` . Code (expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type") ) Output Error in `vec_in()`: ! Can't combine `n$x$foo` and `h$x$foo` . vctrs/tests/testthat/_snaps/type-sf.md0000644000176200001440000000023115157321033017605 0ustar liggesusers# `crs` attributes of `sfc` vectors must be the same Code vctrs::vec_c(x, y) Condition Error: ! arguments have different crs vctrs/tests/testthat/_snaps/parallel.md0000644000176200001440000000766215157321010020024 0ustar liggesusers# no casting is done Code vec_pall(1) Condition Error in `vec_pall()`: ! `..1` must be a logical vector, not the number 1. --- Code vec_pany(1) Condition Error in `vec_pany()`: ! `..1` must be a logical vector, not the number 1. --- Code vec_pall(array(TRUE)) Condition Error in `vec_pall()`: ! `..1` must be a logical vector, not a logical 1D array. --- Code vec_pany(array(TRUE)) Condition Error in `vec_pany()`: ! `..1` must be a logical vector, not a logical 1D array. --- Code vec_pall(structure(TRUE, class = "foo")) Condition Error in `vec_pall()`: ! `..1` must be a logical vector, not a object. --- Code vec_pany(structure(TRUE, class = "foo")) Condition Error in `vec_pany()`: ! `..1` must be a logical vector, not a object. # no recycling is done Code vec_pall(TRUE, c(TRUE, TRUE, TRUE)) Condition Error in `vec_pall()`: ! `..2` must have size 1, not size 3. --- Code vec_pany(TRUE, c(TRUE, TRUE, TRUE)) Condition Error in `vec_pany()`: ! `..2` must have size 1, not size 3. --- Code vec_pall(TRUE, c(TRUE, TRUE, TRUE), .size = 3L) Condition Error in `vec_pall()`: ! `..1` must have size 3, not size 1. --- Code vec_pany(TRUE, c(TRUE, TRUE, TRUE), .size = 3L) Condition Error in `vec_pany()`: ! `..1` must have size 3, not size 1. # validates `.missing` Code vec_pall(.missing = c(TRUE, FALSE)) Condition Error in `vec_pall()`: ! `.missing` must be `NA`, `FALSE`, or `TRUE`. --- Code vec_pany(.missing = c(TRUE, FALSE)) Condition Error in `vec_pany()`: ! `.missing` must be `NA`, `FALSE`, or `TRUE`. --- Code vec_pall(.missing = 1) Condition Error in `vec_pall()`: ! `.missing` must be `NA`, `FALSE`, or `TRUE`. --- Code vec_pany(.missing = 1) Condition Error in `vec_pany()`: ! `.missing` must be `NA`, `FALSE`, or `TRUE`. --- Code vec_pall(.missing = NULL) Condition Error in `vec_pall()`: ! `.missing` must be `NA`, `FALSE`, or `TRUE`. --- Code vec_pany(.missing = NULL) Condition Error in `vec_pany()`: ! `.missing` must be `NA`, `FALSE`, or `TRUE`. # validates `.size` Code vec_pall(.size = c(1, 2)) Condition Error in `vec_pall()`: ! `.size` must be a scalar integer or double. --- Code vec_pany(.size = c(1, 2)) Condition Error in `vec_pany()`: ! `.size` must be a scalar integer or double. --- Code vec_pall(.size = 1.5) Condition Error in `vec_pall()`: ! `.size` must be a whole number, not a decimal number. --- Code vec_pany(.size = 1.5) Condition Error in `vec_pany()`: ! `.size` must be a whole number, not a decimal number. --- Code vec_pall(.size = NA_integer_) Condition Error in `vec_pall()`: ! negative length vectors are not allowed --- Code vec_pany(.size = NA_integer_) Condition Error in `vec_pany()`: ! negative length vectors are not allowed # names are used in errors Code vec_pall(1.5, .arg = "x") Condition Error in `vec_pall()`: ! `x[[1]]` must be a logical vector, not the number 1.5. --- Code vec_pall(a = 1.5, .arg = "x") Condition Error in `vec_pall()`: ! `x$a` must be a logical vector, not the number 1.5. --- Code x <- c(TRUE, FALSE) y <- logical() vec_pany(x, y) Condition Error in `vec_pany()`: ! `..2` must have size 2, not size 0. --- Code x <- c(TRUE, FALSE) y <- logical() vec_pany(a = x, b = y, .arg = "x", .error_call = quote(foo())) Condition Error in `foo()`: ! `x$b` must have size 2, not size 0. vctrs/tests/testthat/_snaps/equal.md0000644000176200001440000000143415157320777017351 0ustar liggesusers# throws error for unsuported type Code vec_equal(expression(x), expression(x)) Condition Error in `vec_equal()`: ! `x` must be a vector, not an expression vector. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # `na_equal` is validated Code vec_equal(1, 1, na_equal = 1) Condition Error in `vec_equal()`: ! `na_equal` must be `TRUE` or `FALSE`. --- Code vec_equal(1, 1, na_equal = c(TRUE, FALSE)) Condition Error in `vec_equal()`: ! `na_equal` must be `TRUE` or `FALSE`. # can't supply NA as `na_equal` Code vec_equal(NA, NA, na_equal = NA) Condition Error in `vec_equal()`: ! `na_equal` must be `TRUE` or `FALSE`. vctrs/tests/testthat/_snaps/type-misc.md0000644000176200001440000000031215157321017020132 0ustar liggesusers# `numeric_version` proxy can handle at most 8 components Code vec_proxy_equal(x) Condition Error in `vec_proxy_equal()`: ! `x` can't contain more than 8 version components. vctrs/tests/testthat/_snaps/conditions.md0000644000176200001440000001642615157320776020421 0ustar liggesusers# incompatible type error validates `action` Code (expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = "c")) ) Output Error in `stop_incompatible_type()`: ! `action` must be one of "combine" or "convert", not "c". Code (expect_error(stop_incompatible_type(1, 1, x_arg = "", y_arg = "", action = 1))) Output Error in `stop_incompatible_type()`: ! `action` must be a character vector, not the number 1. # can override arg in OOB conditions Code (expect_error(with_subscript_data(vec_slice(set_names(letters), "foo"), NULL), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(with_subscript_data(vec_slice(set_names(letters), "foo"), quote( foo)), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(with_subscript_data(vec_slice(set_names(letters), "foo"), quote( foo(bar))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. # scalar type errors are informative Code (expect_error(vec_slice(foobar(list(1)), 1), class = "vctrs_error_scalar_type")) Output Error in `vec_slice()`: ! `x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. Code (expect_error(stop_scalar_type(foobar(list(1)), arg = "foo"), class = "vctrs_error_scalar_type") ) Output Error: ! `foo` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # empty names errors are informative Code (expect_error(vec_as_names(c("x", "", "y"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty") ) Output Error: ! Names can't be empty. x Empty name found at location 2. Code (expect_error(vec_as_names(c("x", "", "y", ""), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty")) Output Error: ! Names can't be empty. x Empty names found at locations 2 and 4. Code (expect_error(vec_as_names(rep("", 10), repair = "check_unique"), class = "vctrs_error_names_cannot_be_empty") ) Output Error: ! Names can't be empty. x Empty names found at locations 1, 2, 3, 4, 5, etc. # dot dot names errors are informative Code (expect_error(vec_as_names(c("..1", "..1", "..1", "...", "z"), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot")) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at locations 1, 2, and 3. * "..." at location 4. Code (expect_error(vec_as_names(c(rep("..1", 20), rep(c("..2", "..3", "..4", "...", "..5"), 2)), repair = "check_unique"), class = "vctrs_error_names_cannot_be_dot_dot") ) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at locations 1, 2, 3, 4, 5, etc. * "..2" at locations 21 and 26. * "..3" at locations 22 and 27. * "..4" at locations 23 and 28. * "..." at locations 24 and 29. * ... # unique names errors are informative Code (expect_error(vec_as_names(c("x", "x", "x", "y", "y", "z"), repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) Output Error: ! Names must be unique. x These names are duplicated: * "x" at locations 1, 2, and 3. * "y" at locations 4 and 5. Code (expect_error(vec_as_names(c(rep("x", 20), rep(c("a", "b", "c", "d", "e"), 2)), repair = "check_unique"), class = "vctrs_error_names_must_be_unique")) Output Error: ! Names must be unique. x These names are duplicated: * "x" at locations 1, 2, 3, 4, 5, etc. * "a" at locations 21 and 26. * "b" at locations 22 and 27. * "c" at locations 23 and 28. * "d" at locations 24 and 29. * ... # lossy cast from character to factor mentions loss of generality Code (expect_error(vec_cast("a", factor("b")), class = "vctrs_error_cast_lossy")) Output Error: ! Can't convert from `"a"` to > due to loss of generality. * Locations: 1 # lossy cast `conditionMessage()` result matches `cnd_message()` (#1592) Code cat(conditionMessage(cnd)) Output Can't convert from `1.5` to due to loss of precision. * Locations: 1 # ordered cast failures mention conversion Code (expect_error(vec_cast(ordered("x"), ordered("y")), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `ordered("x")` > to >. # incompatible size errors Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = ""))) Output Error: ! Can't recycle input of size 2 to size 3. Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = ""))) Output Error: ! Can't recycle `foo` (size 2) to size 3. Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = "", y_arg = "bar")) ) Output Error: ! Can't recycle input of size 2 to match `bar` (size 3). Code (expect_error(stop_incompatible_size(1:2, 3:5, 2L, 3L, x_arg = quote(foo), y_arg = quote(bar)))) Output Error: ! Can't recycle `foo` (size 2) to match `bar` (size 3). vctrs/tests/testthat/_snaps/type-date-time.md0000644000176200001440000000221015157321017021047 0ustar liggesusers# datetime coercions are symmetric and unchanging Code print(mat) Output date datetime datetime POSIXlt duration duration date "date" "datetime" "datetime" "datetime" NA NA datetime "datetime" "datetime" "datetime" "datetime" NA NA datetime "datetime" "datetime" "datetime" "datetime" NA NA POSIXlt "datetime" "datetime" "datetime" "datetime" NA NA duration NA NA NA NA "duration" "duration" duration NA NA NA NA "duration" "duration" vctrs/tests/testthat/_snaps/match.md0000644000176200001440000006177715157321012017335 0ustar liggesusers# must have at least 1 column to match Code vec_locate_matches(data_frame(), data_frame()) Condition Error in `vec_locate_matches()`: ! Must have at least 1 column to match on. --- Code vec_locate_matches(data_frame(), data_frame(), error_call = call("foo")) Condition Error in `foo()`: ! Must have at least 1 column to match on. # common type of `needles` and `haystack` is taken Code vec_locate_matches(x, y) Condition Error in `vec_locate_matches()`: ! Can't combine `needles` and `haystack` . --- Code vec_locate_matches(x, y, needles_arg = "x", error_call = call("foo")) Condition Error in `foo()`: ! Can't combine `x` and `haystack` . # `incomplete` can error informatively Code (expect_error(vec_locate_matches(NA, 1, incomplete = "error"))) Output Error in `vec_locate_matches()`: ! `needles` can't contain missing values. x Location 1 contains missing values. Code (expect_error(vec_locate_matches(NA, 1, incomplete = "error", needles_arg = "foo")) ) Output Error in `vec_locate_matches()`: ! `foo` can't contain missing values. x Location 1 contains missing values. Code (expect_error(vec_locate_matches(NA, 1, incomplete = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! `foo` can't contain missing values. x Location 1 contains missing values. # `incomplete` is validated Code (expect_error(vec_locate_matches(1, 2, incomplete = 1.5))) Output Error in `vec_locate_matches()`: ! Can't convert from `incomplete` to due to loss of precision. * Locations: 1 Code (expect_error(vec_locate_matches(1, 2, incomplete = c("match", "drop")))) Output Error in `vec_locate_matches()`: ! `incomplete` must be length 1, not length 2. Code (expect_error(vec_locate_matches(1, 2, incomplete = "x"))) Output Error in `vec_locate_matches()`: ! `incomplete` must be one of: "compare", "match", "drop", or "error". Code (expect_error(vec_locate_matches(1, 2, incomplete = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `incomplete` must be one of: "compare", "match", "drop", or "error". # `multiple` is validated Code (expect_error(vec_locate_matches(1, 2, multiple = 1.5))) Output Error in `vec_locate_matches()`: ! `multiple` must be a string. Code (expect_error(vec_locate_matches(1, 2, multiple = c("first", "last")))) Output Error in `vec_locate_matches()`: ! `multiple` must be a string. Code (expect_error(vec_locate_matches(1, 2, multiple = "x"))) Output Error in `vec_locate_matches()`: ! `multiple` must be one of "all", "any", "first", or "last". Code (expect_error(vec_locate_matches(1, 2, multiple = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `multiple` must be one of "all", "any", "first", or "last". # `multiple` can error informatively Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo"))) Output Error in `vec_locate_matches()`: ! Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), multiple = "error", needles_arg = "foo", haystack_arg = "bar"))) Output Error in `vec_locate_matches()`: ! Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. # `multiple` can warn informatively Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning"))) Output Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo"))) Output Warning in `vec_locate_matches()`: Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", error_call = call("fn")))) Output Warning in `fn()`: Each value of `foo` can match at most 1 value from `haystack`. x Location 1 of `foo` matches multiple values. Code (expect_warning(vec_locate_matches(1L, c(1L, 1L), multiple = "warning", needles_arg = "foo", haystack_arg = "bar"))) Output Warning in `vec_locate_matches()`: Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. # errors on multiple matches that come from different nesting containers Code vec_locate_matches(df, df2, condition = c("<=", "<="), multiple = "error") Condition Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # errors when a match from a different nesting container is processed early on Code vec_locate_matches(needles, haystack, condition = "<", multiple = "error") Condition Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `multiple = 'error' / 'warning'` throw correctly when combined with `relationship` Code (expect_error(vec_locate_matches(x, y, relationship = "one-to-one", multiple = "error")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code (expect_error(vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "warning") Condition Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 2 of `needles` matches multiple values. x Location 1 of `haystack` matches multiple values. Output needles haystack 1 1 2 2 2 1 3 2 3 4 3 1 5 3 3 --- Code vec_locate_matches(x, y, relationship = "one-to-one", multiple = "warning") Condition Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code (expect_error(vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. --- Code vec_locate_matches(x, y, relationship = "warn-many-to-many", multiple = "warning") Condition Warning in `vec_locate_matches()`: Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Output needles haystack 1 1 2 2 2 1 3 2 3 # `relationship` handles one-to-one case Code (expect_error(vec_locate_matches(c(2, 1), c(1, 1), relationship = "one-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. Code (expect_error(vec_locate_matches(c(1, 1), c(1, 2), relationship = "one-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 1 of `haystack` matches multiple values. # `relationship` handles one-to-many case Code (expect_error(vec_locate_matches(c(1, 2, 2), c(2, 1), relationship = "one-to-many")) ) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 1 of `haystack` matches multiple values. # `relationship` handles many-to-one case Code (expect_error(vec_locate_matches(c(1, 2), c(1, 2, 2), relationship = "many-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 2 of `needles` matches multiple values. # `relationship` handles warn-many-to-many case Code (expect_warning(vec_locate_matches(c(1, 2, 1), c(1, 2, 2), relationship = "warn-many-to-many")) ) Output Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 2 of `needles` matches multiple values. x Location 1 of `haystack` matches multiple values. Code (expect_warning(vec_locate_matches(c(1, 1, 2), c(2, 2, 1), relationship = "warn-many-to-many")) ) Output Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 3 of `needles` matches multiple values. x Location 3 of `haystack` matches multiple values. # `relationship` considers `incomplete` matches as possible multiple matches Code (expect_error(vec_locate_matches(x, y, relationship = "one-to-many"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 1 of `haystack` matches multiple values. # `relationship` errors on multiple matches that come from different nesting containers Code (expect_error(vec_locate_matches(df, df2, condition = c("<=", "<="), relationship = "many-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` errors when a match from a different nesting container is processed early on Code (expect_error(vec_locate_matches(needles, haystack, condition = "<", relationship = "many-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` can still detect problematic `haystack` relationships when `multiple = first/last` are used Code (expect_error(vec_locate_matches(c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 2 of `haystack` matches multiple values. Code (expect_error(vec_locate_matches(c(3, 1, 1), c(2, 1, 3, 3), multiple = "first", relationship = "one-to-many"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` can match at most 1 value from `needles`. x Location 2 of `haystack` matches multiple values. # `relationship` and `remaining` work properly together Code out <- vec_locate_matches(c(1, 2, 2), c(2, 3, 1, 1, 4), relationship = "warn-many-to-many", remaining = NA_integer_) Condition Warning in `vec_locate_matches()`: Detected an unexpected many-to-many relationship between `needles` and `haystack`. x Location 1 of `needles` matches multiple values. x Location 1 of `haystack` matches multiple values. # `relationship` errors if `condition` creates multiple matches Code (expect_error(vec_locate_matches(1, c(1, 2), condition = "<=", relationship = "many-to-one")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` still errors if `filter` hasn't removed all multiple matches Code (expect_error(vec_locate_matches(1, c(1, 2, 1), condition = "<=", filter = "min", relationship = "many-to-one"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` errors when we have >1 size 1 matches across containers (tidyverse/dplyr#6835) Code vec_locate_matches(x, y, condition = c("<=", ">="), filter = c("none", "none"), relationship = "one-to-one") Condition Error in `vec_locate_matches()`: ! Each value of `needles` can match at most 1 value from `haystack`. x Location 1 of `needles` matches multiple values. # `relationship` errors respect argument tags and error call Code (expect_error(vec_locate_matches(1L, c(1L, 1L), relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. Code (expect_error(vec_locate_matches(c(1L, 1L), 1L, relationship = "one-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `bar` can match at most 1 value from `foo`. x Location 1 of `bar` matches multiple values. Code (expect_error(vec_locate_matches(c(1L, 1L), 1L, relationship = "one-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `bar` can match at most 1 value from `foo`. x Location 1 of `bar` matches multiple values. Code (expect_error(vec_locate_matches(1L, c(1L, 1L), relationship = "many-to-one", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` can match at most 1 value from `bar`. x Location 1 of `foo` matches multiple values. # `relationship` warnings respect argument tags and error call Code (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", haystack_arg = "bar", error_call = call("fn")))) Output Warning in `fn()`: Detected an unexpected many-to-many relationship between `foo` and `bar`. x Location 1 of `foo` matches multiple values. x Location 1 of `bar` matches multiple values. Code (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", needles_arg = "foo", error_call = call("fn")))) Output Warning in `fn()`: Detected an unexpected many-to-many relationship between `foo` and `haystack`. x Location 1 of `foo` matches multiple values. x Location 1 of `haystack` matches multiple values. Code (expect_warning(vec_locate_matches(c(1L, 1L), c(1L, 1L), relationship = "warn-many-to-many", haystack_arg = "bar", error_call = call("fn")))) Output Warning in `fn()`: Detected an unexpected many-to-many relationship between `needles` and `bar`. x Location 1 of `needles` matches multiple values. x Location 1 of `bar` matches multiple values. # `relationship` is validated Code (expect_error(vec_locate_matches(1, 2, relationship = 1.5))) Output Error in `vec_locate_matches()`: ! `relationship` must be a string. Code (expect_error(vec_locate_matches(1, 2, relationship = c("one-to-one", "one-to-many")))) Output Error in `vec_locate_matches()`: ! `relationship` must be a string. Code (expect_error(vec_locate_matches(1, 2, relationship = "x"))) Output Error in `vec_locate_matches()`: ! `relationship` must be one of "none", "one-to-one", "one-to-many", "many-to-one", "many-to-many", or "warn-many-to-many". Code (expect_error(vec_locate_matches(1, 2, relationship = "x", error_call = call( "fn")))) Output Error in `vec_locate_matches()`: ! `relationship` must be one of "none", "one-to-one", "one-to-many", "many-to-one", "many-to-many", or "warn-many-to-many". # `no_match` can error informatively Code (expect_error(vec_locate_matches(1, 2, no_match = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `needles` must have a match in `haystack`. x Location 1 of `needles` does not have a match. Code (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo")) ) Output Error in `vec_locate_matches()`: ! Each value of `foo` must have a match in `haystack`. x Location 1 of `foo` does not have a match. Code (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `foo` must have a match in `haystack`. x Location 1 of `foo` does not have a match. Code (expect_error(vec_locate_matches(1, 2, no_match = "error", needles_arg = "foo", haystack_arg = "bar"))) Output Error in `vec_locate_matches()`: ! Each value of `foo` must have a match in `bar`. x Location 1 of `foo` does not have a match. # errors with the right location on unmatched needles when different nesting containers are present Code (expect_error(vec_locate_matches(df, df2, condition = ">=", no_match = "error")) ) Output Error in `vec_locate_matches()`: ! Each value of `needles` must have a match in `haystack`. x Location 2 of `needles` does not have a match. # `no_match` is validated Code (expect_error(vec_locate_matches(1, 2, no_match = 1.5))) Output Error in `vec_locate_matches()`: ! Can't convert from `no_match` to due to loss of precision. * Locations: 1 Code (expect_error(vec_locate_matches(1, 2, no_match = c(1L, 2L)))) Output Error in `vec_locate_matches()`: ! `no_match` must be length 1, not length 2. Code (expect_error(vec_locate_matches(1, 2, no_match = "x"))) Output Error in `vec_locate_matches()`: ! `no_match` must be either "drop" or "error". Code (expect_error(vec_locate_matches(1, 2, no_match = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `no_match` must be either "drop" or "error". # `remaining` can error informatively Code (expect_error(vec_locate_matches(1, 2, remaining = "error"))) Output Error in `vec_locate_matches()`: ! Each value of `haystack` must be matched by `needles`. x Location 1 of `haystack` was not matched. Code (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo")) ) Output Error in `vec_locate_matches()`: ! Each value of `haystack` must be matched by `foo`. x Location 1 of `haystack` was not matched. Code (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo", error_call = call("fn")))) Output Error in `fn()`: ! Each value of `haystack` must be matched by `foo`. x Location 1 of `haystack` was not matched. Code (expect_error(vec_locate_matches(1, 2, remaining = "error", needles_arg = "foo", haystack_arg = "bar"))) Output Error in `vec_locate_matches()`: ! Each value of `bar` must be matched by `foo`. x Location 1 of `bar` was not matched. # `remaining` is validated Code (expect_error(vec_locate_matches(1, 2, remaining = 1.5))) Output Error in `vec_locate_matches()`: ! Can't convert from `remaining` to due to loss of precision. * Locations: 1 Code (expect_error(vec_locate_matches(1, 2, remaining = c(1L, 2L)))) Output Error in `vec_locate_matches()`: ! `remaining` must be length 1, not length 2. Code (expect_error(vec_locate_matches(1, 2, remaining = "x"))) Output Error in `vec_locate_matches()`: ! `remaining` must be either "drop" or "error". Code (expect_error(vec_locate_matches(1, 2, remaining = "x", error_call = call("fn"))) ) Output Error in `vec_locate_matches()`: ! `remaining` must be either "drop" or "error". # potential overflow on large output size is caught informatively Code (expect_error(vec_locate_matches(1:1e+07, 1:1e+07, condition = ">="))) Output Error in `vec_locate_matches()`: ! Match procedure results in an allocation larger than 2^31-1 elements. i Attempted allocation size was 50000005000000. i This is an internal error that was detected in the vctrs package. Please report it at with a reprex () and the full backtrace. vctrs/tests/testthat/_snaps/slice.md0000644000176200001440000001072215157321015017323 0ustar liggesusers# vec_slice throws error with non-vector subscripts Code (expect_error(vec_slice(1:3, Sys.Date()), class = "vctrs_error_subscript_type")) Output Error in `vec_slice()`: ! Can't subset elements with `i`. x `i` must be logical, numeric, or character, not a object. Code (expect_error(vec_slice(1:3, matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type") ) Output Error in `vec_slice()`: ! Can't subset elements with `i`. x Subscript `i` must be a simple vector, not a matrix. # can't index beyond the end of a vector Code (expect_error(vec_slice(1:2, 3L), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. Code (expect_error(vec_slice(1:2, -3L), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't negate elements past the end. i Location 3 doesn't exist. i There are only 2 elements. # can slice with double indices Code (expect_error(vec_as_location(2^31, 3L), class = "vctrs_error_subscript_type")) Output Error: ! Can't subset elements with `2^31`. x Can't convert from `2^31` to due to loss of precision. # vec_slice() works with Altrep classes with custom extract methods Code vec_slice(x, idx) Condition Error in `vec_slice()`: ! Can't subset elements past the end. i Location 16 doesn't exist. i There are only 15 elements. # Unnamed vector with character subscript is caught Code vec_slice(1:3, letters[1]) Condition Error in `vec_slice()`: ! Can't use character names to index an unnamed vector. # Negative subscripts are checked Code vec_slice(1:3, -c(1L, NA)) Condition Error in `vec_slice()`: ! Can't subset elements with `i`. x Negative locations can't have missing values. i Subscript `i` has a missing value at location 2. --- Code vec_slice(1:3, c(-1L, 1L)) Condition Error in `vec_slice()`: ! Can't subset elements with `i`. x Negative and positive locations can't be mixed. i Subscript `i` has a positive value at location 2. # oob error messages are properly constructed Code vec_slice(c(bar = 1), "foo") Condition Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. --- Code vec_slice(letters, c(100, 1000)) Condition Error in `vec_slice()`: ! Can't subset elements past the end. i Locations 100 and 1000 don't exist. i There are only 26 elements. --- Code vec_slice(letters, c(1, 100:103, 2, 104:110)) Condition Error in `vec_slice()`: ! Can't subset elements past the end. i Locations 100, 101, 102, ..., 109, and 110 don't exist. i There are only 26 elements. --- Code vec_slice(set_names(letters), c("foo", "bar")) Condition Error in `vec_slice()`: ! Can't subset elements that don't exist. x Elements `foo` and `bar` don't exist. --- Code vec_slice(set_names(letters), toupper(letters)) Condition Error in `vec_slice()`: ! Can't subset elements that don't exist. x Elements `A`, `B`, `C`, `D`, `E`, etc. don't exist. # vec_init() validates `n` Code (expect_error(vec_init(1L, 1.5))) Output Error in `vec_init()`: ! `n` must be a whole number, not a fractional number. Code (expect_error(vec_init(1L, c(1, 2)))) Output Error in `vec_init()`: ! `n` must be a single number, not a double vector. Code (expect_error(vec_init(1L, -1L))) Output Error in `vec_init()`: ! `n` must be a positive number or zero. Code (expect_error(vec_init(1L, NA))) Output Error in `vec_init()`: ! `n` must be a single number, not `NA`. Code (expect_error(vec_init(1L, NA_integer_))) Output Error in `vec_init()`: ! `n` must be a single number, not an integer `NA`. vctrs/tests/testthat/_snaps/type-unspecified.md0000644000176200001440000000013415157321020021471 0ustar liggesusers# has useful print method Code unspecified() Output [0] vctrs/tests/testthat/_snaps/slice-assign.md0000644000176200001440000001550615157321015020612 0ustar liggesusers# assign throws error with non-vector `value` Code vec_assign(x, 1L, NULL) Condition Error in `vec_assign()`: ! Input must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_assign(x, 1L, NULL, slice_value = TRUE) Condition Error in `vec_assign()`: ! Input must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_assign(x, 1L, NULL, value_arg = "foo") Condition Error in `vec_assign()`: ! `foo` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_assign(x, 1L, NULL, slice_value = TRUE, value_arg = "foo") Condition Error in `vec_assign()`: ! `foo` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_assign(x, 1L, environment(), value_arg = "foo") Condition Error in `vec_assign()`: ! `foo` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_assign(x, 1L, environment(), slice_value = TRUE, value_arg = "foo") Condition Error in `vec_assign()`: ! `foo` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # can assign using logical index Code (expect_error(vec_assign(x, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size") ) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 2, not 3. --- Code (expect_error(vec_assign(x, c(TRUE, FALSE, TRUE), 5, slice_value = TRUE), class = "vctrs_error_subscript_size")) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 2, not 3. --- Code (expect_error(vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size") ) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 32, not 2. --- Code (expect_error(vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ], slice_value = TRUE), class = "vctrs_error_subscript_size")) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 32, not 2. # assign `value` size depends on `slice_value` Code vec_assign(x, c(TRUE, NA, FALSE), c(1, 2, 3)) Condition Error in `vec_assign()`: ! Can't recycle input of size 3 to size 2. --- Code vec_assign(x, c(TRUE, NA, FALSE), c(1, 2), slice_value = TRUE) Condition Error in `vec_assign()`: ! Can't recycle input of size 2 to size 3. # can use names to assign with a named object Code vec_assign(x, c("c", "a"), c(4, 5, 6)) Condition Error in `vec_assign()`: ! Can't recycle input of size 3 to size 2. --- Code vec_assign(x, c("c", "a"), c(4, 5), slice_value = TRUE) Condition Error in `vec_assign()`: ! Can't recycle input of size 2 to size 3. # `vec_assign()` requires recyclable value Code (expect_error(vec_assign(1:3, 1:2, 1:3), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `vec_assign()`: ! Can't recycle input of size 3 to size 2. --- Code (expect_error(vec_assign(1:3, 1:2, 1:2, slice_value = TRUE), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `vec_assign()`: ! Can't recycle input of size 2 to size 3. # logical subscripts must match size of indexed vector Code (expect_error(vec_assign(1:2, c(TRUE, FALSE, TRUE), 5), class = "vctrs_error_subscript_size") ) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 2, not 3. --- Code (expect_error(vec_assign(mtcars, c(TRUE, FALSE), mtcars[1, ]), class = "vctrs_error_subscript_size") ) Output Error: ! Can't assign elements. x Logical subscript must be size 1 or 32, not 2. # must assign existing elements Code (expect_error(vec_assign(1:3, 5, 10), class = "vctrs_error_subscript_oob")) Output Error: ! Can't assign to elements past the end. i Location 5 doesn't exist. i There are only 3 elements. Code (expect_error(vec_assign(1:3, "foo", 10), "unnamed vector")) Output Error in `vec_assign()`: ! Can't use character names to index an unnamed vector. Code (expect_error(vec_slice(letters, -100) <- "foo", class = "vctrs_error_subscript_oob") ) Output Error: ! Can't negate elements past the end. i Location 100 doesn't exist. i There are only 26 elements. Code (expect_error(vec_assign(set_names(letters), "foo", "bar"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't assign to elements that don't exist. x Element `foo` doesn't exist. # must assign with proper negative locations Code (expect_error(vec_assign(1:3, c(-1, 1), 1:2), class = "vctrs_error_subscript_type") ) Output Error: ! Can't assign elements. x Negative and positive locations can't be mixed. i Subscript has a positive value at location 2. Code (expect_error(vec_assign(1:3, c(-1, NA), 1:2), class = "vctrs_error_subscript_type") ) Output Error: ! Can't assign elements. x Negative locations can't have missing values. i Subscript has a missing value at location 2. # `vec_assign()` error args can be overridden Code (expect_error(vec_assign(1:2, 1L, "x", x_arg = "foo", value_arg = "bar"), class = "vctrs_error_incompatible_type")) Output Error in `vec_assign()`: ! Can't convert `bar` to match type of `foo` . Code (expect_error(vec_assign(1:2, 1L, 1:2, value_arg = "bar"), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `vec_assign()`: ! Can't recycle `bar` (size 2) to size 1. vctrs/tests/testthat/_snaps/case-when.md0000644000176200001440000002071615157320774020115 0ustar liggesusers# `conditions` inputs can be size zero Code vec_case_when(list(logical()), list(1:2)) Condition Error in `vec_case_when()`: ! Can't recycle `values[[1]]` (size 2) to size 0. # `values` are cast to their common type Code vec_case_when(list(FALSE, TRUE), list(1, "x")) Condition Error in `vec_case_when()`: ! Can't combine `values[[1]]` and `values[[2]]` . # `values` must be size 1 or same size as the `conditions` Code vec_case_when(list(c(TRUE, FALSE, TRUE, TRUE)), list(1:3)) Condition Error in `vec_case_when()`: ! Can't recycle `values[[1]]` (size 3) to size 4. # `default` must be size 1 or same size as `conditions` (exact same as any other `values` input) Code vec_case_when(list(FALSE), list(1L), default = 2:3) Condition Error in `vec_case_when()`: ! Can't recycle `default` (size 2) to size 1. # `default_arg` can be customized Code vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") Condition Error in `vec_case_when()`: ! Can't recycle `foo` (size 2) to size 1. --- Code vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") Condition Error in `vec_case_when()`: ! Can't combine and `foo` . # `conditions_arg` is validated Code vec_case_when(list("x"), list(1), conditions_arg = 1) Condition Error in `vec_case_when()`: ! `arg` must be a string. # `values_arg` is validated Code vec_case_when(list(TRUE), list(lm(1 ~ 1)), values_arg = 1) Condition Error in `vec_case_when()`: ! `arg` must be a string. # `default_arg` is validated Code vec_case_when(list(TRUE), list(1), default = "x", default_arg = 1) Condition Error in `vec_case_when()`: ! `arg` must be a string. # `conditions` must all be the same size Code vec_case_when(list(c(TRUE, FALSE), TRUE), list(1, 2)) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must have size 2, not size 1. --- Code vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2)) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must have size 2, not size 3. # `conditions` must be logical (and aren't cast to logical!) Code vec_case_when(list(1), list(2)) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must be a logical vector, not the number 1. --- Code vec_case_when(list(TRUE, 3.5), list(2, 4)) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must be a logical vector, not the number 3.5. --- Code vec_case_when(list(x), list(1), default = 2) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must be a logical vector, not a object. # `conditions` can't be arrays (#6862) Code vec_case_when(list(x), list(y)) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must be a logical vector, not a logical matrix. --- Code vec_case_when(list(x), list(y), size = 3) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must be a logical vector, not a logical matrix. --- Code vec_case_when(list(x), list(y)) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must be a logical vector, not a logical 1D array. # `size` overrides the `conditions` sizes Code vec_case_when(list(TRUE), list(1), size = 5) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must have size 5, not size 1. --- Code vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2), size = 2) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must have size 2, not size 3. # `ptype` overrides the `values` types Code vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) Condition Error in `vec_case_when()`: ! Can't convert `values[[2]]` to . # number of `conditions` and `values` must be the same Code vec_case_when(list(TRUE), list()) Condition Error in `vec_case_when()`: ! `conditions` must have size 0, not size 1. --- Code vec_case_when(list(TRUE, TRUE), list(1)) Condition Error in `vec_case_when()`: ! `conditions` must have size 1, not size 2. # dots must be empty Code vec_case_when(list(TRUE), list(1), 2) Condition Error in `vec_case_when()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? # `conditions` must be a list Code vec_case_when(1, list(2)) Condition Error in `vec_case_when()`: ! `conditions` must be a list, not the number 1. # `values` must be a list Code vec_case_when(list(TRUE), 1) Condition Error in `vec_case_when()`: ! `values` must be a list, not the number 1. # named inputs show up in the error message Code vec_case_when(list(x = 1.5), list(1)) Condition Error in `vec_case_when()`: ! `conditions$x` must be a logical vector, not the number 1.5. --- Code vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") Condition Error in `vec_case_when()`: ! `foo$x` must be a logical vector, not the number 1.5. --- Code vec_case_when(list(x = 1.5), list(1), conditions_arg = "") Condition Error in `vec_case_when()`: ! `x` must be a logical vector, not the number 1.5. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) Condition Error in `vec_case_when()`: ! `conditions$y` must have size 1, not size 2. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") Condition Error in `vec_case_when()`: ! `foo$y` must have size 1, not size 2. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "") Condition Error in `vec_case_when()`: ! `y` must have size 1, not size 2. --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y")) Condition Error in `vec_case_when()`: ! Can't combine `values[[1]]` and `values$x` . --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") Condition Error in `vec_case_when()`: ! Can't combine `foo[[1]]` and `foo$x` . --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "") Condition Error in `vec_case_when()`: ! Can't combine `..1` and `x` . --- Code vec_case_when(list(TRUE), list(NULL)) Condition Error in `vec_case_when()`: ! `values[[1]]` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_case_when(list(TRUE), list(x = NULL)) Condition Error in `vec_case_when()`: ! `values$x` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_case_when(list(TRUE), list(NULL), values_arg = "foo") Condition Error in `vec_case_when()`: ! `foo[[1]]` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") Condition Error in `vec_case_when()`: ! `foo$x` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_case_when(list(TRUE), list(NULL), values_arg = "") Condition Error in `vec_case_when()`: ! `..1` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_case_when(list(TRUE), list(x = NULL), values_arg = "") Condition Error in `vec_case_when()`: ! `x` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # `unmatched` errors are correct Code vec_case_when(conditions, values, unmatched = "error") Condition Error in `vec_case_when()`: ! Each location must be matched. x Locations 4, 5, 6, and 7 are unmatched. vctrs/tests/testthat/_snaps/size.md0000644000176200001440000000676715157321013017212 0ustar liggesusers# vec_as_short_length() checks inputs Code (expect_error(my_function(-1))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a positive number or zero. Code (expect_error(my_function(1:2))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not an integer vector. Code (expect_error(my_function(1.5))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a whole number, not a fractional number. Code (expect_error(my_function(NA))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not `NA`. Code (expect_error(my_function(na_int))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not an integer `NA`. Code (expect_error(my_function("foo"))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not the string "foo". Code (expect_error(my_function(foobar(1:2)))) Output Error in `vec_as_short_length()`: ! `my_arg` must be a single number, not `NULL`. Code (expect_error(my_function(.Machine$double.xmax))) Output Error in `vec_as_short_length()`: ! `my_arg` is too large a number. # vec_as_short_length() has a special error about long vector support Code (expect_error(my_function(.Machine$integer.max + 1))) Output Error in `vec_as_short_length()`: ! `my_arg` is too large a number and long vectors are not supported. # vec_size_common() checks inputs Code (expect_error(vec_size_common(.size = "foo"))) Output Error in `vec_size_common()`: ! `.size` must be a single number, not the string "foo". Code (expect_error(vec_size_common(.size = 1:2))) Output Error in `vec_size_common()`: ! `.size` must be a single number, not an integer vector. # vec_size_common() mentions `arg` in errors Code (expect_error(my_function(this_arg = 1:2, that_arg = int()))) Output Error in `my_function()`: ! Can't recycle `my_arg$this_arg` (size 2) to match `my_arg$that_arg` (size 0). # `.absent` must be supplied when `...` is empty Code (expect_error(vec_size_common(.absent = NULL))) Output Error in `vec_size_common()`: ! `.absent` must be supplied when `...` is empty. # `.absent` must be a length 1 integer if provided Code (expect_error(vec_size_common(.absent = 1), "must be a single integer")) Output Error in `vec_size_common()`: ! `.absent` must be a single integer. Code (expect_error(vec_size_common(.absent = c(1L, 2L)), "must be a single integer")) Output Error in `vec_size_common()`: ! `.absent` must be a single integer. # argument tags are forwarded Code vec_size_common(1:2, 1, 1:4) Condition Error: ! Can't recycle `..1` (size 2) to match `..3` (size 4). --- Code vec_size_common(foo = 1:2, 1, bar = 1:4) Condition Error: ! Can't recycle `foo` (size 2) to match `bar` (size 4). vctrs/tests/testthat/_snaps/type-data-table.md0000644000176200001440000000040115157321015021172 0ustar liggesusers# data table has formatting methods Code dt <- data.table(x = 1, y = 2, z = 3) vec_ptype_abbr(dt) Output [1] "dt[,3]" Code vec_ptype_full(dt) Output [1] "data.table<\n x: double\n y: double\n z: double\n>" vctrs/tests/testthat/_snaps/type.md0000644000176200001440000001005615157321021017202 0ustar liggesusers# output tests Code vec_ptype_show() Output Prototype: NULL --- Code vec_ptype_show(integer()) Output Prototype: integer --- Code vec_ptype_show(integer(), double()) Output Prototype: 0. ( , ) = 1. ( , ) = --- Code vec_ptype_show(logical(), integer(), double()) Output Prototype: 0. ( , ) = 1. ( , ) = 2. ( , ) = # vec_ptype_common() includes index in argument tag Code vec_ptype_common(df1, df2) Condition Error: ! Can't combine `..1$x$y$z` and `..2$x$y$z` . --- Code vec_ptype_common(df1, df1, df2) Condition Error: ! Can't combine `..1$x$y$z` and `..3$x$y$z` . --- Code vec_ptype_common(large_df1, large_df2) Condition Error: ! Can't combine `..1$foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar$y$z` and `..2$foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar$y$z` . --- Code vec_ptype_common(foo = TRUE, bar = "foo") Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = TRUE, baz = FALSE, bar = "foo") Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = df1, bar = df2) Condition Error: ! Can't combine `foo$x$y$z` and `bar$x$y$z` . --- Code vec_ptype_common(df1, df1, bar = df2) Condition Error: ! Can't combine `..1$x$y$z` and `bar$x$y$z` . --- Code vec_ptype_common(TRUE, !!!list(1, "foo")) Condition Error: ! Can't combine `..2` and `..3` . --- Code vec_ptype_common(TRUE, !!!list(1, 2), "foo") Condition Error: ! Can't combine `..2` and `..4` . --- Code vec_ptype_common(1, !!!list(TRUE, FALSE), "foo") Condition Error: ! Can't combine `..1` and `..4` . --- Code vec_ptype_common(foo = TRUE, !!!list(FALSE, FALSE), bar = "foo") Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = 1, "foo")) Condition Error: ! Can't combine `bar` and `..3` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = "foo")) Condition Error: ! Can't combine `foo` and `bar` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), baz = "chr") Condition Error: ! Can't combine `foo` and `baz` . --- Code vec_ptype_common(foo = TRUE, !!!list(bar = FALSE), !!!list(baz = "chr")) Condition Error: ! Can't combine `foo` and `baz` . # vec_ptype_common() handles spliced names consistently (#1570) Code vec_ptype_common(a = "foo", b = "bar", y = NULL, z = 1) Condition Error: ! Can't combine `a` and `z` . Code vec_ptype_common(!!!args1, !!!args2) Condition Error: ! Can't combine `a` and `z` . Code vec_ptype_common(!!!args1, "{y_name}" := NULL, "{z_name}" := 1) Condition Error: ! Can't combine `a` and `z` . # `.finalise` is validated Code vec_ptype_common(.finalise = 1) Condition Error in `vec_ptype_common()`: ! `.finalise` must be `TRUE` or `FALSE`. --- Code vec_ptype_common_params(.finalise = 1) Condition Error in `vec_ptype_common_params()`: ! `.finalise` must be `TRUE` or `FALSE`. vctrs/tests/testthat/_snaps/error-call.md0000644000176200001440000003013715157320777020306 0ustar liggesusers# failing common type reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't combine `2` and `chr()` . # failing cast reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert `2` to . --- Code (expect_error(my_function(df1, df2))) Output Error in `my_function()`: ! Can't convert `lhs$y` to match type of `y` . --- Code (expect_error(my_function(df1, df2))) Output Error in `my_function()`: ! Can't convert `lhs$y` to match type of `y` . # lossy cast reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `2` to due to loss of precision. * Locations: 1 # failing common size reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't recycle input of size 2 to size 10. --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't recycle `..1` (size 2) to match `..2` (size 10). # unsupported error reports correct error call Code (expect_error(my_function())) Output Error in `dim<-`: ! `dim<-.vctrs_vctr()` not supported. --- Code (expect_error(my_function())) Output Error in `median()`: ! `median.vctrs_vctr()` not implemented. # scalar error reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! `foobar()` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # size error reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! `1:2` must have size 1, not size 2. # bare casts report correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `1.5` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `1.5` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `2L` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert `matrix(TRUE)` to . Can't decrease dimensionality from 2 to 1. # base S3 casts report correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't convert from `"a"` to > due to loss of generality. * Locations: 1 # names validation reports correct error call Code (expect_error(my_function())) Output Error in `my_function()`: ! Names can't be empty. x Empty name found at location 2. --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. i Use argument `repair` to specify repair strategy. --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..." at location 1. # subscript validation reports correct error calls Code (expect_error(my_function())) Output Error in `vctrs::num_as_location()`: ! `missing` must be one of "propagate", "remove", or "error". --- Code (expect_error(my_function())) Output Error in `my_function()`: ! Can't subset elements past the end. i Location 10 doesn't exist. i There are only 2 elements. --- Code (expect_error(my_function(1.5))) Output Error in `my_function()`: ! Can't subset elements with `my_arg`. x Can't convert from `my_arg` to due to loss of precision. --- Code (expect_error(my_function(1.5))) Output Error in `my_function()`: ! Can't subset elements. x Can't convert from to due to loss of precision. --- Code (expect_error(my_function(list()))) Output Error in `my_function()`: ! Can't subset elements with `my_arg`. x `my_arg` must be logical, numeric, or character, not an empty list. --- Code (expect_error(my_function(1.5))) Output Error in `vec_as_location()`: ! Can't convert from `n` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function(NA))) Output Error in `my_function()`: ! Can't subset elements. x Subscript can't contain missing values. x It has a missing value at location 1. # `vec_ptype()` reports correct error call Code (expect_error(my_function(env()))) Output Error in `my_function()`: ! Input must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(my_function(foobar(list())))) Output Error in `my_function()`: ! Input must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `vec_slice()` uses `error_call` Code (expect_error(my_function(env(), 1))) Output Error in `my_function()`: ! `x` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(my_function(1, 2))) Output Error in `my_function()`: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. # vec_slice() reports self in error context Code (expect_error(vec_slice(foobar(list()), 1))) Output Error in `vec_slice()`: ! `x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. Code (expect_error(vec_slice(list(), env()))) Output Error in `vec_slice()`: ! Can't subset elements with `i`. x `i` must be logical, numeric, or character, not an environment. # list_sizes() reports error context Code (expect_error(list_sizes(foobar(list())))) Output Error in `list_sizes()`: ! `x` must be a list, not a object. Code (expect_error(list_sizes(list(env())))) Output Error in `list_sizes()`: ! `x[[1]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(list_sizes(list(1, 2, env())))) Output Error in `list_sizes()`: ! `x[[3]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(list_sizes(list(1, 2, foo = env())))) Output Error in `list_sizes()`: ! `x$foo` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # vec_size() reports error context Code (expect_error(vec_size(env()))) Output Error in `vec_size()`: ! `x` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # vec_cast_common() reports error context Code (expect_error(my_function(my_arg = 1.5, .to = int()))) Output Error in `my_function()`: ! Can't convert from `my_arg` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function(my_arg = 1.5, .to = int(), .arg = "my_arg"))) Output Error in `my_function()`: ! Can't convert from `my_arg$my_arg` to due to loss of precision. * Locations: 1 --- Code (expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg$this_arg` and `my_arg$that_arg` . --- Code (expect_error(my_function(1, "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg[[1]]` and `my_arg[[2]]` . --- Code (expect_error(my_function(this_arg = x, that_arg = y))) Output Error in `my_function()`: ! Can't combine `this_arg$x` and `that_arg$x` . # vec_ptype_common() reports error context Code (expect_error(my_function(this_arg = 1, that_arg = "foo"))) Output Error in `my_function()`: ! Can't combine `this_arg` and `that_arg` . --- Code (expect_error(my_function(this_arg = 1, that_arg = "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg$this_arg` and `my_arg$that_arg` . --- Code (expect_error(my_function(1, "foo", .arg = "my_arg"))) Output Error in `my_function()`: ! Can't combine `my_arg[[1]]` and `my_arg[[2]]` . vctrs/tests/testthat/_snaps/list-combine.md0000644000176200001440000015116215157321010020610 0ustar liggesusers# `list_combine()` works with homogeneous fallback in `default` Code list_combine(list(foobar(1), 1), indices = list(1, 2), size = 2) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . --- Code list_combine(list(foobar(1)), indices = list(1), size = 2, default = 1) Condition Error in `list_combine()`: ! Can't combine and `default` . # list_combine() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") list_combine(list(x, y), indices = list(1, 2), size = 2) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . --- Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") list_combine(list(x, y), indices = list(1, 2), size = 2, error_call = call( "foo"), x_arg = "arg") Condition Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . --- Code x <- structure(foobar(1), attr_foo = "foo") default <- structure(foobar(2), attr_foo = "bar") list_combine(list(x), indices = list(1), size = 2, default = default) Condition Error in `list_combine()`: ! Can't combine and `default` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . --- Code x <- structure(foobar(1), attr_foo = "foo") default <- structure(foobar(2), attr_foo = "bar") list_combine(list(x), indices = list(1), size = 2, default = default, default_arg = "d") Condition Error in `list_combine()`: ! Can't combine and `d` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # list_combine() fails with complex foreign S4 classes Code joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(list_combine(list(joe, jane), indices = list(1:2, 3), size = 3), class = "vctrs_error_incompatible_type")) Output Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(list_combine(list(joe, jane), indices = list(1:2, 3), size = 3, error_call = call("foo"), x_arg = "arg"), class = "vctrs_error_incompatible_type") ) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # list_combine() falls back to c() if S3 method is available Code (expect_error(list_combine(list(foobar(1), foobar(2)), indices = list(c(1, 3), integer()), size = 2), class = "vctrs_error_subscript_oob")) Output Error: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. --- Code x <- list(foobar(1:2)) indices <- list(1:3) (expect_error(list_combine(x, indices = indices, size = 3))) Output Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. Code (expect_error(list_combine(x, indices = indices, size = 3, x_arg = "arg", error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. # list_combine() falls back for S4 classes with a registered c() method Code (expect_error(list_combine(list(joe, 1, jane), indices = list(c(1, 2), 3, 4), size = 4), class = "vctrs_error_incompatible_type")) Output Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . # can ignore names in `list_combine()` by providing a `zap()` name-spec (#232) Code (expect_error(list_combine(list(a = c(b = 1:2)), indices = list(1:2), size = 2)) ) Output Error in `list_combine()`: ! Can't merge the outer name `a` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(list_combine(list(a = c(b = 1:2)), indices = list(1:2), size = 2, error_call = call("foo")))) Output Error in `list_combine()`: ! Can't merge the outer name `a` with a vector of length > 1. Please supply a `.name_spec` specification. --- Code x <- list(a = c(b = c("a", "b")), b = 3L) (expect_error(list_combine(x, indices = list(1:2, 3), size = 3, name_spec = zap()), class = "vctrs_error_incompatible_type")) Output Error in `list_combine()`: ! Can't combine `x$a` and `x$b` . Code x <- list(a = c(foo = 1:2), b = c(bar = "")) (expect_error(list_combine(x, indices = list(2:1, 3), size = 3, name_spec = zap()), class = "vctrs_error_incompatible_type")) Output Error in `list_combine()`: ! Can't combine `x$a` and `x$b` . # list_combine() fails if foreign classes are not homogeneous and there is no c() method Code list_combine(list(x), indices = list(c(1, 2)), size = 3, default = default) Condition Error in `list_combine()`: ! Can't combine and `default` . # recycling error indices are correct even with `NULL` removal Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[3]]` (size 2) to size 3. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[3]]` (size 2) to size 3. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[3]]` (size 2) to size 3. # `x_arg` works Code list_combine(list(1, "2"), indices = list(1, 2), size = 2, x_arg = "xs") Condition Error in `list_combine()`: ! Can't combine `xs[[1]]` and `xs[[2]]` . --- Code list_combine(list(1, 2), indices = list(1, 2, 3), size = 2, x_arg = "xs") Condition Error in `list_combine()`: ! `indices` must have size 2, not size 3. # `indices_arg` works Code list_combine(list(1, 2), indices = 1, size = 2, indices_arg = "i") Condition Error in `list_combine()`: ! `i` must be a list, not the number 1. --- Code list_combine(list(1, 2), indices = list(1, 2, 3), size = 2, indices_arg = "i") Condition Error in `list_combine()`: ! `i` must have size 2, not size 3. # `...` must be empty Code list_combine(list(1, 2), indices = list(1, 2), size = 2, "foo") Condition Error in `list_combine()`: ! `...` must be empty. x Problematic argument: * ..1 = "foo" i Did you forget to name an argument? # list_combine() `size` type is validated Code list_combine(list(1), indices = list(1), size = "x") Condition Error in `list_combine()`: ! `size` must be a scalar integer or double. # list_combine() `indices` are validated against `size` Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. # list_combine() `default` vector check is done Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! `d` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! `d` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! `d` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # list_combine() `default` size check is done Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `d` (size 2) to size 1. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `d` (size 2) to size 1. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `d` (size 2) to size 1. # list_combine() `default` is taken into account when computing `ptype` Code list_combine(list(x = 1), indices = list(1), size = 2, default = "a", default_arg = "d") Condition Error in `list_combine()`: ! Can't combine and `d` . --- Code list_combine(list(x = 1L), indices = list(1), size = 2, default = 1.5, default_arg = "d", ptype = integer()) Condition Error in `list_combine()`: ! Can't convert from `d` to due to loss of precision. * Locations: 1 # list_combine() `unmatched = 'error'` errors with unmatched `indices` when `size` is used Code list_combine(list(1, 1), indices = list(1, 1), size = 2, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. --- Code list_combine(list(1, 1), indices = list(1, NA), size = 2, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. --- Code list_combine(list(1:9, 1:9), indices = list(c(TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, NA, NA, NA)), size = 9, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Locations 5, 6, 8, and 9 are unmatched. --- Code list_combine(list(1, 3), indices = list(1, 3), size = 3, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. --- Code list_combine(list(1, 1), indices = list(c(TRUE, FALSE), c(TRUE, FALSE)), size = 2, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. --- Code list_combine(list(), indices = list(), size = 2, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Locations 1 and 2 are unmatched. # list_combine() `unmatched = 'error'` errors pluralize correctly Code list_combine(list(1, 3), indices = list(1, 3), size = 3, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. --- Code list_combine(list(1, 3), indices = list(1, 3), size = 4, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Locations 2 and 4 are unmatched. --- Code list_combine(list(1, 3), indices = list(1, 3), size = 100, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Locations 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ..., 99, and 100 are unmatched. # list_combine() `unmatched = 'error'` can't be set when `default` is also set Code list_combine(list(1), indices = list(1), default = 1, size = 1, unmatched = "error") Condition Error in `list_combine()`: ! Can't set `default` when `unmatched = "error"`. --- Code list_combine(list(1), indices = list(1), default = 1, size = 1, unmatched = "error", default_arg = ".default", error_call = quote(foo())) Condition Error in `foo()`: ! Can't set `.default` when `unmatched = "error"`. # list_combine() `unmatched` is validated Code list_combine(list(1), indices = list(1), size = 1, unmatched = "e") Condition Error in `list_combine()`: ! `unmatched` must be either "default" or "error", not "e". --- Code list_combine(list(1), indices = list(1), size = 1, unmatched = c("a", "b")) Condition Error in `list_combine()`: ! `unmatched` must be a string, not a character vector. --- Code list_combine(list(1), indices = list(1), size = 1, unmatched = NA_character_) Condition Error in `list_combine()`: ! `unmatched` must be a string, not a character `NA`. --- Code list_combine(list(1), indices = list(1), size = 1, unmatched = "e", error_call = quote( foo())) Condition Error in `foo()`: ! `unmatched` must be either "default" or "error", not "e". --- Code list_combine(list(1), indices = list(1), size = 1, unmatched = c("a", "b"), error_call = quote(foo())) Condition Error in `foo()`: ! `unmatched` must be a string, not a character vector. # list_combine() `multiple` is validated Code list_combine(list(1), indices = list(1), size = 1, multiple = "a") Condition Error in `list_combine()`: ! `multiple` must be either "last" or "first", not "a". --- Code list_combine(list(1), indices = list(1), size = 1, multiple = c("a", "b")) Condition Error in `list_combine()`: ! `multiple` must be a string, not a character vector. --- Code list_combine(list(1), indices = list(1), size = 1, multiple = NA_character_) Condition Error in `list_combine()`: ! `multiple` must be a string, not a character `NA`. --- Code list_combine(list(1), indices = list(1), size = 1, multiple = "a", error_call = quote( foo())) Condition Error in `foo()`: ! `multiple` must be either "last" or "first", not "a". --- Code list_combine(list(1), indices = list(1), size = 1, multiple = c("a", "b"), error_call = quote(foo())) Condition Error in `foo()`: ! `multiple` must be a string, not a character vector. # `NA` indices are considered unmatched locations Code list_combine(x = list(1, 2:3), indices = list(1, c(NA, 3)), size = 3, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. --- Code list_combine(x = list(1, 2:3), indices = list(c(TRUE, FALSE, FALSE), c(FALSE, NA, TRUE)), size = 3, unmatched = "error") Condition Error in `list_combine()`: ! Each location must be matched. x Location 2 is unmatched. # `x` must be a list Code list_combine(1, indices = list(1), size = 1) Condition Error in `list_combine()`: ! `x` must be a list, not the number 1. --- Code list_combine(1, indices = list(1), size = 1, error_call = call("foo"), x_arg = "arg") Condition Error in `foo()`: ! `arg` must be a list, not the number 1. --- Code list_combine(data.frame(x = 1), indices = list(1), size = 1) Condition Error in `list_combine()`: ! `x` must be a list, not a object. --- Code list_combine(array(list(1)), indices = list(1), size = 1) Condition Error in `list_combine()`: ! `x` must be a list, not a list 1D array. # `indices` must be a list Code list_combine(list(1), indices = 1, size = 1) Condition Error in `list_combine()`: ! `indices` must be a list, not the number 1. --- Code list_combine(list(1), indices = 1, size = 1, error_call = call("foo")) Condition Error in `foo()`: ! `indices` must be a list, not the number 1. --- Code list_combine(list(1), indices = data.frame(x = 1), size = 1) Condition Error in `list_combine()`: ! `indices` must be a list, not a object. # `x` and `indices` must be lists of the same size Code list_combine(list(1, 2), indices = list(1), size = 1) Condition Error in `list_combine()`: ! `indices` must have size 2, not size 1. # common type failure after common class fallback reports the original class (#1981) Code list_combine(list(int, dbl), indices = list(1, 2), size = 2) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . --- Code list_combine(list(int, int, dbl), indices = list(1, 2, 3), size = 3) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[3]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . --- Code list_combine(list(int, int), indices = list(1, 2), size = 2, default = dbl) Condition Error in `list_combine()`: ! Can't combine and `default` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # NULL is a valid index Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. # combining recycles elements of x to the size of the index Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `arg[[1]]` (size 2) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. # combining takes the common type Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # common type failure uses positional errors Code x <- list(1, a = "x", 2) (expect_error(list_combine(x, indices = list(2, 1, 3), size = 3))) Output Error in `list_combine()`: ! Can't combine `x[[1]]` and `x$a` . Code (expect_error(list_combine(x, indices = list(2, 1, 3), size = 3, ptype = double())) ) Output Error in `list_combine()`: ! Can't convert `x$a` to . Code y <- list(1, a = 2.5) (expect_error(list_combine(y, indices = list(2, 1), size = 2, ptype = integer())) ) Output Error in `list_combine()`: ! Can't convert from `x$a` to due to loss of precision. * Locations: 1 # can specify a ptype to override common type Code list_combine(x, indices = indices, size = 2, ptype = integer()) Condition Error in `list_combine()`: ! Can't convert from `x[[1]]` to due to loss of precision. * Locations: 1 --- Code list_combine(x, indices = indices, size = 2, ptype = integer(), error_call = call( "foo"), x_arg = "arg") Condition Error in `foo()`: ! Can't convert from `arg[[1]]` to due to loss of precision. * Locations: 1 # outer names are recycled in the right order Code list_combine(x, indices = list(c(1, 2), 3), size = 3) Condition Error in `list_combine()`: ! Can't merge the outer name `x` with a vector of length > 1. Please supply a `.name_spec` specification. # list_combine() can repair names quietly Code res <- list_combine(vec_chop(x, indices = indices), indices = indices, size = 3, name_repair = "unique_quiet") --- Code res <- list_combine(vec_chop(x, indices = indices), indices = indices, size = 3, name_repair = "universal_quiet") # list_combine() errors on unsupported location values Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript can't contain negative locations. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript can't contain negative locations. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript can't contain negative locations. # list_combine() fallback doesn't support `name_spec` or `ptype` Code foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") (expect_error(with_c_foobar(list_combine(list(foo, bar), indices = list(1, 2), size = 2, name_spec = "{outer}_{inner}")), "name specification")) Output Error in `list_combine()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . Code (expect_error(with_c_foobar(list_combine(list(foo, bar), indices = list(1, 2), size = 2, name_spec = "{outer}_{inner}", error_call = call("foo"))), "name specification")) Output Error in `foo()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . Code x <- list(foobar(1)) (expect_error(with_c_foobar(list_combine(x, indices = list(1), size = 1, ptype = "")), class = "vctrs_error_incompatible_type")) Output Error in `list_combine()`: ! Can't convert `x[[1]]` to . # list_combine() does not support non-numeric S3 indices Code (expect_error(list_combine(list(1), indices = list(factor("x")), size = 1), class = "vctrs_error_subscript_type")) Output Error: ! Can't subset elements. x Subscript must be numeric, not the string "x". Code (expect_error(list_combine(list(1), indices = list(foobar(1L)), size = 1), class = "vctrs_error_subscript_type")) Output Error: ! Can't subset elements. x Subscript must be numeric, not a object. # `list_combine()` with logical `indices` checks `indices` size Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript must be numeric, not a logical vector. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript must be numeric, not a logical vector. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error: ! Can't subset elements. x Subscript must be numeric, not a logical vector. # `multiple` shows correctly indexed errors Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[2]]` (size 3) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Can't recycle `x[[1]]` (size 2) to size 4. # `compact_seq()` `indices` work with `unmatched` Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Each location must be matched. x Location 3 is unmatched. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Each location must be matched. x Location 3 is unmatched. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Each location must be matched. x Location 3 is unmatched. --- Code list_combine(x, indices = indices, size = size, default = default, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Each location must be matched. x Locations 2 and 3 are unmatched. --- Code list_combine(x_foobar, indices = indices, size = size, default = default_foobar, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Each location must be matched. x Locations 2 and 3 are unmatched. --- Code list_combine(x_foobar_c, indices = indices, size = size, default = default_foobar_c, unmatched = unmatched, multiple = multiple, slice_x = slice_x, ptype = ptype, name_spec = name_spec, name_repair = name_repair, x_arg = x_arg, indices_arg = indices_arg, default_arg = default_arg) Condition Error in `list_combine()`: ! Each location must be matched. x Locations 2 and 3 are unmatched. vctrs/tests/testthat/_snaps/cast.md0000644000176200001440000000531115157320775017170 0ustar liggesusers# Casting to named argument mentions 'match type ' Code vec_cast(1, "", x_arg = "foo", to_arg = "bar") Condition Error: ! Can't convert `foo` to match type of `bar` . --- Code vec_cast(1, "", x_arg = "foo") Condition Error: ! Can't convert `foo` to . # cast errors create helpful messages (#57, #225) Code vec_cast(1.5, 10L) Condition Error: ! Can't convert from `1.5` to due to loss of precision. * Locations: 1 --- Code vec_cast(factor("foo"), 10) Condition Error: ! Can't convert `factor("foo")` > to . --- Code x <- tibble(a = tibble(b = 1.5)) y <- tibble(a = tibble(b = 10L)) vec_cast(x, y) Condition Error: ! Can't convert from `x$a$b` to `a$b` due to loss of precision. * Locations: 1 --- Code x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast(x, y) Condition Error: ! Can't convert `x$a$b` > to match type of `a$b` . --- Code x <- tibble(a = tibble(b = factor("foo"))) y <- tibble(a = tibble(b = 10)) vec_cast_common(x, y) Condition Error: ! Can't combine `..1$a$b` > and `..2$a$b` . # vec_cast() only attempts to fall back if `to` is a data frame (#1568) Code (expect_error(vec_cast(foobar(mtcars), 1), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `foobar(mtcars)` to . # can signal deprecation warnings for lossy casts Code (expect_warning(expect_true(lossy_cast()))) Output Warning: Coercion with lossy casts was deprecated in vctrs 0.2.0. i Please use `allow_lossy_cast()` instead. i We detected a lossy transformation from `x` to `to` . The result will contain lower-resolution values or missing values. To suppress this warning, wrap your code with `allow_lossy_cast()`. # can cast to unspecified `NA` with `vec_cast()` and `vec_cast_common()` (#2099) Code vec_cast(TRUE, to = unspecified(1)) Condition Error: ! Can't convert `TRUE` to . # casting performs expected allocations Code x <- matrix(rep(1L, 100), ncol = 2) with_memory_prof(vec_cast(x, x)) Output [1] 0B Code x <- matrix(rep(1L, 100), ncol = 2) y <- matrix(rep(1, 100), ncol = 2) with_memory_prof(vec_cast(x, y)) Output [1] 848B vctrs/tests/testthat/_snaps/rank.md0000644000176200001440000000274015157321011017154 0ustar liggesusers# `x` must not be `NULL` (#1823, #1967) Code vec_rank(NULL) Condition Error: ! `x` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_rank(NULL, incomplete = "na") Condition Error: ! `x` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_rank(NULL, ties = "sequential", incomplete = "na") Condition Error: ! `x` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # `ties` is validated Code vec_rank(1, ties = "foo") Condition Error in `vec_rank()`: ! `ties` must be one of "min", "max", "sequential", or "dense", not "foo". --- Code vec_rank(1, ties = 1) Condition Error in `vec_rank()`: ! `ties` must be a string or character vector. # `incomplete` is validated Code vec_rank(1, incomplete = NA) Condition Error in `vec_rank()`: ! `incomplete` must be a string or character vector. --- Code vec_rank(1, incomplete = c(TRUE, FALSE)) Condition Error in `vec_rank()`: ! `incomplete` must be a string or character vector. --- Code vec_rank(1, incomplete = "foo") Condition Error in `vec_rank()`: ! `incomplete` must be one of "rank" or "na", not "foo". vctrs/tests/testthat/_snaps/recode.md0000644000176200001440000002273715157321012017473 0ustar liggesusers# `unmatched` errors are correct Code vec_recode_values(c(1, 2), from = 1, to = 0, unmatched = "error") Condition Error in `vec_recode_values()`: ! Each location must be matched. x Location 2 is unmatched. --- Code vec_recode_values(c(1, NA), from = 1, to = 0, unmatched = "error") Condition Error in `vec_recode_values()`: ! Each location must be matched. x Location 2 is unmatched. --- Code vec_recode_values(1:100, from = 1, to = 0, unmatched = "error") Condition Error in `vec_recode_values()`: ! Each location must be matched. x Locations 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100 are unmatched. # `x` and `from` common type errors are correct Code vec_recode_values(1, from = "a", to = 1) Condition Error in `vec_recode_values()`: ! Can't convert `from` to match type of `x` . --- Code vec_recode_values(1, from = list("a"), to = 1, from_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't convert `from[[1]]` to . # `to` and `default` `ptype` errors are correct when it is inferred Code vec_recode_values(1, from = 1:2, to = list(1, "x"), to_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't combine `to[[1]]` and `to[[2]]` . --- Code vec_recode_values(1, from = 1:2, to = list(1, 2), default = "x", to_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't combine `default` and . --- Code vec_recode_values(1, from = 1:2, to = 1, default = "x") Condition Error in `vec_recode_values()`: ! Can't combine `to` and `default` . # `to` and `default` `ptype` errors are correct when it is user supplied Code vec_recode_values(1, from = 1, to = 1, ptype = foobar()) Condition Error in `vec_recode_values()`: ! `ptype` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_recode_values(1, from = 1, to = 1, ptype = character()) Condition Error in `vec_recode_values()`: ! Can't convert `to` to . --- Code vec_recode_values(1, from = 1, to = list(a = 1), ptype = character(), to_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't convert `to$a` to . --- Code vec_recode_values(1, from = 1, to = "x", default = 1, ptype = character()) Condition Error in `vec_recode_values()`: ! Can't convert `default` to . # `to` size is validated Code vec_recode_values(1:5, from = 1, to = 2:3) Condition Error in `vec_recode_values()`: ! Can't recycle `to` (size 2) to size 1. --- Code vec_recode_values(1:5, from = list(1), to = 2:3, from_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't recycle `to` (size 2) to size 1. --- Code vec_recode_values(1:5, from = 1, to = list(2, 3), to_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't recycle `to` (size 2) to size 1. --- Code vec_recode_values(1:5, from = list(1), to = list(2, 3), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't recycle `to` (size 2) to size 1. --- Code vec_recode_values(1:5, from = 1, to = list(a = 2:3), to_as_list_of_vectors = TRUE) Condition Error in `vec_recode_values()`: ! Can't recycle `to$a` (size 2) to size 5. # `default` size is validated Code vec_recode_values(1:5, from = 1, to = 2, default = 1:2) Condition Error in `vec_recode_values()`: ! Can't recycle `default` (size 2) to size 5. # `x` must be a vector Code vec_recode_values(foobar(), from = 1, to = 2, x_arg = ".x") Condition Error in `vec_recode_values()`: ! `.x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `from` must be a vector or list of vectors Code vec_recode_values(1, from = foobar(), to = 2, from_arg = ".from") Condition Error in `vec_recode_values()`: ! `.from` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_recode_values(1, from = 1, to = 2, from_as_list_of_vectors = TRUE, from_arg = ".from") Condition Error in `vec_recode_values()`: ! `.from` must be a list, not the number 1. --- Code vec_recode_values(1, from = list(a = foobar()), to = 2, from_as_list_of_vectors = TRUE, from_arg = ".from") Condition Error in `vec_recode_values()`: ! `.from$a` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `to` must be a vector or list of vectors Code vec_recode_values(1, from = 1, to = foobar(), to_arg = ".to") Condition Error in `vec_recode_values()`: ! `.to` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_recode_values(1, from = 1, to = 2, to_as_list_of_vectors = TRUE, to_arg = ".to") Condition Error in `vec_recode_values()`: ! `.to` must be a list, not the number 2. --- Code vec_recode_values(1, from = 1, to = list(a = foobar()), to_as_list_of_vectors = TRUE, to_arg = ".to") Condition Error in `vec_recode_values()`: ! `.to$a` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `default` must be a vector Code vec_recode_values(1, from = 1, to = 2, default = foobar(), default_arg = ".default") Condition Error in `vec_recode_values()`: ! `.default` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `from_as_list_of_vectors` and `to_as_list_of_vectors` are validated Code vec_recode_values(1, from = 1, to = 1, from_as_list_of_vectors = "x") Condition Error in `vec_recode_values()`: ! `from_as_list_of_vectors` must be `TRUE` or `FALSE`. --- Code vec_recode_values(1, from = 1, to = 1, to_as_list_of_vectors = "x") Condition Error in `vec_recode_values()`: ! `to_as_list_of_vectors` must be `TRUE` or `FALSE`. --- Code vec_replace_values(1, from = 1, to = 1, from_as_list_of_vectors = "x") Condition Error in `vec_replace_values()`: ! `from_as_list_of_vectors` must be `TRUE` or `FALSE`. --- Code vec_replace_values(1, from = 1, to = 1, to_as_list_of_vectors = "x") Condition Error in `vec_replace_values()`: ! `to_as_list_of_vectors` must be `TRUE` or `FALSE`. # `unmatched` is validated Code vec_recode_values(1, from = 1, to = 1, unmatched = "e") Condition Error in `vec_recode_values()`: ! `unmatched` must be either "default" or "error", not "e". vctrs/tests/testthat/_snaps/shape.md0000644000176200001440000000262715157321012017326 0ustar liggesusers# incompatible shapes throw errors Code (expect_error(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine and . x Incompatible sizes 0 and 5 along axis 2. Code (expect_error(vec_shape2(shaped_int(1, 5, 0), shaped_int(1, 1, 5)), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine and . x Incompatible sizes 0 and 5 along axis 3. # can override error args Code (expect_error(vec_shape2(shaped_int(1, 0, 5), shaped_int(1, 5, 1), x_arg = "foo", y_arg = "bar"), class = "vctrs_error_incompatible_type")) Output Error: ! Can't combine `foo` and `bar` . x Incompatible sizes 0 and 5 along axis 2. # can combine shaped native classes (#1290, #1329) Code vec_c(x, y) Condition Error: ! Can't combine `..1` > and `..2` >. x Incompatible sizes 2 and 3 along axis 2. # factor casts support shape Code vec_cast(x, y) Condition Error: ! Can't convert `x` [,1]> to >. Can't decrease dimensionality from 2 to 1. vctrs/tests/testthat/_snaps/group.md0000644000176200001440000000015715157320777017377 0ustar liggesusers# print method is useful Code x Output [1] 1x3 2x2 1x1 vctrs/tests/testthat/_snaps/slice-chop.md0000644000176200001440000000535315157321013020254 0ustar liggesusers# `indices` are validated Code vec_chop(1, indices = 1) Condition Error: ! `indices` must be a list of index values, or `NULL`. --- Code (expect_error(vec_chop(1, indices = list(1.5)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Can't convert from to due to loss of precision. --- Code (expect_error(vec_chop(1, indices = list(2)), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. # `sizes` are validated Code vec_chop("a", sizes = "a") Condition Error: ! Can't convert `sizes` to . --- Code vec_chop("a", sizes = 2) Condition Error: ! `sizes` can't contain sizes larger than 1. --- Code vec_chop("a", sizes = -1) Condition Error: ! `sizes` can't contain negative sizes. --- Code vec_chop("a", sizes = NA_integer_) Condition Error: ! `sizes` can't contain missing values. --- Code vec_chop("a", sizes = c(1, 1)) Condition Error: ! `sizes` must sum to size 1, not size 2. # can't use both `indices` and `sizes` Code vec_chop(1, indices = list(1), sizes = 1) Condition Error: ! Can't supply both `indices` and `sizes`. # `vec_chop(x, indices)` backwards compatible behavior works Code vec_chop(1:2, 1) Condition Error: ! `indices` must be a list of index values, or `NULL`. --- Code vec_chop(1, list(1), sizes = 1) Condition Error: ! Can't supply both `indices` and `sizes`. --- Code vec_chop(1, list(1), 2) Condition Error in `vec_chop()`: ! `...` must be empty. x Problematic arguments: * ..1 = list(1) * ..2 = 2 i Did you forget to name an argument? --- Code vec_chop(1, list(1), indices = list(1)) Condition Error in `vec_chop()`: ! `...` must be empty. x Problematic argument: * ..1 = list(1) i Did you forget to name an argument? # `vec_chop()` can't take `compact_seq()` indices directly Code vec_chop(1:2, indices = list(compact_seq(1, 2))) Condition Error in `vec_chop()`: ! `compact_seq` are not allowed. i In file 'slice-chop.c' at line . i This is an internal error that was detected in the vctrs package. Please report it at with a reprex () and the full backtrace. vctrs/tests/testthat/_snaps/type-vctr.md0000644000176200001440000000075415157321021020162 0ustar liggesusers# na.fail() works Code na.fail(x) Condition Error in `na.fail()`: ! missing values in object # default print and str methods are useful Code h Output [1] xxx xxx xxx xxx --- Code h[0] Output --- Code str(h) Output hidden [1:4] xxx, xxx, xxx, xxx # default print method shows names Code h Output A B C xxx xxx xxx vctrs/tests/testthat/_snaps/assert.md0000644000176200001440000004635215157320775017551 0ustar liggesusers# obj_check_vector() errors on scalars Code obj_check_vector(quote(foo)) Condition Error: ! `quote(foo)` must be a vector, not a symbol. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code obj_check_vector(foobar()) Condition Error: ! `foobar()` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # obj_check_vector() error respects `arg` and `call` Code my_check_vector(foobar()) Condition Error in `my_check_vector()`: ! `foo` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # obj_check_vector() error contains FAQ links and correct bullets Code obj_check_vector(x) Condition Error: ! `x` must be a vector, not an expression vector. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code obj_check_vector(x) Condition Error: ! `x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code obj_check_vector(x) Condition Error: ! `x` must be a vector, not a object. x Detected incompatible data frame structure. A data frame is normally treated as a vector, but an incompatible class ordering was detected. To be compatible, the subclass must come before , not after. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # assertion failures are explained Code vec_assert(lgl(), chr()) Condition Error: ! `lgl()` must be a vector with type . Instead, it has type . --- Code vec_assert(lgl(), factor()) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(lgl(), factor(levels = "foo")) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(factor(levels = "bar"), factor(levels = "foo")) Condition Error: ! `factor(levels = "bar")` must be a vector with type >. Instead, it has type >. --- Code vec_assert(factor(), chr()) Condition Error: ! `factor()` must be a vector with type . Instead, it has type >. --- Code vec_assert(lgl(), data.frame()) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(lgl(), data.frame(x = 1)) Condition Error: ! `lgl()` must be a vector with type >. Instead, it has type . --- Code vec_assert(lgl(), data.frame(x = 1, y = 2)) Condition Error: ! `lgl()` must be a vector with type: > Instead, it has type . --- Code vec_assert(data.frame(), chr()) Condition Error: ! `data.frame()` must be a vector with type . Instead, it has type >. --- Code vec_assert(data.frame(x = 1), chr()) Condition Error: ! `data.frame(x = 1)` must be a vector with type . Instead, it has type >. --- Code vec_assert(data.frame(x = 1), data.frame(x = "foo")) Condition Error: ! `data.frame(x = 1)` must be a vector with type >. Instead, it has type >. --- Code vec_assert(data.frame(x = 1), data.frame(x = "foo", y = 2)) Condition Error: ! `data.frame(x = 1)` must be a vector with type: > Instead, it has type >. --- Code vec_assert(data.frame(x = 1, y = 2), chr()) Condition Error: ! `data.frame(x = 1, y = 2)` must be a vector with type . Instead, it has type: > --- Code vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo")) Condition Error: ! `data.frame(x = 1, y = 2)` must be a vector with type >. Instead, it has type: > --- Code vec_assert(data.frame(x = 1, y = 2), data.frame(x = "foo", y = 2)) Condition Error: ! `data.frame(x = 1, y = 2)` must be a vector with type: > Instead, it has type: > # vec_assert() validates `size` (#1470) Code (expect_error(vec_assert(1, size = c(2, 3)))) Output Error in `vec_assert()`: ! `size` must be length 1, not length 2. Code (expect_error(vec_assert(1, size = 1.5))) Output Error in `vec_assert()`: ! Can't convert from `size` to due to loss of precision. * Locations: 1 Code (expect_error(vec_assert(1, size = "x"))) Output Error in `vec_assert()`: ! Can't convert `size` to . # vec_check_size() errors on the wrong size Code vec_check_size(1:5, size = 1L) Condition Error: ! `1:5` must have size 1, not size 5. --- Code vec_check_size(1:5, size = 10L) Condition Error: ! `1:5` must have size 10, not size 5. # vec_check_size() errors on scalars Code vec_check_size(quote(foo), size = 1L) Condition Error: ! `quote(foo)` must be a vector, not a symbol. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_check_size(foobar(), size = 1L) Condition Error: ! `foobar()` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # vec_check_size() error respects `arg` and `call` Code my_check_size(1L, size = 5L) Condition Error in `my_check_size()`: ! `foo` must have size 5, not size 1. --- Code my_check_size(foobar(), size = 5L) Condition Error in `my_check_size()`: ! `foo` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # vec_check_size() validates `size` Code vec_check_size(1, size = "x") Condition Error in `vec_check_size()`: ! `size` must be a scalar integer or double. --- Code vec_check_size(1, size = c(1L, 2L)) Condition Error in `vec_check_size()`: ! `size` must be a scalar integer or double. --- Code vec_check_size(1, size = 1.5) Condition Error in `vec_check_size()`: ! `size` must be a whole number, not a decimal number. # vec_check_recyclable() errors on the wrong size Code vec_check_recyclable(1:5, size = 1L) Condition Error: ! Can't recycle `1:5` (size 5) to size 1. --- Code vec_check_recyclable(1:5, size = 10L) Condition Error: ! Can't recycle `1:5` (size 5) to size 10. # vec_check_recyclable() errors on scalars Code vec_check_recyclable(quote(foo), size = 1L) Condition Error: ! `quote(foo)` must be a vector, not a symbol. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code vec_check_recyclable(foobar(), size = 1L) Condition Error: ! `foobar()` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # vec_check_recyclable() error respects `arg` and `call` Code my_check_recyclable(1:2, size = 5L) Condition Error in `my_check_recyclable()`: ! Can't recycle `foo` (size 2) to size 5. --- Code my_check_recyclable(foobar(), size = 5L) Condition Error in `my_check_recyclable()`: ! `foo` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # vec_check_recyclable() validates `size` Code vec_check_recyclable(1, size = "x") Condition Error in `vec_check_recyclable()`: ! `size` must be a scalar integer or double. --- Code vec_check_recyclable(1, size = c(1L, 2L)) Condition Error in `vec_check_recyclable()`: ! `size` must be a scalar integer or double. --- Code vec_check_recyclable(1, size = 1.5) Condition Error in `vec_check_recyclable()`: ! `size` must be a whole number, not a decimal number. # list_all_vectors() works Code (expect_error(list_all_vectors(env()))) Output Error in `list_all_vectors()`: ! `x` must be a list, not an environment. # obj_check_list() works Code my_function <- (function(my_arg) obj_check_list(my_arg)) (expect_error(my_function(env()))) Output Error in `my_function()`: ! `my_arg` must be a list, not an environment. # obj_check_list() uses a special error when `arg` is the empty string (#1604) Code obj_check_list(1, arg = "") Condition Error: ! Input must be a list, not the number 1. # obj_check_list() and list_check_all_vectors() work Code my_function <- (function(my_arg) list_check_all_vectors(my_arg)) (expect_error(my_function(env()))) Output Error in `list_check_all_vectors()`: ! `x` must be a list, not an environment. Code (expect_error(my_function(list(1, env())))) Output Error in `my_function()`: ! `my_arg[[2]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(my_function(list(1, name = env())))) Output Error in `my_function()`: ! `my_arg$name` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code (expect_error(my_function(list(1, foo = env())))) Output Error in `my_function()`: ! `my_arg$foo` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # list_check_all_size() works Code my_function <- (function(my_arg, size) list_check_all_size(my_arg, size)) (expect_error(list_check_all_size(list(1:2, 1:3), 2))) Output Error: ! `list(1:2, 1:3)[[2]]` must have size 2, not size 3. Code (expect_error(my_function(list(1:2, 1:3), 2))) Output Error in `my_function()`: ! `my_arg[[2]]` must have size 2, not size 3. Code (expect_error(my_function(list(NULL, 1:2), 2))) Output Error in `my_function()`: ! `my_arg[[1]]` must have size 2, not size 0. # list_check_all_recyclable() works Code my_function <- (function(my_arg, size) { list_check_all_recyclable(my_arg, size) }) (expect_error(list_check_all_recyclable(list(1:2, 1:3), 2))) Output Error: ! Can't recycle `list(1:2, 1:3)[[2]]` (size 3) to size 2. Code (expect_error(my_function(list(1:2, 1:3), 2))) Output Error in `my_function()`: ! Can't recycle `my_arg[[2]]` (size 3) to size 2. Code (expect_error(my_function(list(NULL, 1:2), 2))) Output Error in `my_function()`: ! Can't recycle `my_arg[[1]]` (size 0) to size 2. # list_all_size() and list_check_all_size() error on scalars Code (expect_error(list_all_size(x, 2))) Output Error in `list_all_size()`: ! `x[[1]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code my_function <- (function(my_arg, size) list_check_all_size(my_arg, size)) (expect_error(my_function(x, 2))) Output Error in `my_function()`: ! `my_arg[[1]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # list_all_recyclable() and list_check_all_recyclable() error on scalars Code (expect_error(list_all_recyclable(x, 2))) Output Error in `list_all_recyclable()`: ! `x[[1]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. Code my_function <- (function(my_arg, size) { list_check_all_recyclable(my_arg, size) }) (expect_error(my_function(x, 2))) Output Error in `my_function()`: ! `my_arg[[1]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # list_all_size() and list_check_all_size() throw error using internal call on non-list input Code (expect_error(list_all_size(1, 2))) Output Error in `list_all_size()`: ! `x` must be a list, not the number 1. Code (expect_error(list_check_all_size(1, 2, arg = "arg", call = call("foo")))) Output Error in `list_check_all_size()`: ! `x` must be a list, not the number 1. # list_all_size() and list_check_all_size() validate `size` Code (expect_error(list_all_size(list(), size = "x"))) Output Error in `list_all_size()`: ! `size` must be a scalar integer or double. Code (expect_error(list_check_all_size(list(), size = "x"))) Output Error in `list_check_all_size()`: ! `size` must be a scalar integer or double. # list_all_recyclable() and list_check_all_recyclable() validate `size` Code (expect_error(list_all_recyclable(list(), size = "x"))) Output Error in `list_all_recyclable()`: ! `size` must be a scalar integer or double. Code (expect_error(list_check_all_recyclable(list(), size = "x"))) Output Error in `list_check_all_recyclable()`: ! `size` must be a scalar integer or double. # list_check_all_size() works with `allow_null` Code list_check_all_size(x, size = 1) Condition Error: ! `x[[2]]` must have size 1, not size 0. --- Code list_check_all_size(x, size = 1) Condition Error: ! `x[[2]]` must have size 1, not size 0. --- Code list_check_all_size(x, size = 1, allow_null = TRUE) Condition Error: ! `x[[3]]` must have size 1, not size 2. # list_check_all_vectors() works with `allow_null` Code list_check_all_vectors(x) Condition Error: ! `x[[2]]` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code list_check_all_vectors(x) Condition Error: ! `x[[2]]` must be a vector, not `NULL`. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. --- Code list_check_all_vectors(x, allow_null = TRUE) Condition Error: ! `x[[3]]` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # list_check_all_recyclable() works with `allow_null` Code list_check_all_recyclable(x, size = 2) Condition Error: ! Can't recycle `x[[2]]` (size 0) to size 2. --- Code list_check_all_recyclable(x, size = 2) Condition Error: ! Can't recycle `x[[2]]` (size 0) to size 2. --- Code list_check_all_recyclable(x, size = 2, allow_null = TRUE) Condition Error: ! Can't recycle `x[[3]]` (size 3) to size 2. # informative messages when 1d array doesn't match vector Code (expect_error(vec_assert(x, int()))) Output Error: ! `x` must be a vector with type . Instead, it has type . vctrs/tests/testthat/_snaps/type-data-frame.md0000644000176200001440000001364315157321016021212 0ustar liggesusers# data frames print nicely Code vec_ptype_show(mtcars) Output Prototype: data.frame< mpg : double cyl : double disp: double hp : double drat: double wt : double qsec: double vs : double am : double gear: double carb: double > --- Code vec_ptype_show(iris) Output Prototype: data.frame< Sepal.Length: double Sepal.Width : double Petal.Length: double Petal.Width : double Species : factor > # embedded data frames print nicely Code vec_ptype_show(df) Output Prototype: data.frame< x: integer a: data.frame< a: integer b: character > b: list_of c: list_of< data.frame< x: integer y: character > > > # `x` must be a list Code (expect_error(new_data_frame(1), "`x` must be a list")) Output Error: ! `x` must be a list # if supplied, `n` must be an integer of size 1 Code (expect_error(new_data_frame(n = c(1L, 2L)), "must be an integer of size 1")) Output Error in `new_data_frame()`: ! `n` must be an integer of size 1. Code (expect_error(new_data_frame(n = "x"), "must be an integer of size 1")) Output Error in `new_data_frame()`: ! `n` must be an integer of size 1. # if supplied, `n` can't be negative or missing (#1477) Code (expect_error(new_data_frame(n = -1L))) Output Error in `new_data_frame()`: ! `n` can't be negative. Code (expect_error(new_data_frame(n = NA_integer_))) Output Error in `new_data_frame()`: ! `n` can't be missing. # `class` must be a character vector Code (expect_error(new_data_frame(class = 1), "must be NULL or a character vector")) Output Error: ! `class` must be NULL or a character vector # data_frame() and df_list() report error context Code (expect_error(data_frame(a = 1, a = 1))) Output Error in `data_frame()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(data_frame(a = 1, a = 1, .error_call = call("foo")))) Output Error in `foo()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(data_frame(a = 1:2, b = int()))) Output Error in `data_frame()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(data_frame(a = 1:2, b = int(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(df_list(a = 1, a = 1))) Output Error in `df_list()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(df_list(a = 1, a = 1, .error_call = call("foo")))) Output Error in `foo()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. Code (expect_error(df_list(a = 1:2, b = int()))) Output Error in `df_list()`: ! Can't recycle `a` (size 2) to match `b` (size 0). Code (expect_error(df_list(a = 1:2, b = int(), .error_call = call("foo")))) Output Error in `foo()`: ! Can't recycle `a` (size 2) to match `b` (size 0). # input is tidy recycled Code expect_error(data_frame(1:2, 1:3), class = "vctrs_error_incompatible_size") # `.unpack` is validated Code df_list(.unpack = 1) Condition Error in `df_list()`: ! `.unpack` must be `TRUE` or `FALSE`. --- Code df_list(.unpack = c(TRUE, FALSE)) Condition Error in `df_list()`: ! `.unpack` must be `TRUE` or `FALSE`. # `.name_repair` can be quiet Code dfl_unique <- df_list(1, 2, .name_repair = "unique_quiet") dfl_universal <- df_list(`if` = 1, `in` = 2, .name_repair = "universal_quiet") df_unique <- data_frame(1, 2, .name_repair = "unique_quiet") df_universal <- data_frame(`if` = 1, `in` = 2, .name_repair = "universal_quiet") # data frame fallback handles column types (#999) Code local_error_call(call("my_function")) (expect_error(vec_ptype2(df1, df3), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `df1$x` and `df3$x` . Code (expect_error(vec_ptype2(df3, df1), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `df3$x` and `df1$x` . Code (expect_error(vec_cast(df2, df1), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't convert from `df2` > to > due to loss of precision. vctrs/tests/testthat/_snaps/list-of-transpose.md0000644000176200001440000000240115157321000021602 0ustar liggesusers# `x` is validated Code list_of_transpose(1) Condition Error in `list_of_transpose()`: ! `1` must be a ``, not the number 1. --- Code list_of_transpose(1, x_arg = "x", error_call = quote(foo())) Condition Error in `foo()`: ! `x` must be a ``, not the number 1. # `x` must be a fully specified list of Code x <- list_of(.ptype = integer(), .size = zap()) list_of_transpose(x) Condition Error in `list_of_transpose()`: ! `x` must be a fully specified ``. i `size` is not specified. --- Code x <- list_of(.ptype = zap(), .size = 1) list_of_transpose(x) Condition Error in `list_of_transpose()`: ! `x` must be a fully specified ``. i `ptype` is not specified. # `...` must be empty Code list_of_transpose(list_of2(1), 2) Condition Error in `list_of_transpose()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? # doesn't allow `NULL` elements Code list_of_transpose(list_of2(1:4, NULL, 5:8)) Condition Error in `list_of_transpose()`: ! `list_of2(1:4, NULL, 5:8)` can't contain `NULL` values. vctrs/tests/testthat/_snaps/type-asis.md0000644000176200001440000000110015157321014020127 0ustar liggesusers# AsIs objects throw ptype2 errors with their underlying types Code (expect_error(vec_ptype2(I(1), I("x")), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't combine `I(1)` and `I("x")` . # AsIs objects throw cast errors with their underlying types Code (expect_error(vec_cast(I(1), I(factor("x"))), class = "vctrs_error_incompatible_type") ) Output Error: ! Can't convert `I(1)` to >. vctrs/tests/testthat/_snaps/names.md0000644000176200001440000002034315157321007017330 0ustar liggesusers# vec_as_names() validates `repair` Code (expect_error(my_vec_as_names("x", my_repair = "foo"), "can't be \"foo\"")) Output Error in `my_vec_as_names()`: ! `my_repair` can't be "foo". See `?vctrs::vec_as_names`. Code (expect_error(my_vec_as_names(1, my_repair = 1), "string or a function")) Output Error in `my_vec_as_names()`: ! `my_repair` must be a string or a function. See `?vctrs::vec_as_names`. # vec_as_names() checks unique names Code (expect_error(my_vec_as_names(chr(NA), my_repair = "check_unique"))) Output Error: ! Names repair functions can't return `NA` values. Code (expect_error(my_vec_as_names(chr(""), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names can't be empty. x Empty name found at location 1. Code (expect_error(my_vec_as_names(chr("a", "a"), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `my_repair` to specify repair strategy. Code (expect_error(my_vec_as_names(chr("..1"), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at location 1. Code (expect_error(my_vec_as_names(chr("..."), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..." at location 1. # vec_as_names() accepts and checks repair function Code my_vec_as_names(c("", ""), my_repair = function(nms) "foo") Condition Error in `my_vec_as_names()`: ! Repaired names have length 1 instead of length 2. # vec_as_names() is noisy by default Code vec_as_names(c("x", "x"), repair = "unique") Message New names: * `x` -> `x...1` * `x` -> `x...2` Output [1] "x...1" "x...2" Code vec_as_names(c("x", "x"), repair = "unique", quiet = TRUE) Output [1] "x...1" "x...2" Code (expect_error(my_vec_as_names(c("x", "x"), my_repair = "check_unique"))) Output Error in `my_vec_as_names()`: ! Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. i Use argument `my_repair` to specify repair strategy. Code vec_as_names(c("1", "1"), repair = "unique_quiet") Output [1] "1...1" "1...2" Code vec_as_names(c("1", "1"), repair = "universal_quiet") Output [1] "...1...1" "...1...2" Code vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = TRUE) Output [1] "1...1" "1...2" Code vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = TRUE) Output [1] "...1...1" "...1...2" Code vec_as_names(c("1", "1"), repair = "unique_quiet", quiet = FALSE) Output [1] "1...1" "1...2" Code vec_as_names(c("1", "1"), repair = "universal_quiet", quiet = FALSE) Output [1] "...1...1" "...1...2" # validate_minimal_names() checks names Code (expect_error(validate_minimal_names(1), "must return a character vector")) Output Error: ! Names repair functions must return a character vector. Code (expect_error(validate_minimal_names(NULL), "can't return `NULL`")) Output Error: ! Names repair functions can't return `NULL`. Code (expect_error(validate_minimal_names(chr(NA)), "can't return `NA` values")) Output Error: ! Names repair functions can't return `NA` values. # validate_unique() checks unique names Code (expect_error(validate_unique(chr(NA)), "`NA`")) Output Error: ! Names repair functions can't return `NA` values. Code (expect_error(validate_unique(chr("")), class = "vctrs_error_names_cannot_be_empty") ) Output Error: ! Names can't be empty. x Empty name found at location 1. Code (expect_error(validate_unique(chr("a", "a")), class = "vctrs_error_names_must_be_unique") ) Output Error: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. Code (expect_error(validate_unique(chr("..1")), class = "vctrs_error_names_cannot_be_dot_dot") ) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..1" at location 1. Code (expect_error(validate_unique(chr("...")), class = "vctrs_error_names_cannot_be_dot_dot") ) Output Error: ! Names can't be of the form `...` or `..j`. x These names are invalid: * "..." at location 1. # vec_set_names() errors with bad `names` Code (expect_error(vec_set_names(1, 1), "character vector, not a double")) Output Error in `vec_set_names()`: ! `names` must be a character vector, not a double. Code (expect_error(vec_set_names(1, c("x", "y")), "The size of `names`, 2")) Output Error in `vec_set_names()`: ! The size of `names`, 2, must be the same as the size of `x`, 1. # unique_names() and as_unique_names() are verbose or silent Code unique_names(1:2) Message New names: * `` -> `...1` * `` -> `...2` Output [1] "...1" "...2" --- Code as_unique_names(c("", "")) Message New names: * `` -> `...1` * `` -> `...2` Output [1] "...1" "...2" # message Code as_universal_names(c("a b", "b c")) Message New names: * `a b` -> `a.b` * `b c` -> `b.c` Output [1] "a.b" "b.c" # messages by default Code vec_repair_names(set_names(1, "a:b"), "universal") Message New names: * `a:b` -> `a.b` Output a.b 1 --- Code vec_repair_names(set_names(1, "a:b"), ~ make.names(.)) Message New names: * `a:b` -> `a.b` Output a.b 1 # NULL name specs works with scalars Code (expect_error(vec_c(foo = c(a = 1, b = 2)), "vector of length > 1")) Output Error in `vec_c()`: ! Can't merge the outer name `foo` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(vec_c(foo = 1:2), "vector of length > 1")) Output Error in `vec_c()`: ! Can't merge the outer name `foo` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(vec_c(x = c(xx = 1)), "named vector")) Output Error in `vec_c()`: ! Can't merge the outer name `x` with a named vector. Please supply a `.name_spec` specification. # apply_name_spec() checks recyclability of output Code apply_name_spec(function(...) c("a", "b", "c"), "outer", "inner", n = 2L) Condition Error: ! Can't recycle input of size 3 to size 2. # vec_as_names() uses internal error if `repair_arg` is not supplied Code (expect_error(vec_as_names("", repair = "foobar", call = quote(tilt())))) Output Error in `vec_as_names()`: ! `repair` can't be "foobar". See `?vctrs::vec_as_names`. Code (expect_error(vec_as_names("", repair = env(), call = quote(tilt())))) Output Error in `vec_as_names()`: ! `repair` must be a string or a function. See `?vctrs::vec_as_names`. vctrs/tests/testthat/_snaps/slice-interleave.md0000644000176200001440000001127615157321013021462 0ustar liggesusers# allows for name repair Code vec_interleave(x, x, .name_repair = "unique") Message New names: * `x` -> `x...1` * `x` -> `x...2` Output x...1 x...2 1 1 # can repair names quietly Code res_unique <- vec_interleave(c(x = 1), c(x = 2), .name_repair = "unique_quiet") res_universal <- vec_interleave(c(`if` = 1), c(`in` = 2), .name_repair = "universal_quiet") # reports type errors Code vec_interleave(1, "x") Condition Error in `vec_interleave()`: ! Can't combine `..1` and `..2` . --- Code vec_interleave(1, "x", .error_call = quote(foo())) Condition Error in `foo()`: ! Can't combine `..1` and `..2` . --- Code vec_interleave(1, "x", .ptype = double()) Condition Error in `vec_interleave()`: ! Can't convert `..2` to . --- Code vec_interleave(1, "x", .ptype = double(), .error_call = quote(foo())) Condition Error in `foo()`: ! Can't convert `..2` to . --- Code vec_interleave(1, NULL, "x") Condition Error in `vec_interleave()`: ! Can't combine `..1` and `..3` . --- Code vec_interleave(1, NULL, "x", .ptype = double()) Condition Error in `vec_interleave()`: ! Can't convert `..3` to . # reports recycling errors Code vec_interleave(1:2, 1:3) Condition Error in `vec_interleave()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). --- Code vec_interleave(1:2, 1:3, .error_call = quote(foo())) Condition Error in `foo()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). --- Code vec_interleave(1:2, 3:4, .size = 3) Condition Error in `vec_interleave()`: ! Can't recycle `..1` (size 2) to size 3. --- Code vec_interleave(1:2, 3:4, .size = 3, .error_call = quote(foo())) Condition Error in `foo()`: ! Can't recycle `..1` (size 2) to size 3. --- Code vec_interleave(1:2, NULL, 1:3) Condition Error in `vec_interleave()`: ! Can't recycle `..1` (size 2) to match `..3` (size 3). --- Code vec_interleave(1:2, NULL, 1:3, .size = 2) Condition Error in `vec_interleave()`: ! Can't recycle `..3` (size 3) to size 2. # reports scalar errors Code vec_interleave(lm(1 ~ 1)) Condition Error in `vec_interleave()`: ! `..1` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_interleave(lm(1 ~ 1), .error_call = quote(foo())) Condition Error in `foo()`: ! `..1` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_interleave(1, NULL, lm(1 ~ 1)) Condition Error in `vec_interleave()`: ! `..3` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_interleave(1, NULL, lm(1 ~ 1), .error_call = quote(foo())) Condition Error in `foo()`: ! `..3` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `list_interleave()` checks for a list Code list_interleave(1) Condition Error in `list_interleave()`: ! `1` must be a list, not the number 1. vctrs/tests/testthat/_snaps/empty.md0000644000176200001440000000025315157320776017375 0ustar liggesusers# validates `x` Code x <- array(list(1), dim = c(1, 1)) list_drop_empty(x) Condition Error in `list_drop_empty()`: ! `x` must be a list. vctrs/tests/testthat/_snaps/type-table.md0000644000176200001440000000043215157321020020263 0ustar liggesusers# cannot decrease dimensionality Code (expect_error(vec_cast(x, y), class = "vctrs_error_incompatible_type")) Output Error: ! Can't convert `x` to . Can't decrease dimensionality from 3 to 2. vctrs/tests/testthat/_snaps/list-unchop.md0000644000176200001440000003374615157321004020502 0ustar liggesusers# `x` must be a list Code list_unchop(1, indices = list(1)) Condition Error in `list_unchop()`: ! `x` must be a list, not the number 1. --- Code list_unchop(1, indices = list(1), error_call = call("foo"), error_arg = "arg") Condition Error in `foo()`: ! `arg` must be a list, not the number 1. --- Code list_unchop(data.frame(x = 1), indices = list(1)) Condition Error in `list_unchop()`: ! `x` must be a list, not a object. # `indices` must be a list Code list_unchop(list(1), indices = 1) Condition Error in `list_unchop()`: ! `indices` must be a list, not the number 1. --- Code list_unchop(list(1), indices = 1, error_call = call("foo")) Condition Error in `foo()`: ! `indices` must be a list, not the number 1. --- Code list_unchop(list(1), indices = data.frame(x = 1)) Condition Error in `list_unchop()`: ! `indices` must be a list, not a object. # `x` and `indices` must be lists of the same size Code list_unchop(list(1, 2), indices = list(1)) Condition Error in `list_unchop()`: ! `indices` must have size 2, not size 1. # `NULL` is a valid index Code list_unchop(list(1, 2), indices = list(NULL, 2)) Condition Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. --- Code list_unchop(list(foobar(1), foobar(2)), indices = list(NULL, 2)) Condition Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. --- Code list_unchop(list(foobar(1), foobar(2)), indices = list(NULL, 2)) Condition Error: ! Can't subset elements past the end. i Location 2 doesn't exist. i There is only 1 element. # unchopping recycles elements of x to the size of the index Code list_unchop(list(1:2), indices = indices) Condition Error in `list_unchop()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_unchop(list(1:2), indices = indices, error_call = call("foo"), error_arg = "arg") Condition Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. --- Code list_unchop(list(foobar(1:2)), indices = indices) Condition Error in `list_unchop()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_unchop(list(foobar(1:2)), indices = indices, error_call = call("foo"), error_arg = "arg") Condition Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. --- Code list_unchop(list(foobar(1:2)), indices = indices) Condition Error in `list_unchop()`: ! Can't recycle `x[[1]]` (size 2) to size 3. --- Code list_unchop(list(foobar(1:2)), indices = indices, error_call = call("foo"), error_arg = "arg") Condition Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. # unchopping takes the common type Code (expect_error(list_unchop(x, indices = indices), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . Code (expect_error(list_unchop(x, indices = indices, error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . # common type failure uses positional errors Code x <- list(1, a = "x", 2) (expect_error(list_unchop(x))) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x$a` . Code (expect_error(list_unchop(x, indices = list(2, 1, 3)))) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x$a` . Code (expect_error(list_unchop(x, ptype = double()))) Output Error in `list_unchop()`: ! Can't convert `x$a` to . Code (expect_error(list_unchop(x, indices = list(2, 1, 3), ptype = double()))) Output Error in `list_unchop()`: ! Can't convert `x$a` to . Code y <- list(1, a = 2.5) (expect_error(list_unchop(y, ptype = integer()))) Output Error in `list_unchop()`: ! Can't convert from `x$a` to due to loss of precision. * Locations: 1 Code (expect_error(list_unchop(y, indices = list(2, 1), ptype = integer()))) Output Error in `list_unchop()`: ! Can't convert from `x$a` to due to loss of precision. * Locations: 1 # can specify a ptype to override common type Code (expect_error(list_unchop(x, indices = indices, ptype = integer()))) Output Error in `list_unchop()`: ! Can't convert from `x[[1]]` to due to loss of precision. * Locations: 1 Code (expect_error(list_unchop(x, indices = indices, ptype = integer(), error_call = call( "foo"), error_arg = "arg"))) Output Error in `foo()`: ! Can't convert from `arg[[1]]` to due to loss of precision. * Locations: 1 # list_unchop() can repair names quietly Code res <- list_unchop(vec_chop(x, indices = indices), indices = indices, name_repair = "unique_quiet") --- Code res <- list_unchop(vec_chop(x, indices = indices), indices = indices, name_repair = "universal_quiet") # list_unchop() errors on unsupported location values Code list_unchop(list(1, 2), indices = list(c(1, 2), 0)) Condition Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. --- Code list_unchop(list(1), indices = list(-1)) Condition Error: ! Can't subset elements. x Subscript can't contain negative locations. --- Code list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 2), 0)) Condition Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. --- Code list_unchop(list(foobar(1)), indices = list(-1)) Condition Error: ! Can't subset elements. x Subscript can't contain negative locations. --- Code list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 2), 0)) Condition Error: ! Can't subset elements. x Subscript can't contain `0` values. i It has a `0` value at location 1. --- Code list_unchop(list(foobar(1)), indices = list(-1)) Condition Error: ! Can't subset elements. x Subscript can't contain negative locations. # list_unchop() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(list_unchop(list(x, y)), class = "vctrs_error_incompatible_type")) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(list_unchop(list(x, y), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # list_unchop() fails with complex foreign S4 classes Code joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(list_unchop(list(joe, jane)), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(list_unchop(list(joe, jane), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # list_unchop() falls back to c() if S3 method is available Code (expect_error(list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 3), integer())), class = "vctrs_error_subscript_oob")) Output Error: ! Can't subset elements past the end. i Location 3 doesn't exist. i There are only 2 elements. --- Code x <- list(foobar(1:2)) indices <- list(1:3) (expect_error(list_unchop(x, indices = indices))) Output Error in `list_unchop()`: ! Can't recycle `x[[1]]` (size 2) to size 3. Code (expect_error(list_unchop(x, indices = indices, error_arg = "arg", error_call = call( "foo")))) Output Error in `foo()`: ! Can't recycle `arg[[1]]` (size 2) to size 3. # list_unchop() falls back for S4 classes with a registered c() method Code (expect_error(list_unchop(list(joe, 1, jane), indices = list(c(1, 2), 3, 4)), class = "vctrs_error_incompatible_type")) Output Error in `list_unchop()`: ! Can't combine `x[[1]]` and `x[[2]]` . # list_unchop() fallback doesn't support (most) `name_spec` or `ptype` Code list_unchop(list(foo, bar), indices = list(1, 2), name_spec = "{outer}_{inner}") Condition Error in `list_unchop()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . --- Code list_unchop(list(foo, bar), indices = list(1, 2), name_spec = "{outer}_{inner}", error_call = call("foo")) Condition Error in `foo()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . --- Code list_unchop(x, indices = list(1), ptype = "") Condition Error in `list_unchop()`: ! Can't convert `x[[1]]` to . # list_unchop() does not support non-numeric S3 indices Code (expect_error(list_unchop(list(1), indices = list(factor("x"))), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be numeric, not the string "x". Code (expect_error(list_unchop(list(1), indices = list(foobar(1L))), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be numeric, not a object. # can ignore names in `list_unchop()` by providing a `zap()` name-spec (#232) Code (expect_error(list_unchop(list(a = c(b = 1:2))))) Output Error in `list_unchop()`: ! Can't merge the outer name `a` with a vector of length > 1. Please supply a `.name_spec` specification. Code (expect_error(list_unchop(list(a = c(b = 1:2)), error_call = call("foo")))) Output Error in `list_unchop()`: ! Can't merge the outer name `a` with a vector of length > 1. Please supply a `.name_spec` specification. --- Code x <- list(a = c(b = letters), b = 3L) (expect_error(list_unchop(x, name_spec = zap()), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x$a` and `x$b` . Code x <- list(a = c(foo = 1:2), b = c(bar = "")) (expect_error(list_unchop(x, indices = list(2:1, 3), name_spec = zap()), class = "vctrs_error_incompatible_type") ) Output Error in `list_unchop()`: ! Can't combine `x$a` and `x$b` . # can ignore outer names in `list_unchop()` by providing a 'inner' name-spec (#1988) Code list_unchop(list(x = c(a = 1), y = c(b = "2")), indices = list(1, 2), name_spec = "inner") Condition Error in `list_unchop()`: ! Can't combine `x$x` and `x$y` . # calls cast method even with empty objects Code list_unchop(list(foobar(integer()), foobar(integer(), foo = "bar")), indices = list( integer(), integer())) Condition Error in `list_unchop()`: ! Can't convert `x[[2]]` to . vctrs/tests/testthat/_snaps/type-list-of.md0000644000176200001440000001721315157321020020556 0ustar liggesusers# constructor requires list input Code new_list_of(1) Condition Error in `new_list_of()`: ! `x` must be a list, not the number 1. --- Code new_list_of(mtcars) Condition Error in `new_list_of()`: ! `x` must be a list, not a object. # must lock at least one of ptype or size Code new_list_of(ptype = NULL, size = NULL) Condition Error in `new_list_of()`: ! Must specify at least one of `ptype` or `size`. # validates `ptype` Code new_list_of(ptype = lm(1 ~ 1)) Condition Error in `new_list_of()`: ! `ptype` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # validates `size` Code new_list_of(size = 1.1) Condition Error in `new_list_of()`: ! `size` must be a whole number, not the number 1.1. --- Code new_list_of(size = 1:2) Condition Error in `new_list_of()`: ! `size` must be a whole number, not an integer vector. --- Code new_list_of(size = -5) Condition Error in `new_list_of()`: ! `size` must be a whole number larger than or equal to 0, not the number -5. # can check for list of Code check_list_of(1) Condition Error: ! `1` must be a ``, not the number 1. # errors if can't determine type Code list_of(.ptype = NULL) Condition Error in `list_of()`: ! Can't find common type for elements of `x`. --- Code list_of(1, "a") Condition Error in `list_of()`: ! Can't combine `..1` and `..2` . --- Code as_list_of(list()) Condition Error in `as_list_of()`: ! Can't find common type for elements of `x`. # errors if can't determine size Code list_of(.ptype = zap(), .size = NULL) Condition Error in `list_of()`: ! Can't find common size for elements of `x`. --- Code list_of(1:2, 3:5, .ptype = zap(), .size = NULL) Condition Error in `list_of()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). --- Code as_list_of(list(), .ptype = integer(), .size = NULL) Condition Error in `as_list_of()`: ! Can't find common size for elements of `x`. # `list_of_size()` validates `x` Code list_of_size(list(1)) Condition Error in `list_of_size()`: ! `x` must be a ``, not a list. # `list_of_ptype()` validates `x` Code list_of_ptype(list(1)) Condition Error in `list_of_ptype()`: ! `x` must be a ``, not a list. # can print empty list-of Code list_of(.ptype = integer(), .size = 5L) Output [0]> # print method gives human friendly output Code cat(vec_ptype_full(x)) Output list_of --- Code cat(vec_ptype_abbr(x)) Output list --- Code cat(vec_ptype_full(x)) Output list_of --- Code cat(vec_ptype_abbr(x)) Output list --- Code cat(vec_ptype_full(x)) Output list_of --- Code cat(vec_ptype_abbr(x)) Output list # print method gives human friendly output for multi line types Code cat(vec_ptype_full(x)) Output list_of< data.frame< x: integer y: double z: character > > --- Code cat(vec_ptype_abbr(x)) Output list --- Code cat(vec_ptype_full(x)) Output list_of --- Code cat(vec_ptype_abbr(x)) Output list --- Code cat(vec_ptype_full(x)) Output list_of< data.frame< x: integer y: double z: character >[2] > --- Code cat(vec_ptype_abbr(x)) Output list # str method is reasonably correct Code str(x) Output list [1:2] $ : num 1 $ : num [1:2] 2 3 @ ptype: num(0) --- Code str(list(list(x, y = 2:1))) Output List of 1 $ :List of 2 ..$ : list [1:2] .. ..$ : num 1 .. ..$ : num [1:2] 2 3 .. ..@ ptype: num(0) ..$ y: int [1:2] 2 1 --- Code str(x[0]) Output list [1:0] list() @ ptype: num(0) --- Code str(list(list(x[0], y = 2:1))) Output List of 1 $ :List of 2 ..$ : list [1:0] list() .. ..@ ptype: num(0) ..$ y: int [1:2] 2 1 # [[ works Code x[[3]] Condition Error: ! Invalid index: out of bounds --- Code x[["c"]] Condition Error: ! Invalid index: field name 'c' not found # $ works Code x$c Condition Error: ! Invalid index: field name 'c' not found # [<- coerces and recycles Code x[1] <- list("5") Condition Error in `lapply()`: ! Can't convert `X[[i]]` to . --- Code x[1] <- list(c(1, 2, 3)) Condition Error in `lapply()`: ! Can't recycle input of size 3 to size 2. # [[<- coerces and recycles Code x[[1]] <- "5" Condition Error in `[[<-`: ! Can't convert `value` to . --- Code x[[1]] <- c(1, 2, 3) Condition Error in `[[<-`: ! Can't recycle input of size 3 to size 2. # $<- coerces and recycles Code x$a <- "5" Condition Error in `$<-`: ! Can't convert `value` to . --- Code x$a <- c(1, 2, 3) Condition Error in `$<-`: ! Can't recycle input of size 3 to size 2. # cast: list to list_of Code vec_cast(list("x"), to) Condition Error: ! Can't convert `..1` to . --- Code vec_cast(list(1:3), to) Condition Error: ! Can't recycle `..1` (size 3) to size 2. --- Code vec_cast(list(1:3), to) Condition Error: ! Can't recycle `..1` (size 3) to size 2. --- Code vec_cast(list("x"), to) Condition Error: ! Can't convert `..1` to . # cast: list_of to list_of Code vec_cast(x, to) Condition Error: ! Can't convert `..1` to . --- Code vec_cast(y, to) Condition Error: ! Can't recycle `..1` (size 2) to size 3. # list coercions are symmetric and unchanging Code print(mat) Output list list_of list_of list_of list "list" "list" "list" "list" list_of "list" "list_of" "list_of" "list" list_of "list" "list_of" "list_of" "list" list_of "list" "list" "list" "list_of" # error call is passed to inner cast methods Code (expect_error(fn1())) Output Error in `fn1()`: ! Can't convert `..1` to . Code (expect_error(fn2())) Output Error in `fn2()`: ! Can't convert `..1` to . vctrs/tests/testthat/_snaps/subscript-loc.md0000644000176200001440000011325515157321017021024 0ustar liggesusers# vec_as_location2() requires integer or character inputs Code (expect_error(vec_as_location2(TRUE, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `TRUE`. x `TRUE` must be numeric or character, not `TRUE`. Code (expect_error(vec_as_location2(mtcars, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `mtcars`. x `mtcars` must be numeric or character, not a object. Code (expect_error(vec_as_location2(env(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `env()`. x `env()` must be numeric or character, not an environment. Code (expect_error(vec_as_location2(foobar(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `foobar()`. x `foobar()` must be numeric or character, not a object. Code (expect_error(vec_as_location2(2.5, 10L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `2.5`. x Can't convert from `2.5` to due to loss of precision. Code (expect_error(vec_as_location2(Inf, 10L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `Inf`. x Can't convert from `Inf` to due to loss of precision. Code (expect_error(vec_as_location2(-Inf, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `-Inf`. x Can't convert from `-Inf` to due to loss of precision. Code # Idem with custom `arg` (expect_error(vec_as_location2(foobar(), 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x `foo` must be numeric or character, not a object. Code (expect_error(vec_as_location2(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Can't convert from `foo` to due to loss of precision. Code (expect_error(with_tibble_rows(vec_as_location2(TRUE)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't remove row with `foo(bar)`. x `foo(bar)` must be numeric or character, not `TRUE`. # vec_as_location() requires integer, character, or logical inputs Code (expect_error(vec_as_location(mtcars, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `mtcars`. x `mtcars` must be logical, numeric, or character, not a object. Code (expect_error(vec_as_location(env(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `env()`. x `env()` must be logical, numeric, or character, not an environment. Code (expect_error(vec_as_location(foobar(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `foobar()`. x `foobar()` must be logical, numeric, or character, not a object. Code (expect_error(vec_as_location(2.5, 10L), class = "vctrs_error_subscript_type")) Output Error: ! Can't subset elements with `2.5`. x Can't convert from `2.5` to due to loss of precision. Code (expect_error(vec_as_location(list(), 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `list()`. x `list()` must be logical, numeric, or character, not an empty list. Code (expect_error(vec_as_location(function() NULL, 10L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `function() NULL`. x `function() NULL` must be logical, numeric, or character, not a function. Code (expect_error(vec_as_location(Sys.Date(), 3L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `Sys.Date()`. x `Sys.Date()` must be logical, numeric, or character, not a object. Code # Idem with custom `arg` (expect_error(vec_as_location(env(), 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x `foo` must be logical, numeric, or character, not an environment. Code (expect_error(vec_as_location(foobar(), 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x `foo` must be logical, numeric, or character, not a object. Code (expect_error(vec_as_location(2.5, 3L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Can't convert from `foo` to due to loss of precision. # vec_as_location() and variants check for OOB elements (#1605) Code # Numeric indexing (expect_error(vec_as_location(10L, 2L), class = "vctrs_error_subscript_oob")) Output Error: ! Can't subset elements past the end. i Location 10 doesn't exist. i There are only 2 elements. Code (expect_error(vec_as_location(-10L, 2L), class = "vctrs_error_subscript_oob")) Output Error: ! Can't negate elements past the end. i Location 10 doesn't exist. i There are only 2 elements. Code (expect_error(vec_as_location2(10L, 2L), class = "vctrs_error_subscript_oob")) Output Error: ! Can't extract elements past the end. i Location 10 doesn't exist. i There are only 2 elements. Code # Character indexing (expect_error(vec_as_location("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(vec_as_location2("foo", 1L, names = "bar"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't extract elements that don't exist. x Element `foo` doesn't exist. Code (expect_error(vec_as_location2("foo", 1L, names = "bar", call = call("baz")), class = "vctrs_error_subscript_oob")) Output Error in `baz()`: ! Can't extract elements that don't exist. x Element `foo` doesn't exist. # vec_as_location2() requires length 1 inputs Code (expect_error(vec_as_location2(1:2, 2L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `1:2`. x Subscript `1:2` must be size 1, not 2. Code (expect_error(vec_as_location2(c("foo", "bar"), 2L, c("foo", "bar")), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `c("foo", "bar")`. x Subscript `c("foo", "bar")` must be size 1, not 2. Code # Idem with custom `arg` (expect_error(vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be size 1, not 2. Code (expect_error(vec_as_location2(mtcars, 10L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x `foo` must be numeric or character, not a object. Code (expect_error(vec_as_location2(1:2, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be size 1, not 2. # vec_as_location2() requires positive integers Code (expect_error(vec_as_location2(0, 2L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `0`. x Subscript `0` must be a positive location, not 0. Code (expect_error(vec_as_location2(-1, 2L), class = "vctrs_error_subscript_type")) Output Error: ! Can't extract element with `-1`. x Subscript `-1` must be a positive location, not -1. Code # Idem with custom `arg` (expect_error(vec_as_location2(0, 2L, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a positive location, not 0. # vec_as_location2() fails with NA Code (expect_error(vec_as_location2(na_int, 2L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `na_int`. x Subscript `na_int` must be a location, not an integer `NA`. Code (expect_error(vec_as_location2(na_chr, 1L, names = "foo"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract element with `na_chr`. x Subscript `na_chr` must be a location, not a character `NA`. Code # Idem with custom `arg` (expect_error(vec_as_location2(na_int, 2L, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a location, not an integer `NA`. # num_as_location() optionally forbids negative indices Code (expect_error(num_as_location(dbl(1, -1), 2L, negative = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `dbl(1, -1)`. x Subscript `dbl(1, -1)` can't contain negative locations. # num_as_location() optionally forbids zero indices Code (expect_error(num_as_location(0L, 1L, zero = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `0L`. x Subscript `0L` can't contain `0` values. i It has a `0` value at location 1. Code (expect_error(num_as_location(c(0, 0, 0, 0, 0, 0), 1, zero = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `c(0, 0, 0, 0, 0, 0)`. x Subscript `c(0, 0, 0, 0, 0, 0)` can't contain `0` values. i It has 6 `0` values at locations 1, 2, 3, 4, 5, etc. # vec_as_location() checks for mix of negative and missing locations Code (expect_error(vec_as_location(-c(1L, NA), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `-c(1L, NA)`. x Negative locations can't have missing values. i Subscript `-c(1L, NA)` has a missing value at location 2. Code (expect_error(vec_as_location(-c(1L, rep(NA, 10)), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `-c(1L, rep(NA, 10))`. x Negative locations can't have missing values. i Subscript `-c(1L, rep(NA, 10))` has 10 missing values at locations 2, 3, 4, 5, 6, etc. # vec_as_location() checks for mix of negative and positive locations Code (expect_error(vec_as_location(c(-1L, 1L), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `c(-1L, 1L)`. x Negative and positive locations can't be mixed. i Subscript `c(-1L, 1L)` has a positive value at location 2. Code (expect_error(vec_as_location(c(-1L, rep(1L, 10)), 30), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `c(-1L, rep(1L, 10))`. x Negative and positive locations can't be mixed. i Subscript `c(-1L, rep(1L, 10))` has 10 positive values at locations 2, 3, 4, 5, 6, etc. # logical subscripts must match size of indexed vector Code (expect_error(vec_as_location(c(TRUE, FALSE), 3), class = "vctrs_error_subscript_size") ) Output Error: ! Can't subset elements with `c(TRUE, FALSE)`. x Logical subscript `c(TRUE, FALSE)` must be size 1 or 3, not 2. # character subscripts require named vectors Code (expect_error(vec_as_location(letters[1], 3), "unnamed vector")) Output Error in `vec_as_location()`: ! Can't use character names to index an unnamed vector. # can optionally extend beyond the end Code (expect_error(num_as_location(3, 1, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript `3` contains non-consecutive location 3. Code (expect_error(num_as_location(c(1, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript `c(1, 3)` contains non-consecutive location 3. Code (expect_error(num_as_location(c(1:5, 7), 3, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript `c(1:5, 7)` contains non-consecutive locations 4 and 7. Code (expect_error(num_as_location(c(1:5, 7, 1), 3, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript `c(1:5, 7, 1)` contains non-consecutive locations 4 and 7. Code (expect_error(class = "vctrs_error_subscript_oob", num_as_location(c(1:5, 7, 1, 10), 3, oob = "extend"))) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 3. x Subscript `c(1:5, 7, 1, 10)` contains non-consecutive locations 4, 7, and 10. # num_as_location() errors when inverting oob negatives unless `oob = 'remove'` (#1630) Code num_as_location(-4, 3, oob = "error", negative = "invert") Condition Error: ! Can't negate elements past the end. i Location 4 doesn't exist. i There are only 3 elements. --- Code num_as_location(c(-4, 4, 5), 3, oob = "extend", negative = "invert") Condition Error: ! Can't negate elements past the end. i Location 4 doesn't exist. i There are only 3 elements. # num_as_location() errors on disallowed zeros when inverting negatives (#1612) Code num_as_location(c(0, -1), n = 2L, negative = "invert", zero = "error") Condition Error: ! Can't subset elements with `c(0, -1)`. x Subscript `c(0, -1)` can't contain `0` values. i It has a `0` value at location 1. --- Code num_as_location(c(-1, 0), n = 2L, negative = "invert", zero = "error") Condition Error: ! Can't subset elements with `c(-1, 0)`. x Subscript `c(-1, 0)` can't contain `0` values. i It has a `0` value at location 2. # num_as_location() with `oob = 'extend'` doesn't allow ignored oob negative values (#1614) Code num_as_location(-6L, 5L, oob = "extend", negative = "ignore") Condition Error: ! Can't negate elements past the end. i Location 6 doesn't exist. i There are only 5 elements. --- Code num_as_location(c(-7L, 6L), 5L, oob = "extend", negative = "ignore") Condition Error: ! Can't negate elements past the end. i Location 7 doesn't exist. i There are only 5 elements. --- Code num_as_location(c(-7L, NA), 5L, oob = "extend", negative = "ignore") Condition Error: ! Can't negate elements past the end. i Location 7 doesn't exist. i There are only 5 elements. # num_as_location() with `oob = 'error'` reports negative and positive oob values Code num_as_location(c(-6L, 7L), n = 5L, oob = "error", negative = "ignore") Condition Error: ! Can't subset elements past the end. i Locations 6 and 7 don't exist. i There are only 5 elements. # missing values are supported in error formatters Code (expect_error(num_as_location(c(1, NA, 2, 3), 1), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements past the end. i Locations 2 and 3 don't exist. i There is only 1 element. Code (expect_error(num_as_location(c(1, NA, 3), 1, oob = "extend"), class = "vctrs_error_subscript_oob") ) Output Error: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 1. x Subscript `c(1, NA, 3)` contains non-consecutive location 3. # can disallow missing values Code (expect_error(vec_as_location(c(1, NA), 2, missing = "error"), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript can't contain missing values. x It has a missing value at location 2. Code (expect_error(vec_as_location(c(1, NA, 2, NA), 2, missing = "error", arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 2 and 4. Code (expect_error(with_tibble_cols(vec_as_location(c(1, NA, 2, NA), 2, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has missing values at locations 2 and 4. Code (expect_error(with_tibble_cols(vec_as_location(NA, 1, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 1. Code (expect_error(with_tibble_cols(vec_as_location(NA, 3, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 1. Code (expect_error(with_tibble_cols(vec_as_location(c(TRUE, NA, FALSE), 3, missing = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 2. Code (expect_error(with_tibble_cols(vec_as_location(NA_character_, 2, missing = "error", names = c("x", "y"))), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain missing values. x It has a missing value at location 1. # can alter logical missing value handling (#1595) Code vec_as_location(x, n = 4L, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 2 and 4. --- Code vec_as_location(x, n = 2L, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has a missing value at location 1. # can alter character missing value handling (#1595) Code vec_as_location(x, n = 2L, names = names, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 1 and 3. # can alter integer missing value handling (#1595) Code vec_as_location(x, n = 4L, missing = "error") Condition Error: ! Can't subset elements. x Subscript can't contain missing values. x It has missing values at locations 1 and 3. # can alter negative integer missing value handling (#1595) Code num_as_location(x, n = 4L, missing = "propagate", negative = "invert") Condition Error: ! Can't subset elements with `x`. x Negative locations can't have missing values. i Subscript `x` has 2 missing values at locations 2 and 3. --- Code num_as_location(x, n = 4L, missing = "error", negative = "invert") Condition Error: ! Can't subset elements with `x`. x Negative locations can't have missing values. i Subscript `x` has 2 missing values at locations 2 and 3. # empty string character indices never match empty string names (#1489) Code vec_as_location("", n = 2L, names = names) Condition Error: ! Can't subset elements. x Subscript can't contain the empty string. x It has an empty string at location 1. --- Code vec_as_location(c("", "y", ""), n = 2L, names = names) Condition Error: ! Can't subset elements. x Subscript can't contain the empty string. x It has an empty string at locations 1 and 3. # can customise subscript type errors Code # With custom `arg` (expect_error(num_as_location(-1, 2, negative = "error", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Subscript `foo` can't contain negative locations. Code (expect_error(num_as_location2(-1, 2, negative = "error", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a positive location, not -1. Code (expect_error(vec_as_location2(0, 2, arg = "foo", call = call("my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a positive location, not 0. Code (expect_error(vec_as_location2(na_dbl, 2, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be a location, not an integer `NA`. Code (expect_error(vec_as_location2(c(1, 2), 2, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't extract element with `foo`. x Subscript `foo` must be size 1, not 2. Code (expect_error(vec_as_location(c(TRUE, FALSE), 3, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_size")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Logical subscript `foo` must be size 1 or 3, not 2. Code (expect_error(vec_as_location(c(-1, NA), 3, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Negative locations can't have missing values. i Subscript `foo` has a missing value at location 2. Code (expect_error(vec_as_location(c(-1, 1), 3, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Negative and positive locations can't be mixed. i Subscript `foo` has a positive value at location 2. Code (expect_error(num_as_location(c(1, 4), 2, oob = "extend", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_oob")) Output Error in `my_function()`: ! Can't subset elements beyond the end with non-consecutive locations. i Input has size 2. x Subscript `foo` contains non-consecutive location 4. Code (expect_error(num_as_location(0, 1, zero = "error", arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_type")) Output Error in `my_function()`: ! Can't subset elements with `foo`. x Subscript `foo` can't contain `0` values. i It has a `0` value at location 1. Code # With tibble columns (expect_error(with_tibble_cols(num_as_location(-1, 2, negative = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain negative locations. Code (expect_error(with_tibble_cols(num_as_location2(-1, 2, negative = "error")), class = "vctrs_error_subscript_type")) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be a positive location, not -1. Code (expect_error(with_tibble_cols(vec_as_location2(0, 2)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be a positive location, not 0. Code (expect_error(with_tibble_cols(vec_as_location2(na_dbl, 2)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be a location, not an integer `NA`. Code (expect_error(with_tibble_cols(vec_as_location2(c(1, 2), 2)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename column with `foo(bar)`. x Subscript `foo(bar)` must be size 1, not 2. Code (expect_error(with_tibble_cols(vec_as_location(c(TRUE, FALSE), 3)), class = "vctrs_error_subscript_size") ) Output Error: ! Can't rename columns with `foo(bar)`. x Logical subscript `foo(bar)` must be size 1 or 3, not 2. Code (expect_error(with_tibble_cols(vec_as_location(c(-1, NA), 3)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x Negative locations can't have missing values. i Subscript `foo(bar)` has a missing value at location 2. Code (expect_error(with_tibble_cols(vec_as_location(c(-1, 1), 3)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x Negative and positive locations can't be mixed. i Subscript `foo(bar)` has a positive value at location 2. Code (expect_error(with_tibble_cols(num_as_location(c(1, 4), 2, oob = "extend")), class = "vctrs_error_subscript_oob")) Output Error: ! Can't rename columns beyond the end with non-consecutive locations. i Input has size 2. x Subscript `foo(bar)` contains non-consecutive location 4. Code (expect_error(with_tibble_cols(num_as_location(0, 1, zero = "error")), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x Subscript `foo(bar)` can't contain `0` values. i It has a `0` value at location 1. # can customise OOB errors Code (expect_error(vec_slice(set_names(letters), "foo"), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code # With custom `arg` (expect_error(vec_as_location(30, length(letters), arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_oob")) Output Error in `my_function()`: ! Can't subset elements past the end. i Location 30 doesn't exist. i There are only 26 elements. Code (expect_error(vec_as_location("foo", NULL, letters, arg = "foo", call = call( "my_function")), class = "vctrs_error_subscript_oob")) Output Error in `my_function()`: ! Can't subset elements that don't exist. x Element `foo` doesn't exist. Code # With tibble columns (expect_error(with_tibble_cols(vec_slice(set_names(letters), "foo")), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't rename columns that don't exist. x Column `foo` doesn't exist. Code (expect_error(with_tibble_cols(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't rename columns that don't exist. i Location 30 doesn't exist. i There are only 26 columns. Code (expect_error(with_tibble_cols(vec_slice(set_names(letters), -30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't rename columns that don't exist. i Location 30 doesn't exist. i There are only 26 columns. Code # With tibble rows (expect_error(with_tibble_rows(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't remove rows that don't exist. x Rows `foo` and `bar` don't exist. Code (expect_error(with_tibble_rows(vec_slice(set_names(letters), 1:30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't remove rows past the end. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 rows. Code (expect_error(with_tibble_rows(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't remove rows past the end. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 rows. Code # With tidyselect select (expect_error(with_tidyselect_select(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't select columns that don't exist. x Columns `foo` and `bar` don't exist. Code (expect_error(with_tidyselect_select(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob") ) Output Error in `vec_slice()`: ! Can't select columns past the end. i Location 30 doesn't exist. i There are only 26 columns. Code (expect_error(with_tidyselect_select(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't select columns past the end. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 columns. Code # With tidyselect relocate (expect_error(with_tidyselect_relocate(vec_slice(set_names(letters), c("foo", "bar"))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't relocate columns that don't exist. x Columns `foo` and `bar` don't exist. Code (expect_error(with_tidyselect_relocate(vec_slice(set_names(letters), 30)), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't relocate columns that don't exist. i Location 30 doesn't exist. i There are only 26 columns. Code (expect_error(with_tidyselect_relocate(vec_slice(set_names(letters), -(1:30))), class = "vctrs_error_subscript_oob")) Output Error in `vec_slice()`: ! Can't relocate columns that don't exist. i Locations 27, 28, 29, and 30 don't exist. i There are only 26 columns. # vec_as_location() checks dimensionality Code (expect_error(vec_as_location(matrix(TRUE, nrow = 1), 3L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `matrix(TRUE, nrow = 1)`. x Subscript `matrix(TRUE, nrow = 1)` must be a simple vector, not a matrix. Code (expect_error(vec_as_location(array(TRUE, dim = c(1, 1, 1)), 3L), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements with `array(TRUE, dim = c(1, 1, 1))`. x Subscript `array(TRUE, dim = c(1, 1, 1))` must be a simple vector, not an array. Code (expect_error(with_tibble_rows(vec_as_location(matrix(TRUE, nrow = 1), 3L)), class = "vctrs_error_subscript_type")) Output Error: ! Can't remove rows with `foo(bar)`. x Subscript `foo(bar)` must be a simple vector, not a matrix. # vec_as_location() UI Code vec_as_location(1, 1L, missing = "bogus") Condition Error in `vec_as_location()`: ! `missing` must be one of "propagate", "remove", or "error". # num_as_location() UI Code num_as_location(1, 1L, missing = "bogus") Condition Error in `num_as_location()`: ! `missing` must be one of "propagate", "remove", or "error". --- Code num_as_location(1, 1L, negative = "bogus") Condition Error in `num_as_location()`: ! `negative` must be one of "invert", "error", or "ignore". --- Code num_as_location(1, 1L, oob = "bogus") Condition Error in `num_as_location()`: ! `oob` must be one of "error", "remove", or "extend". --- Code num_as_location(1, 1L, zero = "bogus") Condition Error in `num_as_location()`: ! `zero` must be one of "remove", "error", or "ignore". # vec_as_location2() UI Code vec_as_location2(1, 1L, missing = "bogus") Condition Error in `vec_as_location2_result()`: ! `missing` must be one of "error" or "propagate", not "bogus". vctrs/tests/testthat/_snaps/type-factor.md0000644000176200001440000000046115157321016020461 0ustar liggesusers# factor/character coercions are symmetric and unchanging Code print(mat) Output ordered<> factor<> character ordered<> "ordered<>" NA "character" factor<> NA "factor<>" "character" character "character" "character" "character" vctrs/tests/testthat/_snaps/order.md0000644000176200001440000000163515157321011017336 0ustar liggesusers# `direction` is recycled right with array columns (#1753) Code vec_order_radix(df, direction = c("asc", "desc", "desc")) Condition Error: ! `direction` should have length 1 or length equal to the number of columns of `x` when `x` is a data frame. # `na_value` is recycled right with array columns (#1753) Code vec_order_radix(df, direction = c("smallest", "largest", "largest")) Condition Error: ! `direction` must contain only "asc" or "desc". # dots must be empty (#1647) Code vec_order(1, 2) Condition Error in `vec_order()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? --- Code vec_sort(1, 2) Condition Error in `vec_sort()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? vctrs/tests/testthat/_snaps/print-str.md0000644000176200001440000000203615157321010020160 0ustar liggesusers# show attributes Code obj_str(x) Output int [1:100] 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16... @ x: chr "a string" @ y: int [1:20] 1 2 3 4 5 6 7 8 9 10 ... @ z:'data.frame': 3 obs. of 1 variable: ..$ x: int [1:3] 1 2 3 --- Code obj_str(mtcars) Output df[,11] [1:32] 'data.frame': 32 obs. of 11 variables: $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... $ cyl : num 6 6 4 6 8 6 8 4 4 6 ... $ disp: num 160 160 108 258 360 ... $ hp : num 110 110 93 110 175 105 245 62 95 123 ... $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... $ wt : num 2.62 2.88 2.32 3.21 3.44 ... $ qsec: num 16.5 17 18.6 19.4 17 ... $ vs : num 0 0 1 1 0 1 0 1 1 1 ... $ am : num 1 1 1 0 0 0 0 0 0 0 ... $ gear: num 4 4 4 3 3 3 3 4 4 4 ... $ carb: num 4 4 1 1 2 1 4 2 2 4 ... @ row.names: chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ... vctrs/tests/testthat/_snaps/rep.md0000644000176200001440000000754415157321011017016 0ustar liggesusers# `vec_rep()` validates `times` Code (expect_error(my_vec_rep(1, "x"), class = "vctrs_error_incompatible_type")) Output Error in `my_vec_rep()`: ! Can't convert `my_times` to . Code (expect_error(my_vec_rep(1, c(1, 2)))) Output Error in `my_vec_rep()`: ! `my_times` must be a single number. Code (expect_error(my_vec_rep(1, -1))) Output Error in `my_vec_rep()`: ! `my_times` must be a positive number. Code (expect_error(my_vec_rep(1, NA_integer_))) Output Error in `my_vec_rep()`: ! `my_times` can't be missing. --- Code my_vec_rep(1, "x") Condition Error in `my_vec_rep()`: ! Can't convert `my_times` to . --- Code my_vec_rep(1, c(1, 2)) Condition Error in `my_vec_rep()`: ! `my_times` must be a single number. --- Code my_vec_rep(1, -1) Condition Error in `my_vec_rep()`: ! `my_times` must be a positive number. --- Code my_vec_rep(1, NA_integer_) Condition Error in `my_vec_rep()`: ! `my_times` can't be missing. # `vec_rep_each()` validates `times` Code (expect_error(my_vec_rep_each(1, "x"), class = "vctrs_error_incompatible_type")) Output Error in `my_vec_rep_each()`: ! Can't convert `my_times` to . Code (expect_error(my_vec_rep_each(1, -1))) Output Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 1 is negative. Code (expect_error(my_vec_rep_each(c(1, 2), c(1, -1)))) Output Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 2 is negative. Code (expect_error(my_vec_rep_each(1, NA_integer_))) Output Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 1 is missing. Code (expect_error(my_vec_rep_each(c(1, 2), c(1, NA_integer_)))) Output Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 2 is missing. --- Code my_vec_rep_each(1, "x") Condition Error in `my_vec_rep_each()`: ! Can't convert `my_times` to . --- Code my_vec_rep_each(1, -1) Condition Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 1 is negative. --- Code my_vec_rep_each(c(1, 2), c(1, -1)) Condition Error in `my_vec_rep_each()`: ! `my_times` must be a vector of positive numbers. Location 2 is negative. --- Code my_vec_rep_each(1, NA_integer_) Condition Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 1 is missing. --- Code my_vec_rep_each(c(1, 2), c(1, NA_integer_)) Condition Error in `my_vec_rep_each()`: ! `my_times` can't be missing. Location 2 is missing. # `vec_rep_each()` uses recyclying errors Code (expect_error(my_vec_rep_each(1:2, 1:3), class = "vctrs_error_recycle_incompatible_size") ) Output Error in `my_vec_rep_each()`: ! Can't recycle `my_times` (size 3) to size 2. --- Code my_vec_rep_each(1:2, 1:3) Condition Error in `my_vec_rep_each()`: ! Can't recycle `my_times` (size 3) to size 2. # errors on scalars Code vec_unrep(environment()) Condition Error in `vec_unrep()`: ! `x` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. vctrs/tests/testthat/_snaps/c.md0000644000176200001440000002141215157320775016460 0ustar liggesusers# common type failure uses error call and error arg (#1641, #1692) Code vec_c("x", 1, .error_call = call("foo"), .error_arg = "arg") Condition Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . --- Code vec_c("x", .ptype = integer(), .error_call = call("foo"), .error_arg = "arg") Condition Error in `foo()`: ! Can't convert `arg[[1]]` to . # common type failure uses positional errors Code (expect_error(vec_c(1, a = "x", 2))) Output Error in `vec_c()`: ! Can't combine `..1` and `a` . Code (expect_error(vec_c(1, a = "x", 2, .ptype = double(), .error_arg = "arg"))) Output Error in `vec_c()`: ! Can't convert `arg$a` to . Code (expect_error(vec_c(1, a = 2.5, .ptype = integer()))) Output Error in `vec_c()`: ! Can't convert from `a` to due to loss of precision. * Locations: 1 # vec_c() includes index in argument tag Code vec_c(df1, df2) Condition Error in `vec_c()`: ! Can't combine `..1$x$y$z` and `..2$x$y$z` . --- Code vec_c(df1, df1, df2) Condition Error in `vec_c()`: ! Can't combine `..1$x$y$z` and `..3$x$y$z` . --- Code vec_c(foo = df1, bar = df2) Condition Error in `vec_c()`: ! Can't combine `foo$x$y$z` and `bar$x$y$z` . # vec_c() can repair names quietly Code res_unique <- vec_c(x = TRUE, x = 0, .name_repair = "unique_quiet") res_universal <- vec_c(`if` = TRUE, `in` = 0, .name_repair = "universal_quiet") # vec_c() fails with complex foreign S3 classes Code x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error(vec_c(x, y), class = "vctrs_error_incompatible_type")) Output Error in `vec_c()`: ! Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(vec_c(x, y, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # vec_c() fails with complex foreign S4 classes Code joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error(vec_c(joe, jane), class = "vctrs_error_incompatible_type")) Output Error in `vec_c()`: ! Can't combine `..1` and `..2` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . Code (expect_error(vec_c(joe, jane, .error_call = call("foo"), .error_arg = "arg"), class = "vctrs_error_incompatible_type")) Output Error in `foo()`: ! Can't combine `arg[[1]]` and `arg[[2]]` . x Some attributes are incompatible. i The author of the class should implement vctrs methods. i See . # vec_c() fallback doesn't support (most) `name_spec` or `ptype` Code (expect_error(with_c_foobar(vec_c(foobar(1), foobar(2), .name_spec = "{outer}_{inner}")), "name specification")) Output Error in `vec_c()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . Code (expect_error(with_c_foobar(vec_c(foobar(1), foobar(2), .ptype = "")), class = "vctrs_error_incompatible_type") ) Output Error in `vec_c()`: ! Can't convert `..1` to . Code (expect_error(with_c_foobar(vec_c(foobar(1), foobar(2), .error_call = call( "foo"), .name_spec = "{outer}_{inner}")))) Output Error in `foo()`: ! Can't use a name specification with non-vctrs types. vctrs methods must be implemented for class `vctrs_foobar`. See . # can ignore names in `vec_c()` by providing a `zap()` name-spec (#232) Code (expect_error(vec_c(a = c(b = letters), b = 1, .name_spec = zap()), class = "vctrs_error_incompatible_type") ) Output Error in `vec_c()`: ! Can't combine `a` and `b` . # can ignore outer names in `vec_c()` by providing an 'inner' name-spec (#1988) Code vec_c(x = c(a = 1), y = c(b = "2"), .name_spec = "inner") Condition Error in `vec_c()`: ! Can't combine `x` and `y` . # calls cast method even with empty objects Code vec_c(foobar(integer()), foobar(integer(), foo = "bar")) Condition Error in `vec_c()`: ! Can't convert `..2` to . # concatenation performs expected allocations Code ints <- rep(list(1L), 100) dbls <- rep(list(1), 100) # # `vec_c()` # Integers with_memory_prof(vec_c_list(ints)) Output [1] 1.48KB Code # Doubles with_memory_prof(vec_c_list(dbls)) Output [1] 1.87KB Code # Integers to integer with_memory_prof(vec_c_list(ints, ptype = int())) Output [1] 1.27KB Code # Doubles to integer with_memory_prof(vec_c_list(dbls, ptype = int())) Output [1] 1.27KB Code # # `list_unchop()` # Integers with_memory_prof(list_unchop(ints)) Output [1] 1.48KB Code # Doubles with_memory_prof(list_unchop(dbls)) Output [1] 1.87KB Code # Integers to integer with_memory_prof(list_unchop(ints, ptype = int())) Output [1] 1.27KB Code # Doubles to integer with_memory_prof(list_unchop(dbls, ptype = int())) Output [1] 1.27KB Code # # Concatenation with names # Named integers ints <- rep(list(set_names(1:3, letters[1:3])), 100) with_memory_prof(list_unchop(ints)) Output [1] 4.65KB Code # Named matrices mat <- matrix(1:4, 2, dimnames = list(c("foo", "bar"))) mats <- rep(list(mat), 100) with_memory_prof(list_unchop(mats)) Output [1] 4.26KB Code # Data frame with named columns df <- data_frame(x = set_names(as.list(1:2), c("a", "b")), y = set_names(1:2, c( "A", "B")), z = data_frame(Z = set_names(1:2, c("Za", "Zb")))) dfs <- rep(list(df), 100) with_memory_prof(list_unchop(dfs)) Output [1] 9.13KB Code # Data frame with rownames (non-repaired, non-recursive case) df <- data_frame(x = 1:2) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 6.37KB Code # Data frame with rownames (repaired, non-recursive case) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 12.5KB Code # Data frame with rownames (non-repaired, recursive case) (#1217) df <- data_frame(x = 1:2, y = data_frame(x = 1:2)) dfs <- rep(list(df), 100) dfs <- map2(dfs, seq_along(dfs), set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 11.7KB Code # Data frame with rownames (repaired, recursive case) (#1217) dfs <- map(dfs, set_rownames_recursively) with_memory_prof(list_unchop(dfs)) Output [1] 24KB Code # list-ofs (#1496) make_list_of <- (function(n) { df <- tibble::tibble(x = new_list_of(vec_chop(1:n), ptype = integer())) vec_chop(df) }) with_memory_prof(list_unchop(make_list_of(1000))) Output [1] 59.7KB Code with_memory_prof(list_unchop(make_list_of(2000))) Output [1] 118KB Code with_memory_prof(list_unchop(make_list_of(4000))) Output [1] 235KB vctrs/tests/testthat/_snaps/complete.md0000644000176200001440000000133215157320775020045 0ustar liggesusers# catches `NULL` data frame columns Code vec_detect_complete(df) Condition Error in `vec_detect_complete()`: ! Unexpected `NULL` column found in a data frame. # catches scalar objects Code vec_detect_complete(lm(1 ~ 1)) Condition Error in `vec_size()`: ! `x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. vctrs/tests/testthat/_snaps/recycle.md0000644000176200001440000000736115157321011017653 0ustar liggesusers# vec_recycle_common() reports error context Code (expect_error(my_function(this_arg = 1:2, that_arg = int()))) Output Error in `my_function()`: ! Can't recycle `this_arg` (size 2) to match `that_arg` (size 0). Code (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2))) Output Error in `my_function()`: ! Can't recycle `that_arg` (size 0) to size 2. Code (expect_error(my_function(this_arg = 1:2, that_arg = int(), .arg = "my_arg"))) Output Error in `my_function()`: ! Can't recycle `my_arg$this_arg` (size 2) to match `my_arg$that_arg` (size 0). Code (expect_error(my_function(this_arg = 1:2, that_arg = int(), .size = 2, .arg = "my_arg")) ) Output Error in `my_function()`: ! Can't recycle `my_arg$that_arg` (size 0) to size 2. # vec_recycle(): incompatible lengths get error messages Code (expect_error(vec_recycle(x2, 1), class = "vctrs_error_recycle_incompatible_size") ) Output Error: ! Can't recycle input of size 2 to size 1. # recycling to size 1 has informative error Code (expect_error(vec_recycle(1:2, 1), class = "vctrs_error_recycle_incompatible_size") ) Output Error: ! Can't recycle input of size 2 to size 1. # incompatible recycling size has informative error Code vec_recycle(1:2, 4) Condition Error: ! Can't recycle input of size 2 to size 4. --- Code vec_recycle(1:2, 4, x_arg = "foo") Condition Error: ! Can't recycle `foo` (size 2) to size 4. # vec_recycle_common(): incompatible lengths get error messages Code (expect_error(vec_recycle_common(1:2, 1:3), class = "vctrs_error_incompatible_size") ) Output Error: ! Can't recycle `..1` (size 2) to match `..2` (size 3). # vec_recycle_common errors on scalars Code vec_recycle_common(1, lm(1 ~ 1)) Condition Error: ! `..2` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_recycle_common(1, NULL, lm(1 ~ 1)) Condition Error: ! `..3` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # recycling matrices respects incompatible sizes Code (expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size") ) Output Error: ! Can't recycle `..1` (size 2) to match `..2` (size 4). # recycling data frames respects incompatible sizes Code (expect_error(vec_recycle_common(x2, x), class = "vctrs_error_incompatible_size") ) Output Error: ! Can't recycle `..1` (size 2) to match `..2` (size 3). vctrs/tests/testthat/_snaps/interval.md0000644000176200001440000000351615157321000020045 0ustar liggesusers# `missing` is validated Code (expect_error(vec_interval_locate_groups(1, 2, missing = "s"))) Output Error in `vec_interval_locate_groups()`: ! `missing` must be either "group" or "drop". --- Code (expect_error(vec_interval_locate_groups(1, 2, missing = c("group", "drop")))) Output Error in `vec_interval_locate_groups()`: ! `missing` must be a string. # common type is taken Code (expect_error(vec_interval_locate_groups(1, "x"))) Output Error: ! Can't combine `start` and `end` . --- Code (expect_error(vec_interval_locate_containers(1, "x"))) Output Error: ! Can't combine `start` and `end` . # `lower` and `upper` can't contain missing values Code (expect_error(vec_interval_complement(1, 2, lower = NA))) Output Error in `vec_interval_complement()`: ! `lower` can't contain missing values. Code (expect_error(vec_interval_complement(1, 2, upper = NA))) Output Error in `vec_interval_complement()`: ! `upper` can't contain missing values. Code start <- data_frame(x = 1, y = 2) end <- data_frame(x = 1, y = 3) (expect_error(vec_interval_complement(start, end, lower = data_frame(x = 1, y = NA))) ) Output Error in `vec_interval_complement()`: ! `lower` can't contain missing values. Code (expect_error(vec_interval_complement(start, end, upper = data_frame(x = 1, y = NA))) ) Output Error in `vec_interval_complement()`: ! `upper` can't contain missing values. vctrs/tests/testthat/_snaps/set.md0000644000176200001440000001000615157321012017007 0ustar liggesusers# errors nicely if common type can't be taken Code vec_set_intersect(1, "x") Condition Error in `vec_set_intersect()`: ! Can't combine `x` and `y` . --- Code vec_set_difference(1, "x") Condition Error in `vec_set_difference()`: ! Can't combine `x` and `y` . --- Code vec_set_union(1, "x") Condition Error in `vec_set_union()`: ! Can't combine `x` and `y` . --- Code vec_set_symmetric_difference(1, "x") Condition Error in `vec_set_symmetric_difference()`: ! Can't combine `x` and `y` . # dots must be empty Code vec_set_intersect(1, 2, 3) Condition Error in `vec_set_intersect()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? --- Code vec_set_difference(1, 2, 3) Condition Error in `vec_set_difference()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? --- Code vec_set_union(1, 2, 3) Condition Error in `vec_set_union()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? --- Code vec_set_symmetric_difference(1, 2, 3) Condition Error in `vec_set_symmetric_difference()`: ! `...` must be empty. x Problematic argument: * ..1 = 3 i Did you forget to name an argument? # `ptype` is respected Code vec_set_intersect(1, 1.5, ptype = integer()) Condition Error in `vec_set_intersect()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 --- Code vec_set_difference(1, 1.5, ptype = integer()) Condition Error in `vec_set_difference()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 --- Code vec_set_union(1, 1.5, ptype = integer()) Condition Error in `vec_set_union()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 --- Code vec_set_symmetric_difference(1, 1.5, ptype = integer()) Condition Error in `vec_set_symmetric_difference()`: ! Can't convert from `y` to due to loss of precision. * Locations: 1 # `x_arg` and `y_arg` can be adjusted Code vec_set_intersect(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_intersect()`: ! Can't combine `foo` and `bar` . --- Code vec_set_difference(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_difference()`: ! Can't combine `foo` and `bar` . --- Code vec_set_union(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_union()`: ! Can't combine `foo` and `bar` . --- Code vec_set_symmetric_difference(1, "2", x_arg = "foo", y_arg = "bar") Condition Error in `vec_set_symmetric_difference()`: ! Can't combine `foo` and `bar` . --- Code vec_set_intersect(1, "2", x_arg = "", y_arg = "") Condition Error in `vec_set_intersect()`: ! Can't combine and . # `error_call` can be adjusted Code my_set_intersect() Condition Error in `my_set_intersect()`: ! Can't combine `x` and `y` . --- Code my_set_difference() Condition Error in `my_set_difference()`: ! Can't combine `x` and `y` . --- Code my_set_union() Condition Error in `my_set_union()`: ! Can't combine `x` and `y` . --- Code my_set_symmetric_difference() Condition Error in `my_set_symmetric_difference()`: ! Can't combine `x` and `y` . vctrs/tests/testthat/_snaps/type-rcrd.md0000644000176200001440000000324515157321020020133 0ustar liggesusers# na.fail() works Code na.fail(x) Condition Error in `na.fail()`: ! missing values in object # print and str use format Code r Output [1] (1,1) (1,2) (1,3) (1,4) (1,5) (1,6) (1,7) (1,8) (1,9) [10] (1,10) (1,11) (1,12) (1,13) (1,14) (1,15) (1,16) (1,17) (1,18) [19] (1,19) (1,20) (1,21) (1,22) (1,23) (1,24) (1,25) (1,26) (1,27) [28] (1,28) (1,29) (1,30) (1,31) (1,32) (1,33) (1,34) (1,35) (1,36) [37] (1,37) (1,38) (1,39) (1,40) (1,41) (1,42) (1,43) (1,44) (1,45) [46] (1,46) (1,47) (1,48) (1,49) (1,50) (1,51) (1,52) (1,53) (1,54) [55] (1,55) (1,56) (1,57) (1,58) (1,59) (1,60) (1,61) (1,62) (1,63) [64] (1,64) (1,65) (1,66) (1,67) (1,68) (1,69) (1,70) (1,71) (1,72) [73] (1,73) (1,74) (1,75) (1,76) (1,77) (1,78) (1,79) (1,80) (1,81) [82] (1,82) (1,83) (1,84) (1,85) (1,86) (1,87) (1,88) (1,89) (1,90) [91] (1,91) (1,92) (1,93) (1,94) (1,95) (1,96) (1,97) (1,98) (1,99) [100] (1,100) --- Code str(r[1:10]) Output vctrs_tp [1:10] (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7), (1,8), (1... --- Code str(list(list(list(r, 1:100)))) Output List of 1 $ :List of 1 ..$ :List of 2 .. ..$ : vctrs_tp [1:100] (1,1), (1,2), (1,3), (1,4), (1,5), (1,6), (1,7)... .. ..$ : int [1:100] 1 2 3 4 5 6 7 8 9 10 ... # dots are an error (#1295) Code foo[1, 2] Condition Error in `foo[1, 2]`: ! Can't index record vectors on dimensions greater than 1. vctrs/tests/testthat/_snaps/type-tibble.md0000644000176200001440000000324315157321020020440 0ustar liggesusers# can't cast vector to tibble Code local_error_call(call("my_function")) (expect_error(vec_ptype2(v, dt), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `v` and `dt` . Code (expect_error(vec_ptype2(dt, v), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't combine `dt` and `v` . Code (expect_error(vec_cast(v, dt), class = "vctrs_error_incompatible_type")) Output Error in `my_function()`: ! Can't convert `v` to . # can use ptype2 and cast with tibble that has incorrect class vector Code local_error_call(call("my_function")) (expect_error(vec_cast(tib1, tib2), class = "vctrs_error_cast")) Output Error in `my_function()`: ! Can't convert from `tib1` > to > due to loss of precision. Code (expect_error(vec_cast(tib1, data.frame(y = 2)), class = "vctrs_error_cast")) Output Error in `my_function()`: ! Can't convert from `tib1` > to > due to loss of precision. Code (expect_error(vec_cast(data.frame(x = 1), tib2), class = "vctrs_error_cast")) Output Error in `my_function()`: ! Can't convert from `data.frame(x = 1)` > to > due to loss of precision. vctrs/tests/testthat/_snaps/expand.md0000644000176200001440000000432315157320776017520 0ustar liggesusers# inputs must be named Code vec_expand_grid(1) Condition Error in `vec_expand_grid()`: ! All inputs must be named. --- Code vec_expand_grid(x = 1, 2, y = 3) Condition Error in `vec_expand_grid()`: ! All inputs must be named. # catches duplicate names by default Code vec_expand_grid(a = 1, a = 2) Condition Error in `vec_expand_grid()`: ! Names must be unique. x These names are duplicated: * "a" at locations 1 and 2. i Use argument `.name_repair` to specify repair strategy. # errors on non vectors and mentions the element name Code vec_expand_grid(y = environment()) Condition Error in `vec_expand_grid()`: ! `y` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # can adjust the `.error_call` Code my_expand_grid() Condition Error in `my_expand_grid()`: ! `x` must be a vector, not an environment. i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. # errors nicely when expansion results in a size larger than `R_len_t` Code vec_expand_grid(x = x, y = y) Condition Error in `vec_expand_grid()`: ! Long vectors are not yet supported. Expansion results in an allocation larger than 2^31-1 elements. Attempted allocation size was 3221225469. # errors nicely when expansion results in a size larger than `R_xlen_t` Code vec_expand_grid(x = x, y = x) Condition Error in `vec_expand_grid()`: ! Result too large for an `r_ssize`. i In file './rlang/c-utils.h' at line . i This is an internal error that was detected in the vctrs package. Please report it at with a reprex () and the full backtrace. # validates `.vary` Code vec_expand_grid(.vary = 1) Condition Error in `vec_expand_grid()`: ! `.vary` must be a string or character vector. --- Code vec_expand_grid(.vary = "x") Condition Error in `vec_expand_grid()`: ! `.vary` must be one of "slowest" or "fastest", not "x". vctrs/tests/testthat/_snaps/if-else.md0000644000176200001440000002565015157321000017550 0ustar liggesusers# `condition` must be a condition vector Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not the number 1. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not the number 1. --- Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not a object. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not a object. --- Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not a logical 1D array. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not a logical 1D array. # `true`, `false`, and `missing` must be vectors Code vec_if_else(condition = TRUE, true = lm(1 ~ 1), false = 2, missing = 0) Condition Error in `vec_if_else()`: ! `true` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_if_else(condition = TRUE, true = 1, false = lm(1 ~ 1), missing = 0) Condition Error in `vec_if_else()`: ! `false` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_if_else(condition = TRUE, true = 1, false = 2, missing = lm(1 ~ 1)) Condition Error in `vec_if_else()`: ! `missing` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # `true`, `false`, and `missing` must recycle to size of `condition` Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! Can't recycle `true` (size 2) to size 1. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! Can't recycle `true` (size 2) to size 1. --- Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! Can't recycle `false` (size 2) to size 1. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! Can't recycle `false` (size 2) to size 1. --- Code vec_if_else(condition = condition, true = true, false = false, missing = missing, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! Can't recycle `missing` (size 2) to size 1. --- Code vec_if_else(condition = condition, true = true_vctr, false = false_vctr, missing = missing_vctr, ptype = ptype, condition_arg = condition_arg, true_arg = true_arg, false_arg = false_arg, missing_arg = missing_arg) Condition Error in `vec_if_else()`: ! Can't recycle `missing` (size 2) to size 1. # `ptype` overrides common type Code vec_if_else(condition = TRUE, true = 1.5, false = 2, missing = 0, ptype = integer()) Condition Error in `vec_if_else()`: ! Can't convert from `true` to due to loss of precision. * Locations: 1 --- Code vec_if_else(condition = TRUE, true = 1, false = 2.5, missing = 0, ptype = integer()) Condition Error in `vec_if_else()`: ! Can't convert from `false` to due to loss of precision. * Locations: 1 --- Code vec_if_else(condition = TRUE, true = 1, false = 2, missing = 0.5, ptype = integer()) Condition Error in `vec_if_else()`: ! Can't convert from `missing` to due to loss of precision. * Locations: 1 --- Code vec_if_else(condition = TRUE, true = 1, false = new_date(2), missing = new_date( 0), ptype = new_date()) Condition Error in `vec_if_else()`: ! Can't convert `true` to . --- Code vec_if_else(condition = TRUE, true = new_date(1), false = 2, missing = new_date( 0), ptype = new_date()) Condition Error in `vec_if_else()`: ! Can't convert `false` to . --- Code vec_if_else(condition = TRUE, true = new_date(1), false = new_date(2), missing = 0, ptype = new_date()) Condition Error in `vec_if_else()`: ! Can't convert `missing` to . # takes the common type of `true` and `false` (tidyverse/dplyr#6243) Code vec_if_else(TRUE, 1, "x") Condition Error in `vec_if_else()`: ! Can't combine `true` and `false` . --- Code vec_if_else(TRUE, 1, "x", true_arg = "t", false_arg = "f", error_call = current_env()) Condition Error: ! Can't combine `t` and `f` . # includes `missing` in the common type computation if used Code vec_if_else(TRUE, 1, 2, missing = "x") Condition Error in `vec_if_else()`: ! Can't combine `true` and `missing` . --- Code vec_if_else(TRUE, 1L, 2, missing = "x") Condition Error in `vec_if_else()`: ! Can't combine `false` and `missing` . --- Code vec_if_else(TRUE, 1, 2L, missing = "x") Condition Error in `vec_if_else()`: ! Can't combine `true` and `missing` . # `condition` must be logical (and isn't cast to logical!) Code vec_if_else(1:10, 1, 2) Condition Error in `vec_if_else()`: ! `condition` must be a logical vector, not an integer vector. # `true`, `false`, and `missing` must recycle to the size of `condition` Code vec_if_else(x < 2, bad, x) Condition Error in `vec_if_else()`: ! Can't recycle `true` (size 2) to size 3. --- Code vec_if_else(x < 2, x, bad) Condition Error in `vec_if_else()`: ! Can't recycle `false` (size 2) to size 3. --- Code vec_if_else(x < 2, x, x, missing = bad) Condition Error in `vec_if_else()`: ! Can't recycle `missing` (size 2) to size 3. # must have empty dots Code vec_if_else(TRUE, 1, 2, missing = 3, 4) Condition Error in `vec_if_else()`: ! `...` must be empty. x Problematic argument: * ..1 = 4 i Did you forget to name an argument? # `ptype` overrides the common type Code vec_if_else(TRUE, 1L, 2.5, ptype = integer()) Condition Error in `vec_if_else()`: ! Can't convert from `false` to due to loss of precision. * Locations: 1 vctrs/tests/testthat/_snaps/ptype-abbr-full.md0000644000176200001440000000045315157321011021225 0ustar liggesusers# data.frames have good default abbr and full methods Code df <- foobar(data.frame(x = 1, y = "", z = TRUE)) vec_ptype_abbr(df) Output [1] "vctrs_fb[,3]" Code vec_ptype_full(df) Output [1] "vctrs_foobar<\n x: double\n y: character\n z: logical\n>" vctrs/tests/testthat/_snaps/type-idate.md0000644000176200001440000000102215157321016020263 0ustar liggesusers# can't get common type of Date and IDate Code vec_ptype2(x, y) Condition Error: ! Can't combine `x` and `y` . --- Code vec_ptype2(y, x) Condition Error: ! Can't combine `y` and `x` . # can't cast Date to IDate Code vec_cast(x, y) Condition Error: ! Can't convert `x` to . # can't cast IDate to Date Code vec_cast(x, y) Condition Error: ! Can't convert `x` to . vctrs/tests/testthat/_snaps/runs.md0000644000176200001440000000432615157321012017213 0ustar liggesusers# errors on scalars Code vec_identify_runs(foobar()) Condition Error in `vec_identify_runs()`: ! `x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. --- Code vec_run_sizes(foobar()) Condition Error in `vec_run_sizes()`: ! `x` must be a vector, not a object. x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . i If this object comes from a package, please report this error to the package author. i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. # vec_locate_run_bounds() validates `which` Code vec_locate_run_bounds(1, which = "x") Condition Error in `vec_locate_run_bounds()`: ! `which` must be one of "start" or "end", not "x". --- Code vec_locate_run_bounds(1, which = 1) Condition Error in `vec_locate_run_bounds()`: ! `which` must be a string or character vector. --- Code vec_locate_run_bounds(1, which = c("foo", "bar")) Condition Error in `vec_locate_run_bounds()`: ! `arg` must be length 1 or a permutation of `c("start", "end")`. # vec_detect_run_bounds() validates `which` Code vec_detect_run_bounds(1, which = "x") Condition Error in `vec_detect_run_bounds()`: ! `which` must be one of "start" or "end", not "x". --- Code vec_detect_run_bounds(1, which = 1) Condition Error in `vec_detect_run_bounds()`: ! `which` must be a string or character vector. --- Code vec_detect_run_bounds(1, which = c("foo", "bar")) Condition Error in `vec_detect_run_bounds()`: ! `arg` must be length 1 or a permutation of `c("start", "end")`. vctrs/tests/testthat/_snaps/lifecycle-deprecated.md0000644000176200001440000000134615157320777022301 0ustar liggesusers# vec_unchop() is soft-deprecated Code vec_unchop(list(1), indices = list(1)) Condition Warning: `vec_unchop()` was deprecated in vctrs 0.5.0. i Please use `list_unchop()` instead. Output [1] 1 # vec_equal_na() is soft-deprecated Code vec_equal_na(c(1, NA)) Condition Warning: `vec_equal_na()` was deprecated in vctrs 0.5.0. i Please use `vec_detect_missing()` instead. Output [1] FALSE TRUE # vec_check_list() still works Code vec_check_list(1) Condition Error: ! `1` must be a list, not the number 1. --- Code my_check(1) Condition Error in `my_check()`: ! `x` must be a list, not the number 1. vctrs/tests/testthat/_snaps/hash.md0000644000176200001440000001042215157320777017162 0ustar liggesusers# hashes are consistent from run to run Code hash Output $lgl [1] 70 a2 85 ef b9 79 37 9e 59 df 73 0b $int [1] 70 a2 85 ef bf 3c 2c cf e0 2d 28 24 3e 2c d4 c2 86 cd 44 6a c1 c6 22 fb 7d [26] 28 01 b7 c4 de 70 e7 cc a2 b3 60 49 7e 5c 87 bd 77 d8 31 2c 27 c0 1a 4b f7 [51] f2 33 aa 4d 23 5e fe 51 52 6a 75 cd 5c f3 a1 18 08 77 d0 cc 3b 74 cd ce 28 [76] 80 da 82 82 70 1e db 49 88 ef 6a 83 16 45 b6 ef 6f a8 63 fc 59 f3 50 ab 1d [101] 56 e3 06 58 61 10 57 e3 99 3e c1 e0 53 77 22 cd 86 b9 75 87 14 33 2e e9 31 [126] 21 82 48 cb 87 24 99 c7 b1 e2 a0 e3 2a 76 8c 99 50 f3 0c 08 81 8d 36 aa b5 [151] 72 f0 41 78 9f 1a fc 7b 81 05 51 8d 37 0d 15 47 b7 a6 7b c3 7b 7b ce e5 26 [176] 1d 43 60 95 97 d2 f2 a8 41 23 3c 26 82 d1 13 cb 66 41 e5 1d 8b 2e 28 1f 9d [201] f1 6f 67 ae 0c 1b 2f a0 3a 45 d9 12 0a 93 29 eb 4d a4 f9 d8 72 99 a2 b1 2e [226] 0e 12 21 ee 74 0d fc 0f f3 a9 bf 4c f9 bf c1 ee b1 42 35 70 ec 24 34 41 bf [251] 2e 12 41 72 24 81 68 83 4e 10 76 9a c5 56 d1 1d 56 95 9d 60 e6 31 5f d2 48 [276] 68 0d dc b3 7a 3a b1 0c 83 22 aa b9 cc 3b 72 d4 6c 58 88 e4 ce 6d 3f 7b f9 [301] 13 5b 06 d1 b5 d1 03 4e 4b 89 b8 59 fe ca aa 94 e7 03 74 de fe d4 11 15 e9 [326] a0 37 7c e0 61 8b 3c da f6 91 e3 3d 55 02 7f 33 24 73 1b bb 11 65 50 e3 51 [351] 16 9c a2 dc 9b 79 e0 27 fc e1 fe c4 5a 7e 4c e4 cd a7 43 f9 d4 82 f5 8e 6b [376] f4 1a a5 4f 41 4b 7a 10 f2 d2 98 6e 7e 7f 11 0f c9 93 eb 84 3a a3 d6 05 9c $dbl1 [1] 6b 2c 06 3b 59 03 e1 f3 6a 26 91 9e dd 27 af 6c d0 c2 58 6f a7 71 b0 a8 c2 [26] f7 fe 63 40 5a f1 9d 92 c5 0e c6 05 c8 d4 68 2f cf 98 c6 69 41 ad 1d 2a 6f [51] bb a3 b1 f9 09 f0 e3 49 63 d1 0a af 42 9d 59 31 8b 1e db 3f f1 61 a8 d7 94 [76] 6e fd ec bf 5c fc 20 d8 cb 7a bb c7 ba 8b 60 53 00 d6 8c 9e 2b fc 76 73 e7 [101] f0 c9 4b ad da 6b 9e 41 9f 88 3f 20 ba 6a f2 99 56 48 e0 57 c0 ca 3d 7b ce [126] 54 60 0e 5b ad 1b 94 a3 cb 2f c3 e0 cb f9 67 f5 ae e1 39 73 17 5d 6d 70 0a [151] a5 bc 01 08 f3 9d 8c de 10 d3 f6 72 2d e8 19 ff fc c6 24 4e 95 b4 90 5e 7b [176] da e2 12 4e f4 b0 4a ed 85 af 2f e3 fc 48 33 5d aa 7f 78 05 2f d3 d2 44 c4 [201] 78 2e c8 e7 65 45 5d 15 af 8b 5e 5c 49 48 fb 55 d1 4e 09 d0 f6 19 7b 98 20 [226] 67 7c 2f 2e ba 70 2a 0a ad c8 48 3d 69 7b c5 99 67 d9 2e f4 5a e2 84 24 9c [251] 00 22 1d 75 e7 c6 fc 9c a3 6a dd 1d 96 b0 53 67 35 59 51 b7 8f a7 3f 78 39 [276] ed fa 73 2d 07 24 3b 9e 97 83 06 0d 2a d4 e0 f2 43 75 c3 6f 09 94 28 25 40 [301] f8 c1 9e 13 41 50 c3 d2 65 6f 01 b2 26 fb 1f d2 a8 5c 11 db b4 e6 4d e1 1d [326] 7d 43 c3 17 cd 2e ca ad 05 b4 bd 74 8d 37 9a 5a 1e 85 d4 0a f9 03 8f a7 6d [351] 23 c5 7b e7 54 ee e1 33 d1 8e a3 5d a4 cb 0d 80 3e c7 80 5c 77 d8 36 fb 94 [376] a5 a5 a2 72 8a 95 ab f3 da 47 90 da c7 49 a1 b1 81 01 19 29 96 b3 5c ca ba $dbl2 [1] b9 79 37 9e ea 40 52 d2 fa 14 1e 2f c1 55 60 05 35 18 46 24 ba 76 ed 56 75 [26] 78 db ad 6c 77 76 c5 67 0a 4b b9 d2 19 61 03 f4 69 d5 5d db d2 f9 7d 55 6a [51] d5 af 33 2d 76 69 9d f6 f8 de 6f d3 54 05 97 a0 7f 08 f0 19 33 e0 e4 4a 20 [76] 61 67 40 d0 00 b5 ad 11 fb 94 d6 28 78 ef 95 69 a6 e5 9f ed cf 10 69 4f 59 [101] bc 28 55 9c e7 bd ea 9d c2 cb 77 e7 9d 22 f6 ee ad 0c 46 0d 6d 15 f2 26 c0 [126] 7e 70 df 9c 1b ce cc cc 52 4f 75 87 f9 6a fa 5f b8 4d 42 e7 1b 72 ea 69 8a [151] 6b 7b ab 97 7b 86 e6 9a 7c 62 ac eb 08 df 3f 39 e8 a1 ec ce 45 f9 26 c5 27 [176] 47 b2 4c 65 ae 11 07 52 d4 6c 74 3d f4 62 1f 67 51 37 e4 1f eb b7 3e 51 26 [201] 5b da 32 b7 d5 fc 9d 73 8d 98 42 fa 90 23 50 bf 06 61 2f ac 54 8c 87 0e 53 [226] 3d 98 5f 44 55 57 9d 69 b7 59 9f 87 fe 1e 00 d7 1c 0c 34 ba 25 ce a1 77 68 [251] cc 7a f5 cf 2f a4 34 2f 60 a7 a0 c7 e9 cd 90 29 c5 55 06 c6 6a 99 e4 bc 46 [276] a4 c0 43 8c 3f f8 8c ee 17 d7 9f 8d 03 48 64 fa d1 55 85 81 c0 cd ac bc 6e [301] d5 59 0a 28 94 df 2a 7d ea ca 7f 09 5e 5e 47 0d 02 ad 8c 67 0a b8 52 e4 17 [326] 3a 25 5d 5b 17 34 01 09 18 7e ca 83 28 f9 f0 c1 b0 10 bc 30 aa 4b 5c 85 68 [351] 75 71 2e 3c c6 e8 a7 9f a9 36 16 81 cb 4b d7 94 88 15 6e f4 f9 a9 03 ac 21 [376] 65 43 cb 1a ca cd 69 96 b6 8e e0 9f 70 be b1 af c9 73 f2 4b c9 6b 2c 06 3b vctrs/tests/testthat/_snaps/subscript.md0000644000176200001440000001111515157321013020235 0ustar liggesusers# can customise subscript errors Code (expect_error(with_tibble_cols(vec_as_subscript(env())), class = "vctrs_error_subscript_type") ) Output Error: ! Can't rename columns with `foo(bar)`. x `foo(bar)` must be logical, numeric, or character, not an environment. --- Code (expect_error(with_dm_tables(vec_as_subscript(env())), class = "vctrs_error_subscript_type") ) Output Error: ! Can't extract tables with `foo(bar)`. x `foo(bar)` must be logical, numeric, or character, not an environment. # vec_as_subscript() checks dimensionality Code (expect_error(vec_as_subscript(matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be a simple vector, not a matrix. Code (expect_error(vec_as_subscript(array(TRUE, dim = c(1, 1, 1))), class = "vctrs_error_subscript_type") ) Output Error: ! Can't subset elements. x Subscript must be a simple vector, not an array. Code (expect_error(with_tibble_rows(vec_as_subscript(matrix(TRUE, nrow = 1))), class = "vctrs_error_subscript_type")) Output Error: ! Can't remove rows with `foo(bar)`. x Subscript `foo(bar)` must be a simple vector, not a matrix. # vec_as_subscript() forbids subscript types Code vec_as_subscript(1L, logical = "error", numeric = "error") Condition Error: ! Can't subset elements. x Subscript must be character, not the number 1. --- Code vec_as_subscript("foo", logical = "error", character = "error") Condition Error: ! Can't subset elements. x Subscript must be numeric, not the string "foo". --- Code vec_as_subscript(TRUE, logical = "error") Condition Error: ! Can't subset elements. x Subscript must be numeric or character, not `TRUE`. --- Code vec_as_subscript("foo", character = "error") Condition Error: ! Can't subset elements. x Subscript must be logical or numeric, not the string "foo". --- Code vec_as_subscript(NULL, numeric = "error") Condition Error: ! Can't subset elements. x Subscript must be logical or character, not `NULL`. --- Code vec_as_subscript(quote(foo), character = "error") Condition Error: ! Can't subset elements. x Subscript must be logical or numeric, not a symbol. # vec_as_subscript2() forbids subscript types Code vec_as_subscript2(1L, numeric = "error") Condition Error: ! Can't extract element. x Subscript must be character, not the number 1. --- Code vec_as_subscript2("foo", character = "error") Condition Error: ! Can't extract element. x Subscript must be numeric, not the string "foo". --- Code vec_as_subscript2(TRUE) Condition Error: ! Can't extract element. x Subscript must be numeric or character, not `TRUE`. # vec_as_subscript2() retains the call when throwing vec_as_subscript() errors (#1605) Code vec_as_subscript2(1L, numeric = "error", call = call("foo")) Condition Error in `foo()`: ! Can't extract element. x Subscript must be character, not the number 1. --- Code vec_as_subscript2(1.5, call = call("foo")) Condition Error in `foo()`: ! Can't extract element. x Can't convert from to due to loss of precision. # vec_as_subscript2() retains the call when erroring on logical input (#1605) Code vec_as_subscript2(TRUE, call = call("foo")) Condition Error in `foo()`: ! Can't extract element. x Subscript must be numeric or character, not `TRUE`. # `logical = 'cast'` is deprecated Code vec_as_subscript2(TRUE, logical = "cast") Condition Error in `vec_as_subscript2()`: ! `vctrs::vec_as_subscript2(logical = 'cast')` is deprecated. --- Code vec_as_subscript2(TRUE, logical = "error") Condition Error: ! Can't extract element. x Subscript must be numeric or character, not `TRUE`. # lossy cast errors for scalar subscripts work (#1606) Code vec_as_subscript2(1.5) Condition Error: ! Can't extract element. x Can't convert from to due to loss of precision. vctrs/tests/testthat/test-type-asis.R0000644000176200001440000000635615065005761017452 0ustar liggesusers# ------------------------------------------------------------------------------ # Printing test_that("I() wraps contents", { f <- factor() expect_equal(vec_ptype_abbr(I(f)), "I") expect_equal(vec_ptype_full(I(f)), "I>") }) test_that("AsIs class stripped from I()", { df <- data.frame(x = 1, y = 1:2) class(df) <- c("myclass", "data.frame") expect_equal( vec_ptype_full(I(df)), "I>" ) expect_equal(vec_ptype_full(I(df[1])), "I>") expect_equal(vec_ptype_full(I(df[0])), "I>") }) # ------------------------------------------------------------------------------ # Proxy / restore test_that("can slice AsIs class", { df <- data.frame(x = I(1:3), y = I(list(4, 5, 6))) expect_identical(vec_slice(df, 2:3), unrownames(df[2:3, ])) }) test_that("equality proxy is forwarded correctly for atomic types (#1557)", { # We don't define any equality proxies for base atomic types, but we can fake it local_methods(vec_proxy_equal.integer = function(x, ...) "dispatched") asis <- I(1L) expect_identical(vec_proxy_equal(asis), "dispatched") }) test_that("comparison proxy is forwarded correctly for atomic types (#1557)", { # vec_proxy_compare.raw() exists x <- raw() asis <- I(x) expect_identical(vec_proxy_compare(asis), vec_proxy_compare(x)) expect_identical(vec_proxy_compare(asis), integer()) }) test_that("order proxy is forwarded correctly for atomic types (#1557)", { # vec_proxy_order.list() exists x <- list(2, 1, 2) asis <- I(x) expect_identical(vec_proxy_order(asis), vec_proxy_order(x)) expect_identical(vec_proxy_order(asis), c(1L, 2L, 1L)) }) # ------------------------------------------------------------------------------ # Coercion test_that("can take the common type of identical AsIs objects", { expect_identical(vec_ptype2(I(1), I(1)), I(numeric())) }) test_that("AsIs objects throw ptype2 errors with their underlying types", { expect_snapshot({ (expect_error( vec_ptype2(I(1), I("x")), class = "vctrs_error_incompatible_type" )) }) }) test_that("AsIs always wraps the common type", { expect_identical(vec_ptype2(I(1L), 1), I(numeric())) expect_identical(vec_ptype2(1, I(1L)), I(numeric())) }) # ------------------------------------------------------------------------------ # Casting test_that("can cast one AsIs to another AsIs", { expect_identical(vec_cast(I(1), I(1)), I(1)) expect_identical(vec_cast(I(1), I(1L)), I(1L)) }) test_that("AsIs objects throw cast errors with their underlying types", { expect_snapshot({ (expect_error( vec_cast(I(1), I(factor("x"))), class = "vctrs_error_incompatible_type" )) }) }) test_that("casting from an AsIs drops the AsIs class", { expect_identical(vec_cast(I(1), 1), 1) }) test_that("casting to an AsIs adds the AsIs class", { expect_identical(vec_cast(1, I(1)), I(1)) }) # ------------------------------------------------------------------------------ # Misc test_that("can `vec_c()` with only AsIs objects", { expect_identical(vec_c(I(1), I(2)), I(c(1, 2))) expect_identical(vec_c(I(1), I(2L)), I(c(1, 2))) }) test_that("can `vec_c()` with AsIs objects mixed with other types", { expect_identical(vec_c(I(1L), 1), I(c(1, 1))) }) vctrs/tests/testthat/helper-vctrs.R0000644000176200001440000000426015072256373017172 0ustar liggesuserstestthat_import_from <- function(ns, names, env = caller_env()) { skip_if_not_installed(ns) import_from(ns, names, env = env) } shaped_int <- function(...) { array(NA_integer_, c(...)) } set_rownames_recursively <- function(x, i = NULL) { n <- vec_size(x) stopifnot(n <= length(letters)) for (j in seq_along(x)) { if (is.data.frame(x[[j]])) { x[[j]] <- set_rownames_recursively(x[[j]], i = i) } } row.names(x) <- paste0(letters[seq_len(n)], i) x } expect_waldo_equal <- function(type, act, exp, info, ...) { comp <- waldo::compare( act$val, exp$val, ..., x_arg = "actual", y_arg = "expected" ) expect( length(comp) == 0, sprintf( "`actual` (%s) not %s to `expected` (%s).\n\n%s", act$lab, type, exp$lab, paste0(comp, collapse = "\n\n") ), info = info ) invisible(act$val) } expect_identical <- function( object, expected, info = NULL, label = NULL, expected.label = NULL, ... ) { act <- quasi_label(enquo(object), label, arg = "object") exp <- quasi_label(enquo(expected), expected.label, arg = "expected") expect_waldo_equal("identical", act, exp, info, ...) } expect_equal <- function( object, expected, ..., tolerance = .Machine$double.eps^0.5, info = NULL, label = NULL, expected.label = NULL ) { act <- quasi_label(enquo(object), label, arg = "object") exp <- quasi_label(enquo(expected), expected.label, arg = "expected") expect_waldo_equal("equal", act, exp, info, ..., tolerance = tolerance) } raw2 <- function(...) { as.raw(vec_c(..., .ptype = integer())) } cpl2 <- function(...) { # R 4.4.0 changed `as.complex(NA_real/integer/logical)` so that it always uses # a `0` in the imaginary slot. While this is reasonable, it is annoying for # comparison purposes in tests, where we typically propagate the `NA`. As of # rlang 1.1.1, `cpl()` inherits this behavior change so we have a custom version # here that works the same on all R versions. # https://github.com/wch/r-source/commit/1a2aea9ac3c216fea718f33f712764afc34f6ee8 out <- list2(...) out <- as.complex(out) out[is.na(out)] <- complex(real = NA_real_, imaginary = NA_real_) out } vctrs/tests/testthat/test-split.R0000644000176200001440000000227015065005761016656 0ustar liggesuserstest_that("can split empty vector", { out <- vec_split(integer(), character()) expect_s3_class(out, "data.frame") expect_equal(out$key, character()) expect_equal(out$val, list()) }) test_that("split data frame with data frame", { df <- data.frame(x = c(1, 1, 2), y = c(1, 1, 1)) out <- vec_split(df, df) expect_s3_class(out, "data.frame") expect_equal(out$key, data.frame(x = c(1, 2), y = c(1, 1))) expect_equal( out$val, list( data.frame(x = c(1, 1), y = c(1, 1)), data.frame(x = 2, y = 1) ) ) }) test_that("x and by must be same size", { expect_error( vec_split(1:3, 1:2), "same size" ) }) test_that("split takes the equality proxy (#375)", { local_comparable_tuple() x <- tuple(c(1, 2, 1), 1:3) expect_identical(nrow(vec_split(1:3, x)), 2L) }) test_that("split works with different encodings", { encs <- encodings() expect_identical(nrow(vec_split(1:3, encs)), 1L) }) test_that("`key` and `value` retain names", { x <- c(a = 1, b = 2, c = 1, a = 1) split <- vec_split(x, x) expect_identical(split$key, c(a = 1, b = 2)) expect_identical(split$val[[1]], c(a = 1, c = 1, a = 1)) expect_identical(split$val[[2]], c(b = 2)) }) vctrs/tests/testthat/test-hash.R0000644000176200001440000001255515156537736016472 0ustar liggesusers# Vectorised -------------------------------------------------------------- test_that("vec_hash() produces same hash for same values", { x <- vec_hash(1:3) y <- do.call(c, map(1:3, vec_hash)) expect_identical(x, y) }) test_that("F, T, and NA hash to different values", { x <- map(c(TRUE, FALSE, NA), vec_hash) expect_length(unique(x), 3) }) test_that("vec_hash of double produces different values", { x <- vec_hash(c(1, 1, 2)) expect_true(identical(x[1:4], x[5:8])) expect_false(identical(x[5:8], x[9:12])) }) test_that("NA and NaN hash to different values", { x <- vec_hash(c(NA, NaN)) expect_false(identical(x[1:4], x[5:8])) }) test_that("same string hashes to same value", { x <- vec_hash(c("1", "1", "2")) expect_true(identical(x[1:4], x[5:8])) expect_false(identical(x[5:8], x[9:12])) }) test_that("list hashes to values of individual values", { cpl <- complex(real = c(1, 2), imaginary = c(3, 4)) x <- vec_hash(list(1:3, letters, cpl)) expect_identical(x[1:4], obj_hash(1:3)) expect_identical(x[5:8], obj_hash(letters)) expect_identical(x[9:12], obj_hash(cpl)) x <- map(list(list(1:3), list(letters), list(cpl)), vec_hash) expect_identical(x[[1]], obj_hash(1:3)) expect_identical(x[[2]], obj_hash(letters)) expect_identical(x[[3]], obj_hash(cpl)) }) test_that("hash of data frame works down rows", { df <- data.frame(x = 1:3, y = 1:3) x <- vec_hash(df) expect_length(x, 4 * vec_size(df)) expect_identical(x[1:4], vec_hash(df[1, ])) }) test_that("hashes are consistent from run to run", { # no string, since we're currently hashing the address in string pool df <- list( lgl = c(TRUE, FALSE, NA), int = 1:100, dbl1 = as.double(1:100), dbl2 = seq(0, 1, length = 100) ) hash <- lapply(df, vec_hash) # Big-endian results are byte-swapped, but otherwise equivalent. # Swap results so that there's no need to save results twice. if (.Platform$endian == "big") { hash <- lapply( hash, function(x) { writeBin(readBin(x, "int", 100, endian = "big"), x, endian = "little") } ) } local_options(max.print = 99999) expect_snapshot(hash) }) test_that("can hash list of non-vectors", { x <- list(quote(x), mean) expect_equal( vec_hash(x), c(obj_hash(x[[1]]), obj_hash(x[[2]])) ) }) test_that("can hash matrices", { x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2)) expect_identical( vec_hash(x), vec_hash(x) ) x <- matrix(c(1, 2, 3, 4), c(2, 2)) expect_identical( vec_hash(x), vec_hash(x) ) expect_false(identical( vec_hash(x), vec_hash(c(1, 2)) )) y <- matrix(c(1, 2, 3, 5), c(2, 2)) expect_false(identical( vec_hash(x), vec_hash(y) )) }) test_that("can hash NA", { expect_identical( vec_hash(NA), vec_hash(NA), ) }) test_that("can hash 1D arrays", { # 1D arrays are dispatched to `as.data.frame.vector()` which # currently does not strip dimensions. This caused an infinite # recursion. expect_length(vec_hash(array(1:2)), 8) expect_identical(vec_hash(array(1:2)), vec_hash(1:2)) }) test_that("can hash raw vectors", { expect_identical( vec_hash(0:255), vec_hash(as.raw(0:255)) ) expect_identical( obj_hash(0:255), obj_hash(as.raw(0:255)) ) }) test_that("can hash complex vectors", { expect_identical( vec_hash(c(1, 2) + 0i), c(obj_hash(c(1, 0)), obj_hash(c(2, 0))) ) expect_identical( vec_hash(list(c(1, 2) + 0i)), obj_hash(c(1, 2) + 0i) ) }) test_that("hash treats positive and negative 0 as equivalent (#637)", { expect_equal(vec_hash(-0), vec_hash(0)) }) test_that("can hash lists of expressions", { expect_equal( vec_hash(list(expression(x), expression(y))), c(obj_hash(expression(x)), obj_hash(expression(y))) ) }) test_that("vec_hash() uses recursive equality proxy", { x <- new_data_frame(list(x = foobar(1:3))) default <- vec_hash(x) local_methods(vec_proxy_equal.vctrs_foobar = function(...) c(0, 0, 0)) overridden <- vec_hash(x) expect_false(identical(default, overridden)) }) # Object ------------------------------------------------------------------ test_that("equal objects hash to same value", { # just test function since they'll recurse through every other object type f1 <- function(x, y = NULL) x + y f2 <- function(x, y = NULL) x + y expect_false(identical(obj_hash(f1), obj_hash(f2))) expect_false(identical( vec_hash(data_frame(x = list(f1))), vec_hash(data_frame(x = list(f2))) )) attr(f1, "srcref") <- NULL attr(f2, "srcref") <- NULL expect_equal(obj_hash(f1), obj_hash(f2)) expect_equal( vec_hash(data_frame(x = list(f1))), vec_hash(data_frame(x = list(f2))) ) }) test_that("expression vectors hash to the same value as lists of calls/names", { expect_equal( obj_hash(expression(x, y)), obj_hash(list(as.name("x"), as.name("y"))) ) expect_equal( obj_hash(expression(mean(), sd())), obj_hash(list(call("mean"), call("sd"))) ) }) test_that("language argument names are considered", { expect_false(identical( obj_hash(call("fn", foo = 1)), obj_hash(call("fn", bar = 1)) )) }) test_that("pairlist tags are considered", { expect_false(identical( obj_hash(pairlist(foo = 1)), obj_hash(pairlist(bar = 1)) )) }) test_that("attribute pairlist tags are considered", { expect_false(identical( obj_hash(structure(1, foo = 1)), obj_hash(structure(1, bar = 1)) )) }) vctrs/tests/testthat/test-slice-interleave.R0000644000176200001440000001066315120272011020745 0ustar liggesuserstest_that("interleaving is working as expected", { expect_identical( vec_interleave(1:3, 4:6), c(1L, 4L, 2L, 5L, 3L, 6L) ) expect_identical( vec_interleave(1:3, 4:6, 7:9), c(1L, 4L, 7L, 2L, 5L, 8L, 3L, 6L, 9L) ) }) test_that("data frames can be interleaved", { x <- data_frame(x = 1:2, y = c("a", "b")) y <- data_frame(x = 3:4, y = c("c", "d")) expect_identical( vec_interleave(x, y), vec_slice(vec_c(x, y), c(1, 3, 2, 4)) ) }) test_that("works with `NULL` inputs", { expect_identical( vec_interleave(1:3, NULL, 4:6), vec_interleave(1:3, 4:6) ) }) test_that("allows for name repair", { x <- c(x = 1) expect_identical( vec_interleave(x, x), c(x = 1, x = 1) ) expect_snapshot(vec_interleave(x, x, .name_repair = "unique")) }) test_that("can repair names quietly", { local_name_repair_verbose() expect_snapshot({ res_unique <- vec_interleave( c(x = 1), c(x = 2), .name_repair = "unique_quiet" ) res_universal <- vec_interleave( c("if" = 1), c("in" = 2), .name_repair = "universal_quiet" ) }) expect_named(res_unique, c("x...1", "x...2")) expect_named(res_universal, c(".if", ".in")) }) test_that("works with name specs", { x <- c(x = 1) y <- 1 expect_named( vec_interleave(x = x, y = y, .name_spec = "{outer}_{inner}"), c("x_x", "y") ) }) test_that("recycles inputs", { expect_identical( vec_interleave(1:3, NA), c(1L, NA, 2L, NA, 3L, NA) ) expect_identical( vec_interleave(integer(), NA), integer() ) }) test_that("works with no inputs", { # Purposefully returns `unspecified()`, which is the more useful result # for generic programming against this expect_identical(vec_interleave(), unspecified()) expect_identical(vec_interleave(NULL), unspecified()) # `.size` affects the size of each element, thus it doesn't affect the output # when there are 0 elements expect_identical(vec_interleave(.size = 2), unspecified()) expect_identical(vec_interleave(NULL, .size = 2), unspecified()) }) test_that("works with length zero input", { expect_identical(vec_interleave(integer(), integer()), integer()) }) test_that("respects `.ptype`", { expect_identical(vec_interleave(.ptype = character()), character()) expect_identical(vec_interleave(NULL, .ptype = character()), character()) expect_identical(vec_interleave(1L, 2L, .ptype = numeric()), c(1, 2)) }) test_that("reports type errors", { expect_snapshot(error = TRUE, { vec_interleave(1, "x") }) expect_snapshot(error = TRUE, { vec_interleave(1, "x", .error_call = quote(foo())) }) expect_snapshot(error = TRUE, { vec_interleave(1, "x", .ptype = double()) }) expect_snapshot(error = TRUE, { vec_interleave(1, "x", .ptype = double(), .error_call = quote(foo())) }) # Index is right even with `NULL`! expect_snapshot(error = TRUE, { vec_interleave(1, NULL, "x") }) expect_snapshot(error = TRUE, { vec_interleave(1, NULL, "x", .ptype = double()) }) }) test_that("respects `.size`", { # Correctly does not report an error here expect_identical( vec_interleave(1:2, 3:4, .size = 2), c(1L, 3L, 2L, 4L) ) # Useful for recycling to a known element size # in the case of all size 1 elements expect_identical( vec_interleave(1, 2, .size = 2), c(1, 2, 1, 2) ) }) test_that("reports recycling errors", { expect_snapshot(error = TRUE, { vec_interleave(1:2, 1:3) }) expect_snapshot(error = TRUE, { vec_interleave(1:2, 1:3, .error_call = quote(foo())) }) expect_snapshot(error = TRUE, { vec_interleave(1:2, 3:4, .size = 3) }) expect_snapshot(error = TRUE, { vec_interleave(1:2, 3:4, .size = 3, .error_call = quote(foo())) }) # Index is right even with `NULL`! expect_snapshot(error = TRUE, { vec_interleave(1:2, NULL, 1:3) }) expect_snapshot(error = TRUE, { vec_interleave(1:2, NULL, 1:3, .size = 2) }) }) test_that("reports scalar errors", { expect_snapshot(error = TRUE, { vec_interleave(lm(1 ~ 1)) }) expect_snapshot(error = TRUE, { vec_interleave(lm(1 ~ 1), .error_call = quote(foo())) }) # Index is right even with `NULL`! expect_snapshot(error = TRUE, { vec_interleave(1, NULL, lm(1 ~ 1)) }) expect_snapshot(error = TRUE, { vec_interleave(1, NULL, lm(1 ~ 1), .error_call = quote(foo())) }) }) test_that("`list_interleave()` checks for a list", { expect_snapshot(error = TRUE, { list_interleave(1) }) }) vctrs/tests/testthat/test-list-unchop.R0000644000176200001440000010307615072256373020003 0ustar liggesuserstest_that("`x` must be a list", { expect_snapshot(error = TRUE, { list_unchop(1, indices = list(1)) }) expect_snapshot(error = TRUE, { list_unchop( 1, indices = list(1), error_call = call("foo"), error_arg = "arg" ) }) expect_snapshot(error = TRUE, { list_unchop(data.frame(x = 1), indices = list(1)) }) }) test_that("`indices` must be a list", { expect_snapshot(error = TRUE, { list_unchop(list(1), indices = 1) }) expect_snapshot(error = TRUE, { list_unchop(list(1), indices = 1, error_call = call("foo")) }) expect_snapshot(error = TRUE, { list_unchop(list(1), indices = data.frame(x = 1)) }) }) test_that("`indices` must be a list of integers", { expect_error( list_unchop(list(1), indices = list("x")), class = "vctrs_error_subscript_type" ) expect_error( list_unchop(list(1), indices = list(TRUE)), class = "vctrs_error_subscript_type" ) expect_error( list_unchop(list(1), indices = list(quote(name))), class = "vctrs_error_subscript_type" ) }) test_that("`x` and `indices` must be lists of the same size", { expect_snapshot(error = TRUE, { list_unchop(list(1, 2), indices = list(1)) }) }) test_that("can unchop with an AsIs list (#1463)", { x <- I(list(1, 2)) expect_identical(list_unchop(x), c(1, 2)) }) test_that("can unchop empty vectors", { expect_null(list_unchop(list())) expect_null(list_unchop(list(), indices = list())) expect_identical( list_unchop(list(), indices = list(), ptype = numeric()), numeric() ) }) test_that("can unchop a list of NULL", { expect_null(list_unchop(list(NULL), indices = list(integer()))) expect_identical( list_unchop(list(NULL), indices = list(integer()), ptype = numeric()), numeric() ) expect_identical( list_unchop( list(NULL, NULL), indices = list(integer(), integer()), ptype = numeric() ), numeric() ) }) test_that("NULLs are ignored when unchopped with other vectors", { expect_identical( list_unchop(list("a", NULL, "b")), c("a", "b") ) expect_identical( list_unchop(list("a", NULL, "b"), indices = list(2, integer(), 1)), c("b", "a") ) # Homogeneous fallback expect_identical( list_unchop(list(foobar("a"), NULL, foobar("b"))), foobar(c("a", "b")) ) expect_identical( list_unchop( list(foobar("a"), NULL, foobar("b")), indices = list(2, integer(), 1) ), foobar(c("b", "a")) ) expect_identical( list_unchop( list(foobar("a"), NULL, foobar("b")), indices = list(2, 3, 1) ), foobar(c("b", "a", NA)) ) # Homoegeneous fallback (`NULL` at front) expect_identical( list_unchop(list(NULL, foobar("a"), foobar("b"))), foobar(c("a", "b")) ) expect_identical( list_unchop( list(NULL, foobar("a"), foobar("b")), indices = list(integer(), 2, 1) ), foobar(c("b", "a")) ) expect_identical( list_unchop( list(NULL, foobar("a"), foobar("b")), indices = list(3, 2, 1) ), foobar(c("b", "a", NA)) ) # `c()` fallback with_c_foobar({ expect_identical( list_unchop(list(foobar("a"), NULL, foobar("b"))), foobar_c(c("a", "b")) ) expect_identical( list_unchop( list(foobar("a"), NULL, foobar("b")), indices = list(2, integer(), 1) ), foobar_c(c("b", "a")) ) expect_identical( list_unchop( list(foobar("a"), NULL, foobar("b")), indices = list(2, 3, 1) ), foobar_c(c("b", "a", NA)) ) }) # `c()` fallback (`NULL` at front) with_c_foobar({ expect_identical( list_unchop(list(NULL, foobar("a"), foobar("b"))), foobar_c(c("a", "b")) ) expect_identical( list_unchop( list(NULL, foobar("a"), foobar("b")), indices = list(integer(), 2, 1) ), foobar_c(c("b", "a")) ) expect_identical( list_unchop( list(NULL, foobar("a"), foobar("b")), indices = list(3, 2, 1) ), foobar_c(c("b", "a", NA)) ) }) }) test_that("can use a `NULL` element with a corresponding index", { # We've determined this is the behavior we are locked to in `list_unchop()`, # but in `list_combine()` we return `unspecified(2)` here because the user # also specifies a `size`, making this less ambiguous. expect_null(list_unchop(list(NULL), indices = list(1:2))) expect_identical( list_unchop(list(NULL), indices = list(1:2), ptype = integer()), c(NA_integer_, NA_integer_) ) x <- list("a", NULL, c("b", "c")) indices <- list(3L, c(1L, 4L), c(2L, 5L)) expect_identical(list_unchop(x, indices = indices), c(NA, "b", "a", NA, "c")) }) test_that("can unchop atomic vectors", { expect_identical(list_unchop(list(1, 2), indices = list(2, 1)), c(2, 1)) expect_identical( list_unchop(list("a", "b"), indices = list(2, 1)), c("b", "a") ) }) test_that("can unchop lists", { x <- list(list("a", "b"), list("c")) indices <- list(c(2, 3), 1) expect_identical(list_unchop(x, indices = indices), list("c", "a", "b")) }) test_that("NA is logical if no other types intervene", { expect_identical( list_unchop(list(logical()), indices = list(integer())), logical() ) expect_identical( list_unchop(list(NA), indices = list(1)), NA ) expect_identical( list_unchop(list(NA, NA), indices = list(1, 2)), c(NA, NA) ) }) test_that("can unchop data frames of 1 column", { indices <- list(c(3, 1), c(2, 4)) values <- list( data_frame(x = 1:2), data_frame(x = 3:4) ) expect_identical( list_unchop(values, indices = indices), data_frame(x = int(2, 3, 1, 4)) ) # Homogeneous fallback (#1975) values <- list( data_frame(x = foobar(1:2)), data_frame(x = foobar(3:4)) ) expect_identical( list_unchop(values, indices = indices), data_frame(x = foobar(int(2, 3, 1, 4))) ) # `c()` fallback with_c_foobar({ values <- list( data_frame(x = foobar(1:2)), data_frame(x = foobar(3:4)) ) expect_identical( list_unchop(values, indices = indices), data_frame(x = foobar_c(int(2, 3, 1, 4))) ) }) }) test_that("can unchop data frames of >1 column", { indices <- list(c(3, 1), c(2, 4)) values <- list( data_frame(x = 1:2, y = letters[1:2], z = c(1, 2)), data_frame(x = 3:4, y = letters[3:4], z = c(3, 4)) ) expect_identical( list_unchop(values, indices = indices), data_frame( x = int(2, 3, 1, 4), y = letters[c(2, 3, 1, 4)], z = dbl(2, 3, 1, 4), ) ) # Homogeneous fallback (#1975) # Mix of fallback and non-fallback columns values <- list( data_frame(x = foobar(1:2), y = foobar(letters[1:2]), z = c(1, 2)), data_frame(x = foobar(3:4), y = foobar(letters[3:4]), z = c(3, 4)) ) expect_identical( list_unchop(values, indices = indices), data_frame( x = foobar(int(2, 3, 1, 4)), y = foobar(letters[c(2, 3, 1, 4)]), z = dbl(2, 3, 1, 4) ) ) # `c()` fallback # Mix of fallback and non-fallback columns with_c_foobar({ values <- list( data_frame(x = foobar(1:2), y = foobar(letters[1:2]), z = c(1, 2)), data_frame(x = foobar(3:4), y = foobar(letters[3:4]), z = c(3, 4)) ) expect_identical( list_unchop(values, indices = indices), data_frame( x = foobar_c(int(2, 3, 1, 4)), y = foobar_c(letters[c(2, 3, 1, 4)]), z = dbl(2, 3, 1, 4) ) ) }) }) test_that("can unchop factors", { fctr1 <- factor("z") fctr2 <- factor(c("x", "y")) x <- list(fctr1, fctr2) indices <- list(2, c(3, 1)) # levels are in the order they are seen! expect <- factor(c("y", "z", "x"), levels = c("z", "x", "y")) expect_identical(list_unchop(x, indices = indices), expect) }) test_that("can fallback when unchopping matrices", { mat1 <- matrix(1:4, nrow = 2, ncol = 2) mat2 <- matrix(5:10, nrow = 3, ncol = 2) x <- list(mat1, mat2) indices <- list(c(4, 1), c(2, 3, 5)) expect <- vec_slice(vec_c(mat1, mat2), vec_order(vec_c(!!!indices))) expect_identical(list_unchop(x, indices = indices), expect) expect_identical(list_unchop(x), vec_c(mat1, mat2)) }) test_that("can fallback when unchopping arrays of >2D", { arr1 <- array(1:8, c(2, 2, 2)) arr2 <- matrix(9:10, c(1, 2)) x <- list(arr1, arr2) indices <- list(c(3, 1), 2) expect <- vec_slice(vec_c(arr1, arr2), vec_order(vec_c(!!!indices))) expect_identical(list_unchop(x, indices = indices), expect) expect_identical(list_unchop(x), vec_c(arr1, arr2)) }) test_that("can unchop with all size 0 elements and get the right ptype", { indices <- list(integer(), integer()) x <- list(integer(), integer()) expect_identical(list_unchop(x, indices = indices), integer()) # Homogeneous fallback x <- list(foobar(integer()), foobar(integer())) expect_identical(list_unchop(x, indices = indices), foobar(integer())) # `c()` fallback with_c_foobar({ x <- list(foobar(integer()), foobar(integer())) expect_identical(list_unchop(x, indices = indices), foobar_c(integer())) }) }) test_that("can unchop with some size 0 elements", { x <- list(integer(), 1:2, integer()) indices <- list(integer(), 2:1, integer()) expect_identical(list_unchop(x, indices = indices), 2:1) }) test_that("`NULL` is a valid index", { expect_identical( list_unchop(list(1, 2), indices = list(NULL, 1)), 2 ) expect_snapshot(error = TRUE, { list_unchop(list(1, 2), indices = list(NULL, 2)) }) # Homogeneous fallback expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(NULL, 1)), foobar(2) ) expect_snapshot(error = TRUE, { list_unchop(list(foobar(1), foobar(2)), indices = list(NULL, 2)) }) # `c()` fallback with_c_foobar({ expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(NULL, 1)), foobar_c(2) ) expect_snapshot(error = TRUE, { list_unchop(list(foobar(1), foobar(2)), indices = list(NULL, 2)) }) }) }) test_that("unchopping recycles elements of x to the size of the index", { indices <- list(c(3, 4, 5), c(2, 1)) expect_identical( list_unchop(list(1, 2), indices = indices), c(2, 2, 1, 1, 1) ) # Homogeneous fallback expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = indices), foobar(c(2, 2, 1, 1, 1)) ) # `c()` fallback with_c_foobar({ expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = indices), foobar_c(c(2, 2, 1, 1, 1)) ) }) indices <- list(1:3) expect_snapshot(error = TRUE, { list_unchop(list(1:2), indices = indices) }) expect_snapshot(error = TRUE, { list_unchop( list(1:2), indices = indices, error_call = call("foo"), error_arg = "arg" ) }) # Homogeneous fallback expect_snapshot(error = TRUE, { list_unchop(list(foobar(1:2)), indices = indices) }) expect_snapshot(error = TRUE, { list_unchop( list(foobar(1:2)), indices = indices, error_call = call("foo"), error_arg = "arg" ) }) # `c()` fallback with_c_foobar({ expect_snapshot(error = TRUE, { list_unchop(list(foobar(1:2)), indices = indices) }) expect_snapshot(error = TRUE, { list_unchop( list(foobar(1:2)), indices = indices, error_call = call("foo"), error_arg = "arg" ) }) }) }) test_that("unchopping takes the common type", { x <- list(1, "a") indices <- list(1, 2) expect_snapshot({ (expect_error( list_unchop(x, indices = indices), class = "vctrs_error_incompatible_type" )) (expect_error( list_unchop( x, indices = indices, error_call = call("foo"), error_arg = "arg" ), class = "vctrs_error_incompatible_type" )) }) x <- list(1, 2L) expect_type(list_unchop(x, indices = indices), "double") }) test_that("common type failure uses positional errors", { expect_snapshot({ x <- list(1, a = "x", 2) # Looking for `x[[1]]` and `x$a` (expect_error(list_unchop(x))) (expect_error(list_unchop(x, indices = list(2, 1, 3)))) # Directed cast should also produce directional errors (#1690) (expect_error(list_unchop(x, ptype = double()))) (expect_error(list_unchop(x, indices = list(2, 1, 3), ptype = double()))) # Lossy cast y <- list(1, a = 2.5) (expect_error(list_unchop(y, ptype = integer()))) (expect_error(list_unchop(y, indices = list(2, 1), ptype = integer()))) }) }) test_that("can specify a ptype to override common type", { indices <- list(1, 2) x <- list(1, 2L) expect_identical( list_unchop(x, indices = indices, ptype = integer()), c(1L, 2L) ) x <- list(1.5, 2) expect_snapshot({ (expect_error(list_unchop(x, indices = indices, ptype = integer()))) (expect_error(list_unchop( x, indices = indices, ptype = integer(), error_call = call("foo"), error_arg = "arg" ))) }) }) test_that("leaving `indices = NULL` unchops sequentially", { x <- list(1:2, 3:5, 6L) expect_identical(list_unchop(x), 1:6) # Homogeneous fallback x <- list(foobar(1:2), foobar(3:5), foobar(6L)) expect_identical(list_unchop(x), foobar(1:6)) # `c()` fallback with_c_foobar({ x <- list(foobar(1:2), foobar(3:5), foobar(6L)) expect_identical(list_unchop(x), foobar_c(1:6)) }) }) test_that("outer names are kept", { x <- list(x = 1, y = 2) expect_named(list_unchop(x), c("x", "y")) expect_named(list_unchop(x, indices = list(2, 1)), c("y", "x")) # Homogeneous fallback x <- list(x = foobar(1), y = foobar(2)) expect_named(list_unchop(x), c("x", "y")) expect_named(list_unchop(x, indices = list(2, 1)), c("y", "x")) # `c()` fallback (dependent on `c()` implementation) with_c_foobar({ x <- list(x = foobar(1), y = foobar(2)) expect_identical(list_unchop(x), foobar_c(c(x = 1, y = 2))) expect_named(list_unchop(x), c("x", "y")) expect_named(list_unchop(x, indices = list(2, 1)), c("y", "x")) }) }) test_that("outer names are recycled in the right order", { x <- list(x = 1, y = 2) expect_error(list_unchop(x, indices = list(c(1, 2), 3)), "Can't merge") expect_named( list_unchop(x, indices = list(c(1, 3), 2), name_spec = "{outer}_{inner}"), c("x_1", "y", "x_2") ) expect_named( list_unchop(x, indices = list(c(3, 1), 2), name_spec = "{outer}_{inner}"), c("x_2", "y", "x_1") ) }) test_that("outer names can be merged with inner names", { x <- list(x = c(a = 1), y = c(b = 2)) expect_error(list_unchop(x), "Can't merge") expect_named(list_unchop(x, name_spec = "{outer}_{inner}"), c("x_a", "y_b")) expect_named( list_unchop(x, indices = list(2, 1), name_spec = "{outer}_{inner}"), c("y_b", "x_a") ) }) test_that("preserves names when inputs are cast to a common type (#1689)", { expect_named(list_unchop(list(c(a = 1)), ptype = integer()), "a") expect_named( list_unchop(list(c(a = 1)), ptype = integer(), indices = list(1)), "a" ) # With name spec name_spec <- "{outer}_{inner}" expect_named( list_unchop(list(foo = c(a = 1)), ptype = integer(), name_spec = name_spec), "foo_a" ) expect_named( list_unchop( list(foo = c(a = 1)), ptype = integer(), name_spec = name_spec, indices = list(1) ), "foo_a" ) # When `x` elements are recycled, names are also recycled x <- list(c(a = 1), c(b = 2)) indices <- list(1:2, 3:4) expect_named( list_unchop(x, indices = indices, ptype = integer()), c("a", "a", "b", "b") ) expect_named( list_unchop( list(foo = c(a = 1)), ptype = integer(), name_spec = "inner" ), "a" ) expect_named( list_unchop( list(foo = c(a = 1)), ptype = integer(), name_spec = "inner", indices = list(1) ), "a" ) }) test_that("not all inputs have to be named", { x <- list(c(a = 1), 2, c(c = 3)) indices <- list(2, 1, 3) expect_named(list_unchop(x, indices = indices), c("", "a", "c")) # Homoegenous fallback x <- list(foobar(c(a = 1)), foobar(2), foobar(c(c = 3))) indices <- list(2, 1, 3) expect_named(list_unchop(x, indices = indices), c("", "a", "c")) # `c()` fallback with_c_foobar({ x <- list(foobar(c(a = 1)), foobar(2), foobar(c(c = 3))) indices <- list(2, 1, 3) out <- list_unchop(x, indices = indices) expect_foobar_c(out) expect_named(out, c("", "a", "c")) }) }) test_that("list_unchop() keeps data frame row names", { df1 <- data.frame(x = 1:2, row.names = c("r1", "r2")) df2 <- data.frame(x = 3:4, row.names = c("r3", "r4")) x <- list(df1, df2) indices <- list(c(3, 1), c(2, 4)) result <- list_unchop(x, indices = indices) expect <- c("r2", "r3", "r1", "r4") expect_identical(vec_names(result), expect) }) test_that("individual data frame columns retain vector names", { df1 <- data_frame(x = c(a = 1, b = 2)) df2 <- data_frame(x = c(c = 3)) x <- list(df1, df2) indices <- list(c(1, 2), 3) result <- list_unchop(x, indices = indices) expect_named(result$x, c("a", "b", "c")) # Names should be identical to equivalent `vec_c()` call expect_identical(list_unchop(x, indices = indices), vec_c(!!!x)) }) test_that("df-col row names are repaired silently", { df1 <- data_frame(x = new_data_frame(list(a = 1), row.names = "inner")) df2 <- data_frame(x = new_data_frame(list(a = 2), row.names = "inner")) x <- list(df1, df2) indices <- list(1, 2) expect_silent({ result <- list_unchop(x, indices = indices) }) expect_identical(vec_names(result$x), c("inner...1", "inner...2")) }) test_that("monitoring - can technically assign to the same location twice", { indices <- list(1:2, 1L) x <- list(1:2, 3L) expect_identical( list_unchop(x, indices = indices), c(3L, 2L, NA) ) # Homogeneous fallback x <- list(foobar(1:2), foobar(3L)) expect_identical( list_unchop(x, indices = indices), foobar(c(3L, 2L, NA)) ) # `c()` fallback with_c_foobar({ x <- list(foobar(1:2), foobar(3L)) expect_identical( list_unchop(x, indices = indices), foobar_c(c(3L, 2L, NA)) ) }) }) test_that("index values are validated", { x <- list(1, 2) indices1 <- list(4, 1) indices2 <- list(c(1, 4), 2) indices3 <- list(c(1, 3, 4), 2) expect_error( list_unchop(x, indices = indices1), class = "vctrs_error_subscript_oob" ) expect_error( list_unchop(x, indices = indices2), class = "vctrs_error_subscript_oob" ) expect_identical(list_unchop(x, indices = indices3), c(1, 2, 1, 1)) }) test_that("name repair is respected and happens after ordering according to `indices`", { local_name_repair_quiet() x <- list(c(a = 1), c(a = 2)) indices <- list(2, 1) expect_named(list_unchop(x, indices = indices), c("a", "a")) expect_named( list_unchop(x, indices = indices, name_repair = "unique"), c("a...1", "a...2") ) }) test_that("list_unchop() can repair names quietly", { local_name_repair_verbose() x <- c(x = "a", x = "b", x = "c") indices <- list(2, c(3, 1)) expect_snapshot({ res <- list_unchop( vec_chop(x, indices = indices), indices = indices, name_repair = "unique_quiet" ) }) expect_named(res, c("x...1", "x...2", "x...3")) x <- c("if" = "a", "in" = "b", "for" = "c") indices <- list(2, c(3, 1)) expect_snapshot({ res <- list_unchop( vec_chop(x, indices = indices), indices = indices, name_repair = "universal_quiet" ) }) expect_named(res, c(".if", ".in", ".for")) }) test_that("list_unchop() errors on unsupported location values", { expect_snapshot(error = TRUE, cnd_class = TRUE, { list_unchop(list(1, 2), indices = list(c(1, 2), 0)) }) expect_snapshot(error = TRUE, cnd_class = TRUE, { list_unchop(list(1), indices = list(-1)) }) # Homogeneous fallback expect_snapshot(error = TRUE, cnd_class = TRUE, { list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 2), 0)) }) expect_snapshot(error = TRUE, cnd_class = TRUE, { list_unchop(list(foobar(1)), indices = list(-1)) }) # `c()` fallback with_c_foobar({ expect_snapshot(error = TRUE, cnd_class = TRUE, { list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 2), 0)) }) expect_snapshot(error = TRUE, cnd_class = TRUE, { list_unchop(list(foobar(1)), indices = list(-1)) }) }) }) test_that("missing values propagate", { indices <- list(c(NA_integer_, NA_integer_), c(NA_integer_, 3)) expect_identical( list_unchop( list(1, 2), indices = indices ), c(NA, NA, 2, NA) ) # Homogenous fallback expect_identical( list_unchop( list(foobar(1), foobar(2)), indices = indices ), foobar(c(NA, NA, 2, NA)) ) # `c()` fallback with_c_foobar({ expect_identical( list_unchop( list(foobar(1), foobar(2)), indices = indices ), foobar_c(c(NA, NA, 2, NA)) ) }) }) test_that("list_unchop() works with simple homogeneous foreign S3 classes", { values <- list( foobar(1:2), foobar(3:4) ) indices <- list(c(1, 3), c(2, 4)) expect_identical( list_unchop(values, indices = indices), foobar(int(1, 3, 2, 4)) ) # And in data frame columns (#1975) values <- list( data_frame(x = foobar(1:2)), data_frame(x = foobar(3:4)) ) indices <- list(c(1, 3), c(2, 4)) expect_identical( list_unchop(values, indices = indices), data_frame(x = foobar(int(1, 3, 2, 4))) ) }) test_that("list_unchop() fails with complex foreign S3 classes", { expect_snapshot({ x <- structure(foobar(1), attr_foo = "foo") y <- structure(foobar(2), attr_bar = "bar") (expect_error( list_unchop(list(x, y)), class = "vctrs_error_incompatible_type" )) (expect_error( list_unchop(list(x, y), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_unchop() fails with complex foreign S4 classes", { expect_snapshot({ joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") (expect_error( list_unchop(list(joe, jane)), class = "vctrs_error_incompatible_type" )) (expect_error( list_unchop(list(joe, jane), error_call = call("foo"), error_arg = "arg"), class = "vctrs_error_incompatible_type" )) }) }) test_that("list_unchop() falls back to c() if S3 method is available", { # Check off-by-one error expect_error( list_unchop(list(foobar(1), "", foobar(2)), indices = list(1, 2, 3)), class = "vctrs_error_incompatible_type" ) # Fallback when the class implements `c()` local_c_foobar() expect_identical( list_unchop(list(foobar(1), foobar(2))), foobar_c(c(1, 2)) ) expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(1, 2)), foobar_c(c(1, 2)) ) expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(2, 1)), foobar_c(c(2, 1)) ) expect_identical( list_unchop(list(NULL, foobar(1), NULL, foobar(2))), foobar_c(c(1, 2)) ) # OOB error is respected expect_error( list_unchop(list(foobar(1), foobar(2)), indices = list(1, 3)), class = "vctrs_error_subscript_oob" ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( list_unchop(list(foobar(c(1, 2)), foobar(3)), indices = list(c(1, 3), 1)), foobar_c(c(3, NA, 2)) ) expect_identical( list_unchop(list(foobar(c(1, 2)), foobar(3)), indices = list(c(2, NA), NA)), foobar_c(c(NA, 1, NA)) ) # Names are kept expect_identical( list_unchop( list(foobar(c(x = 1, y = 2)), foobar(c(x = 1))), indices = list(c(2, 1), 3) ), foobar_c(c(y = 2, x = 1, x = 1)) ) # Recycles to the size of index expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 3), 2)), foobar_c(c(1, 2, 1)) ) expect_identical( list_unchop(list(foobar(1), foobar(2)), indices = list(c(1, 2), integer())), foobar_c(c(1, 1)) ) expect_snapshot({ (expect_error( list_unchop( list(foobar(1), foobar(2)), indices = list(c(1, 3), integer()) ), class = "vctrs_error_subscript_oob" )) }) expect_snapshot({ x <- list(foobar(1:2)) indices <- list(1:3) (expect_error(list_unchop(x, indices = indices))) (expect_error(list_unchop( x, indices = indices, error_arg = "arg", error_call = call("foo") ))) }) method_vctrs_c_fallback <- function(...) { xs <- list(...) xs <- map(xs, unclass) res <- exec("c", !!!xs) structure(res, class = "vctrs_c_fallback") } # Registered fallback s3_register("base::c", "vctrs_c_fallback", method_vctrs_c_fallback) expect_identical( list_unchop( list( structure(1, class = "vctrs_c_fallback"), structure(2, class = "vctrs_c_fallback") ), indices = list(2, 1) ), structure(c(2, 1), class = "vctrs_c_fallback") ) # Don't fallback for S3 lists which are treated as scalars by default expect_error( list_unchop(list(foobar(list(1)), foobar(list(2)))), class = "vctrs_error_scalar_type" ) }) test_that("list_unchop() falls back even when ptype is supplied", { expect_foobar( list_unchop(list(foobar(1), foobar(2)), ptype = foobar(dbl())) ) with_c_quux <- function(expr) { with_methods(expr, c.vctrs_foobar = function(...) quux(NextMethod())) } with_c_quux({ expect_quux( list_unchop( list(foobar(1), foobar(2)), indices = list(1, 2), ptype = foobar(dbl()) ) ) }) with_c_quux({ expect_quux( list_unchop( list(foobar(1, foo = TRUE), foobar(2, bar = TRUE)), indices = list(1, 2), ptype = foobar(dbl()) ) ) }) }) test_that("list_unchop() falls back for S4 classes with a registered c() method", { joe <- .Counts(c(1L, 2L), name = "Joe") jane <- .Counts(3L, name = "Jane") expect_snapshot({ (expect_error( list_unchop(list(joe, 1, jane), indices = list(c(1, 2), 3, 4)), class = "vctrs_error_incompatible_type" )) }) local_c_counts() expect_identical( list_unchop(list(joe, jane), indices = list(c(1, 3), 2)), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) expect_identical( list_unchop(list(NULL, joe, jane), indices = list(integer(), c(1, 3), 2)), .Counts(c(1L, 3L, 2L), name = "Dispatched") ) # Unassigned locations results in missing values. # Repeated assignment uses the last assigned value. expect_identical( list_unchop(list(joe, jane), indices = list(c(1, 3), 1)), .Counts(c(3L, NA, 2L), name = "Dispatched") ) expect_identical( list_unchop(list(joe, jane), indices = list(c(2, NA), NA)), .Counts(c(NA, 1L, NA), name = "Dispatched") ) }) test_that("list_unchop() fallback doesn't support (most) `name_spec` or `ptype`", { local_c_foobar() foo <- structure(foobar(1), foo = "foo") bar <- structure(foobar(2), bar = "bar") expect_snapshot(error = TRUE, { list_unchop( list(foo, bar), indices = list(1, 2), name_spec = "{outer}_{inner}" ) }) expect_snapshot(error = TRUE, { list_unchop( list(foo, bar), indices = list(1, 2), name_spec = "{outer}_{inner}", error_call = call("foo") ) }) # Used to be an error about `ptype` x <- list(foobar(1)) expect_snapshot(error = TRUE, { list_unchop(x, indices = list(1), ptype = "") }) }) test_that("list_unchop() fallback does support `name_spec = 'inner'`", { # Because of how useful it is, and how easy it is to implement! expect_identical( with_c_foobar(list_unchop( list(foobar(1), foobar(2)), indices = list(1, 2), name_spec = "inner" )), foobar_c(c(1, 2)) ) expect_identical( with_c_foobar(list_unchop( list(x = foobar(1), y = foobar(2)), indices = list(1, 2), name_spec = "inner" )), foobar_c(c(1, 2)) ) expect_identical( with_c_foobar(list_unchop( list( x = foobar(c(a = 1)), y = foobar(c(b = 2)), z = foobar(3) ), indices = list(1, 2, 3), name_spec = "inner" )), foobar_c(c(a = 1, b = 2, 3)) ) }) test_that("list_unchop() supports numeric S3 indices", { local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_foobar") }, vec_ptype2.vctrs_foobar.integer = function(x, y, ...) foobar(integer()), vec_cast.integer.vctrs_foobar = function(x, to, ...) vec_data(x) ) expect_identical(list_unchop(list(1), indices = list(foobar(1L))), 1) }) test_that("list_unchop() does not support non-numeric S3 indices", { expect_snapshot({ (expect_error( list_unchop(list(1), indices = list(factor("x"))), class = "vctrs_error_subscript_type" )) (expect_error( list_unchop(list(1), indices = list(foobar(1L))), class = "vctrs_error_subscript_type" )) }) }) test_that("can ignore names in `list_unchop()` by providing a `zap()` name-spec (#232)", { expect_snapshot({ (expect_error(list_unchop(list(a = c(b = 1:2))))) (expect_error(list_unchop(list(a = c(b = 1:2)), error_call = call("foo")))) }) expect_identical( list_unchop(list(a = c(b = 1:2), b = 3L), name_spec = zap()), 1:3 ) expect_identical( list_unchop( list(a = c(foo = 1:2), b = c(bar = 3L)), indices = list(2:1, 3), name_spec = zap() ), c(2L, 1L, 3L) ) expect_snapshot({ x <- list(a = c(b = letters), b = 3L) (expect_error( list_unchop(x, name_spec = zap()), class = "vctrs_error_incompatible_type" )) x <- list(a = c(foo = 1:2), b = c(bar = "")) (expect_error( list_unchop( x, indices = list(2:1, 3), name_spec = zap() ), class = "vctrs_error_incompatible_type" )) }) }) test_that("can ignore outer names in `list_unchop()` by providing a 'inner' name-spec (#1988)", { expect_identical( list_unchop( list(x = c(a = 1, 2), y = c(3, b = 4)), indices = list(c(3, 1), c(2, 4)), name_spec = "inner" ), c(2, 3, a = 1, b = 4) ) # Importantly, outer names are still used in error messages! expect_snapshot(error = TRUE, { list_unchop( list(x = c(a = 1), y = c(b = "2")), indices = list(1, 2), name_spec = "inner" ) }) }) test_that("list_unchop() falls back to c() methods (#1120)", { expect_error( list_unchop(list(foobar(1), foobar(2, class = "foo"))), class = "vctrs_error_incompatible_type" ) local_methods( c.vctrs_foobar = function(...) { out <- NextMethod() paste0(rep_along(out, "dispatched"), seq_along(out)) } ) # Homogeneous subclasses xs <- list(foobar(1), foobar(2, class = "foo")) expect_identical( list_unchop(xs), c("dispatched1", "dispatched2") ) expect_identical( list_unchop(xs, indices = list(2, 1)), c("dispatched2", "dispatched1") ) # Different subclasses xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_identical( list_unchop(xs), c("dispatched1", "dispatched2", "dispatched3") ) expect_identical( list_unchop(xs, indices = list(c(2, 1), 3)), c("dispatched2", "dispatched1", "dispatched3") ) }) test_that("list_unchop() fails if foreign classes are not homogeneous and there is no c() method", { xs <- list( foobar(c(x = 1, y = 2), class = "foo"), foobar(c(x = 1), foo = 1) ) expect_error( list_unchop(xs), class = "vctrs_error_incompatible_type" ) expect_error( list_unchop(xs, indices = list(c(2, 1), 3)), class = "vctrs_error_incompatible_type" ) }) test_that("calls cast method even with empty objects", { # https://github.com/paleolimbot/wk/issues/230 # There is a common type, but the cast method is intended # to fail here for this test local_methods( vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x }, vec_cast.vctrs_foobar.default = function(x, to, ...) { vec_default_cast(x, to) } ) expect_snapshot(error = TRUE, { list_unchop( list( foobar(integer()), foobar(integer(), foo = "bar") ), indices = list( integer(), integer() ) ) }) }) test_that("Size 1 unspecified `NA` that isn't used doesn't error (#1989)", { # Works because we don't actually recycle `NA` to size 0 in the loop, we # just check that it can recycle. If we recycled to `logical()`, it would # no longer look unspecified and an error would be thrown instead. expect_identical( list_unchop( list("x", NA), indices = list(1L, integer()) ), "x" ) expect_identical( list_unchop( list("x", NA), indices = list(integer(), 1L) ), NA_character_ ) }) test_that("list_unchop() and vec_c() are consistent-ish regarding `size` and empty inputs (#1980)", { x <- list() indices <- list() # These should be consistent and return `NULL` when no inputs are provided. # We treat this roughly equivalent to `unspecified(0)`. expect_identical(vec_c(), NULL) expect_identical(vec_c(), list_unchop(x, indices = indices)) # These should be consistent and return `NULL` when no inputs are provided. # We treat this roughly equivalent to `unspecified(0)`. expect_identical(vec_c(NULL), NULL) expect_identical( vec_c(NULL), list_unchop(list(NULL), indices = list(integer())) ) # This is ambiguous but we let this return `NULL` as well. # `list_combine()` doesn't have this ambiguity expect_null(list_unchop(list(NULL), indices = list(1:2))) expect_identical( list_combine(list(NULL), indices = list(1:2), size = 2), unspecified(2) ) # These should be consistent and return size 0 `ptype` expect_identical(vec_c(.ptype = integer()), integer()) expect_identical( vec_c(.ptype = integer()), list_unchop(x, indices = indices, ptype = integer()) ) }) vctrs/tests/testthat/test-type2.R0000644000176200001440000003032015132161317016556 0ustar liggesuserstest_that("base coercions are symmetric and unchanging", { types <- list( logical(), integer(), double(), character(), raw(), list() ) mat <- maxtype_mat(types) expect_true(isSymmetric(mat)) expect_snapshot(mat) }) test_that("new classes are uncoercible by default", { x <- structure(1:10, class = "vctrs_nonexistant") expect_error(vec_ptype2(1, x), class = "vctrs_error_incompatible_type") expect_error(vec_ptype2(x, 1), class = "vctrs_error_incompatible_type") }) test_that("vec_typeof2() returns common type", { nms <- names(base_empty_types) for (i in seq_along(base_empty_types)) { this <- nms[[i]] for (j in seq_along(base_empty_types)) { that <- nms[[j]] if (i <= j) { exp <- paste0("VCTRS_TYPE2_", this, "_", that) } else { exp <- paste0("VCTRS_TYPE2_", that, "_", this) } out <- vec_typeof2(base_empty_types[[this]], base_empty_types[[that]]) expect_identical(out, exp) } } }) test_that("vec_typeof2_s3() returns common type", { all_base_empty_types <- c(base_empty_types, base_s3_empty_types) nms_s3 <- names(base_s3_empty_types) nms <- names(all_base_empty_types) for (i in seq_along(all_base_empty_types)) { this <- nms[[i]] for (j in seq_along(all_base_empty_types)) { that <- nms[[j]] # Skip when we have two non s3 objects if (!(this %in% nms_s3) & !(that %in% nms_s3)) { next } if (i <= j) { exp <- paste0("VCTRS_TYPE2_S3_", this, "_", that) } else { exp <- paste0("VCTRS_TYPE2_S3_", that, "_", this) } out <- vec_typeof2_s3( all_base_empty_types[[this]], all_base_empty_types[[that]] ) expect_identical(out, exp) } } }) test_that("vec_ptype2() dispatches when inputs have shape", { expect_identical( dim(vec_ptype2(int(), matrix(nrow = 3, ncol = 4))), c(0L, 4L) ) expect_identical( dim(vec_ptype2(matrix("", nrow = 3), c("", "", ""))), c(0L, 1L) ) }) test_that("vec_ptype2() requires vectors", { expect_error(vec_ptype2(NULL, quote(name)), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(NA, quote(name)), class = "vctrs_error_scalar_type") expect_error( vec_ptype2(list(), quote(name)), class = "vctrs_error_scalar_type" ) expect_error(vec_ptype2(quote(name), NULL), class = "vctrs_error_scalar_type") expect_error(vec_ptype2(quote(name), NA), class = "vctrs_error_scalar_type") expect_error( vec_ptype2(quote(name), list()), class = "vctrs_error_scalar_type" ) expect_error( vec_ptype2(quote(name), quote(name)), class = "vctrs_error_scalar_type" ) }) test_that("vec_ptype2() with unspecified requires vectors", { expect_error( vec_ptype2(unspecified(), quote(name)), class = "vctrs_error_scalar_type" ) expect_error( vec_ptype2(quote(name), unspecified()), class = "vctrs_error_scalar_type" ) }) test_that("vec_ptype2() forwards argument tag", { expect_error( vec_ptype2(quote(name), list(), x_arg = "foo"), "`foo`", class = "vctrs_error_scalar_type" ) expect_error( vec_ptype2(list(), quote(name), y_arg = "foo"), "`foo`", class = "vctrs_error_scalar_type" ) }) test_that("stop_incompatible_type() checks for scalars", { expect_error( stop_incompatible_type(NA, foobar(), x_arg = "x", y_arg = "y"), class = "vctrs_error_scalar_type" ) expect_error( vec_ptype_common(NA, foobar()), class = "vctrs_error_scalar_type" ) expect_error( vec_ptype_common(foobar(), list()), class = "vctrs_error_scalar_type" ) }) test_that("vec_ptype2() methods forward args to stop_incompatible_type()", { expect_args(new_hidden(), lgl(), x_arg = "foo", y_arg = "bar") expect_args(lgl(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(int(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(dbl(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(chr(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(list(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args( new_rcrd(list(x = NA)), new_hidden(), x_arg = "foo", y_arg = "bar" ) expect_args(data.frame(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(Sys.Date(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args( as.difftime(1, units = "hours"), new_hidden(), x_arg = "foo", y_arg = "bar" ) expect_args(factor(), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(ordered(""), new_hidden(), x_arg = "foo", y_arg = "bar") expect_args(ordered(""), factor(), x_arg = "foo", y_arg = "bar") expect_args( bit64::as.integer64(1), new_hidden(), x_arg = "foo", y_arg = "bar" ) }) test_that("vec_ptype2() data frame methods builds argument tags", { # Bare objects expect_snapshot(error = TRUE, { vec_ptype2("foo", 10) }) # Nested dataframes expect_snapshot(error = TRUE, { df1 <- tibble(x = tibble(y = tibble(z = 1))) df2 <- tibble(x = tibble(y = tibble(z = "a"))) vec_ptype2(df1, df2) }) }) test_that("stop_incompatible_type() can be called without argument tags", { expect_error( stop_incompatible_type(1, 2, x_arg = "", y_arg = ""), " and ", class = "vctrs_error_incompatible_type" ) }) test_that("vec_ptype2() returns empty prototype when other input is NULL", { expect_identical(vec_ptype2(1:5, NULL), int()) expect_identical(vec_ptype2(NULL, 1:5), int()) }) test_that("Subclasses of data.frame dispatch to `vec_ptype2()` methods", { local_methods( vec_ptype2.quuxframe = function(x, y, ...) { UseMethod("vec_ptype2.quuxframe") }, vec_ptype2.quuxframe.data.frame = function(x, y, ...) "dispatched!", vec_ptype2.data.frame.quuxframe = function(x, y, ...) "dispatched!" ) quux <- structure(data.frame(), class = c("quuxframe", "data.frame")) expect_identical(vec_ptype2(quux, mtcars), "dispatched!") expect_identical(vec_ptype2(mtcars, quux), "dispatched!") quux <- structure( data.frame(), class = c("quuxframe", "tbl_df", "data.frame") ) expect_identical(vec_ptype2(quux, mtcars), "dispatched!") expect_identical(vec_ptype2(mtcars, quux), "dispatched!") }) test_that("Subclasses of `tbl_df` have `tbl_df` common type (#481)", { quux <- foobar(tibble()) expect_identical( vec_ptype_common(quux, tibble()), tibble() ) expect_identical( vec_ptype_common(tibble(), quux), tibble() ) }) test_that("Column name encodings are handled correctly in the common type (#553)", { encs <- encodings() data <- list(chr()) df_utf8 <- tibble::as_tibble(set_names(data, encs$utf8)) df_unknown <- tibble::as_tibble(set_names(data, encs$unknown)) expect_identical(vec_ptype2(df_utf8, df_unknown), df_utf8) }) test_that("vec_is_subtype() determines subtyping relationship", { expect_true(vec_is_subtype(lgl(), int())) expect_false(vec_is_subtype(int(), lgl())) expect_false(vec_is_subtype(lgl(), chr())) expect_false(vec_is_subtype(chr(), lgl())) local_methods( vec_ptype2.vctrs_foobar = function(x, y, ...) { UseMethod("vec_ptype2.vctrs_foobar") }, vec_ptype2.vctrs_foobar.logical = function(x, y, ...) logical(), vec_ptype2.logical.vctrs_foobar = function(x, y, ...) logical() ) expect_true(vec_is_subtype(foobar(TRUE), lgl())) expect_false(vec_is_subtype(lgl(), foobar(TRUE))) }) test_that("can override scalar vector error message for base scalar types", { expect_snapshot({ (expect_error( vec_ptype2(NULL, quote(x), y_arg = "foo"), class = "vctrs_error_scalar_type" )) (expect_error( vec_ptype2(quote(x), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type" )) }) }) test_that("can override scalar vector error message for S3 types", { expect_snapshot({ (expect_error( vec_ptype2(NULL, foobar(), y_arg = "foo"), class = "vctrs_error_scalar_type" )) (expect_error( vec_ptype2(foobar(), NULL, x_arg = "foo"), class = "vctrs_error_scalar_type" )) }) }) test_that("ptype2 and cast errors when same class fallback is impossible are informative", { expect_snapshot({ (expect_error( vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type" )) (expect_error( vec_ptype2(foobar(1, bar = TRUE), foobar(2, baz = TRUE)), class = "vctrs_error_incompatible_type" )) }) }) test_that("Incompatible attributes bullets are not show when methods are implemented", { expect_snapshot({ with_foobar_cast <- function(expr) { with_methods( vec_cast.vctrs_foobar = function(...) NULL, vec_cast.vctrs_foobar.vctrs_foobar = function(x, to, ...) { vec_default_cast(x, to, ...) }, expr ) } with_foobar_ptype2 <- function(expr) { with_methods( vec_ptype2.vctrs_foobar = function(...) NULL, vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { vec_default_ptype2(x, y, ...) }, expr ) } (expect_error( with_foobar_cast(vec_cast(foobar(1, bar = TRUE), foobar(2, baz = TRUE))), class = "vctrs_error_incompatible_type" )) (expect_error( with_foobar_ptype2(vec_ptype2( foobar(1, bar = TRUE), foobar(2, baz = TRUE) )), class = "vctrs_error_incompatible_type" )) }) }) test_that("attributes no longer play a role in bare data frame fallback", { foobar_bud <- foobar(mtcars, bud = TRUE) foobar_boo <- foobar(mtcars, boo = TRUE) expect_equal( vec_ptype2(foobar_bud, foobar_boo), vec_slice(unrownames(mtcars), 0) ) expect_equal( vec_ptype2(foobar(mtcars), foobaz(mtcars)), vec_slice(unrownames(mtcars), 0) ) }) test_that("vec_ptype2() methods get prototypes", { x <- NULL y <- NULL local_methods(vec_ptype2.vctrs_foobar.vctrs_foobar = function(x, y, ...) { x <<- x y <<- y NULL }) vec_ptype2(foobar(1:3), foobar(letters)) expect_identical(x, foobar(int())) expect_identical(y, foobar(chr())) vec_ptype2(foobar(mtcars), foobar(iris)) expect_identical(x, foobar(mtcars[0, , drop = FALSE])) expect_identical(y, foobar(iris[0, , drop = FALSE])) }) test_that("vec_ptype2() allows vec_ptype() to return another type", { out <- with_methods( vec_restore.vctrs_foobar = function(x, to, ...) unstructure(x), vec_ptype2(foobar(1), foobar(2)) ) expect_identical(out, dbl()) }) test_that("vec_ptype2() evaluates x_arg and y_arg lazily", { expect_silent(vec_ptype2(1L, 1L, x_arg = print("oof"))) expect_silent(vec_ptype2(1L, 1L, y_arg = print("oof"))) }) test_that("can restart ptype2 errors", { x <- data_frame(x = ordered(c("a", "b", "c"))) y <- data_frame(x = ordered(c("A", "B", "C"))) exp <- c("a", "b", "c", "A", "B", "C") exp <- factor(exp, exp) expect_error(vec_rbind(x, y), class = "vctrs_error_incompatible_type") expect_equal( with_ordered_restart(vec_rbind(x, y)), data_frame(x = exp) ) z <- data_frame(x = chr()) expect_equal( with_ordered_restart(vec_ptype_common(x, y)), data_frame(x = exp[0]) ) expect_equal( with_ordered_restart(vec_ptype_common(x, y, z)), data_frame(x = chr()) ) expect_equal( with_ordered_restart(vec_cast_common(x, y)), list( data_frame(x = factor(c("a", "b", "c"), levels(exp))), data_frame(x = factor(c("A", "B", "C"), levels(exp))) ) ) expect_equal( with_ordered_restart(vec_cast_common(x, y, z)), list( data_frame(x = c("a", "b", "c")), data_frame(x = c("A", "B", "C")), data_frame(x = chr()) ) ) # Factor case y <- data_frame(x = factor(c("A", "B", "C"))) expect_equal( with_ordered_restart(vec_rbind(x, y)), data_frame(x = exp) ) }) test_that("subclasses of tibble are compatible", { tib <- foobar(tibble(x = 1)) ptype <- foobar(tibble(x = dbl())) expect_equal(vec_ptype_common(tib), ptype) expect_equal(vec_ptype_common(tib, tib), ptype) }) test_that("error indexing is correct with unspecifieds", { expect_snapshot(error = TRUE, { vec_ptype_common(1, NA, "x") }) expect_snapshot(error = TRUE, { vec_ptype_common(NA, 1, "x") }) expect_snapshot(error = TRUE, { vec_ptype_common(NA, NA, 1, "x") }) expect_snapshot(error = TRUE, { vec_ptype_common(1, NA, NA, 1, "x") }) }) vctrs/tests/testthat/test-complete.R0000644000176200001440000001145015065005761017333 0ustar liggesusers# vec_slice_complete ----------------------------------------------------------- test_that("can slice complete", { df <- data_frame(x = c(1, NA, 3), y = c(1, 2, NA)) expect_identical(vec_slice_complete(df), vec_slice(df, 1)) }) test_that("vec_slice_complete() works with size 0 input", { expect_identical(vec_slice_complete(integer()), integer()) expect_identical(vec_slice_complete(data.frame()), data.frame()) }) # vec_locate_complete ---------------------------------------------------------- test_that("vec_locate_complete() can locate complete", { df <- data_frame(x = c(1, NA, 3), y = c(1, 2, NA)) expect_identical(vec_locate_complete(df), 1L) }) test_that("vec_locate_complete() works with size 0 input", { expect_identical(vec_locate_complete(logical()), integer()) expect_identical(vec_locate_complete(data.frame()), integer()) }) # vec_detect_complete ---------------------------------------------------------- test_that("works with size zero input", { expect_identical(vec_detect_complete(integer()), logical()) expect_identical(vec_detect_complete(data.frame()), logical()) }) test_that("NA_real_ and NaN are both missing", { expect_identical(vec_detect_complete(c(NA_real_, NaN)), c(FALSE, FALSE)) }) test_that("works rowwise", { df <- data_frame(x = c(NA, 1, NA, 2), y = c(NA, NA, 2, 3)) expect <- c(FALSE, FALSE, FALSE, TRUE) expect_identical(vec_detect_complete(df), expect) df <- data_frame(x = c(1, 1), y = c(2, 2), z = c(1, NA)) expect <- c(TRUE, FALSE) expect_identical(vec_detect_complete(df), expect) }) test_that("works with data frames with rows but no columns", { expect_identical(vec_detect_complete(new_data_frame(n = 5L)), rep(TRUE, 5)) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, NA, 2, 2), b = c(1, 2, NA, 3)) df <- data_frame(x = rep(1, 4), y = col) expect <- c(TRUE, FALSE, FALSE, TRUE) expect_identical(vec_detect_complete(df), expect) }) test_that("works with various types", { expect <- c(TRUE, TRUE, FALSE, TRUE, FALSE) expect_identical(vec_detect_complete(c(TRUE, TRUE, NA, FALSE, NA)), expect) expect_identical(vec_detect_complete(c(1L, 1L, NA, 2L, NA)), expect) expect_identical(vec_detect_complete(c(1, 1, NA, 2, NA)), expect) expect_identical( vec_detect_complete(complex( real = c(1, 1, NA, 2, 2), imaginary = c(1, 1, 2, 2, NA) )), expect ) expect_identical(vec_detect_complete(c("a", "a", NA, "b", NA)), expect) expect_identical(vec_detect_complete(list(1, 1, NULL, 2, NULL)), expect) # No missing raw value expect_identical(vec_detect_complete(as.raw(c(1, 1, 2, 2, 3))), rep(TRUE, 5)) }) test_that("takes the equality proxy", { x <- as.POSIXlt(c(NA, 0), origin = "1970-01-01") df <- data_frame(a = 1:2, x = x) expect <- c(FALSE, TRUE) expect_identical(vec_detect_complete(x), expect) expect_identical(vec_detect_complete(df), expect) }) test_that("columns with a data frame proxy are incomplete if any columns of the proxy are incomplete (#1404)", { df <- data_frame( x = c(NA, 0, 1, 2, 3), y = new_rcrd(list(a = c(1, 1, 1, NA, NA), b = c(2, 2, 2, 2, NA))), z = new_rcrd(list(a = c(1, 1, NA, 1, 1), b = c(2, 2, NA, NA, 1))) ) expect_identical(vec_detect_complete(df), c(FALSE, TRUE, FALSE, FALSE, FALSE)) }) test_that("can have rcrd fields of all types", { make_rcrd <- function(x) { new_rcrd(list(x = x)) } expect <- c(TRUE, TRUE, FALSE, TRUE, FALSE) expect_identical( vec_detect_complete(make_rcrd(c(TRUE, TRUE, NA, FALSE, NA))), expect ) expect_identical( vec_detect_complete(make_rcrd(c(1L, 1L, NA, 2L, NA))), expect ) expect_identical(vec_detect_complete(make_rcrd(c(1, 1, NA, 2, NA))), expect) expect_identical( vec_detect_complete(make_rcrd(complex( real = c(1, 1, NA, 2, 2), imaginary = c(1, 1, 2, 2, NA) ))), expect ) expect_identical( vec_detect_complete(make_rcrd(c("a", "a", NA, "b", NA))), expect ) expect_identical( vec_detect_complete(make_rcrd(list(1, 1, NULL, 2, NULL))), expect ) # No missing raw value expect_identical( vec_detect_complete(make_rcrd(as.raw(c(1, 1, 2, 2, 3)))), rep(TRUE, 5) ) }) test_that("works with arrays", { x <- array(c(1, 2, 3, NA), c(2, 2)) y <- array(c(1:3, NA, 5:8), c(2, 2, 2)) expect_identical(vec_detect_complete(x), c(TRUE, FALSE)) expect_identical(vec_detect_complete(y), c(TRUE, FALSE)) }) test_that("works with `NULL`", { # Consistent with `vec_detect_missing()` expect_identical(vec_detect_complete(NULL), logical()) }) test_that("catches `NULL` data frame columns", { df <- new_data_frame(list(x = integer(), y = NULL), n = 0L) expect_snapshot(error = TRUE, { vec_detect_complete(df) }) }) test_that("catches scalar objects", { expect_snapshot(error = TRUE, { vec_detect_complete(lm(1 ~ 1)) }) }) vctrs/tests/testthat/test-rank.R0000644000176200001440000001351315120272011016442 0ustar liggesuserstest_that("can rank with different types of `ties`", { x <- c(2L, 5L, 1L, 1L, 2L) expect_identical(vec_rank(x, ties = "min"), rank(x, ties.method = "min")) expect_identical(vec_rank(x, ties = "max"), rank(x, ties.method = "max")) expect_identical( vec_rank(x, ties = "sequential"), rank(x, ties.method = "first") ) expect_identical(vec_rank(x, ties = "dense"), c(2L, 3L, 1L, 1L, 2L)) }) test_that("can rank in descending order", { x <- c(2L, 5L, 1L, 1L, 2L) expect_identical( vec_rank(x, ties = "min", direction = "desc"), rank(-x, ties.method = "min") ) expect_identical( vec_rank(x, ties = "max", direction = "desc"), rank(-x, ties.method = "max") ) expect_identical( vec_rank(x, ties = "sequential", direction = "desc"), rank(-x, ties.method = "first") ) expect_identical( vec_rank(x, ties = "dense", direction = "desc"), c(2L, 1L, 3L, 3L, 2L) ) }) test_that("can rank incomplete values with `NA`", { x <- c(2, NA, 4, NaN, 4, 2, NA) expect_identical( vec_rank(x, ties = "min", incomplete = "na"), rank(x, ties.method = "min", na.last = "keep") ) expect_identical( vec_rank(x, ties = "max", incomplete = "na"), rank(x, ties.method = "max", na.last = "keep") ) expect_identical( vec_rank(x, ties = "sequential", incomplete = "na"), rank(x, ties.method = "first", na.last = "keep") ) expect_identical( vec_rank(x, ties = "dense", incomplete = "na"), c(1L, NA, 2L, NA, 2L, 1L, NA) ) # NaN are treated as missing, regardless of whether or not they are distinct from NA_real_ expect_identical( vec_rank(x, ties = "min", incomplete = "na", nan_distinct = TRUE), vec_rank(x, ties = "min", incomplete = "na", nan_distinct = FALSE) ) }) test_that("works correctly when `incomplete = 'na'` with no missing values", { x <- c(1, 2, 1, 5, 2) expect_identical(vec_rank(x, incomplete = "na"), rank(x, ties.method = "min")) }) test_that("when ranking incomplete values, all NA (or NaN) values get the same rank", { # this is in contrast to rank(), which treats all NA (NaN) as different x <- c(1, NA, 3, NaN, NA, 1, NaN) expect_identical( vec_rank(x, na_value = "largest"), c(1L, 4L, 3L, 4L, 4L, 1L, 4L) ) expect_identical( vec_rank(x, na_value = "smallest"), c(5L, 1L, 7L, 1L, 1L, 5L, 1L) ) # If distinct, NaN are always ranked between real numbers and NA_real_ expect_identical( vec_rank(x, na_value = "largest", nan_distinct = TRUE), c(1L, 6L, 3L, 4L, 6L, 1L, 4L) ) expect_identical( vec_rank(x, na_value = "smallest", nan_distinct = TRUE), c(5L, 1L, 7L, 3L, 1L, 5L, 3L) ) }) test_that("ranks character vectors in the C locale", { x <- c("B", "b", "a") expect_identical(vec_rank(x), c(1L, 3L, 2L)) }) test_that("works with data frames", { df <- data_frame( x = c(1, 2, 1, 2, 2), y = c(2, 2, 1, 2, 5) ) expect_identical(vec_rank(df, ties = "min"), c(2L, 3L, 1L, 3L, 5L)) expect_identical(vec_rank(df, ties = "sequential"), c(2L, 3L, 1L, 4L, 5L)) }) test_that("works with data frames with 0 columns and >0 rows (#1863)", { # All rows are treated as being from the same group df <- data_frame(.size = 5) expect_identical(vec_rank(df, ties = "min"), c(1L, 1L, 1L, 1L, 1L)) expect_identical(vec_rank(df, ties = "sequential"), c(1L, 2L, 3L, 4L, 5L)) expect_identical( vec_rank(df, ties = "sequential", direction = "desc"), c(1L, 2L, 3L, 4L, 5L) ) }) test_that("works with data frames with 0 columns and 0 rows (#1863)", { df <- data_frame(.size = 0) expect_identical(vec_rank(df, ties = "min"), integer()) expect_identical(vec_rank(df, ties = "sequential"), integer()) }) test_that("can control the direction per column", { df <- data_frame( x = c(1, 2, 1, 2, 2), y = c(2, 2, 1, 2, 5) ) df2 <- df df2$y <- -df2$y expect_identical( vec_rank(df, direction = c("asc", "desc")), vec_rank(df2, direction = "asc") ) }) test_that("incompleteness is respected in data frames and rcrds", { df <- data_frame( x = c(1, NA, NA, 1), y = c(NA, NA, 1, 1) ) expect_identical(vec_rank(df, incomplete = "na"), c(NA, NA, NA, 1L)) expect_identical( vec_rank(df, incomplete = "na", direction = "desc"), c(NA, NA, NA, 1L) ) x <- new_rcrd(list( x = c(1, 1, NA, NA, 1), y = c(1, NA, 1, NA, 1) )) expect_identical(vec_rank(x, incomplete = "na"), c(1L, NA, NA, NA, 1L)) }) test_that("can control `na_value` per column", { df <- data_frame( x = c(1, 1, NA, NA, NA), y = c(3, NA, NA, 2, 1) ) expect_identical( vec_rank(df, na_value = c("largest", "smallest")), c(2L, 1L, 3L, 5L, 4L) ) expect_identical( vec_rank(df, na_value = c("largest", "smallest"), direction = "desc"), c(4L, 5L, 3L, 1L, 2L) ) # But `incomplete = "na"` overrules it expect_identical( vec_rank(df, na_value = c("largest", "smallest"), incomplete = "na"), c(1L, NA, NA, NA, NA) ) expect_identical( vec_rank( df, na_value = c("largest", "smallest"), incomplete = "na", direction = "desc" ), c(1L, NA, NA, NA, NA) ) }) test_that("`x` must be a vector", { expect_error(vec_rank(identity), class = "vctrs_error_scalar_type") }) test_that("`x` must not be `NULL` (#1823, #1967)", { expect_snapshot(error = TRUE, { vec_rank(NULL) }) expect_snapshot(error = TRUE, { vec_rank(NULL, incomplete = "na") }) expect_snapshot(error = TRUE, { vec_rank(NULL, ties = "sequential", incomplete = "na") }) }) test_that("`ties` is validated", { expect_snapshot(error = TRUE, vec_rank(1, ties = "foo")) expect_snapshot(error = TRUE, vec_rank(1, ties = 1)) }) test_that("`incomplete` is validated", { expect_snapshot(error = TRUE, vec_rank(1, incomplete = NA)) expect_snapshot(error = TRUE, vec_rank(1, incomplete = c(TRUE, FALSE))) expect_snapshot(error = TRUE, vec_rank(1, incomplete = "foo")) }) vctrs/tests/testthat/test-type-data-frame.R0000644000176200001440000004564615156537555020535 0ustar liggesusers# printing ---------------------------------------------------------------- test_that("data frames print nicely", { expect_equal(vec_ptype_abbr(mtcars), "df[,11]") expect_snapshot(vec_ptype_show(mtcars)) expect_snapshot(vec_ptype_show(iris)) }) test_that("embedded data frames print nicely", { df <- data.frame(x = 1:3) df$a <- data.frame(a = 1:3, b = letters[1:3]) df$b <- list_of(1, 2, 3) df$c <- as_list_of(split(data.frame(x = 1:3, y = letters[1:3]), 1:3)) expect_snapshot(vec_ptype_show(df)) }) # coercing ---------------------------------------------------------------- test_that("data frame only combines with other data frames or NULL", { dt <- data.frame(x = 1) expect_equal(vec_ptype_common(dt, NULL), vec_ptype(dt)) expect_error( vec_ptype_common(dt, 1:10), class = "vctrs_error_incompatible_type" ) }) test_that("data frame takes max of individual variables", { dt1 <- data.frame(x = FALSE, y = 1L) dt2 <- data.frame(x = 1.5, y = 1.5) expect_equal(vec_ptype_common(dt1, dt2), vec_ptype_common(dt2)) }) test_that("data frame combines variables", { dt1 <- data.frame(x = 1) dt2 <- data.frame(y = 1) dt3 <- max(dt1, dt2) expect_equal( vec_ptype_common(dt1, dt2), vec_ptype_common(data.frame(x = double(), y = double())) ) }) test_that("empty data frame still has names", { df <- data.frame() out <- vec_ptype_common(df, df) expect_equal(names(out), character()) }) test_that("combining data frames with foreign classes uses fallback", { foo <- foobar(data.frame()) df <- data.frame() expect_identical(vec_ptype_common(foo, foo, foo), foo) expect_identical(vec_ptype_common(foo, foo, df, foo), df) expect_identical(vec_ptype2(foo, df), data.frame()) expect_identical(vec_ptype2(df, foo), data.frame()) expect_identical(vec_ptype_common(foo, df), df) expect_identical(vec_ptype_common(df, foo), df) cnds <- list() withCallingHandlers( warning = function(cnd) { cnds <<- append(cnds, list(cnd)) invokeRestart("muffleWarning") }, expect_identical( vec_ptype_common(foo, df, foo, foo), df ) ) # There are no longer any warnings expect_length(cnds, 0) expect_equal( vec_cbind(foobar(data.frame(x = 1)), data.frame(y = 2)), data.frame(x = 1, y = 2) ) expect_equal( vec_rbind(foo, data.frame(), foo), df ) }) # casting ----------------------------------------------------------------- test_that("safe casts work as expected", { df <- data.frame(x = 1, y = 0) expect_equal(vec_cast(NULL, df), NULL) expect_equal(vec_cast(df, df), df) expect_equal(vec_cast(data.frame(x = TRUE, y = FALSE), df), df) }) test_that("warn about lossy coercions", { df1 <- data.frame(x = factor("foo"), y = 1) df2 <- data.frame(x = factor("bar")) expect_lossy(vec_cast(df1, df1[1]), df1[1], x = df1, to = df1[1]) expect_lossy( vec_cast(df1[1], df2), data.frame(x = factor(NA, levels = "bar")), x = factor("foo"), to = factor("bar") ) out <- allow_lossy_cast( allow_lossy_cast( vec_cast(df1, df2), factor("foo"), factor("bar") ), df1, df2 ) expect_identical(out, data.frame(x = factor(NA, levels = "bar"))) }) test_that("invalid cast generates error", { expect_error( vec_cast(1L, data.frame()), class = "vctrs_error_incompatible_type" ) }) test_that("column order matches type", { df1 <- data.frame(x = 1, y = "a") df2 <- data.frame(x = TRUE, z = 3) df3 <- vec_cast(df2, vec_ptype_common(df1, df2)) expect_named(df3, c("x", "y", "z")) }) test_that("restore generates correct row/col names", { df1 <- data.frame(x = NA, y = 1:4, z = 1:4) df1$x <- data.frame(a = 1:4, b = 1:4) df2 <- vec_restore(lapply(df1[1:3], vec_slice, 1:2), df1) expect_named(df2, c("x", "y", "z")) expect_equal(.row_names_info(df2), -2) }) test_that("restore keeps automatic row/col names", { df1 <- data.frame(x = NA, y = 1:4, z = 1:4) df1$x <- data.frame(a = 1:4, b = 1:4) df2 <- vec_restore(df1, df1) expect_named(df2, c("x", "y", "z")) expect_equal(.row_names_info(df2), -4) }) test_that("cast to empty data frame preserves number of rows", { out <- vec_cast(new_data_frame(n = 10L), new_data_frame()) expect_equal(nrow(out), 10L) }) test_that("can cast unspecified to data frame", { df <- data.frame(x = 1, y = 2L) expect_identical(vec_cast(unspecified(3), df), vec_init(df, 3)) }) test_that("cannot cast list to data frame", { df <- data.frame(x = 1, y = 2L) expect_error( vec_cast(list(df, df), df), class = "vctrs_error_incompatible_type" ) }) test_that("can restore lists with empty names", { expect_identical(vec_restore(list(), data.frame()), data.frame()) }) test_that("can restore subclasses of data frames", { expect_identical( vec_restore(list(), subclass(data.frame())), subclass(data.frame()) ) local_methods( vec_restore.vctrs_foobar = function(x, to, ..., i) "dispatched" ) expect_identical(vec_restore(list(), subclass(data.frame())), "dispatched") }) test_that("df_cast() checks for names", { x <- new_data_frame(list(1)) y <- new_data_frame(list(2)) expect_error(vec_cast_common(x, y), "must have names") }) test_that("casting to and from data frame preserves row names", { out <- vec_cast(mtcars, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) out <- vec_cast(out, unrownames(mtcars)) expect_identical(row.names(out), row.names(mtcars)) }) test_that("df_cast() evaluates arg lazily", { expect_silent(df_cast(data_frame(), data_frame(), x_arg = print("oof"))) expect_silent(df_cast(data_frame(), data_frame(), to_arg = print("oof"))) }) # df_ptype2 --------------------------------------------------------------- test_that("df_ptype2() evaluates arg lazily", { expect_silent(df_ptype2(data_frame(), data_frame(), x_arg = print("oof"))) expect_silent(df_ptype2(data_frame(), data_frame(), y_arg = print("oof"))) }) # new_data_frame ---------------------------------------------------------- test_that("can construct an empty data frame", { expect_identical(new_data_frame(), data.frame()) }) test_that("can validly set the number of rows when there are no columns", { expect <- structure( list(), class = "data.frame", row.names = .set_row_names(2L), names = character() ) expect_identical(new_data_frame(n = 2L), expect) }) test_that("can add additional classes", { expect_s3_class(new_data_frame(class = "foobar"), "foobar") expect_s3_class(new_data_frame(class = c("foo", "bar")), c("foo", "bar")) }) test_that("can add additional attributes", { expect <- data.frame() attr(expect, "foo") <- "bar" attr(expect, "a") <- "b" expect_identical(new_data_frame(foo = "bar", a = "b"), expect) }) test_that("size is pulled from first column if not supplied", { x <- new_data_frame(list(x = 1:5, y = 1:6)) expect_identical(.row_names_info(x, type = 1), -5L) }) test_that("can construct a data frame without column names", { expect_named(new_data_frame(list(1, 2)), NULL) }) test_that("the names on an empty data frame are an empty character vector", { expect_identical(names(new_data_frame()), character()) }) test_that("class attribute", { expect_identical( class(new_data_frame(list(a = 1))), "data.frame" ) expect_identical( class(new_data_frame(list(a = 1), class = "tbl_df")), c("tbl_df", "data.frame") ) expect_identical( class(new_data_frame( list(a = 1), class = c("tbl_df", "tbl", "data.frame") )), c("tbl_df", "tbl", "data.frame", "data.frame") ) expect_identical( class(new_data_frame(list(a = 1), class = "foo_frame")), c("foo_frame", "data.frame") ) expect_identical( class(exec( new_data_frame, list(a = 1), !!!attributes(new_data_frame(list(), class = "tbl_df")) )), c("tbl_df", "data.frame", "data.frame") ) expect_identical( class(exec( new_data_frame, list(a = 1), !!!attributes(new_data_frame(list(b = 1), class = "tbl_df")) )), c("tbl_df", "data.frame", "data.frame") ) }) test_that("attributes with special names are merged", { expect_identical( names(new_data_frame(list(a = 1))), "a" ) expect_identical( names(new_data_frame(list(a = 1), names = "name")), "name" ) expect_identical( names(new_data_frame(list(1), names = "name")), "name" ) expect_identical( attr(new_data_frame(list()), "row.names"), integer() ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L)), -3L ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L, row.names = 1:3)), 3L ) expect_identical( .row_names_info(new_data_frame(list(), n = 3L, row.names = c(NA, -3L))), -3L ) expect_identical( attr(new_data_frame(list(), n = 1L, row.names = "rowname"), "row.names"), "rowname" ) }) test_that("n and row.names (#894)", { # Can omit n if row.names attribute is given expect_identical( row.names(new_data_frame(list(), row.names = "rowname")), "rowname" ) expect_identical( attr(new_data_frame(list(), row.names = 2L), "row.names"), 2L ) expect_identical( row.names(new_data_frame(list(), row.names = chr())), chr() ) }) test_that("`row.names` completely overrides `n` and the implied size of `x`, even if incompatible (tidyverse/dplyr#6596)", { row_names <- c(NA, -3L) df <- new_data_frame(list(), n = 2L, row.names = row_names) expect_identical(.row_names_info(df, type = 0L), row_names) df <- new_data_frame(list(x = 1:2), row.names = row_names) expect_identical(.row_names_info(df, type = 0L), row_names) }) test_that("ALTREP `row.names` are not materialized by `new_data_frame()` (tidyverse/dplyr#6596)", { # We are careful in `new_data_frame()` to not call the `Dataptr()` or # `Length()` ALTREP methods, both of which would materialize our lazy # character here row_names <- new_lazy_character(~ c("a", "b")) x <- new_data_frame(list(), row.names = row_names) expect_false(lazy_character_is_materialized(.row_names_info(x, type = 0L))) x <- new_data_frame(list(x = 1:2), row.names = row_names) expect_false(lazy_character_is_materialized(.row_names_info(x, type = 0L))) x <- new_data_frame(list(), n = 2L, row.names = row_names) expect_false(lazy_character_is_materialized(.row_names_info(x, type = 0L))) }) test_that("`x` must be a list", { expect_snapshot( (expect_error( new_data_frame(1), "`x` must be a list" )) ) }) test_that("if supplied, `n` must be an integer of size 1", { expect_snapshot({ (expect_error( new_data_frame(n = c(1L, 2L)), "must be an integer of size 1" )) (expect_error(new_data_frame(n = "x"), "must be an integer of size 1")) }) }) test_that("if supplied, `n` can't be negative or missing (#1477)", { expect_snapshot({ (expect_error(new_data_frame(n = -1L))) (expect_error(new_data_frame(n = NA_integer_))) }) }) test_that("`class` must be a character vector", { expect_snapshot( (expect_error( new_data_frame(class = 1), "must be NULL or a character vector" )) ) }) test_that("flatten info is computed", { df_flatten_info <- function(x) { .Call(ffi_df_flatten_info, x) } expect_identical(df_flatten_info(mtcars), list(FALSE, ncol(mtcars))) df <- tibble(x = 1, y = tibble(x = 2, y = tibble(x = 3), z = 4), z = 5) expect_identical(df_flatten_info(df), list(TRUE, 5L)) }) test_that("can flatten data frames", { df_flatten <- function(x) { .Call(ffi_df_flatten, x) } expect_identical(df_flatten(mtcars), mtcars) df <- tibble(x = 1, y = tibble(x = 2, y = tibble(x = 3), z = 4), z = 5) expect_identical( df_flatten(df), new_data_frame(list(x = 1, x = 2, x = 3, z = 4, z = 5)) ) }) test_that("can flatten data frames with rcrd columns (#1318)", { col <- new_rcrd(list(a = 1)) df <- data_frame(col = col, y = 1) expect_identical(vec_proxy_equal(df), data_frame(col = 1, y = 1)) col <- new_rcrd(list(a = 1, b = 2)) df <- data_frame(col = col, y = 1) expect_identical(vec_proxy_equal(df), data_frame(a = 1, b = 2, y = 1)) }) test_that("new_data_frame() zaps existing attributes", { struct <- structure(list(), foo = 1) expect_identical( attributes(new_data_frame(struct)), attributes(new_data_frame(list())), ) expect_identical( attributes(new_data_frame(struct, bar = 2)), attributes(new_data_frame(list(), bar = 2)), ) }) test_that("with zero columns, implied size is 0", { # Empty data frame input df <- new_data_frame(n = 10L) # Input to `new_data_frame()` is treated as a list. # Existing `row.names` are ignored, even if they might provide # information in the case of 0 columns. expect_identical(nrow(new_data_frame(df)), 0L) }) # data_frame -------------------------------------------------------------- test_that("data_frame() and df_list() report error context", { expect_snapshot({ (expect_error(data_frame(a = 1, a = 1))) (expect_error(data_frame(a = 1, a = 1, .error_call = call("foo")))) (expect_error(data_frame(a = 1:2, b = int()))) (expect_error(data_frame(a = 1:2, b = int(), .error_call = call("foo")))) (expect_error(df_list(a = 1, a = 1))) (expect_error(df_list(a = 1, a = 1, .error_call = call("foo")))) (expect_error(df_list(a = 1:2, b = int()))) (expect_error(df_list(a = 1:2, b = int(), .error_call = call("foo")))) }) }) test_that("can construct data frames with empty input", { expect_identical(data_frame(), new_data_frame()) expect_named(data_frame(), character()) }) test_that("input is tidy recycled", { expect_identical( data_frame(x = 1, y = 1:3), data_frame(x = c(1, 1, 1), y = 1:3) ) expect_identical( data_frame(x = 1, y = integer()), data_frame(x = double(), y = integer()) ) expect_snapshot({ expect_error(data_frame(1:2, 1:3), class = "vctrs_error_incompatible_size") }) }) test_that("dots are dynamic", { list_2_data_frame <- function(x) data_frame(!!!x) expect_identical( list_2_data_frame(list(x = 1, y = 2)), data_frame(x = 1, y = 2) ) }) test_that("unnamed input is auto named with empty strings", { expect_named(data_frame(1, 2, .name_repair = "minimal"), c("", "")) }) test_that("unnamed data frames are auto unpacked", { expect_identical( data_frame(w = 1, data_frame(x = 2, y = 3), z = 4), data_frame(w = 1, x = 2, y = 3, z = 4) ) }) test_that("named data frames are not unpacked", { df_col <- data_frame(x = 2, y = 3) df <- data_frame(w = 1, col = data_frame(x = 2, y = 3), z = 4) expect_identical(df$col, df_col) }) test_that("unpacked data frames without names are caught", { df_col <- new_data_frame(list(1)) expect_error(data_frame(df_col), "corrupt data frame") }) test_that("unpacking in `df_list()` can be disabled with `.unpack = FALSE`", { out <- df_list( w = 1, data_frame(x = 2, y = 3), z = 4, .unpack = FALSE, .name_repair = "minimal" ) expect <- list( w = 1, data_frame(x = 2, y = 3), z = 4 ) expect_identical(out, expect) }) test_that("`.unpack` is validated", { expect_snapshot(error = TRUE, { df_list(.unpack = 1) }) expect_snapshot(error = TRUE, { df_list(.unpack = c(TRUE, FALSE)) }) }) test_that("`NULL` inputs are dropped", { expect_identical(data_frame(NULL, x = 1, NULL), data_frame(x = 1)) }) test_that("`NULL` inputs are dropped before name repair", { expect_identical( data_frame(x = NULL, x = 1, .name_repair = "check_unique"), data_frame(x = 1) ) }) test_that("`.size` can force a desired size", { df <- data_frame(x = 1, .size = 5) expect_identical(df$x, rep(1, 5)) expect_size(data_frame(.size = 5), 5L) }) test_that("`.name_repair` repairs names", { expect_message(res <- data_frame(x = 1, x = 1, .name_repair = "unique")) expect_named(res, c("x...1", "x...2")) }) test_that("`.name_repair` happens after auto-naming with empty strings", { expect_message(res <- data_frame(1, 2, .name_repair = "unique")) expect_named(res, c("...1", "...2")) }) test_that("`.name_repair` happens after splicing", { expect_message( res <- data_frame(x = 1, data_frame(x = 2), .name_repair = "unique") ) expect_named(res, c("x...1", "x...2")) }) test_that("`.name_repair` can be quiet", { local_name_repair_verbose() expect_snapshot({ dfl_unique <- df_list(1, 2, .name_repair = "unique_quiet") dfl_universal <- df_list( "if" = 1, "in" = 2, .name_repair = "universal_quiet" ) df_unique <- data_frame(1, 2, .name_repair = "unique_quiet") df_universal <- data_frame( "if" = 1, "in" = 2, .name_repair = "universal_quiet" ) }) expect_named(dfl_unique, c("...1", "...2")) expect_named(dfl_universal, c(".if", ".in")) expect_named(df_unique, c("...1", "...2")) expect_named(df_universal, c(".if", ".in")) }) # fallback ---------------------------------------------------------------- test_that("data frame fallback handles column types (#999)", { df1 <- foobar(data.frame(x = 1)) df2 <- foobar(data.frame(x = 1, y = 2)) df3 <- foobar(data.frame(x = "", y = 2)) common <- foobar(data.frame(x = dbl(), y = dbl())) expect_identical(vec_ptype2(df1, df2), common) expect_identical(vec_ptype2(df2, df1), common) exp <- foobar(data_frame(x = 1, y = na_dbl)) expect_identical(vec_cast(df1, df2), exp) expect_identical(vec_cast(set_tibble(df1), set_tibble(df2)), set_tibble(exp)) expect_snapshot({ local_error_call(call("my_function")) (expect_error( vec_ptype2(df1, df3), class = "vctrs_error_incompatible_type" )) (expect_error( vec_ptype2(df3, df1), class = "vctrs_error_incompatible_type" )) (expect_error( vec_cast(df2, df1), class = "vctrs_error_incompatible_type" )) }) expect_identical( vec_rbind(df1, df2), foobar(data.frame(x = c(1, 1), y = c(NA, 2))) ) # Attributes are not restored df1_attrib <- foobar(df1, foo = "foo") df2_attrib <- foobar(df2, bar = "bar") exp <- data.frame(x = c(1, 1), y = c(NA, 2)) expect_equal( vec_rbind(df1_attrib, df2_attrib), exp ) out <- with_methods( `[.vctrs_foobar` = function(x, i, ...) { new_data_frame( NextMethod(), dispatched = TRUE, class = "vctrs_foobar" ) }, vec_rbind(df1_attrib, df2_attrib) ) expect_identical(out, exp) }) test_that("falls back to tibble for tibble subclasses (#1025)", { foo <- foobar(tibble::as_tibble(mtcars)) expect_s3_class(vec_rbind(foo, mtcars), "tbl_df") expect_s3_class(vec_rbind(foo, mtcars, mtcars), "tbl_df") expect_s3_class(vec_rbind(foo, mtcars, foobar(mtcars)), "tbl_df") }) test_that("fallback is recursive", { df <- mtcars[1:3, 1, drop = FALSE] foo <- new_data_frame(list(x = foobar(df, foo = TRUE))) bar <- new_data_frame(list(x = foobar(df, bar = TRUE))) baz <- new_data_frame(list(y = 1:3, x = foobar(df, bar = TRUE))) exp <- new_data_frame(list(x = vec_rbind(df, df))) expect_equal(vec_rbind(foo, bar), exp) exp <- new_data_frame(list(x = vec_rbind(df, df), y = c(NA, NA, NA, 1:3))) expect_equal(vec_rbind(foo, baz), exp) }) vctrs/tests/testthat/test-runs.R0000644000176200001440000002112215065005761016507 0ustar liggesusers# vec_identify_runs ------------------------------------------------------------ test_that("vec_identify_runs() works with size zero input", { expect <- structure(integer(), n = 0L) expect_identical(vec_identify_runs(integer()), expect) expect_identical(vec_identify_runs(data.frame()), expect) }) test_that("works with atomic input of various types", { expect <- structure(c(1L, 1L, 2L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(c(TRUE, TRUE, FALSE, FALSE, TRUE)), expect) expect_identical(vec_identify_runs(c(1L, 1L, 2L, 2L, 3L)), expect) expect_identical(vec_identify_runs(c(1, 1, 2, 2, 3)), expect) expect_identical( vec_identify_runs(complex( real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3) )), expect ) expect_identical(vec_identify_runs(c("a", "a", "b", "b", "c")), expect) expect_identical(vec_identify_runs(as.raw(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_identify_runs(list(1, 1, 2, 2, 3)), expect) }) test_that("NA values are identical", { expect <- structure(c(1L, 1L), n = 1L) expect_identical(vec_identify_runs(c(NA, NA)), expect) expect_identical(vec_identify_runs(c(NA_integer_, NA_integer_)), expect) expect_identical(vec_identify_runs(c(NA_real_, NA_real_)), expect) expect_identical(vec_identify_runs(c(NA_complex_, NA_complex_)), expect) expect_identical(vec_identify_runs(c(NA_character_, NA_character_)), expect) # No NA type for raw expect_identical(vec_identify_runs(list(NULL, NULL)), expect) }) test_that("NA and NaN are different", { expect <- structure(c(1L, 2L), n = 2L) expect_identical(vec_identify_runs(c(NA_real_, NaN)), expect) }) test_that("normalizes character encodings", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1) expect_identical(vec_identify_runs(x), structure(rep(1L, 3), n = 1L)) }) test_that("errors on scalars", { expect_snapshot(error = TRUE, { vec_identify_runs(foobar()) }) }) test_that("works with data frames rowwise", { df <- data_frame(x = c(1, 1, 1, 2), y = c(1, 1, 2, 3)) expect <- structure(c(1L, 1L, 2L, 3L), n = 3L) expect_identical(vec_identify_runs(df), expect) df <- data_frame(x = c(1, 1, 1), y = c(2, 2, 2), z = c("b", "a", "a")) expect <- structure(c(1L, 2L, 2L), n = 2L) expect_identical(vec_identify_runs(df), expect) }) test_that("works with data frames with rows but no columns", { expect <- structure(rep(1L, 5), n = 1L) expect_identical(vec_identify_runs(new_data_frame(n = 5L)), expect) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, 1, 2, 2), b = c(1, 2, 3, 3)) df <- data_frame(x = rep(1, 4), y = col) expect <- structure(c(1L, 2L, 3L, 3L), n = 3L) expect_identical(vec_identify_runs(df), expect) }) test_that("works with columns of various types", { # Use two columns to keep the data frame from being squashed to a vector add_col <- function(col) { x <- rep(1L, 5) data_frame(x = x, y = col) } expect <- structure(c(1L, 1L, 2L, 2L, 3L), n = 3L) expect_identical( vec_identify_runs(add_col(c(TRUE, TRUE, FALSE, FALSE, TRUE))), expect ) expect_identical(vec_identify_runs(add_col(c(1L, 1L, 2L, 2L, 3L))), expect) expect_identical(vec_identify_runs(add_col(c(1, 1, 2, 2, 3))), expect) expect_identical( vec_identify_runs(add_col(complex( real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3) ))), expect ) expect_identical( vec_identify_runs(add_col(c("a", "a", "b", "b", "c"))), expect ) expect_identical(vec_identify_runs(add_col(as.raw(c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_identify_runs(add_col(list(1, 1, 2, 2, 3))), expect) }) # vec_run_sizes ---------------------------------------------------------------- test_that("vec_run_sizes() works with size zero input", { expect_identical(vec_run_sizes(integer()), integer()) expect_identical(vec_run_sizes(data.frame()), integer()) }) test_that("works with atomic input of various types", { expect <- c(2L, 2L, 1L) expect_identical(vec_run_sizes(c(TRUE, TRUE, FALSE, FALSE, TRUE)), expect) expect_identical(vec_run_sizes(c(1L, 1L, 2L, 2L, 3L)), expect) expect_identical(vec_run_sizes(c(1, 1, 2, 2, 3)), expect) expect_identical( vec_run_sizes(complex( real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3) )), expect ) expect_identical(vec_run_sizes(c("a", "a", "b", "b", "c")), expect) expect_identical(vec_run_sizes(as.raw(c(1, 1, 2, 2, 3))), expect) expect_identical(vec_run_sizes(list(1, 1, 2, 2, 3)), expect) }) test_that("NA values are identical", { expect <- 2L expect_identical(vec_run_sizes(c(NA, NA)), expect) expect_identical(vec_run_sizes(c(NA_integer_, NA_integer_)), expect) expect_identical(vec_run_sizes(c(NA_real_, NA_real_)), expect) expect_identical(vec_run_sizes(c(NA_complex_, NA_complex_)), expect) expect_identical(vec_run_sizes(c(NA_character_, NA_character_)), expect) # No NA type for raw expect_identical(vec_run_sizes(list(NULL, NULL)), expect) }) test_that("NA and NaN are different", { expect_identical(vec_run_sizes(c(NA_real_, NaN)), c(1L, 1L)) }) test_that("normalizes character encodings", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1) expect_identical(vec_run_sizes(x), 3L) }) test_that("errors on scalars", { expect_snapshot(error = TRUE, { vec_run_sizes(foobar()) }) }) test_that("works with data frames rowwise", { df <- data_frame(x = c(1, 1, 1, 2), y = c(1, 1, 2, 3)) expect_identical(vec_run_sizes(df), c(2L, 1L, 1L)) df <- data_frame(x = c(1, 1, 1), y = c(2, 2, 2), z = c("b", "a", "a")) expect_identical(vec_run_sizes(df), c(1L, 2L)) }) test_that("works with data frames with rows but no columns", { expect_identical(vec_run_sizes(new_data_frame(n = 5L)), 5L) }) test_that("works with data frame columns", { col <- data_frame(a = c(1, 1, 2, 2), b = c(1, 2, 3, 3)) df <- data_frame(x = rep(1, 4), y = col) expect_identical(vec_run_sizes(df), c(1L, 1L, 2L)) }) test_that("works with columns of various types", { # Use two columns to keep the data frame from being squashed to a vector add_col <- function(col) { x <- rep(1L, 5) data_frame(x = x, y = col) } expect <- c(2L, 2L, 1L) expect_identical( vec_run_sizes(add_col(c(TRUE, TRUE, FALSE, FALSE, TRUE))), expect ) expect_identical(vec_run_sizes(add_col(c(1L, 1L, 2L, 2L, 3L))), expect) expect_identical(vec_run_sizes(add_col(c(1, 1, 2, 2, 3))), expect) expect_identical( vec_run_sizes(add_col(complex( real = c(1, 1, 2, 2, 2), imaginary = c(1, 1, 2, 2, 3) ))), expect ) expect_identical(vec_run_sizes(add_col(c("a", "a", "b", "b", "c"))), expect) expect_identical(vec_run_sizes(add_col(as.raw(c(1, 1, 2, 2, 3)))), expect) expect_identical(vec_run_sizes(add_col(list(1, 1, 2, 2, 3))), expect) }) # vec_locate_run_bounds -------------------------------------------------------- test_that("can locate run starts", { expect_identical( vec_locate_run_bounds(c(1, 3, 3, 1, 5, 5, 6)), c(1L, 2L, 4L, 5L, 7L) ) }) test_that("can locate run ends", { expect_identical( vec_locate_run_bounds(c(1, 3, 3, 1, 5, 5, 6), which = "end"), c(1L, 3L, 4L, 6L, 7L) ) }) test_that("vec_locate_run_bounds() works with size zero input", { expect_identical(vec_locate_run_bounds(integer(), which = "start"), integer()) expect_identical(vec_locate_run_bounds(integer(), which = "end"), integer()) }) test_that("vec_locate_run_bounds() validates `which`", { expect_snapshot(error = TRUE, { vec_locate_run_bounds(1, which = "x") }) expect_snapshot(error = TRUE, { vec_locate_run_bounds(1, which = 1) }) expect_snapshot(error = TRUE, { vec_locate_run_bounds(1, which = c("foo", "bar")) }) }) # vec_detect_run_bounds -------------------------------------------------------- test_that("can detect run starts", { expect_identical( vec_detect_run_bounds(c(1, 3, 3, 1, 5, 5, 6)), c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE) ) }) test_that("can detect run ends", { expect_identical( vec_detect_run_bounds(c(1, 3, 3, 1, 5, 5, 6), which = "end"), c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE) ) }) test_that("vec_detect_run_bounds() works with size zero input", { expect_identical(vec_detect_run_bounds(integer(), which = "start"), logical()) expect_identical(vec_detect_run_bounds(integer(), which = "end"), logical()) }) test_that("vec_detect_run_bounds() validates `which`", { expect_snapshot(error = TRUE, { vec_detect_run_bounds(1, which = "x") }) expect_snapshot(error = TRUE, { vec_detect_run_bounds(1, which = 1) }) expect_snapshot(error = TRUE, { vec_detect_run_bounds(1, which = c("foo", "bar")) }) }) vctrs/tests/testthat/test-type-idate.R0000644000176200001440000000653615113335375017602 0ustar liggesusers# Never run on CRAN, even if they have data.table, because we don't regularly # check these on CI and we don't want a change in data.table to force a CRAN # failure for vctrs. skip_on_cran() # Avoids adding `data.table` to Suggests. # These tests are only run on the devs' machines. testthat_import_from("data.table", "as.IDate") # `as.IDate()` drops names https://github.com/Rdatatable/data.table/issues/7252 as_IDate_with_names <- function(x) { out <- as.IDate(x) names(out) <- names(x) out } # ------------------------------------------------------------------------------ # ptype test_that("ptype abbr", { x <- as.IDate("2019-01-01") expect_identical(vec_ptype_abbr(x), "IDate") }) test_that("ptype full", { x <- as.IDate("2019-01-01") expect_identical(vec_ptype_full(x), "IDate") }) # ------------------------------------------------------------------------------ # ptype2 test_that("can get common type of IDate and IDate", { x <- as_IDate_with_names(c(a = "2019-01-01")) # It shouldn't have names, but thats a vctrs problem expect <- as_IDate_with_names(set_names(integer(), character())) expect_identical(vec_ptype2(x, x), expect) }) test_that("can't get common type of Date and IDate", { x <- as.Date("2019-01-01") y <- as.IDate("2019-01-01") expect_snapshot(error = TRUE, { vec_ptype2(x, y) }) expect_snapshot(error = TRUE, { vec_ptype2(y, x) }) }) # ------------------------------------------------------------------------------ # cast test_that("can cast IDate to IDate", { x <- as_IDate_with_names(c(a = "2019-01-01")) expect_identical(vec_cast(x, x), x) }) test_that("can't cast Date to IDate", { x <- as.Date("2019-01-01") y <- as.IDate("2019-01-01") expect_snapshot(error = TRUE, { vec_cast(x, y) }) }) test_that("can't cast IDate to Date", { x <- as.IDate("2019-01-01") y <- as.Date("2019-01-01") expect_snapshot(error = TRUE, { vec_cast(x, y) }) }) # ------------------------------------------------------------------------------ # proxy / restore test_that("vec_proxy", { # Retains integer type https://github.com/r-lib/vctrs/issues/1961 x <- as.IDate("2019-01-01") expect_identical(vec_proxy(x), x) }) test_that("vec_restore", { x <- as.IDate("2019-01-01") proxy <- vec_proxy(x) expect_identical(vec_restore(proxy, x), x) }) test_that("proxy / restore retains names", { x <- as_IDate_with_names(c(a = "2019-01-01")) proxy <- vec_proxy(x) expect_named(proxy, "a") restored <- vec_restore(proxy, x) expect_named(restored, "a") }) test_that("vec_proxy_equal, vec_proxy_compare, vec_proxy_order", { # Doesn't change type, stays integer storage as well x <- as.IDate("2019-01-01") expect_identical(vec_proxy_equal(x), x) expect_identical(vec_proxy_compare(x), x) expect_identical(vec_proxy_order(x), x) }) # ------------------------------------------------------------------------------ # manipulation test_that("ptype retains integer type", { x <- as.IDate(c("2019-01-01", "2019-01-02")) expect_identical(typeof(vec_ptype(x)), "integer") expect_identical(vec_ptype(x), as.IDate(integer())) }) test_that("slicing retains integer type", { x <- as.IDate(c("2019-01-01", "2019-01-02")) expect_identical(typeof(vec_slice(x, 1)), "integer") }) test_that("slicing retains names", { x <- as_IDate_with_names(c(a = "2019-01-01")) expect_identical(vec_slice(x, 1), x) }) vctrs/tests/testthat/test-print-str.R0000644000176200001440000000025715065005761017470 0ustar liggesuserstest_that("show attributes", { x <- structure(1:100, x = "a string", y = 1:20, z = data.frame(x = 1:3)) expect_snapshot(obj_str(x)) expect_snapshot(obj_str(mtcars)) }) vctrs/tests/testthat/test-if-else.R0000644000176200001440000003061115132161317017042 0ustar liggesuserstest_that("`condition` must be a condition vector", { # No scalars expect_snapshot_vec_if_else( error = TRUE, condition = lm(1 ~ 1), true = 1, false = 2 ) # No casting expect_snapshot_vec_if_else( error = TRUE, condition = 1, true = 1, false = 2 ) # No objects expect_snapshot_vec_if_else( error = TRUE, condition = structure(TRUE, class = "foo"), true = 1, false = 2 ) # No dim expect_snapshot_vec_if_else( error = TRUE, condition = array(TRUE), true = 1, false = 2 ) }) test_that("`true`, `false`, and `missing` must be vectors", { expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = lm(1 ~ 1), false = 2, missing = 0 ) }) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = 1, false = lm(1 ~ 1), missing = 0 ) }) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = 1, false = 2, missing = lm(1 ~ 1) ) }) }) test_that("`true`, `false`, and `missing` must recycle to size of `condition`", { expect_snapshot_vec_if_else( error = TRUE, condition = TRUE, true = c(1, 2), false = 2, missing = 0 ) expect_snapshot_vec_if_else( error = TRUE, condition = TRUE, true = 1, false = c(1, 2), missing = 0 ) expect_snapshot_vec_if_else( error = TRUE, condition = TRUE, true = 1, false = 2, missing = c(1, 2) ) }) test_that("all combinations of `true`, `false`, and `missing` recycling (including names) are tested", { # 8 combinations expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = set_names(7L), missing = set_names(13L), expect = set_names(int(1, 7, 13, 13, 1, 7)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1:6), false = set_names(7L), missing = set_names(13L), expect = set_names(int(1, 7, 13, 13, 5, 7)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = set_names(7:12), missing = set_names(13L), expect = set_names(int(1, 8, 13, 13, 1, 12)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = set_names(7L), missing = set_names(13:18), expect = set_names(int(1, 7, 15, 16, 1, 7)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1:6), false = set_names(7:12), missing = set_names(13L), expect = set_names(int(1, 8, 13, 13, 5, 12)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1:6), false = set_names(7L), missing = set_names(13:18), expect = set_names(int(1, 7, 15, 16, 5, 7)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = set_names(7:12), missing = set_names(13:18), expect = set_names(int(1, 8, 15, 16, 1, 12)) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1:6), false = set_names(7:12), missing = set_names(13:18), expect = set_names(int(1, 8, 15, 16, 5, 12)) ) }) test_that("not all of `true`, `false`, and `missing` must be named", { expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = 1L, false = set_names(7L), missing = set_names(13L), expect = set_names(int(1, 7, 13, 13, 1, 7), c("", "7", "13", "13", "", "7")) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = 7L, missing = set_names(13L), expect = set_names(int(1, 7, 13, 13, 1, 7), c("1", "", "13", "13", "1", "")) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = set_names(7L), missing = 13L, expect = set_names(int(1, 7, 13, 13, 1, 7), c("1", "7", "", "", "1", "7")) ) }) test_that("names are retained even with casting", { expect_named( vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(1L), false = set_names(7L), missing = set_names(13L), ptype = double() ), as.character(c(1, 7, 13, 13, 1, 7)) ) # Generic expect_named( vec_if_else( condition = c(TRUE, FALSE, NA, NA, TRUE, FALSE), true = set_names(new_date(1)), false = set_names(new_date(7)), missing = set_names(new_date(13)), ptype = new_datetime(tzone = "UTC") ), as.character(new_date(c(1, 7, 13, 13, 1, 7))) ) }) test_that("`ptype` overrides common type", { expect_identical( vec_if_else( condition = TRUE, true = 1, false = 2, missing = 0, ptype = integer() ), 1L ) # Generic expect_identical( vec_if_else( condition = TRUE, true = new_date(0), false = new_date(1), missing = new_date(2), ptype = new_datetime(tzone = "UTC") ), new_datetime(0, tzone = "UTC") ) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = 1.5, false = 2, missing = 0, ptype = integer() ) }) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = 1, false = 2.5, missing = 0, ptype = integer() ) }) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = 1, false = 2, missing = 0.5, ptype = integer() ) }) # Generic expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = 1, false = new_date(2), missing = new_date(0), ptype = new_date() ) }) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = new_date(1), false = 2, missing = new_date(0), ptype = new_date() ) }) expect_snapshot(error = TRUE, { vec_if_else( condition = TRUE, true = new_date(1), false = new_date(2), missing = 0, ptype = new_date() ) }) }) test_that("gives expected output with `true` and `false`", { expect_identical_vec_if_else( condition = c(TRUE, FALSE, TRUE), true = c(1, 2, 3), false = c(4, 5, 6), expect = c(1, 5, 3) ) }) test_that("gives expected output with `missing`", { expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA), true = c(1, 2, 3), false = c(4, 5, 6), expect = c(1, 5, NA) ) expect_identical_vec_if_else( condition = c(TRUE, FALSE, NA), true = c(1, 2, 3), false = c(4, 5, 6), missing = c(7, 8, 9), expect = c(1, 5, 9) ) }) test_that("works with arrays of all types", { mat <- as.matrix expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(lgl(TRUE, FALSE, NA)), mat(lgl(FALSE, TRUE, FALSE)) ), mat(lgl(TRUE, TRUE, NA)) ) expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(int(1, 2, 3)), mat(int(4, 5, 6)) ), mat(int(1, 5, 3)) ) expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(dbl(1, 2, 3)), mat(dbl(4, 5, 6)) ), mat(dbl(1, 5, 3)) ) expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(cpl(1, 2, 3)), mat(cpl(4, 5, 6)) ), mat(cpl(1, 5, 3)) ) expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(as.raw(c(1, 2, 3))), mat(as.raw(c(4, 5, 6))) ), mat(as.raw(c(1, 5, 3))) ) expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(chr("1", "2", "3")), mat(chr("4", "5", "6")) ), mat(chr("1", "5", "3")) ) expect_identical( vec_if_else( c(TRUE, FALSE, TRUE), mat(list(1, 2, 3)), mat(list(4, 5, 6)) ), mat(list(1, 5, 3)) ) }) test_that("missing value fall through is right for all atomic types", { expect_identical(vec_if_else(NA, TRUE, TRUE), NA) expect_identical(vec_if_else(NA, 1L, 1L), NA_integer_) expect_identical(vec_if_else(NA, 1, 1), NA_real_) expect_identical( vec_if_else(NA, cpl(1), cpl(1)), complex(real = NA, imaginary = NA) ) expect_identical(vec_if_else(NA, as.raw(1), as.raw(1)), as.raw(0)) expect_identical(vec_if_else(NA, "1", "1"), NA_character_) expect_identical(vec_if_else(NA, list(1), list(1)), list(NULL)) }) test_that("extraneous attributes on `true`, `false`, and `missing` are dropped", { # We do not consider these as part of the `ptype`! expect_identical( vec_if_else( condition = c(TRUE, FALSE, TRUE, NA), true = structure(1:4, foo = "bar"), false = structure(5:8, foo = "bar"), missing = structure(9:12, foo = "bar") ), int(1, 6, 3, 12) ) }) test_that("`ptype` is finalized before being used", { # Without `vec_ptype_final()`, this could result in `unspecified()` as the `ptype` expect_identical(vec_if_else(TRUE, TRUE, FALSE, ptype = NA), TRUE) # We try not to leak when we can help it expect_identical(vec_if_else(TRUE, TRUE, FALSE, ptype = unspecified(1)), TRUE) }) test_that("`vec_ptype2()`'s `left` is always set", { # This used to result in `left` being unset, so we'd hit an unreachable state expect_identical( vec_if_else( condition = FALSE, true = TRUE, false = NA, missing = FALSE ), NA ) }) # ------------------------------------------------------------------------------ # `dplyr::if_else()` tests test_that("scalar true and false are vectorised", { x <- c(TRUE, TRUE, FALSE, FALSE) expect_equal(vec_if_else(x, 1, 2), c(1, 1, 2, 2)) }) test_that("vector true and false are ok", { x <- c(-1, 0, 1) expect_equal(vec_if_else(x < 0, x, 0), c(-1, 0, 0)) expect_equal(vec_if_else(x > 0, x, 0), c(0, 0, 1)) }) test_that("missing values are missing", { expect_equal(vec_if_else(c(TRUE, NA, FALSE), -1, 1), c(-1, NA, 1)) }) test_that("works with lists", { x <- list(1, 2, 3) expect_equal( vec_if_else(c(TRUE, TRUE, FALSE), x, list(NULL)), list(1, 2, NULL) ) }) test_that("works with data frames", { true <- tibble(x = 1, y = 2) false <- tibble(x = 3, y = 4) expect_identical( vec_if_else(c(TRUE, FALSE, NA, TRUE), true, false), vec_c(true, false, NA, true) ) }) test_that("works with vctrs rcrd types", { true <- new_rcrd(list(x = 1, y = 2)) false <- new_rcrd(list(x = 3, y = 4)) expect_identical( vec_if_else(c(TRUE, FALSE, NA, TRUE), true, false), vec_c(true, false, NA, true) ) }) test_that("takes the common type of `true` and `false` (tidyverse/dplyr#6243)", { expect_identical(vec_if_else(TRUE, 1L, 1.5), 1) expect_snapshot(error = TRUE, { vec_if_else(TRUE, 1, "x") }) expect_snapshot(error = TRUE, { vec_if_else( TRUE, 1, "x", true_arg = "t", false_arg = "f", error_call = current_env() ) }) }) test_that("includes `missing` in the common type computation if used", { expect_identical(vec_if_else(TRUE, 1L, 2L, missing = 3), 1) expect_snapshot(error = TRUE, { vec_if_else(TRUE, 1, 2, missing = "x") }) expect_snapshot(error = TRUE, { vec_if_else(TRUE, 1L, 2, missing = "x") }) expect_snapshot(error = TRUE, { vec_if_else(TRUE, 1, 2L, missing = "x") }) }) test_that("can recycle to size 0 `condition`", { expect_identical(vec_if_else(logical(), 1, 2, missing = 3), double()) }) test_that("accepts logical conditions with attributes (tidyverse/dplyr#6678)", { x <- structure(TRUE, label = "foo") expect_identical(vec_if_else(x, 1, 2), 1) }) test_that("`condition` must be logical (and isn't cast to logical!)", { expect_snapshot(error = TRUE, { vec_if_else(1:10, 1, 2) }) }) test_that("`true`, `false`, and `missing` must recycle to the size of `condition`", { x <- 1:3 bad <- 1:2 expect_snapshot(error = TRUE, { vec_if_else(x < 2, bad, x) }) expect_snapshot(error = TRUE, { vec_if_else(x < 2, x, bad) }) expect_snapshot(error = TRUE, { vec_if_else(x < 2, x, x, missing = bad) }) }) test_that("must have empty dots", { expect_snapshot(error = TRUE, { vec_if_else(TRUE, 1, 2, missing = 3, 4) }) }) test_that("`ptype` overrides the common type", { expect_identical(vec_if_else(TRUE, 2, 1L, ptype = integer()), 2L) expect_snapshot(error = TRUE, { vec_if_else(TRUE, 1L, 2.5, ptype = integer()) }) }) vctrs/tests/testthat/test-empty.R0000644000176200001440000000230115120272011016636 0ustar liggesuserstest_that("can compact missing elements", { x <- list(NULL, 1, NULL) expect_identical(list_drop_empty(x), list(1)) }) test_that("can compact empty elements", { x <- list(1, NULL, integer(), NULL) expect_identical(list_drop_empty(x), list(1)) }) test_that("emptyness works with data frames", { x <- data_frame() y <- data_frame(.size = 2L) lst <- list(x, y) expect_identical(list_drop_empty(lst), list(y)) }) test_that("emptyness works with rcrd types", { x <- new_rcrd(list(foo = integer(), bar = numeric())) y <- new_rcrd(list(foo = 1L, bar = 1)) lst <- list(x, y) expect_identical(list_drop_empty(lst), list(y)) }) test_that("works with empty lists", { expect_identical(list_drop_empty(list()), list()) }) test_that("retains list type", { x <- list_of(NULL, integer()) expect_identical(list_drop_empty(x), list_of(.ptype = integer())) }) test_that("validates `x`", { expect_error(list_drop_empty(1), "must be a list") expect_error(list_drop_empty(data_frame()), "must be a list") # List arrays are not allowed, because we would not iterate over them # correctly expect_snapshot(error = TRUE, { x <- array(list(1), dim = c(1, 1)) list_drop_empty(x) }) }) vctrs/tests/testthat/helper-shape.R0000644000176200001440000000017515065005761017125 0ustar liggesusersshape_broadcast_ <- function(x, to, x_arg = "x", to_arg = "to") { shape_broadcast(x, to, x_arg = x_arg, to_arg = to_arg) } vctrs/tests/testthat/test-type-unspecified.R0000644000176200001440000001002115132161317020764 0ustar liggesuserstest_that("unknown type is idempotent", { types <- list( unspecified(), logical(), integer(), double(), character(), list(), new_list_of(ptype = integer()), new_factor(), new_ordered(), new_date(), new_datetime(), new_duration(), vec_ptype(matrix(1:4, 2)), vec_ptype(array(1:5, c(1, 5))) ) lhs <- map(types, vec_ptype2, x = unspecified()) expect_identical(types, lhs) lhs <- map(types, vec_ptype2, x = NA) expect_identical(types, lhs) rhs <- map(types, vec_ptype2, y = unspecified()) expect_identical(types, rhs) rhs <- map(types, vec_ptype2, y = NA) expect_identical(types, rhs) }) test_that("common type of unspecified and NULL is unspecified", { expect_identical(vec_ptype2(unspecified(), NULL), unspecified()) expect_identical(vec_ptype2(NULL, unspecified()), unspecified()) expect_identical(vec_ptype2(NA, NULL), unspecified()) expect_identical(vec_ptype2(NULL, NA), unspecified()) }) test_that("cannot take the common type of unspecified and a scalar list", { expect_error( vec_ptype2(unspecified(), foobar()), class = "vctrs_error_scalar_type" ) expect_error( vec_ptype2(foobar(), unspecified()), class = "vctrs_error_scalar_type" ) }) test_that("subsetting works", { expect_identical(unspecified(4)[2:3], unspecified(2)) }) test_that("has useful print method", { expect_snapshot(unspecified()) }) test_that("can finalise data frame containing unspecified columns", { df <- data.frame(y = NA, x = c(1, 2, NA)) ptype <- vec_ptype(df) expect_identical(ptype$y, unspecified()) finalised <- vec_ptype_finalise(ptype) expect_identical(finalised$y, lgl()) common <- vec_ptype_common(df, df) expect_identical(common$y, lgl()) }) test_that("can cast to common type data frame containing unspecified columns", { df <- data.frame(y = NA, x = c(1, 2, NA)) expect_identical(vec_cast_common(df, df), list(df, df)) }) test_that("unspecified vectors are always unspecified (#222)", { expect_true(is_unspecified(unspecified())) expect_true(is_unspecified(unspecified(1))) }) test_that("S3 vectors and shaped vectors are never unspecified", { expect_false(is_unspecified(foobar(NA))) expect_false(is_unspecified(foobar(lgl(NA, NA)))) expect_false(is_unspecified(matrix(NA, 2))) }) test_that("empty logical vector is not unspecified", { expect_false(is_unspecified(logical())) }) test_that("logical vectors of only `NA` are considered unspecified", { expect_true(is_unspecified(NA)) expect_true(is_unspecified(c(NA, NA))) expect_false(is_unspecified(c(NA, FALSE, NA))) }) test_that("extraneous attributes don't affect unspecifiedness", { # `names` expect_true(is_unspecified(set_names(c(NA, NA), c("a", "b")))) expect_true(is_unspecified(set_names(unspecified(2), c("a", "b")))) # Extraneous attributes expect_true(is_unspecified(structure(c(NA, NA), foo = "bar"))) expect_true(is_unspecified(structure(unspecified(2), foo = "bar"))) }) test_that("can finalise lengthy unspecified vectors", { expect_identical(vec_ptype_finalise(unspecified(3)), rep(NA, 3)) expect_identical(ununspecify(unspecified(3)), rep(NA, 3)) }) test_that("unspecified() validates input", { expect_identical(unspecified(1), unspecified(1L)) expect_error(unspecified(1:3), "must be a single number") }) test_that("tibble::type_sum() knows about unspecified", { expect_identical(tibble::type_sum(unspecified(3)), "???") }) test_that("casting to a scalar type errors", { expect_error(vec_cast(NA, quote(x)), class = "vctrs_error_scalar_type") expect_error( vec_cast(unspecified(1), quote(x)), class = "vctrs_error_scalar_type" ) }) test_that("monitoring test - can cast to unspecified from unspecified", { expect_identical(vec_cast(NA, unspecified()), unspecified(1)) expect_identical(vec_cast(unspecified(1), unspecified()), unspecified(1)) }) test_that("monitoring test - casting unspecified input to NA unspecified results in NA vector", { expect_identical(vec_cast(unspecified(1), NA), NA) expect_identical(vec_cast(NA, NA), NA) }) vctrs/tests/testthat/test-proxy-restore.R0000644000176200001440000002050215157273651020372 0ustar liggesuserstest_that("vec_restore() works with `NULL`", { expect_null(vec_restore(NULL, 1)) }) test_that("default vec_restore() restores attributes except names", { to <- structure(NA, foo = "foo", bar = "bar") expect_identical( vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar") ) to <- structure(NA, names = "a", foo = "foo", bar = "bar") expect_identical( vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar") ) to <- structure(NA, foo = "foo", names = "a", bar = "bar") expect_identical( vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar") ) to <- structure(NA, foo = "foo", bar = "bar", names = "a") expect_identical( vec_restore.default(NA, to), structure(NA, foo = "foo", bar = "bar") ) }) test_that("default vec_restore() restores / clears objectness", { # Non-object -> object x <- NA to <- structure(NA, class = "foo") out <- vec_restore.default(x, to) expect_true(is.object(out)) expect_s3_class(out, "foo") # Object -> non-object x <- structure(NA, class = "foo") to <- NA out <- vec_restore.default(x, to) expect_false(is.object(out)) expect_null(attributes(out)) }) test_that("data frame vec_restore() checks type", { expect_error( vec_restore(NA, mtcars), "Attempt to restore data frame from a logical" ) }) test_that("data frame restore forces character column names", { df <- new_data_frame(list(1)) expect_named(vec_restore(df, df), "") }) test_that("can use vctrs primitives from vec_restore() without inflooping", { local_methods( vec_restore.vctrs_foobar = function(x, to, ...) { vec_ptype(x) vec_init(x) obj_check_vector(x) vec_slice(x, 0) "woot" } ) foobar <- new_vctr(1:3, class = "vctrs_foobar") expect_identical(vec_slice(foobar, 2), "woot") }) test_that("dimensions are preserved by default restore method", { x <- foobar(1:4) dim(x) <- c(2, 2) dimnames(x) <- list(a = c("foo", "bar"), b = c("quux", "hunoz")) exp <- foobar(c(1L, 3L)) dim(exp) <- c(1, 2) dimnames(exp) <- list(a = "foo", b = c("quux", "hunoz")) expect_identical(vec_slice(x, 1), exp) }) test_that("names attribute isn't set when restoring 1D arrays using 2D+ objects", { x <- foobar(1:2) dim(x) <- c(2) nms <- c("foo", "bar") dimnames(x) <- list(nms) res <- vec_restore(x, matrix(1)) expect_null(attributes(res)$names) expect_equal(attr(res, "names"), nms) expect_equal(names(res), nms) }) test_that("arguments are not inlined in the dispatch call (#300)", { local_methods( vec_restore.vctrs_foobar = function(x, to, ...) sys.call(), vec_proxy.vctrs_foobar = unclass ) call <- vec_restore(foobar(list(1)), foobar(list(1))) expect_equal(call, quote(vec_restore.vctrs_foobar(x = x, to = to))) }) test_that("restoring to non-bare data frames calls `vec_bare_df_restore()` before dispatching", { x <- list(x = numeric()) to <- new_data_frame(x, class = "tbl_foobar") local_methods( vec_restore.tbl_foobar = function(x, to, ..., n) { if (is.data.frame(x)) { abort(class = "error_df_restore_was_called") } } ) expect_error(vec_restore(x, to), class = "error_df_restore_was_called") }) test_that("names of `x` are retained", { x <- structure(1, names = "x") to <- 2 expect_identical(vec_restore(x, to), x) to <- structure(2, names = "y") expect_identical(vec_restore(x, to), x) to <- structure(2, foo = "bar") expect_identical(vec_restore(x, to), structure(1, names = "x", foo = "bar")) }) test_that("column names and row names of `x` are retained", { x <- data.frame(x = 1, row.names = "foo") to <- data_frame(x = 2) expect_identical(vec_restore(x, to), x) to <- data_frame(x = 2, y = 3) expect_identical(vec_restore(x, to), x) to <- data.frame(x = 1, row.names = "a") expect_identical(vec_restore(x, to), x) to <- data_frame(x = 2) attr(to, "foo") <- "bar" expect <- data.frame(x = 1, row.names = "foo") attr(expect, "foo") <- "bar" expect_identical(vec_restore(x, to), expect) }) test_that("dim of `x` is retained", { x <- structure(1L, dim = c(1L, 1L)) to <- 2L expect_identical(vec_restore(x, to), x) to <- structure(1:2, dim = c(2L, 1L)) expect_identical(vec_restore(x, to), x) to <- structure(2L, foo = "bar") expect_identical( vec_restore(x, to), structure(1L, dim = c(1L, 1L), foo = "bar") ) }) test_that("dimnames of `x` are retained", { x <- structure(1L, dim = c(1L, 1L), dimnames = list("x", "y")) to <- 2L expect_identical(vec_restore(x, to), x) to <- structure(1:2, dim = c(2L, 1L)) expect_identical(vec_restore(x, to), x) to <- structure(2L, foo = "bar") expect_identical( vec_restore(x, to), structure(1L, dim = c(1L, 1L), dimnames = list("x", "y"), foo = "bar") ) }) test_that("row names are not restored if target is not a data frame", { proxy <- data.frame(x = 1) out <- vec_restore(proxy, to = foobar("")) exp <- list(names = "x", class = "vctrs_foobar") expect_mapequal(attributes(out), exp) }) test_that("names / dim / dimnames / row.names of `to` are cleared", { exp <- list(foo = TRUE, bar = TRUE) # `names` to <- structure(list(), foo = TRUE, names = chr(), bar = TRUE) out <- vec_restore_default(list(), to) expect_mapequal(attributes(out), exp) # `names` / `row.names` # Was broken by #943 to <- structure( list(), foo = TRUE, names = chr(), row.names = int(), bar = TRUE ) out <- vec_restore_default(list(), to) expect_mapequal(attributes(out), exp) # `dim` to <- structure(list(), foo = TRUE, dim = c(1L, 0L), bar = TRUE) out <- vec_restore_default(list(), to) expect_mapequal(attributes(out), exp) # `dim` / `dimnames` to <- structure( list(), foo = TRUE, dim = c(1L, 0L), dimnames = list("a", character()), bar = TRUE ) out <- vec_restore_default(list(), to) expect_mapequal(attributes(out), exp) }) test_that("vec_restore_default() is a no-op with no attributes", { x <- 1 expect_identical(vec_restore(x, x), x) }) test_that("vec_restore() clears unknown attributes from `x`", { # When `to` has no attributes x <- structure(1, foo = "bar") to <- 2 expect_identical(vec_restore(x, to), 1) # When `to` has attributes x <- structure(1, foo = "bar") to <- structure(2, a = "b") expect_identical(vec_restore(x, to), structure(1, a = "b")) }) test_that("vec_restore() clears incongruent names / dim / dimnames / rownames attributes", { # This is an array and should not have `names` x <- structure(1, names = "x", dim = c(1L, 1L)) to <- 2 expect_identical(vec_restore(x, to), structure(1, dim = c(1L, 1L))) # This is an array and should not have `row.names` x <- structure(1, row.names = "x", dim = c(1L, 1L)) to <- 2 expect_identical(vec_restore(x, to), structure(1, dim = c(1L, 1L))) # R won't even let you make `x`! So no need to test this case. # # This is a vector and should not have `dimnames` # x <- structure(1, names = "x", dimnames = list("a")) # to <- 2 # expect_identical(vec_restore(x, to), structure(1, names = "x")) }) test_that("names<- is not called with partial data (#1108)", { x <- set_names(foobar(1:2), c("a", "b")) values <- list() local_methods( `names<-.vctrs_foobar` = function(x, value) { if (!is_null(value)) { values <<- c(values, list(value)) } NextMethod() } ) vec_c(x, x) expect_equal(values, list(c("a", "b", "a", "b"))) }) test_that("recursive proxy and restore work with recursive records", { new_recursive_rcrd <- function(x) { new_rcrd( list(field = x), class = "my_recursive_rcrd" ) } internal <- new_rcrd(list(internal_field = 1:2)) x <- new_recursive_rcrd(data_frame(col = internal)) proxy <- vec_proxy_recurse(x) exp <- data_frame(field = data_frame(col = data_frame(internal_field = 1:2))) expect_equal(proxy, exp) expect_equal(vec_restore_recurse(proxy, x), x) # Non-recursive case doesn't proxy `internal` proxy <- vec_proxy(x) exp <- data_frame(field = data_frame(col = internal)) expect_equal(proxy, exp) expect_equal(vec_restore(proxy, x), x) x_exp <- new_recursive_rcrd(data_frame(col = vec_rep(internal, 2))) expect_equal( list_unchop(list(x, x)), x_exp ) df <- data_frame(x = x) df_exp <- data_frame(x = x_exp) expect_equal(vec_rbind(df, df), df_exp) expect_equal(vec_c(df, df), df_exp) }) vctrs/tests/testthat/test-type-table.R0000644000176200001440000002032715065005761017574 0ustar liggesusers# Print ------------------------------------------------------------------- test_that("ptype print methods are descriptive", { tab1 <- new_table() tab2 <- new_table(dim = c(0L, 1L, 2L, 1L)) expect_equal(vec_ptype_abbr(tab1), "table[1d]") expect_equal(vec_ptype_abbr(tab2), "table[,1,2,1]") expect_equal(vec_ptype_full(tab1), "table[1d]") expect_equal(vec_ptype_full(tab2), "table[,1,2,1]") }) # Coercion ---------------------------------------------------------------- test_that("can find a common type among tables with identical dimensions", { tab1 <- new_table() tab2 <- new_table(1:2, dim = c(1L, 2L, 1L)) expect_identical(vec_ptype2(tab1, tab1), new_table()) expect_identical(vec_ptype2(tab2, tab2), new_table(dim = c(0L, 2L, 1L))) }) test_that("size is not considered in the ptype", { x <- new_table(1:2, dim = 2L) y <- new_table(1:3, dim = 3L) expect_identical(vec_ptype2(x, y), new_table()) }) test_that("vec_ptype2() can broadcast table shapes", { x <- new_table(dim = c(0L, 1L)) y <- new_table(dim = c(0L, 2L)) expect_identical(vec_ptype2(x, y), new_table(dim = c(0L, 2L))) x <- new_table(dim = c(0L, 1L, 3L)) y <- new_table(dim = c(0L, 2L, 1L)) expect_identical(vec_ptype2(x, y), new_table(dim = c(0L, 2L, 3L))) }) test_that("vec_ptype2() never propagates dimnames", { x <- new_table(dim = c(0L, 1L), dimnames = list(character(), "x1")) y <- new_table(dim = c(0L, 2L), dimnames = list(character(), c("y1", "y2"))) expect_null(dimnames(vec_ptype2(x, x))) expect_null(dimnames(vec_ptype2(x, y))) }) test_that("implicit axes are broadcast", { x <- new_table(dim = c(0L, 2L)) y <- new_table(dim = c(0L, 1L, 3L)) expect_identical(vec_ptype2(x, y), new_table(dim = c(0L, 2L, 3L))) }) test_that("errors on non-broadcastable dimensions", { x <- new_table(dim = c(0L, 2L)) y <- new_table(dim = c(0L, 3L)) expect_error(vec_ptype2(x, y), class = "vctrs_error_incompatible_type") }) test_that("vec_ptype2() errors on non-tables", { expect_error( vec_ptype2(new_table(), 1), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(new_table(), 1L), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(new_table(), "1"), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(1, new_table()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2(1L, new_table()), class = "vctrs_error_incompatible_type" ) expect_error( vec_ptype2("1", new_table()), class = "vctrs_error_incompatible_type" ) }) test_that("common types have symmetry when mixed with unspecified input", { x <- new_table() expect_identical(vec_ptype2(x, NA), new_table()) expect_identical(vec_ptype2(NA, x), new_table()) x <- new_table(dim = c(0L, 2L)) expect_identical(vec_ptype2(x, NA), new_table(dim = c(0L, 2L))) expect_identical(vec_ptype2(NA, x), new_table(dim = c(0L, 2L))) }) test_that("`table` delegates coercion", { expect_identical( vec_ptype2(new_table(1), new_table(FALSE)), new_table(double()) ) expect_error( vec_ptype2(new_table(1), new_table("")), class = "vctrs_error_incompatible_type" ) }) # Casting ----------------------------------------------------------------- test_that("can cast to an identically shaped table", { x <- new_table(1:5, dim = 5L) y <- new_table(1:8, dim = c(2L, 2L, 2L)) expect_identical(vec_cast(x, x), x) expect_identical(vec_cast(y, y), y) }) test_that("vec_cast() can broadcast table shapes", { # We test only the dim here and not the class because on R 3.2 # the `[.table` method did not exist and `shape_broadcast()` # gives back a matrix, not a table. x <- new_table(dim = c(0L, 1L)) y <- new_table(dim = c(0L, 2L)) expect_identical(dim(vec_cast(x, y)), c(0L, 2L)) x <- new_table(dim = c(0L, 1L, 1L)) y <- new_table(dim = c(0L, 2L, 3L)) expect_identical(dim(vec_cast(x, y)), c(0L, 2L, 3L)) }) test_that("cannot decrease axis length", { x <- new_table(dim = c(0L, 3L)) y <- new_table(dim = c(0L, 1L)) expect_error( vec_cast(x, y), "Non-recyclable", class = "vctrs_error_incompatible_type" ) }) test_that("cannot decrease dimensionality", { x <- new_table(dim = c(0L, 1L, 1L)) y <- new_table(dim = c(0L, 1L)) expect_snapshot({ (expect_error(vec_cast(x, y), class = "vctrs_error_incompatible_type")) }) }) test_that("vec_cast() errors on non-tables", { expect_error( vec_cast(new_table(), 1), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(new_table(), 1L), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(new_table(), "1"), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(1, new_table()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast(1L, new_table()), class = "vctrs_error_incompatible_type" ) expect_error( vec_cast("1", new_table()), class = "vctrs_error_incompatible_type" ) }) test_that("can cast from, but not to, unspecified", { x <- new_table() expect_error(vec_cast(x, NA), class = "vctrs_error_incompatible_type") expect_identical(vec_cast(NA, x), new_table(NA_integer_, dim = 1L)) x <- new_table(dim = c(0L, 2L)) expect_error(vec_cast(x, NA), class = "vctrs_error_incompatible_type") expect_identical( vec_cast(NA, x), new_table(c(NA_integer_, NA_integer_), dim = c(1L, 2L)) ) }) test_that("`table` delegates casting", { expect_identical( vec_cast(new_table(1), new_table(FALSE)), new_table(TRUE) ) expect_error( vec_cast(new_table(1), new_table("")), class = "vctrs_error_incompatible_type" ) }) # Misc -------------------------------------------------------------------- test_that("`new_table()` validates input", { expect_error(new_table(1L, 1), "`dim` must be an integer vector") expect_error(new_table(1:2, 1L), "must match the length of `x`") }) test_that("ptype is correct", { tab1 <- new_table(1L, dim = 1L) tab2 <- new_table(1:2, dim = c(1L, 2L, 1L)) expect_identical(vec_ptype(tab1), new_table()) expect_identical(vec_ptype(tab2), new_table(dim = c(0L, 2L, 1L))) }) test_that("can use a table in `vec_c()`", { expect_identical(vec_c(new_table()), new_table()) expect_identical(vec_c(new_table(), new_table()), new_table()) x <- new_table(1:5, 5L) y <- new_table(1:4, dim = c(2L, 2L)) expect_identical(vec_c(x, x), new_table(c(1:5, 1:5), dim = 10L)) expect_identical( vec_c(y, y), new_table(c(1:2, 1:2, 3:4, 3:4), dim = c(4L, 2L)) ) expect_identical( vec_c(x, y), new_table(c(1:5, 1:2, 1:5, 3:4), dim = c(7L, 2L)) ) }) test_that("names of the first dimension are kept in `vec_c()`", { x <- new_table(1:4, c(2L, 2L)) dimnames(x) <- list(c("r1", "r2"), c("c1", "c2")) xx <- vec_c(x, x) expect_identical(dimnames(xx), list(c("r1", "r2", "r1", "r2"), NULL)) }) test_that("can use a table in `list_unchop()`", { x <- new_table(1:4, dim = c(2L, 2L)) expect_identical(list_unchop(list(x)), x) expect_identical( list_unchop(list(x, x), indices = list(1:2, 4:3)), vec_slice(x, c(1:2, 2:1)) ) }) test_that("can concatenate tables", { x <- table(1:2) out <- vec_c(x, x) exp <- new_table(rep(1L, 4), dimnames = list(c("1", "2", "1", "2"))) expect_identical(out, exp) out <- vec_rbind(x, x) exp <- data_frame(`1` = new_table(c(1L, 1L)), `2` = new_table(c(1L, 1L))) expect_identical(out, exp) y <- table(list(1:2, 3:4)) # FIXME out <- vec_c(y, y) exp <- new_table( matrix(int(1, 0, 1, 0, 0, 1, 0, 1), nrow = 4), dim = c(4L, 2L), dimnames = list(c("1", "2", "1", "2"), NULL) ) expect_identical(out, exp) out <- vec_rbind(y, y) exp <- new_data_frame( list( `3` = int(1, 0, 1, 0), `4` = int(0, 1, 0, 1) ), row.names = c("1...1", "2...2", "1...3", "2...4") ) expect_identical(out, exp) skip("FIXME: dimnames of matrices are not properly concatenated") }) test_that("can concatenate tables of type double (#1190)", { x <- table(c(1, 2)) / 2 out <- vec_c(x, x) exp <- new_table( c(0.5, 0.5, 0.5, 0.5), dimnames = list(c("1", "2", "1", "2")) ) expect_identical(out, exp) out <- vec_rbind(x, x) exp <- data_frame(`1` = new_table(c(0.5, 0.5)), `2` = new_table(c(0.5, 0.5))) expect_identical(out, exp) }) vctrs/tests/testthat/test-order.R0000644000176200001440000014707615120272011016636 0ustar liggesusers# ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(integer()), integer()) }) test_that("can order integers", { x <- c(2L, 3L, 1L, 5L) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:5 expect_identical(vec_order_radix(x), order(x)) }) test_that("orders correctly around the UINT8_MAX boundary", { x <- 251:255 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1L, 3L, 1L, 3L) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c(1L, NA_integer_, 3L) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("integer, small: `NA` order can be first", { x <- c(1L, NA_integer_, 3L) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("double: `direction` can be set to `desc`", { x <- c(1, 5, 3) expect_identical(vec_order_radix(x, direction = "desc"), c(2L, 3L, 1L)) x <- c(1L, .Machine$integer.max, 3L) expect_identical(vec_order_radix(x, direction = "desc"), c(2L, 3L, 1L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_integer_, NA_integer_) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order when in expected order", { x <- c(1L, 1L, 2L, NA, NA) expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5 ) x <- c(NA, NA, 3L, 3L, 2L) expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5 ) x <- c(NA, NA, 1L, 1L, 2L) expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5 ) x <- c(3L, 3L, 2L, NA, NA) expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5 ) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, 2L, 1L) expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1 ) x <- c(1L, 2L, NA) expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1 ) x <- c(2L, 1L, NA) expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1 ) x <- c(NA, 1L, 2L) expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1 ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - counting # To trigger counting ordering, get above the insertion order boundary and then # have a range less than the counting order range boundary. test_that("can order integers with counting order", { x <- (ORDER_INSERTION_BOUNDARY + 1L):1L expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:(ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L) expect_identical( vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L) ) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3L, NA_integer_, 1L, 2L, 1:ORDER_INSERTION_BOUNDARY) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # To trigger radix ordering, get above the insertion order boundary and then # have a range greater than the counting order range boundary. test_that("can order integers with radix order", { x <- c(INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L, 1:ORDER_INSERTION_BOUNDARY) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- c(1:ORDER_INSERTION_BOUNDARY, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L) expect_identical( vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L) ) }) test_that("all combinations of `direction` and `na_value` work", { x <- c( 3L, NA_integer_, 1L, 2L, 1:ORDER_INSERTION_BOUNDARY, INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("can order all 1 value", { x <- rep(1L, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical( vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE) ) }) test_that("all `NA` values works - ensures that we can compute the 'range' of all NAs", { x <- rep(NA_integer_, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical( vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE) ) }) test_that("can order with many NAs first", { x <- c(rep(NA_integer_, ORDER_INSERTION_BOUNDARY + 1L), 2L) expect_identical(vec_order_radix(x), base_order(x)) expect_identical( vec_order_radix(x, na_value = "smallest"), base_order(x, na.last = FALSE) ) }) test_that("subtraction in counting order range computation works correctly (#1399)", { x <- c(rep(1L, ORDER_INSERTION_BOUNDARY), -2147483647L) expect_identical(vec_order_radix(x), base_order(x)) }) # ------------------------------------------------------------------------------ # vec_order_radix() # Really this just goes through the integer infrastructure. Just checking that # it is working. test_that("can order size zero input", { expect_identical(vec_order_radix(logical()), integer()) }) test_that("can order logicals", { x <- c(FALSE, TRUE, FALSE) expect_identical(vec_order_radix(x), order(x)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(TRUE, NA, FALSE) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA, NA) expect_identical(vec_order_radix(x), order(x)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(double()), integer()) }) test_that("can order doubles", { x <- c(2, 3, 1, 5) expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:5 + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1, 3, 1, 3) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c(1, NA_real_, 3) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("double: `NA` order can be first", { x <- c(1, NA_real_, 3) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3, NA_real_, 1, 2) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_real_, NA_real_) expect_identical(vec_order_radix(x), order(x)) }) test_that("NA_real_ and NaN look identical for ordering", { x <- c(NA_real_, NaN) expect_identical(vec_order_radix(x, na_value = "largest"), c(1L, 2L)) expect_identical(vec_order_radix(x, na_value = "smallest"), c(1L, 2L)) }) test_that("double: -Inf / Inf order correctly", { x <- c(0, -Inf, Inf) expect_identical(vec_order_radix(x, direction = "asc"), c(2L, 1L, 3L)) expect_identical(vec_order_radix(x, direction = "desc"), c(3L, 1L, 2L)) }) test_that("double: -0 and 0 order identically / stably", { x <- c(0, -0) expect_identical(vec_order_radix(x, direction = "desc"), c(1L, 2L)) expect_identical(vec_order_radix(x, direction = "asc"), c(1L, 2L)) }) test_that("can order when in expected order", { x <- c(1, 1, 2, NA, NaN) expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5 ) x <- c(NA, NaN, 3, 3, 2) expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5 ) x <- c(NA, NaN, 1, 1, 2) expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5 ) x <- c(3, 3, 2, NA, NaN) expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5 ) }) test_that("can order when in expected order - using distinct NaN values", { x <- c(1, 1, 2, NaN, NA) expect_identical( vec_order_radix( x, direction = "asc", na_value = "largest", nan_distinct = TRUE ), 1:5 ) x <- c(NA, NaN, 3, 3, 2) expect_identical( vec_order_radix( x, direction = "desc", na_value = "largest", nan_distinct = TRUE ), 1:5 ) x <- c(NA, NaN, 1, 1, 2) expect_identical( vec_order_radix( x, direction = "asc", na_value = "smallest", nan_distinct = TRUE ), 1:5 ) x <- c(3, 3, 2, NaN, NA) expect_identical( vec_order_radix( x, direction = "desc", na_value = "smallest", nan_distinct = TRUE ), 1:5 ) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, 2, 1) expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1 ) x <- c(1, 2, NA) expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1 ) x <- c(2, 1, NA) expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1 ) x <- c(NA, 1, 2) expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1 ) }) test_that("can order when in strictly opposite of expected order (no ties) - using distinct NaN values", { x <- c(NA, NaN, 2, 1) expect_identical( vec_order_radix( x, direction = "asc", na_value = "largest", nan_distinct = TRUE ), 4:1 ) x <- c(1, 2, NaN, NA) expect_identical( vec_order_radix( x, direction = "desc", na_value = "largest", nan_distinct = TRUE ), 4:1 ) x <- c(2, 1, NaN, NA) expect_identical( vec_order_radix( x, direction = "asc", na_value = "smallest", nan_distinct = TRUE ), 4:1 ) x <- c(NA, NaN, 1, 2) expect_identical( vec_order_radix( x, direction = "desc", na_value = "smallest", nan_distinct = TRUE ), 4:1 ) }) test_that("NaN is always placed next to numbers when treated as distinct", { x <- c(1, 2, NA, NaN) expect_identical( vec_order_radix( x, direction = "asc", na_value = "largest", nan_distinct = TRUE ), c(1L, 2L, 4L, 3L) ) expect_identical( vec_order_radix( x, direction = "asc", na_value = "smallest", nan_distinct = TRUE ), c(3L, 4L, 1L, 2L) ) expect_identical( vec_order_radix( x, direction = "desc", na_value = "largest", nan_distinct = TRUE ), c(3L, 4L, 2L, 1L) ) expect_identical( vec_order_radix( x, direction = "desc", na_value = "smallest", nan_distinct = TRUE ), c(2L, 1L, 4L, 3L) ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # To trigger radix ordering, get above the insertion order boundary. There is # no intermediate counting sort for doubles. test_that("can order doubles with radix order", { x <- (ORDER_INSERTION_BOUNDARY + 1L):1L + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- 1:(ORDER_INSERTION_BOUNDARY + 1L) + 0 expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c(1:ORDER_INSERTION_BOUNDARY, 1L) + 0 expect_identical( vec_order_radix(x)[1:2], c(1L, ORDER_INSERTION_BOUNDARY + 1L) ) }) test_that("all combinations of `direction` and `na_value` work", { x <- c(3, NA_real_, 1, 2, 1:ORDER_INSERTION_BOUNDARY) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- rep(NA_real_, ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x), order(x)) }) test_that("NA_real_ and NaN generally look identical for ordering", { x <- rep(c(NA_real_, NaN), ORDER_INSERTION_BOUNDARY + 1L) expect_identical(vec_order_radix(x, na_value = "largest"), seq_along(x)) expect_identical(vec_order_radix(x, na_value = "smallest"), seq_along(x)) }) test_that("NA_real_ and NaN can be considered distinct with `nan_distinct`", { x <- rep(c(NA_real_, NaN), ORDER_INSERTION_BOUNDARY + 1L) loc_nan <- seq(2L, length(x), by = 2L) loc_na <- seq(1L, length(x), by = 2L) expect_identical( vec_order_radix(x, na_value = "largest", nan_distinct = TRUE), c(loc_nan, loc_na) ) expect_identical( vec_order_radix(x, na_value = "smallest", nan_distinct = TRUE), c(loc_na, loc_nan) ) }) test_that("-Inf / Inf order correctly", { x <- c(rep(0, ORDER_INSERTION_BOUNDARY), -Inf, Inf) expect_identical( vec_order_radix(x, direction = "asc"), order(x, decreasing = FALSE) ) expect_identical( vec_order_radix(x, direction = "desc"), order(x, decreasing = TRUE) ) }) test_that("double, large: -0 and 0 order identically / stably", { x <- c(rep(0, ORDER_INSERTION_BOUNDARY), -0) expect_identical( vec_order_radix(x, direction = "desc"), order(x, decreasing = TRUE) ) expect_identical( vec_order_radix(x, direction = "asc"), order(x, decreasing = FALSE) ) }) # ------------------------------------------------------------------------------ # vec_order_radix() test_that("can order size zero input", { expect_identical(vec_order_radix(complex()), integer()) }) test_that("can order complex", { x <- complex(real = c(3, 1, 2)) expect_identical(vec_order_radix(x), c(2L, 3L, 1L)) }) test_that("ordering on ties is done stably", { x <- complex(real = c(1, 3, 1, 3)) expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("imaginary section is used to break ties", { x <- complex( real = c(1L, 2L, 1L), imaginary = c(3L, 2L, 1L) ) expect_identical(vec_order_radix(x), c(3L, 1L, 2L)) }) test_that("can be used in a data frame", { x <- c(1L, 1L, 1L, 2L, 1L) y <- complex( real = c(1L, 2L, 1L, 3L, 1L), imaginary = c(3L, 2L, 1L, 4L, 1L) ) z <- c(1, 2, 5, 4, 3) # as second column df1 <- data.frame(x = x, y = y) # as first column df2 <- data.frame(y = y, x = x) # as second column with a third after it to break ties df3 <- data.frame(x = x, y = y, z = z) # Base R can't do radix sorting with complex expect_identical(vec_order_radix(df1), c(3L, 5L, 1L, 2L, 4L)) expect_identical(vec_order_radix(df2), c(3L, 5L, 1L, 2L, 4L)) expect_identical(vec_order_radix(df3), c(5L, 3L, 1L, 2L, 4L)) }) test_that("can be used in a data frame when all group sizes are >1", { # https://github.com/tidyverse/dplyr/issues/7708 # Group sizes of 3 (1L) and 2 (2L) x <- c(1L, 2L, 1L, 2L, 1L) y <- complex( # Group sizes of 3 (1L, 1L) and 2 (2L, 2L) real = c(1L, 2L, 1L, 2L, 1L), # This breaks ties within each group imaginary = c(3L, 2L, 2L, 4L, 1L) ) df <- data.frame(x = x, y = y) expect_identical(vec_order_radix(df), c(5L, 3L, 1L, 2L, 4L)) }) test_that("can be used in a data frame as the third column", { # Testing that `complex_first_pass` is reset between columns x <- c(1L, 1L, 1L, 2L, 2L) y <- complex( real = x, # This breaks one tie imaginary = c(1L, 2L, 1L, 2L, 2L) ) z <- complex( # This breaks one tie real = c(3L, 2L, 1L, 2L, 2L), # This breaks one tie imaginary = c(3L, 2L, 1L, 2L, 1L) ) df <- data.frame(x = x, y = y, z = z) expect_identical(vec_order_radix(df), c(3L, 1L, 2L, 5L, 4L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- complex(real = c(3, NA, 1.5, 2, NA), imaginary = c(1, 1, 1, 1, 2)) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) # In fixing #1403, we now align with base R expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("full gambit of tests involving missing values are working as expected (#1403)", { x <- complex( real = c(NaN, NA, NA, NA, NaN, NaN, 1, 1, 1, 2), imaginary = c(NA, NA, NaN, 1, NaN, 1, NA, NaN, 1, NA) ) df <- data_frame(a = rep(1L, length(x)), x = x) # {number}, {NaN}, {NaN + NA}, {NA + NaN}, {NA} expect <- c(9L, 5L, 6L, 8L, 1L, 3L, 2L, 4L, 7L, 10L) expect_identical( vec_order_radix( x, direction = "asc", na_value = "largest", nan_distinct = TRUE ), expect ) expect_identical( vec_order_radix( x, direction = "desc", na_value = "smallest", nan_distinct = TRUE ), expect ) expect_identical( vec_order_radix( df, direction = "asc", na_value = "largest", nan_distinct = TRUE ), expect ) expect_identical( vec_order_radix( df, direction = "desc", na_value = "smallest", nan_distinct = TRUE ), expect ) # {NA}, {NA + NaN}, {NaN + NA}, {NaN}, {number} expect <- c(2L, 4L, 7L, 10L, 3L, 1L, 5L, 6L, 8L, 9L) expect_identical( vec_order_radix( x, direction = "asc", na_value = "smallest", nan_distinct = TRUE ), expect ) expect_identical( vec_order_radix( x, direction = "desc", na_value = "largest", nan_distinct = TRUE ), expect ) expect_identical( vec_order_radix( df, direction = "asc", na_value = "smallest", nan_distinct = TRUE ), expect ) expect_identical( vec_order_radix( df, direction = "desc", na_value = "largest", nan_distinct = TRUE ), expect ) # {number}, {NA or NaN} expect <- c(9L, 1:7, 8L, 10L) expect_identical( vec_order_radix( x, direction = "asc", na_value = "largest", nan_distinct = FALSE ), expect ) expect_identical( vec_order_radix( x, direction = "desc", na_value = "smallest", nan_distinct = FALSE ), expect ) expect_identical( vec_order_radix( df, direction = "asc", na_value = "largest", nan_distinct = FALSE ), expect ) expect_identical( vec_order_radix( df, direction = "desc", na_value = "smallest", nan_distinct = FALSE ), expect ) # {NA or NaN}, {number} expect <- c(1:8, 10L, 9L) expect_identical( vec_order_radix( x, direction = "asc", na_value = "smallest", nan_distinct = FALSE ), expect ) expect_identical( vec_order_radix( x, direction = "desc", na_value = "largest", nan_distinct = FALSE ), expect ) expect_identical( vec_order_radix( df, direction = "asc", na_value = "smallest", nan_distinct = FALSE ), expect ) expect_identical( vec_order_radix( df, direction = "desc", na_value = "largest", nan_distinct = FALSE ), expect ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("can order size zero input", { expect_identical(vec_order_radix(character()), integer()) }) test_that("can order characters", { x <- c("xy", "x", "a", "bc") expect_identical(vec_order_radix(x), order(x)) }) test_that("can order sorted vector", { x <- c("a", "b", "c") expect_identical(vec_order_radix(x), order(x)) }) test_that("ordering on ties is done stably", { x <- c("ab", "ba", "ab", "ba") expect_identical(vec_order_radix(x)[1:2], c(1L, 3L)) expect_identical(vec_order_radix(x)[3:4], c(2L, 4L)) }) test_that("`NA` order defaults to last", { x <- c("x", NA_character_, "y") expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) }) test_that("character, small: `NA` order can be first", { x <- c("x", NA_character_, "y") expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 1L, 3L)) }) test_that("character, small: `direction` can be set to `desc`", { x <- c("x", "abcde", "yz") expect_identical(vec_order_radix(x, direction = "desc"), c(3L, 1L, 2L)) }) test_that("all combinations of `direction` and `na_value` work", { x <- c("aaa", NA_character_, "a", "aa") expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[order(x, na.last = TRUE, decreasing = TRUE)] ) }) test_that("all `NA` values works", { x <- c(NA_character_, NA_character_) expect_identical(vec_order_radix(x), order(x)) }) test_that("all `''` values works", { x <- c("", "") expect_identical(vec_order_radix(x), order(x)) }) test_that("can order empty string vs ASCII value 1 'Start of Header'", { x <- c("\001", "") expect_identical(vec_order_radix(x), c(2L, 1L)) }) test_that("can be used in a data frame", { x <- c(1L, 4L, 1L, 3L, 1L) y <- c("zy", "zz", "abcd", "gfa", "zy") z <- c(1, 2, 5, 4, 3) # as second column df1 <- data.frame(x = x, y = y) # as first column df2 <- data.frame(y = y, x = x) # as second column with a third after it to break ties df3 <- data.frame(x = x, y = y, z = z) expect_identical(vec_order_radix(df1), base_order(df1)) expect_identical(vec_order_radix(df2), base_order(df2)) expect_identical(vec_order_radix(df3), base_order(df3)) }) test_that("can have multiple character columns in a data frame", { df <- data.frame( x = c("def", "aba", "aba", "aba", "q"), y = c("zy", "zz", "zz", "gfa", "zy"), z = c("foo", "qux", "bar", "baz", "boo") ) expect_identical(vec_order_radix(df), base_order(df)) }) test_that("can order with varying encodings by converting to UTF-8", { encs <- encodings() x <- c(encs$utf8, encs$unknown, encs$latin1, "AC") expect_identical(vec_order_radix(x), c(4L, 1L, 2L, 3L)) expect_identical(vec_order_radix(x, direction = "desc"), c(1L, 2L, 3L, 4L)) }) test_that("can order when in expected order", { x <- c("a", "a", "b", NA, NA) expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), 1:5 ) x <- c(NA, NA, "c", "c", "b") expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), 1:5 ) x <- c(NA, NA, "a", "a", "b") expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), 1:5 ) x <- c("c", "c", "b", NA, NA) expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), 1:5 ) }) test_that("can order when in strictly opposite of expected order (no ties)", { x <- c(NA, "b", "a") expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), 3:1 ) x <- c("a", "b", NA) expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), 3:1 ) x <- c("b", "a", NA) expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), 3:1 ) x <- c(NA, "a", "b") expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), 3:1 ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix # Have to get the number of unique strings above the ORDER_INSERTION_BOUNDARY # to trigger radix ordering. test_that("can order character vectors", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) expect_identical(vec_order_radix(x), base_order(x)) }) test_that("ordering on ties is done stably", { x <- c(paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)), "x1") expect_identical(vec_order_radix(x)[1:2], c(1L, length(x))) }) test_that("`NA` order defaults to last", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "y") expect_identical(vec_order_radix(x)[length(x)], length(x) - 1L) }) test_that("character, large: `NA` order can be first", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "y") expect_identical( vec_order_radix(x, na_value = "smallest")[[1L]], length(x) - 1L ) }) test_that("character, large: `direction` can be set to `desc`", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) expect_identical( vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE) ) }) test_that("after `NA` removal, all `''` works", { # `NA` prevents it from looking sorted to start with x <- rep("", ORDER_INSERTION_BOUNDARY + 1L) x <- c(NA, x, NA) expect_identical( vec_order_radix(x, direction = "asc", na_value = "largest"), base_order(x, na.last = TRUE, decreasing = FALSE) ) expect_identical( vec_order_radix(x, direction = "desc", na_value = "largest"), base_order(x, na.last = FALSE, decreasing = TRUE) ) expect_identical( vec_order_radix(x, direction = "asc", na_value = "smallest"), base_order(x, na.last = FALSE, decreasing = FALSE) ) expect_identical( vec_order_radix(x, direction = "desc", na_value = "smallest"), base_order(x, na.last = TRUE, decreasing = TRUE) ) # `""` coming through as a "chunk" y <- vec_rep_each(c("a", "b", "c", "d", "e"), length(x)) x <- vec_rep(x, 5) df <- data_frame(y = y, x = x) expect_identical( vec_order_radix(df, direction = "asc", na_value = "largest"), base_order(df, na.last = TRUE, decreasing = FALSE) ) expect_identical( vec_order_radix(df, direction = "desc", na_value = "largest"), base_order(df, na.last = FALSE, decreasing = TRUE) ) expect_identical( vec_order_radix(df, direction = "asc", na_value = "smallest"), base_order(df, na.last = FALSE, decreasing = FALSE) ) expect_identical( vec_order_radix(df, direction = "desc", na_value = "smallest"), base_order(df, na.last = TRUE, decreasing = TRUE) ) }) test_that("mixing `''` with other strings works", { x <- rep("", ORDER_INSERTION_BOUNDARY + 1L) x <- c("xyz", x, "abc", "xyz", "a") expect_identical( vec_order_radix(x, direction = "asc"), base_order(x, decreasing = FALSE) ) expect_identical( vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE) ) x <- rep(c("xyz", "abc", "xyz"), ORDER_INSERTION_BOUNDARY + 1L) x <- c("", x, "") expect_identical( vec_order_radix(x, direction = "asc"), base_order(x, decreasing = FALSE) ) expect_identical( vec_order_radix(x, direction = "desc"), base_order(x, decreasing = TRUE) ) }) test_that("we aren't indexing past an individual string", { # This is why the `\0` check is practically important # - `max_string_size = 3` # - After `pass = 0` we learn nothing # - After `pass = 1` we have two groups: # - byte b: `"abc\0"`, `"abd\0"` # - byte \0: `"a\0"` # - We refuse to recurse further into the `\0` byte group, otherwise we'd # index OOB x <- c( "abc", "abd", rep("a", ORDER_INSERTION_BOUNDARY + 1) ) expect_identical(vec_order_radix(x), base_order(x)) }) test_that("all combinations of `direction` and `na_value` work", { x <- paste0("x", seq(1L, ORDER_INSERTION_BOUNDARY + 1L)) x <- c(x, NA_character_, "x", "aa", "x1") expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "asc")], x[base_order(x, na.last = TRUE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "asc")], x[base_order(x, na.last = FALSE, decreasing = FALSE)] ) expect_identical( x[vec_order_radix(x, na_value = "largest", direction = "desc")], x[base_order(x, na.last = FALSE, decreasing = TRUE)] ) expect_identical( x[vec_order_radix(x, na_value = "smallest", direction = "desc")], x[base_order(x, na.last = TRUE, decreasing = TRUE)] ) }) # ------------------------------------------------------------------------------ # vec_order_radix() test_that("list elements are ordered by first appearance", { expect_identical(vec_order_radix(list(1:2, "a", 1:2)), c(1L, 3L, 2L)) }) test_that("missing values in lists are respected (#1401)", { x <- list(1, NULL, 2, NULL) expect_identical(vec_order_radix(x, na_value = "largest"), c(1L, 3L, 2L, 4L)) expect_identical(vec_order_radix(x, na_value = "smallest"), c(2L, 4L, 1L, 3L)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - insertion test_that("data frame with no columns and no rows returns integer()", { x <- data.frame() expect_identical(vec_order_radix(x), integer()) }) test_that("data frame with no columns and some rows returns sequential rows", { x <- new_data_frame(n = 5L) expect_identical(vec_order_radix(x), 1:5) }) test_that("can order with multiple pre-sorted keys", { df <- data.frame(x = 1:2, y = 3:4) expect_identical(vec_order_radix(df), 1:2) }) test_that("first column has ordering presedence", { df <- data.frame(x = c(3L, 2L, 1L), y = c(1L, 2L, 3L)) expect_identical(vec_order_radix(df), 3:1) }) test_that("secondary columns break ties - integer", { df <- data.frame( x = c(1L, 2L, 1L), y = c(3L, 2L, 1L) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("secondary columns break ties - double", { df <- data.frame( x = c(1, 2, 1), y = c(3, 2, 1) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("secondary columns break ties - logical", { df <- data.frame( x = c(FALSE, TRUE, FALSE), y = c(TRUE, TRUE, FALSE) ) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) }) test_that("orders correctly when first column is already ordered but second isn't", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df), c(2L, 1L, 4L, 3L)) }) test_that("orders correctly when first column is already ordered but second isn't - character", { df <- data.frame( x = c("a", "a", "b", "b"), y = c("c", "b", "d", "a") ) expect_identical(vec_order_radix(df), c(2L, 1L, 4L, 3L)) }) test_that("`direction` is recycled", { df <- data.frame( x = c(1L, 1L, 2L, 2L), y = c(3L, 2L, 4L, 1L) ) expect_identical(vec_order_radix(df, direction = "desc"), c(3L, 4L, 1L, 2L)) }) test_that("`na_value` is recycled", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA), y = c(3L, 2L, 4L, 1L, NA) ) expect_identical( vec_order_radix(df, na_value = "smallest"), c(5L, 2L, 1L, 4L, 3L) ) }) test_that("`direction` can be a vector", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, 1L), y = c(3L, 2L, 4L, 1L, 3L, NA) ) expect_identical( vec_order_radix(df, direction = c("desc", "asc")), c(5L, 4L, 3L, 2L, 1L, 6L) ) }) test_that("`na_value` can be a vector", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, NA), y = c(3L, 2L, 4L, 1L, NA, 2) ) expect_identical( vec_order_radix(df, na_value = c("smallest", "largest")), c(6L, 5L, 2L, 1L, 4L, 3L) ) }) test_that("`na_value` and `direction` can both be vectors", { df <- data.frame( x = c(1L, 1L, 2L, 2L, NA, NA), y = c(3L, 2L, 4L, 1L, NA, 2) ) expect_identical( vec_order_radix( df, direction = c("desc", "asc"), na_value = c("smallest", "largest") ), c(4:1, 6:5) ) }) test_that("`direction` is recycled right with array columns (#1753)", { df <- data_frame( x = matrix(c(1, 1, 1, 3, 2, 2), ncol = 2), y = 3:1 ) expect_identical( vec_order_radix(df, direction = c("asc", "desc")), c(2L, 3L, 1L) ) expect_snapshot(error = TRUE, { vec_order_radix(df, direction = c("asc", "desc", "desc")) }) df <- data_frame( x = array(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 3, 3), dim = c(3, 2, 2)), y = 3:1 ) expect_identical( vec_order_radix(df, direction = c("asc", "desc")), c(2L, 3L, 1L) ) }) test_that("`na_value` is recycled right with array columns (#1753)", { df <- data_frame( x = matrix(c(1, 1, 1, 3, NA, 2), ncol = 2), y = 3:1 ) expect_identical( vec_order_radix(df, na_value = c("largest", "smallest")), c(3L, 1L, 2L) ) expect_identical( vec_order_radix(df, na_value = c("smallest", "largest")), c(2L, 3L, 1L) ) expect_snapshot(error = TRUE, { vec_order_radix(df, direction = c("smallest", "largest", "largest")) }) df <- data_frame( x = array(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, NA, 3), dim = c(3, 2, 2)), y = 3:1 ) expect_identical( vec_order_radix(df, na_value = c("largest", "smallest")), c(3L, 1L, 2L) ) expect_identical( vec_order_radix(df, na_value = c("smallest", "largest")), c(2L, 3L, 1L) ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - counting test_that("can order 2+ integer column chunks with counting sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1L, y = c(rep(2L, quarter_low), rep(1L, quarter_high), rep(3L, half)) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - radix test_that("can order 2+ integer column chunks with radix sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1L, y = c( rep(2L, quarter_low), rep(1L, quarter_high), rep(3L, half), INT_ORDER_COUNTING_RANGE_BOUNDARY + 1L ) ) expect_identical(vec_order_radix(df), base_order(df)) }) test_that("can order 2+ double column chunks with radix sort", { half <- floor(ORDER_INSERTION_BOUNDARY / 2) + 1L quarter_low <- floor(half / 2) quarter_high <- ceiling(half / 2) df <- data.frame( x = 1, y = c( rep(2, quarter_low), rep(1, quarter_high), rep(3, half), INT_ORDER_COUNTING_RANGE_BOUNDARY + 1 ) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - chr_proxy_collate test_that("`chr_proxy_collate` transforms string input", { x <- c("b", "a", "A") expect_identical( vec_order_radix(x, chr_proxy_collate = tolower), c(2L, 3L, 1L) ) expect_identical( vec_order_radix(x, chr_proxy_collate = ~ tolower(.x)), c(2L, 3L, 1L) ) }) test_that("`chr_proxy_collate` works with data frame columns and is applied to all string columns", { df <- data_frame(x = c(1, 1, 1), y = c("B", "a", "a"), z = c("a", "D", "c")) expect_identical( vec_order_radix(df, chr_proxy_collate = tolower), c(3L, 2L, 1L) ) }) test_that("`chr_proxy_collate` is validated", { expect_error( vec_order_radix("x", chr_proxy_collate = 1), "Can't convert `chr_proxy_collate` to a function" ) expect_error( vec_order_radix("x", chr_proxy_collate = ~ c("y", "z")), "1, not 2" ) expect_error(vec_order_radix("x", chr_proxy_collate = ~1), "character vector") expect_error(vec_order_radix("x", chr_proxy_collate = function() { "y" })) }) test_that("`chr_proxy_collate` can return bytes-encoded strings (like `stringi::stri_sort_key()`)", { x <- c("A", "a", "b", "B") # Mimic stringi::stri_sort_key(x, locale = "en") sort_key <- function(x) { # dput(lapply(stringi::stri_sort_key(x, locale = "en"), charToRaw)) out <- list( as.raw(c(0x2a, 0x01, 0x05, 0x01, 0xdc)), as.raw(c(0x2a, 0x01, 0x05, 0x01, 0x05)), as.raw(c(0x2c, 0x01, 0x05, 0x01, 0x05)), as.raw(c(0x2c, 0x01, 0x05, 0x01, 0xdc)) ) out <- vapply(out, FUN.VALUE = character(1), function(x) { # Uses native encoding x <- rawToChar(x) Encoding(x) <- "bytes" x }) out } expect_identical( vec_order_radix(x, chr_proxy_collate = sort_key), c(2L, 1L, 3L, 4L) ) }) # ------------------------------------------------------------------------------ # vec_order_radix() - error checking test_that("`na_value` is checked", { expect_error( vec_order_radix(1L, na_value = "x"), "\"largest\" or \"smallest\"" ) expect_error( vec_order_radix(1L, na_value = c(TRUE, TRUE)), "must be a character vector" ) expect_error( vec_order_radix(1L, na_value = NA_character_), "can't be missing" ) }) test_that("`direction` is checked", { expect_error(vec_order_radix(1L, direction = "x"), "must contain only") expect_error(vec_order_radix(1L, direction = c("asc", "asc")), "single value") expect_error( vec_order_radix(1L, direction = NA_character_), "can't be missing" ) expect_error( vec_order_radix(data.frame(x = 1), direction = c("asc", "asc")), "length 1 or" ) }) test_that("`x` is checked", { expect_error(vec_order_radix(foobar()), class = "vctrs_error_scalar_type") }) # ------------------------------------------------------------------------------ # vec_order_radix() - groups test_that("groups can be reallocated if we exceed the max group data size", { set.seed(123) # The first column has all unique groups so 1 more than the default group # data size is needed and will be reallocated on the fly df <- data.frame( x = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE), y = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE), z = sample(GROUP_DATA_SIZE_DEFAULT + 1L, replace = TRUE) ) expect_identical(vec_order_radix(df), base_order(df)) }) # ------------------------------------------------------------------------------ # vec_order_radix() - comparison proxy test_that("ordering works with rcrd types", { x <- tuple(c(1, 2, 1), c(3, 2, 1)) expect_identical(vec_order_radix(x), c(3L, 1L, 2L)) }) test_that("data frame comparison proxies don't allow vector `direction` or `na_value`", { x <- tuple(c(1, 2, 1), c(3, 2, 1)) expect_error(vec_order_radix(x, direction = c("desc", "asc")), "single value") expect_error( vec_order_radix(x, na_value = c("largest", "smallest")), "single value" ) }) test_that("ordering works with df-cols", { df_col <- new_data_frame(list(y = c(2, 1, 2), z = c(3, 3, 3))) df <- new_data_frame(list(x = c(1, 1, 1), y = df_col)) expect_identical(vec_order_radix(df), c(2L, 1L, 3L)) # Can only supply a max of 2 `direction` or `na_value` values which get internally # expanded to 3 to match the flattened df proxy expect_identical( vec_order_radix(df, direction = c("asc", "desc")), c(1L, 3L, 2L) ) expect_error( vec_order_radix(df, direction = c("desc", "desc", "asc")), "or length equal to" ) }) test_that("ordering works with df-cols with 0 cols", { df_col <- new_data_frame(list(), n = 3L) df <- new_data_frame(list(x = c(1, 3, 1), y = df_col, z = c(2, 1, 1))) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) # Can supply 3 `direction` values even though the 0-col df-col gets dropped expect_identical( vec_order_radix(df, direction = c("asc", "desc", "desc")), c(1L, 3L, 2L) ) expect_error( vec_order_radix(df, direction = c("desc", "asc")), "or length equal to" ) }) test_that("ordering works with rcrd cols", { y <- tuple(c(1, 2, 1), c(3, 2, 1)) df <- new_data_frame(list(z = c(1, 1, 1), y = y)) expect_identical(vec_order_radix(df), c(3L, 1L, 2L)) # Can only supply a max of 2 `direction` values which get internally # expanded to 3 to match the flattened df proxy expect_identical( vec_order_radix(df, direction = c("asc", "desc")), c(2L, 1L, 3L) ) expect_error( vec_order_radix(df, direction = c("desc", "desc", "asc")), "or length equal to" ) }) # ------------------------------------------------------------------------------ # `vec_order_radix()` - Pre-existing tests test_that("can request NAs sorted first", { expect_equal( vec_order_radix(c(1, NA), direction = "asc", na_value = "largest"), 1:2 ) expect_equal( vec_order_radix(c(1, NA), direction = "desc", na_value = "largest"), 2:1 ) expect_equal( vec_order_radix(c(1, NA), direction = "asc", na_value = "smallest"), 2:1 ) expect_equal( vec_order_radix(c(1, NA), direction = "desc", na_value = "smallest"), 1:2 ) }) test_that("can sort data frames", { df <- data.frame(x = c(1, 2, 1), y = c(1, 2, 2)) out1 <- vec_sort(df) expect_equal(out1, data.frame(x = c(1, 1, 2), y = c(1, 2, 2))) out2 <- vec_sort(df, direction = "desc") expect_equal(out2, data.frame(x = c(2, 1, 1), y = c(2, 2, 1))) }) test_that("can sort empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_sort(df1), df1) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_sort(df2), df2) }) test_that("can order tibbles that contain non-comparable objects", { expect_equal(vec_order_radix(data_frame(x = list(10, 2, 1))), 1:3) }) test_that("can order matrices and arrays (#306)", { x <- matrix(c(1, 1, 1, 1, 2, 1), ncol = 2) expect_identical(vec_order_radix(x), c(1L, 3L, 2L)) x <- array(1:8, c(2, 2, 2)) x[2] <- 1 x[3] <- 5 expect_identical(vec_order_radix(x), 2:1) }) test_that("can order empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_order_radix(df1), integer()) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_order_radix(df2), integer()) }) test_that("can order data frames with data frame columns (#527)", { expect_equal( vec_order_radix(iris), vec_order_radix(data_frame(iris = iris)) ) }) test_that("can order data frames (and subclasses) with matrix columns", { df <- new_data_frame(n = 2L) df$x <- new_data_frame(list(y = matrix(1:2, 2))) expect_identical(vec_order_radix(df), 1:2) df$x <- tibble::tibble(y = matrix(1:2, 2)) expect_identical(vec_order_radix(df), 1:2) }) # ------------------------------------------------------------------------------ # vec_locate_sorted_groups() test_that("`vec_locate_sorted_groups()` is working", { x <- c(1, 3, 1, 5, 2, 5, 1) expect <- new_data_frame( list( key = c(1, 2, 3, 5), loc = list(c(1L, 3L, 7L), 5L, 2L, c(4L, 6L)) ) ) expect_identical(vec_locate_sorted_groups(x), expect) }) test_that("`chr_proxy_collate` can result in keys being seen as identical", { x <- c("b", "A", "a") y <- c("b", "a", "A") x_expect <- data_frame(key = c("A", "b"), loc = list(c(2L, 3L), 1L)) y_expect <- data_frame(key = c("a", "b"), loc = list(c(2L, 3L), 1L)) expect_identical( vec_locate_sorted_groups(x, chr_proxy_collate = tolower), x_expect ) expect_identical( vec_locate_sorted_groups(y, chr_proxy_collate = tolower), y_expect ) }) # ------------------------------------------------------------------------------ # `vec_order_info(nan_distinct = FALSE)` test_that("Indistinct NA and NaN are reported in the same group", { x <- c(NA, NaN) info <- vec_order_info(x, nan_distinct = FALSE) expect_identical(info[[1]], c(1L, 2L)) expect_identical(info[[2]], 2L) expect_identical(info[[3]], 2L) }) # ------------------------------------------------------------------------------ # `vec_order_info()` test_that("Zero column data frames with >0 rows work (#1863)", { # All rows are treated as being from the same group x <- data_frame(.size = 5) info <- vec_order_info(x) expect_identical(info[[1]], 1:5) # Order expect_identical(info[[2]], 5L) # Group sizes expect_identical(info[[3]], 5L) # Max group size }) test_that("Zero column data frames with exactly 0 rows work (#1863)", { # This is a particularly special case, since we don't actually push a group size x <- data_frame(.size = 0L) info <- vec_order_info(x) expect_identical(info[[1]], integer()) expect_identical(info[[2]], integer()) expect_identical(info[[3]], 0L) }) # ------------------------------------------------------------------------------ # vec_sort test_that("can sort data frames", { df <- data.frame(x = c(1, 2, 1), y = c(1, 2, 2)) out1 <- vec_sort(df) expect_equal(out1, data.frame(x = c(1, 1, 2), y = c(1, 2, 2))) out2 <- vec_sort(df, direction = "desc") expect_equal(out2, data.frame(x = c(2, 1, 1), y = c(2, 2, 1))) }) test_that("can sort empty data frames (#356)", { df1 <- data.frame() expect_equal(vec_sort(df1), df1) df2 <- data.frame(x = numeric(), y = integer()) expect_equal(vec_sort(df2), df2) }) # ------------------------------------------------------------------------------ # vec_order test_that("can request NAs sorted first", { expect_equal( vec_order(c(1, NA), direction = "asc", na_value = "largest"), 1:2 ) expect_equal( vec_order(c(1, NA), direction = "desc", na_value = "largest"), 2:1 ) expect_equal( vec_order(c(1, NA), direction = "asc", na_value = "smallest"), 2:1 ) expect_equal( vec_order(c(1, NA), direction = "desc", na_value = "smallest"), 1:2 ) }) test_that("can order complex vectors", { x <- complex(real = c(1, 2, 2, 3, 3), imaginary = c(5, 4, 3, 2, NA)) expect_equal( vec_order(x, direction = "asc", na_value = "largest"), c(1, 3, 2, 4, 5) ) expect_equal( vec_order(x, direction = "desc", na_value = "largest"), rev(c(1, 3, 2, 4, 5)) ) expect_equal( vec_order(x, direction = "asc", na_value = "smallest"), c(5, 1, 3, 2, 4) ) expect_equal( vec_order(x, direction = "desc", na_value = "smallest"), rev(c(5, 1, 3, 2, 4)) ) }) test_that("can order tibbles that contain non-comparable objects", { expect_equal(vec_order(data_frame(x = list(10, 2, 1))), 1:3) }) test_that("can order matrices and arrays (#306)", { x <- matrix(c(1, 1, 1, 1, 2, 1), ncol = 2) expect_identical(vec_order(x), c(1L, 3L, 2L)) x <- array(1:8, c(2, 2, 2)) x[2] <- 1 x[3] <- 5 expect_identical(vec_order(x), 2:1) }) test_that("can order zero column data frames (#356, #1499)", { df <- data_frame() expect_identical(vec_order(df), integer()) df <- data_frame(.size = 5L) expect_identical(vec_order(df), 1:5) df <- data_frame(.size = 5L) expect_identical(vec_order(df, direction = "desc"), 1:5) }) test_that("can order zero row data frames (#356, #1499)", { df <- data.frame(x = numeric()) expect_identical(vec_order(df), integer()) df <- data.frame(x = numeric(), y = integer()) expect_identical(vec_order(df), integer()) df <- data.frame(x = numeric(), y = integer()) expect_identical(vec_order(df, direction = "desc"), integer()) }) test_that("can order data frames with data frame columns (#527)", { expect_equal( vec_order(iris), vec_order(data_frame(iris = iris)) ) }) test_that("can order data frames (and subclasses) with matrix columns", { df <- new_data_frame(n = 2L) df$x <- new_data_frame(list(y = matrix(1:2, 2))) expect_identical(vec_order(df), 1:2) df$x <- tibble::tibble(y = matrix(1:2, 2)) expect_identical(vec_order(df), 1:2) }) test_that("classed proxies do not affect performance (tidyverse/dplyr#5423)", { skip_on_cran() x <- glue::glue("{1:10000}") expect_time_lt(vec_order(x), 0.2) }) test_that("can order data frames that don't allow removing the column names (#1298)", { skip_if_not_installed("withr") local_methods( `names<-.vctrs_foobar` = function(x, value) { if (is.null(value)) { abort("Cannot remove names.") } NextMethod() } ) df <- foobar(data.frame(x = 1, y = 2)) expect_silent(expect_identical(vec_order(df), 1L)) }) test_that("missing values in lists are respected (#1401)", { x <- list(1, NULL, 2, NULL) expect_identical(vec_order(x, na_value = "largest"), c(1L, 3L, 2L, 4L)) expect_identical(vec_order(x, na_value = "smallest"), c(2L, 4L, 1L, 3L)) }) test_that("dots must be empty (#1647)", { expect_snapshot(error = TRUE, { vec_order(1, 2) }) expect_snapshot(error = TRUE, { vec_sort(1, 2) }) }) vctrs/tests/testthat/test-recode.R0000644000176200001440000004567215120277415017000 0ustar liggesuserstest_that("from_as_list_of_vectors = TRUE / to_as_list_of_vectors = TRUE", { expect_identical( vec_recode_values( x = c(1, 2, 3), from = list(c(2, 3), c(4, 1)), to = list( c("a", "b", "c"), c("d", "e", "f") ), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c("d", "b", "c") ) }) test_that("from_as_list_of_vectors = TRUE / to_as_list_of_vectors = TRUE (optimized to `from_as_list_of_vectors = FALSE`, `to_as_list_of_vectors = FALSE`)", { # First optimized to `to_as_list_of_vectors = FALSE` # Then further optimized to `from_as_list_of_vectors = FALSE` expect_identical( vec_recode_values( x = c(1, 2, 3), from = list(c(2, 3), c(4, 1)), to = list( "a", "b" ), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c("b", "a", "a") ) }) test_that("from_as_list_of_vectors = TRUE / to_as_list_of_vectors = FALSE (always optimized to `from_as_list_of_vectors = FALSE`)", { expect_identical( vec_recode_values( x = c(1, 2, 3), from = list(c(2, 3), c(4, 1)), to = c("a", "b"), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = FALSE ), c("b", "a", "a") ) }) test_that("from_as_list_of_vectors = FALSE / to_as_list_of_vectors = TRUE", { expect_identical( vec_recode_values( x = c(1, 2, 3), from = c(2, 1), to = list( c("a", "b", "c"), c("d", "e", "f") ), from_as_list_of_vectors = FALSE, to_as_list_of_vectors = TRUE ), c("d", "b", NA) ) }) test_that("from_as_list_of_vectors = FALSE / to_as_list_of_vectors = TRUE (optimized to `to_as_list_of_vectors = FALSE`)", { expect_identical( vec_recode_values( x = c(1, 2, 3), from = c(2, 1), to = list( "a", "b" ), from_as_list_of_vectors = FALSE, to_as_list_of_vectors = TRUE ), c("b", "a", NA) ) }) test_that("from_as_list_of_vectors = FALSE / to_as_list_of_vectors = FALSE", { expect_identical( vec_recode_values( x = c(1, 2, 3), from = c(2, 1), to = c("a", "b"), from_as_list_of_vectors = FALSE, to_as_list_of_vectors = FALSE ), c("b", "a", NA) ) }) test_that("can treat list input as a vector (i.e. not as a container of vectors)", { # This is why we have `from_as_list_of_vectors` and `to_as_list_of_vectors` as arguments expect_identical( vec_recode_values( x = list(1, 2, 3), from = list(2, 5, 1), to = list("x", 1:2, 1:3), ), list(1:3, "x", NULL) ) }) test_that("`to` names are kept during `from_as_list_of_vectors, !to_as_list_of_vectors` optimization recycling", { expect_identical( vec_recode_values( x = c(2, 1, 4, 5), from = list(1:2, 3:4, 5:6), to = c(a = "x", b = "y", c = "z"), from_as_list_of_vectors = TRUE ), c(a = "x", a = "x", b = "y", c = "z") ) }) test_that("`to` list names are dropped with `to_as_list_of_vectors`", { # With `to_as_list_of_vectors` optimization expect_identical( vec_recode_values( x = c(1, 1), from = 1, to = list(a = "x"), to_as_list_of_vectors = TRUE ), c("x", "x") ) # Without `to_as_list_of_vectors` optimization expect_identical( vec_recode_values( x = c(1, 1), from = 1, to = list(a = c("x", "y")), to_as_list_of_vectors = TRUE ), c("x", "y") ) }) test_that("`to` is recycled to `from_size` of `to_as_list_of_vectors`", { # With `to_as_list_of_vectors` expect_identical( vec_recode_values( x = c(1, 2, 3, 1, 4), from = c(1, 2), to = list(2), to_as_list_of_vectors = TRUE ), c(2, 2, NA, 2, NA) ) expect_identical( vec_recode_values( x = c(1, 2, 3, 1, 4), from = c(1, 2), to = list(c(2, 3, 4, 5, 6)), to_as_list_of_vectors = TRUE ), c(2, 3, NA, 5, NA) ) # This doesn't make too much sense for a user to provide, but is consistent expect_identical( vec_recode_values( x = c(1, 2, 3, 1, 4), from = list(c(1, 2), 4), to = list(2), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c(2, 2, NA, 2, 2) ) # This doesn't make too much sense for a user to provide, but is consistent expect_identical( vec_recode_values( x = c(1, 2, 3, 1, 4), from = list(c(1, 2), 4), to = list(c(2, 3, 4, 5, 6)), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c(2, 3, NA, 5, 6) ) # Without `to_as_list_of_vectors` expect_identical( vec_recode_values( x = c(1, 2, 3, 1), from = c(1, 2), to = 2 ), c(2, 2, NA, 2) ) expect_identical( vec_recode_values( x = c(1, 2, 3, 1, 4), from = list(c(1, 2), 4), to = 2, from_as_list_of_vectors = TRUE ), c(2, 2, NA, 2, 2) ) }) test_that("`from` list names are dropped with `from_as_list_of_vectors`", { # With `from_as_list_of_vectors, !to_as_list_of_vectors` optimization expect_identical( vec_recode_values( x = c(1, 2), from = list(a = 1, b = c(2, 3, 4)), to = c("x", "y"), from_as_list_of_vectors = TRUE ), c("x", "y") ) # Without `from_as_list_of_vectors, !to_as_list_of_vectors` optimization expect_identical( vec_recode_values( x = c(1, 2), from = list(a = 1, b = c(2, 3, 4)), to = list(x = c("x1", "x2"), y = c("y1", "y2")), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c("x1", "y2") ) }) test_that("`vec_replace_values()` retains names of `x`", { # Mimicking `[<-` and `base::replace()`. # Note how `vec_recode_values()` "creates a new vector", # so it pulls the names from `to` and `default`. expect_identical( vec_replace_values( x = c(a = 1, b = 2, c = 0), from = c(2, 1), to = c(x = 3, y = 4) ), c(a = 4, b = 3, c = 0) ) expect_identical( vec_recode_values( x = c(a = 1, b = 2, c = 0), from = c(2, 1), to = c(x = 3, y = 4) ), c(y = 4, x = 3, NA) ) }) test_that("`unmatched` errors are correct", { expect_snapshot(error = TRUE, { vec_recode_values(c(1, 2), from = 1, to = 0, unmatched = "error") }) expect_snapshot(error = TRUE, { # `NA` must be matched! vec_recode_values(c(1, NA), from = 1, to = 0, unmatched = "error") }) expect_snapshot(error = TRUE, { # Many locations vec_recode_values(1:100, from = 1, to = 0, unmatched = "error") }) }) test_that("`x` and `from` common type errors are correct", { expect_snapshot(error = TRUE, { vec_recode_values(1, from = "a", to = 1) }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = list("a"), to = 1, from_as_list_of_vectors = TRUE ) }) }) test_that("`to` and `default` `ptype` errors are correct when it is inferred", { expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1:2, to = list(1, "x"), to_as_list_of_vectors = TRUE ) }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1:2, to = list(1, 2), default = "x", to_as_list_of_vectors = TRUE ) }) expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1:2, to = 1, default = "x") }) }) test_that("`to` and `default` `ptype` errors are correct when it is user supplied", { expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = 1, ptype = foobar()) }) expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = 1, ptype = character()) }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1, to = list(a = 1), ptype = character(), to_as_list_of_vectors = TRUE ) }) expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = "x", default = 1, ptype = character()) }) }) test_that("`to` size is validated", { expect_snapshot(error = TRUE, { vec_recode_values(1:5, from = 1, to = 2:3) }) expect_snapshot(error = TRUE, { vec_recode_values( 1:5, from = list(1), to = 2:3, from_as_list_of_vectors = TRUE ) }) expect_snapshot(error = TRUE, { vec_recode_values( 1:5, from = 1, to = list(2, 3), to_as_list_of_vectors = TRUE ) }) expect_snapshot(error = TRUE, { vec_recode_values( 1:5, from = list(1), to = list(2, 3), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ) }) expect_snapshot(error = TRUE, { vec_recode_values( 1:5, from = 1, to = list(a = 2:3), to_as_list_of_vectors = TRUE ) }) }) test_that("`default` size is validated", { expect_snapshot(error = TRUE, { vec_recode_values(1:5, from = 1, to = 2, default = 1:2) }) }) test_that("`x` must be a vector", { expect_snapshot(error = TRUE, { vec_recode_values(foobar(), from = 1, to = 2, x_arg = ".x") }) }) test_that("`from` must be a vector or list of vectors", { expect_snapshot(error = TRUE, { vec_recode_values(1, from = foobar(), to = 2, from_arg = ".from") }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1, to = 2, from_as_list_of_vectors = TRUE, from_arg = ".from" ) }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = list(a = foobar()), to = 2, from_as_list_of_vectors = TRUE, from_arg = ".from" ) }) }) test_that("`to` must be a vector or list of vectors", { expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = foobar(), to_arg = ".to") }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1, to = 2, to_as_list_of_vectors = TRUE, to_arg = ".to" ) }) expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1, to = list(a = foobar()), to_as_list_of_vectors = TRUE, to_arg = ".to" ) }) }) test_that("`default` must be a vector", { expect_snapshot(error = TRUE, { vec_recode_values( 1, from = 1, to = 2, default = foobar(), default_arg = ".default" ) }) }) test_that("`from_as_list_of_vectors` and `to_as_list_of_vectors` are validated", { expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = 1, from_as_list_of_vectors = "x") }) expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = 1, to_as_list_of_vectors = "x") }) expect_snapshot(error = TRUE, { vec_replace_values(1, from = 1, to = 1, from_as_list_of_vectors = "x") }) expect_snapshot(error = TRUE, { vec_replace_values(1, from = 1, to = 1, to_as_list_of_vectors = "x") }) }) test_that("`unmatched` is validated", { expect_snapshot(error = TRUE, { vec_recode_values(1, from = 1, to = 1, unmatched = "e") }) }) test_that("proof that `ptype` finalization is important", { # Imagine you have an input logical vector you are remapping # and it happens to only have `NA`s x <- c(NA, NA) from <- NA to <- FALSE # If no `ptype` finalization happened, then `ptype = x` would result in # `unspecified` being the output type and these would error expect_identical( vec_recode_values(x, from = from, to = to, default = x, ptype = x), c(FALSE, FALSE) ) expect_identical( vec_replace_values(x, from = from, to = to), c(FALSE, FALSE) ) }) test_that("common `ptype` of `to` isn't finalized until `default` has been included", { # If the common type of `to` is finalized early, we get `logical`, which can't # combine with `default`'s `character` type expect_identical( vec_recode_values( x = "a", from = "a", to = list(NA), default = "x", to_as_list_of_vectors = TRUE ), NA_character_ ) expect_identical( vec_recode_values( x = "a", from = "b", to = list(NA), default = "x", to_as_list_of_vectors = TRUE ), "x" ) }) test_that("extraneous `to` attributes don't end up on the final output", { x <- c(1, 2, 3) # TODO: Ideally the attributes wouldn't show up on the output, but # `list_combine()` doesn't clear them because `vec_ptype(to)` retains # them for some reason # https://github.com/r-lib/vctrs/issues/2025 from <- c(2, 3) to <- structure(c(0, -1), foo = "bar") expect_identical( vec_recode_values(x, from = from, to = to), structure(c(NA, 0, -1), foo = "bar") ) # TODO: Ideally the attributes wouldn't show up on the output, but # `list_combine()` doesn't clear them because `vec_ptype(to)` retains # them for some reason # https://github.com/r-lib/vctrs/issues/2025 from <- 2 to <- list( structure(c(0, -1, -2), foo = "bar") ) expect_identical( vec_recode_values(x, from = from, to = to, to_as_list_of_vectors = TRUE), structure(c(NA, -1, NA), foo = "bar") ) # Note that as soon as you force a `ptype2` computation, the attributes # disappear anyways, suggesting an inconsistency # `ptype2` forced by `default` from <- c(2, 3) to <- structure(c(0, -1), foo = "bar") expect_identical( vec_recode_values(x, from = from, to = to, default = NA_real_), c(NA, 0, -1) ) # `ptype2` forced by multiple `to` values from <- c(2, 3) to <- list( structure(c(0, -1, -2), foo = "bar"), c(-3, -4, -5) ) expect_identical( vec_recode_values(x, from = from, to = to, to_as_list_of_vectors = TRUE), c(NA, -1, -5) ) }) test_that("extraneous `x` attributes don't end up on the final output", { # TODO: Ideally the attributes wouldn't show up on the output, but # `list_combine()` doesn't clear them because `vec_ptype(x)` retains # them for some reason # https://github.com/r-lib/vctrs/issues/2025 x <- structure(1, foo = "bar") expect_identical( vec_replace_values(x, from = 1, to = 2), structure(2, foo = "bar") # 2 ) }) test_that("first `from` wins when there are overlaps", { # Same as `vec_case_when()` expect_identical( vec_recode_values( x = 1, from = c(1, 1), to = c("a", "b") ), "a" ) expect_identical( vec_recode_values( x = 1, from = list(c(1, 3), c(1, 2)), to = c("a", "b"), from_as_list_of_vectors = TRUE ), "a" ) expect_identical( vec_recode_values( x = c(1, 2), from = c(1, 1), to = list(c("a", "b"), c("c", "d")), to_as_list_of_vectors = TRUE ), c("a", NA) ) expect_identical( vec_recode_values( x = c(1, 2), from = list(c(1, 3), c(1, 2)), to = list(c("a", "b"), c("c", "d")), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c("a", "d") ) }) test_that("works when `from` is a list of size 1 elements and `to` doesn't simplify", { # This is a case where we don't actually build the `from_map` because the # `from` size doesn't change as it flattens expect_identical( vec_recode_values( x = c(1, 2), from = list(1, 2), to = list(c("a", "b"), c("c", "d")), from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), c("a", "d") ) }) test_that("works when `to` is a length >1 vector and every element of `x` is matched by `from`", { # This is an optimized case where we directly use the index provided by # `vec_match()` to slice `to` with expect_identical( vec_recode_values( x = c(1, 2), from = c(2, 1), to = c("a", "b") ), c("b", "a") ) }) test_that("data frames - vector `from`, vector `to`", { x <- data_frame(a = 1:3, b = 3:5) from <- data_frame(a = int(3, 1), b = c(5, 3)) # Recycling to <- data_frame(c = "a", d = "y") expect_identical( vec_recode_values(x, from = from, to = to), data_frame(c = c("a", NA, "a"), d = c("y", NA, "y")) ) to <- data_frame(c = c("a", "b"), d = c("x", "y")) expect_identical( vec_recode_values(x, from = from, to = to), data_frame(c = c("b", NA, "a"), d = c("y", NA, "x")) ) # List `from`, vector `to` # List `from`, list `to` }) test_that("data frames - vector `from`, list `to`", { x <- data_frame(a = 1:3, b = 3:5) from <- data_frame(a = int(3, 1), b = c(5, 3)) # Recycling to `from` size and `x` size to <- list( data_frame(c = "a", d = "y") ) expect_identical( vec_recode_values(x, from = from, to = to, to_as_list_of_vectors = TRUE), data_frame(c = c("a", NA, "a"), d = c("y", NA, "y")) ) # Recycling to `from` size to <- list( data_frame(c = c("a", "b", "c"), d = c("x", "y", "z")) ) expect_identical( vec_recode_values(x, from = from, to = to, to_as_list_of_vectors = TRUE), data_frame(c = c("a", NA, "c"), d = c("x", NA, "z")) ) # Recycling to `x` size to <- list( data_frame(c = "a", d = "x"), data_frame(c = "b", d = "y") ) expect_identical( vec_recode_values(x, from = from, to = to, to_as_list_of_vectors = TRUE), data_frame(c = c("b", NA, "a"), d = c("y", NA, "x")) ) }) test_that("data frames - list `from`, vector `to`", { x <- data_frame(a = 1:3, b = 3:5) # Recycling `to` to `from` size (this would be strange) from <- list( data_frame(a = 1L, b = 3L), data_frame(a = 3:4, b = 5:6) ) to <- data_frame(c = "a", d = "x") expect_identical( vec_recode_values(x, from = from, to = to, from_as_list_of_vectors = TRUE), data_frame(c = c("a", NA, "a"), d = c("x", NA, "x")) ) from <- list( data_frame(a = 1L, b = 3L), data_frame(a = 3:4, b = 5:6) ) to <- data_frame(c = c("a", "b"), d = c("x", "y")) expect_identical( vec_recode_values(x, from = from, to = to, from_as_list_of_vectors = TRUE), data_frame(c = c("a", NA, "b"), d = c("x", NA, "y")) ) }) test_that("data frames - list `from`, list `to`", { x <- data_frame(a = 1:3, b = 3:5) # Recycling `to` to `from` size (this would be strange) # Recycling `to` elements to `x` size from <- list( data_frame(a = 1L, b = 3L), data_frame(a = 3:4, b = 5:6) ) to <- list( data_frame(c = "a", d = "x") ) expect_identical( vec_recode_values( x, from = from, to = to, from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), data_frame(c = c("a", NA, "a"), d = c("x", NA, "x")) ) # Recycling `to` elements to `x` size from <- list( data_frame(a = 1L, b = 3L), data_frame(a = 3:4, b = 5:6) ) to <- list( data_frame(c = "a", d = "x"), data_frame(c = "b", d = "y") ) expect_identical( vec_recode_values( x, from = from, to = to, from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), data_frame(c = c("a", NA, "b"), d = c("x", NA, "y")) ) from <- list( data_frame(a = 1L, b = 3L), data_frame(a = 3:4, b = 5:6) ) to <- list( data_frame(c = c("a", "b", "c"), d = c("x", "y", "z")), data_frame(c = c("aa", "bb", "cc"), d = c("xx", "yy", "zz")) ) expect_identical( vec_recode_values( x, from = from, to = to, from_as_list_of_vectors = TRUE, to_as_list_of_vectors = TRUE ), data_frame(c = c("a", NA, "cc"), d = c("x", NA, "zz")) ) }) vctrs/tests/testthat/test-slice.R0000644000176200001440000006441315113335375016632 0ustar liggesuserstest_that("vec_slice throws error with non-vector inputs", { expect_error(vec_slice(environment(), 1L), class = "vctrs_error_scalar_type") }) test_that("vec_slice throws error with non-vector subscripts", { expect_snapshot({ (expect_error( vec_slice(1:3, Sys.Date()), class = "vctrs_error_subscript_type" )) (expect_error( vec_slice(1:3, matrix(TRUE, nrow = 1)), class = "vctrs_error_subscript_type" )) }) }) test_that("can subset base vectors", { i <- 2:3 expect_identical(vec_slice(lgl(1, 0, 1), i), lgl(0, 1)) expect_identical(vec_slice(int(1, 2, 3), i), int(2, 3)) expect_identical(vec_slice(dbl(1, 2, 3), i), dbl(2, 3)) expect_identical(vec_slice(cpl(1, 2, 3), i), cpl(2, 3)) expect_identical(vec_slice(chr("1", "2", "3"), i), chr("2", "3")) expect_identical(vec_slice(raw2(1, 2, 3), i), raw2(2, 3)) expect_identical(vec_slice(list(1, 2, 3), i), list(2, 3)) }) test_that("can subset shaped base vectors", { i <- 2:3 mat <- as.matrix expect_identical(vec_slice(mat(lgl(1, 0, 1)), i), mat(lgl(0, 1))) expect_identical(vec_slice(mat(int(1, 2, 3)), i), mat(int(2, 3))) expect_identical(vec_slice(mat(dbl(1, 2, 3)), i), mat(dbl(2, 3))) expect_identical(vec_slice(mat(cpl(1, 2, 3)), i), mat(cpl(2, 3))) expect_identical(vec_slice(mat(chr("1", "2", "3")), i), mat(chr("2", "3"))) expect_identical(vec_slice(mat(raw2(1, 2, 3)), i), mat(raw2(2, 3))) expect_identical(vec_slice(mat(list(1, 2, 3)), i), mat(list(2, 3))) }) test_that("can subset with missing indices", { for (i in list(int(2L, NA), lgl(FALSE, TRUE, NA))) { expect_identical(vec_slice(lgl(1, 0, 1), i), lgl(0, NA)) expect_identical(vec_slice(int(1, 2, 3), i), int(2, NA)) expect_identical(vec_slice(dbl(1, 2, 3), i), dbl(2, NA)) expect_identical(vec_slice(cpl2(1, 2, 3), i), cpl2(2, NA)) expect_identical(vec_slice(chr("1", "2", "3"), i), c("2", NA)) expect_identical(vec_slice(raw2(1, 2, 3), i), raw2(2, 0)) expect_identical(vec_slice(list(1, 2, 3), i), list(2, NULL)) } }) test_that("can subset with a recycled NA", { local_name_repair_quiet() expect_identical(vec_slice(1:3, NA), int(NA, NA, NA)) expect_identical(vec_slice(new_vctr(1:3), NA), new_vctr(int(NA, NA, NA))) rownames <- rep_len("", nrow(mtcars)) rownames <- vec_as_names(rownames, repair = "unique") expect_identical( vec_slice(mtcars, NA), structure(mtcars[NA, ], row.names = rownames) ) }) test_that("can subset with a recycled TRUE", { expect_identical(vec_slice(1:3, TRUE), 1:3) expect_identical(vec_slice(mtcars, TRUE), mtcars) expect_identical(vec_slice(new_vctr(1:3), TRUE), new_vctr(1:3)) expect_identical(vec_as_location(TRUE, 2), 1:2) }) test_that("can subset with a recycled FALSE", { expect_identical(vec_slice(1:3, FALSE), int()) expect_identical(vec_slice(mtcars, FALSE), mtcars[NULL, ]) expect_identical(vec_slice(new_vctr(1:3), FALSE), new_vctr(integer())) }) test_that("can't index beyond the end of a vector", { expect_snapshot({ (expect_error(vec_slice(1:2, 3L), class = "vctrs_error_subscript_oob")) (expect_error(vec_slice(1:2, -3L), class = "vctrs_error_subscript_oob")) }) }) test_that("slicing non existing elements fails", { expect_error( vec_as_location("foo", 1L, "f"), class = "vctrs_error_subscript_oob" ) expect_error(vec_slice(c(f = 1), "foo"), class = "vctrs_error_subscript_oob") }) test_that("can subset object of any dimensionality", { x0 <- c(1, 1) x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_equal(vec_slice(x0, 1L), 1) expect_identical(vec_slice(x1, 1L), ones(1)) expect_identical(vec_slice(x2, 1L), ones(1, 3)) expect_identical(vec_slice(x3, 1L), ones(1, 3, 4)) expect_identical(vec_slice(x4, 1L), ones(1, 3, 4, 5)) }) test_that("can subset using logical subscript", { x0 <- c(1, 1) expect_identical(vec_slice(x0, TRUE), x0) expect_identical(vec_slice(x0, c(TRUE, FALSE)), 1) expect_error( vec_slice(x0, c(TRUE, FALSE, TRUE)), class = "vctrs_error_subscript_size" ) expect_error( vec_slice(x0, lgl()), class = "vctrs_error_subscript_size" ) expect_error( vec_slice(mtcars, c(TRUE, FALSE)), class = "vctrs_error_subscript_size" ) }) test_that("can subset data frame columns", { df <- data.frame(x = 1:2) df$y <- data.frame(a = 2:1) expect_equal(vec_slice(df, 1L)$y, vec_slice(df$y, 1L)) }) test_that("can subset empty data frames", { df <- new_data_frame(n = 3L) expect_equal(vec_size(vec_slice(df, integer())), 0) expect_equal(vec_size(vec_slice(df, 1L)), 1) expect_equal(vec_size(vec_slice(df, 1:3)), 3) df$df <- df expect_equal(vec_size(vec_slice(df, integer())), 0) expect_equal(vec_size(vec_slice(df, 1L)), 1) expect_equal(vec_size(vec_slice(df, 1:3)), 3) }) test_that("ignores NA in logical subsetting", { x <- c(NA, 1, 2) expect_equal(vec_slice(x, x > 0), c(NA, 1, 2)) }) test_that("ignores NA in integer subsetting", { expect_equal(vec_slice(0:2, c(NA, 2:3)), c(NA, 1, 2)) }) test_that("can't slice with missing argument", { expect_error(vec_slice(1:3)) expect_error(vec_slice(mtcars)) expect_error(vec_slice(new_vctr(1:3))) }) test_that("can slice with NULL argument", { expect_identical(vec_slice(1:3, NULL), integer()) expect_identical(vec_slice(iris, NULL), iris[0, ]) expect_identical(vec_slice(new_vctr(1:3), NULL), new_vctr(integer())) }) test_that("slicing unclassed structures preserves attributes", { x <- structure(1:3, foo = "bar") expect_identical(vec_slice(x, 1L), structure(1L, foo = "bar")) }) test_that("can slice with negative indices", { expect_identical(vec_slice(1:3, -c(1L, 3L)), 2L) expect_identical(vec_slice(mtcars, -(1:30)), vec_slice(mtcars, 31:32)) expect_error(vec_slice(1:3, -c(1L, NA)), class = "vctrs_error_subscript_type") expect_error(vec_slice(1:3, c(-1L, 1L)), class = "vctrs_error_subscript_type") }) test_that("0 is ignored in negative indices", { expect_identical(vec_slice(1:3, c(-2L, 0L)), int(1L, 3L)) expect_identical(vec_slice(1:3, c(0L, -2L)), int(1L, 3L)) }) test_that("0 is ignored in positive indices", { expect_identical(vec_slice(1:3, 0L), int()) expect_identical(vec_slice(1:3, c(0L, 0L)), int()) expect_identical(vec_slice(1:3, c(0L, 2L, 0L)), 2L) }) test_that("can slice with double indices", { expect_identical(vec_slice(1:3, dbl(2, 3)), 2:3) expect_snapshot( (expect_error( vec_as_location(2^31, 3L), class = "vctrs_error_subscript_type" )) ) }) test_that("can slice with symbols", { expect_identical(vec_as_location(quote(b), 26, letters), 2L) }) test_that("can `vec_slice()` S3 objects without dispatch infloop", { expect_identical(new_vctr(1:3)[1], new_vctr(1L)) expect_identical(new_vctr(as.list(1:3))[1], new_vctr(list(1L))) }) test_that("can `vec_slice()` records", { out <- vec_slice(new_rcrd(list(a = 1L, b = 2L)), rep(1, 3)) expect_size(out, 3) out <- vec_init(new_rcrd(list(a = 1L, b = 2L)), 2) expect_size(out, 2) }) test_that("vec_restore() is called after proxied slicing", { local_methods( vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, to, ...) "dispatch" ) expect_identical(vec_slice(foobar(1:3), 2), "dispatch") }) test_that("vec_slice() is proxied", { local_proxy() x <- vec_slice(new_proxy(1:3), 2:3) expect_identical(proxy_deref(x), 2:3) }) test_that("dimensions are preserved by vec_slice()", { # Fallback case x <- foobar(1:4) dim(x) <- c(2, 2) dimnames(x) <- list(a = c("foo", "bar"), b = c("quux", "hunoz")) out <- vec_slice(x, 1) exp <- foobar( c(1L, 3L), dim = c(1, 2), dimnames = list(a = "foo", b = c("quux", "hunoz")) ) expect_identical(out, exp) # Native case attrib <- NULL local_methods( vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, to, ...) attrib <<- attributes(x) ) vec_slice(x, 1) exp <- list(dim = 1:2, dimnames = list(a = "foo", b = c("quux", "hunoz"))) expect_identical(attrib, exp) }) test_that("can slice shaped objects by name", { x <- matrix(1:2) expect_error(vec_slice(x, "foo"), "unnamed") dimnames(x) <- list(c("foo", "bar")) expect_equal(vec_slice(x, "foo"), vec_slice(x, 1L)) expect_error(vec_slice(x, "baz"), class = "vctrs_error_subscript_oob") }) test_that("vec_slice() unclasses input before calling `vec_restore()`", { oo <- NULL local_methods( vec_proxy.vctrs_foobar = function(x, ...) x, vec_restore.vctrs_foobar = function(x, ...) oo <<- is.object(x) ) x <- foobar(1:4) dim(x) <- c(2, 2) vec_slice(x, 1) expect_false(oo) }) test_that("can call `vec_slice()` from `[` methods with shaped objects without infloop", { local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_slice(x, i) ) x <- foobar(1:4) dim(x) <- c(2, 2) exp <- foobar(c(1L, 3L)) dim(exp) <- c(1, 2) expect_identical(x[1], exp) }) test_that("vec_slice() restores attributes on shaped S3 objects correctly", { x <- factor(c("a", "b", "c", "d", "e", "f")) dim(x) <- c(3, 2) expect <- factor(c("a", "c", "d", "f"), levels = levels(x)) dim(expect) <- c(2, 2) expect_identical(vec_slice(x, c(1, 3)), expect) }) test_that("vec_slice() falls back to `[` with S3 objects", { local_methods( `[.vctrs_foobar` = function(x, i, ...) "dispatched" ) expect_identical(vec_slice(foobar(NA), 1), "dispatched") expect_error( vec_slice(foobar(list(NA)), 1), class = "vctrs_error_scalar_type" ) local_methods( vec_proxy.vctrs_foobar = function(x, ...) x ) expect_identical(vec_slice(foobar(list(NA)), 1), foobar(list(NA))) }) test_that("vec_slice() doesn't restore when attributes have already been restored", { local_methods( `[.vctrs_foobar` = function(x, i, ...) structure("dispatched", foo = "bar"), vec_restore.vctrs_foobar = function(...) stop("not called") ) expect_error(vec_slice(foobar(NA), 1), NA) }) test_that("vec_slice() doesn't restore when `[` method intentionally dropped attributes", { local_methods( `[.vctrs_foobar` = function(x, i, ...) unstructure(NextMethod()), vec_restore.vctrs_foobar = function(...) stop("not called") ) expect_identical(vec_slice(foobar(NA), 1), NA) }) test_that("can vec_slice() without inflooping when restore calls math generics", { local_methods( new_foobar = function(x) { new_vctr(as.double(x), class = "vctrs_foobar") }, vec_restore.vctrs_foobar = function(x, ...) { abs(x) sum(x) mean(x) is.finite(x) is.infinite(x) is.nan(x) new_foobar(x) } ) expect_identical(new_foobar(1:10)[1:2], new_foobar(1:2)) }) test_that("vec_restore() is called after slicing data frames", { local_methods( vec_restore.vctrs_tabble = function(...) "dispatched" ) df <- structure(mtcars, class = c("vctrs_tabble", "data.frame")) expect_identical(vec_slice(df, 1), "dispatched") }) test_that("additional subscripts are forwarded to `[`", { local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_index(x, i, ...) ) x <- foobar(c("foo", "bar", "quux", "hunoz")) dim(x) <- c(2, 2) exp <- foobar("quux") dim(exp) <- c(1, 1) expect_identical(x[1, 2], exp) }) test_that("can use names to vec_slice() a named object", { x0 <- c(a = 1, b = 2) x1 <- c(a = 1, a = 2) expect_identical(vec_slice(x0, letters[1]), c(a = 1)) expect_identical(vec_slice(x0, letters[2:1]), c(b = 2, a = 1)) expect_identical(vec_slice(x1, letters[1]), c(a = 1)) expect_error(vec_slice(x0, letters[3:1]), class = "vctrs_error_subscript_oob") expect_error(vec_slice(x1, letters[2]), class = "vctrs_error_subscript_oob") }) test_that("can't use names to vec_slice() an unnamed object", { expect_error( vec_slice(1:3, letters[1]), "Can't use character names to index an unnamed vector.", fixed = TRUE ) expect_error( vec_slice(1:3, letters[25:27]), "Can't use character names to index an unnamed vector.", fixed = TRUE ) }) test_that("can slice with missing character indices (#244)", { expect_identical(vec_as_location(na_chr, 2L, c("x", "")), na_int) expect_identical(vec_slice(c(x = 1), na_chr), set_names(na_dbl, "")) expect_identical(vec_slice(c(x = "foo"), na_chr), set_names(na_chr, "")) }) test_that("can slice with numerics (#577)", { expect_identical(vec_as_location(1:2, 3), 1:2) expect_error(vec_as_location(1:2, 3.5), class = "vctrs_error_cast_lossy") }) test_that("missing indices don't create NA names", { x <- set_names(letters) expect_identical(vec_slice(x, na_int), set_names(na_chr, "")) expect_identical( vec_slice(x, int(1, NA, 3, NA)), chr(a = "a", NA, c = "c", NA) ) # Preserves existing NA names x <- set_names(1:2, c(NA, "foo")) expect_identical(vec_slice(x, 1:2), x) }) test_that("vec_slice() asserts vectorness (#301)", { expect_error(vec_slice(NULL, 1), class = "vctrs_error_scalar_type") }) test_that("slicing an unspecified logical vector returns a logical vector", { expect_identical(vec_slice(NA, integer()), logical()) expect_identical(vec_slice(NA, c(1, 1)), c(NA, NA)) }) test_that("slicing an unspecified() object returns an unspecified()", { expect_identical(vec_slice(unspecified(1), integer()), unspecified()) expect_identical(vec_slice(unspecified(1), c(1, 1)), unspecified(2)) }) test_that("vec_slice() works with Altrep classes with custom extract methods", { x <- chr_rle(foo = 10L, bar = 5L) expect_false(chr_rle_is_materialized(x)) idx <- c(9, 10, 11) expect_equal(vec_slice(x, idx), c("foo", "foo", "bar")) expect_false(chr_rle_is_materialized(x)) # With zero idx <- c(0, 1) expect_equal(vec_slice(x, idx), "foo") expect_false(chr_rle_is_materialized(x)) # With integer missing values idx <- c(0, NA, 2, NA) expect_equal(vec_slice(x, idx), c(NA, "foo", NA)) expect_false(chr_rle_is_materialized(x)) # With logical condition index idx <- c(TRUE, rep(FALSE, 8), TRUE, rep(FALSE, 2), TRUE, TRUE, TRUE) expect_equal(vec_slice(x, idx), c("foo", "foo", "bar", "bar", "bar")) expect_false(chr_rle_is_materialized(x)) # Everything idx <- TRUE expect_equal(vec_slice(x, idx), c(rep("foo", 10), rep("bar", 5))) expect_false(chr_rle_is_materialized(x)) # Nothing idx <- FALSE expect_equal(vec_slice(x, idx), character()) expect_false(chr_rle_is_materialized(x)) # Whole vector of missing values idx <- NA expect_equal(vec_slice(x, idx), rep(NA_character_, 15)) expect_false(chr_rle_is_materialized(x)) # Just 1 missing value idx <- NA_integer_ expect_equal(vec_slice(x, idx), rep(NA_character_, 1)) expect_false(chr_rle_is_materialized(x)) # OOB idx <- 16 expect_snapshot(error = TRUE, { vec_slice(x, idx) }) }) test_that("Unnamed vector with character subscript is caught", { expect_snapshot(error = TRUE, vec_slice(1:3, letters[1])) }) test_that("Negative subscripts are checked", { expect_snapshot(error = TRUE, vec_slice(1:3, -c(1L, NA))) expect_snapshot(error = TRUE, vec_slice(1:3, c(-1L, 1L))) }) test_that("oob error messages are properly constructed", { expect_snapshot(error = TRUE, vec_slice(c(bar = 1), "foo")) # Multiple OOB indices expect_snapshot(error = TRUE, vec_slice(letters, c(100, 1000))) expect_snapshot(error = TRUE, vec_slice(letters, c(1, 100:103, 2, 104:110))) expect_snapshot(error = TRUE, vec_slice(set_names(letters), c("foo", "bar"))) expect_snapshot(error = TRUE, vec_slice(set_names(letters), toupper(letters))) }) # vec_init ---------------------------------------------------------------- test_that("na of atomic vectors is as expected", { expect_equal(vec_init(TRUE), NA) expect_equal(vec_init(1L), NA_integer_) expect_equal(vec_init(1), NA_real_) expect_equal(vec_init("x"), NA_character_) expect_equal(vec_init(1i), NA_complex_) }) test_that("na of factor preserves levels", { f1 <- factor("a", levels = c("a", "b")) f2 <- vec_init(f1) expect_equal(levels(f1), levels(f2)) }) test_that("na of POSIXct preserves tz", { dt1 <- as.POSIXct("2010-01-01", tz = "America/New_York") dt2 <- vec_init(dt1) expect_equal(attr(dt2, "tzone"), "America/New_York") }) test_that("na of list is list(NULL)", { expect_equal(vec_init(list()), list(NULL)) }) test_that("na of array is 1d slice", { x1 <- array(1:12, c(2, 3, 4)) x2 <- vec_init(x1) expect_equal(x2, array(NA_integer_, c(1, 3, 4))) }) test_that("na of list-array is 1d slice", { x1 <- array(as.list(1:12), c(2, 3, 4)) x2 <- vec_init(x1) expect_equal(x2, array(list(), c(1, 3, 4))) }) test_that("vec_init() asserts vectorness (#301)", { expect_error(vec_init(NULL, 1L), class = "vctrs_error_scalar_type") }) test_that("vec_init() works with Altrep classes", { x <- chr_rle(foo = 1L, bar = 2L) expect_false(chr_rle_is_materialized(x)) expect_equal(vec_init(x, 2), rep(NA_character_, 2)) expect_false(chr_rle_is_materialized(x)) }) test_that("vec_init() validates `n`", { expect_snapshot({ (expect_error(vec_init(1L, 1.5))) (expect_error(vec_init(1L, c(1, 2)))) (expect_error(vec_init(1L, -1L))) (expect_error(vec_init(1L, NA))) (expect_error(vec_init(1L, NA_integer_))) }) }) # vec_slice + compact_rep ------------------------------------------------- # `i` is 1-based test_that("names are repaired correctly with compact reps and `NA_integer_`", { x <- list(a = 1L, b = 2L) expect <- set_names(list(NULL, NULL), c("", "")) expect_equal(vec_slice_rep(x, NA_integer_, 2L), expect) }) test_that("names are recycled correctly with compact reps", { expect_named(vec_slice_rep(c(x = 1L), 1L, 3L), c("x", "x", "x")) }) test_that("vec_slice() with compact_reps work with Altrep classes", { x <- chr_rle(foo = 10L, bar = 5L) expect_false(chr_rle_is_materialized(x)) expect_equal(vec_slice_rep(x, 10L, 3L), rep("foo", 3)) expect_false(chr_rle_is_materialized(x)) }) # vec_slice + compact_seq ------------------------------------------------- # `start` is 0-based test_that("can subset base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE expect_identical( vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl(0, 1) ) expect_identical( vec_slice_seq(int(1, 2, 3), start, size, increasing), int(2, 3) ) expect_identical( vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl(2, 3) ) expect_identical( vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl(2, 3) ) expect_identical( vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr("2", "3") ) expect_identical( vec_slice_seq(raw2(1, 2, 3), start, size, increasing), raw2(2, 3) ) expect_identical( vec_slice_seq(list(1, 2, 3), start, size, increasing), list(2, 3) ) }) test_that("can subset base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE expect_identical( vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl(1, 0) ) expect_identical( vec_slice_seq(int(1, 2, 3), start, size, increasing), int(3, 2) ) expect_identical( vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl(3, 2) ) expect_identical( vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl(3, 2) ) expect_identical( vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr("3", "2") ) expect_identical( vec_slice_seq(raw2(1, 2, 3), start, size, increasing), raw2(3, 2) ) expect_identical( vec_slice_seq(list(1, 2, 3), start, size, increasing), list(3, 2) ) }) test_that("can subset base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE expect_identical(vec_slice_seq(lgl(1, 0, 1), start, size, increasing), lgl()) expect_identical(vec_slice_seq(int(1, 2, 3), start, size, increasing), int()) expect_identical(vec_slice_seq(dbl(1, 2, 3), start, size, increasing), dbl()) expect_identical(vec_slice_seq(cpl(1, 2, 3), start, size, increasing), cpl()) expect_identical( vec_slice_seq(chr("1", "2", "3"), start, size, increasing), chr() ) expect_identical( vec_slice_seq(raw2(1, 2, 3), start, size, increasing), raw2() ) expect_identical( vec_slice_seq(list(1, 2, 3), start, size, increasing), list() ) }) test_that("can subset shaped base vectors with compact seqs", { start <- 1L size <- 2L increasing <- TRUE mat <- as.matrix expect_identical( vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl(0, 1)) ) expect_identical( vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int(2, 3)) ) expect_identical( vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl(2, 3)) ) expect_identical( vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl(2, 3)) ) expect_identical( vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr("2", "3")) ) expect_identical( vec_slice_seq(mat(raw2(1, 2, 3)), start, size, increasing), mat(raw2(2, 3)) ) expect_identical( vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list(2, 3)) ) }) test_that("can subset shaped base vectors with decreasing compact seqs", { start <- 2L size <- 2L increasing <- FALSE mat <- as.matrix expect_identical( vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl(1, 0)) ) expect_identical( vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int(3, 2)) ) expect_identical( vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl(3, 2)) ) expect_identical( vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl(3, 2)) ) expect_identical( vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr("3", "2")) ) expect_identical( vec_slice_seq(mat(raw2(1, 2, 3)), start, size, increasing), mat(raw2(3, 2)) ) expect_identical( vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list(3, 2)) ) }) test_that("can subset shaped base vectors with size 0 compact seqs", { start <- 1L size <- 0L increasing <- TRUE mat <- as.matrix expect_identical( vec_slice_seq(mat(lgl(1, 0, 1)), start, size, increasing), mat(lgl()) ) expect_identical( vec_slice_seq(mat(int(1, 2, 3)), start, size, increasing), mat(int()) ) expect_identical( vec_slice_seq(mat(dbl(1, 2, 3)), start, size, increasing), mat(dbl()) ) expect_identical( vec_slice_seq(mat(cpl(1, 2, 3)), start, size, increasing), mat(cpl()) ) expect_identical( vec_slice_seq(mat(chr("1", "2", "3")), start, size, increasing), mat(chr()) ) expect_identical( vec_slice_seq(mat(raw2(1, 2, 3)), start, size, increasing), mat(raw2()) ) expect_identical( vec_slice_seq(mat(list(1, 2, 3)), start, size, increasing), mat(list()) ) }) test_that("can subset object of any dimensionality with compact seqs", { x0 <- c(1, 1) x1 <- ones(2) x2 <- ones(2, 3) x3 <- ones(2, 3, 4) x4 <- ones(2, 3, 4, 5) expect_equal(vec_slice_seq(x0, 0L, 1L), 1) expect_identical(vec_slice_seq(x1, 0L, 1L), ones(1)) expect_identical(vec_slice_seq(x2, 0L, 1L), ones(1, 3)) expect_identical(vec_slice_seq(x3, 0L, 1L), ones(1, 3, 4)) expect_identical(vec_slice_seq(x4, 0L, 1L), ones(1, 3, 4, 5)) }) test_that("can subset data frames with compact seqs", { df <- data_frame(x = 1:5, y = letters[1:5]) expect_equal(vec_slice_seq(df, 0L, 0L), vec_slice(df, integer())) expect_equal(vec_slice_seq(df, 0L, 1L), vec_slice(df, 1L)) expect_equal(vec_slice_seq(df, 0L, 3L), vec_slice(df, 1:3)) expect_equal(vec_slice_seq(df, 2L, 3L, FALSE), vec_slice(df, 3:1)) df$df <- df expect_equal(vec_slice_seq(df, 0L, 0L), vec_slice(df, integer())) expect_equal(vec_slice_seq(df, 0L, 1L), vec_slice(df, 1L)) expect_equal(vec_slice_seq(df, 0L, 3L), vec_slice(df, 1:3)) expect_equal(vec_slice_seq(df, 2L, 3L, FALSE), vec_slice(df, 3:1)) }) test_that("can subset S3 objects using the fallback method with compact seqs", { x <- factor(c("a", "b", "c", "d")) expect_equal(vec_slice_seq(x, 0L, 0L), vec_slice(x, integer())) expect_equal(vec_slice_seq(x, 0L, 1L), vec_slice(x, 1L)) expect_equal(vec_slice_seq(x, 2L, 2L), vec_slice(x, 3:4)) expect_equal(vec_slice_seq(x, 3L, 2L, FALSE), vec_slice(x, 4:3)) }) test_that("vec_slice() with compact_seqs work with Altrep classes", { x <- chr_rle(foo = 2L, bar = 3L) expect_false(chr_rle_is_materialized(x)) expect_equal(vec_slice_seq(x, 1L, 3L), c("foo", "bar", "bar")) expect_false(chr_rle_is_materialized(x)) }) test_that("vec_slice() handles symbols and OO objects", { expect_identical(vec_slice(c(a = 1, b = 2), quote(b)), c(b = 2)) expect_identical(vec_slice(c(a = 1, b = 2), factor("b")), c(b = 2)) expect_error( vec_slice(c(a = 1, b = 2), foobar("b")), class = "vctrs_error_subscript_type" ) }) test_that("vec_init() handles names in columns", { expect_identical( vec_init(data_frame(x = c(a = 1, b = 2)))$x, named(na_dbl) ) expect_identical( vec_init(data_frame(x = c(1, 2)))$x, na_dbl ) }) test_that("vec_slice() restores unrestored but named foreign classes", { x <- foobar(c(x = 1)) expect_identical(vec_slice(x, 1), x) expect_identical(vec_chop(x), list(x)) expect_identical(vec_chop(x, indices = list(1)), list(x)) expect_identical(vec_ptype(x), foobar(named(dbl()))) expect_identical(vec_ptype(x), foobar(named(dbl()))) expect_identical(vec_ptype_common(x, x), foobar(named(dbl()))) out <- vec_ptype_common_fallback(x, x) expect_true(is_common_class_fallback(out)) expect_identical(fallback_class(out), "vctrs_foobar") }) test_that("scalar type error is thrown when `vec_slice_unsafe()` is called directly (#1139)", { x <- foobar(as.list(1:3)) expect_error(vec_slice_seq(x, 1L, 1L), class = "vctrs_error_scalar_type") }) test_that("column sizes are checked before slicing (#552)", { x <- structure(list(a = 1, b = 2:3), row.names = 1:2, class = "data.frame") expect_error( vctrs::vec_slice(x, 2), "Column `a` (size 1) must match the data frame (size 2)", fixed = TRUE ) }) test_that("base_vec_rep() slices data frames with the base::rep() UI", { df <- data_frame(x = data_frame(y = 1:2)) expect_identical( base_vec_rep(df, length.out = 4), vec_slice(df, c(1:2, 1:2)) ) }) test_that("vec_size_assign() slices data frames with the base::rep() UI", { df <- data_frame(x = data_frame(y = 1:3)) expect_identical( vec_size_assign(df, 2), vec_slice(df, 1:2) ) expect_identical( vec_size_assign(df, 4), vec_slice(df, c(1:3, NA)) ) }) vctrs/tests/testthat.R0000644000176200001440000000006614276722575014563 0ustar liggesuserslibrary(testthat) library(vctrs) test_check("vctrs") vctrs/MD50000644000176200001440000011127215157552632011742 0ustar liggesusers473d240179c23f04d1b2f155596e3ec5 *DESCRIPTION bb38d60fdc799f700b0f8537f9fc4110 *LICENSE 26e8ea3522f7f470d41408ce931449af *LICENSE.note dc3f838ef66b256caca5df59b42deb4a *NAMESPACE 2a04377f59d1c8225afc3c035145b74c *NEWS.md aa8a25ae369276c1dab0b1a900b9365e *R/aaa.R 0bbbaba064d71671123f99018084f9de *R/altrep-lazy-character.R d48dd39d64c50737eef1ea7d09ad9260 *R/altrep-rle.R d0e98a3acb7a27924a7fac1435a02047 *R/altrep.R 17e8cbae43be8dc32f6ee62370e2ad90 *R/arith.R 895b7039d5edcf74243b459f4af96119 *R/assert.R daeeaed772388fd23a7734aae3dffec7 *R/bind.R d2944c5661f0ac2a5a0972d0160cbe1b *R/c.R 3eed2cdd3c3c67bc354bda6590b806ce *R/case-when.R 6b463b9ce06696aa9554a87229bc8628 *R/cast.R 98855aa091d66f673084102c815c3b7e *R/compare.R 03af8886c6365797bedd4aadfbf72680 *R/complete.R 33e6576093e43972ccea8ff733b61c5a *R/conditions.R 428977c05cd18a827e623ace11c82c81 *R/dictionary.R 5ef9a5b69bf5cc6d7536abeb5f954d1f *R/dim.R 376d5a1ea32d07e820ac1be281f5a8e9 *R/empty.R 2230385458965bdcda39305fb6d9a466 *R/encoding.R dcfc36c0ab9bf7b104c52cbbf32ffcea *R/equal.R 800c1ae83f6d707bca6ebff52ef921cb *R/expand.R 739accacf5bb90826b42705e10532a96 *R/faq-developer.R 23e1082e791534d5f3d88a606451fb86 *R/faq-internal.R 6cd021993d4cd7565edce119d2966a83 *R/faq.R 4086555f3ef197850db758371ae41269 *R/fields.R 7c5bdada6056f58298f4ab9b3f056267 *R/fill.R 5b9b710c6191eb234a7cd29a042ec72c *R/group.R 6c1c4a1cd7bb82f0102f7961d1fd1874 *R/hash.R d529ad8e26301142ebb8a0926cea77db *R/if-else.R 4e9bd17a652cb4314c1fc3a00647077f *R/import-standalone-linked-version.R c9f0f46847bb062472c67e9176d79f4a *R/import-standalone-obj-type.R a6dfe5a5049fb897fec1dd3352a45b51 *R/import-standalone-purrr.R f0289ac3a8cc612523ebf40c6bb9e5c7 *R/import-standalone-types-check.R 0bfb0df3b296601de44cd8b4b6547c01 *R/interval.R 56727de8bb321157998abbcad9118a87 *R/list-combine.R ab3208002b1524b1d08cdd15b56125f4 *R/list-of-transpose.R ef8cae23aceb35fb2061c4c464f4cfdb *R/list-unchop.R f7473387fe0bb1735dfbd1f31ae44de0 *R/match.R 19b436887a2e7e055505a220313faf93 *R/missing.R 53a85805d8b8d35535597a69beec6ce0 *R/names.R 3a821b1c8551b93b103d03e3c472128e *R/numeric.R 2a445fc4fe86abaabd34de555a826ec6 *R/order.R 03af06a59268cc73d32cb8a722e83dce *R/parallel.R 095dc8a64618bd64f4cf6606c6245f99 *R/print-str.R 99729d4eb6c10b15a3fe3dd124288e66 *R/proxy.R bc81c37b880c3bec9c807c3129773418 *R/ptype-abbr-full.R fef0db1b81b97fe152387035d0a55382 *R/rank.R f685c5fd6b5865fce52d28e1d5bad209 *R/recode.R 7197336c9738c8e46b5a8f1d338be6ac *R/recycle.R 028c7a4bd63c1abca883b4b4274de9e4 *R/register-s3.R aacc4d17d3e1aa4f33395ab960bcefd9 *R/rep.R 4017b5a8feabc7f1704da270e20378b3 *R/runs.R 9d7189324f8556c4637486b11a8eddfa *R/set.R 0237c3ef9001b9d442524f69c4a551d5 *R/shape.R 02edfacf74221fc99a41f209b796b041 *R/size.R 8b65e5c92a42466e3e3858b0052eaeaf *R/slice-chop.R a1f6ebb4a3694d4d58656b845bd71a81 *R/slice-interleave.R 2862d00fed1911d9b1af3578ec88f3e7 *R/slice.R 5c42e22ac68dcbecca0f9a54cb2219fb *R/split.R be6e4e1237a53535ab62bfd94dd43071 *R/subscript-loc.R e34ca89b54726aeef2636f7c087cf7f1 *R/subscript.R ee6f53133e7f63ffad088a9fa70fd20a *R/type-asis.R 603fb14f083401b053d4e4015f3114d3 *R/type-bare.R 4f90329fad04eb5b52d7f16c7d2b432b *R/type-data-frame.R ebec573d77d615cc3f6e262791a89960 *R/type-data-table.R 14b9fed4d51df176cf90f54c5f08d11f *R/type-date-time.R dbb0821db339bb4992260d0d168bea31 *R/type-dplyr.R eb1f50f5f2db23a9299572ebf234f902 *R/type-explore.R d5b4baffa9fc6784a9dc0ef90b1541fe *R/type-factor.R d5f5f0f83afcdf9a3d0142d25c3fbb98 *R/type-idate.R fae6476a250483c894fab44736e22cae *R/type-integer64.R c5f9178a38b91efdbcabea8e4d3c538f *R/type-list-of.R 47e63c3f8b201914c2e35114da8a1cbc *R/type-misc.R 994786742bb2c30cdaacfe8bf61b9ba1 *R/type-rcrd.R 70dd3eb1a75afe17289de679993e0487 *R/type-sclr.R ec3caeb26fc70967664a88a026c2a406 *R/type-sf.R 99282f06db3b453ce22d6cbef59c82e7 *R/type-table.R 94a617667f5f065a8b62e7538aee2f2b *R/type-tibble.R d72530c3216622633f4b5e32f5d1fe01 *R/type-unspecified.R cd6eb46f35d926401048eafd58b58ab0 *R/type-vctr.R dea4e12c120ffcb564f9e28393d72819 *R/type.R b5d4e8890dd3d0cc45935be4ddd13495 *R/type2.R 758dc22d9f27ea0aebeb325cb6c96571 *R/utils-cli.R 9917fbe787f348f90b798f7af23a4892 *R/utils.R f3b1b436ba0bc9fd01be816deede40d5 *R/vctrs-deprecated.R ef1cdf3aa2406d681fa626c8ba5cb6c5 *R/vctrs-package.R 4adf6a33b208fe0ffb04fa878a536c8b *R/zzz.R 93af8d5136e0750bee063bde3268ebfb *README.md ec1cc4f60520c65e33e5dc14be1cbb37 *build/vignette.rds 43cc81e569685f0ede8af83150d4f016 *inst/WORDLIST ebbada9679b83b39ca4407d1d02628d2 *inst/doc/pillar.R 7dec628565582a504f5aa9744f865cd9 *inst/doc/pillar.Rmd 0ab1f4b12ae81136b3d9353b0b37953f *inst/doc/pillar.html 13cb32f68f85286db696c80089dcab9f *inst/doc/s3-vector.R 25c232a97efcf11ce02678a722282e43 *inst/doc/s3-vector.Rmd 49ff68ae5697dc2d24651e0fc442e931 *inst/doc/s3-vector.html cd3e6a61510015a8cad3dbc88a890846 *inst/doc/stability.R 4839ddd2a359470622d6edcca6c49edf *inst/doc/stability.Rmd 122e1db66fcbfd22e54ae407cac341e8 *inst/doc/stability.html 97bea39da468c17c2c7b6fc50ab4798e *inst/doc/type-size.R 317b475c17b8b2a70f68cca0402f9eb9 *inst/doc/type-size.Rmd 8c2532e0a1b0e6872a9bfd2205fd1eba *inst/doc/type-size.html 3dd589fac3687f547a97fcf05c4695b0 *inst/include/vctrs.c bd134daaca5fb676412074660e3a00bc *inst/include/vctrs.h 6b865e241282842dfe6ebf1861cf5d5b *man/as-is.Rd 7f8848a4407442328bc8f4a931294ff7 *man/data_frame.Rd 15a8baf0db9014ad118f860c62f960bc *man/df_list.Rd bbc1254fac87c05a95ecb28846a893cd *man/df_ptype2.Rd 8129dc2228cc653f7f8cc89402adf503 *man/faq-compatibility-types.Rd c8f030b331b929901ad27ef08a3a953f *man/faq-error-incompatible-attributes.Rd b1dcf851e65ed114d023924c7698d92a *man/faq-error-scalar-type.Rd 853adb55a5e326ffce89148289ed888c *man/faq/developer/howto-coercion-data-frame.Rmd 1d2b404542db0d4e3917841f49381bdb *man/faq/developer/howto-coercion.Rmd abaaebf8139cdad9598b2d57eec4b3a3 *man/faq/developer/howto-faq-fix-scalar-type-error.Rmd ae1e2d8343daa530961f5fff4e31d454 *man/faq/developer/links-coercion.Rmd 0af8c2fcec3117584270e0f5d91b0611 *man/faq/developer/reference-compatibility.Rmd 7e11e2d99450d1d6c4a8677c12b6d43c *man/faq/developer/snippet-roxy-workflow.Rmd e2e62755d3bdb05a31548998c611f233 *man/faq/developer/theory-coercion.Rmd 34f562c8bdfb9397d8400b4241d5196e *man/faq/developer/theory-recycling.Rmd fb7ded0ec15e59cc621f0b881b053c38 *man/faq/internal/matches-algorithm.Rmd fc5504fff8aed9f919dbf4c261538f5b *man/faq/internal/ptype2-identity.Rmd 1f872537bbe23063c93840d93ad7a542 *man/faq/setup.Rmd b061365fbea176c543525a5f395653a5 *man/faq/user/faq-compatibility-types.Rmd b18af578ec9a68627e7b91a62de9abd5 *man/faq/user/faq-error-scalar-type.Rmd 856c670ccf80ab0a433a5a1a172b313d *man/fields.Rd 69eb638d262c60d5bd5ed2aa4717190c *man/figures/cast.png f5627d4a9d3071aad2dec7dbd3d33eca *man/figures/coerce.png e01d20997a9c8d069ef28e3c6013bd8c *man/figures/combined.png cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 865c1e8576113b430bcb2d69bfdb0e76 *man/figures/logo.png 25e2f330cc7176f22ad3cc5a3ba4dc22 *man/figures/sizes-recycling.png ff7dec2d166e669c6b6abf085030f369 *man/figures/sizes.graffle 793b22af8f7b1648699748172cab8af7 *man/figures/types.graffle ecb1898f0d3232051d9a1969c0bc8570 *man/figures/vec-count-deps.graffle 0c3de09d34b6b7e94e7671c08e9df3f8 *man/figures/vec-count-deps.png 2995e7508f8f053600f8e21cec6b60c3 *man/figures/vec-count-deps.svg 51d8334d1b47a69a61b50f40cade0652 *man/howto-faq-coercion-data-frame.Rd ba1da74b9301c190b1370c3c20c6bb9f *man/howto-faq-coercion.Rd 00e36497e717e90878ecc719cdcaab53 *man/howto-faq-fix-scalar-type-error.Rd adb03fe5026bdbf0027002f12f8792d5 *man/int64.Rd 025067de2ede9eb33d7ef832ed1f7355 *man/internal-faq-matches-algorithm.Rd 18b1edae3671bc721550ca7dac4a566b *man/internal-faq-ptype2-identity.Rd 58409cba3d4781f8900687ded62c43e0 *man/list-of-attributes.Rd 9cf4aa8bc3872e7df5002bf973170cea *man/list_combine.Rd 4354e70c6d6ce4afa68f9cdcfd7ab71c *man/list_drop_empty.Rd d98986791cc523a366e08768fee7c9d2 *man/list_of.Rd 45006cc9d3834e3f7586b72ff295db2c *man/list_of_transpose.Rd 7248aea971f376cc98c96022c47c0139 *man/list_unchop.Rd 96052f38ddd3ccff3dd2e6b657aedeae *man/maybe_lossy_cast.Rd 135c38fc7e19016453eafc6f609a052a *man/missing.Rd b128d06416ce1660cb50f4d598655565 *man/name_spec.Rd af444cb8c20bd85d2ee2877ae42a65ce *man/new_data_frame.Rd 96a6f14e3014f3b7e3e9db90d0516fc4 *man/new_date.Rd 75dcc37c27e47ab214a3fb275e49365a *man/new_factor.Rd 45ddc7549c673a67f6f165937c51f398 *man/new_list_of.Rd 7bba3e354732b69c84eee331683d240d *man/new_rcrd.Rd 055e37a6ce35ad98995b28f9202480fd *man/new_vctr.Rd c0fa2bbc4e3dd93c4ccedbad7e5bdb33 *man/obj_is_list.Rd 0432b9bc77e37a36a76ba1f65697ea6e *man/obj_print.Rd 4bd8924e7585671c5f2995ca1fcb8578 *man/op-empty-default.Rd 5725105cbca9da7c11bc0e6dd8c0c799 *man/order-radix.Rd 133289553cfa62cfbb10636951fe2585 *man/parallel-operators.Rd 15020f537e44b4e8d6b5824773f8f2d9 *man/reference-faq-compatibility.Rd 6cc6bb80b7b76a85fc455520fd6f9d81 *man/runs.Rd 189eee68fe149af920039bea657e0563 *man/s3_register.Rd febea111fcbfa3fadeb25ea4a5239008 *man/table.Rd c5808ec0a28a1a0a2d0ea946a2e8107e *man/theory-faq-coercion.Rd eb50d76bc6f40753af13ee3d0eead436 *man/theory-faq-recycling.Rd 160b76db439f9a485c412da17e680549 *man/vctrs-conditions.Rd 00d4712f39f895035e899b48827887d0 *man/vctrs-data-frame.Rd a543f5ac20d91093b4b9044ab7893be1 *man/vctrs-package.Rd 5413ef0c259a651728bcae1ea28580fd *man/vctrs-unspecified.Rd 9edb893b34d686716edd7c8b7eadb89c *man/vec-case-and-replace.Rd dda307667dc86b6623072107b301658a *man/vec-recode-and-replace.Rd a67939fd9d6798d77c6410fdee060590 *man/vec-rep.Rd 1a01e323f320ffce3c4622820c17991f *man/vec-set.Rd 41aec7be7b1c7722937983d2fdf1b8e3 *man/vec_arith.Rd c380682876bd006a1e52eb8949a5b2e9 *man/vec_as_index.Rd 1b8da78e4955f4c62f4fb0bad38b001f *man/vec_as_location.Rd 1cd97290bb613faa6826ee2aff7d2687 *man/vec_as_names.Rd 4751d9e1734abce229836322b3cd18ec *man/vec_as_names_legacy.Rd fa32201a048d4f34b3ca84295aa0e5d6 *man/vec_as_subscript.Rd ef0f4d8cc4e9bc4786dbc6f7c8b11c8c *man/vec_assert.Rd 2b6294d7e57187d4fc6f9aa55ee173f9 *man/vec_bind.Rd 27c126fb46fb5e3ac1215a1b7cf7362c *man/vec_c.Rd da8a64be2f72048945150d170c50f049 *man/vec_cast.Rd 6497ea65f7a25c6b57f7da8868a969b7 *man/vec_cbind_frame_ptype.Rd 756a8b9722484acae830490840901625 *man/vec_chop.Rd f6a9055845c6ced7d866638412ff4a63 *man/vec_compare.Rd 8e6770ca84e71b3575b983f3eb347240 *man/vec_count.Rd ed716990b6a50620562e744ee927ab75 *man/vec_data.Rd d5da420b8a8686cf2e0910970d326b24 *man/vec_default_ptype2.Rd 0c73ae7aa558931401b22ba50c5a7faa *man/vec_detect_complete.Rd e1817b7f472d5eb862fae6e02081941c *man/vec_duplicate.Rd cf78fee622d53673b7e08d5c4406c222 *man/vec_empty.Rd f36ede10cbd5b8a0f67247217f49e2a3 *man/vec_equal.Rd fd36dc7754da38f3882e56d83a55095c *man/vec_equal_na.Rd 73575c3ace93171061dccf6fe4cf8e0a *man/vec_expand_grid.Rd 0e561b4e3348dd18146a73f7dea6748f *man/vec_fill_missing.Rd 5d0d078849fd53c6606cdc677368a23a *man/vec_group.Rd 10e2996acc436bed15f468b62f905619 *man/vec_if_else.Rd 7efd75d6b26d82f2e451873a8d720c9c *man/vec_init.Rd 3e9155dee61cbdf2ad1deffef6caf8c0 *man/vec_interleave.Rd f8e6735b44208198a78e209a62a06e55 *man/vec_is_list.Rd 3f495b669ea95026c8a3d6512f1cd75a *man/vec_locate_matches.Rd 08318af9e975aa8b9680fc77ddf5906a *man/vec_locate_sorted_groups.Rd 32e800db80121a8b74d5e45f31fbf2c2 *man/vec_match.Rd 6a0ece01d92f2d27a52fa1fc6ec99d36 *man/vec_math.Rd f930bd4d1e373a74ae18e09aa5ac01c0 *man/vec_names.Rd 711d2d36bffdf9c60781c993fd087b5f *man/vec_order.Rd effec5ea0d7340ceff0b6a784fe4e3f3 *man/vec_proxy.Rd f93b5ac3e9244b22bc44c0c1707b98ef *man/vec_proxy_compare.Rd f4f71559cb455487816c1bda66e8c168 *man/vec_proxy_equal.Rd e91e55ea6c6c06d6e74dc9dac6b2a221 *man/vec_ptype.Rd 61b0865d2076f200909eeab97143bace *man/vec_ptype2.Rd f2e5b287c46b120025e0974e5e136089 *man/vec_ptype_full.Rd 2ed2a803f564a69008048054cb981fda *man/vec_rank.Rd 43d99ffe2d453416bc8bd46c36684cd3 *man/vec_recycle.Rd c3c863bc5fe6d0bb6466b176a66c3548 *man/vec_repeat.Rd 998136cc73dfd5148c60699abd8424f0 *man/vec_seq_along.Rd 57ef40dec90ececb810274f2e7f703d7 *man/vec_size.Rd 5900f6943b8e1acf71f348baec307aee *man/vec_slice.Rd 45bfe942611373e1b51f27bd83a535c9 *man/vec_split.Rd a1c483af2aa4664b8757c8447bf03c59 *man/vec_type.Rd ce5376a2b112b89288b871e17c550d02 *man/vec_unchop.Rd 2b6faf07f1ecd9709c37d9ac6c14d6a0 *man/vec_unique.Rd 0440a0140b576ed3b869ee21ecf25897 *man/vector-checks.Rd 1ac3ccfefdee618d4e30507fd0e26932 *src/Makevars 5c4f7dd6b14d87bc5e6b0dda3dc51c63 *src/altrep-lazy-character.c 94ef46fff2076c0628c545ba00928573 *src/altrep-rle.c 959e5592d4c9a7b99abbbbb65f435c1f *src/altrep-rle.h 25aad487b851bb1cf86381928b317605 *src/altrep.c 3c9c19fa52da7b9e2384681543b043bf *src/arg-counter.c a761f81af3b62ea714088650c2452fba *src/arg-counter.h bbfe493e6bfb818b751d67f06bae5394 *src/arg.c 8db3587a39141a9f4d10be3ebdd61071 *src/arg.h a0d66ba3536b3059fa6af274074a3ed5 *src/assert.c 3fdf21ef694d4199166f4a8412a83a40 *src/assert.h e96874864ef689aa2112f8279a7150eb *src/bind.c 083a4d4c356a9a6d5d5a7348ed918fbc *src/c.c 2e7854ee099d2a97a64f95348387d7d5 *src/c.h 711dc7146229b7c1540a25406f7215be *src/callables.c 3031dd45801793ce5977100371d06142 *src/case-when.c 427f350600cd9e19014f0e6e28978e5d *src/case-when.h a7468aaaedc7fd178aac30cc76cb082d *src/cast-bare.c 4786b323c411d0a502cc5088c12790aa *src/cast-bare.h 746465bd4daf7e3e518117aac4632efd *src/cast-dispatch.c 97f9ea6c3e03726f4f3c7e01f9efa672 *src/cast-dispatch.h 005937860685d879ac776f593dbb7b0e *src/cast.c a179982441f8f33adbb8b989f0598f73 *src/cast.h 81bb0c1c583abcdedcef1fb4e1af848c *src/compare.c cd6c5a305948e04a771fc634aedeb166 *src/compare.h 7b324360af343c581e3feb384e5b5ed3 *src/complete.c e1ebe187a40acce72660350d75d574c1 *src/complete.h c1e25ae50801e47eaa36b6f649d7acae *src/conditions.c 4dd6c8cce37fed2743cb0e4de038c45f *src/conditions.h eec1742d9716cc9e97a25ff9295ba555 *src/decl/arg-decl.h 929b7e226ecd665392655c87a0b53b14 *src/decl/bind-decl.h af921587083480096e7bd36710e4310d *src/decl/case-when-decl.h f5840400de6cf0f29d23ccd1f5624e67 *src/decl/cast-decl.h 97ffd65ddd0467ca608b64b070822e82 *src/decl/compare-decl.h 4af40c03c0d1e0abb525bfc5c48e0b53 *src/decl/dictionary-decl.h 293d3e4c8453bfc07ba7323197171d76 *src/decl/empty-decl.h d2e4a68d80359da81d08f13eb8204784 *src/decl/encoding-decl.h b075b3fab89210fa7f50985ecbd4e8c3 *src/decl/equal-decl.h 37c2cafc0d795af35bfcd4ad0d0fb800 *src/decl/expand-decl.h 20bd2ba0fd1484b9d215693bee14af9d *src/decl/group-decl.h da0abe3c9f4720e735c04dba7020b156 *src/decl/hash-decl.h 19cad159299114d6f4fa4f655143845d *src/decl/if-else-decl.h 7affcadacd58c51956e2c12051becee2 *src/decl/interval-decl.h 286fff67f68c31352be17285d14d0762 *src/decl/list-combine-decl.h 1bf8cd7c320b7cb51fea8c55a4b06bd3 *src/decl/list-unchop-decl.h 2819da1b40a204cf28a881723b9365e4 *src/decl/match-decl.h 040c94470b1078814a75c0f7192b2e35 *src/decl/match-joint-decl.h 0b9a1f6068eb1a52be11cce018ab1731 *src/decl/missing-decl.h 8183bd4349e17260837f84b4b63fc863 *src/decl/names-decl.h ec590683b1c056cd23498fa915d422a1 *src/decl/order-decl.h 654108917edcf64d623cb901a6dbdbfa *src/decl/parallel-decl.h b8eaf318cb35e81a7687e386fb448025 *src/decl/poly-op-decl.h 064f3a42baaddcfee15ca581653a11b0 *src/decl/proxy-decl.h 88f29eac7a416ba50bd2823b6e6bb1f8 *src/decl/proxy-restore-decl.h 550ef255c5d7b10d37199d2cab8ce9a5 *src/decl/ptype-common-decl.h e0cf2d6986f6809586ef42a95a301299 *src/decl/ptype-decl.h 823945900f9fecbd2c4e324e4cc31cc1 *src/decl/ptype2-decl.h f9ee2140c89a3b46d21a57a4cc40dd91 *src/decl/ptype2-dispatch-decl.h 39f9dfa970d529d829aafecbdaeb311d *src/decl/rank-decl.h 67f18886fe64e6fe480cf05ea31939fe *src/decl/recode-decl.h dee83a2f55e2571233017bee6c1baefe *src/decl/rep-decl.h d41d8cd98f00b204e9800998ecf8427e *src/decl/rlang-dev-decl.h ccc116f10919ee64555f6fefe906107a *src/decl/runs-decl.h b84682f0c0e6b944d0b281da0efee3b4 *src/decl/set-decl.h 6bc28f0133e0d24ddd10e7cb2274faa7 *src/decl/shape-decl.h 05fb7ab36487fff3e7f897ffeed19f5e *src/decl/size-common-decl.h d3e8fd4675b913e19725972c4e2cbcf0 *src/decl/size-decl.h 3fed46e86269b3933f3010a406bc0d9c *src/decl/slice-assign-decl.h fa32dd09cfed1e5108349d6abca484ac *src/decl/slice-chop-decl.h 982d146cc9ba842da840a5148e5f0a14 *src/decl/slice-decl.h dd25c644fda90b433cb9e417932e3e22 *src/decl/slice-interleave-decl.h 433555dc73a06e3feee90942b96007c3 *src/decl/subscript-decl.h 1c4e53904d5ed995cae8864bfb363c24 *src/decl/subscript-loc-decl.h 1118874b8c129c708319c01377f53d9d *src/decl/type-data-frame-decl.h f68cd5eba5e27490973538366080e5f4 *src/decl/type-info-decl.h 7680470962f71a166726dcd0f737fc25 *src/decl/type-integer64-decl.h 373619649f37a04eca81cd284b208c85 *src/decl/typeof2-s3-decl.h b438c6f0c7e84d50a2e8d80ba40244a2 *src/decl/unspecified-decl.h ef7f38c88e5730928fb0be4e2e2a57de *src/decl/utils-dispatch-decl.h c48c0e054f11ccdaf79e3ae83523705f *src/dictionary.c 4abb897af0d3794f3ef8ce587fdb05c3 *src/dictionary.h 2b3cf083097c5a1877b4afe04521abde *src/dim.c 3298a10daf357000330d2a80f94d62c8 *src/dim.h 950a06022f25325773266094620ced0b *src/empty.c ae50eb569dbc09fb936fa5380ec70f50 *src/encoding.c fbb4e5eac67ee31c258d898f9a934aa2 *src/encoding.h e0548f21816bb24871cedce171e86215 *src/equal.c 582da22d0624829b3e4b23ef0dfe8657 *src/equal.h fde6f68d787dd7e8dabae6cde3191cb2 *src/expand.c ede349cc5a6b749e0e8a7fb4bc23bc4e *src/expand.h a9d40a84cc373f66e70ed18962569343 *src/fields.c 2df4677e0457d9778f94944d89a15655 *src/fill.c a0951cadc85a49f04002fabef6713745 *src/globals.c 3ce6d340685a24d16a110c3473dba929 *src/globals.h 3890d7d7d011f470766789b4bab86b38 *src/group.c 32b9453f80cb11322c6d39586d5483d6 *src/growable.c d999e88654a41d30aafc88828b3d5c60 *src/hash.c 512f9cc6f12790d3d4505d578a71de17 *src/hash.h ac8f40b12a03a5172f84ca812339800f *src/if-else.c f2b50b10b21592ff39fcc20580008e09 *src/if-else.h 769aa699808aaf9d870d149d060fb746 *src/init.c 603be3fe39a682f1b768bcb225575c4e *src/interval.c d764e771de3831539365136d73abd415 *src/lazy.h 878bd80b263a48a1b635c62fa297350c *src/list-combine.c 8c2c1921adbb72b0cbc6b8bf4efa16cc *src/list-combine.h 8cbd40a3a5445fca12ea5fb4586280ba *src/list-unchop.c 92bacee5ef1bb94865d614eddf5af40d *src/match-compare.h bac90d61ab66b89ab3e143fc48408c72 *src/match-joint.c 3c3253e8f0505f5fbebf757fd4029abe *src/match-joint.h dd4896e77201f00ce52d9064c602978d *src/match.c ff81f4f529189c02a877a601e953a0a1 *src/missing.c ebff739c7d8c8d139ea046c5bf20aa0d *src/missing.h 7115df9a0a8b12a531617dfc6e75bea0 *src/names.c f34a3c1cbf0ffcfc5e332210fd8b76a1 *src/names.h baa9b1251c6689540fe992d9c524d40c *src/order-collate.c 00b21986bcf0db18e425a9b688f17167 *src/order-collate.h 5424e3243c187ec2843b476e58016299 *src/order-groups.c b62509f182818d16384a7b7940aa53d0 *src/order-groups.h 158536568480888dbc3c7e12db16262e *src/order-sortedness.c 50ed94306d7dd961a1ccea9caf22fb74 *src/order-sortedness.h 65cf6735eb63666e639033721d108460 *src/order.c 375371c00f00f09ff32588a8a4094e7a *src/order.h e612d8ea3990c51a2ce939c9ef182fa2 *src/ownership.h 59663c94a7e39a82cd6744526ea0e6ae *src/parallel.c 122395bf336f5bb2d431a735227a5bc7 *src/parallel.h 5c056394f679da5cd87650716c0ad335 *src/poly-op.c 71fccd7bb4f44d91cd2192b35f046e61 *src/poly-op.h 4fa2140d7e115876c08427cb98c0498c *src/proxy-restore.c ab734701ca1039ae9dde8cd4baf48941 *src/proxy-restore.h d596e6695fbfe8f51db533dd2174f72e *src/proxy.c 17d8d3155bcf4dbb6b3d800bd35b65b4 *src/proxy.h a443cb845da924771da9442c0a167c78 *src/ptype-common.c 1a79d0c8e2593be72033e404bdbf289e *src/ptype-common.h 9ac63b9dba5f8c0d441271a234717425 *src/ptype.c dadcd154897f739353d448a24c425bf6 *src/ptype.h 0a932d7bcf57de229f01952a77b10199 *src/ptype2-dispatch.c e26e009263f81757151c4e89b17f43f7 *src/ptype2-dispatch.h 09c89878d33d8c84ad849ccd65d1ad0f *src/ptype2.c ca9be87e596496035313df9bf6e7ade5 *src/ptype2.h b9fe599d5a6d93b755fa62c01a908b2e *src/rank.c 10acfa5b332088013ac63c33fb7b0965 *src/recode.c c05300680ffd43d5677280dd72fc9d6c *src/recode.h 4f6bbdee0dd94adcb770d7e505c1a115 *src/rep.c 493a8a114e46a07b1fa78a17cfcaa166 *src/rep.h 81455781e011851726af965360b8635f *src/rlang-dev.c 76880e6ab9b473cc6502de3571116e03 *src/rlang-dev.h f675812d8927e9b13fce5f1e8ed31f1b *src/rlang.c 0608535fc0aaf4efdf5a51912e6a6c84 *src/rlang/arg.c f33102fbb69a55bf8a314cd6cea529bd *src/rlang/arg.h 032997de0eb7a70acd881d9a53b6efb3 *src/rlang/attrib.c 3e42584f7654b259dd1b49842b3ef33c *src/rlang/attrib.h 9c9b7d12443672c8dee0a4615b56c2fc *src/rlang/c-utils.c bf583f300a656f07bb842e3f25b96107 *src/rlang/c-utils.h 22e5d851558dcb490b87b5ffebbcb6f8 *src/rlang/call.c 291751537e6eb339c08899eca084dee6 *src/rlang/call.h 7e95330c7c69a3090b2c3f6b62412b0c *src/rlang/cnd.c c323a0eff3440f67d2652c7d9f7ba22e *src/rlang/cnd.h b176a262a2ea2e884f4914289466de7b *src/rlang/cpp/rlang.cpp 29bd3e7d88cdb8a4a641f05ee089069c *src/rlang/cpp/vec.cpp e087224ebfb79ff325c2224665ab8a90 *src/rlang/debug.c ada02ec97cd21a5717e586f6be114b76 *src/rlang/debug.h c4c0b92858e604f1e651c5aa247ac5b0 *src/rlang/decl/cnd-decl.h c42446da8c4955c10b5acc67ec1ee130 *src/rlang/decl/df-decl.h 1aa66d903f3a4cb578ca16d7188511c6 *src/rlang/decl/dict-decl.h bcd3be7be1e7d4c9056adc78687c9f71 *src/rlang/decl/dyn-list-of-decl.h 9069f6115ab0b8cc64574cb3dfd5678a *src/rlang/decl/env-binding-decl.h acb47374357e2572e2c74ace4bcf5739 *src/rlang/decl/env-decl.h bf8df189e2ff68b0c0fe62d076814545 *src/rlang/decl/obj-decl.h 5cbb39aebac52f777c30ab65c94b2461 *src/rlang/decl/stack-decl.h e235b255aac77f597c1b0ea723f628e3 *src/rlang/decl/walk-decl.h db6c3d3e9e9424851c462289c9f08032 *src/rlang/df.c 59d548d90ba18659db46a8d6407913f1 *src/rlang/df.h 543a9da88f455acab0e00bed195c33ea *src/rlang/dict.c 1cf19f35f85a08af0c2c18bccb8fcb29 *src/rlang/dict.h ca299d0a21f7c1b3354f1af93f663929 *src/rlang/dyn-array.c 3a77133e2b00de3f8bb61adbae4510f4 *src/rlang/dyn-array.h aadc788be09a2bd9a565c3e825eb428c *src/rlang/dyn-list-of.c 3b0bab5449ddb404e1ea6365c7eb38af *src/rlang/dyn-list-of.h 47a9a0e3ab73162187997cfdb0fbd82d *src/rlang/env-binding.c e7386f3f3b48887c5654c13e5336429a *src/rlang/env-binding.h 6841b5e1e0c4c430525052cf8016bd1d *src/rlang/env.c 3a5f4e5731304faf675e782360c935e3 *src/rlang/env.h 3d14354720fb5e921e054b499c477fff *src/rlang/eval.c 4ca79fe26c1b1cfd826dbc1bd8db3709 *src/rlang/eval.h 01f0a156858ea14578547720b4e94232 *src/rlang/export.c 9f78f01cd55ea638136022dd7bb6eeb1 *src/rlang/export.h ec36cc014954270b6b827f3844f5cf1b *src/rlang/fn.c afe3dad269ac797b1faa96bd5b374e93 *src/rlang/fn.h edbdbd626761de992dbb9f7a3b98b65d *src/rlang/formula.c b1fe913e42251fedac387df505833e8a *src/rlang/formula.h 97d8e16a62839f989c53127d4339efb6 *src/rlang/globals.c a4832cd11e7ced32e2e5bbca7cef8a7f *src/rlang/globals.h 883cf705478e061c810b25eec596eff6 *src/rlang/node.c 838e203b8802e4be9b09e03f2ddb3ac0 *src/rlang/node.h 6c50f61ab770398065ef1cc3992a9978 *src/rlang/obj.c a9d225f90fcbc919915f5a2364279151 *src/rlang/obj.h 4576f3bc9e1dafe198460d4c2b0e88f4 *src/rlang/parse.c fbbf64374cf70d42e7040794e1b30c7c *src/rlang/parse.h a5e865607e4e717784f5f951533d372c *src/rlang/quo.c 91e7e7c43c3edea2509fb3d1bbc3b81c *src/rlang/quo.h 1603af28dc8aec3d8111a5f8b38ebf1c *src/rlang/rlang-types.h 69de9a2af3bc3255ca756728aea90a70 *src/rlang/rlang.c 1df8d5d44c1f9339c7ef93a437aa5a57 *src/rlang/rlang.h a24cd0a56d6993488d143ed8f5ffb7d7 *src/rlang/rlang.hpp 3115b50ab84dff920d09f95b4f73e395 *src/rlang/session.c c4f658b2f142941877450cc6a721942c *src/rlang/session.h 6225f1c2d3182e4bb957b8f1aba27937 *src/rlang/stack.c 3205bebc161f15bb38b38d88df1d6812 *src/rlang/stack.h e08266d8bec196133ce136c495e7f4ad *src/rlang/state.h 267ea357e68ff19af4f151c76b8c21c3 *src/rlang/sym.c 6a3a6aa98e58a296b6a86a177ad1b0a9 *src/rlang/sym.h 6ec4b806d0cad8eddae9250f2cbf3231 *src/rlang/vec-chr.c ae5f17aa96c3e532b67f3b9448ef5037 *src/rlang/vec-chr.h bf006c89dcffdc61228a385ee5ad342f *src/rlang/vec-lgl.c fda191ce4e58fefe29b8d9a734a6b7ef *src/rlang/vec-lgl.h 59cbf3a32be65ef4a5e95aefc9cc8afb *src/rlang/vec.c 32a20d759795163abe023042889cd8cf *src/rlang/vec.h aad9cb50156b9ab0f02ea91fb31db1b7 *src/rlang/vendor.c 1f93206ea8ca8cab1275e816347e852a *src/rlang/vendor.h e6050a519de98f82529d300b78a19117 *src/rlang/walk.c 83491b8bfe5110dda8e27b4fe2041399 *src/rlang/walk.h aaf7eef0673459c6aca58d54d2c6a8ab *src/runs.c d1ba3e476de9acbc56add4a5f05be91f *src/runs.h cce7a01860f96f03d86d402661ac1a74 *src/set.c 911c713ec8ef667d54a935105c5c15b4 *src/set.h 8c6d54d7048003508c1302d9c72259a3 *src/shape.c 4e716d14080c0897a04032a1000f5fa7 *src/shape.h 95f88ec4682c2a99f2d56a9572fa2085 *src/size-common.c 36757a994bcdd900a7faae19f39fd336 *src/size-common.h 50152ae85a750c7432450d008dece536 *src/size.c 70c1a056935b4afb15034cc4182632cb *src/size.h 244795f2c66fc1b97c6144e8fcf169e5 *src/slice-array.c 4beb6ddf2b1db093bb80e52c34e0dd3e *src/slice-assign-array.c c672f17f3402f58d67ab12d560da504e *src/slice-assign.c 1623d2c67cd1d3f96f0ebf56bc76dfee *src/slice-assign.h e895ab068ef747f19580e7054db96228 *src/slice-chop.c 358c27e046a4da2fd1e2715994c48077 *src/slice-chop.h 4a57fdabb671be40f9c4dcd387ca68c4 *src/slice-interleave.c 4e7f7a220582153f6fb2b9e8b5890980 *src/slice-interleave.h 67b7067cdd112ffff068318305435857 *src/slice.c a15eaf122bb3b8f8a0fe7ddc569349f8 *src/slice.h 3ee26e1d25d8d496c04742ffb4397354 *src/split.c 639ca0f3947bde4eeb671831421a9b84 *src/strides.h 0258f7e83799e5f89b8dee53b63240cb *src/subscript-loc.c 8c53dba4633aeaae89e1ce3778ad2780 *src/subscript-loc.h cc6a305c27dbb57f0792ad44b18f7eba *src/subscript.c 617f4f6807023f20d8556896e5e9f3a2 *src/subscript.h e531dac11054e7e5bc951dce1b940973 *src/type-complex.h f584df904c5e363d0858ec8e95bdcd25 *src/type-data-frame.c c712748c200299a0345eb2cd369880ba *src/type-data-frame.h 6ed57237aadec21779c43f43280acf29 *src/type-date-time.c 936d5393728118799618259e9015f273 *src/type-factor.c 0297ec91ffb729ed371b96e60f479b32 *src/type-factor.h 870fa35aa2dcd1d1e9b33543596ec921 *src/type-info.c 8c817d61d686f67b6e96c289e1d751a4 *src/type-info.h b84ff44169a9872fd1d249e75ac3eb26 *src/type-integer64.c 38f29aa3c41a29e872fbedf8605d1845 *src/type-tibble.c 28ed2e746b7f23f5a6a0f1d8dfd7cb7d *src/type-tibble.h d3623e72c54d9f737854bfeabc2b9c48 *src/typeof2-s3.c e74459d774e495cb76aa341db3105dc8 *src/typeof2-s3.h d801f5b1745be1548adb5fae28290080 *src/typeof2.c f0cad8b72a42ea05f555e1f2a6ad2f82 *src/typeof2.h c7137e0edee2bf0572c71243e8021199 *src/unspecified.c eb232081ec64d1a993015d4f6cb1e8fa *src/unspecified.h 9fe0b4c41b6546790ac050493b6d2bfc *src/utils-dispatch.c d4b56393cfba18d7c55cdb8c9ce5f182 *src/utils-dispatch.h 7a4dd479e119dcec1099272ee95de1ec *src/utils.c d4ecad0f70f4d37b7c2e13db17b55333 *src/utils.h a4010f4dad6c08a08c819a68619c06f4 *src/vctrs-core.h d8863bf3f555dffe16fae54c6b351306 *src/vctrs.h fa10273e066c29ff70e18a49cad0e508 *src/vec-bool.h e949168c2ec3c8f10f0efb88420955e3 *src/vec-int.h 6fa3c9d9579894e4c3a6838f04733401 *src/version.c 8ed8f6a8bd8f35f801d615323a1830a6 *tests/testthat.R 10f848ae4fbfd2b016307e01cc0759c5 *tests/testthat/_snaps/assert.md ecf9ad92531426beb5f002d59ed538c0 *tests/testthat/_snaps/bind.md c11fbc7a7e0d96e0041f35bd69a53f5e *tests/testthat/_snaps/c.md ccb5868940046fac78f7d9bc62f83e6c *tests/testthat/_snaps/case-when.md 5b47ccd0b62f7b3fe4c9c675617506c1 *tests/testthat/_snaps/cast.md 095a349d8d4687659ebee35cff31e16d *tests/testthat/_snaps/compare.md 17ae75d61ddfe41de3b27a2545ba91ff *tests/testthat/_snaps/complete.md a6731badf24ee1a25909a2a277452ccb *tests/testthat/_snaps/conditions.md 8a8b789f2314fa86c9f6c7c680d1a9fa *tests/testthat/_snaps/dictionary.md d23f7ffe26db8f1cb0254ba16230bcf7 *tests/testthat/_snaps/empty.md 171bf01bc8a5f1dc2c10cfa78a8bda09 *tests/testthat/_snaps/equal.md 8cac0051b948342b449d95201f1a22b3 *tests/testthat/_snaps/error-call.md f8408ae72d7fd8eef8e16353db3eeb8a *tests/testthat/_snaps/expand.md 1c8913fc381a13e1e87f68340c8ec71a *tests/testthat/_snaps/group.md 3e9f1f3efa867cf57cbfd59d80985427 *tests/testthat/_snaps/hash.md b0964355424e98dfe078507ba1b9f7b2 *tests/testthat/_snaps/if-else.md 3244f798510f053b0c0253ef3baa7c73 *tests/testthat/_snaps/interval.md 5eb46ec1f853d51dd0232bc6731941ed *tests/testthat/_snaps/lifecycle-deprecated.md 6804b9edface56dddc7edfe538b46c37 *tests/testthat/_snaps/list-combine.md b7a5cd9502207f22234f75fdf4cf3822 *tests/testthat/_snaps/list-of-transpose.md 72a49827fa2af10b21cc921483628a42 *tests/testthat/_snaps/list-unchop.md 0d0ea2eb109702fb053445d1ae9fa590 *tests/testthat/_snaps/match.md a5fb326175a3b1a8ffa7789b8daacef3 *tests/testthat/_snaps/names.md 376e5c53749aabd269289e8470b4dddd *tests/testthat/_snaps/order.md aba7bf71dcbd23d0f361a0fb977f50c0 *tests/testthat/_snaps/parallel.md 1f836fe52827822a61873a2066086f97 *tests/testthat/_snaps/print-str.md be55f35d347fc4444929098fc4dec7b0 *tests/testthat/_snaps/ptype-abbr-full.md c09212c542ea19decfb51111c437cb35 *tests/testthat/_snaps/rank.md 29eeb2145b2789c8071323759b061c82 *tests/testthat/_snaps/recode.md e9090f4e32809dc8156dab83932a2b87 *tests/testthat/_snaps/recycle.md 302aa2345bc8e160696af226c8fdca00 *tests/testthat/_snaps/rep.md c1dd334b3e5c4e2014264c510ad3d333 *tests/testthat/_snaps/runs.md 9216d41bbcd072e7c70eed3355326984 *tests/testthat/_snaps/set.md 0dd03b8a4ec7d22cbaff17e8792887d6 *tests/testthat/_snaps/shape.md 5ac529d16617ed2208bfed986a597403 *tests/testthat/_snaps/size.md 82e83d5c091924ef502a844b33782791 *tests/testthat/_snaps/slice-assign.md ae71564644bf60836b1622bf4c74cf59 *tests/testthat/_snaps/slice-chop.md 7e70edb6cead5046d492d9d5bfad857a *tests/testthat/_snaps/slice-interleave.md aa77cbf63b049b0a673e26c6848ad631 *tests/testthat/_snaps/slice.md c4961acb4782c454d9dc0d5545628819 *tests/testthat/_snaps/subscript-loc.md ca0504fb5ae7d131dd3e80700e961a77 *tests/testthat/_snaps/subscript.md 03ccb96343f8abbd5da6f1dc8f3e85c2 *tests/testthat/_snaps/type-asis.md a2160e562be4ebd1e519f070a8f5ebd5 *tests/testthat/_snaps/type-data-frame.md ea3a59312ad55942deadec7b6708e2cd *tests/testthat/_snaps/type-data-table.md e90f78c52769831ea831cb590d2a6fb0 *tests/testthat/_snaps/type-date-time.md d8c3e6040f0ac0aa9ba06bcdf97545a1 *tests/testthat/_snaps/type-factor.md 6e9f865114dc26d2cecc7c352a19463a *tests/testthat/_snaps/type-idate.md 2630bc58b90b4640e909a486773d64ee *tests/testthat/_snaps/type-list-of.md d3b2f0b00f269145e8dc7e9fd4e1a3da *tests/testthat/_snaps/type-misc.md 7a96b1e292942be1722196424e8b924b *tests/testthat/_snaps/type-rcrd.md 42ceb2100a3ba4e0935e78a14af5ba1a *tests/testthat/_snaps/type-sf.md e83d5684f802acdfd5b1b26d903a640f *tests/testthat/_snaps/type-table.md 7dac22d321c2d7963974dee2bc498afc *tests/testthat/_snaps/type-tibble.md 4070cfc552068f834bf164dc7bb6c98c *tests/testthat/_snaps/type-unspecified.md 27d77098d05716a4a24928a504ffabe7 *tests/testthat/_snaps/type-vctr.md 311424a3bad04f0c5a44a2c79788ddc7 *tests/testthat/_snaps/type.md ebb4758aabfd5c1e4944ae12de654017 *tests/testthat/_snaps/type2.md 77de33e8c8693d37f99a280ea599a7b0 *tests/testthat/helper-c.R 08695ffa46e394a7330adce6962c7e07 *tests/testthat/helper-cast.R 7f9b29a481e9f6a85b0f051625eb6de8 *tests/testthat/helper-conditions.R 851263fa302d3e71a6c6bcce12b3fee4 *tests/testthat/helper-encoding.R 1b69ec8f03749708b08732f174959f23 *tests/testthat/helper-expectations.R 0140fe16ca92f6ccc106b0a653a8e6d0 *tests/testthat/helper-if-else.R 7eef47451ae4ef6e4436992be1ca42e5 *tests/testthat/helper-list-combine.R eec91a5285332b37236367ea1861b74b *tests/testthat/helper-list-of-transpose.R ead65126fe8184d9cddc7f63cff8e786 *tests/testthat/helper-memory.R 87aa31dfdb729ace70d16bcafeb1d67d *tests/testthat/helper-names.R 38d40d76f56b6e6e59caab46790e1cff *tests/testthat/helper-order.R acccb888200b46aee1cbccb3b196dfe9 *tests/testthat/helper-performance.R 1c51eac6a9c2059e5226acc5e421b322 *tests/testthat/helper-rational.R 6c7c09bb9ba81f9d4b8f122792b26656 *tests/testthat/helper-restart.R 224fe051010ee8f288d6a700fa345770 *tests/testthat/helper-s3.R 304f19ca28a88dd55d4ebc9187b3be5c *tests/testthat/helper-s4.R de9467f2b170e23da8bdba8c747de64b *tests/testthat/helper-shape.R 08de2e3c3356bb1199ec01f7840186ff *tests/testthat/helper-size.R e05655ee9cc75972c2be428f7a6efee7 *tests/testthat/helper-type-dplyr.R 4e96c9aba5d25277c76f2eafbf4797a0 *tests/testthat/helper-types.R 5a070c45743eb33996257472ea57a8bc *tests/testthat/helper-vctrs.R d0509b42f4a6669cf24cf3897576c575 *tests/testthat/test-arith.R 505a8556120a76736f169629f62160ac *tests/testthat/test-assert.R 71a4f2ac3a3e52092356a4f389cf3304 *tests/testthat/test-bind.R d2f76479cac77c26538f0cf5b046e060 *tests/testthat/test-c.R 9d1b3ed20a56765b4c6bd1ed9b3ddf5f *tests/testthat/test-case-when.R 8d0155003c6fb700ed3f35ea62cdec66 *tests/testthat/test-cast.R a16b660ccd2db8d1c576e86f46e56c16 *tests/testthat/test-compare.R 6e98dd853ae4f780ab01ece3ddefa99f *tests/testthat/test-complete.R 212fe3c9ae8cb65074080772e0260cc1 *tests/testthat/test-conditions.R c1020821c695a9225a50d707e89a4806 *tests/testthat/test-dictionary.R b8df5fc1c46481528f87107e788a3ad5 *tests/testthat/test-dim.R c159a66c99612e39b13232c167ff1e6d *tests/testthat/test-empty.R 689d6a93d9200a22dbb806bf01f757db *tests/testthat/test-encoding.R 101a3770331dd13aecd2d2c6a1baefb2 *tests/testthat/test-equal.R b7dd6e43a8684914ded5a1a7b7537ec5 *tests/testthat/test-error-call.R 4972ba8bc9a1a2638b030ba304524cc6 *tests/testthat/test-expand.R b969427130409849c1ff5935f5107d80 *tests/testthat/test-fields.R 43b67867bf30f6de3ea92d957e5a8087 *tests/testthat/test-fill.R ed67a9146b3c48b8efc1e5e13f12da76 *tests/testthat/test-group.R c78ab3511935da4bef39a4997220c75c *tests/testthat/test-hash.R 0477568d36f78c03ea6fa8b507fa01d6 *tests/testthat/test-if-else.R 4c52894709751ffedc1a990e8d1c9d5e *tests/testthat/test-interval.R 2f74473329b433cfc44debe9c8b9cd0a *tests/testthat/test-lifecycle-deprecated.R e0ef3464b44128c66a147f866a8df38e *tests/testthat/test-list-combine.R b130c1afe6f632c89d17f3cc5338cf4a *tests/testthat/test-list-of-transpose.R 7598c24a654c3d362e2a864ac6f91d1c *tests/testthat/test-list-unchop.R 3444fa58de8a0435d41648d25b65542b *tests/testthat/test-match.R b5614140aa4268610647fc87686f3344 *tests/testthat/test-missing.R c3b4fc3088b6849dc43ded1b64032121 *tests/testthat/test-names.R 2cdb260f06b9b11c3ce8d601705e34a8 *tests/testthat/test-order.R 46523c4d3993a03bab557d2db5dec117 *tests/testthat/test-parallel.R 41f46f06e4d4a1b3a0dc3bf9ae8f9568 *tests/testthat/test-print-str.R f7521b48ce1be01b3ea84c5a451087e2 *tests/testthat/test-proxy-restore.R 3b31cbcc0e8bdc82f853cf2015592324 *tests/testthat/test-proxy.R e0b1c0a95f8a9758c0af8426c8c8dbfd *tests/testthat/test-ptype-abbr-full.R df69f7eb73fd07d1a2285ba5ff8c6ebe *tests/testthat/test-rank.R 72cc1e0cd6abbab35680b32d2095cd09 *tests/testthat/test-recode.R 674b9b0a309af4ef2eabf1b4526761d0 *tests/testthat/test-recycle.R ec405a5ce9d001d86444c1d644e0728f *tests/testthat/test-rep.R 8727bd0cba54aca4fee82f0d13b48e3f *tests/testthat/test-runs.R 1edf63284c9fd3a48308f7a7dd9b2986 *tests/testthat/test-s4.R cbc46c8d34855b5af2f73992314dbcfe *tests/testthat/test-set.R 7875155433f1be618e267ec4579ed254 *tests/testthat/test-shape.R 179d5dc330eff7a39983d449122d8a6f *tests/testthat/test-size.R 4777e3b3ab7bc44f3999f52498788d70 *tests/testthat/test-slice-assign.R 91d51ea09d61541684570fa35178d363 *tests/testthat/test-slice-chop.R ef64671185af631ced34a4c832b276cc *tests/testthat/test-slice-interleave.R 792b0cbcb54815feda0f4832dab6b6b7 *tests/testthat/test-slice.R 196f2157283fb712306369b83cef5431 *tests/testthat/test-split.R da46c9976ca015da2bab74285f697f96 *tests/testthat/test-subscript-loc.R 7de194b11fadd308d51ff52aa193a43e *tests/testthat/test-subscript.R fc6ebe4586bd53b0e8b70ada1053bbeb *tests/testthat/test-type-asis.R 03a71d1ac7451cee063eb2cba3ceb381 *tests/testthat/test-type-bare.R a800d0d28faf22bf839798a95faea9a9 *tests/testthat/test-type-data-frame.R 051c2a8ab161e9f2ded850a1331e5275 *tests/testthat/test-type-data-table.R bc66a1fd1e998769a611442dbe08e79a *tests/testthat/test-type-date-time.R ff3afe61fc617323223db4cb385b5855 *tests/testthat/test-type-dplyr.R 3d1630cf3627a08aefa0192c26a27278 *tests/testthat/test-type-factor.R 6558c2c61d06b507b657898cdf4229d6 *tests/testthat/test-type-idate.R 49a1527a03b3b13a73b937eb6a7ff69d *tests/testthat/test-type-integer64.R 64aa45e61601f8effeabbe7e36f3d071 *tests/testthat/test-type-list-of.R 6adcaf979dd2b60a7b34202a4fc8b997 *tests/testthat/test-type-misc.R 03668e67c5b17350c50cf0d898b42924 *tests/testthat/test-type-rational.R 6cf67629b0d651a3ba3181bf19c3516b *tests/testthat/test-type-rcrd.R 652c6effd0c40a9f3ce47669ec569520 *tests/testthat/test-type-sclr.R 2ce9dec8f57f84f351ac5c4268303fb6 *tests/testthat/test-type-sf.R 8cf6beed5080e6f7383a469c6780c107 *tests/testthat/test-type-table.R cdc0d68533b10a8b848fb32d4af96bc1 *tests/testthat/test-type-tibble.R e391d8f927c40c97c9c6305532572ca9 *tests/testthat/test-type-unspecified.R 25a935512876a294b0c858ee166115ee *tests/testthat/test-type-vctr.R 7d00ca2129bd6b071c59a14ddae41560 *tests/testthat/test-type.R 83388c0c44507fab409fb7cae8881b15 *tests/testthat/test-type2.R 366e51f4f2457bd1416faf7515453213 *tests/testthat/test-utils.R 467829c51c041a5c9985bfc03c451096 *tests/testthat/test-vctrs.R 7dec628565582a504f5aa9744f865cd9 *vignettes/pillar.Rmd 25c232a97efcf11ce02678a722282e43 *vignettes/s3-vector.Rmd 4839ddd2a359470622d6edcca6c49edf *vignettes/stability.Rmd 317b475c17b8b2a70f68cca0402f9eb9 *vignettes/type-size.Rmd vctrs/R/0000755000176200001440000000000015157004241011613 5ustar liggesusersvctrs/R/set.R0000644000176200001440000001036615072256373012552 0ustar liggesusers#' Set operations #' #' @description #' - `vec_set_intersect()` returns all values in both `x` and `y`. #' #' - `vec_set_difference()` returns all values in `x` but not `y`. Note #' that this is an asymmetric set difference, meaning it is not commutative. #' #' - `vec_set_union()` returns all values in either `x` or `y`. #' #' - `vec_set_symmetric_difference()` returns all values in either `x` or `y` #' but not both. This is a commutative difference. #' #' Because these are _set_ operations, these functions only return unique values #' from `x` and `y`, returned in the order they first appeared in the original #' input. Names of `x` and `y` are retained on the result, but names are always #' taken from `x` if the value appears in both inputs. #' #' These functions work similarly to [base::intersect()], [base::setdiff()], and #' [base::union()], but don't strip attributes and can be used with data frames. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x,y A pair of vectors. #' #' @param ptype If `NULL`, the default, the output type is determined by #' computing the common type between `x` and `y`. If supplied, both `x` and #' `y` will be cast to this type. #' #' @param x_arg,y_arg Argument names for `x` and `y`. These are used in error #' messages. #' #' @returns #' A vector of the common type of `x` and `y` (or `ptype`, if supplied) #' containing the result of the corresponding set function. #' #' @details #' Missing values are treated as equal to other missing values. For doubles and #' complexes, `NaN` are equal to other `NaN`, but not to `NA`. #' #' @section Dependencies: #' #' ## `vec_set_intersect()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' #' ## `vec_set_difference()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' #' ## `vec_set_union()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' - [vec_c()] #' #' ## `vec_set_symmetric_difference()` #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_ptype2()] #' - [vec_cast()] #' - [vec_c()] #' #' @name vec-set #' @examples #' x <- c(1, 2, 1, 4, 3) #' y <- c(2, 5, 5, 1) #' #' # All unique values in both `x` and `y`. #' # Duplicates in `x` and `y` are always removed. #' vec_set_intersect(x, y) #' #' # All unique values in `x` but not `y` #' vec_set_difference(x, y) #' #' # All unique values in either `x` or `y` #' vec_set_union(x, y) #' #' # All unique values in either `x` or `y` but not both #' vec_set_symmetric_difference(x, y) #' #' # These functions can also be used with data frames #' x <- data_frame( #' a = c(2, 3, 2, 2), #' b = c("j", "k", "j", "l") #' ) #' y <- data_frame( #' a = c(1, 2, 2, 2, 3), #' b = c("j", "l", "j", "l", "j") #' ) #' #' vec_set_intersect(x, y) #' vec_set_difference(x, y) #' vec_set_union(x, y) #' vec_set_symmetric_difference(x, y) #' #' # Vector names don't affect set membership, but if you'd like to force #' # them to, you can transform the vector into a two column data frame #' x <- c(a = 1, b = 2, c = 2, d = 3) #' y <- c(c = 2, b = 1, a = 3, d = 3) #' #' vec_set_intersect(x, y) #' #' x <- data_frame(name = names(x), value = unname(x)) #' y <- data_frame(name = names(y), value = unname(y)) #' #' vec_set_intersect(x, y) NULL #' @rdname vec-set #' @export vec_set_intersect <- function( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) { check_dots_empty0(...) .Call(ffi_vec_set_intersect, x, y, ptype, environment()) } #' @rdname vec-set #' @export vec_set_difference <- function( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) { check_dots_empty0(...) .Call(ffi_vec_set_difference, x, y, ptype, environment()) } #' @rdname vec-set #' @export vec_set_union <- function( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) { check_dots_empty0(...) .Call(ffi_vec_set_union, x, y, ptype, environment()) } #' @rdname vec-set #' @export vec_set_symmetric_difference <- function( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) { check_dots_empty0(...) .Call(ffi_vec_set_symmetric_difference, x, y, ptype, environment()) } vctrs/R/size.R0000644000176200001440000001166615075743736012744 0ustar liggesusers#' Number of observations #' #' @description #' #' `vec_size(x)` returns the size of a vector. `vec_is_empty()` #' returns `TRUE` if the size is zero, `FALSE` otherwise. #' #' The size is distinct from the [length()] of a vector because it #' generalises to the "number of observations" for 2d structures, #' i.e. it's the number of rows in matrix or a data frame. This #' definition has the important property that every column of a data #' frame (even data frame and matrix columns) have the same size. #' `vec_size_common(...)` returns the common size of multiple vectors. #' #' `list_sizes()` returns an integer vector containing the size of each element #' of a list. It is nearly equivalent to, but faster than, #' `map_int(x, vec_size)`, with the exception that `list_sizes()` will #' error on non-list inputs, as defined by [obj_is_list()]. `list_sizes()` is #' to `vec_size()` as [lengths()] is to [length()]. #' #' @seealso [vec_slice()] for a variation of `[` compatible with `vec_size()`, #' and [vec_recycle()] to [recycle][theory-faq-recycling] vectors to common #' length. #' @section Invariants: #' * `vec_size(dataframe)` == `vec_size(dataframe[[i]])` #' * `vec_size(matrix)` == `vec_size(matrix[, i, drop = FALSE])` #' * `vec_size(vec_c(x, y))` == `vec_size(x)` + `vec_size(y)` #' #' @inheritParams rlang::args_error_context #' #' @param x,... Vector inputs or `NULL`. #' @param .size If `NULL`, the default, the output size is determined by #' recycling the lengths of all elements of `...`. Alternatively, you can #' supply `.size` to force a known size; in this case, `x` and `...` are #' ignored. #' @param .absent The size used when no input is provided, or when all input #' is `NULL`. If left as `NULL` when no input is supplied, an error is thrown. #' @return An integer (or double for long vectors). #' #' `vec_size_common()` returns `.absent` if all inputs are `NULL` or #' absent, `0L` by default. #' #' #' @details #' #' There is no vctrs helper that retrieves the number of columns: as this #' is a property of the [type][vec_ptype_show()]. #' #' `vec_size()` is equivalent to `NROW()` but has a name that is easier to #' pronounce, and throws an error when passed non-vector inputs. #' #' #' @section The size of NULL: #' #' The size of `NULL` is hard-coded to `0L` in `vec_size()`. #' `vec_size_common()` returns `.absent` when all inputs are `NULL` #' (if only some inputs are `NULL`, they are simply ignored). #' #' A default size of 0 makes sense because sizes are most often #' queried in order to compute a total size while assembling a #' collection of vectors. Since we treat `NULL` as an absent input by #' principle, we return the identity of sizes under addition to #' reflect that an absent input doesn't take up any size. #' #' Note that other defaults might make sense under different #' circumstances. For instance, a default size of 1 makes sense for #' finding the common size because 1 is the identity of the recycling #' rules. #' #' @section Dependencies: #' - [vec_proxy()] #' #' @export #' @examples #' vec_size(1:100) #' vec_size(mtcars) #' vec_size(array(dim = c(3, 5, 10))) #' #' vec_size_common(1:10, 1:10) #' vec_size_common(1:10, 1) #' vec_size_common(integer(), 1) #' #' list_sizes(list("a", 1:5, letters)) vec_size <- function(x) { .Call(ffi_size, x, environment()) } #' @export #' @rdname vec_size vec_size_common <- function( ..., .size = NULL, .absent = 0L, .arg = "", .call = caller_env() ) { .External2(ffi_size_common, list2(...), .size, .absent) } #' @rdname vec_size #' @export list_sizes <- function(x) { .Call(ffi_list_sizes, x, environment()) } #' @rdname vec_size #' @export vec_is_empty <- function(x) { vec_size(x) == 0L } #' Default value for empty vectors #' #' Use this inline operator when you need to provide a default value for #' empty (as defined by [vec_is_empty()]) vectors. #' #' @param x A vector #' @param y Value to use if `x` is empty. To preserve type-stability, should #' be the same type as `x`. #' @rdname op-empty-default #' @export #' @examples #' 1:10 %0% 5 #' integer() %0% 5 `%0%` <- function(x, y) { if (vec_is_empty(x)) y else x } # sequences ------------------------------------------------------------------- #' Useful sequences #' #' `vec_seq_along()` is equivalent to [seq_along()] but uses size, not length. #' `vec_init_along()` creates a vector of missing values with size matching #' an existing object. #' #' @param x,y Vectors #' @return #' * `vec_seq_along()` an integer vector with the same size as `x`. #' * `vec_init_along()` a vector with the same type as `x` and the same size #' as `y`. #' @export #' @examples #' vec_seq_along(mtcars) #' vec_init_along(head(mtcars)) vec_seq_along <- function(x) { seq_len(vec_size(x)) } #' @export #' @rdname vec_seq_along vec_init_along <- function(x, y = x) { vec_slice(x, rep_len(NA_integer_, vec_size(y))) } vec_as_short_length <- function(n, arg = caller_arg(n), call = caller_env()) { .Call(ffi_as_short_length, n, environment()) } vctrs/R/altrep-lazy-character.R0000644000176200001440000000221415105704057016140 0ustar liggesusers#' Lazy character vector #' #' `new_lazy_character()` takes a function with no arguments which must return #' a character vector of arbitrary length. The function will be evaluated #' exactly once whenever any properties of the character vector are required #' (including the length or any vector elements). #' #' A "real" production level implementation might work more like #' `carrier::crate()`, where the function is isolated and users must explicitly #' provide any data required to evaluate the function, since the time of #' evaluation is unknown. #' #' As of June 2023, running `x <- new_lazy_character(~ c("x", "y"))` in the #' RStudio console will call the ALTREP length method, which materializes the #' object. Doing this in a terminal session running R does not, so it is an #' RStudio issue. This doesn't affect tests run within a `test_that()` block. #' #' @param fn A function with no arguments returning a character vector. #' #' @noRd new_lazy_character <- function(fn) { fn <- as_function(fn) .Call(ffi_altrep_new_lazy_character, fn) } lazy_character_is_materialized <- function(x) { .Call(ffi_altrep_lazy_character_is_materialized, x) } vctrs/R/dim.R0000644000176200001440000000311315072256373012520 0ustar liggesusers#' Actual vector dimensions #' #' @description #' * `vec_dim_n()` gives the dimensionality (i.e. number of dimensions) #' * `vec_dim()` returns the size of each dimension #' #' These functions access the raw `"dim"` attribute of the object #' and do not dispatch over the [dim()] generic. #' #' @details #' Unlike base R, we treat vectors with `NULL` dimensions as 1d. This #' simplifies the type system by eliding a special case. Compared to the base R #' equivalent, `vec_dim()` returns [length()], not `NULL`, when `x` is 1d. #' #' @seealso #' `dim2()`, a variant of [dim()] that returns [length()] if an object #' doesn't have dimensions. #' #' @param x A vector #' @noRd #' @examples #' # Compared to base R #' x <- 1:5 #' dim(x) #' vec_dim(x) NULL # FIXME: Should `vec_dim()` return the size instead of the length? vec_dim <- function(x) { .Call(vctrs_dim, x) } vec_dim_n <- function(x) { .Call(vctrs_dim_n, x) } has_dim <- function(x) { .Call(vctrs_has_dim, x) } #' Perceived vector dimensions #' #' @description #' `dim2()` is a variant of [dim()] that returns [vec_size()] if an object #' doesn't have dimensions. #' #' @details #' Unlike base R, we treat vectors with `NULL` dimensions as 1d. This #' simplifies the type system by eliding a special case. Compared to the base R #' equivalent, `dim2()` returns [length()], not `NULL`, when `x` is 1d. #' #' @seealso #' `vec_dim()`, a variant that never dispatches over the [dim()] generic. #' #' @param x A vector #' @noRd #' @examples #' # Compared to base R #' x <- 1:5 #' dim(x) #' vec_dim(x) dim2 <- function(x) { dim(x) %||% length(x) } vctrs/R/empty.R0000644000176200001440000000074014713505651013105 0ustar liggesusers#' Drop empty elements from a list #' #' `list_drop_empty()` removes empty elements from a list. This includes `NULL` #' elements along with empty vectors, like `integer(0)`. This is equivalent to, #' but faster than, `vec_slice(x, list_sizes(x) != 0L)`. #' #' @section Dependencies: #' - [vec_slice()] #' #' @param x A list. #' #' @export #' @examples #' x <- list(1, NULL, integer(), 2) #' list_drop_empty(x) list_drop_empty <- function(x) { .Call(vctrs_list_drop_empty, x) } vctrs/R/type-factor.R0000644000176200001440000001350315065005761014203 0ustar liggesusers#' Factor/ordered factor S3 class #' #' A [factor] is an integer with attribute `levels`, a character vector. There #' should be one level for each integer between 1 and `max(x)`. #' An [ordered] factor has the same properties as a factor, but possesses #' an extra class that marks levels as having a total ordering. #' #' These functions help the base factor and ordered factor classes fit in to #' the vctrs type system by providing constructors, coercion functions, #' and casting functions. `new_factor()` and `new_ordered()` are low-level #' constructors - they only check that types, but not values, are valid, so #' are for expert use only. #' #' @param x Integer values which index in to `levels`. #' @param levels Character vector of labels. #' @param ...,class Used to for subclasses. #' @keywords internal #' @export new_factor <- function( x = integer(), levels = character(), ..., class = character() ) { stopifnot(is.integer(x)) stopifnot(is.character(levels)) structure( x, levels = levels, ..., class = c(class, "factor") ) } #' @export #' @rdname new_factor new_ordered <- function(x = integer(), levels = character()) { new_factor(x = x, levels = levels, class = "ordered") } #' @export vec_proxy.factor <- function(x, ...) { x } #' @export vec_proxy.ordered <- function(x, ...) { x } #' @export vec_restore.factor <- function(x, to, ...) { NextMethod() } #' @export vec_restore.ordered <- function(x, to, ...) { NextMethod() } # Print ------------------------------------------------------------------- #' @export vec_ptype_full.factor <- function(x, ...) { paste0("factor<", hash_label(levels(x)), ">", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.factor <- function(x, ...) { "fct" } #' @export vec_ptype_full.ordered <- function(x, ...) { paste0("ordered<", hash_label(levels(x)), ">", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.ordered <- function(x, ...) { "ord" } # Coerce ------------------------------------------------------------------ #' @rdname new_factor #' @export vec_ptype2.factor #' @method vec_ptype2 factor #' @export vec_ptype2.factor <- function(x, y, ...) { UseMethod("vec_ptype2.factor") } #' @export vec_ptype2.factor.factor <- function(x, y, ...) { stop_native_implementation("vec_ptype2.factor.factor") } #' @export vec_ptype2.character.factor <- function(x, y, ...) { stop_native_implementation("vec_ptype2.character.factor") } #' @export vec_ptype2.factor.character <- function(x, y, ...) { stop_native_implementation("vec_ptype2.factor.character") } #' @rdname new_factor #' @export vec_ptype2.ordered #' @method vec_ptype2 ordered #' @export vec_ptype2.ordered <- function(x, y, ...) { UseMethod("vec_ptype2.ordered") } #' @export vec_ptype2.ordered.ordered <- function(x, y, ...) { stop_native_implementation("vec_ptype2.ordered.ordered") } #' @export vec_ptype2.ordered.character <- function(x, y, ...) { stop_native_implementation("vec_ptype2.ordered.character") } #' @export vec_ptype2.character.ordered <- function(x, y, ...) { stop_native_implementation("vec_ptype2.character.ordered") } #' @export vec_ptype2.ordered.factor <- function(x, y, ...) { vec_default_ptype2(x, y, ...) } #' @export vec_ptype2.factor.ordered <- function(x, y, ...) { vec_default_ptype2(x, y, ...) } # Cast -------------------------------------------------------------------- #' @rdname new_factor #' @export vec_cast.factor #' @method vec_cast factor #' @export vec_cast.factor <- function(x, to, ...) { UseMethod("vec_cast.factor") } fct_cast <- function(x, to, ..., call = caller_env()) { fct_cast_impl(x, to, ..., ordered = FALSE, call = call) } fct_cast_impl <- function( x, to, ..., x_arg = "", to_arg = "", ordered = FALSE, call = caller_env() ) { if (length(levels(to)) == 0L) { levels <- levels(x) if (is.null(levels)) { exclude <- NA levels <- unique(x) } else { exclude <- NULL } factor( as.character(x), levels = levels, ordered = ordered, exclude = exclude ) } else { lossy <- !(x %in% levels(to) | is.na(x)) out <- factor( x, levels = levels(to), ordered = ordered, exclude = NULL ) maybe_lossy_cast( out, x, to, lossy, loss_type = "generality", x_arg = x_arg, to_arg = to_arg, call = call ) } } #' @export vec_cast.factor.factor <- function(x, to, ...) { fct_cast(x, to, ...) } #' @export vec_cast.factor.character <- function(x, to, ...) { fct_cast(x, to, ...) } #' @export vec_cast.character.factor <- function(x, to, ...) { stop_native_implementation("vec_cast.character.factor") } #' @rdname new_factor #' @export vec_cast.ordered #' @method vec_cast ordered #' @export vec_cast.ordered <- function(x, to, ...) { UseMethod("vec_cast.ordered") } ord_cast <- function(x, to, ..., call = caller_env()) { fct_cast_impl(x, to, ..., ordered = TRUE, call = call) } #' @export vec_cast.ordered.ordered <- function(x, to, ...) { ord_cast(x, to, ...) } #' @export vec_cast.ordered.character <- function(x, to, ...) { ord_cast(x, to, ...) } #' @export vec_cast.character.ordered <- function(x, to, ...) { stop_native_implementation("vec_cast.character.ordered") } # Math and arithmetic ----------------------------------------------------- #' @export vec_math.factor <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_arith.factor <- function(op, x, y, ...) { stop_unsupported(x, op) } # Helpers ----------------------------------------------------------------- hash_label <- function(x, length = 5) { if (length(x) == 0) { "" } else { # Can't use obj_hash() because it hashes the string pointers # for performance, so the values in the test change each time substr(rlang::hash(x), 1, length) } } levels_union <- function(x, y) { union(levels(x), levels(y)) } vctrs/R/complete.R0000644000176200001440000000300514713505651013554 0ustar liggesusers#' Complete #' #' @description #' `vec_detect_complete()` detects "complete" observations. An observation is #' considered complete if it is non-missing. For most vectors, this implies that #' `vec_detect_complete(x) == !vec_detect_missing(x)`. #' #' For data frames and matrices, a row is only considered complete if all #' elements of that row are non-missing. To compare, `!vec_detect_missing(x)` #' detects rows that are partially complete (they have at least one non-missing #' value). #' #' @details #' A [record][new_rcrd] type vector is similar to a data frame, and is only #' considered complete if all fields are non-missing. #' #' @param x A vector #' #' @return #' A logical vector with the same size as `x`. #' #' @seealso [stats::complete.cases()] #' @export #' @examples #' x <- c(1, 2, NA, 4, NA) #' #' # For most vectors, this is identical to `!vec_detect_missing(x)` #' vec_detect_complete(x) #' !vec_detect_missing(x) #' #' df <- data_frame( #' x = x, #' y = c("a", "b", NA, "d", "e") #' ) #' #' # This returns `TRUE` where all elements of the row are non-missing. #' # Compare that with `!vec_detect_missing()`, which detects rows that have at #' # least one non-missing value. #' df2 <- df #' df2$all_non_missing <- vec_detect_complete(df) #' df2$any_non_missing <- !vec_detect_missing(df) #' df2 vec_detect_complete <- function(x) { .Call(vctrs_detect_complete, x) } vec_slice_complete <- function(x) { .Call(vctrs_slice_complete, x) } vec_locate_complete <- function(x) { .Call(vctrs_locate_complete, x) } vctrs/R/import-standalone-types-check.R0000644000176200001440000002531015072256373017627 0ustar liggesusers# Standalone file: do not edit by hand # Source: https://github.com/r-lib/rlang/blob/HEAD/R/standalone-types-check.R # Generated by: usethis::use_standalone("r-lib/rlang", "types-check") # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-types-check.R # last-updated: 2023-03-13 # license: https://unlicense.org # dependencies: standalone-obj-type.R # imports: rlang (>= 1.1.0) # --- # # ## Changelog # # 2025-09-19: # - `check_logical()` gains an `allow_na` argument (@jonthegeek, #1724) # # 2024-08-15: # - `check_character()` gains an `allow_na` argument (@martaalcalde, #1724) # # 2023-03-13: # - Improved error messages of number checkers (@teunbrand) # - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich). # - Added `check_data_frame()` (@mgirlich). # # 2023-03-07: # - Added dependency on rlang (>= 1.1.0). # # 2023-02-15: # - Added `check_logical()`. # # - `check_bool()`, `check_number_whole()`, and # `check_number_decimal()` are now implemented in C. # # - For efficiency, `check_number_whole()` and # `check_number_decimal()` now take a `NULL` default for `min` and # `max`. This makes it possible to bypass unnecessary type-checking # and comparisons in the default case of no bounds checks. # # 2022-10-07: # - `check_number_whole()` and `_decimal()` no longer treat # non-numeric types such as factors or dates as numbers. Numeric # types are detected with `is.numeric()`. # # 2022-10-04: # - Added `check_name()` that forbids the empty string. # `check_string()` allows the empty string by default. # # 2022-09-28: # - Removed `what` arguments. # - Added `allow_na` and `allow_null` arguments. # - Added `allow_decimal` and `allow_infinite` arguments. # - Improved errors with absent arguments. # # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Added changelog. # # nocov start # Scalars ----------------------------------------------------------------- .standalone_types_check_dot_call <- .Call check_bool <- function( x, ..., allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if ( !missing(x) && .standalone_types_check_dot_call( ffi_standalone_is_bool_1.0.7, x, allow_na, allow_null ) ) { return(invisible(NULL)) } stop_input_type( x, c("`TRUE`", "`FALSE`"), ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_string <- function( x, ..., allow_empty = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = allow_empty, allow_na = allow_na, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a single string", ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .rlang_check_is_string <- function(x, allow_empty, allow_na, allow_null) { if (is_string(x)) { if (allow_empty || !is_string(x, "")) { return(TRUE) } } if (allow_null && is_null(x)) { return(TRUE) } if (allow_na && (identical(x, NA) || identical(x, na_chr))) { return(TRUE) } FALSE } check_name <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = FALSE, allow_na = FALSE, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a valid name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } IS_NUMBER_true <- 0 IS_NUMBER_false <- 1 IS_NUMBER_oob <- 2 check_number_decimal <- function( x, ..., min = NULL, max = NULL, allow_infinite = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if ( 0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = TRUE, min, max, allow_infinite, allow_na, allow_null )) ) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = TRUE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_number_whole <- function( x, ..., min = NULL, max = NULL, allow_infinite = FALSE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if ( 0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = FALSE, min, max, allow_infinite, allow_na, allow_null )) ) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = FALSE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .stop_not_number <- function( x, ..., exit_code, allow_decimal, min, max, allow_na, allow_null, arg, call ) { if (allow_decimal) { what <- "a number" } else { what <- "a whole number" } if (exit_code == IS_NUMBER_oob) { min <- min %||% -Inf max <- max %||% Inf if (min > -Inf && max < Inf) { what <- sprintf("%s between %s and %s", what, min, max) } else if (x < min) { what <- sprintf("%s larger than or equal to %s", what, min) } else if (x > max) { what <- sprintf("%s smaller than or equal to %s", what, max) } else { abort("Unexpected state in OOB check", .internal = TRUE) } } stop_input_type( x, what, ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_symbol <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a symbol", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_arg <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an argument name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_call <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_call(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a defused call", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_environment <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_environment(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an environment", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_function <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_function(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a function", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_closure <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_closure(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an R function", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_formula <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_formula(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a formula", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } # Vectors ----------------------------------------------------------------- # TODO: Figure out what to do with logical `NA` and `allow_na = TRUE` check_character <- function( x, ..., allow_na = TRUE, allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_character(x)) { if (!allow_na && any(is.na(x))) { abort( sprintf("`%s` can't contain NA values.", arg), arg = arg, call = call ) } return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a character vector", ..., allow_null = allow_null, arg = arg, call = call ) } check_logical <- function( x, ..., allow_na = TRUE, allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is_logical(x)) { if (!allow_na && any(is.na(x))) { abort( sprintf("`%s` can't contain NA values.", arg), arg = arg, call = call ) } return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a logical vector", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_data_frame <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { if (!missing(x)) { if (is.data.frame(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a data frame", ..., allow_null = allow_null, arg = arg, call = call ) } # nocov end vctrs/R/type-sf.R0000644000176200001440000000721215156000460013325 0ustar liggesusers# Imported at load-time in `sf_env` st_crs = function(...) stop_sf() st_precision = function(...) stop_sf() st_as_sf = function(...) stop_sf() stop_sf = function() abort("Internal error: Failed sf import.") sf_deps = c( "st_crs", "st_precision", "st_as_sf" ) sf_env = env() # sf namespace local(envir = sf_env, { # Registered at load-time (same for all other methods) vec_proxy_sf = function(x, ...) { x } vec_restore_sf = function(x, to, ...) { sfc_name = attr(to, "sf_column") crs = st_crs(to) prec = st_precision(to) st_as_sf( x, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } sf_ptype2 = function(x, y, ...) { data = vctrs::df_ptype2(x, y, ...) # Workaround for `c()` fallback sentinels. Must be fixed before # moving the methods downstream. opts <- match_fallback_opts(...) if (identical(opts$s3_fallback, S3_FALLBACK_true)) { return(data) } x_sf <- inherits(x, "sf") y_sf <- inherits(y, "sf") if (x_sf && y_sf) { # Take active geometry from left-hand side sfc_name = attr(x, "sf_column") # CRS and precision must match crs = common_crs(x, y) prec = common_prec(x, y) } else if (x_sf) { sfc_name = attr(x, "sf_column") crs = st_crs(x) prec = st_precision(x) } else if (y_sf) { sfc_name = attr(y, "sf_column") crs = st_crs(y) prec = st_precision(y) } else { stop("Internal error: Expected at least one `sf` input.") } st_as_sf( data, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } vec_ptype2_sf_sf = function(x, y, ...) { sf_ptype2(x, y, ...) } vec_ptype2_sf_data.frame = function(x, y, ...) { sf_ptype2(x, y, ...) } vec_ptype2_data.frame_sf = function(x, y, ...) { sf_ptype2(x, y, ...) } # Maybe we should not have these methods, but they are currently # required to avoid the base-df fallback vec_ptype2_sf_tbl_df = function(x, y, ...) { new_data_frame(sf_ptype2(x, y, ...)) } vec_ptype2_tbl_df_sf = function(x, y, ...) { new_data_frame(sf_ptype2(x, y, ...)) } sf_cast = function(x, to, ...) { data = vctrs::df_cast(x, to, ...) # Workaround for `c()` fallback sentinels. Must be fixed before # moving the methods downstream. opts <- match_fallback_opts(...) if (identical(opts$s3_fallback, S3_FALLBACK_true)) { return(data) } sfc_name = attr(to, "sf_column") crs = st_crs(to) prec = st_precision(to) st_as_sf( data, sf_column_name = sfc_name, crs = crs, precision = prec, stringsAsFactors = FALSE ) } vec_cast_sf_sf = function(x, to, ...) { sf_cast(x, to, ...) } vec_cast_sf_data.frame = function(x, to, ...) { sf_cast(x, to, ...) } vec_cast_data.frame_sf = function(x, to, ...) { df_cast(x, to, ...) } vec_proxy_order_sfc <- function(x, ...) { # These are list columns, so they need to use the order-by-appearance proxy # that is defined by `vec_proxy_order.list()` x <- unstructure(x) vec_proxy_order(x) } # take conservative approach of requiring equal CRS and precision common_crs = function(x, y) { lhs = st_crs(x) rhs = st_crs(y) if (lhs != rhs) { stop("coordinate reference systems not equal: use st_transform() first?") } lhs } common_prec = function(x, y) { lhs = st_precision(x) rhs = st_precision(y) if (lhs != rhs) { stop("precisions not equal") } lhs } }) # local(envir = sf_env) env_bind(ns_env("vctrs"), !!!as.list(sf_env)) vctrs/R/vctrs-deprecated.R0000644000176200001440000001155715065005761015214 0ustar liggesusers#' Is a vector empty #' #' @description #' #' `r lifecycle::badge("defunct")` #' #' This function is defunct, please use [vec_is_empty()]. #' #' @param x An object. #' #' @keywords internal #' @export vec_empty <- function(x) { # Defunct: 2019-06 lifecycle::deprecate_stop( when = "0.2.0", what = "vec_empty()", with = "vec_is_empty()" ) } #' Deprecated type functions #' #' @description #' #' `r lifecycle::badge("deprecated")` #' #' These functions have been renamed: #' #' * `vec_type()` => [vec_ptype()] #' * `vec_type2()` => [vec_ptype2()] #' * `vec_type_common()` => [vec_ptype_common()] #' #' @param x,y,...,.ptype Arguments for deprecated functions. #' #' @keywords internal #' @export vec_type <- function(x) { # Deprecated: 2019-06 lifecycle::deprecate_warn( when = "0.2.0", what = "vec_type()", with = "vec_ptype()", always = TRUE ) vec_ptype(x) } #' @rdname vec_type #' @export vec_type_common <- function(..., .ptype = NULL) { # Deprecated: 2019-06 lifecycle::deprecate_warn( when = "0.2.0", what = "vec_type_common()", with = "vec_ptype_common()", always = TRUE ) vec_ptype_common(..., .ptype = .ptype) } #' @rdname vec_type #' @export vec_type2 <- function(x, y, ...) { # Deprecated: 2019-06 lifecycle::deprecate_warn( when = "0.2.0", what = "vec_type2()", with = "vec_ptype2()", always = TRUE ) vec_ptype2(x, y, ...) } #' Convert to an index vector #' #' @description #' #' `r lifecycle::badge("deprecated")` #' #' `vec_as_index()` has been renamed to [vec_as_location()] and is #' deprecated as of vctrs 0.2.2. #' #' @inheritParams vec_as_location #' #' @keywords internal #' @export vec_as_index <- function(i, n, names = NULL) { # Soft-deprecated: 2020-01 lifecycle::deprecate_soft( when = "0.2.2", what = "vec_as_index()", with = "vec_as_location()" ) n <- vec_cast(n, integer()) vec_check_size(n, size = 1L) i <- vec_as_subscript(i) # Picked up from the environment at the C level arg <- NULL .Call( ffi_as_location, i = i, n = n, names = names, loc_negative = "invert", loc_oob = "error", loc_zero = "remove", missing = "propagate", env = environment() ) } #' Expand the length of a vector #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_repeat()` has been replaced with [vec_rep()] and [vec_rep_each()] and is #' deprecated as of vctrs 0.3.0. #' #' @param x A vector. #' @param each Number of times to repeat each element of `x`. #' @param times Number of times to repeat the whole vector of `x`. #' @return A vector the same type as `x` with size `vec_size(x) * times * each`. #' @keywords internal #' @export vec_repeat <- function(x, each = 1L, times = 1L) { # Soft-deprecated: 2020-03 lifecycle::deprecate_soft( when = "0.3.0", what = "vec_repeat()", with = I("either `vec_rep()` or `vec_rep_each()`") ) vec_check_size(each, size = 1L) vec_check_size(times, size = 1L) idx <- rep(vec_seq_along(x), times = times, each = each) vec_slice(x, idx) } #' Chopping #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_unchop()` has been renamed to [list_unchop()] and is deprecated as of #' vctrs 0.5.0. #' #' @inheritParams list_unchop #' @inherit list_unchop return #' #' @keywords internal #' @export vec_unchop <- function( x, indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal") ) { # Soft-deprecated: 2022-09 lifecycle::deprecate_soft("0.5.0", "vec_unchop()", "list_unchop()") list_unchop( x = x, indices = indices, ptype = ptype, name_spec = name_spec, name_repair = name_repair ) } #' Missing values #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `vec_equal_na()` has been renamed to [vec_detect_missing()] and is deprecated #' as of vctrs 0.5.0. #' #' @inheritParams vec_detect_missing #' #' @return #' A logical vector the same size as `x`. #' #' @keywords internal #' @export vec_equal_na <- function(x) { # Soft-deprecated: 2022-09 lifecycle::deprecate_soft("0.5.0", "vec_equal_na()", "vec_detect_missing()") vec_detect_missing(x) } #' List checks #' #' @description #' `r lifecycle::badge("deprecated")` #' #' These functions have been deprecated as of vctrs 0.6.0. #' #' - `vec_is_list()` has been renamed to [obj_is_list()]. #' - `vec_check_list()` has been renamed to [obj_check_list()]. #' #' @inheritParams obj_is_list #' #' @keywords internal #' @export vec_is_list <- function(x) { # Silently-deprecated: 2023-03 # lifecycle::deprecate_soft("0.6.0", "vec_is_list()", "obj_is_list()") obj_is_list(x) } #' @rdname vec_is_list #' @export vec_check_list <- function(x, ..., arg = caller_arg(x), call = caller_env()) { # Silently-deprecated: 2023-03 # lifecycle::deprecate_soft("0.6.0", "vec_check_list()", "obj_check_list()") obj_check_list(x, ..., arg = arg, call = call) } vctrs/R/recycle.R0000644000176200001440000000316715113325071013372 0ustar liggesusers#' Vector recycling #' #' `vec_recycle(x, size)` recycles a single vector to a given size. #' `vec_recycle_common(...)` recycles multiple vectors to their common size. All #' functions obey the [vctrs recycling rules][theory-faq-recycling], and will #' throw an error if recycling is not possible. See [vec_size()] for the precise #' definition of size. #' #' @inheritParams rlang::args_error_context #' #' @param x A vector to recycle. #' @param ... Depending on the function used: #' * For `vec_recycle_common()`, vectors to recycle. #' * For `vec_recycle()`, these dots should be empty. #' @param size Desired output size. #' @param .size Desired output size. If omitted, #' will use the common size from [vec_size_common()]. #' @param x_arg Argument name for `x`. These are used in error #' messages to inform the user about which argument has an #' incompatible size. #' #' @section Dependencies: #' - [vec_slice()] #' #' @export #' @examples #' # Inputs with 1 observation are recycled #' vec_recycle_common(1:5, 5) #' vec_recycle_common(integer(), 5) #' \dontrun{ #' vec_recycle_common(1:5, 1:2) #' } #' #' # Data frames and matrices are recycled along their rows #' vec_recycle_common(data.frame(x = 1), 1:5) #' vec_recycle_common(array(1:2, c(1, 2)), 1:5) #' vec_recycle_common(array(1:3, c(1, 3, 1)), 1:5) vec_recycle <- function(x, size, ..., x_arg = "", call = caller_env()) { check_dots_empty0(...) .Call(ffi_recycle, x, size, environment()) } #' @export #' @rdname vec_recycle vec_recycle_common <- function( ..., .size = NULL, .arg = "", .call = caller_env() ) { .External2(ffi_recycle_common, list2(...), .size) } vctrs/R/fill.R0000644000176200001440000000243515065005761012676 0ustar liggesusers#' Fill in missing values with the previous or following value #' #' @description #' `vec_fill_missing()` fills gaps of missing values with the previous or #' following non-missing value. #' #' @param x A vector #' @param direction Direction in which to fill missing values. Must be either #' `"down"`, `"up"`, `"downup"`, or `"updown"`. #' @param max_fill A single positive integer specifying the maximum number of #' sequential missing values that will be filled. If `NULL`, there is #' no limit. #' #' @export #' @examples #' x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) #' #' # Filling down replaces missing values with the previous non-missing value #' vec_fill_missing(x, direction = "down") #' #' # To also fill leading missing values, use `"downup"` #' vec_fill_missing(x, direction = "downup") #' #' # Limit the number of sequential missing values to fill with `max_fill` #' vec_fill_missing(x, max_fill = 1) #' #' # Data frames are filled rowwise. Rows are only considered missing #' # if all elements of that row are missing. #' y <- c(1, NA, 2, NA, NA, 3, 4, NA, 5) #' df <- data_frame(x = x, y = y) #' df #' #' vec_fill_missing(df) vec_fill_missing <- function( x, direction = c("down", "up", "downup", "updown"), max_fill = NULL ) { .Call(vctrs_fill_missing, x, direction, max_fill) } vctrs/R/order.R0000644000176200001440000002775015120272011013054 0ustar liggesusers# TODO: Use this NEWS bullet when we move to the new `vec_order()` algorithm # # * `vec_order()` and `vec_sort()` now use a custom radix sort algorithm, rather # than relying on `order()`. The implementation is based on data.table’s # `forder()` and their earlier contribution to R’s `order()`. There are four # major changes, outlined below, the first two of which are breaking changes. # If you need to retain the old ordering behavior, use `vec_order_base()`. # # * Character vectors now order in the C locale by default, which is _much_ # faster than ordering in the system's locale. To order in a specific locale, # you can provide a character proxy function through `chr_proxy_collate`, # such as `stringi::stri_sort_key()`. # # * Optional arguments, such as `direction` and `na_value`, must now be # specified by name. Specifying by position will result in an error. # # * When ordering data frames, you can now control the behavior of `direction` # and `na_value` on a per column basis. # # * There is a new `nan_distinct` argument for differentiating between `NaN` # and `NA` in double and complex vectors. #' Order and sort vectors #' #' @description #' `vec_order_radix()` computes the order of `x`. For data frames, the order is #' computed along the rows by computing the order of the first column and #' using subsequent columns to break ties. #' #' `vec_sort_radix()` sorts `x`. It is equivalent to `vec_slice(x, vec_order_radix(x))`. #' #' @inheritParams rlang::args_dots_empty #' #' @param x A vector #' @param direction Direction to sort in. #' - A single `"asc"` or `"desc"` for ascending or descending order #' respectively. #' - For data frames, a length `1` or `ncol(x)` character vector containing #' only `"asc"` or `"desc"`, specifying the direction for each column. #' @param na_value Ordering of missing values. #' - A single `"largest"` or `"smallest"` for ordering missing values as the #' largest or smallest values respectively. #' - For data frames, a length `1` or `ncol(x)` character vector containing #' only `"largest"` or `"smallest"`, specifying how missing values should #' be ordered within each column. #' @param nan_distinct A single logical specifying whether or not `NaN` should #' be considered distinct from `NA` for double and complex vectors. If `TRUE`, #' `NaN` will always be ordered between `NA` and non-missing numbers. #' @param chr_proxy_collate A function generating an alternate representation #' of character vectors to use for collation, often used for locale-aware #' ordering. #' - If `NULL`, no transformation is done. #' - Otherwise, this must be a function of one argument. If the input contains #' a character vector, it will be passed to this function after it has been #' translated to UTF-8. This function should return a character vector with #' the same length as the input. The result should sort as expected in the #' C-locale, regardless of encoding. #' #' For data frames, `chr_proxy_collate` will be applied to all character #' columns. #' #' Common transformation functions include: `tolower()` for case-insensitive #' ordering and `stringi::stri_sort_key()` for locale-aware ordering. #' #' @return #' * `vec_order_radix()` an integer vector the same size as `x`. #' * `vec_sort_radix()` a vector with the same size and type as `x`. #' #' @section Differences with `order()`: #' #' Unlike the `na.last` argument of `order()` which decides the positions of #' missing values irrespective of the `decreasing` argument, the `na_value` #' argument of `vec_order_radix()` interacts with `direction`. If missing values #' are considered the largest value, they will appear last in ascending order, #' and first in descending order. #' #' Character vectors are ordered in the C-locale. This is different from #' `base::order()`, which respects `base::Sys.setlocale()`. Sorting in a #' consistent locale can produce more reproducible results between different #' sessions and platforms, however, the results of sorting in the C-locale #' can be surprising. For example, capital letters sort before lower case #' letters. Sorting `c("b", "C", "a")` with `vec_sort_radix()` will return #' `c("C", "a", "b")`, but with `base::order()` will return `c("a", "b", "C")` #' unless `base::order(method = "radix")` is explicitly set, which also uses #' the C-locale. While sorting with the C-locale can be useful for #' algorithmic efficiency, in many real world uses it can be the cause of #' data analysis mistakes. To balance these trade-offs, you can supply a #' `chr_proxy_collate` function to transform character vectors into an #' alternative representation that orders in the C-locale in a less surprising #' way. For example, providing [base::tolower()] as a transform will order the #' original vector in a case-insensitive manner. Locale-aware ordering can be #' achieved by providing `stringi::stri_sort_key()` as a transform, setting the #' collation options as appropriate for your locale. #' #' Character vectors are always translated to UTF-8 before ordering, and before #' any transform is applied by `chr_proxy_collate`. #' #' For complex vectors, if either the real or imaginary component is `NA` or #' `NaN`, then the entire observation is considered missing. #' #' @section Dependencies of `vec_order_radix()`: #' * [vec_proxy_order()] #' #' @section Dependencies of `vec_sort_radix()`: #' * [vec_order_radix()] #' * [vec_slice()] #' #' @name order-radix #' @keywords internal #' #' @examples #' if (FALSE) { #' #' x <- round(sample(runif(5), 9, replace = TRUE), 3) #' x <- c(x, NA) #' #' vec_order_radix(x) #' vec_sort_radix(x) #' vec_sort_radix(x, direction = "desc") #' #' # Can also handle data frames #' df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) #' vec_order_radix(df) #' vec_sort_radix(df) #' vec_sort_radix(df, direction = "desc") #' #' # For data frames, `direction` and `na_value` are allowed to be vectors #' # with length equal to the number of columns in the data frame #' vec_sort_radix( #' df, #' direction = c("desc", "asc"), #' na_value = c("largest", "smallest") #' ) #' #' # Character vectors are ordered in the C locale, which orders capital letters #' # below lowercase ones #' y <- c("B", "A", "a") #' vec_sort_radix(y) #' #' # To order in a case-insensitive manner, provide a `chr_proxy_collate` #' # function that transforms the strings to all lowercase #' vec_sort_radix(y, chr_proxy_collate = tolower) #' #' } NULL #' @rdname order-radix vec_order_radix <- function( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) { check_dots_empty0(...) .Call(vctrs_order, x, direction, na_value, nan_distinct, chr_proxy_collate) } #' @rdname order-radix vec_sort_radix <- function( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) { check_dots_empty0(...) idx <- vec_order_radix( x = x, direction = direction, na_value = na_value, nan_distinct = nan_distinct, chr_proxy_collate = chr_proxy_collate ) vec_slice(x, idx) } # ------------------------------------------------------------------------------ #' Locate sorted groups #' #' @description #' `r lifecycle::badge("experimental")` #' #' `vec_locate_sorted_groups()` returns a data frame containing a `key` column #' with sorted unique groups, and a `loc` column with the locations of each #' group in `x`. It is similar to [vec_group_loc()], except the groups are #' returned sorted rather than by first appearance. #' #' @details #' `vec_locate_sorted_groups(x)` is equivalent to, but faster than: #' #' ``` #' info <- vec_group_loc(x) #' vec_slice(info, vec_order(info$key)) #' ``` #' #' @inheritParams order-radix #' #' @return #' A two column data frame with size equal to `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)`. #' * A `loc` column of type list, with elements of type integer. #' #' @section Dependencies of `vec_locate_sorted_groups()`: #' * [vec_proxy_order()] #' #' @export #' @keywords internal #' @examples #' df <- data.frame( #' g = sample(2, 10, replace = TRUE), #' x = c(NA, sample(5, 9, replace = TRUE)) #' ) #' #' # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys #' # are returned ordered rather than by first appearance. #' vec_locate_sorted_groups(df) #' #' vec_group_loc(df) vec_locate_sorted_groups <- function( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) { check_dots_empty0(...) .Call( vctrs_locate_sorted_groups, x, direction, na_value, nan_distinct, chr_proxy_collate ) } # ------------------------------------------------------------------------------ vec_order_info <- function( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) { check_dots_empty0(...) .Call( vctrs_order_info, x, direction, na_value, nan_distinct, chr_proxy_collate ) } # ------------------------------------------------------------------------------ #' Order and sort vectors #' #' @inheritParams rlang::args_dots_empty #' #' @param x A vector #' @param direction Direction to sort in. Defaults to `asc`ending. #' @param na_value Should `NA`s be treated as the largest or smallest values? #' @return #' * `vec_order()` an integer vector the same size as `x`. #' * `vec_sort()` a vector with the same size and type as `x`. #' #' @section Differences with `order()`: #' Unlike the `na.last` argument of `order()` which decides the #' positions of missing values irrespective of the `decreasing` #' argument, the `na_value` argument of `vec_order()` interacts with #' `direction`. If missing values are considered the largest value, #' they will appear last in ascending order, and first in descending #' order. #' #' @section Dependencies of `vec_order()`: #' * [vec_proxy_order()] #' #' @section Dependencies of `vec_sort()`: #' * [vec_proxy_order()] #' * [vec_order()] #' * [vec_slice()] #' @export #' @examples #' x <- round(c(runif(9), NA), 3) #' vec_order(x) #' vec_sort(x) #' vec_sort(x, direction = "desc") #' #' # Can also handle data frames #' df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) #' vec_order(df) #' vec_sort(df) #' vec_sort(df, direction = "desc") #' #' # Missing values interpreted as largest values are last when #' # in increasing order: #' vec_order(c(1, NA), na_value = "largest", direction = "asc") #' vec_order(c(1, NA), na_value = "largest", direction = "desc") vec_order <- function( x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest") ) { check_dots_empty0(...) direction <- arg_match0(direction, c("asc", "desc")) na_value <- arg_match0(na_value, c("largest", "smallest")) decreasing <- !identical(direction, "asc") na.last <- identical(na_value, "largest") if (decreasing) { na.last <- !na.last } proxy <- vec_proxy_order(x) if (is.data.frame(proxy)) { if (length(proxy) == 0L) { # Work around type-instability in `base::order()` return(vec_seq_along(proxy)) } args <- map(unstructure(proxy), function(.x) { if (is.data.frame(.x)) { .x <- order(vec_order(.x, direction = direction, na_value = na_value)) } .x }) exec("order", !!!args, decreasing = decreasing, na.last = na.last) } else if ( is_character(proxy) || is_logical(proxy) || is_integer(proxy) || is_double(proxy) || is.complex(proxy) ) { if (is.object(proxy)) { proxy <- unstructure(proxy) } order(proxy, decreasing = decreasing, na.last = na.last) } else { abort("Invalid type returned by `vec_proxy_order()`.") } } #' @export #' @rdname vec_order vec_sort <- function( x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest") ) { check_dots_empty0(...) direction <- arg_match0(direction, c("asc", "desc")) na_value <- arg_match0(na_value, c("largest", "smallest")) idx <- vec_order(x, direction = direction, na_value = na_value) vec_slice(x, idx) } vctrs/R/type-bare.R0000644000176200001440000002535015120515501013627 0ustar liggesusers# Type2 ------------------------------------------------------------------- # Left generics ----------------------------------------------------------- #' @rdname vec_ptype2 #' @export vec_ptype2.logical #' @method vec_ptype2 logical #' @export vec_ptype2.logical <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.logical") } #' @rdname vec_ptype2 #' @export vec_ptype2.integer #' @method vec_ptype2 integer #' @export vec_ptype2.integer <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.integer") } #' @rdname vec_ptype2 #' @export vec_ptype2.double #' @method vec_ptype2 double #' @export vec_ptype2.double <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.double") } #' @rdname vec_ptype2 #' @export vec_ptype2.complex #' @method vec_ptype2 complex #' @export vec_ptype2.complex <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.complex") } #' @rdname vec_ptype2 #' @export vec_ptype2.character #' @method vec_ptype2 character #' @export vec_ptype2.character <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.character") } #' @rdname vec_ptype2 #' @export vec_ptype2.raw #' @method vec_ptype2 raw #' @export vec_ptype2.raw <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.raw") } #' @rdname vec_ptype2 #' @export vec_ptype2.list #' @method vec_ptype2 list #' @export vec_ptype2.list <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.list") } # Numeric-ish #' @method vec_ptype2.logical logical #' @export vec_ptype2.logical.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.logical") } #' @export #' @method vec_ptype2.integer integer vec_ptype2.integer.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.integer") } #' @export #' @method vec_ptype2.logical integer vec_ptype2.logical.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.integer") } #' @export #' @method vec_ptype2.integer logical vec_ptype2.integer.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.logical") } #' @export #' @method vec_ptype2.double double vec_ptype2.double.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.double") } #' @export #' @method vec_ptype2.logical double vec_ptype2.logical.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.logical.double") } #' @export #' @method vec_ptype2.double logical vec_ptype2.double.logical <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.logical") } #' @export #' @method vec_ptype2.integer double vec_ptype2.integer.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.double") } #' @export #' @method vec_ptype2.double integer vec_ptype2.double.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.integer") } #' @export #' @method vec_ptype2.complex complex vec_ptype2.complex.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.complex") } #' @export #' @method vec_ptype2.integer complex vec_ptype2.integer.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.integer.complex") } #' @export #' @method vec_ptype2.complex integer vec_ptype2.complex.integer <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.integer") } #' @export #' @method vec_ptype2.double complex vec_ptype2.double.complex <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.double.complex") } #' @export #' @method vec_ptype2.complex double vec_ptype2.complex.double <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.complex.double") } # Character #' @method vec_ptype2.character character #' @export vec_ptype2.character.character <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.character.character") } # Raw #' @export #' @method vec_ptype2.raw raw vec_ptype2.raw.raw <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.raw.raw") } # Lists #' @method vec_ptype2.list list #' @export vec_ptype2.list.list <- function(x, y, ..., x_arg = "", y_arg = "") { stop_native_implementation("vec_ptype2.list.list") } # Cast -------------------------------------------------------------------- # These methods for base types are handled at the C level unless # inputs have shape or have lossy casts #' @export #' @rdname vec_cast #' @export vec_cast.logical #' @method vec_cast logical vec_cast.logical <- function(x, to, ...) { UseMethod("vec_cast.logical") } #' @export #' @method vec_cast.logical logical vec_cast.logical.logical <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.logical integer vec_cast.logical.integer <- function( x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { out <- vec_coerce_bare(x, "logical") out <- shape_broadcast( out, to, x_arg = x_arg, to_arg = to_arg, call = call ) lossy <- !x %in% c(0L, 1L, NA_integer_) maybe_lossy_cast( out, x, to, lossy, x_arg = x_arg, to_arg = to_arg, call = call ) } #' @export #' @method vec_cast.logical double vec_cast.logical.double <- function( x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { out <- vec_coerce_bare(x, "logical") out <- shape_broadcast( out, to, x_arg = x_arg, to_arg = to_arg, call = call ) lossy <- !x %in% c(0, 1, NA_real_) maybe_lossy_cast( out, x, to, lossy, x_arg = x_arg, to_arg = to_arg, call = call ) } #' @export #' @rdname vec_cast #' @export vec_cast.integer #' @method vec_cast integer vec_cast.integer <- function(x, to, ...) { UseMethod("vec_cast.integer") } #' @export #' @method vec_cast.integer logical vec_cast.integer.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "integer") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.integer integer vec_cast.integer.integer <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.integer double vec_cast.integer.double <- function( x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { out <- suppressWarnings(vec_coerce_bare(x, "integer")) x_na <- is.na(x) lossy <- (out != x & !x_na) | xor(x_na, is.na(out)) out <- shape_broadcast( out, to, x_arg = x_arg, to_arg = to_arg, call = call ) maybe_lossy_cast( out, x, to, lossy, x_arg = x_arg, to_arg = to_arg, call = call ) } #' @export #' @rdname vec_cast #' @export vec_cast.double #' @method vec_cast double vec_cast.double <- function(x, to, ...) { UseMethod("vec_cast.double") } #' @export #' @method vec_cast.double logical vec_cast.double.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "double") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.double integer vec_cast.double.integer <- vec_cast.double.logical #' @export #' @method vec_cast.double double vec_cast.double.double <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.complex #' @method vec_cast complex vec_cast.complex <- function(x, to, ...) { UseMethod("vec_cast.complex") } #' @export #' @method vec_cast.complex logical vec_cast.complex.logical <- function(x, to, ...) { x <- vec_coerce_bare(x, "complex") shape_broadcast(x, to, ...) } #' @export #' @method vec_cast.complex integer vec_cast.complex.integer <- vec_cast.complex.logical #' @export #' @method vec_cast.complex double vec_cast.complex.double <- vec_cast.complex.logical #' @export #' @method vec_cast.complex complex vec_cast.complex.complex <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.raw #' @method vec_cast raw vec_cast.raw <- function(x, to, ...) { UseMethod("vec_cast.raw") } #' @export #' @method vec_cast.raw raw vec_cast.raw.raw <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @export #' @rdname vec_cast #' @export vec_cast.character #' @method vec_cast character vec_cast.character <- function(x, to, ...) { UseMethod("vec_cast.character") } #' @export #' @method vec_cast.character character vec_cast.character.character <- function(x, to, ...) { shape_broadcast(x, to, ...) } #' @rdname vec_cast #' @export vec_cast.list #' @method vec_cast list #' @export vec_cast.list <- function(x, to, ...) { UseMethod("vec_cast.list") } #' @export #' @method vec_cast.list list vec_cast.list.list <- function(x, to, ...) { shape_broadcast(x, to, ...) } # equal -------------------------------------------------------------- #' @export vec_proxy_equal.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_equal(x) } # compare ------------------------------------------------------------ #' @export vec_proxy_compare.raw <- function(x, ...) { # because: # order(as.raw(1:3)) # #> Error in order(as.raw(1:3)): unimplemented type 'raw' in 'orderVector1' as.integer(x) } #' @export vec_proxy_compare.list <- function(x, ...) { stop_unsupported(x, "vec_proxy_compare") } #' @export vec_proxy_compare.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_compare(x) } # order ------------------------------------------------------------ #' @export vec_proxy_order.raw <- function(x, ...) { # Can't rely on fallthrough behavior to `vec_proxy_compare()` because this # isn't an S3 object. Have to call it manually. vec_proxy_compare(x) } #' @export vec_proxy_order.list <- function(x, ...) { # Order lists by first appearance. # This allows list elements to be grouped in `vec_order()`. # Have to separately ensure missing values are propagated. out <- vec_duplicate_id(x) if (vec_any_missing(x)) { missing <- vec_detect_missing(x) out <- vec_assign(out, missing, NA_integer_) } out } #' @export vec_proxy_order.array <- function(x, ...) { # The conversion to data frame is only a stopgap, in the long # term, we'll hash arrays natively. Note that hashing functions # similarly convert to data frames. x <- as.data.frame(x) vec_proxy_order(x) } vctrs/R/type2.R0000644000176200001440000002212615132161317013005 0ustar liggesusers#' Find the common type for a pair of vectors #' #' @description #' #' `vec_ptype2()` defines the coercion hierarchy for a set of related #' vector types. Along with [vec_cast()], this generic forms the #' foundation of type coercions in vctrs. #' #' `vec_ptype2()` is relevant when you are implementing vctrs methods #' for your class, but it should not usually be called directly. If #' you need to find the common type of a set of inputs, call #' [vec_ptype_common()] instead. This function supports multiple #' inputs and [finalises][vec_ptype_finalise] the common type. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' @param x,y Vector types. #' @param x_arg,y_arg Argument names for `x` and `y`. These are used #' in error messages to inform the user about the locations of #' incompatible types (see [stop_incompatible_type()]). #' #' @seealso [stop_incompatible_type()] when you determine from the #' attributes that an input can't be cast to the target type. #' #' @section Dependencies: #' - [vec_ptype()] is applied to `x` and `y` #' #' @export vec_ptype2 <- function( x, y, ..., x_arg = caller_arg(x), y_arg = caller_arg(y), call = caller_env() ) { if (!missing(...)) { check_ptype2_dots_empty(...) return(vec_ptype2_opts( x, y, opts = match_fallback_opts(...), x_arg = x_arg, y_arg = y_arg, call = call )) } return(.Call(ffi_ptype2, x, y, environment())) UseMethod("vec_ptype2") } vec_ptype2_dispatch_s3 <- function( x, y, ..., x_arg = "", y_arg = "", call = caller_env() ) { UseMethod("vec_ptype2") } vec_ptype2_dispatch_native <- function( x, y, ..., x_arg = "", y_arg = "", call = caller_env() ) { fallback_opts <- match_fallback_opts(...) .Call( ffi_ptype2_dispatch_native, x, y, fallback_opts, frame = environment() ) } #' Default cast and ptype2 methods #' #' @description #' #' These functions are automatically called when no [vec_ptype2()] or #' [vec_cast()] method is implemented for a pair of types. #' #' * They apply special handling if one of the inputs is of type #' `AsIs` or `sfc`. #' #' * They attempt a number of fallbacks in cases where it would be too #' inconvenient to be strict: #' #' - If the class and attributes are the same they are considered #' compatible. `vec_default_cast()` returns `x` in this case. #' #' - In case of incompatible data frame classes, they fall back to #' `data.frame`. If an incompatible subclass of tibble is #' involved, they fall back to `tbl_df`. #' #' * Otherwise, an error is thrown with [stop_incompatible_type()] or #' [stop_incompatible_cast()]. #' #' @keywords internal #' @export vec_default_ptype2 <- function( x, y, ..., x_arg = "", y_arg = "", call = caller_env() ) { if (is_asis(x)) { return(vec_ptype2_asis_left( x, y, x_arg = x_arg, y_arg = y_arg, call = call )) } if (is_asis(y)) { return(vec_ptype2_asis_right( x, y, x_arg = x_arg, y_arg = y_arg, call = call )) } opts <- match_fallback_opts(...) if (opts$s3_fallback && can_fall_back_2(x, y)) { common <- common_class_suffix(x, y) if (length(common)) { return(new_common_class_fallback(x, common)) } } if (is.data.frame(x) && is.data.frame(y)) { out <- vec_ptype2_df_fallback( x, y, opts, x_arg = x_arg, y_arg = y_arg, call = call ) if (identical(non_df_attrib(x), non_df_attrib(y))) { attributes(out) <- c(df_attrib(out), non_df_attrib(x)) } return(out) } if (is_same_type(x, y)) { return(vec_ptype(x, x_arg = x_arg)) } if (opts$s3_fallback) { # Undo common class fallback class for error messages if (is_common_class_fallback(x)) { x <- fallback_class_remove(x) } if (is_common_class_fallback(y)) { y <- fallback_class_remove(y) } } # The from-dispatch parameter is set only when called from our S3 # dispatch mechanism, when no method is found to dispatch to. It # indicates whether the error message should provide advice about # diverging attributes. withRestarts( stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, `vctrs:::from_dispatch` = match_from_dispatch(...), call = call ), vctrs_restart_ptype2 = function(ptype) { ptype } ) } # This wrapper for `stop_incompatible_type()` matches error context # arguments. It is useful to pass ptype2 arguments through dots # without risking unknown arguments getting stored as condition fields. vec_incompatible_ptype2 <- function( x, y, ..., x_arg = "", y_arg = "", call = caller_env() ) { stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, call = call ) } # We can't check for a proxy or ptype2 method to determine whether a # class is foreign, because we implement these generics for many base # classes and we still need to allow base fallbacks with subclasses. can_fall_back_2 <- function(x, y) { if (!identical(typeof(x), typeof(y))) { return(FALSE) } if (!can_fall_back(x) || !can_fall_back(y)) { return(FALSE) } TRUE } can_fall_back <- function(x) { UseMethod("can_fall_back") } #' @export can_fall_back.vctrs_vctr <- function(x) { # Work around bad interaction when `c()` method calls back into `vec_c()` FALSE } #' @export can_fall_back.ts <- function(x) { # Work around bug with hard-coded `tsp` attribute in Rf_setAttrib() FALSE } #' @export can_fall_back.data.frame <- function(x) { # The `c()` fallback is only for 1D vectors FALSE } #' @export `can_fall_back.vctrs:::common_class_fallback` <- function(x) { TRUE } #' @export can_fall_back.default <- function(x) { # Don't fall back for classes that directly implement a proxy. # # NOTE: That's suboptimal. For instance this forces us to override # `can_fall_back()` for `vctrs_vctr` to avoid recursing into # `vec_c()` through `c()`. Maybe we want to avoid falling back for # any vector that inherits a `vec_proxy()` method implemented # _outside_ of vctrs, i.e. not for a base class? is_null(s3_get_method(class(x)[[1]], "vec_proxy", ns = "vctrs")) } new_common_class_fallback <- function(x, fallback_class) { structure( vec_ptype(x), class = "vctrs:::common_class_fallback", fallback_class = fallback_class ) } #' @export `vec_proxy.vctrs:::common_class_fallback` <- function(x, ...) { x } is_common_class_fallback <- function(x) { inherits(x, "vctrs:::common_class_fallback") } common_class_suffix <- function(x, y) { vec_common_suffix(fallback_class(x), fallback_class(y)) } fallback_class <- function(x) { if (is_common_class_fallback(x)) { attr(x, "fallback_class") } else { class(x) } } fallback_class_remove <- function(x) { class(x) <- attr(x, "fallback_class", exact = TRUE) attr(x, "fallback_class") <- NULL x } check_ptype2_dots_empty <- function( ..., `vctrs:::from_dispatch`, `vctrs:::s3_fallback` ) { check_dots_empty0(...) } match_fallback_opts <- function(..., `vctrs:::s3_fallback` = NULL) { fallback_opts( s3_fallback = `vctrs:::s3_fallback` ) } match_from_dispatch <- function(..., `vctrs:::from_dispatch` = FALSE) { `vctrs:::from_dispatch` } fallback_opts <- function(s3_fallback = NULL) { # Order is important for the C side list( s3_fallback = s3_fallback %||% S3_FALLBACK_false ) } enabled_fallback_opts <- function() { fallback_opts( s3_fallback = S3_FALLBACK_true ) } vec_ptype2_opts <- function( x, y, ..., opts, x_arg = "", y_arg = "", call = caller_env() ) { .Call(ffi_ptype2_opts, x, y, opts, environment()) } vec_ptype2_params <- function( x, y, ..., s3_fallback = NULL, x_arg = "", y_arg = "", call = caller_env() ) { opts <- fallback_opts( s3_fallback = s3_fallback ) vec_ptype2_opts( x, y, opts = opts, x_arg = x_arg, y_arg = y_arg, call = call ) } vec_ptype2_no_fallback <- function( x, y, ..., x_arg = "", y_arg = "", call = caller_env() ) { opts <- fallback_opts( s3_fallback = S3_FALLBACK_false ) vec_ptype2_opts( x, y, ..., , opts = opts, x_arg = x_arg, y_arg = y_arg, call = call ) } S3_FALLBACK_false <- 0L S3_FALLBACK_true <- 1L vec_typeof2 <- function(x, y) { .Call(ffi_typeof2, x, y) } vec_typeof2_s3 <- function(x, y) { .Call(ffi_typeof2_s3, x, y) } # https://github.com/r-lib/vctrs/issues/571 vec_is_coercible <- function( x, y, ..., opts = fallback_opts(), x_arg = "", y_arg = "", call = caller_env() ) { check_dots_empty0(...) .Call( ffi_is_coercible, x, y, opts, environment() ) } vec_is_subtype <- function(x, super, ..., x_arg = "", super_arg = "") { tryCatch( vctrs_error_incompatible_type = function(...) FALSE, { common <- vctrs::vec_ptype2( x, super, ..., x_arg = x_arg, y_arg = super_arg ) vec_is(common, super) } ) } vec_implements_ptype2 <- function(x) { .Call(vctrs_implements_ptype2, x) } vctrs/R/dictionary.R0000644000176200001440000002077015072256373014124 0ustar liggesusers#' Count unique values in a vector #' #' Count the number of unique values in a vector. `vec_count()` has two #' important differences to `table()`: it returns a data frame, and when #' given multiple inputs (as a data frame), it only counts combinations that #' appear in the input. #' #' @param x A vector (including a data frame). #' @param sort One of "count", "key", "location", or "none". #' * "count", the default, puts most frequent values at top #' * "key", orders by the output key column (i.e. unique values of `x`) #' * "location", orders by location where key first seen. This is useful #' if you want to match the counts up to other unique/duplicated functions. #' * "none", leaves unordered. This is not guaranteed to produce the same #' ordering across R sessions, but is the fastest method. #' @return A data frame with columns `key` (same type as `x`) and #' `count` (an integer vector). #' #' @section Dependencies: #' - [vec_proxy_equal()] #' - [vec_slice()] #' - [vec_order()] #' @export #' @examples #' vec_count(mtcars$vs) #' vec_count(iris$Species) #' #' # If you count a data frame you'll get a data frame #' # column in the output #' str(vec_count(mtcars[c("vs", "am")])) #' #' # Sorting --------------------------------------- #' #' x <- letters[rpois(100, 6)] #' # default is to sort by frequency #' vec_count(x) #' #' # by can sort by key #' vec_count(x, sort = "key") #' #' # or location of first value #' vec_count(x, sort = "location") #' head(x) #' #' # or not at all #' vec_count(x, sort = "none") vec_count <- function(x, sort = c("count", "key", "location", "none")) { sort <- arg_match0(sort, c("count", "key", "location", "none")) # Returns info pair giving index of first occurrence value and count info <- vec_count_impl(x) # Sorting based on rearranging `info` if (sort == "location") { loc <- vec_order(info$loc) info <- vec_slice(info, loc) } else if (sort == "count") { # Order by descending count, but ascending original location. # This retains stable ordering in case of ties in the `count`. # Need `vec_order_radix()` to handle different `direction`s. loc <- vec_order_radix( info[c("count", "loc")], direction = c("desc", "asc") ) info <- vec_slice(info, loc) } out <- data_frame( key = vec_slice(x, info$loc), count = info$count ) # Sorting based on rearranging `out` if (sort == "key") { loc <- vec_order(out$key) out <- vec_slice(out, loc) } out } vec_count_impl <- function(x) { .Call(vctrs_count, x) } # Duplicates -------------------------------------------------------------- #' Find duplicated values #' #' * `vec_duplicate_any()`: detects the presence of duplicated values, #' similar to [anyDuplicated()]. #' * `vec_duplicate_detect()`: returns a logical vector describing if each #' element of the vector is duplicated elsewhere. Unlike [duplicated()], it #' reports all duplicated values, not just the second and subsequent #' repetitions. #' * `vec_duplicate_id()`: returns an integer vector giving the location of #' the first occurrence of the value. #' #' @section Missing values: #' In most cases, missing values are not considered to be equal, i.e. #' `NA == NA` is not `TRUE`. This behaviour would be unappealing here, #' so these functions consider all `NAs` to be equal. (Similarly, #' all `NaN` are also considered to be equal.) #' #' @param x A vector (including a data frame). #' @return #' * `vec_duplicate_any()`: a logical vector of length 1. #' * `vec_duplicate_detect()`: a logical vector the same length as `x`. #' * `vec_duplicate_id()`: an integer vector the same length as `x`. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @seealso [vec_unique()] for functions that work with the dual of duplicated #' values: unique values. #' @name vec_duplicate #' @examples #' vec_duplicate_any(1:10) #' vec_duplicate_any(c(1, 1:10)) #' #' x <- c(10, 10, 20, 30, 30, 40) #' vec_duplicate_detect(x) #' # Note that `duplicated()` doesn't consider the first instance to #' # be a duplicate #' duplicated(x) #' #' # Identify elements of a vector by the location of the first element that #' # they're equal to: #' vec_duplicate_id(x) #' # Location of the unique values: #' vec_unique_loc(x) #' # Equivalent to `duplicated()`: #' vec_duplicate_id(x) == seq_along(x) NULL #' @rdname vec_duplicate #' @export vec_duplicate_any <- function(x) { .Call(vctrs_duplicated_any, x) } #' @rdname vec_duplicate #' @export vec_duplicate_detect <- function(x) { .Call(vctrs_duplicated, x) } #' @rdname vec_duplicate #' @export vec_duplicate_id <- function(x) { .Call(vctrs_id, x) } # Unique values ----------------------------------------------------------- #' Find and count unique values #' #' * `vec_unique()`: the unique values. Equivalent to [unique()]. #' * `vec_unique_loc()`: the locations of the unique values. #' * `vec_unique_count()`: the number of unique values. #' #' @inherit vec_duplicate sections #' @param x A vector (including a data frame). #' @return #' * `vec_unique()`: a vector the same type as `x` containing only unique #' values. #' * `vec_unique_loc()`: an integer vector, giving locations of unique values. #' * `vec_unique_count()`: an integer vector of length 1, giving the #' number of unique values. #' @seealso [vec_duplicate] for functions that work with the dual of #' unique values: duplicated values. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @export #' @examples #' x <- rpois(100, 8) #' vec_unique(x) #' vec_unique_loc(x) #' vec_unique_count(x) #' #' # `vec_unique()` returns values in the order that encounters them #' # use sort = "location" to match to the result of `vec_count()` #' head(vec_unique(x)) #' head(vec_count(x, sort = "location")) #' #' # Normally missing values are not considered to be equal #' NA == NA #' #' # But they are for the purposes of considering uniqueness #' vec_unique(c(NA, NA, NA, NA, 1, 2, 1)) vec_unique <- function(x) { vec_slice(x, vec_unique_loc(x)) } #' @rdname vec_unique #' @export vec_unique_loc <- function(x) { .Call(vctrs_unique_loc, x) } #' @rdname vec_unique #' @export vec_unique_count <- function(x) { .Call(vctrs_n_distinct, x) } # Matching ---------------------------------------------------------------- #' Find matching observations across vectors #' #' `vec_in()` returns a logical vector based on whether `needle` is found in #' haystack. `vec_match()` returns an integer vector giving location of #' `needle` in `haystack`, or `NA` if it's not found. #' #' `vec_in()` is equivalent to [base::%in%]; `vec_match()` is equivalent to #' [base::match()]. #' #' @section Missing values: #' In most cases places in R, missing values are not considered to be equal, #' i.e. `NA == NA` is not `TRUE`. The exception is in matching functions #' like [base::match()] and [merge()], where an `NA` will match another `NA`. #' By `vec_match()` and `vec_in()` will match `NA`s; but you can control #' this behaviour with the `na_equal` argument. #' #' @param needles,haystack Vector of `needles` to search for in vector haystack. #' `haystack` should usually be unique; if not `vec_match()` will only #' return the location of the first match. #' #' `needles` and `haystack` are coerced to the same type prior to #' comparison. #' @inheritParams rlang::args_dots_empty #' @param na_equal If `TRUE`, missing values in `needles` can be #' matched to missing values in `haystack`. If `FALSE`, they #' propagate, missing values in `needles` are represented as `NA` in #' the return value. #' @param needles_arg,haystack_arg Argument tags for `needles` and #' `haystack` used in error messages. #' @return A vector the same length as `needles`. `vec_in()` returns a #' logical vector; `vec_match()` returns an integer vector. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_proxy_equal()] #' #' @export #' @examples #' hadley <- strsplit("hadley", "")[[1]] #' vec_match(hadley, letters) #' #' vowels <- c("a", "e", "i", "o", "u") #' vec_match(hadley, vowels) #' vec_in(hadley, vowels) #' #' # Only the first index of duplicates is returned #' vec_match(c("a", "b"), c("a", "b", "a", "b")) vec_match <- function( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) { check_dots_empty0(...) .Call(vctrs_match, needles, haystack, na_equal, environment()) } #' @export #' @rdname vec_match vec_in <- function( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) { check_dots_empty0(...) .Call(vctrs_in, needles, haystack, na_equal, environment()) } vctrs/R/faq-internal.R0000644000176200001440000000026314713504470014326 0ustar liggesusers#' Internal FAQ - `vec_ptype2()`, `NULL`, and unspecified vectors #' #' @includeRmd man/faq/internal/ptype2-identity.Rmd description #' #' @name internal-faq-ptype2-identity NULL vctrs/R/numeric.R0000644000176200001440000000432514713505651013414 0ustar liggesusers#' Mathematical operations #' #' This generic provides a common dispatch mechanism for all regular unary #' mathematical functions. It is used as a common wrapper around many of the #' Summary group generics, the Math group generics, and a handful of other #' mathematical functions like `mean()` (but not `var()` or `sd()`). #' #' `vec_math_base()` is provided as a convenience for writing methods. It #' calls the base `.fn` on the underlying [vec_data()]. #' #' @section Included functions: #' #' * From the [Summary] group generic: #' `prod()`, `sum()`, `any()`, `all()`. #' #' * From the [Math] group generic: #' `abs()`, `sign()`, `sqrt()`, `ceiling()`, `floor()`, `trunc()`, `cummax()`, #' `cummin()`, `cumprod()`, `cumsum()`, `log()`, `log10()`, `log2()`, #' `log1p()`, `acos()`, `acosh()`, `asin()`, `asinh()`, `atan()`, `atanh()`, #' `exp()`, `expm1()`, `cos()`, `cosh()`, `cospi()`, `sin()`, `sinh()`, #' `sinpi()`, `tan()`, `tanh()`, `tanpi()`, `gamma()`, `lgamma()`, #' `digamma()`, `trigamma()`. #' #' * Additional generics: `mean()`, `is.nan()`, `is.finite()`, `is.infinite()`. #' #' Note that `median()` is currently not implemented, and `sd()` and #' `var()` are currently not generic and so do not support custom #' classes. #' #' @seealso [vec_arith()] for the equivalent for the arithmetic infix operators. #' @param .fn A mathematical function from the base package, as a string. #' @param .x A vector. #' @param ... Additional arguments passed to `.fn`. #' @keywords internal #' @export #' @examples #' x <- new_vctr(c(1, 2.5, 10)) #' x #' #' abs(x) #' sum(x) #' cumsum(x) vec_math <- function(.fn, .x, ...) { UseMethod("vec_math", .x) } #' @export vec_math.default <- function(.fn, .x, ...) { if (!is_double(.x) && !is_logical_dispatch(.fn, .x)) { stop_unimplemented(.x, "vec_math") } out <- vec_math_base(.fn, .x, ...) # Don't restore output of logical predicates like `any()`, # `is.finite()`, or `is.nan()` if (is_double(out)) { out <- vec_restore(out, .x) } out } is_logical_dispatch <- function(fn, x) { is_logical(x) && fn %in% c("any", "all") } #' @export #' @rdname vec_math vec_math_base <- function(.fn, .x, ...) { .fn <- getExportedValue("base", .fn) .fn(vec_data(.x), ...) } vctrs/R/type-unspecified.R0000644000176200001440000001055715120272011015213 0ustar liggesusers#' Unspecified vectors and prototype finalisation #' #' @description #' `unspecified()` is the underlying type used to represent logical vectors that #' only contain `NA`. These require special handling because we want to allow #' logical `NA` to specify missingness that can be cast to any other type. #' #' In vctrs, the `` type is considered _unfinalised_ and is not #' suitable for use in most vctrs functions that take a `ptype` argument, like #' [vec_c()]. The purpose of `vec_ptype_finalise()` is to finalise any #' `` types into `` after common type determination #' has been completed. #' #' [vec_ptype()] and [vec_ptype2()] return _unfinalised_ types, and will convert #' a logical vector of `NA` into an empty `` type that can combine #' with any other type. It is unlikely that you will call these yourself, but, #' if you do, you'll need to manually finalise with `vec_ptype_finalise()` to #' take care of any `` types. #' #' [vec_ptype_common()] uses both [vec_ptype()] and [vec_ptype2()] to compute #' the common type, but typically returns a _finalised_ type for immediate usage #' in other vctrs functions. You can optionally skip finalisation by setting #' `.finalise = FALSE`, in which case `vec_ptype_common()` can return #' `` and you'll need to manually call `vec_ptype_finalise()` #' yourself. #' #' `vec_ptype_finalise()` is an S3 generic, but it is extremely rare to need to #' write an S3 method for this. Data frames (and data frame subclasses) are #' already recursively finalised by the default method. The only time you may #' need to write an S3 method for `vec_ptype_finalise()` is if your class #' _wraps_ an arbitrary vector that has the potential to be a logical vector #' containing only `NA`s. See `ivs::iv()` for an example of this, which wraps #' arbitrary `start` and `end` vectors of the same type into a single interval #' vector class. #' #' @keywords internal #' @name vctrs-unspecified #' #' @examples #' # Returns `unspecified()` #' vec_ptype(NA) #' vec_ptype(c(NA, NA)) #' #' # We've chosen to make this return `logical()`, but this is admittedly #' # ambiguous, as it could be seen as "an empty vector of `NA`s" that could #' # also be treated as unspecified. #' vec_ptype(logical()) #' #' # These return `unspecified()` #' vec_ptype2(NA, NA) #' vec_ptype2(NA, NULL) #' vec_ptype2(NULL, NA) #' #' # An unspecified vector can combine with any other type #' vec_ptype2(NA, "x") #' vec_ptype2("x", NA) #' #' # Same as using `unspecified()` directly #' vec_ptype2(unspecified(1), "x") #' vec_ptype2("x", unspecified(1)) #' #' # Finalising a ptype turns unspecified back to logical #' vec_ptype(NA) #' vec_ptype_finalise(vec_ptype(NA)) #' #' # This works recursively over data frames #' df <- data_frame(x = NA, y = data_frame(z = NA)) #' vec_ptype_show(vec_ptype(df)) #' vec_ptype_show(vec_ptype_finalise(vec_ptype(df))) #' #' # `vec_ptype_common()` finalises automatically rather than returning an #' # unspecified type #' vec_ptype_common(NA) #' vec_ptype_common(NA, NA) #' vec_ptype_show(vec_ptype_common(df)) #' #' # `vec_ptype_common()` lets you opt out of finalisation using `.finalise` #' vec_ptype_common(NA, .finalise = FALSE) #' vec_ptype_show(vec_ptype_common(df, .finalise = FALSE)) NULL #' @param n Length of vector #' #' @rdname vctrs-unspecified #' @export unspecified <- function(n = 0) { .Call(vctrs_unspecified, n) } #' @export `[.vctrs_unspecified` <- function(x, i, ...) { unspecified(length(NextMethod())) } #' @export print.vctrs_unspecified <- function(x, ...) { cat(" [", length(x), "]\n", sep = "") } #' @export vec_ptype_abbr.vctrs_unspecified <- function(x, ...) { "???" } is_unspecified <- function(x) { .Call(vctrs_is_unspecified, x) } ununspecify <- function(x) { if (is_unspecified(x)) { new_logical(length(x)) } else { x } } #' @inheritParams rlang::args_dots_empty #' #' @param x A `ptype` to finalize, typically a result of [vec_ptype()], #' [vec_ptype2()], or [`vec_ptype_common(.finalise = #' FALSE)`][vec_ptype_common]. #' #' @rdname vctrs-unspecified #' @export vec_ptype_finalise <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_ptype_finalise, x)) UseMethod("vec_ptype_finalise") } vec_ptype_finalise_dispatch <- function(x, ...) { UseMethod("vec_ptype_finalise") } #' @export vec_ptype_finalise.default <- function(x, ...) { x } vctrs/R/interval.R0000644000176200001440000002205015065005761013567 0ustar liggesusers#' Group overlapping intervals #' #' @description #' These functions are used to group together any overlaps that are present #' within a set of vector intervals. When multiple overlapping intervals are #' grouped together they result in a wider interval containing the smallest #' `start` and the largest `end` of the overlaps. #' #' - `vec_interval_groups()` merges all overlapping intervals found within #' `start` and `end`. The resulting intervals are known as the interval #' "groups". #' #' - `vec_interval_locate_groups()` returns a two column data frame with a `key` #' column containing the result of `vec_interval_groups()` and a `loc` #' list-column containing integer vectors that map each interval in `start` and #' `end` to the group that it falls in. #' #' These functions require that `start < end`. Additionally, intervals are #' treated as if they are right-open, i.e. `[start, end)`. #' #' @section Assumptions: #' For performance and simplicity, these functions make a few assumptions about #' `start` and `end` that are not checked internally: #' #' - `start < end` must be true, with an exception for missing intervals. #' #' - If the i-th observation of `start` is missing, then the i-th observation #' of `end` must also be missing. #' #' - Each observation of `start` and `end` must be either #' [complete][vec_detect_complete] or [missing][vec_detect_missing]. Partially #' complete values such as `start = data_frame(x = 1, y = NA)` are not allowed. #' #' If any of these assumptions are invalid, then the result is undefined. #' #' Developer note: These assumptions stem from the idea that if these functions #' were in ivs itself, then we could safely make these assumptions in the C #' code, because the `iv()` helper would assert them for us ahead of time. #' Trying to re-assert these checks in the C code here is wasteful and makes the #' code more complex. #' #' @inheritParams rlang::args_dots_empty #' #' @param start,end #' A pair of vectors representing the starts and ends of the intervals. #' #' It is required that `start < end`. #' #' `start` and `end` will be cast to their common type, and must have the same #' size. #' #' @param abutting #' A single logical controlling whether or not abutting intervals should be #' grouped together. If `TRUE`, `[a, b)` and `[b, c)` will be grouped. #' #' @param missing #' Handling of missing intervals. #' #' - `"group"`: Group all missing intervals together. #' #' - `"drop"`: Drop all missing intervals from the result. #' #' @return #' - `vec_interval_groups()` returns a data frame with two columns, `start` and #' `end`, which contain vectors matching the types of `start` and `end`. #' #' - `vec_interval_locate_groups()` returns a data frame with two columns, `key` #' and `loc`. `key` contains the result of `vec_interval_groups()` and `loc` is #' a list of integer vectors. #' #' @name interval-groups #' #' @examples #' bounds <- data_frame( #' start = c(1, 2, NA, 5, NA, 9, 12), #' end = c(5, 3, NA, 6, NA, 12, 14) #' ) #' bounds #' #' # Group overlapping intervals together #' vec_interval_groups(bounds$start, bounds$end) #' #' # You can choose not to group abutting intervals if you want to retain #' # those boundaries #' vec_interval_groups(bounds$start, bounds$end, abutting = FALSE) #' #' # You can also choose to drop all missing intervals if you don't consider #' # them part of the result #' vec_interval_groups(bounds$start, bounds$end, missing = "drop") #' #' # You can also locate the groups, which allows you to map each original #' # interval to its corresponding group #' vec_interval_locate_groups(bounds$start, bounds$end) #' #' @noRd vec_interval_groups <- function( start, end, ..., abutting = TRUE, missing = "group" ) { check_dots_empty0(...) .Call(ffi_interval_groups, start, end, abutting, missing) } #' @noRd #' @rdname interval-groups vec_interval_locate_groups <- function( start, end, ..., abutting = TRUE, missing = "group" ) { check_dots_empty0(...) .Call(ffi_interval_locate_groups, start, end, abutting, missing) } # ------------------------------------------------------------------------------ #' Interval complement #' #' @description #' `vec_interval_complement()` takes the complement of the intervals defined by #' `start` and `end`. The complement can also be thought of as the "gaps" #' between the intervals. By default, the minimum of `start` and the maximum of #' `end` define the bounds to take the complement over, but this can be adjusted #' with `lower` and `upper`. Missing intervals are always dropped from the #' complement. #' #' These functions require that `start < end`. Additionally, intervals are #' treated as if they are right-open, i.e. `[start, end)`. #' #' @inheritSection interval-groups Assumptions #' #' @inheritParams rlang::args_dots_empty #' #' @param start,end #' A pair of vectors representing the starts and ends of the intervals. #' #' It is required that `start < end`. #' #' `start` and `end` will be cast to their common type, and must have the same #' size. #' #' @param lower,upper #' Bounds for the universe over which to compute the complement. These should #' be singular values with the same type as `start` and `end`. #' #' @return #' A two column data frame with a `start` column containing a vector of the #' same type as `start` and an `end` column containing a vector of the same #' type as `end`. #' #' @examples #' x <- data_frame( #' start = c(10, 0, NA, 3, -5, NA), #' end = c(12, 5, NA, 6, -2, NA) #' ) #' x #' #' # The complement contains any values from `[-5, 12)` that aren't represented #' # in these intervals. Missing intervals are dropped. #' vec_interval_complement(x$start, x$end) #' #' # Expand out the "universe" of possible values #' vec_interval_complement(x$start, x$end, lower = -Inf) #' vec_interval_complement(x$start, x$end, lower = -Inf, upper = Inf) #' #' @noRd vec_interval_complement <- function( start, end, ..., lower = NULL, upper = NULL ) { check_dots_empty0(...) .Call(ffi_interval_complement, start, end, lower, upper) } # ------------------------------------------------------------------------------ #' Interval containers #' #' @description #' `vec_interval_locate_containers()` locates interval _containers_. Containers #' are defined as the widest intervals that aren't contained by any other #' interval. The returned locations will arrange the containers in ascending #' order. #' #' For example, with the following vector of intervals: `[1, 5), [2, 6), [3, 4), #' [5, 9), [5, 8)`, the containers are: `[1, 5), [2, 6), [5, 9)`. The intervals #' `[3, 4)` and `[5, 8)` aren't containers because they are completely contained #' within at least one other interval. Note that containers can partially #' overlap, i.e. `[1, 5)` and `[2, 6)`, and multiple containers can contain the #' same intervals, i.e. both `[1, 5)` and `[2, 6)` contain `[3, 4)`. #' #' Missing intervals are placed into their own container at the end, separate #' from all other intervals. #' #' These functions require that `start < end`. Additionally, intervals are #' treated as if they are right-open, i.e. `[start, end)`. #' #' @inheritSection interval-groups Assumptions #' #' @param start,end #' A pair of vectors representing the starts and ends of the intervals. #' #' It is required that `start < end`. #' #' `start` and `end` will be cast to their common type, and must have the same #' size. #' #' @return #' An integer vector that represents the locations of the containers in `start` #' and `end`. #' #' @examples #' x <- data_frame( #' start = c(10, 0, NA, 3, 2, 2, NA, 11), #' end = c(12, 5, NA, 5, 6, 6, NA, 12) #' ) #' x #' #' loc <- vec_interval_locate_containers(x$start, x$end) #' loc #' #' vec_slice(x, loc) #' #' @noRd vec_interval_locate_containers <- function(start, end) { .Call(ffi_interval_locate_containers, start, end) } # ------------------------------------------------------------------------------ # Experimental shims of interval functions used by other packages (mainly, ivs). # # This gives us the freedom to experiment with the signature of these functions # while being backwards compatible with ivs in the meantime. # # We can remove these after: # - The interval functions are exported # - ivs updates to use them directly # - A short deprecation period goes by that allows users time to update their # version of ivs exp_vec_interval_groups <- function( start, end, ..., abutting = TRUE, missing = "group" ) { vec_interval_groups( start = start, end = end, ..., abutting = abutting, missing = missing ) } exp_vec_interval_locate_groups <- function( start, end, ..., abutting = TRUE, missing = "group" ) { vec_interval_locate_groups( start = start, end = end, ..., abutting = abutting, missing = missing ) } exp_vec_interval_complement <- function( start, end, ..., lower = NULL, upper = NULL ) { vec_interval_complement( start = start, end = end, ..., lower = lower, upper = upper ) } exp_vec_interval_locate_containers <- function(start, end) { vec_interval_locate_containers( start = start, end = end ) } vctrs/R/type.R0000644000176200001440000002055715120272011012720 0ustar liggesusers#' Find the prototype of a set of vectors #' #' @description #' - `vec_ptype()` returns the [unfinalised][vec_ptype_finalise] prototype of a #' single vector. #' #' - `vec_ptype_common()` returns the common type of multiple vectors. By #' default, this is [finalised][vec_ptype_finalise] for immediate usage, but #' can optionally be left unfinalised for advanced common type determination. #' #' - `vec_ptype_show()` nicely prints the common type of any number of inputs, #' and is designed for interactive exploration. #' #' @inheritParams rlang::args_error_context #' #' @param x A vector #' #' @param ... For `vec_ptype()`, these dots are for future extensions and must #' be empty. #' #' For `vec_ptype_common()` and `vec_ptype_show()`, vector inputs. #' #' @param x_arg Argument name for `x`. This is used in error messages to inform #' the user about the locations of incompatible types. #' #' @param .ptype If `NULL`, the default, the output type is determined by #' computing the common type across all elements of `...`. #' #' Alternatively, you can supply `.ptype` to give the output known type. #' If `getOption("vctrs.no_guessing")` is `TRUE` you must supply this value: #' this is a convenient way to make production code demand fixed types. #' #' @param .finalise Should `vec_ptype_common()` [finalise][vec_ptype_finalise] #' its output? #' #' - If `TRUE`, [vec_ptype_finalise()] is called on the final `ptype` before #' it is returned. Practically this has the effect of converting any #' types from [unspecified] to logical. #' #' - If `FALSE`, [unspecified] types are left unfinalised, which can be useful #' for advanced cases where you combine one common type result with another #' type via [vec_ptype2()]. Note that you must manually call #' [vec_ptype_finalise()] on the final `ptype` before supplying it to any #' other vctrs functions. #' #' @return `vec_ptype()` and `vec_ptype_common()` return a prototype #' (a size-0 vector). #' #' @section `vec_ptype()`: #' #' `vec_ptype()` returns [size][vec_size] 0 vectors potentially #' containing attributes but no data. Generally, this is just #' `vec_slice(x, 0L)`, but some inputs require special #' handling. #' #' * While you can't slice `NULL`, the prototype of `NULL` is #' itself. This is because we treat `NULL` as an identity value in #' the `vec_ptype2()` monoid. #' #' * The prototype of logical vectors that only contain missing values #' is the special [unspecified] type, which can be coerced to any #' other 1d type. This allows bare `NA`s to represent missing values #' for any 1d vector type. [Finalising][vec_ptype_finalise] this type #' converts it from unspecified back to logical. #' #' See [internal-faq-ptype2-identity] for more information about #' identity values. #' #' `vec_ptype()` is a _performance_ generic. It is not necessary to implement it #' because the default method will work for any vctrs type. However the default #' method builds around other vctrs primitives like `vec_slice()` which incurs #' performance costs. If your class has a static prototype, you might consider #' implementing a custom `vec_ptype()` method that returns a constant. This will #' improve the performance of your class in many cases ([common #' type][vec_ptype2] imputation in particular). #' #' Because it may contain unspecified vectors, the prototype returned by #' `vec_ptype()` is said to be __unfinalised__. Call [vec_ptype_finalise()] to #' finalise it. #' #' @section `vec_ptype_common()`: #' #' `vec_ptype_common()` first finds the prototype of each input, then #' successively calls [vec_ptype2()] to find a common type. It returns a #' [finalised][vec_ptype_finalise] prototype by default, but can optionally be #' left unfinalised for advanced common type determination. #' #' @section Dependencies of `vec_ptype()`: #' - [vec_slice()] for returning an empty slice #' #' @section Dependencies of `vec_ptype_common()`: #' - [vec_ptype2()] #' - [vec_ptype_finalise()] #' #' @export #' @examples #' # Unknown types ------------------------------------------ #' vec_ptype_show() #' vec_ptype_show(NULL) #' #' # Vectors ------------------------------------------------ #' vec_ptype_show(1:10) #' vec_ptype_show(letters) #' vec_ptype_show(TRUE) #' #' vec_ptype_show(Sys.Date()) #' vec_ptype_show(Sys.time()) #' vec_ptype_show(factor("a")) #' vec_ptype_show(ordered("a")) #' #' # Matrices ----------------------------------------------- #' # The prototype of a matrix includes the number of columns #' vec_ptype_show(array(1, dim = c(1, 2))) #' vec_ptype_show(array("x", dim = c(1, 2))) #' #' # Data frames -------------------------------------------- #' # The prototype of a data frame includes the prototype of #' # every column #' vec_ptype_show(iris) #' #' # The prototype of multiple data frames includes the prototype #' # of every column that in any data frame #' vec_ptype_show( #' data.frame(x = TRUE), #' data.frame(y = 2), #' data.frame(z = "a") #' ) #' #' # Finalisation ------------------------------------------- #' #' # `vec_ptype()` and `vec_ptype2()` return unfinalised ptypes so that they #' # can be coerced to any other type #' vec_ptype(NA) #' vec_ptype2(NA, NA) #' #' # By default `vec_ptype_common()` finalises so that you can use its result #' # directly in other vctrs functions #' vec_ptype_common(NA, NA) #' #' # You can opt out of finalisation to make it work like `vec_ptype()` and #' # `vec_ptype2()` with `.finalise = FALSE`, but don't forget that you must #' # call `vec_ptype_finalise()` manually if you do so! #' vec_ptype_common(NA, NA, .finalise = FALSE) #' vec_ptype_finalise(vec_ptype_common(NA, NA, .finalise = FALSE)) #' #' # This can be useful in rare scenarios, like including a separate `default` #' # argument in the ptype computation #' xs <- list(NA, NA) #' default <- "a" #' try(vec_ptype2(vec_ptype_common(!!!xs), default)) #' vec_ptype2(vec_ptype_common(!!!xs, .finalise = FALSE), default) vec_ptype <- function(x, ..., x_arg = "", call = caller_env()) { check_dots_empty0(...) return(.Call(ffi_ptype, x, x_arg, environment())) UseMethod("vec_ptype") } #' @export #' @rdname vec_ptype vec_ptype_common <- function( ..., .ptype = NULL, .finalise = TRUE, .arg = "", .call = caller_env() ) { .External2(ffi_ptype_common, list2(...), .ptype, .finalise) } vec_ptype_common_params <- function( ..., .ptype = NULL, .finalise = TRUE, .fallback_opts = fallback_opts(), .arg = "", .call = caller_env() ) { .External2( ffi_ptype_common_params, list2(...), .ptype, .finalise, .fallback_opts ) } vec_ptype_common_fallback <- function( ..., .ptype = NULL, .arg = "", .call = caller_env() ) { vec_ptype_common_params( ..., .ptype = .ptype, .fallback_opts = enabled_fallback_opts(), .arg = .arg, .call = .call ) } #' @export #' @rdname vec_ptype vec_ptype_show <- function(...) { args <- compact(list2(...)) n <- length(args) if (n == 0) { cat_line("Prototype: NULL") } else if (n == 1) { cat_line("Prototype: ", vec_ptype_full(args[[1]])) } else { in_types <- map(args, vec_ptype) out_types <- vector("list", length(in_types)) out_types[[1]] <- in_types[[1]] for (i in seq2(2, n)) { out_types[[i]] <- vec_ptype2(out_types[[i - 1]], in_types[[i]]) } in_full <- paste0("<", map_chr(in_types, vec_ptype_full), ">") out_full <- paste0("<", map_chr(out_types, vec_ptype_full), ">") out <- cbind( n = paste0(seq(0, n - 1), ". "), lhs = c("", out_full[-n]), comma = " , ", rhs = in_full, equals = " = ", res = c(in_full[[1]], out_full[-1]) ) out <- t(apply(out, 1, pad_height)) out <- apply(out, 2, pad_width) out[, "lhs"] <- parens(out[, "lhs"]) out[, "rhs"] <- parens(out[, "rhs"], FALSE) lines <- strsplit(out, "\n") dim(lines) <- dim(out) steps <- apply(lines, 1, function(x) do.call(cbind, x)) if (is.list(steps)) { step_lines <- unlist(lapply(steps, function(x) { apply(x, 1, paste0, collapse = "") })) } else { step_lines <- apply(steps, 2, paste0, collapse = "") } cat_line("Prototype: ", out_full[[n]]) cat_line(step_lines) } invisible() } vec_typeof <- function(x) { .Call(vctrs_typeof, x, TRUE) } vec_typeof_bare <- function(x) { .Call(vctrs_typeof, x, FALSE) } vec_type_info <- function(x) { .Call(ffi_type_info, x) } vec_proxy_info <- function(x) { .Call(ffi_proxy_info, x) } vctrs/R/type-idate.R0000644000176200001440000000173215056611175014016 0ustar liggesusers# Proxy method for # # `vec_proxy.Date()` coerces integer storage dates to double. If we don't # intercept that, an IDate will also be converted to double storage, but that # results in a corrupt IDate. Since we provide some methods for , we # also provide this one. # # Notably we don't provide `vec_ptype2.Date.IDate` methods. We don't think it is # our place to provide methods for that, especially since we'd likely prefer # as the more general common type, but `c.IDate` in data.table prefers # , so we'd probably just add conflicting behavior. vec_proxy_IDate <- function(x, ...) { if (typeof(x) != "integer") { type <- typeof(x) cli::cli_abort( "Corrupt . Expected integer storage, not {type} storage." ) } x } # Restore method for # # Follow `vec_restore.Date` and pass through to `vec_restore.default` which # uses standard restore behavior vec_restore_IDate <- function(x, to, ...) { NextMethod() } vctrs/R/match.R0000644000176200001440000005156515065005761013054 0ustar liggesusers#' Locate observations matching specified conditions #' #' @description #' `vec_locate_matches()` is a more flexible version of [vec_match()] used to #' identify locations where each value of `needles` matches one or multiple #' values in `haystack`. Unlike `vec_match()`, `vec_locate_matches()` returns #' all matches by default, and can match on binary conditions other than #' equality, such as `>`, `>=`, `<`, and `<=`. #' #' @details #' [vec_match()] is identical to (but often slightly faster than): #' #' ``` #' vec_locate_matches( #' needles, #' haystack, #' condition = "==", #' multiple = "first", #' nan_distinct = TRUE #' ) #' ``` #' #' `vec_locate_matches()` is extremely similar to a SQL join between `needles` #' and `haystack`, with the default being most similar to a left join. #' #' Be very careful when specifying match `condition`s. If a condition is #' misspecified, it is very easy to accidentally generate an exponentially #' large number of matches. #' #' @section Dependencies of `vec_locate_matches()`: #' - [vec_order_radix()] #' - [vec_detect_complete()] #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' @inheritParams order-radix #' #' @param needles,haystack Vectors used for matching. #' #' - `needles` represents the vector to search for. #' #' - `haystack` represents the vector to search in. #' #' Prior to comparison, `needles` and `haystack` are coerced to the same type. #' #' @param condition Condition controlling how `needles` should be compared #' against `haystack` to identify a successful match. #' #' - One of: `"=="`, `">"`, `">="`, `"<"`, or `"<="`. #' #' - For data frames, a length `1` or `ncol(needles)` character vector #' containing only the above options, specifying how matching is determined #' for each column. #' #' @param filter Filter to be applied to the matched results. #' #' - `"none"` doesn't apply any filter. #' #' - `"min"` returns only the minimum haystack value matching the current #' needle. #' #' - `"max"` returns only the maximum haystack value matching the current #' needle. #' #' - For data frames, a length `1` or `ncol(needles)` character vector #' containing only the above options, specifying a filter to apply to #' each column. #' #' Filters don't have any effect on `"=="` conditions, but are useful for #' computing "rolling" matches with other conditions. #' #' A filter can return multiple haystack matches for a particular needle #' if the maximum or minimum haystack value is duplicated in `haystack`. These #' can be further controlled with `multiple`. #' #' @param incomplete Handling of missing and [incomplete][vec_detect_complete] #' values in `needles`. #' #' - `"compare"` uses `condition` to determine whether or not a missing value #' in `needles` matches a missing value in `haystack`. If `condition` is #' `==`, `>=`, or `<=`, then missing values will match. #' #' - `"match"` always allows missing values in `needles` to match missing #' values in `haystack`, regardless of the `condition`. #' #' - `"drop"` drops incomplete values in `needles` from the result. #' #' - `"error"` throws an error if any `needles` are incomplete. #' #' - If a single integer is provided, this represents the value returned #' in the `haystack` column for values of `needles` that are incomplete. If #' `no_match = NA`, setting `incomplete = NA` forces incomplete values in #' `needles` to be treated like unmatched values. #' #' `nan_distinct` determines whether a `NA` is allowed to match a `NaN`. #' #' @param no_match Handling of `needles` without a match. #' #' - `"drop"` drops `needles` with zero matches from the result. #' #' - `"error"` throws an error if any `needles` have zero matches. #' #' - If a single integer is provided, this represents the value returned in #' the `haystack` column for values of `needles` that have zero matches. The #' default represents an unmatched needle with `NA`. #' #' @param remaining Handling of `haystack` values that `needles` never matched. #' #' - `"drop"` drops remaining `haystack` values from the result. #' Typically, this is the desired behavior if you only care when `needles` #' has a match. #' #' - `"error"` throws an error if there are any remaining `haystack` #' values. #' #' - If a single integer is provided (often `NA`), this represents the value #' returned in the `needles` column for the remaining `haystack` values #' that `needles` never matched. Remaining `haystack` values are always #' returned at the end of the result. #' #' @param multiple Handling of `needles` with multiple matches. For each needle: #' #' - `"all"` returns all matches detected in `haystack`. #' #' - `"any"` returns any match detected in `haystack` with no guarantees on #' which match will be returned. It is often faster than `"first"` and #' `"last"` if you just need to detect if there is at least one match. #' #' - `"first"` returns the first match detected in `haystack`. #' #' - `"last"` returns the last match detected in `haystack`. #' #' @param relationship Handling of the expected relationship between #' `needles` and `haystack`. If the expectations chosen from the list below #' are invalidated, an error is thrown. #' #' - `"none"` doesn't perform any relationship checks. #' #' - `"one-to-one"` expects: #' - Each value in `needles` matches at most 1 value in `haystack`. #' - Each value in `haystack` matches at most 1 value in `needles`. #' #' - `"one-to-many"` expects: #' - Each value in `needles` matches any number of values in `haystack`. #' - Each value in `haystack` matches at most 1 value in `needles`. #' #' - `"many-to-one"` expects: #' - Each value in `needles` matches at most 1 value in `haystack`. #' - Each value in `haystack` matches any number of values in `needles`. #' #' - `"many-to-many"` expects: #' - Each value in `needles` matches any number of values in `haystack`. #' - Each value in `haystack` matches any number of values in `needles`. #' #' This performs no checks, and is identical to `"none"`, but is provided to #' allow you to be explicit about this relationship if you know it exists. #' #' - `"warn-many-to-many"` doesn't assume there is any known relationship, but #' will warn if `needles` and `haystack` have a many-to-many relationship #' (which is typically unexpected), encouraging you to either take a closer #' look at your inputs or make this relationship explicit by specifying #' `"many-to-many"`. #' #' `relationship` is applied after `filter` and `multiple` to allow potential #' multiple matches to be filtered out first. #' #' `relationship` doesn't handle cases where there are zero matches. For that, #' see `no_match` and `remaining`. #' #' @param needles_arg,haystack_arg Argument tags for `needles` and `haystack` #' used in error messages. #' #' @return A two column data frame containing the locations of the matches. #' #' - `needles` is an integer vector containing the location of #' the needle currently being matched. #' #' - `haystack` is an integer vector containing the location of the #' corresponding match in the haystack for the current needle. #' #' @export #' @examples #' x <- c(1, 2, NA, 3, NaN) #' y <- c(2, 1, 4, NA, 1, 2, NaN) #' #' # By default, for each value of `x`, all matching locations in `y` are #' # returned #' matches <- vec_locate_matches(x, y) #' matches #' #' # The result can be used to slice the inputs to align them #' data_frame( #' x = vec_slice(x, matches$needles), #' y = vec_slice(y, matches$haystack) #' ) #' #' # If multiple matches are present, control which is returned with `multiple` #' vec_locate_matches(x, y, multiple = "first") #' vec_locate_matches(x, y, multiple = "last") #' vec_locate_matches(x, y, multiple = "any") #' #' # Use `relationship` to add constraints and error on multiple matches if #' # they aren't expected #' try(vec_locate_matches(x, y, relationship = "one-to-one")) #' #' # In this case, the `NA` in `y` matches two rows in `x` #' try(vec_locate_matches(x, y, relationship = "one-to-many")) #' #' # By default, `NA` is treated as being identical to `NaN`. #' # Using `nan_distinct = TRUE` treats `NA` and `NaN` as different values, so #' # `NA` can only match `NA`, and `NaN` can only match `NaN`. #' vec_locate_matches(x, y, nan_distinct = TRUE) #' #' # If you never want missing values to match, set `incomplete = NA` to return #' # `NA` in the `haystack` column anytime there was an incomplete value #' # in `needles`. #' vec_locate_matches(x, y, incomplete = NA) #' #' # Using `incomplete = NA` allows us to enforce the one-to-many relationship #' # that we couldn't before #' vec_locate_matches(x, y, relationship = "one-to-many", incomplete = NA) #' #' # `no_match` allows you to specify the returned value for a needle with #' # zero matches. Note that this is different from an incomplete value, #' # so specifying `no_match` allows you to differentiate between incomplete #' # values and unmatched values. #' vec_locate_matches(x, y, incomplete = NA, no_match = 0L) #' #' # If you want to require that every `needle` has at least 1 match, set #' # `no_match` to `"error"`: #' try(vec_locate_matches(x, y, incomplete = NA, no_match = "error")) #' #' # By default, `vec_locate_matches()` detects equality between `needles` and #' # `haystack`. Using `condition`, you can detect where an inequality holds #' # true instead. For example, to find every location where `x[[i]] >= y`: #' matches <- vec_locate_matches(x, y, condition = ">=") #' #' data_frame( #' x = vec_slice(x, matches$needles), #' y = vec_slice(y, matches$haystack) #' ) #' #' # You can limit which matches are returned with a `filter`. For example, #' # with the above example you can filter the matches returned by `x[[i]] >= y` #' # down to only the ones containing the maximum `y` value of those matches. #' matches <- vec_locate_matches(x, y, condition = ">=", filter = "max") #' #' # Here, the matches for the `3` needle value have been filtered down to #' # only include the maximum haystack value of those matches, `2`. This is #' # often referred to as a rolling join. #' data_frame( #' x = vec_slice(x, matches$needles), #' y = vec_slice(y, matches$haystack) #' ) #' #' # In the very rare case that you need to generate locations for a #' # cross match, where every value of `x` is forced to match every #' # value of `y` regardless of what the actual values are, you can #' # replace `x` and `y` with integer vectors of the same size that contain #' # a single value and match on those instead. #' x_proxy <- vec_rep(1L, vec_size(x)) #' y_proxy <- vec_rep(1L, vec_size(y)) #' nrow(vec_locate_matches(x_proxy, y_proxy)) #' vec_size(x) * vec_size(y) #' #' # By default, missing values will match other missing values when using #' # `==`, `>=`, or `<=` conditions, but not when using `>` or `<` conditions. #' # This is similar to how `vec_compare(x, y, na_equal = TRUE)` works. #' x <- c(1, NA) #' y <- c(NA, 2) #' #' vec_locate_matches(x, y, condition = "<=") #' vec_locate_matches(x, y, condition = "<") #' #' # You can force missing values to match regardless of the `condition` #' # by using `incomplete = "match"` #' vec_locate_matches(x, y, condition = "<", incomplete = "match") #' #' # You can also use data frames for `needles` and `haystack`. The #' # `condition` will be recycled to the number of columns in `needles`, or #' # you can specify varying conditions per column. In this example, we take #' # a vector of date `values` and find all locations where each value is #' # between lower and upper bounds specified by the `haystack`. #' values <- as.Date("2019-01-01") + 0:9 #' needles <- data_frame(lower = values, upper = values) #' #' set.seed(123) #' lower <- as.Date("2019-01-01") + sample(10, 10, replace = TRUE) #' upper <- lower + sample(3, 10, replace = TRUE) #' haystack <- data_frame(lower = lower, upper = upper) #' #' # (values >= lower) & (values <= upper) #' matches <- vec_locate_matches(needles, haystack, condition = c(">=", "<=")) #' #' data_frame( #' lower = vec_slice(lower, matches$haystack), #' value = vec_slice(values, matches$needle), #' upper = vec_slice(upper, matches$haystack) #' ) vec_locate_matches <- function( needles, haystack, ..., condition = "==", filter = "none", incomplete = "compare", no_match = NA_integer_, remaining = "drop", multiple = "all", relationship = "none", nan_distinct = FALSE, chr_proxy_collate = NULL, needles_arg = "needles", haystack_arg = "haystack", error_call = current_env() ) { check_dots_empty0(...) frame <- environment() .Call( ffi_locate_matches, needles, haystack, condition, filter, incomplete, no_match, remaining, multiple, relationship, nan_distinct, chr_proxy_collate, needles_arg, haystack_arg, frame ) } # ------------------------------------------------------------------------------ #' Internal FAQ - Implementation of `vec_locate_matches()` #' #' ```{r, child = "man/faq/internal/matches-algorithm.Rmd"} #' ``` #' #' @name internal-faq-matches-algorithm NULL # ------------------------------------------------------------------------------ # Helper used for testing and in the internal FAQ. # It needs to live in R/ to be usable by the FAQ Rmd. compute_nesting_container_info <- function(x, condition) { .Call(ffi_compute_nesting_container_info, x, condition) } # ------------------------------------------------------------------------------ stop_matches <- function( message = NULL, class = NULL, ..., call = caller_env() ) { stop_vctrs( message = message, class = c(class, "vctrs_error_matches"), ..., call = call ) } warn_matches <- function(message, class = NULL, ..., call = caller_env()) { warn_vctrs( message = message, class = c(class, "vctrs_warning_matches"), ..., call = call ) } # ------------------------------------------------------------------------------ stop_matches_overflow <- function(size, call) { size <- format(size, scientific = FALSE) # Pre-generating the message in this case because we want to use # `.internal = TRUE` and that doesn't work with lazy messages message <- c( "Match procedure results in an allocation larger than 2^31-1 elements.", i = glue::glue("Attempted allocation size was {size}.") ) stop_matches( message = message, class = "vctrs_error_matches_overflow", size = size, call = call, .internal = TRUE ) } # ------------------------------------------------------------------------------ stop_matches_nothing <- function(i, needles_arg, haystack_arg, call) { stop_matches( class = "vctrs_error_matches_nothing", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_nothing <- function(cnd, ...) { glue::glue( "Each value of `{cnd$needles_arg}` must have a match in `{cnd$haystack_arg}`." ) } #' @export cnd_body.vctrs_error_matches_nothing <- function(cnd, ...) { bullet <- glue::glue( "Location {cnd$i} of `{cnd$needles_arg}` does not have a match." ) bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ stop_matches_remaining <- function(i, needles_arg, haystack_arg, call) { stop_matches( class = "vctrs_error_matches_remaining", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_remaining <- function(cnd, ...) { glue::glue( "Each value of `{cnd$haystack_arg}` must be matched by `{cnd$needles_arg}`." ) } #' @export cnd_body.vctrs_error_matches_remaining <- function(cnd, ...) { bullet <- glue::glue( "Location {cnd$i} of `{cnd$haystack_arg}` was not matched." ) bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ stop_matches_incomplete <- function(i, needles_arg, call) { stop_matches( class = "vctrs_error_matches_incomplete", i = i, needles_arg = needles_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_incomplete <- function(cnd, ...) { glue::glue("`{cnd$needles_arg}` can't contain missing values.") } #' @export cnd_body.vctrs_error_matches_incomplete <- function(cnd, ...) { bullet <- glue::glue("Location {cnd$i} contains missing values.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ stop_matches_multiple <- function(i, needles_arg, haystack_arg, call) { stop_matches( class = "vctrs_error_matches_multiple", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_multiple <- function(cnd, ...) { cnd_matches_multiple_header(cnd$needles_arg, cnd$haystack_arg) } #' @export cnd_body.vctrs_error_matches_multiple <- function(cnd, ...) { cnd_matches_multiple_body(cnd$i, cnd$needles_arg) } # ------------------------------------------------------------------------------ warn_matches_multiple <- function(i, needles_arg, haystack_arg, call) { message <- paste( cnd_matches_multiple_header(needles_arg, haystack_arg), cnd_matches_multiple_body(i, needles_arg), sep = "\n" ) warn_matches( message = message, class = "vctrs_warning_matches_multiple", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } # ------------------------------------------------------------------------------ stop_matches_relationship_one_to_one <- function( i, which, needles_arg, haystack_arg, call ) { stop_matches_relationship( class = "vctrs_error_matches_relationship_one_to_one", i = i, which = which, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_relationship_one_to_one <- function(cnd, ...) { if (cnd$which == "needles") { cnd_matches_multiple_header(cnd$needles_arg, cnd$haystack_arg) } else { cnd_matches_multiple_header(cnd$haystack_arg, cnd$needles_arg) } } #' @export cnd_body.vctrs_error_matches_relationship_one_to_one <- function(cnd, ...) { if (cnd$which == "needles") { cnd_matches_multiple_body(cnd$i, cnd$needles_arg) } else { cnd_matches_multiple_body(cnd$i, cnd$haystack_arg) } } stop_matches_relationship_one_to_many <- function( i, needles_arg, haystack_arg, call ) { stop_matches_relationship( class = "vctrs_error_matches_relationship_one_to_many", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_relationship_one_to_many <- function(cnd, ...) { cnd_matches_multiple_header(cnd$haystack_arg, cnd$needles_arg) } #' @export cnd_body.vctrs_error_matches_relationship_one_to_many <- function(cnd, ...) { cnd_matches_multiple_body(cnd$i, cnd$haystack_arg) } stop_matches_relationship_many_to_one <- function( i, needles_arg, haystack_arg, call ) { stop_matches_relationship( class = "vctrs_error_matches_relationship_many_to_one", i = i, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } #' @export cnd_header.vctrs_error_matches_relationship_many_to_one <- function(cnd, ...) { cnd_matches_multiple_header(cnd$needles_arg, cnd$haystack_arg) } #' @export cnd_body.vctrs_error_matches_relationship_many_to_one <- function(cnd, ...) { cnd_matches_multiple_body(cnd$i, cnd$needles_arg) } stop_matches_relationship <- function(class = NULL, ..., call = caller_env()) { stop_matches( class = c(class, "vctrs_error_matches_relationship"), ..., call = call ) } cnd_matches_multiple_header <- function(x_arg, y_arg) { glue::glue( "Each value of `{x_arg}` can match at most 1 value from `{y_arg}`." ) } cnd_matches_multiple_body <- function(i, name) { bullet <- glue::glue("Location {i} of `{name}` matches multiple values.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ warn_matches_relationship_many_to_many <- function( i, j, needles_arg, haystack_arg, call ) { message <- paste( glue::glue( "Detected an unexpected many-to-many relationship between `{needles_arg}` and `{haystack_arg}`." ), cnd_matches_multiple_body(i, needles_arg), cnd_matches_multiple_body(j, haystack_arg), sep = "\n" ) warn_matches_relationship( message = message, class = "vctrs_warning_matches_relationship_many_to_many", i = i, j = j, needles_arg = needles_arg, haystack_arg = haystack_arg, call = call ) } warn_matches_relationship <- function( message, class = NULL, ..., call = caller_env() ) { warn_matches( message = message, class = c(class, "vctrs_warning_matches_relationship"), ..., call = call ) } vctrs/R/altrep-rle.R0000644000176200001440000000036615113325071014011 0ustar liggesuserschr_rle <- function(...) { new_chr_rle(c(...)) } new_chr_rle <- function(x) { stopifnot(is.integer(x), is_named(x)) .Call(vctrs_altrep_rle_Make, x) } chr_rle_is_materialized <- function(x) { .Call(vctrs_altrep_rle_is_materialized, x) } vctrs/R/type-explore.R0000644000176200001440000000134115065005761014400 0ustar liggesuserscoerces_to <- function(x, y, using = "strict") { type_max <- switch( using, strict = vec_ptype2, base_c = c, base_unlist = function(x, y) unlist(list(x, y)), base_modify = function(x, y) `[<-`(x, 2, value = y) ) tryCatch( { type <- suppressWarnings(type_max(x, y)) vec_ptype_full(type) }, error = function(e) { NA_character_ } ) } maxtype_mat <- function(types, using = "strict") { names(types) <- map_chr(types, function(x) vec_ptype_full(vec_ptype(x))) grid <- expand.grid(x = types, y = types) grid$max <- map2_chr(grid$x, grid$y, coerces_to, using = using) matrix( grid$max, nrow = length(types), dimnames = list(names(types), names(types)) ) } vctrs/R/aaa.R0000644000176200001440000000104015105625314012456 0ustar liggesusersreplace_from <- function(what, pkg, to = topenv(caller_env())) { if (what %in% getNamespaceExports(pkg)) { env <- ns_env(pkg) } else { env <- to } env_get(env, what, inherit = TRUE) } # nocov start # Useful for micro-optimising default arguments requiring evaluation, # such as `param = c("foo", "bar")`. Buys about 0.6us on my desktop. fn_inline_formals <- function(fn, names) { stopifnot(typeof(fn) == "closure") fmls <- formals(fn) fmls[names] <- lapply(fmls[names], eval) formals(fn) <- fmls fn } # nocov end vctrs/R/expand.R0000644000176200001440000000376715065005761013240 0ustar liggesusers#' Create a data frame from all combinations of the inputs #' #' @description #' `vec_expand_grid()` creates a new data frame by creating a grid of all #' possible combinations of the input vectors. It is inspired by #' [expand.grid()]. Compared with `expand.grid()`, it: #' #' - Produces sorted output by default by varying the first column the slowest, #' rather than the fastest. Control this with `.vary`. #' #' - Never converts strings to factors. #' #' - Does not add additional attributes. #' #' - Drops `NULL` inputs. #' #' - Can expand any vector type, including data frames and [records][new_rcrd]. #' #' @details #' If any input is empty (i.e. size 0), then the result will have 0 rows. #' #' If no inputs are provided, the result is a 1 row data frame with 0 columns. #' This is consistent with the fact that `prod()` with no inputs returns `1`. #' #' @inheritParams rlang::args_error_context #' @inheritParams df_list #' #' @param ... Name-value pairs. The name will become the column name in the #' resulting data frame. #' #' @param .vary One of: #' #' - `"slowest"` to vary the first column slowest. This produces sorted #' output and is generally the most useful. #' #' - `"fastest"` to vary the first column fastest. This matches the behavior #' of [expand.grid()]. #' #' @returns #' A data frame with as many columns as there are inputs in `...` and as many #' rows as the [prod()] of the sizes of the inputs. #' #' @export #' @examples #' vec_expand_grid(x = 1:2, y = 1:3) #' #' # Use `.vary` to match `expand.grid()`: #' vec_expand_grid(x = 1:2, y = 1:3, .vary = "fastest") #' #' # Can also expand data frames #' vec_expand_grid( #' x = data_frame(a = 1:2, b = 3:4), #' y = 1:4 #' ) vec_expand_grid <- function( ..., .vary = "slowest", .name_repair = "check_unique", .error_call = current_env() ) { .vary <- arg_match0( arg = .vary, values = c("slowest", "fastest"), error_call = .error_call ) .Call(ffi_vec_expand_grid, list2(...), .vary, .name_repair, environment()) } vctrs/R/import-standalone-obj-type.R0000644000176200001440000002105015072256373017136 0ustar liggesusers# Standalone file: do not edit by hand # Source: https://github.com/r-lib/rlang/blob/HEAD/R/standalone-obj-type.R # Generated by: usethis::use_standalone("r-lib/rlang", "obj-type") # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-obj-type.R # last-updated: 2025-10-02 # license: https://unlicense.org # imports: rlang (>= 1.1.0) # --- # # ## Changelog # # 2025-10-02: # - `obj_type_friendly()` now shows the dimensionality of arrays. # # 2024-02-14: # - `obj_type_friendly()` now works for S7 objects. # # 2023-05-01: # - `obj_type_friendly()` now only displays the first class of S3 objects. # # 2023-03-30: # - `stop_input_type()` now handles `I()` input literally in `arg`. # # 2022-10-04: # - `obj_type_friendly(value = TRUE)` now shows numeric scalars # literally. # - `stop_friendly_type()` now takes `show_value`, passed to # `obj_type_friendly()` as the `value` argument. # # 2022-10-03: # - Added `allow_na` and `allow_null` arguments. # - `NULL` is now backticked. # - Better friendly type for infinities and `NaN`. # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Prefixed usage of rlang functions with `rlang::`. # # 2022-06-22: # - `friendly_type_of()` is now `obj_type_friendly()`. # - Added `obj_type_oo()`. # # 2021-12-20: # - Added support for scalar values and empty vectors. # - Added `stop_input_type()` # # 2021-06-30: # - Added support for missing arguments. # # 2021-04-19: # - Added support for matrices and arrays (#141). # - Added documentation. # - Added changelog. # # nocov start #' Return English-friendly type #' @param x Any R object. #' @param value Whether to describe the value of `x`. Special values #' like `NA` or `""` are always described. #' @param length Whether to mention the length of vectors and lists. #' @return A string describing the type. Starts with an indefinite #' article, e.g. "an integer vector". #' @noRd obj_type_friendly <- function(x, value = TRUE) { if (is_missing(x)) { return("absent") } if (is.object(x)) { if (inherits(x, "quosure")) { type <- "quosure" } else { type <- class(x)[[1L]] } return(sprintf("a <%s> object", type)) } if (!is_vector(x)) { return(.rlang_as_friendly_type(typeof(x))) } n_dim <- length(dim(x)) if (!n_dim) { if (!is_list(x) && length(x) == 1) { if (is_na(x)) { return(switch( typeof(x), logical = "`NA`", integer = "an integer `NA`", double = if (is.nan(x)) { "`NaN`" } else { "a numeric `NA`" }, complex = "a complex `NA`", character = "a character `NA`", .rlang_stop_unexpected_typeof(x) )) } show_infinites <- function(x) { if (x > 0) { "`Inf`" } else { "`-Inf`" } } str_encode <- function(x, width = 30, ...) { if (nchar(x) > width) { x <- substr(x, 1, width - 3) x <- paste0(x, "...") } encodeString(x, ...) } if (value) { if (is.numeric(x) && is.infinite(x)) { return(show_infinites(x)) } if (is.numeric(x) || is.complex(x)) { number <- as.character(round(x, 2)) what <- if (is.complex(x)) "the complex number" else "the number" return(paste(what, number)) } return(switch( typeof(x), logical = if (x) "`TRUE`" else "`FALSE`", character = { what <- if (nzchar(x)) "the string" else "the empty string" paste(what, str_encode(x, quote = "\"")) }, raw = paste("the raw value", as.character(x)), .rlang_stop_unexpected_typeof(x) )) } return(switch( typeof(x), logical = "a logical value", integer = "an integer", double = if (is.infinite(x)) show_infinites(x) else "a number", complex = "a complex number", character = if (nzchar(x)) "a string" else "\"\"", raw = "a raw value", .rlang_stop_unexpected_typeof(x) )) } if (length(x) == 0) { return(switch( typeof(x), logical = "an empty logical vector", integer = "an empty integer vector", double = "an empty numeric vector", complex = "an empty complex vector", character = "an empty character vector", raw = "an empty raw vector", list = "an empty list", .rlang_stop_unexpected_typeof(x) )) } } vec_type_friendly(x) } vec_type_friendly <- function(x, length = FALSE) { if (!is_vector(x)) { abort("`x` must be a vector.") } type <- typeof(x) n_dim <- length(dim(x)) add_length <- function(type) { if (length && !n_dim) { paste0(type, sprintf(" of length %s", length(x))) } else { type } } if (type == "list") { if (n_dim == 0) { return(add_length("a list")) } else if (n_dim == 2) { if (is.data.frame(x)) { return("a data frame") } else { return("a list matrix") } } else { return(sprintf("a list %sD array", n_dim)) } } type <- switch( type, logical = "a logical %s", integer = "an integer %s", numeric = , double = "a double %s", complex = "a complex %s", character = "a character %s", raw = "a raw %s", type = paste0("a ", type, " %s") ) if (n_dim == 0) { kind <- "vector" } else if (n_dim == 2) { kind <- "matrix" } else { kind <- sprintf("%sD array", n_dim) } out <- sprintf(type, kind) if (n_dim >= 2) { out } else { add_length(out) } } .rlang_as_friendly_type <- function(type) { switch( type, list = "a list", NULL = "`NULL`", environment = "an environment", externalptr = "a pointer", weakref = "a weak reference", S4 = "an S4 object", name = , symbol = "a symbol", language = "a call", pairlist = "a pairlist node", expression = "an expression vector", char = "an internal string", promise = "an internal promise", ... = "an internal dots object", any = "an internal `any` object", bytecode = "an internal bytecode object", primitive = , builtin = , special = "a primitive function", closure = "a function", type ) } .rlang_stop_unexpected_typeof <- function(x, call = caller_env()) { abort( sprintf("Unexpected type <%s>.", typeof(x)), call = call ) } #' Return OO type #' @param x Any R object. #' @return One of `"bare"` (for non-OO objects), `"S3"`, `"S4"`, #' `"R6"`, or `"S7"`. #' @noRd obj_type_oo <- function(x) { if (!is.object(x)) { return("bare") } class <- inherits(x, c("R6", "S7_object"), which = TRUE) if (class[[1]]) { "R6" } else if (class[[2]]) { "S7" } else if (isS4(x)) { "S4" } else { "S3" } } #' @param x The object type which does not conform to `what`. Its #' `obj_type_friendly()` is taken and mentioned in the error message. #' @param what The friendly expected type as a string. Can be a #' character vector of expected types, in which case the error #' message mentions all of them in an "or" enumeration. #' @param show_value Passed to `value` argument of `obj_type_friendly()`. #' @param ... Arguments passed to [abort()]. #' @inheritParams args_error_context #' @noRd stop_input_type <- function( x, what, ..., allow_na = FALSE, allow_null = FALSE, show_value = TRUE, arg = caller_arg(x), call = caller_env() ) { # From standalone-cli.R cli <- env_get_list( nms = c("format_arg", "format_code"), last = topenv(), default = function(x) sprintf("`%s`", x), inherit = TRUE ) if (allow_na) { what <- c(what, cli$format_code("NA")) } if (allow_null) { what <- c(what, cli$format_code("NULL")) } if (length(what)) { what <- oxford_comma(what) } if (inherits(arg, "AsIs")) { format_arg <- identity } else { format_arg <- cli$format_arg } message <- sprintf( "%s must be %s, not %s.", format_arg(arg), what, obj_type_friendly(x, value = show_value) ) abort(message, ..., call = call, arg = arg) } oxford_comma <- function(chr, sep = ", ", final = "or") { n <- length(chr) if (n < 2) { return(chr) } head <- chr[seq_len(n - 1)] last <- chr[n] head <- paste(head, collapse = sep) # Write a or b. But a, b, or c. if (n > 2) { paste0(head, sep, final, " ", last) } else { paste0(head, " ", final, " ", last) } } # nocov end vctrs/R/altrep.R0000644000176200001440000000007115107411704013224 0ustar liggesusersis_altrep <- function(x) { .Call(vctrs_is_altrep, x) } vctrs/R/list-unchop.R0000644000176200001440000000572215072256373014224 0ustar liggesusers#' Combine a list of vectors #' #' @description #' While `list_unchop()` is not deprecated, we now recommend that you use #' either: #' #' - `list_combine(x, indices = indices, size = size)` over #' `list_unchop(x, indices = indices)` #' #' - `vec_c(!!!x)` over `list_unchop(x)` #' #' `list_unchop()` combines a list of vectors into a single vector, placing #' elements in the output according to the locations specified by `indices`. It #' is similar to [vec_c()], but gives greater control over how the elements are #' combined. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams vec_c #' #' @param x A list #' #' @param indices A list of positive integer vectors specifying the #' locations to place elements of `x` in. Each element of `x` is recycled to #' the size of the corresponding index vector. The size of `indices` must #' match the size of `x`. If `NULL`, `x` is combined in the order it is #' provided in, which is equivalent to using [vec_c()]. #' #' @param ptype If `NULL`, the default, the output type is determined by #' computing the common type across all elements of `x`. Alternatively, you #' can supply `ptype` to give the output a known type. #' #' @returns #' A vector of type `vec_ptype_common(!!!x)`, or `ptype`, if specified. The size #' is computed as `vec_size_common(!!!indices)` unless the indices are `NULL`, #' in which case the size is `vec_size_common(!!!x)`. #' #' @section Dependencies: #' - [vec_c()] #' #' @keywords internal #' #' @export #' @examples #' # If `indices` selects every value in `x` exactly once, #' # in any order, then `list_unchop()` inverts `vec_chop()` #' x <- c("a", "b", "c", "d") #' indices <- list(2, c(3, 1), 4) #' vec_chop(x, indices = indices) #' list_unchop(vec_chop(x, indices = indices), indices = indices) #' #' # When unchopping, size 1 elements of `x` are recycled #' # to the size of the corresponding index #' list_unchop(list(1, 2:3), indices = list(c(1, 3, 5), c(2, 4))) #' #' # Names are retained, and outer names can be combined with inner #' # names through the use of a `name_spec` #' lst <- list(x = c(a = 1, b = 2), y = 1) #' list_unchop(lst, indices = list(c(3, 2), c(1, 4)), name_spec = "{outer}_{inner}") #' #' # If you have a list of homogeneous vectors, sometimes it can be useful to #' # unchop, apply a function to the flattened vector, and then rechop according #' # to the original indices. This can be done efficiently with `list_sizes()`. #' x <- list(c(1, 2, 1), c(3, 1), 5, double()) #' x_flat <- list_unchop(x) #' x_flat <- x_flat + max(x_flat) #' vec_chop(x_flat, sizes = list_sizes(x)) list_unchop <- function( x, ..., indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c( "minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet" ), error_arg = "x", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_list_unchop, x, indices, ptype, name_spec, name_repair, environment() ) } vctrs/R/list-of-transpose.R0000644000176200001440000001132515120272011015321 0ustar liggesusers#' Transpose a list of homogenous vectors #' #' @description #' `list_of_transpose()` takes a list of homogenous vectors, transposes it, and #' returns a new list of homogenous vectors. To perform a transpose, three #' pieces of information are required: #' #' - The _list size_, from [`vec_size(x)`][vec_size()]. #' #' - The _element size_, from [`list_of_size(x)`][list_of_size()]. #' #' - The _element type_, from [`list_of_ptype(x)`][list_of_ptype()]. #' #' Because all three of these are required, this function only works on fully #' specified [list_of()]s, with both `size` and `ptype` specified. #' #' To predict the output from `list_of_transpose()`, swap the list size with the #' element size. For example: #' #' - Input: `list_of[2]` #' - Output: `list_of[3]` #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x A [list_of][list_of()] with both `size` and `ptype` specified. #' #' @param x_arg Argument name used in error messages. #' #' @returns #' A `list_of` of size `list_of_size(x)`, with an element size of `vec_size(x)` #' and an element type of `list_of_ptype(x)`. #' #' @export #' @examples #' # A form of `list_of()` that infers both ptype and size #' list_of2 <- function(...) { #' list_of(..., .ptype = NULL, .size = NULL) #' } #' #' # I: list_of[3] #' # O: list_of[2] #' list_of_transpose(list_of2(1:2, 3:4, 5:6)) #' #' # With data frames #' x <- data_frame(a = 1:2, b = letters[1:2]) #' y <- data_frame(a = 3:4, b = letters[3:4]) #' list_of_transpose(list_of2(x, y)) #' #' # Size 1 elements are recycled #' list_of_transpose(list_of2(1, 2:3, 4)) #' #' # --------------------------------------------------------------------------- #' # `NULL` handling #' #' # `NULL` values aren't allowed in `list_of_transpose()` #' x <- list_of2(1:3, NULL, 5:7, NULL) #' try(list_of_transpose(x)) #' #' # Either drop them entirely or replace them up front before transposing #' #' x_dropped <- vec_slice(x, !vec_detect_missing(x)) #' x_dropped #' #' list_of_transpose(x_dropped) #' #' x_replaced <- vec_assign(x, vec_detect_missing(x), list(NA)) #' x_replaced #' #' list_of_transpose(x_replaced) #' #' # --------------------------------------------------------------------------- #' # Reversibility #' #' # Because `list_of_transpose()` takes and returns fully specified list-ofs, #' # it is fully reversible, even in the edge cases. #' x <- list_of2(integer(), integer()) #' #' # This returns a list of size 0 #' # I: list_of[2] #' # O: list_of[0] #' out <- list_of_transpose(x) #' out #' #' # Even though there are no elements, we know the element size and type, #' # so we can transpose a second time to recover `x`. This would not be #' # possible if this function returned a bare `list()`, which would result #' # in lost information. #' # I: list_of[0] #' # O: list_of[2] #' list_of_transpose(out) #' #' # --------------------------------------------------------------------------- #' # Padding #' #' # If you'd like to pad with a missing value rather than erroring, #' # you might do something like this, which left-pads before conversion #' # to list-of. #' x <- list(1, 2:5, 6:7) #' #' sizes <- list_sizes(x) #' size <- max(sizes) #' index <- which(sizes != size) #' #' x[index] <- lapply( #' index, #' function(i) vec_c(rep(NA, times = size - sizes[[i]]), x[[i]]) #' ) #' x #' #' x <- as_list_of(x, .ptype = NULL, .size = NULL) #' #' list_of_transpose(x) list_of_transpose <- function( x, ..., x_arg = caller_arg(x), error_call = current_env() ) { check_dots_empty0(...) check_list_of(x, arg = x_arg, call = error_call) if (vec_any_missing(x)) { abort( cli::format_inline( "{arg_backtick(x_arg)} can't contain `NULL` values." ), call = error_call ) } size <- list_of_size0(x) ptype <- list_of_ptype0(x) if (is_null(size)) { abort( c( cli::format_inline( "{arg_backtick(x_arg)} must be a fully specified ``." ), i = "`size` is not specified." ), call = error_call ) } if (is_null(ptype)) { abort( c( cli::format_inline( "{arg_backtick(x_arg)} must be a fully specified ``." ), i = "`ptype` is not specified." ), call = error_call ) } x_size <- vec_size(x) sizes <- vec_rep(x_size, times = size) # Flatten pieces into one big vector out <- list_interleave( x, size = size, ptype = ptype, name_spec = "inner", x_arg = x_arg, error_call = error_call ) # Chop the one big vector into transposed pieces of size `x_size` out <- vec_chop(out, sizes = sizes) new_list_of0(out, ptype = ptype, size = x_size) } vctrs/R/proxy.R0000644000176200001440000001701515113335375013132 0ustar liggesusers#' Proxy and restore #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' `vec_proxy()` returns the data structure containing the values of a #' vector. This data structure is usually the vector itself. In this #' case the proxy is the [identity function][base::identity], which is #' the default `vec_proxy()` method. #' #' Only experts should implement special `vec_proxy()` methods, for #' these cases: #' #' - A vector has vectorised attributes, i.e. metadata for #' each element of the vector. These _record types_ are implemented #' in vctrs by returning a data frame in the proxy method. If you're #' starting your class from scratch, consider deriving from the #' [`rcrd`][new_rcrd] class. It implements the appropriate data #' frame proxy and is generally the preferred way to create a record #' class. #' #' - When you're implementing a vector on top of a non-vector type, #' like an environment or an S4 object. This is currently only #' partially supported. #' #' - S3 lists are considered scalars by default. This is the safe #' choice for list objects such as returned by `stats::lm()`. To #' declare that your S3 list class is a vector, you normally add #' `"list"` to the right of your class vector. Explicit inheritance #' from list is generally the preferred way to declare an S3 list in #' R, for instance it makes it possible to dispatch on #' `generic.list` S3 methods. #' #' If you can't modify your class vector, you can implement an #' identity proxy (i.e. a proxy method that just returns its input) #' to let vctrs know this is a vector list and not a scalar. #' #' `vec_restore()` is the inverse operation of `vec_proxy()`. It #' should only be called on vector proxies. #' #' - It undoes the transformations of `vec_proxy()`. #' #' - It restores attributes and classes. These may be lost when the #' memory values are manipulated. For example slicing a subset of a #' vector's proxy causes a new proxy to be allocated. #' #' By default vctrs restores all attributes and classes #' automatically. You only need to implement a `vec_restore()` method #' if your class has attributes that depend on the data. #' #' @param x A vector. #' @inheritParams rlang::args_dots_empty #' #' @section Proxying: #' #' You should only implement `vec_proxy()` when your type is designed #' around a non-vector class. I.e. anything that is not either: #' #' * An atomic vector #' * A bare list #' * A data frame #' #' In this case, implement `vec_proxy()` to return such a vector #' class. The vctrs operations such as [vec_slice()] are applied on #' the proxy and `vec_restore()` is called to restore the original #' representation of your type. #' #' The most common case where you need to implement `vec_proxy()` is #' for S3 lists. In vctrs, S3 lists are treated as scalars by #' default. This way we don't treat objects like model fits as #' vectors. To prevent vctrs from treating your S3 list as a scalar, #' unclass it in the `vec_proxy()` method. For instance, here is the #' definition for `list_of`: #' #' ``` #' vec_proxy.vctrs_list_of <- function(x) { #' unclass(x) #' } #' ``` #' #' Another case where you need to implement a proxy is [record #' types][new_rcrd]. Record types should return a data frame, as in #' the `POSIXlt` method: #' #' ``` #' vec_proxy.POSIXlt <- function(x) { #' new_data_frame(unclass(x)) #' } #' ``` #' #' Note that you don't need to implement `vec_proxy()` when your class #' inherits from `vctrs_vctr` or `vctrs_rcrd`. #' #' #' @section Restoring: #' #' A restore is a specialised type of cast, primarily used in #' conjunction with `NextMethod()` or a C-level function that works on #' the underlying data structure. A `vec_restore()` method can make #' the following assumptions about `x`: #' #' * It has the correct type. #' * It has the correct names. #' * It has the correct `dim` and `dimnames` attributes. #' * It is unclassed. This way you can call vctrs generics with `x` #' without triggering an infinite loop of restoration. #' #' The length may be different (for example after [vec_slice()] has #' been called), and all other attributes may have been lost. The #' method should restore all attributes so that after restoration, #' `vec_restore(vec_data(x), x)` yields `x`. #' #' To understand the difference between `vec_cast()` and `vec_restore()` #' think about factors: it doesn't make sense to cast an integer to a factor, #' but if `NextMethod()` or another low-level function has stripped attributes, #' you still need to be able to restore them. #' #' The default method copies across all attributes so you only need to #' provide your own method if your attributes require special care #' (i.e. they are dependent on the data in some way). When implementing #' your own method, bear in mind that many R users add attributes to track #' additional metadata that is important to them, so you should preserve any #' attributes that don't require special handling for your class. #' #' @section Dependencies: #' - `x` must be a vector in the vctrs sense (see [vec_is()]) #' - By default the underlying data is returned as is (identity proxy) #' #' All vector classes have a proxy, even those who don't implement any #' vctrs methods. The exception is S3 lists that don't inherit from #' `"list"` explicitly. These might have to implement an identity #' proxy for compatibility with vctrs (see discussion above). #' #' @keywords internal #' @export vec_proxy <- function(x, ...) { check_dots_empty0(...) return(.Call(ffi_vec_proxy, x)) UseMethod("vec_proxy") } #' @export vec_proxy.default <- function(x, ...) { x } #' @rdname vec_proxy #' @param to The original vector to restore to. #' @export vec_restore <- function(x, to, ...) { check_dots_empty0(...) return(.Call(ffi_vec_restore, x, to)) UseMethod("vec_restore", to) } vec_restore_dispatch <- function(x, to, ...) { UseMethod("vec_restore", to) } #' @export vec_restore.default <- function(x, to, ...) { .Call(ffi_vec_restore_default, x, to) } vec_restore_default <- function(x, to, ...) { .Call(ffi_vec_restore_default, x, to) } vec_proxy_recurse <- function(x, ...) { .Call(ffi_vec_proxy_recurse, x) } vec_restore_recurse <- function(x, to, ...) { .Call(ffi_vec_restore_recurse, x, to) } #' Extract underlying data #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Extract the data underlying an S3 vector object, i.e. the underlying #' (named) atomic vector, data frame, or list. #' #' @param x A vector or object implementing `vec_proxy()`. #' @return The data underlying `x`, free from any attributes except the names. #' #' @section Difference with `vec_proxy()`: #' #' * `vec_data()` returns unstructured data. The only attributes #' preserved are names, dims, and dimnames. #' #' Currently, due to the underlying memory architecture of R, this #' creates a full copy of the data for atomic vectors. #' #' * `vec_proxy()` may return structured data. This generic is the #' main customisation point for accessing memory values in vctrs, #' along with [vec_restore()]. #' #' Methods must return a vector type. Records and data frames will #' be processed rowwise. #' #' @keywords internal #' @export vec_data <- function(x) { obj_check_vector(x) x <- vec_proxy(x) if (is.data.frame(x)) { return(new_data_frame(x, row.names = .row_names_info(x, 0L))) } if (has_dim(x)) { x <- vec_set_attributes(x, list(dim = dim(x), dimnames = dimnames(x))) } else { x <- vec_set_attributes(x, list(names = names(x))) } # Unset S4 bit in vector-like S4 objects as_not_s4(x) } as_not_s4 <- function(x) { .Call(ffi_as_not_s4, x) } vctrs/R/conditions.R0000644000176200001440000005523515120600707014120 0ustar liggesusers#' Custom conditions for vctrs package #' #' These functions are called for their side effect of raising #' errors and warnings. #' These conditions have custom classes and structures to make #' testing easier. #' #' @inheritParams rlang::args_error_context #' @param x,y,to Vectors #' @param ...,class Only use these fields when creating a subclass. #' @param x_arg,y_arg,to_arg Argument names for `x`, `y`, and `to`. Used in #' error messages to inform the user about the locations of incompatible #' types. #' @param action An option to customize the incompatible type message depending #' on the context. Errors thrown from [vec_ptype2()] use `"combine"` and #' those thrown from [vec_cast()] use `"convert"`. #' @param details Any additional human readable details. #' @param message An overriding message for the error. `details` and #' `message` are mutually exclusive, supplying both is an error. #' #' @examples #' #' # Most of the time, `maybe_lossy_cast()` returns its input normally: #' maybe_lossy_cast( #' c("foo", "bar"), #' NA, #' "", #' lossy = c(FALSE, FALSE), #' x_arg = "", #' to_arg = "" #' ) #' #' # If `lossy` has any `TRUE`, an error is thrown: #' try(maybe_lossy_cast( #' c("foo", "bar"), #' NA, #' "", #' lossy = c(FALSE, TRUE), #' x_arg = "", #' to_arg = "" #' )) #' #' # Unless lossy casts are allowed: #' allow_lossy_cast( #' maybe_lossy_cast( #' c("foo", "bar"), #' NA, #' "", #' lossy = c(FALSE, TRUE), #' x_arg = "", #' to_arg = "" #' ) #' ) #' #' @keywords internal #' @name vctrs-conditions NULL stop_vctrs <- function( message = NULL, class = NULL, ..., call = caller_env() ) { abort( message, class = c(class, "vctrs_error"), ..., call = call ) } warn_vctrs <- function( message = NULL, class = NULL, ..., call = caller_env() ) { warn( message, class = c(class, "vctrs_warning"), ..., call = call ) } stop_incompatible <- function( x, y, ..., details = NULL, message = NULL, class = NULL, call = caller_env() ) { stop_vctrs( message, class = c(class, "vctrs_error_incompatible"), x = x, y = y, details = details, ..., call = call ) } #' @return #' `stop_incompatible_*()` unconditionally raise an error of class #' `"vctrs_error_incompatible_*"` and `"vctrs_error_incompatible"`. #' #' @rdname vctrs-conditions #' @export stop_incompatible_type <- function( x, y, ..., x_arg, y_arg, action = c("combine", "convert"), details = NULL, message = NULL, class = NULL, call = caller_env() ) { obj_check_vector(x, arg = x_arg) obj_check_vector(y, arg = y_arg) action <- arg_match(action) message <- cnd_type_message( x, y, x_arg, y_arg, details, action, message, from_dispatch = match_from_dispatch(...) ) subclass <- switch( action, combine = "vctrs_error_ptype2", convert = "vctrs_error_cast" ) stop_incompatible( x, y, x_arg = x_arg, y_arg = y_arg, details = details, ..., message = message, class = c(class, subclass, "vctrs_error_incompatible_type"), call = call ) } #' @rdname vctrs-conditions #' @export stop_incompatible_cast <- function( x, to, ..., x_arg, to_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) { stop_incompatible_type( x = x, y = to, to = to, ..., x_arg = x_arg, y_arg = to_arg, to_arg = to_arg, action = "convert", details = details, message = message, class = class, call = call ) } stop_incompatible_shape <- function( x, y, x_size, y_size, axis, x_arg, y_arg, call = caller_env() ) { details <- format_error_bullets(c( x = glue::glue( "Incompatible sizes {x_size} and {y_size} along axis {axis}." ) )) stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = details, call = call ) } type_actions <- c( "combine", "convert" ) cnd_type_separator <- function(action) { if (identical(action, "combine")) { "and" } else if (identical(action, "convert")) { "to" } else { abort("Internal error: Unknown `action`.") } } cnd_type_message <- function( x, y, x_arg, y_arg, details, action, message, from_dispatch = FALSE, fallback = NULL ) { if (!is_null(message)) { if (!is_null(details)) { abort("Can't supply both `message` and `details`.") } return(message) } x_arg <- arg_as_string(x_arg) y_arg <- arg_as_string(y_arg) if (nzchar(x_arg)) { x_name <- paste0(" `", x_arg, "` ") } else { x_name <- " " } if (nzchar(y_arg)) { y_name <- paste0(" `", y_arg, "` ") } else { y_name <- " " } separator <- cnd_type_separator(action) if (is.data.frame(x) && is.data.frame(y)) { if (vec_is_coercible(new_data_frame(x), new_data_frame(y))) { x_type <- cnd_type_message_df_label(x) y_type <- cnd_type_message_df_label(y) } else { x_type <- vec_ptype_full(x) y_type <- vec_ptype_full(y) } } else { x_type <- cnd_type_message_type_label(x) y_type <- cnd_type_message_type_label(y) } converting <- action == "convert" # If we are here directly from dispatch, this means there is no # ptype2 method implemented and the is-same-class fallback has # failed because of diverging attributes. The author of the class # should implement a ptype2 method as documented in the FAQ # indicated below. if (from_dispatch && !converting && identical(class(x)[[1]], class(y)[[1]])) { details <- c(incompatible_attrib_bullets(), details) details <- format_error_bullets(details) } if (is_null(fallback)) { end <- "." } else { end <- glue::glue("; falling back to {fallback}.") } if (converting && nzchar(y_arg)) { header <- glue::glue( "Can't convert{x_name}<{x_type}> to match type of{y_name}<{y_type}>{end}" ) } else { header <- glue::glue( "Can't {action}{x_name}<{x_type}> {separator}{y_name}<{y_type}>{end}" ) } paste_line(header, details) } cnd_type_message_type_label <- function(x) { if (is.data.frame(x)) { class(x)[[1]] } else { vec_ptype_full(x) } } incompatible_attrib_bullets <- function() { c( x = "Some attributes are incompatible.", i = "The author of the class should implement vctrs methods.", i = "See ." ) } cnd_type_message_df_label <- function(x) { x <- class(x)[[1]] if (identical(x, "tbl_df")) { "tibble" } else { x } } #' @rdname vctrs-conditions #' @export stop_incompatible_op <- function( op, x, y, details = NULL, ..., message = NULL, class = NULL, call = caller_env() ) { message <- message %||% glue_lines( "<{vec_ptype_full(x)}> {op} <{vec_ptype_full(y)}> is not permitted", details ) stop_incompatible( x, y, op = op, details = details, ..., message = message, class = c(class, "vctrs_error_incompatible_op"), call = call ) } #' @rdname vctrs-conditions #' @export stop_incompatible_size <- function( x, y, x_size, y_size, ..., x_arg, y_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) { stop_incompatible( x, y, x_size = x_size, y_size = y_size, ..., x_arg = x_arg, y_arg = y_arg, details = details, message = message, class = c(class, "vctrs_error_incompatible_size"), call = call ) } #' @export cnd_header.vctrs_error_incompatible_size <- function(cnd, ...) { if (is_string(cnd$message) && nzchar(cnd$message)) { return(cnd$message) } x_size <- vec_cast(cnd$x_size, int()) y_size <- vec_cast(cnd$y_size, int()) stopifnot( length(x_size) == 1, length(y_size) == 1 ) x_arg <- arg_as_string(cnd$x_arg) y_arg <- arg_as_string(cnd$y_arg) if (nzchar(x_arg)) { x_tag <- glue::glue("`{x_arg}` (size {x_size})") } else { x_tag <- glue::glue("input of size {x_size}") } if (nzchar(y_arg)) { y_tag <- glue::glue("to match `{y_arg}` (size {y_size})") } else { y_tag <- glue::glue("to size {y_size}") } glue::glue("Can't recycle {x_tag} {y_tag}.") } #' @export cnd_body.vctrs_error_incompatible_size <- function(cnd, ...) { cnd$details } #' Lossy cast error #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' By default, lossy casts are an error. Use `allow_lossy_cast()` to #' silence these errors and continue with the partial results. In this #' case the lost values are typically set to `NA` or to a lower value #' resolution, depending on the type of cast. #' #' Lossy cast errors are thrown by `maybe_lossy_cast()`. Unlike #' functions prefixed with `stop_`, `maybe_lossy_cast()` usually #' returns a result. If a lossy cast is detected, it throws an error, #' unless it's been wrapped in `allow_lossy_cast()`. In that case, it #' returns the result silently. #' #' @inheritParams stop_incompatible_cast #' @inheritParams vec_cast #' @inheritParams rlang::args_error_context #' @param result The result of a potentially lossy cast. #' @param to Type to cast to. #' @param lossy A logical vector indicating which elements of `result` #' were lossy. #' #' Can also be a single `TRUE`, but note that `locations` picks up #' locations from this vector by default. In this case, supply your #' own location vector, possibly empty. #' @param loss_type The kind of lossy cast to be mentioned in error #' messages. Can be loss of precision (for instance from double to #' integer) or loss of generality (from character to factor). #' @param locations An optional integer vector giving the #' locations where `x` lost information. #' @param .deprecation If `TRUE`, the error is downgraded to a #' deprecation warning. This is useful for transitioning your class #' to a stricter conversion scheme. The warning advises your users #' to wrap their code with `allow_lossy_cast()`. #' @keywords internal #' @export maybe_lossy_cast <- function( result, x, to, lossy = NULL, locations = NULL, ..., loss_type = c("precision", "generality"), x_arg, to_arg, call = caller_env(), details = NULL, message = NULL, class = NULL, .deprecation = FALSE ) { if (!any(lossy)) { return(result) } if (.deprecation) { maybe_warn_deprecated_lossy_cast(x, to, loss_type, x_arg, to_arg) return(result) } locations <- locations %||% which(lossy) withRestarts( vctrs_restart_error_cast_lossy = function() result, stop_lossy_cast( x = x, to = to, result = result, locations = locations, ..., loss_type = loss_type, x_arg = x_arg, to_arg = to_arg, details = details, message = message, class = class, call = call ) ) } stop_lossy_cast <- function( x, to, result, locations = NULL, ..., loss_type, x_arg, to_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) { stop_incompatible_cast( x = x, to = to, result = result, locations = locations, ..., loss_type = loss_type, x_arg = x_arg, to_arg = to_arg, details = details, class = c(class, "vctrs_error_cast_lossy"), call = call ) } #' @export cnd_header.vctrs_error_cast_lossy <- function(cnd, ...) { x_label <- format_arg_label(vec_ptype_full(cnd$x), cnd$x_arg) to_label <- format_arg_label(vec_ptype_full(cnd$y), cnd$y_arg) loss_type <- loss_type(cnd$loss_type) glue::glue( "Can't convert from {x_label} to {to_label} due to loss of {loss_type}." ) } #' @export cnd_body.vctrs_error_cast_lossy <- function(cnd, ...) { if (length(cnd$locations)) { format_error_bullets(inline_list("Locations: ", cnd$locations)) } else { character() } } loss_type <- function(x) { stopifnot( is_character(x), all(x %in% c("precision", "generality")) ) x[[1]] } # Used in maybe_warn_deprecated_lossy_cast() new_error_cast_lossy <- function(x, to, loss_type, x_arg = "", to_arg = "") { error_cnd( c("vctrs_error_cast_lossy", "vctrs_error_incompatible_type"), x = x, y = to, loss_type = loss_type, x_arg = x_arg, y_arg = to_arg ) } #' @rdname vctrs-conditions #' @param x_ptype,to_ptype Suppress only the casting errors where `x` #' or `to` match these [prototypes][vec_ptype]. #' @export allow_lossy_cast <- function(expr, x_ptype = NULL, to_ptype = NULL) { withCallingHandlers( vctrs_error_cast_lossy = function(err) { if (!is_null(x_ptype) && !vec_is(err$x, x_ptype)) { return() } if (!is_null(to_ptype) && !vec_is(err$y, to_ptype)) { return() } invokeRestart("vctrs_restart_error_cast_lossy") }, expr ) } maybe_warn_deprecated_lossy_cast <- function( x, to, loss_type, x_arg, to_arg, user_env = caller_env(2) ) { # Returns `TRUE` if `allow_lossy_cast()` is on the stack and accepts # to handle the condition handled <- withRestarts( vctrs_restart_error_cast_lossy = function() TRUE, { # Signal fully formed condition but strip the error classes in # case someone is catching: This is not an abortive condition. cnd <- new_error_cast_lossy( x, to, loss_type = loss_type, x_arg = x_arg, to_arg = to_arg ) class(cnd) <- setdiff(class(cnd), c("error", "rlang_error")) signalCondition(cnd) FALSE } ) if (handled) { return(invisible()) } from <- format_arg_label(vec_ptype_abbr(x), x_arg) to <- format_arg_label(vec_ptype_abbr(to), to_arg) lifecycle::deprecate_warn( when = "0.2.0", what = I("Coercion with lossy casts"), with = "allow_lossy_cast()", details = paste0( glue::glue( "We detected a lossy transformation from { from } to { to }. " ), "The result will contain lower-resolution values or missing values. ", "To suppress this warning, wrap your code with `allow_lossy_cast()`." ), always = TRUE, user_env = user_env ) invisible() } stop_unsupported <- function(x, method, call = caller_env()) { msg <- glue::glue("`{method}.{class(x)[[1]]}()` not supported.") stop_vctrs( "vctrs_error_unsupported", message = msg, x = x, method = method, call = call ) } stop_unimplemented <- function(x, method, call = caller_env()) { msg <- glue::glue("`{method}.{class(x)[[1]]}()` not implemented.") stop_vctrs( "vctrs_error_unimplemented", message = msg, x = x, method = method, call = call ) } stop_scalar_type <- function(x, arg = NULL, call = caller_env()) { if (is_null(arg) || !nzchar(arg)) { arg <- "Input" } else { arg <- glue::backtick(arg) } message <- glue::glue("{arg} must be a vector, not {obj_type_friendly(x)}.") # Use the first detected issue, with a fallthrough to point to our scalar FAQ message <- with_incompatible_s3_list_bullets(message, x) %||% with_incompatible_data_frame_bullets(message, x) %||% with_scalar_faq_bullet(message) stop_vctrs( message, "vctrs_error_scalar_type", actual = x, call = call ) } with_incompatible_s3_list_bullets <- function(message, x) { is_list_typeof <- typeof(x) == "list" classes <- class(x) doesnt_contain_explicit_list_class <- !any(classes == "list") doesnt_contain_data_frame_class <- !any(classes == "data.frame") # We also assume no `vec_proxy()` method exists, otherwise one would have # been invoked, avoiding the error is_incompatible_s3_list <- is_list_typeof && doesnt_contain_explicit_list_class && doesnt_contain_data_frame_class if (!is_incompatible_s3_list) { return(NULL) } c( message, x = cli::format_inline(paste( "Detected incompatible scalar S3 list.", "To be treated as a vector, the object must explicitly inherit from {.cls list}", "or should implement a {.fn vec_proxy} method.", "Class: {.cls {classes}}." )), i = "If this object comes from a package, please report this error to the package author.", i = cli::format_inline(paste( "Read our FAQ about", "{.topic [creating vector types](vctrs::howto_faq_fix_scalar_type_error)}", "to learn more." )) ) } with_incompatible_data_frame_bullets <- function(message, x) { classes <- class(x) n_classes <- length(classes) contains_data_frame_class <- any(classes == "data.frame") if (n_classes == 0L) { # Edge case of `NULL` or `character()` classes last_class_is_not_data_frame <- TRUE } else { last_class_is_not_data_frame <- classes[n_classes] != "data.frame" } is_incompatible_data_frame <- contains_data_frame_class && last_class_is_not_data_frame if (!is_incompatible_data_frame) { return(NULL) } subclasses <- setdiff(classes, "data.frame") c( message, x = cli::format_inline(paste( "Detected incompatible data frame structure.", "A data frame is normally treated as a vector, but an incompatible class ordering was detected.", "To be compatible, the subclass {.cls {subclasses}} must come before {.cls data.frame}, not after.", "Class: {.cls {classes}}." )), i = "If this object comes from a package, please report this error to the package author.", i = cli::format_inline(paste( "Read our FAQ about", "{.topic [creating vector types](vctrs::howto_faq_fix_scalar_type_error)}", "to learn more." )) ) } with_scalar_faq_bullet <- function(message) { c( message, i = cli::format_inline(paste( "Read our FAQ about {.topic [scalar types](vctrs::faq_error_scalar_type)}", "to learn more." )) ) } stop_corrupt_factor_levels <- function(x, arg = "x", call = caller_env()) { msg <- glue::glue("`{arg}` is a corrupt factor with non-character levels") abort(msg, call = call) } stop_corrupt_ordered_levels <- function(x, arg = "x", call = caller_env()) { msg <- glue::glue( "`{arg}` is a corrupt ordered factor with non-character levels" ) abort(msg, call = call) } stop_recycle_incompatible_size <- function( x_size, size, x_arg = "x", call = caller_env() ) { stop_vctrs( x_size = x_size, y_size = size, x_arg = x_arg, # FIXME: tibble is the only package that uses `vctrs_error_recycle_incompatible_size` class = c( "vctrs_error_incompatible_size", "vctrs_error_recycle_incompatible_size" ), call = call ) } # Names ------------------------------------------------------------------- stop_names <- function(class = NULL, ..., call = caller_env()) { stop_vctrs( class = c(class, "vctrs_error_names"), ..., call = call ) } stop_names_cannot_be_empty <- function(names, call = caller_env()) { stop_names( class = "vctrs_error_names_cannot_be_empty", names = names, call = call ) } #' @export cnd_header.vctrs_error_names_cannot_be_empty <- function(cnd, ...) { "Names can't be empty." } #' @export cnd_body.vctrs_error_names_cannot_be_empty <- function(cnd, ...) { locations <- detect_empty_names(cnd$names) if (length(locations) == 1) { bullet <- glue::glue("Empty name found at location {locations}.") } else { bullet <- glue::glue( "Empty names found at locations {ensure_full_stop(enumerate(locations))}" ) } bullet <- c(x = bullet) format_error_bullets(bullet) } stop_names_cannot_be_dot_dot <- function(names, call = caller_env()) { stop_names( class = "vctrs_error_names_cannot_be_dot_dot", names = names, call = call ) } #' @export cnd_header.vctrs_error_names_cannot_be_dot_dot <- function(cnd, ...) { "Names can't be of the form `...` or `..j`." } #' @export cnd_body.vctrs_error_names_cannot_be_dot_dot <- function(cnd, ...) { names <- cnd$names locations <- detect_dot_dot(names) names <- names[locations] split <- vec_group_loc(names) info <- map2_chr(split$key, split$loc, make_names_loc_bullet) header <- "These names are invalid:" header <- c(x = header) header <- format_error_bullets(header) message <- bullets(info, header = header) message <- indent(message, 2) message } stop_names_must_be_unique <- function(names, arg = "", call = caller_env()) { stop_names( class = "vctrs_error_names_must_be_unique", arg = arg, names = names, call = call ) } #' @export cnd_header.vctrs_error_names_must_be_unique <- function(cnd, ...) { "Names must be unique." } #' @export cnd_body.vctrs_error_names_must_be_unique <- function(cnd, ...) { names <- cnd$names dups <- vec_group_loc(names) dup_indicator <- map_lgl(dups$loc, function(x) length(x) != 1L) dups <- vec_slice(dups, dup_indicator) header <- "These names are duplicated:" header <- c(x = header) header <- format_error_bullets(header) info <- map2_chr(dups$key, dups$loc, make_names_loc_bullet) message <- bullets(info, header = header) message <- indent(message, 2) arg <- arg_as_string(cnd$arg) if (arg != "") { hint <- c( i = glue::glue("Use argument `{cnd$arg}` to specify repair strategy.") ) message <- c(message, format_error_bullets(hint)) } message } make_names_loc_bullet <- function(x, loc) { if (length(loc) == 1) { glue::glue("{glue::double_quote(x)} at location {loc}.") } else { glue::glue( "{glue::double_quote(x)} at locations {ensure_full_stop(enumerate(loc))}" ) } } enumerate <- function(x, max = 5L, allow_empty = FALSE) { n <- length(x) if (n == 0L && !allow_empty) { abort("Internal error: Enumeration can't be empty.") } if (n > max) { paste0(glue::glue_collapse(x[seq2(1, max)], ", "), ", etc.") } else { if (n == 2) { last <- " and " } else { last <- ", and " } glue::glue_collapse(x, ", ", last = last) } } ensure_full_stop <- function(x) { n <- nchar(x) if (substr(x, n, n) == ".") { x } else { paste0(x, ".") } } stop_native_implementation <- function(fn) { cli::cli_abort( c( "{.fn {fn}} is implemented at C level.", " " = "This R function is purely indicative and should never be called." ), .internal = TRUE ) } # Helpers ----------------------------------------------------------------- glue_lines <- function(..., env = parent.frame()) { out <- map_chr(chr(...), glue::glue, .envir = env) paste(out, collapse = "\n") } format_arg_label <- function(type, arg = "") { type <- paste0("<", type, ">") if (nzchar(arg)) { paste0("`", arg, "` ", type) } else { type } } arg_backtick <- function(arg, or = "Input") { if (nzchar(arg)) { glue::backtick(arg) } else { or } } arg_as_string <- function(arg) { if (is_null(arg)) { "" } else if (is_string(arg)) { arg } else { as_label(arg) } } append_arg <- function(x, arg) { if (is_null(arg)) { return(x) } arg <- arg_as_string(arg) if (nzchar(arg)) { glue::glue("{x} `{arg}`") } else { x } } vctrs/R/rank.R0000644000176200001440000000722315065005761012703 0ustar liggesusers#' Compute ranks #' #' `vec_rank()` computes the sample ranks of a vector. For data frames, ranks #' are computed along the rows, using all columns after the first to break #' ties. #' #' @details #' Unlike [base::rank()], when `incomplete = "rank"` all missing values are #' given the same rank, rather than an increasing sequence of ranks. When #' `nan_distinct = FALSE`, `NaN` values are given the same rank as `NA`, #' otherwise they are given a rank that differentiates them from `NA`. #' #' Like [vec_order_radix()], ordering is done in the C-locale. This can affect #' the ranks of character vectors, especially regarding how uppercase and #' lowercase letters are ranked. See the documentation of [vec_order_radix()] #' for more information. #' #' @inheritParams order-radix #' @inheritParams rlang::args_dots_empty #' #' @param ties Ranking of duplicate values. #' - `"min"`: Use the current rank for all duplicates. The next non-duplicate #' value will have a rank incremented by the number of duplicates present. #' #' - `"max"`: Use the current rank `+ n_duplicates - 1` for all duplicates. #' The next non-duplicate value will have a rank incremented by the number of #' duplicates present. #' #' - `"sequential"`: Use an increasing sequence of ranks starting at the #' current rank, applied to duplicates in order of appearance. #' #' - `"dense"`: Use the current rank for all duplicates. The next #' non-duplicate value will have a rank incremented by `1`, effectively #' removing any gaps in the ranking. #' #' @param incomplete Ranking of missing and [incomplete][vec_detect_complete] #' observations. #' #' - `"rank"`: Rank incomplete observations normally. Missing values within #' incomplete observations will be affected by `na_value` and `nan_distinct`. #' #' - `"na"`: Don't rank incomplete observations at all. Instead, they are #' given a rank of `NA`. In this case, `na_value` and `nan_distinct` have #' no effect. #' #' @section Dependencies: #' #' - [vec_order_radix()] #' - [vec_slice()] #' #' @export #' @examples #' x <- c(5L, 6L, 3L, 3L, 5L, 3L) #' #' vec_rank(x, ties = "min") #' vec_rank(x, ties = "max") #' #' # Sequential ranks use an increasing sequence for duplicates #' vec_rank(x, ties = "sequential") #' #' # Dense ranks remove gaps between distinct values, #' # even if there are duplicates #' vec_rank(x, ties = "dense") #' #' y <- c(NA, x, NA, NaN) #' #' # Incomplete values match other incomplete values by default, and their #' # overall position can be adjusted with `na_value` #' vec_rank(y, na_value = "largest") #' vec_rank(y, na_value = "smallest") #' #' # NaN can be ranked separately from NA if required #' vec_rank(y, nan_distinct = TRUE) #' #' # Rank in descending order. Since missing values are the largest value, #' # they are given a rank of `1` when ranking in descending order. #' vec_rank(y, direction = "desc", na_value = "largest") #' #' # Give incomplete values a rank of `NA` by setting `incomplete = "na"` #' vec_rank(y, incomplete = "na") #' #' # Can also rank data frames, using columns after the first to break ties #' z <- c(2L, 3L, 4L, 4L, 5L, 2L) #' df <- data_frame(x = x, z = z) #' df #' #' vec_rank(df) vec_rank <- function( x, ..., ties = c("min", "max", "sequential", "dense"), incomplete = c("rank", "na"), direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) { check_dots_empty0(...) ties <- arg_match0(ties, c("min", "max", "sequential", "dense"), "ties") incomplete <- arg_match0(incomplete, c("rank", "na"), "incomplete") .Call( vctrs_rank, x, ties, incomplete, direction, na_value, nan_distinct, chr_proxy_collate ) } vctrs/R/type-sclr.R0000644000176200001440000000711415065005761013671 0ustar liggesusersnew_sclr <- function(..., class = character()) { fields <- list(...) stopifnot(has_unique_names(fields)) structure( list(...), class = c(class, "vctrs_sclr") ) } # Subsetting -------------------------------------------------------------- #' @export `[[.vctrs_sclr` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `$.vctrs_sclr` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `[[<-.vctrs_sclr` <- function(x, i, value) { .Call(vctrs_list_set, x, i, value) } #' @export `$<-.vctrs_sclr` <- function(x, i, value) { .Call(vctrs_list_set, x, i, value) } # Shared niceties --------------------------------------------------------- #' @export print.vctrs_sclr <- function(x, ...) { obj_print(x, ...) invisible(x) } #' @export as.list.vctrs_sclr <- function(x, ...) { vec_set_attributes(x, list(names = names(x))) } #' @export as.data.frame.vctrs_sclr <- function( x, row.names = NULL, optional = FALSE, ..., nm = paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ") ) { force(nm) cols <- list(list(x)) if (!optional) { names(cols) <- nm } new_data_frame(cols, n = 1L) } # Vector behaviours ------------------------------------------------------- #' @export `[.vctrs_sclr` <- function(x, ...) { stop_unsupported(x, "[") } #' @export `[<-.vctrs_sclr` <- function(x, ..., value) { stop_unsupported(x, "[<-") } #' @export c.vctrs_sclr <- function(...) { stop_unsupported(..1, "c") } #' @export Math.vctrs_sclr <- function(x, ...) { stop_unsupported(x, .Generic) } #' @export Ops.vctrs_sclr <- function(e1, e2) { stop_unsupported(e1, .Generic) } #' @export Complex.vctrs_sclr <- function(z) { stop_unsupported(z, .Generic) } #' @export Summary.vctrs_sclr <- function(..., na.rm = TRUE) { stop_unsupported(..1, .Generic) } #' @export `names<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "names<-") } #' @export xtfrm.vctrs_sclr <- function(x) { stop_unsupported(x, "xtfrm") } #' @export `dim<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "dim<-") } #' @export `dimnames<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "dimnames<-") } #' @export levels.vctrs_sclr <- function(x) { stop_unsupported(x, "levels") } #' @export `levels<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "levels<-") } #' @export `t.vctrs_sclr` <- function(x) { stop_unsupported(x, "t") } #' @export `is.na<-.vctrs_sclr` <- function(x, value) { stop_unsupported(x, "is.na<-") } #' @export unique.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export duplicated.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export anyDuplicated.vctrs_sclr <- function(x, incomparables = FALSE, ...) { stop_unsupported(x, "unique") } #' @export as.logical.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.logical") } #' @export as.integer.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.integer") } #' @export as.double.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.double") } #' @export as.character.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.character") } #' @export as.Date.vctrs_sclr <- function(x, ...) { stop_unsupported(x, "as.Date") } #' @export as.POSIXct.vctrs_sclr <- function(x, tz = "", ...) { stop_unsupported(x, "as.POSIXct") } # Unimplemented ----------------------------------------------------------- #' @export summary.vctrs_sclr <- function(object, ...) { # nocov start stop_unimplemented(object, "summary") # nocov end } vctrs/R/zzz.R0000644000176200001440000001314515156000460012575 0ustar liggesusers# nocov start .onLoad <- function(libname, pkgname) { check_linked_version(pkgname) ns <- ns_env("vctrs") run_on_load() s3_register("generics::as.factor", "vctrs_vctr") s3_register("generics::as.ordered", "vctrs_vctr") s3_register("generics::as.difftime", "vctrs_vctr") # Remove once tibble has implemented the methods on_package_load("tibble", { if (!env_has(ns_env("tibble"), "vec_ptype2.tbl_df.tbl_df")) { s3_register( "vctrs::vec_ptype2", "tbl_df.tbl_df", vec_ptype2_tbl_df_tbl_df ) s3_register( "vctrs::vec_ptype2", "tbl_df.data.frame", vec_ptype2_tbl_df_data.frame ) s3_register( "vctrs::vec_ptype2", "data.frame.tbl_df", vec_ptype2_data.frame_tbl_df ) } if (!env_has(ns_env("tibble"), "vec_cast.tbl_df.tbl_df")) { s3_register("vctrs::vec_cast", "tbl_df.tbl_df", vec_cast_tbl_df_tbl_df) s3_register( "vctrs::vec_cast", "tbl_df.data.frame", vec_cast_tbl_df_data.frame ) s3_register( "vctrs::vec_cast", "data.frame.tbl_df", vec_cast_data.frame_tbl_df ) } }) on_package_load("dplyr", { if (!env_has(ns_env("dplyr"), "vec_restore.grouped_df")) { s3_register("vctrs::vec_restore", "grouped_df", vec_restore_grouped_df) } if (!env_has(ns_env("dplyr"), "vec_ptype2.grouped_df.grouped_df")) { s3_register( "vctrs::vec_ptype2", "grouped_df.grouped_df", vec_ptype2_grouped_df_grouped_df ) s3_register( "vctrs::vec_ptype2", "grouped_df.data.frame", vec_ptype2_grouped_df_data.frame ) s3_register( "vctrs::vec_ptype2", "grouped_df.tbl_df", vec_ptype2_grouped_df_tbl_df ) s3_register( "vctrs::vec_ptype2", "data.frame.grouped_df", vec_ptype2_data.frame_grouped_df ) s3_register( "vctrs::vec_ptype2", "tbl_df.grouped_df", vec_ptype2_tbl_df_grouped_df ) } if (!env_has(ns_env("dplyr"), "vec_cast.grouped_df.grouped_df")) { s3_register( "vctrs::vec_cast", "grouped_df.grouped_df", vec_cast_grouped_df_grouped_df ) s3_register( "vctrs::vec_cast", "grouped_df.data.frame", vec_cast_grouped_df_data.frame ) s3_register( "vctrs::vec_cast", "grouped_df.tbl_df", vec_cast_grouped_df_tbl_df ) s3_register( "vctrs::vec_cast", "data.frame.grouped_df", vec_cast_data.frame_grouped_df ) s3_register( "vctrs::vec_cast", "tbl_df.grouped_df", vec_cast_tbl_df_grouped_df ) } if (!env_has(ns_env("dplyr"), "vec_restore.rowwise_df")) { s3_register("vctrs::vec_restore", "rowwise_df", vec_restore_rowwise_df) } if (!env_has(ns_env("dplyr"), "vec_ptype2.rowwise_df.rowwise_df")) { s3_register( "vctrs::vec_ptype2", "rowwise_df.rowwise_df", vec_ptype2_rowwise_df_rowwise_df ) s3_register( "vctrs::vec_ptype2", "rowwise_df.data.frame", vec_ptype2_rowwise_df_data.frame ) s3_register( "vctrs::vec_ptype2", "rowwise_df.tbl_df", vec_ptype2_rowwise_df_tbl_df ) s3_register( "vctrs::vec_ptype2", "data.frame.rowwise_df", vec_ptype2_data.frame_rowwise_df ) s3_register( "vctrs::vec_ptype2", "tbl_df.rowwise_df", vec_ptype2_tbl_df_rowwise_df ) } if (!env_has(ns_env("dplyr"), "vec_cast.rowwise_df.rowwise_df")) { s3_register( "vctrs::vec_cast", "rowwise_df.rowwise_df", vec_cast_rowwise_df_rowwise_df ) s3_register( "vctrs::vec_cast", "rowwise_df.data.frame", vec_cast_rowwise_df_data.frame ) s3_register( "vctrs::vec_cast", "rowwise_df.tbl_df", vec_cast_rowwise_df_tbl_df ) s3_register( "vctrs::vec_cast", "data.frame.rowwise_df", vec_cast_data.frame_rowwise_df ) s3_register( "vctrs::vec_cast", "tbl_df.rowwise_df", vec_cast_tbl_df_rowwise_df ) } }) on_package_load("sf", { import_from("sf", sf_deps, env = sf_env) if (!env_has(ns_env("sf"), "vec_restore.sf")) { s3_register("vctrs::vec_proxy", "sf", vec_proxy_sf) s3_register("vctrs::vec_restore", "sf", vec_restore_sf) } if (!env_has(ns_env("sf"), "vec_ptype2.sf.sf")) { s3_register("vctrs::vec_ptype2", "sf.sf", vec_ptype2_sf_sf) s3_register( "vctrs::vec_ptype2", "sf.data.frame", vec_ptype2_sf_data.frame ) s3_register( "vctrs::vec_ptype2", "data.frame.sf", vec_ptype2_data.frame_sf ) s3_register("vctrs::vec_ptype2", "sf.tbl_df", vec_ptype2_sf_tbl_df) s3_register("vctrs::vec_ptype2", "tbl_df.sf", vec_ptype2_tbl_df_sf) s3_register("vctrs::vec_cast", "sf.sf", vec_cast_sf_sf) s3_register("vctrs::vec_cast", "sf.data.frame", vec_cast_sf_data.frame) s3_register("vctrs::vec_cast", "data.frame.sf", vec_cast_data.frame_sf) } if (!env_has(ns_env("sf"), "vec_proxy_order.sfc")) { s3_register("vctrs::vec_proxy_order", "sfc", vec_proxy_order_sfc) } }) on_package_load("data.table", { if (!env_has(ns_env("data.table"), "vec_proxy.IDate")) { s3_register("vctrs::vec_proxy", "IDate", vec_proxy_IDate) s3_register("vctrs::vec_restore", "IDate", vec_restore_IDate) } }) .Call(vctrs_init_library, ns_env()) } # nocov end vctrs/R/group.R0000644000176200001440000000574515065005761013113 0ustar liggesusers#' Identify groups #' #' @description #' * `vec_group_id()` returns an identifier for the group that each element of #' `x` falls in, constructed in the order that they appear. The number of #' groups is also returned as an attribute, `n`. #' #' * `vec_group_loc()` returns a data frame containing a `key` column with the #' unique groups, and a `loc` column with the locations of each group in `x`. #' #' * `vec_group_rle()` locates groups in `x` and returns them run length #' encoded in the order that they appear. The return value is a rcrd object #' with fields for the `group` identifiers and the run `length` of the #' corresponding group. The number of groups is also returned as an #' attribute, `n`. #' #' @param x A vector #' @return #' * `vec_group_id()`: An integer vector with the same size as `x`. #' * `vec_group_loc()`: A two column data frame with size equal to #' `vec_size(vec_unique(x))`. #' * A `key` column of type `vec_ptype(x)` #' * A `loc` column of type list, with elements of type integer. #' * `vec_group_rle()`: A `vctrs_group_rle` rcrd object with two integer #' vector fields: `group` and `length`. #' #' Note that when using `vec_group_loc()` for complex types, the default #' `data.frame` print method will be suboptimal, and you will want to coerce #' into a tibble to better understand the output. #' @name vec_group #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @keywords internal #' @examples #' purrr <- c("p", "u", "r", "r", "r") #' vec_group_id(purrr) #' vec_group_rle(purrr) #' #' groups <- mtcars[c("vs", "am")] #' vec_group_id(groups) #' #' group_rle <- vec_group_rle(groups) #' group_rle #' #' # Access fields with `field()` #' field(group_rle, "group") #' field(group_rle, "length") #' #' # `vec_group_id()` is equivalent to #' vec_match(groups, vec_unique(groups)) #' #' vec_group_loc(mtcars$vs) #' vec_group_loc(mtcars[c("vs", "am")]) #' #' if (require("tibble")) { #' as_tibble(vec_group_loc(mtcars[c("vs", "am")])) #' } NULL #' @rdname vec_group #' @export vec_group_id <- function(x) { .Call(vctrs_group_id, x) } #' @rdname vec_group #' @export vec_group_loc <- function(x) { .Call(vctrs_group_loc, x) } #' @rdname vec_group #' @export vec_group_rle <- function(x) { .Call(vctrs_group_rle, x) } #' @export format.vctrs_group_rle <- function(x, ...) { group <- field(x, "group") length <- field(x, "length") paste0(group, "x", length) } #' @export obj_print_header.vctrs_group_rle <- function(x, ...) { size <- vec_size(x) n <- attr(x, "n") cat_line("<", vec_ptype_full(x), "[", size, "][n = ", n, "]>") invisible(x) } # For testing new_group_rle <- function(group, length, n) { stopifnot(is_integer(group)) stopifnot(is_integer(length)) stopifnot(is_integer(n)) vec_check_size(n, size = 1L) if (vec_size(group) != vec_size(length)) { abort("`group` and `length` must have the same size.") } new_rcrd( list(group = group, length = length), n = n, class = "vctrs_group_rle" ) } vctrs/R/shape.R0000644000176200001440000000431715113335375013052 0ustar liggesusers# The dimensionality of an matrix/array is partition into two parts: # * the first dimension = the number of observations # * all other dimensions = the shape parameter of the type # These helpers work with the shape parameter new_shape <- function(type, shape = integer()) { structure(type, dim = c(0L, shape)) } vec_shaped_ptype <- function(ptype, x, y, ..., x_arg = "", y_arg = "") { check_dots_empty0(...) .Call(ffi_vec_shaped_ptype, ptype, x, y, environment()) } vec_shape2 <- function(x, y, ..., x_arg = "", y_arg = "") { check_dots_empty0(...) .Call(ffi_vec_shape2, x, y, environment()) } # Should take same signature as `vec_cast()` shape_broadcast <- function(x, to, ..., x_arg, to_arg, call = caller_env()) { if (is.null(x) || is.null(to)) { return(x) } dim_x <- vec_dim(x) dim_to <- vec_dim(to) # Don't set dimensions for vectors if (length(dim_x) == 1L && length(dim_to) == 1L) { return(x) } if (length(dim_x) > length(dim_to)) { details <- sprintf( "Can't decrease dimensionality from %s to %s.", length(dim_x), length(dim_to) ) stop_incompatible_cast( x, to, details = details, x_arg = x_arg, to_arg = to_arg, call = call ) } dim_x <- n_dim2(dim_x, dim_to)$x dim_to[[1]] <- dim_x[[1]] # don't change number of observations ok <- dim_x == dim_to | dim_x == 1 if (any(!ok)) { stop_incompatible_cast( x, to, details = "Non-recyclable dimensions.", x_arg = x_arg, to_arg = to_arg, call = call ) } # Increase dimensionality if required if (vec_dim_n(x) != length(dim_x)) { dim(x) <- dim_x } recycle <- dim_x != dim_to # Avoid expensive subset if (all(!recycle)) { return(x) } indices <- rep(list(missing_arg()), length(dim_to)) indices[recycle] <- map(dim_to[recycle], rep_len, x = 1L) eval_bare(expr(x[!!!indices, drop = FALSE])) } # Helpers ----------------------------------------------------------------- n_dim2 <- function(x, y) { nx <- length(x) ny <- length(y) if (nx == ny) { list(x = x, y = y) } else if (nx < ny) { list(x = c(x, rep(1L, ny - nx)), y = y) } else { list(x = x, y = c(y, rep(1L, nx - ny))) } } vctrs/R/ptype-abbr-full.R0000644000176200001440000000727615065005761014765 0ustar liggesusers#' Vector type as a string #' #' `vec_ptype_full()` displays the full type of the vector. `vec_ptype_abbr()` #' provides an abbreviated summary suitable for use in a column heading. #' #' @section S3 dispatch: #' The default method for `vec_ptype_full()` uses the first element of the #' class vector. Override this method if your class has parameters that should #' be prominently displayed. #' #' The default method for `vec_ptype_abbr()` [abbreviate()]s `vec_ptype_full()` #' to 8 characters. You should almost always override, aiming for 4-6 #' characters where possible. #' #' These arguments are handled by the generic and not passed to methods: #' * `prefix_named` #' * `suffix_shape` #' #' @param x A vector. #' @param prefix_named If `TRUE`, add a prefix for named vectors. #' @param suffix_shape If `TRUE` (the default), append the shape of #' the vector. #' @inheritParams rlang::args_dots_empty #' #' @keywords internal #' @return A string. #' @export #' @examples #' cat(vec_ptype_full(1:10)) #' cat(vec_ptype_full(iris)) #' #' cat(vec_ptype_abbr(1:10)) vec_ptype_full <- function(x, ...) { check_dots_empty0(...) # Data frames and their subclasses have internal handling in the # default method to get the inner types format method <- s3_method_specific(x, "vec_ptype_full", ns = "vctrs") return(method(x, ...)) UseMethod("vec_ptype_full") } #' @export #' @rdname vec_ptype_full vec_ptype_abbr <- function(x, ..., prefix_named = FALSE, suffix_shape = TRUE) { check_dots_empty0(...) method <- s3_method_specific(x, "vec_ptype_abbr", ns = "vctrs") abbr <- method(x, ...) named <- if ((prefix_named || is_bare_list(x)) && !is.null(vec_names(x))) { "named " } shape <- if (suffix_shape) vec_ptype_shape(x) abbr <- paste0(named, abbr, shape) return(abbr) UseMethod("vec_ptype_abbr") } #' @export vec_ptype_full.NULL <- function(x, ...) "NULL" #' @export vec_ptype_abbr.NULL <- function(x, ...) "NULL" # Default: base types and fallback for S3/S4 ------------------------------ #' @export vec_ptype_full.default <- function(x, ...) { if (is.data.frame(x)) { vec_ptype_full_data_frame(x, ...) } else if (is.object(x)) { class(x)[[1]] } else if (is_vector(x)) { paste0(typeof(x), vec_ptype_shape(x)) } else { abort("Not a vector.") } } #' @export vec_ptype_abbr.default <- function(x, ...) { if (is.object(x)) { type <- class(x)[[1]] } else if (is_vector(x)) { type <- vec_ptype_abbr_bare(x, ...) } else { abort("Not a vector.") } unname(abbreviate(type, 8)) } vec_ptype_full_data_frame <- function(x, ...) { if (length(x) == 0) { return(paste0(class(x)[[1]], "<>")) } else if (length(x) == 1) { return(paste0( class(x)[[1]], "<", names(x), ":", vec_ptype_full(x[[1]]), ">" )) } # Needs to handle recursion with indenting types <- map_chr(x, vec_ptype_full) needs_indent <- grepl("\n", types) types[needs_indent] <- map(types[needs_indent], function(x) { indent(paste0("\n", x), 4) }) names <- paste0(" ", format(names(x))) paste0( class(x)[[1]], "<\n", paste0(names, ": ", types, collapse = "\n"), "\n>" ) } vec_ptype_abbr_bare <- function(x, ...) { switch( typeof(x), list = "list", logical = "lgl", integer = "int", double = "dbl", character = "chr", complex = "cpl", list = "list", expression = "expr", raw = "raw", typeof(x) ) } # Helpers ----------------------------------------------------------------- vec_ptype_shape <- function(x) { dim <- dim2(x) if (length(dim) == 1) { if (is_null(dim(x))) { "" } else { "[1d]" } } else { paste0("[,", paste(dim[-1], collapse = ","), "]") } } vctrs/R/slice-interleave.R0000644000176200001440000000504615075743736015220 0ustar liggesusers#' Interleave many vectors into one vector #' #' @description #' `vec_interleave()` combines multiple vectors together, much like [vec_c()], #' but does so in such a way that the elements of each vector are interleaved #' together. #' #' It is a more efficient equivalent to the following usage of `vec_c()`: #' #' ``` #' vec_interleave(x, y) == vec_c(x[1], y[1], x[2], y[2], ..., x[n], y[n]) #' ``` #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [list_combine()] #' #' @inheritParams vec_c #' #' @param ... Vectors to interleave. #' #' @param .size The expected size of each vector. If not provided, computed #' automatically by [vec_size_common()]. Each vector will be #' [recycled][theory-faq-recycling] to this size. #' #' @param .ptype The expected type of each vector. If not provided, computed #' automatically by [vec_ptype_common()]. Each vector will be #' [cast][theory-faq-coercion] to this type. #' #' @export #' @examples #' # The most common case is to interleave two vectors #' vec_interleave(1:3, 4:6) #' #' # But you aren't restricted to just two #' vec_interleave(1:3, 4:6, 7:9, 10:12) #' #' # You can also interleave data frames #' x <- data_frame(x = 1:2, y = c("a", "b")) #' y <- data_frame(x = 3:4, y = c("c", "d")) #' #' vec_interleave(x, y) #' #' # `.size` can be used to recycle size 1 elements before interleaving #' vec_interleave(1, 2, .size = 3) #' #' # `.ptype` can be used to enforce a particular type #' typeof(vec_interleave(1, 2, .ptype = integer())) vec_interleave <- function( ..., .size = NULL, .ptype = NULL, .name_spec = NULL, .name_repair = c( "minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet" ), .error_call = current_env() ) { list_interleave( x = list2(...), size = .size, ptype = .ptype, name_spec = .name_spec, name_repair = .name_repair, x_arg = "", error_call = .error_call ) } # It's sometimes more convenient to supply a list, plus you get access to # `x_arg` for better error messages than you get from `vec_interleave(!!!x)`. # We could consider exporting this alongside `vec_interleave()`. list_interleave <- function( x, ..., size = NULL, ptype = NULL, name_spec = NULL, name_repair = c( "minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet" ), x_arg = caller_arg(x), error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_list_interleave, x, size, ptype, name_spec, name_repair, environment() ) } vctrs/R/type-rcrd.R0000644000176200001440000000756415065005761013671 0ustar liggesusers# Constructor and basic methods --------------------------------------------- #' rcrd (record) S3 class #' #' The rcrd class extends [vctr]. A rcrd is composed of 1 or more [field]s, #' which must be vectors of the same length. Is designed specifically for #' classes that can naturally be decomposed into multiple vectors of the same #' length, like [POSIXlt], but where the organisation should be considered #' an implementation detail invisible to the user (unlike a [data.frame]). #' #' @details #' Record-style objects created with [new_rcrd()] do not do much on their own. #' For instance they do not have a default [format()] method, which means #' printing the object causes an error. See [Record-style #' objects](https://vctrs.r-lib.org/articles/s3-vector.html#record-style-objects) #' for details on implementing methods for record vectors. #' #' @param fields A list or a data frame. Lists must be rectangular #' (same sizes), and contain uniquely named vectors (at least #' one). `fields` is validated with [df_list()] to ensure uniquely #' named vectors. #' @param ... Additional attributes #' @param class Name of subclass. #' @export #' @aliases ses rcrd #' @keywords internal new_rcrd <- function(fields, ..., class = character()) { if (obj_is_list(fields) && length(vec_unique(list_sizes(fields))) > 1L) { abort("All fields must be the same size.") } fields <- df_list(!!!fields) if (!length(fields)) { abort("`fields` must be a list of length 1 or greater.") } structure(fields, ..., class = c(class, "vctrs_rcrd", "vctrs_vctr")) } #' @export vec_proxy.vctrs_rcrd <- function(x, ...) { new_data_frame(x) } #' @export vec_restore.vctrs_rcrd <- function(x, to, ...) { x <- NextMethod() attr(x, "row.names") <- NULL x } #' @export length.vctrs_rcrd <- function(x) { vec_size(x) } #' @export names.vctrs_rcrd <- function(x) { NULL } #' @export `names<-.vctrs_rcrd` <- function(x, value) { if (is_null(value)) { x } else { abort("Can't assign names to a .") } } #' @export format.vctrs_rcrd <- function(x, ...) { if (inherits(x, "vctrs_foobar")) { # For unit tests exec("paste", !!!vec_data(x), sep = ":") } else { stop_unimplemented(x, "format") } } #' @export obj_str_data.vctrs_rcrd <- function(x, ...) { obj_str_leaf(x, ...) } #' @method vec_cast vctrs_rcrd #' @export vec_cast.vctrs_rcrd <- function(x, to, ...) UseMethod("vec_cast.vctrs_rcrd") #' @export vec_cast.vctrs_rcrd.vctrs_rcrd <- function(x, to, ...) { out <- vec_cast(vec_data(x), vec_data(to), ...) new_rcrd(out) } # Subsetting -------------------------------------------------------------- #' @export `[.vctrs_rcrd` <- function(x, i, ...) { if (!missing(...)) { abort("Can't index record vectors on dimensions greater than 1.") } vec_slice(x, maybe_missing(i)) } #' @export `[[.vctrs_rcrd` <- function(x, i, ...) { out <- vec_slice(vec_data(x), i) vec_restore(out, x) } #' @export `$.vctrs_rcrd` <- function(x, i, ...) { stop_unsupported(x, "subsetting with $") } #' @export rep.vctrs_rcrd <- function(x, ...) { out <- lapply(vec_data(x), base_vec_rep, ...) vec_restore(out, x) } #' @export `length<-.vctrs_rcrd` <- function(x, value) { out <- vec_size_assign(vec_data(x), value) vec_restore(out, x) } # Replacement ------------------------------------------------------------- #' @export `[[<-.vctrs_rcrd` <- function(x, i, value) { force(i) x[i] <- value x } #' @export `$<-.vctrs_rcrd` <- function(x, i, value) { stop_unsupported(x, "subset assignment with $") } #' @export `[<-.vctrs_rcrd` <- function(x, i, value) { i <- maybe_missing(i, TRUE) value <- vec_cast(value, x) out <- vec_assign(vec_data(x), i, vec_data(value)) vec_restore(out, x) } # Equality and ordering --------------------------------------------------- #' @export vec_math.vctrs_rcrd <- function(.fn, .x, ...) { stop_unsupported(.x, "vec_math") } vctrs/R/subscript-loc.R0000644000176200001440000004071415065005761014543 0ustar liggesusers#' Create a vector of locations #' #' @description #' #' These helpers provide a means of standardizing common indexing #' methods such as integer, character or logical indexing. #' #' * `vec_as_location()` accepts integer, character, or logical vectors #' of any size. The output is always an integer vector that is #' suitable for subsetting with `[` or [vec_slice()]. It might be a #' different size than the input because negative selections are #' transformed to positive ones and logical vectors are transformed #' to a vector of indices for the `TRUE` locations. #' #' * `vec_as_location2()` accepts a single number or string. It returns #' a single location as a integer vector of size 1. This is suitable #' for extracting with `[[`. #' #' * `num_as_location()` and `num_as_location2()` are specialized variants #' that have extra options for numeric indices. #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' #' @param i An index vector to convert. #' #' @param n A single integer representing the total size of the #' object that `i` is meant to index into. #' #' @param names If `i` is a character vector, `names` should be a character #' vector that `i` will be matched against to construct the index. Otherwise, #' not used. The default value of `NULL` will result in an error #' if `i` is a character vector. #' #' @param missing How should missing `i` values be handled? #' - `"error"` throws an error. #' - `"propagate"` returns them as is. #' - `"remove"` removes them. #' #' By default, vector subscripts propagate missing values but scalar #' subscripts error on them. #' #' Propagated missing values can't be combined with negative indices when #' `negative = "invert"`, because they can't be meaningfully inverted. #' #' @param arg The argument name to be displayed in error messages. #' #' @return #' - `vec_as_location()` and `num_as_location()` return an integer vector that #' can be used as an index in a subsetting operation. #' #' - `vec_as_location2()` and `num_as_location2()` return an integer of size 1 #' that can be used a scalar index for extracting an element. #' #' @examples #' x <- array(1:6, c(2, 3)) #' dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3")) #' #' # The most common use case validates row indices #' vec_as_location(1, vec_size(x)) #' #' # Negative indices can be used to index from the back #' vec_as_location(-1, vec_size(x)) #' #' # Character vectors can be used if `names` are provided #' vec_as_location("r2", vec_size(x), rownames(x)) #' #' # You can also construct an index for dimensions other than the first #' vec_as_location(c("c2", "c1"), ncol(x), colnames(x)) #' #' @keywords internal #' @export vec_as_location <- function( i, n, names = NULL, ..., missing = c("propagate", "remove", "error"), arg = caller_arg(i), call = caller_env() ) { check_dots_empty0(...) .Call( ffi_as_location, i = i, n = n, names = names, loc_negative = "invert", loc_oob = "error", loc_zero = "remove", missing = missing, frame = environment() ) } #' @rdname vec_as_location #' #' @param negative How should negative `i` values be handled? #' - `"error"` throws an error. #' - `"ignore"` returns them as is. #' - `"invert"` returns the positive location generated by inverting the #' negative location. When inverting, positive and negative locations #' can't be mixed. This option is only applicable for `num_as_location()`. #' #' @param oob How should out-of-bounds `i` values be handled? #' - `"error"` throws an error. #' - `"remove"` removes both positive and negative out-of-bounds locations. #' - `"extend"` allows positive out-of-bounds locations if they directly #' follow the end of a vector. This can be used to implement extendable #' vectors, like `letters[1:30]`. #' #' @param zero How should zero `i` values be handled? #' - `"error"` throws an error. #' - `"remove"` removes them. #' - `"ignore"` returns them as is. #' #' @export num_as_location <- function( i, n, ..., missing = c("propagate", "remove", "error"), negative = c("invert", "error", "ignore"), oob = c("error", "remove", "extend"), zero = c("remove", "error", "ignore"), arg = caller_arg(i), call = caller_env() ) { check_dots_empty0(...) if (is.object(i) || !(is_integer(i) || is_double(i))) { abort("`i` must be a numeric vector.") } .Call( ffi_as_location, i = i, n = n, names = NULL, loc_negative = negative, loc_oob = oob, loc_zero = zero, missing = missing, env = environment() ) } #' @rdname vec_as_location #' @export vec_as_location2 <- function( i, n, names = NULL, ..., missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env() ) { check_dots_empty0(...) result_get(vec_as_location2_result( i, n = n, names = names, negative = "error", missing = missing, arg = arg, call = call )) } #' @rdname vec_as_location #' @export num_as_location2 <- function( i, n, ..., negative = c("error", "ignore"), missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env() ) { check_dots_empty0(...) if (!is_integer(i) && !is_double(i)) { abort("`i` must be a numeric vector.", call = call) } result_get(vec_as_location2_result( i, n = n, names = NULL, negative = negative, missing = missing, arg = arg, call = call )) } vec_as_location2_result <- function(i, n, names, missing, negative, arg, call) { allow_missing <- arg_match0(missing, c("error", "propagate")) == "propagate" allow_negative <- arg_match0(negative, c("error", "ignore")) == "ignore" result <- vec_as_subscript2_result( i = i, arg = arg, call = call ) if (!is_null(result$err)) { parent <- result$err return(result( err = new_error_location2_type( i = i, subscript_arg = arg, body = parent$body, call = call ) )) } # Locations must be size 1, can't be NA, and must be positive i <- result$ok if (length(i) != 1L) { return(result( err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_scalar, call = call ) )) } neg <- typeof(i) == "integer" && !is.na(i) && i < 0L if (allow_negative && neg) { i <- -i } if (is.na(i)) { if (!allow_missing && is.na(i)) { result <- result( err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_present, call = call ) ) } else { result <- result(i) } return(result) } if (identical(i, 0L)) { return(result( err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_positive, call = call ) )) } if (!allow_negative && neg) { return(result( err = new_error_location2_type( i = i, subscript_arg = arg, body = cnd_bullets_location2_need_positive, call = call ) )) } err <- NULL i <- tryCatch( vec_as_location(i, n, names = names, arg = arg, call = call), vctrs_error_subscript = function(err) { err[["subscript_scalar"]] <- TRUE err <<- err i } ) if (!is_null(err)) { return(result(err = err)) } if (neg) { i <- -i } result(i) } stop_location_negative_missing <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, ..., body = cnd_body_vctrs_error_location_negative_missing, call = call )) } cnd_body_vctrs_error_location_negative_missing <- function(cnd, ...) { missing_loc <- which(is.na(cnd$i)) arg <- append_arg("Subscript", cnd$subscript_arg) if (length(missing_loc) == 1) { loc <- glue::glue("{arg} has a missing value at location {missing_loc}.") } else { n_loc <- length(missing_loc) missing_loc <- ensure_full_stop(enumerate(missing_loc)) loc <- glue::glue( "{arg} has {n_loc} missing values at locations {missing_loc}" ) } format_error_bullets(c( x = "Negative locations can't have missing values.", i = loc )) } stop_location_negative_positive <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, ..., body = cnd_body_vctrs_error_location_negative_positive, call = call )) } cnd_body_vctrs_error_location_negative_positive <- function(cnd, ...) { positive_loc <- which(cnd$i > 0) arg <- append_arg("Subscript", cnd$subscript_arg) if (length(positive_loc) == 1) { loc <- glue::glue("{arg} has a positive value at location {positive_loc}.") } else { n_loc <- length(positive_loc) positive_loc <- ensure_full_stop(enumerate(positive_loc)) loc <- glue::glue( "{arg} has {n_loc} positive values at locations {positive_loc}" ) } format_error_bullets(c( x = "Negative and positive locations can't be mixed.", i = loc )) } new_error_location2_type <- function(i, ..., class = NULL) { new_error_subscript2_type( class = class, i = i, numeric = "cast", character = "cast", ... ) } cnd_bullets_location2_need_scalar <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} must be size 1, not {length(i)}.") )) } cnd_bullets_location2_need_present <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data( cnd, "{subscript_arg} must be a location, not {obj_type_friendly(i)}." ) )) } cnd_bullets_location2_need_positive <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data( cnd, "{subscript_arg} must be a positive location, not {i}." ) )) } stop_location_negative <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, body = cnd_bullets_location_need_non_negative, ..., call = call )) } cnd_bullets_location_need_non_negative <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) format_error_bullets(c( x = glue::glue_data( cnd, "{subscript_arg} can't contain negative locations." ) )) } stop_location_zero <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i, body = cnd_bullets_location_need_non_zero, ..., call = call )) } cnd_bullets_location_need_non_zero <- function(cnd, ...) { zero_loc <- which(cnd$i == 0) zero_loc_size <- length(zero_loc) arg <- append_arg("Subscript", cnd$subscript_arg) if (zero_loc_size == 1) { loc <- glue::glue("It has a `0` value at location {zero_loc}.") } else { zero_loc <- ensure_full_stop(enumerate(zero_loc)) loc <- glue::glue( "It has {zero_loc_size} `0` values at locations {zero_loc}" ) } format_error_bullets(c( x = glue::glue("{arg} can't contain `0` values."), i = loc )) } stop_subscript_missing <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i = i, body = cnd_bullets_subscript_missing, ..., call = call )) } cnd_bullets_subscript_missing <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) missing_loc <- which(is.na(cnd$i)) if (length(missing_loc) == 1) { missing_line <- glue::glue( "It has a missing value at location {missing_loc}." ) } else { missing_enum <- ensure_full_stop(enumerate(missing_loc)) missing_line <- glue::glue( "It has missing values at locations {missing_enum}" ) } format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain missing values."), x = missing_line )) } stop_subscript_empty <- function(i, ..., call = caller_env()) { cnd_signal(new_error_subscript_type( i = i, body = cnd_bullets_subscript_empty, ..., call = call )) } cnd_bullets_subscript_empty <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg) loc <- which(cnd$i == "") if (length(loc) == 1) { line <- glue::glue("It has an empty string at location {loc}.") } else { enum <- ensure_full_stop(enumerate(loc)) line <- glue::glue("It has an empty string at locations {enum}") } format_error_bullets(c( x = glue::glue_data(cnd, "{subscript_arg} can't contain the empty string."), x = line )) } stop_indicator_size <- function(i, n, ..., call = caller_env()) { cnd_signal(new_error_subscript_size( i, n = n, ..., body = cnd_body_vctrs_error_indicator_size, call = call )) } cnd_body_vctrs_error_indicator_size <- function(cnd, ...) { cnd$subscript_arg <- append_arg("Logical subscript", cnd$subscript_arg) glue_data_bullets( cnd, x = "{subscript_arg} must be size 1 or {n}, not {vec_size(i)}." ) } stop_subscript_oob <- function(i, subscript_type, ..., call = caller_env()) { stop_subscript( class = "vctrs_error_subscript_oob", i = i, subscript_type = subscript_type, ..., call = call ) } #' @export cnd_header.vctrs_error_subscript_oob <- function(cnd, ...) { if (cnd_subscript_oob_non_consecutive(cnd)) { return(cnd_header_vctrs_error_subscript_oob_non_consecutive(cnd, ...)) } elt <- cnd_subscript_element(cnd) action <- cnd_subscript_action(cnd) type <- cnd_subscript_type(cnd) if (action %in% c("rename", "relocate") || type == "character") { glue::glue("Can't {action} {elt[[2]]} that don't exist.") } else { glue::glue("Can't {action} {elt[[2]]} past the end.") } } #' @export cnd_body.vctrs_error_subscript_oob <- function(cnd, ...) { switch( cnd_subscript_type(cnd), numeric = if (cnd_subscript_oob_non_consecutive(cnd)) { cnd_body_vctrs_error_subscript_oob_non_consecutive(cnd, ...) } else { cnd_body_vctrs_error_subscript_oob_location(cnd, ...) }, character = cnd_body_vctrs_error_subscript_oob_name(cnd, ...), abort("Internal error: subscript type can't be `logical` for OOB errors.") ) } cnd_body_vctrs_error_subscript_oob_location <- function(cnd, ...) { i <- cnd$i # In case of missing locations i <- i[!is.na(i)] if (cnd_subscript_action(cnd) == "negate") { # Only report negative indices i <- i[i < 0L] } # In case of negative indexing i <- abs(i) oob <- i[i > cnd$size] oob_enum <- vctrs_cli_vec(oob) n_loc <- length(oob) n <- cnd$size elt <- cnd_subscript_element_cli(n, cnd) # TODO: Switch to `format_inline()` and format bullets lazily through rlang cli::format_error(c( "i" = "{cli::qty(n_loc)} Location{?s} {oob_enum} do{?esn't/n't} exist.", "i" = "There {cli::qty(n)} {?is/are} only {elt}." )) } cnd_body_vctrs_error_subscript_oob_name <- function(cnd, ...) { elt <- cnd_subscript_element(cnd, capital = TRUE) oob <- cnd$i[!cnd$i %in% cnd$names] oob_enum <- enumerate(glue::backtick(oob)) format_error_bullets(c( x = glue::glue(ngettext( length(oob), "{elt[[1]]} {oob_enum} doesn't exist.", "{elt[[2]]} {oob_enum} don't exist." )) )) } vctrs_cli_vec <- function(x, ..., vec_trunc = 5) { cli::cli_vec(as.character(x), list(..., vec_trunc = vec_trunc)) } stop_location_oob_non_consecutive <- function( i, size, ..., call = caller_env() ) { stop_subscript_oob( i = i, size = size, subscript_type = "numeric", subscript_oob_non_consecutive = TRUE, ..., call = call ) } cnd_header_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) { action <- cnd_subscript_action(cnd) elt <- cnd_subscript_element(cnd) glue::glue( "Can't {action} {elt[[2]]} beyond the end with non-consecutive locations." ) } cnd_body_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) { i <- sort(cnd$i) i <- i[i > cnd$size] non_consecutive <- i[c(TRUE, diff(i) != 1L)] arg <- append_arg("Subscript", cnd$subscript_arg) if (length(non_consecutive) == 1) { x_line <- glue::glue( "{arg} contains non-consecutive location {non_consecutive}." ) } else { non_consecutive <- ensure_full_stop(enumerate(non_consecutive)) x_line <- glue::glue( "{arg} contains non-consecutive locations {non_consecutive}" ) } glue_data_bullets( cnd, i = "Input has size {size}.", x = x_line ) } cnd_subscript_oob_non_consecutive <- function(cnd) { out <- cnd$subscript_oob_non_consecutive %||% FALSE check_bool(out) out } vctrs/R/register-s3.R0000644000176200001440000001565115065005761014123 0ustar liggesusers# This source code file is licensed under the unlicense license # https://unlicense.org #' Register a method for a suggested dependency #' #' Generally, the recommend way to register an S3 method is to use the #' `S3Method()` namespace directive (often generated automatically by the #' `@export` roxygen2 tag). However, this technique requires that the generic #' be in an imported package, and sometimes you want to suggest a package, #' and only provide a method when that package is loaded. `s3_register()` #' can be called from your package's `.onLoad()` to dynamically register #' a method only if the generic's package is loaded. #' #' For R 3.5.0 and later, `s3_register()` is also useful when demonstrating #' class creation in a vignette, since method lookup no longer always involves #' the lexical scope. For R 3.6.0 and later, you can achieve a similar effect #' by using "delayed method registration", i.e. placing the following in your #' `NAMESPACE` file: #' #' ``` #' if (getRversion() >= "3.6.0") { #' S3method(package::generic, class) #' } #' ``` #' #' @section Usage in other packages: #' To avoid taking a dependency on vctrs, you copy the source of #' [`s3_register()`](https://github.com/r-lib/vctrs/blob/main/R/register-s3.R) #' into your own package. It is licensed under the permissive #' [unlicense](https://choosealicense.com/licenses/unlicense/) to make it #' crystal clear that we're happy for you to do this. There's no need to include #' the license or even credit us when using this function. #' #' @usage NULL #' @param generic Name of the generic in the form `pkg::generic`. #' @param class Name of the class #' @param method Optionally, the implementation of the method. By default, #' this will be found by looking for a function called `generic.class` #' in the package environment. #' #' Note that providing `method` can be dangerous if you use #' devtools. When the namespace of the method is reloaded by #' `devtools::load_all()`, the function will keep inheriting from #' the old namespace. This might cause crashes because of dangling #' `.Call()` pointers. #' @export #' @examples #' # A typical use case is to dynamically register tibble/pillar methods #' # for your class. That way you avoid creating a hard dependency on packages #' # that are not essential, while still providing finer control over #' # printing when they are used. #' #' .onLoad <- function(...) { #' s3_register("pillar::pillar_shaft", "vctrs_vctr") #' s3_register("tibble::type_sum", "vctrs_vctr") #' } #' @keywords internal # nocov start s3_register <- function(generic, class, method = NULL) { stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) pieces <- strsplit(generic, "::")[[1]] stopifnot(length(pieces) == 2) package <- pieces[[1]] generic <- pieces[[2]] caller <- parent.frame() get_method_env <- function() { top <- topenv(caller) if (isNamespace(top)) { asNamespace(environmentName(top)) } else { caller } } get_method <- function(method) { if (is.null(method)) { get(paste0(generic, ".", class), envir = get_method_env()) } else { method } } register <- function(...) { envir <- asNamespace(package) # Refresh the method each time, it might have been updated by # `devtools::load_all()` method_fn <- get_method(method) stopifnot(is.function(method_fn)) # Only register if generic can be accessed if (exists(generic, envir)) { registerS3method(generic, class, method_fn, envir = envir) } else if (identical(Sys.getenv("NOT_CRAN"), "true")) { warn <- .rlang_s3_register_compat("warn") warn(c( sprintf( "Can't find generic `%s` in package %s to register S3 method.", generic, package ), "i" = "This message is only shown to developers using devtools.", "i" = sprintf( "Do you need to update %s to the latest version?", package ) )) } } # Always register hook in case package is later unloaded & reloaded setHook(packageEvent(package, "onLoad"), function(...) { register() }) # For compatibility with R < 4.0 where base isn't locked is_sealed <- function(pkg) { identical(pkg, "base") || environmentIsLocked(asNamespace(pkg)) } # Avoid registration failures during loading (pkgload or regular). # Check that environment is locked because the registering package # might be a dependency of the package that exports the generic. In # that case, the exports (and the generic) might not be populated # yet (#1225). if (isNamespaceLoaded(package) && is_sealed(package)) { register() } invisible() } .rlang_s3_register_compat <- function(fn, try_rlang = TRUE) { # Compats that behave the same independently of rlang's presence out <- switch( fn, is_installed = return(function(pkg) requireNamespace(pkg, quietly = TRUE)) ) # Only use rlang if it is fully loaded (#1482) if ( try_rlang && requireNamespace("rlang", quietly = TRUE) && environmentIsLocked(asNamespace("rlang")) ) { switch( fn, is_interactive = return(rlang::is_interactive) ) # Make sure rlang knows about "x" and "i" bullets if (utils::packageVersion("rlang") >= "0.4.2") { switch( fn, abort = return(rlang::abort), warn = return((rlang::warn)), inform = return(rlang::inform) ) } } # Fall back to base compats is_interactive_compat <- function() { opt <- getOption("rlang_interactive") if (!is.null(opt)) { opt } else { interactive() } } format_msg <- function(x) paste(x, collapse = "\n") switch( fn, is_interactive = return(is_interactive_compat), abort = return(function(msg) stop(format_msg(msg), call. = FALSE)), warn = return(function(msg) warning(format_msg(msg), call. = FALSE)), inform = return(function(msg) message(format_msg(msg))) ) stop(sprintf("Internal error in rlang shims: Unknown function `%s()`.", fn)) } on_load({ s3_register <- replace_from("s3_register", "rlang") }) knitr_defer <- function(expr, env = caller_env()) { roxy_caller <- detect(sys.frames(), env_inherits, ns_env("knitr")) if (is_null(roxy_caller)) { abort("Internal error: can't find knitr on the stack.") } blast( withr::defer(!!substitute(expr), !!roxy_caller), env ) } blast <- function(expr, env = caller_env()) { eval_bare(enexpr(expr), env) } knitr_local_registration <- function(generic, class, env = caller_env()) { stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) pieces <- strsplit(generic, "::")[[1]] stopifnot(length(pieces) == 2) package <- pieces[[1]] generic <- pieces[[2]] name <- paste0(generic, ".", class) method <- env_get(env, name) old <- env_bind(global_env(), !!name := method) knitr_defer(env_bind(global_env(), !!!old)) } # nocov end vctrs/R/fields.R0000644000176200001440000000164314713505651013220 0ustar liggesusers#' Tools for accessing the fields of a record. #' #' A [rcrd] behaves like a vector, so `length()`, `names()`, and `$` can #' not provide access to the fields of the underlying list. These helpers do: #' `fields()` is equivalent to `names()`; `n_fields()` is equivalent to #' `length()`; `field()` is equivalent to `$`. #' #' @param x A [rcrd], i.e. a list of equal length vectors with unique names. #' @keywords internal #' @export #' @examples #' x <- new_rcrd(list(x = 1:3, y = 3:1, z = letters[1:3])) #' n_fields(x) #' fields(x) #' #' field(x, "y") #' field(x, "y") <- runif(3) #' field(x, "y") fields <- function(x) { .Call(vctrs_fields, x) } #' @export #' @rdname fields n_fields <- function(x) { .Call(vctrs_n_fields, x) } #' @export #' @rdname fields field <- function(x, i) { .Call(vctrs_field_get, x, i) } #' @export #' @rdname fields `field<-` <- function(x, i, value) { .Call(vctrs_field_set, x, i, value) } vctrs/R/type-tibble.R0000644000176200001440000000221215065005761014161 0ustar liggesusers#' @rdname df_ptype2 #' @export tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { .Call( ffi_tib_ptype2, x = x, y = y, x_arg = x_arg, y_arg = y_arg, frame = environment() ) } #' @rdname df_ptype2 #' @export tib_cast <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { .Call( ffi_tib_cast, x = x, to = to, x_arg = x_arg, to_arg = to_arg, frame = environment() ) } df_as_tibble <- function(df) { class(df) <- c("tbl_df", "tbl", "data.frame") df } # Conditionally registered in .onLoad() vec_ptype2_tbl_df_tbl_df <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_ptype2_tbl_df_data.frame <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_ptype2_data.frame_tbl_df <- function(x, y, ...) { vec_ptype2_dispatch_native(x, y, ...) } vec_cast_tbl_df_tbl_df <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vec_cast_data.frame_tbl_df <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vec_cast_tbl_df_data.frame <- function(x, to, ...) { vec_cast_dispatch_native(x, to, ...) } vctrs/R/type-vctr.R0000644000176200001440000004277015065005761013713 0ustar liggesusers#' vctr (vector) S3 class #' #' @description #' This abstract class provides a set of useful default methods that makes it #' considerably easier to get started with a new S3 vector class. See #' `vignette("s3-vector")` to learn how to use it to create your own S3 #' vector classes. #' #' @details #' List vctrs are special cases. When created through `new_vctr()`, the #' resulting list vctr should always be recognized as a list by #' `obj_is_list()`. Because of this, if `inherit_base_type` is `FALSE` #' an error is thrown. #' #' @section Base methods: #' The vctr class provides methods for many base generics using a smaller #' set of generics defined by this package. Generally, you should think #' carefully before overriding any of the methods that vctrs implements for #' you as they've been carefully planned to be internally consistent. #' #' * `[[` and `[` use `NextMethod()` dispatch to the underlying base function, #' then restore attributes with `vec_restore()`. #' `rep()` and `length<-` work similarly. #' #' * `[[<-` and `[<-` cast `value` to same type as `x`, then call #' `NextMethod()`. #' #' * `as.logical()`, `as.integer()`, `as.numeric()`, `as.character()`, #' `as.Date()` and `as.POSIXct()` methods call `vec_cast()`. #' The `as.list()` method calls `[[` repeatedly, and the `as.data.frame()` #' method uses a standard technique to wrap a vector in a data frame. #' #' * `as.factor()`, `as.ordered()` and `as.difftime()` are not generic functions #' in base R, but have been reimplemented as generics in the `generics` #' package. `vctrs` extends these and calls `vec_cast()`. To inherit this #' behaviour in a package, import and re-export the generic of interest #' from `generics`. #' #' * `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()` use #' [vec_proxy()]. #' #' * `<`, `<=`, `>=`, `>`, `min()`, `max()`, `range()`, `median()`, #' `quantile()`, and `xtfrm()` methods use [vec_proxy_compare()]. #' #' * `+`, `-`, `/`, `*`, `^`, `%%`, `%/%`, `!`, `&`, and `|` operators #' use [vec_arith()]. #' #' * Mathematical operations including the Summary group generics (`prod()`, #' `sum()`, `any()`, `all()`), the Math group generics (`abs()`, `sign()`, #' etc), `mean()`, `is.nan()`, `is.finite()`, and `is.infinite()` #' use [vec_math()]. #' #' * `dims()`, `dims<-`, `dimnames()`, `dimnames<-`, `levels()`, and #' `levels<-` methods throw errors. #' #' @param .data Foundation of class. Must be a vector #' @param ... Name-value pairs defining attributes #' @param class Name of subclass. #' @param inherit_base_type `r lifecycle::badge("experimental")` #' A single logical, or `NULL`. Does this class extend the base type of #' `.data`? i.e. does the resulting object extend the behaviour of the #' underlying type? Defaults to `FALSE` for all types except lists, which #' are required to inherit from the base type. #' @export #' @keywords internal #' @aliases vctr new_vctr <- function( .data, ..., class = character(), inherit_base_type = NULL ) { if (!is_vector(.data)) { abort("`.data` must be a vector type.") } if (is_list(.data)) { if (is.data.frame(.data)) { abort("`.data` can't be a data frame.") } if (is.null(inherit_base_type)) { inherit_base_type <- TRUE } else if (is_false(inherit_base_type)) { abort("List `.data` must inherit from the base type.") } } # Default to `FALSE` in all cases except lists if (is.null(inherit_base_type)) { inherit_base_type <- FALSE } names <- names(.data) names <- names_repair_missing(names) class <- c(class, "vctrs_vctr", if (inherit_base_type) typeof(.data)) attrib <- list(names = names, ..., class = class) vec_set_attributes(.data, attrib) } names_repair_missing <- function(x) { if (is.null(x)) { return(x) } if (vec_any_missing(x)) { # We never want to allow `NA_character_` names to slip through, but # erroring on them has caused issues. Instead, we repair them to the # empty string (#784). missing <- vec_detect_missing(x) x <- vec_assign(x, missing, "") } x } #' @export vec_proxy.vctrs_vctr <- function(x, ...) { if (is_list(x)) { unclass(x) } else { x } } #' @export vec_restore.vctrs_vctr <- function(x, to, ..., i = NULL) { if (typeof(x) != typeof(to)) { stop_incompatible_cast(x, to, x_arg = "", to_arg = "") } NextMethod() } #' @method vec_cast vctrs_vctr #' @export vec_cast.vctrs_vctr <- function(x, to, ...) { UseMethod("vec_cast.vctrs_vctr") } vctr_cast <- function( x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { # These are not strictly necessary, but make bootstrapping a new class # a bit simpler if (is.object(x)) { if (is_same_type(x, to)) { x } else { stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, call = call ) } } else { # FIXME: `vec_restore()` should only be called on proxies vec_restore(x, to) } } #' @export c.vctrs_vctr <- function(..., recursive = FALSE, use.names = TRUE) { if (!is_false(recursive)) { abort("`recursive` must be `FALSE` when concatenating vctrs classes.") } if (!is_true(use.names)) { abort("`use.names` must be `TRUE` when concatenating vctrs classes.") } vec_c(...) } # Printing ---------------------------------------------------------------- #' @export print.vctrs_vctr <- function(x, ...) { obj_print(x, ...) invisible(x) } #' @export str.vctrs_vctr <- function(object, ...) { obj_str(object, ...) } #' @export format.vctrs_vctr <- function(x, ...) { format(vec_data(x), ...) } # Subsetting -------------------------------------------------------------- #' @export `[.vctrs_vctr` <- function(x, i, ...) { vec_index(x, i, ...) } #' @export `[[.vctrs_vctr` <- function(x, i, ...) { if (is.list(x)) { NextMethod() } else { vec_restore(NextMethod(), x) } } #' @export `$.vctrs_vctr` <- function(x, i) { if (is.list(x)) { NextMethod() } else { vec_restore(NextMethod(), x) } } #' @export rep.vctrs_vctr <- function(x, ...) { vec_restore(NextMethod(), x) } #' @export `length<-.vctrs_vctr` <- function(x, value) { vec_restore(NextMethod(), x) } #' @export diff.vctrs_vctr <- function(x, lag = 1L, differences = 1L, ...) { stopifnot(length(lag) == 1L, lag >= 1L) stopifnot(length(differences) == 1L, differences >= 1L) n <- vec_size(x) if (lag * differences >= n) { return(vec_slice(x, 0L)) } out <- x for (i in seq_len(differences)) { n <- vec_size(out) lhs <- (1L + lag):n rhs <- 1L:(n - lag) out <- vec_slice(out, lhs) - vec_slice(out, rhs) } out } # Modification ------------------------------------------------------------- #' @export `[[<-.vctrs_vctr` <- function(x, ..., value) { if (!is.list(x)) { value <- vec_cast(value, x) } NextMethod() } #' @export `$<-.vctrs_vctr` <- function(x, i, value) { if (is.list(x)) { NextMethod() } else { # Default behaviour is to cast LHS to a list abort("$ operator is invalid for atomic vectors.") } } #' @export `[<-.vctrs_vctr` <- function(x, i, value) { value <- vec_cast(value, x) NextMethod() } #' @export `names<-.vctrs_vctr` <- function(x, value) { if (length(value) != 0 && length(value) != length(x)) { abort("`names()` must be the same length as x.") } value <- names_repair_missing(value) NextMethod() } # Coercion ---------------------------------------------------------------- #' @export as.logical.vctrs_vctr <- function(x, ...) { vec_cast(x, logical()) } #' @export as.integer.vctrs_vctr <- function(x, ...) { vec_cast(x, integer()) } #' @export as.double.vctrs_vctr <- function(x, ...) { vec_cast(x, double()) } #' @export as.character.vctrs_vctr <- function(x, ...) { vec_cast(x, character()) } #' @export as.list.vctrs_vctr <- function(x, ...) { out <- vec_chop(x) if (obj_is_list(x)) { out <- lapply(out, `[[`, 1) } out } #' @export as.Date.vctrs_vctr <- function(x, ...) { vec_cast(x, new_date()) } #' @export as.POSIXct.vctrs_vctr <- function(x, tz = "", ...) { vec_cast(x, new_datetime(tzone = tz)) } #' @export as.POSIXlt.vctrs_vctr <- function(x, tz = "", ...) { to <- as.POSIXlt(new_datetime(), tz = tz) vec_cast(x, to) } # Work around inconsistencies in as.data.frame() as.data.frame2 <- function(x) { # Unclass to avoid dispatching on `as.data.frame()` methods that break size # invariants, like `as.data.frame.table()` (#913). This also prevents infinite # recursion with shaped vctrs in `as.data.frame.vctrs_vctr()`. x <- unclass(x) out <- as.data.frame(x) if (vec_dim_n(x) == 1) { # 1D arrays are not stripped from their dimensions out[[1]] <- as.vector(out[[1]]) # 1D arrays are auto-labelled with substitute() names(out) <- "V1" } out } #' @export as.data.frame.vctrs_vctr <- function( x, row.names = NULL, optional = FALSE, ..., nm = paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ") ) { force(nm) if (has_dim(x)) { return(as.data.frame2(x)) } cols <- list(x) if (!optional) { names(cols) <- nm } new_data_frame(cols, n = vec_size(x)) } # Dynamically registered in .onLoad() as.factor.vctrs_vctr <- function(x, levels = character(), ...) { vec_cast(x, new_factor(levels = levels)) } # Dynamically registered in .onLoad() as.ordered.vctrs_vctr <- function(x, levels = character(), ...) { vec_cast(x, new_ordered(levels = levels)) } # Dynamically registered in .onLoad() as.difftime.vctrs_vctr <- function(x, units = "secs", ...) { vec_cast(x, new_duration(units = units)) } # Equality ---------------------------------------------------------------- #' @export `==.vctrs_vctr` <- function(e1, e2) { vec_equal(e1, e2) } #' @export `!=.vctrs_vctr` <- function(e1, e2) { !vec_equal(e1, e2) } #' @export is.na.vctrs_vctr <- function(x) { vec_detect_missing(x) } #' @importFrom stats na.fail #' @export na.fail.vctrs_vctr <- function(object, ...) { if (vec_any_missing(object)) { # Return the same error as `na.fail.default()` abort("missing values in object") } object } #' @importFrom stats na.omit #' @export na.omit.vctrs_vctr <- function(object, ...) { na_remove(object, "omit") } #' @importFrom stats na.exclude #' @export na.exclude.vctrs_vctr <- function(object, ...) { na_remove(object, "exclude") } na_remove <- function(x, type) { # The only difference between `na.omit()` and `na.exclude()` is the class # of the `na.action` attribute if (!vec_any_missing(x)) { return(x) } # `na.omit/exclude()` attach the locations of the omitted values to the result missing <- vec_detect_missing(x) loc <- which(missing) names <- vec_names(x) if (!is_null(names)) { # `na.omit/exclude()` retain the original names, if applicable names <- vec_slice(names, loc) loc <- vec_set_names(loc, names) } attr(loc, "class") <- type out <- vec_slice(x, !missing) attr(out, "na.action") <- loc out } #' @export anyNA.vctrs_vctr <- function(x, recursive = FALSE) { if (recursive && obj_is_list(x)) { any(map_lgl(x, anyNA, recursive = recursive)) } else { any(is.na(x)) } } #' @export unique.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_unique(x) } #' @export duplicated.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_duplicate_id(x) != seq_along(x) } #' @export anyDuplicated.vctrs_vctr <- function(x, incomparables = FALSE, ...) { vec_duplicate_any(x) } # Comparison ---------------------------------------------------------------- #' @export `<=.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) <= 0 } #' @export `<.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) < 0 } #' @export `>=.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) >= 0 } #' @export `>.vctrs_vctr` <- function(e1, e2) { vec_compare(e1, e2) > 0 } #' @export xtfrm.vctrs_vctr <- function(x) { proxy <- vec_proxy_order(x) type <- typeof(proxy) if (type == "logical") { proxy <- unstructure(proxy) proxy <- as.integer(proxy) return(proxy) } if (type %in% c("integer", "double")) { proxy <- unstructure(proxy) return(proxy) } vec_rank(proxy, ties = "dense", incomplete = "na") } #' @importFrom stats median #' @export median.vctrs_vctr <- function(x, ..., na.rm = FALSE) { # nocov start stop_unimplemented(x, "median") # nocov end } #' @importFrom stats quantile #' @export quantile.vctrs_vctr <- function(x, ..., type = 1, na.rm = FALSE) { # nocov start stop_unimplemented(x, "quantile") # nocov end } vec_cast_or_na <- function(x, to, ...) { tryCatch( vctrs_error_incompatible_type = function(...) vec_init(to, length(x)), vec_cast(x, to) ) } #' @export min.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast_or_na(Inf, x)) } # TODO: implement to do vec_arg_min() rank <- xtfrm(x) if (isTRUE(na.rm)) { idx <- which.min(rank) if (vec_is_empty(idx)) { return(vec_cast_or_na(Inf, x)) } } else { idx <- which(vec_equal(rank, min(rank), na_equal = TRUE)) } x[[idx[[1]]]] } #' @export max.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast_or_na(-Inf, x)) } # TODO: implement to do vec_arg_max() rank <- xtfrm(x) if (isTRUE(na.rm)) { idx <- which.max(rank) if (vec_is_empty(idx)) { return(vec_cast_or_na(-Inf, x)) } } else { idx <- which(vec_equal(rank, max(rank), na_equal = TRUE)) } x[[idx[[1]]]] } #' @export range.vctrs_vctr <- function(x, ..., na.rm = FALSE) { if (vec_is_empty(x)) { return(vec_cast_or_na(c(Inf, -Inf), x)) } # Inline `min()` / `max()` to only call `xtfrm()` once rank <- xtfrm(x) if (isTRUE(na.rm)) { idx_min <- which.min(rank) idx_max <- which.max(rank) if (vec_is_empty(idx_min) && vec_is_empty(idx_max)) { return(vec_cast_or_na(c(Inf, -Inf), x)) } } else { idx_min <- which(vec_equal(rank, min(rank), na_equal = TRUE)) idx_max <- which(vec_equal(rank, max(rank), na_equal = TRUE)) } c(x[[idx_min[[1]]]], x[[idx_max[[1]]]]) } # Numeric ----------------------------------------------------------------- #' @export Math.vctrs_vctr <- function(x, ...) { vec_math(.Generic, x, ...) } #' @export Summary.vctrs_vctr <- function(..., na.rm = FALSE) { vec_math(.Generic, vec_c(...), na.rm = na.rm) } #' @export mean.vctrs_vctr <- function(x, ..., na.rm = FALSE) { vec_math("mean", x, na.rm = na.rm) } #' @export is.finite.vctrs_vctr <- function(x) { vec_math("is.finite", x) } #' @export is.infinite.vctrs_vctr <- function(x) { vec_math("is.infinite", x) } #' @export is.nan.vctrs_vctr <- function(x) { vec_math("is.nan", x) } # Arithmetic -------------------------------------------------------------- #' @export `+.vctrs_vctr` <- function(e1, e2) { if (missing(e2)) { vec_arith("+", e1, MISSING()) } else { vec_arith("+", e1, e2) } } #' @export `-.vctrs_vctr` <- function(e1, e2) { if (missing(e2)) { vec_arith("-", e1, MISSING()) } else { vec_arith("-", e1, e2) } } #' @export `*.vctrs_vctr` <- function(e1, e2) { vec_arith("*", e1, e2) } #' @export `/.vctrs_vctr` <- function(e1, e2) { vec_arith("/", e1, e2) } #' @export `^.vctrs_vctr` <- function(e1, e2) { vec_arith("^", e1, e2) } #' @export `%%.vctrs_vctr` <- function(e1, e2) { vec_arith("%%", e1, e2) } #' @export `%/%.vctrs_vctr` <- function(e1, e2) { vec_arith("%/%", e1, e2) } #' @export `!.vctrs_vctr` <- function(x) { vec_arith("!", x, MISSING()) } #' @export `&.vctrs_vctr` <- function(e1, e2) { vec_arith("&", e1, e2) } #' @export `|.vctrs_vctr` <- function(e1, e2) { vec_arith("|", e1, e2) } # Unimplemented ------------------------------------------------------------ #' @export summary.vctrs_vctr <- function(object, ...) { # nocov start stop_unimplemented(object, "summary") # nocov end } # Unsupported -------------------------------------------------------------- #' @export `dim<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "dim<-") } #' @export `dimnames<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "dimnames<-") } #' @export levels.vctrs_vctr <- function(x) { NULL } #' @export `levels<-.vctrs_vctr` <- function(x, value) { stop_unsupported(x, "levels<-") } #' @export `t.vctrs_vctr` <- function(x) { stop_unsupported(x, "t") } #' @export `is.na<-.vctrs_vctr` <- function(x, value) { vec_assign(x, value, vec_init(x)) } # Helpers ----------------------------------------------------------------- # This simple class is used for testing as defining methods inside # a test does not work (because the lexical scope is lost) # nocov start new_hidden <- function(x = double()) { stopifnot(is.numeric(x)) new_vctr(vec_cast(x, double()), class = "hidden", inherit_base_type = FALSE) } #' @export format.hidden <- function(x, ...) rep("xxx", length(x)) local_hidden <- function(frame = caller_env()) { local_bindings( .env = global_env(), .frame = frame, vec_ptype2.hidden.hidden = function(x, y, ...) new_hidden(), vec_ptype2.hidden.double = function(x, y, ...) new_hidden(), vec_ptype2.double.hidden = function(x, y, ...) new_hidden(), vec_ptype2.hidden.logical = function(x, y, ...) new_hidden(), vec_ptype2.logical.hidden = function(x, y, ...) new_hidden(), vec_cast.hidden.hidden = function(x, to, ...) x, vec_cast.hidden.double = function(x, to, ...) new_hidden(vec_data(x)), vec_cast.double.hidden = function(x, to, ...) vec_data(x), vec_cast.hidden.logical = function(x, to, ...) new_hidden(as.double(x)), vec_cast.logical.hidden = function(x, to, ...) as.logical(vec_data(x)) ) } # nocov end vctrs/R/type-integer64.R0000644000176200001440000000752315065005761014541 0ustar liggesusers#' @export vec_proxy_equal.integer64 <- function(x, ...) { if (is.array(x)) { # Stopgap to convert arrays to data frames, then run them through # `vec_proxy_equal()` again, which will proxy each column x <- as_data_frame_from_array(x) x <- vec_proxy_equal(x) return(x) } integer64_proxy(x) } # Print ------------------------------------------------------------------- #' 64 bit integers #' #' A `integer64` is a 64 bits integer vector, implemented in the `bit64` package. #' #' These functions help the `integer64` class from `bit64` in to #' the vctrs type system by providing coercion functions #' and casting functions. #' #' @keywords internal #' @rdname int64 #' @export vec_ptype_full.integer64 <- function(x, ...) { "integer64" } #' @rdname int64 #' @export vec_ptype_abbr.integer64 <- function(x, ...) { "int64" } # Coerce ------------------------------------------------------------------ #' @export #' @rdname int64 #' @export vec_ptype2.integer64 #' @method vec_ptype2 integer64 vec_ptype2.integer64 <- function(x, y, ...) { UseMethod("vec_ptype2.integer64") } #' @method vec_ptype2.integer64 integer64 #' @export vec_ptype2.integer64.integer64 <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer64 integer #' @export vec_ptype2.integer64.integer <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer integer64 #' @export vec_ptype2.integer.integer64 <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.integer64 logical #' @export vec_ptype2.integer64.logical <- function(x, y, ...) bit64::integer64() #' @method vec_ptype2.logical integer64 #' @export vec_ptype2.logical.integer64 <- function(x, y, ...) bit64::integer64() # Cast -------------------------------------------------------------------- #' @export #' @rdname int64 #' @export vec_cast.integer64 #' @method vec_cast integer64 vec_cast.integer64 <- function(x, to, ...) { UseMethod("vec_cast.integer64") } #' @export #' @method vec_cast.integer64 integer64 vec_cast.integer64.integer64 <- function(x, to, ...) { x } #' @export #' @method vec_cast.integer64 integer vec_cast.integer64.integer <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.integer integer64 vec_cast.integer.integer64 <- function(x, to, ...) { as.integer(x) } #' @export #' @method vec_cast.integer64 logical vec_cast.integer64.logical <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.logical integer64 vec_cast.logical.integer64 <- function(x, to, ...) { as.logical(x) } #' @export #' @method vec_cast.integer64 double vec_cast.integer64.double <- function(x, to, ...) { bit64::as.integer64(x) } #' @export #' @method vec_cast.double integer64 vec_cast.double.integer64 <- function(x, to, ...) { as.double(x) } # ------------------------------------------------------------------------------ integer64_proxy <- function(x) { .Call(vctrs_integer64_proxy, x) } integer64_restore <- function(x) { .Call(vctrs_integer64_restore, x) } # ------------------------------------------------------------------------------ as_data_frame_from_array <- function(x) { # Alternative to `as.data.frame.array()` that always strips 1-D arrays # of their dimensions. Unlike `as.data.frame2()`, it doesn't unclass the # input, which means that each column retains its original class. # This function doesn't attempt to keep the names of `x` at all. dim <- dim(x) n_dim <- length(dim) if (n_dim == 1) { # Treat 1-D arrays as 1 column matrices dim(x) <- c(dim, 1L) n_dim <- 2L } n_row <- dim[[1L]] n_col <- prod(dim[-1L]) n_col_seq <- seq_len(n_col) dim(x) <- c(n_row, n_col) out <- vector("list", n_col) names(out) <- as_unique_names(rep("", n_col), quiet = TRUE) for (i in n_col_seq) { out[[i]] <- x[, i, drop = TRUE] } new_data_frame(out, n = n_row) } vctrs/R/type-data-table.R0000644000176200001440000000171714713505651014731 0ustar liggesusersdelayedAssign("as.data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "as.data.table") } else { function(...) abort("`data.table` must be installed.") } }) dt_ptype2 <- function(x, y, ...) { as.data.table(df_ptype2(x, y, ...)) } dt_cast <- function(x, to, ...) { as.data.table(df_cast(x, to, ...)) } #' @export vec_ptype2.data.table.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_cast.data.table.data.table <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.table.data.frame <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.frame.data.table <- function(x, to, ...) { df_cast(x, to, ...) } #' @export vec_ptype_abbr.data.table <- function(x, ...) { "dt" } vctrs/R/rep.R0000644000176200001440000000615315065005761012537 0ustar liggesusers#' Repeat a vector #' #' @description #' - `vec_rep()` repeats an entire vector a set number of `times`. #' #' - `vec_rep_each()` repeats each element of a vector a set number of `times`. #' #' - `vec_unrep()` compresses a vector with repeated values. The repeated values #' are returned as a `key` alongside the number of `times` each key is #' repeated. #' #' @details #' Using `vec_unrep()` and `vec_rep_each()` together is similar to using #' [base::rle()] and [base::inverse.rle()]. The following invariant shows #' the relationship between the two functions: #' #' ``` #' compressed <- vec_unrep(x) #' identical(x, vec_rep_each(compressed$key, compressed$times)) #' ``` #' #' There are two main differences between `vec_unrep()` and [base::rle()]: #' #' - `vec_unrep()` treats adjacent missing values as equivalent, while `rle()` #' treats them as different values. #' #' - `vec_unrep()` works along the size of `x`, while `rle()` works along its #' length. This means that `vec_unrep()` works on data frames by compressing #' repeated rows. #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' @param x A vector. #' @param times #' For `vec_rep()`, a single integer for the number of times to repeat #' the entire vector. #' #' For `vec_rep_each()`, an integer vector of the number of times to repeat #' each element of `x`. `times` will be [recycled][theory-faq-recycling] to #' the size of `x`. #' @param x_arg,times_arg Argument names for errors. #' #' @return #' For `vec_rep()`, a vector the same type as `x` with size #' `vec_size(x) * times`. #' #' For `vec_rep_each()`, a vector the same type as `x` with size #' `sum(vec_recycle(times, vec_size(x)))`. #' #' For `vec_unrep()`, a data frame with two columns, `key` and `times`. `key` #' is a vector with the same type as `x`, and `times` is an integer vector. #' #' @section Dependencies: #' - [vec_slice()] #' #' @name vec-rep #' @examples #' # Repeat the entire vector #' vec_rep(1:2, 3) #' #' # Repeat within each vector #' vec_rep_each(1:2, 3) #' x <- vec_rep_each(1:2, c(3, 4)) #' x #' #' # After using `vec_rep_each()`, you can recover the original vector #' # with `vec_unrep()` #' vec_unrep(x) #' #' df <- data.frame(x = 1:2, y = 3:4) #' #' # `rep()` repeats columns of data frames, and returns lists #' rep(df, each = 2) #' #' # `vec_rep()` and `vec_rep_each()` repeat rows, and return data frames #' vec_rep(df, 2) #' vec_rep_each(df, 2) #' #' # `rle()` treats adjacent missing values as different #' y <- c(1, NA, NA, 2) #' rle(y) #' #' # `vec_unrep()` treats them as equivalent #' vec_unrep(y) NULL #' @rdname vec-rep #' @export vec_rep <- function( x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times" ) { check_dots_empty0(...) .Call(ffi_vec_rep, x, times, environment()) } #' @rdname vec-rep #' @export vec_rep_each <- function( x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times" ) { check_dots_empty0(...) .Call(ffi_vec_rep_each, x, times, environment()) } #' @rdname vec-rep #' @export vec_unrep <- function(x) { .Call(ffi_vec_unrep, x, environment()) } vctrs/R/case-when.R0000644000176200001440000001356015072256373013630 0ustar liggesusers#' Recode and replace using logical conditions #' #' @description #' #' - `vec_case_when()` constructs an entirely new vector by recoding the `TRUE` #' `conditions` to their corresponding `values`. If there are locations not #' matched by `conditions`, then they are recoded to the `default` value. #' #' - `vec_replace_when()` updates an existing vector by replacing the values #' from `x` matched by the `TRUE` `conditions` with their corresponding #' `values`. In this case, each element of `values` must have the same type as #' `x` and locations not matched by `conditions` retain their original `x` #' value. #' #' `vec_case_when()` is often thought of as a way to vectorize multiple if-else #' statements, and is an R equivalent of the SQL "searched" `CASE WHEN` #' statement. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x A vector. #' #' @param conditions A list of logical condition vectors. #' #' For `vec_case_when()`, each vector should be the same size. #' #' For `vec_replace_when()`, each vector should be the same size as `x`. #' #' Where a value in `conditions` is `TRUE`, the corresponding value in #' `values` will be assigned to the result. #' #' @param values A list of vectors. #' #' For `vec_case_when()`, each vector should be size 1 or the size implied by #' `conditions`. The common type of `values` and `default` determine the #' output type, unless overridden by `ptype`. #' #' For `vec_replace_when()`, each vector should be size 1 or the same size #' as `x`. Each vector will be cast to the type of `x`. #' #' @param default Default value to use when `conditions` does not match every #' location in the output. #' #' By default, a missing value is used as the default value. #' #' If supplied, `default` must be size 1 or the size implied by `conditions`. #' #' Can only be set when `unmatched = "default"`. #' #' @param unmatched Handling of unmatched locations. #' #' One of: #' #' - `"default"` to use `default` in unmatched locations. #' #' - `"error"` to error when there are unmatched locations. #' #' @param ptype An optional override for the output type, which is usually #' computed as the common type of `values` and `default`. #' #' @param size An optional override for the output size, which is usually #' computed as the size of the first element of `conditions`. #' #' Only useful for requiring a fixed size when `conditions` is an empty list. #' #' @param x_arg,conditions_arg,values_arg,default_arg Argument names used in #' error messages. #' #' @returns #' A vector. #' #' - For `vec_case_when()`, the type of the output is computed as the common #' type of `values` and `default`, unless overridden by `ptype`. The names of #' the output come from the names of `values` and `default`. The size of the #' output comes from the implied size from `conditions`, unless overridden by #' `size`. #' #' - For `vec_replace_when()`, the type of the output will have the same type as #' `x`. The names of the output will be the same as the names of `x`. The size #' of the output will be the same size as `x`. #' #' @name vec-case-and-replace #' #' @examples #' # Note how the first `TRUE` is used in the output. #' # Also note how the `NA` falls through to `default`. #' x <- seq(-2L, 2L, by = 1L) #' x <- c(x, NA) #' conditions <- list( #' x < 0, #' x < 1 #' ) #' values <- list( #' "<0", #' "<1" #' ) #' vec_case_when( #' conditions, #' values, #' default = "other" #' ) #' #' # Missing values need to be handled with their own case #' # if you want them to have a special value #' conditions <- list( #' x < 0, #' x < 1, #' is.na(x) #' ) #' values <- list( #' "<0", #' "<1", #' NA #' ) #' vec_case_when( #' conditions, #' values, #' default = "other" #' ) #' #' # Both `values` and `default` are vectorized #' values <- list( #' x * 5, #' x * 10, #' NA #' ) #' vec_case_when( #' conditions, #' values, #' default = x * 100 #' ) #' #' # Use `vec_replace_when()` if you need to update `x`, retaining #' # all previous values in locations that you don't match #' conditions <- list( #' x < 0, #' x < 1 #' ) #' values <- list( #' 0, #' 1 #' ) #' out <- vec_replace_when( #' x, #' conditions, #' values #' ) #' out #' #' # Note how `vec_replace_when()` is type stable on `x`, we retain the #' # integer type here even though `values` contained doubles #' typeof(out) #' #' # `vec_case_when()` creates a new vector, so names come from `values` #' # and `default`. `vec_replace_when()` modifies an existing vector, so #' # names come from `x` no matter what, just like `[<-` and `base::replace()` #' x <- c(a = 1, b = 2, c = 3) #' conditions <- list(x == 1, x == 2) #' values <- list(c(x = 0), c(y = -1)) #' vec_case_when(conditions, values) #' vec_replace_when(x, conditions, values) #' #' # If you want to enforce that you've covered all of the locations in your #' # `conditions`, use `unmatched = "error"` rather than providing a `default` #' x <- c(0, 1, 2) #' conditions <- list(x == 1, x == 2) #' values <- list("a", "b") #' try(vec_case_when(conditions, values, unmatched = "error")) NULL #' @rdname vec-case-and-replace #' @export vec_case_when <- function( conditions, values, ..., default = NULL, unmatched = "default", ptype = NULL, size = NULL, conditions_arg = "conditions", values_arg = "values", default_arg = "default", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_vec_case_when, conditions, values, default, unmatched, ptype, size, environment() ) } #' @rdname vec-case-and-replace #' @export vec_replace_when <- function( x, conditions, values, ..., x_arg = "x", conditions_arg = "conditions", values_arg = "values", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_vec_replace_when, x, conditions, values, environment() ) } vctrs/R/type-list-of.R0000644000176200001440000003332015120272011014263 0ustar liggesusers#' Construct a list of homogenous vectors #' #' @description #' A `list_of` is a restricted version of a list that adds constraints on the #' list elements. #' #' - `list_of(.ptype = )` restricts the _type_ of each element. #' #' - `.ptype = ` asserts that each element has type ``. #' #' - `.ptype = NULL` infers the type from the original set of elements, or #' errors if no vector inputs were provided. #' #' - `.ptype = rlang::zap()` doesn't restrict the type. #' #' - `list_of(.size = )` restricts the _size_ of each element. #' #' - `.size = ` asserts that each element has size ``. #' #' - `.size = NULL` infers the size from the original set of elements, or #' errors if no vector inputs were provided. #' #' - `.size = rlang::zap()` doesn't restrict the size. #' #' The default behavior infers the element type and doesn't restrict the size. #' #' Both `.ptype` and `.size` may be specified to restrict both the size and #' type of the list elements. You cannot set both of these to `rlang::zap()`, #' as that would be the same as a bare `list()` with no restrictions. #' #' Modifying a `list_of` with `$<-`, `[<-`, and `[[<-` preserves the constraints #' by coercing and recycling all input items. #' #' @param ... For `list_of()`, vectors to include in the list. #' #' For other methods, these dots must be empty. #' #' @param x For `as_list_of()`, a vector to be coerced to list_of. #' #' For `is_list_of()`, an object to test. #' #' @param y,to Arguments to `vec_ptype2()` and `vec_cast()`. #' #' @param .ptype The type to restrict each list element to. One of: #' #' - A prototype like `integer()` or `double()`. #' #' - `NULL`, to infer the type from `...`. If no vector inputs are provided, #' an error is thrown. #' #' - [rlang::zap()] to avoid placing any restrictions on the type. #' #' @param .size The size to restrict each list element to. One of: #' #' - A scalar integer size. #' #' - `NULL`, to infer the size from `...`. If no vector inputs are provided, #' an error is thrown. #' #' - [rlang::zap()] to avoid placing any restrictions on the size. #' #' @export #' @examples #' # Restrict the type, but not the size #' x <- list_of(1:3, 5:6, 10:15) #' x #' #' if (requireNamespace("tibble", quietly = TRUE)) { #' # As a column in a tibble #' tibble::tibble(x = x) #' } #' #' # Coercion happens during assignment #' x[1] <- list(4) #' typeof(x[[1]]) #' #' try(x[1] <- list(4.5)) #' #' # Restrict the size, but not the type #' x <- list_of(1, 2:3, .ptype = rlang::zap(), .size = 2) #' x #' #' # Recycling happens during assignment #' x[1] <- list(4) #' x #' #' try(x[1] <- list(3:6)) #' #' # Restricting both size and type #' x <- list_of(1L, 2:3, .ptype = integer(), .size = 2) #' x #' #' # Setting an element to `NULL` #' x[2] <- list(NULL) #' x #' #' # Note that using `NULL` shortens the list, like a base R list #' x[2] <- NULL #' x #' #' # Combining a list_of with a list results in a list #' vec_c(list_of(1), list(2, "x")) #' #' # Combining a list_of with another list_of tries to find a common element #' # type and common element size, but will remove the constraint if that #' # fails #' x <- list_of(1, .ptype = double()) #' y <- list_of(c("a", "b"), .ptype = character(), .size = 2) #' z <- list_of(c("c", "d", "e"), .ptype = character(), .size = 3) #' #' # Falls back to a list #' vec_c(x, y) #' #' # Falls back to a `list_of` with no size restriction #' vec_c(y, z) list_of <- function(..., .ptype = NULL, .size = zap()) { args <- list2(...) list_as_list_of(args, ptype = .ptype, size = .size) } #' @export #' @rdname list_of as_list_of <- function(x, ...) { UseMethod("as_list_of") } #' @export as_list_of.vctrs_list_of <- function(x, ...) { x } #' @export as_list_of.list <- function(x, ..., .ptype = NULL, .size = zap()) { list_as_list_of(x, ptype = .ptype, size = .size) } #' Create list_of subclass #' #' @param x A list #' @param ptype The prototype which every element of `x` belongs to. If `NULL`, #' the prototype is not specified. #' @param size The size which every element of `x` has. If `NULL`, the size is #' not specified. #' @param ... Additional attributes used by subclass #' @param class Optional subclass name #' @keywords internal #' @export new_list_of <- function( x = list(), ptype = logical(), size = NULL, ..., class = character() ) { obj_check_list(x) has_ptype <- !is_null(ptype) has_size <- !is_null(size) if (!has_ptype && !has_size) { abort("Must specify at least one of `ptype` or `size`.") } if (has_ptype) { ptype <- vec_ptype(ptype, x_arg = "ptype") ptype <- vec_ptype_finalise(ptype) } if (has_size) { check_number_whole(size, min = 0) size <- vec_cast(size, integer()) } new_list_of0(x = x, ptype = ptype, size = size, ..., class = class) } new_list_of0 <- function(x, ptype, size, ..., class = character()) { new_vctr( x, ..., ptype = ptype, size = size, class = c(class, "vctrs_list_of") ) } list_of_unstructure <- function(x) { attr(x, "ptype") <- NULL attr(x, "size") <- NULL attr(x, "class") <- NULL x } #' `list_of` attributes #' #' @description #' - `list_of_ptype()` returns the `ptype` required by the `list_of`. #' If no `ptype` is required, then `NULL` is returned. #' #' - `list_of_size()` returns the `size` required by the `list_of`. #' If no `size` is required, then `NULL` is returned. #' #' @param x A [list_of][list_of()]. #' #' @name list-of-attributes #' #' @examples #' x <- list_of(1, 2) #' list_of_ptype(x) #' list_of_size(x) #' #' x <- list_of(.ptype = integer(), .size = 5) #' list_of_ptype(x) #' list_of_size(x) NULL #' @rdname list-of-attributes #' @export list_of_ptype <- function(x) { check_list_of(x) list_of_ptype0(x) } #' @rdname list-of-attributes #' @export list_of_size <- function(x) { check_list_of(x) list_of_size0(x) } list_of_ptype0 <- function(x) { attr(x, "ptype", exact = TRUE) } list_of_size0 <- function(x) { attr(x, "size", exact = TRUE) } #' @export #' @rdname list_of is_list_of <- function(x) { inherits(x, "vctrs_list_of") } check_list_of <- function(x, ..., arg = caller_arg(x), call = caller_env()) { if (is_list_of(x)) { return(invisible(NULL)) } stop_input_type( x, "a ``", ..., arg = arg, call = call ) } #' @export vec_proxy.vctrs_list_of <- function(x, ...) { unclass(x) } # Formatting -------------------------------------------------------------- #' @export obj_print_data.vctrs_list_of <- function(x, ...) { if (length(x) == 0) { return() } print(vec_data(x)) } #' @export format.vctrs_list_of <- function(x, ...) { format.default(x) } #' @export vec_ptype_full.vctrs_list_of <- function(x, ...) { size <- list_of_size0(x) if (is_null(size)) { size <- "" } else { size <- paste0("[", size, "]") } ptype <- list_of_ptype0(x) if (is_null(ptype)) { ptype <- "any" } else { ptype <- vec_ptype_full(ptype) } ptype <- paste0(ptype, size) if (grepl("\n", ptype)) { ptype <- paste0(indent(paste0("\n", ptype), 2), "\n") } paste0("list_of<", ptype, ">") } #' @export vec_ptype_abbr.vctrs_list_of <- function(x, ...) { size <- list_of_size0(x) if (is_null(size)) { size <- "" } else { size <- paste0("[", size, "]") } ptype <- list_of_ptype0(x) if (is_null(ptype)) { ptype <- "any" } else { ptype <- vec_ptype_abbr(ptype) } ptype <- paste0(ptype, size) paste0("list<", ptype, ">") } # vctr methods ------------------------------------------------------------ #' @export as.list.vctrs_list_of <- function(x, ...) { list_of_unstructure(x) } #' @export as.character.vctrs_list_of <- function(x, ...) { # For compatibility with the RStudio Viewer. See tidyverse/tidyr#654. map_chr(x, function(elt) paste0("<", vec_ptype_abbr(elt), ">")) } #' @export `[[.vctrs_list_of` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `$.vctrs_list_of` <- function(x, i, ...) { .Call(vctrs_list_get, x, i) } #' @export `[<-.vctrs_list_of` <- function(x, i, value) { if (is_null(value)) { return(NextMethod()) } if (!obj_is_list(value)) { # Ideally the user provides a list, but if `value` is not a list, we chop # it. This matches list semantics where this works: # # ``` # x <- list(1, 2, 3) # x[1:2] <- c(4, 5) # ``` value <- vec_chop(value) } ptype <- list_of_ptype0(x) if (!is_null(ptype)) { value <- map(value, vec_cast, to = ptype) } size <- list_of_size0(x) if (!is_null(size)) { value <- map(value, vec_recycle, size = size) } value <- new_list_of0(value, ptype = ptype, size = size) NextMethod() } #' @export `[[<-.vctrs_list_of` <- function(x, i, value) { if (is_null(value)) { return(NextMethod()) } ptype <- list_of_ptype0(x) if (!is_null(ptype)) { value <- vec_cast(value, ptype) } size <- list_of_size0(x) if (!is_null(size)) { value <- vec_recycle(value, size) } NextMethod() } #' @export `$<-.vctrs_list_of` <- function(x, i, value) { if (is_null(value)) { return(NextMethod()) } ptype <- list_of_ptype0(x) if (!is_null(ptype)) { value <- vec_cast(value, ptype) } size <- list_of_size0(x) if (!is_null(size)) { value <- vec_recycle(value, size) } NextMethod() } # Type system ------------------------------------------------------------- #' @rdname list_of #' @inheritParams vec_ptype2 #' @export vec_ptype2.vctrs_list_of #' @method vec_ptype2 vctrs_list_of #' @export vec_ptype2.vctrs_list_of <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.vctrs_list_of") } #' @method vec_ptype2.vctrs_list_of vctrs_list_of #' @export vec_ptype2.vctrs_list_of.vctrs_list_of <- function( x, y, ..., x_arg = "", y_arg = "" ) { x_ptype <- list_of_ptype0(x) y_ptype <- list_of_ptype0(y) x_size <- list_of_size0(x) y_size <- list_of_size0(y) if (identical(x_ptype, y_ptype) && identical(x_size, y_size)) { return(x) } # Common type always goes towards more lenient type # # Element type: # - If either `x_ptype` or `y_ptype` are `NULL`, fall back to `NULL` # - If both `x_ptype` and `y_ptype` are specified, try common type but fall # back to `NULL` # # Element size: # - If either `x_size` or `y_size` are `NULL`, fall back to `NULL` # - If both `x_ptype` and `y_ptype` are specified, try common size but fall # back to `NULL` # # If both `ptype` and `size` are `NULL` after this, return bare `list()`, # otherwise return `list_of()` with appropriate restrictions. Note that with # this set up we may fail a ptype2 determination but pass a size2 # determination and still return a list-of. if (is_null(x_ptype) || is_null(y_ptype)) { ptype <- NULL } else { ptype <- tryCatch( vec_ptype2(x_ptype, y_ptype), vctrs_error_incompatible_type = function(cnd) NULL ) } if (is_null(x_size) || is_null(y_size)) { size <- NULL } else { # No `vec_size2()`. This uses ALTREP to be efficient. size <- tryCatch( vec_size_common(seq_len(x_size), seq_len(y_size)), vctrs_error_incompatible_size = function(cnd) NULL ) } if (is_null(ptype) && is_null(size)) { list() } else { new_list_of0(x = list(), ptype = ptype, size = size) } } #' @export vec_ptype2.list.vctrs_list_of <- function(x, y, ...) { list() } #' @export vec_ptype2.vctrs_list_of.list <- function(x, y, ...) { list() } #' @rdname list_of #' @export vec_cast.vctrs_list_of #' @method vec_cast vctrs_list_of #' @export vec_cast.vctrs_list_of <- function(x, to, ...) { UseMethod("vec_cast.vctrs_list_of") } #' @export #' @method vec_cast.vctrs_list_of vctrs_list_of vec_cast.vctrs_list_of.vctrs_list_of <- function( x, to, ..., call = caller_env() ) { x_ptype <- list_of_ptype0(x) to_ptype <- list_of_ptype0(to) x_size <- list_of_size0(x) to_size <- list_of_size0(to) if (identical(x_ptype, to_ptype) && identical(x_size, to_size)) { # FIXME: Suboptimal check for "same type", but should be good enough for the # common case of unchopping a list of identically generated list-ofs (#875). # Would be fixed by https://github.com/r-lib/vctrs/issues/1688. return(x) } x <- list_of_unstructure(x) ptype <- to_ptype %||% zap() size <- to_size %||% zap() list_as_list_of( x = x, ptype = ptype, size = size, error_call = call ) } #' @export vec_cast.list.vctrs_list_of <- function(x, to, ...) { list_of_unstructure(x) } #' @export vec_cast.vctrs_list_of.list <- function(x, to, ..., call = caller_env()) { ptype <- list_of_ptype0(to) %||% zap() size <- list_of_size0(to) %||% zap() list_as_list_of(x, ptype = ptype, size = size, error_call = call) } # Helpers ----------------------------------------------------------------- list_as_list_of <- function(x, ptype, size, error_call = caller_env()) { free_ptype <- is_zap(ptype) free_size <- is_zap(size) if (free_ptype && free_size) { abort("Can't set both `ptype` and `size` to ``.", call = error_call) } if (free_ptype) { # Not locked ptype <- NULL } else { # Lock to a type or die trying ptype <- vec_ptype_common( !!!x, .ptype = ptype, .call = error_call ) if (is_null(ptype)) { abort("Can't find common type for elements of `x`.", call = error_call) } x <- vec_cast_common(!!!x, .to = ptype, .call = error_call) } if (free_size) { # Not locked size <- NULL } else { # Lock to a size or die trying size <- vec_size_common( !!!x, .size = size, .absent = -1L, .call = error_call ) if (size == -1L) { abort("Can't find common size for elements of `x`.", call = error_call) } x <- vec_recycle_common(!!!x, .size = size, .call = error_call) } new_list_of0(x = x, ptype = ptype, size = size) } vctrs/R/c.R0000644000176200001440000000441215113325071012160 0ustar liggesusers#' Combine many vectors into one vector #' #' Combine all arguments into a new vector of common type. #' #' @section Invariants: #' * `vec_size(vec_c(x, y)) == vec_size(x) + vec_size(y)` #' * `vec_ptype(vec_c(x, y)) == vec_ptype_common(x, y)`. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_cast_common()] with fallback #' - [vec_proxy()] #' - [vec_restore()] #' #' #' ## base dependencies #' #' - [base::c()] #' #' If inputs inherit from a common class hierarchy, `vec_c()` falls #' back to `base::c()` if there exists a `c()` method implemented for #' this class hierarchy. #' #' @inheritParams rlang::args_error_context #' @inheritParams vec_ptype_show #' @inheritParams name_spec #' @inheritParams vec_as_names #' #' @param ... Vectors to coerce. #' @param .name_repair How to repair names, see `repair` options in #' [vec_as_names()]. #' @return A vector with class given by `.ptype`, and length equal to the #' sum of the `vec_size()` of the contents of `...`. #' #' The vector will have names if the individual components have names #' (inner names) or if the arguments are named (outer names). If both #' inner and outer names are present, an error is thrown unless a #' `.name_spec` is provided. #' #' @seealso [vec_cbind()]/[vec_rbind()] for combining data frames by rows #' or columns. #' @export #' @examples #' vec_c(FALSE, 1L, 1.5) #' #' # Date/times -------------------------- #' c(Sys.Date(), Sys.time()) #' c(Sys.time(), Sys.Date()) #' #' vec_c(Sys.Date(), Sys.time()) #' vec_c(Sys.time(), Sys.Date()) #' #' # Factors ----------------------------- #' c(factor("a"), factor("b")) #' vec_c(factor("a"), factor("b")) #' #' #' # By default, named inputs must be length 1: #' vec_c(name = 1) #' try(vec_c(name = 1:3)) #' #' # Pass a name specification to work around this: #' vec_c(name = 1:3, .name_spec = "{outer}_{inner}") #' #' # See `?name_spec` for more examples of name specifications. vec_c <- function( ..., .ptype = NULL, .name_spec = NULL, .name_repair = c( "minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet" ), .error_arg = "", .error_call = current_env() ) { .External2(ffi_vec_c, list2(...), .ptype, .name_spec, .name_repair) } vec_c <- fn_inline_formals(vec_c, ".name_repair") vctrs/R/subscript.R0000644000176200001440000002201215065005761013757 0ustar liggesusers#' Convert to a base subscript type #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Convert `i` to the base type expected by [vec_as_location()] or #' [vec_as_location2()]. The values of the subscript type are #' not checked in any way (length, missingness, negative elements). #' #' @inheritParams vec_as_location #' #' @param logical,numeric,character How to handle logical, numeric, #' and character subscripts. #' #' If `"cast"` and the subscript is not one of the three base types #' (logical, integer or character), the subscript is #' [cast][vec_cast] to the relevant base type, e.g. factors are #' coerced to character. `NULL` is treated as an empty integer #' vector, and is thus coercible depending on the setting of #' `numeric`. Symbols are treated as character vectors and thus #' coercible depending on the setting of `character`. #' #' If `"error"`, the subscript type is disallowed and triggers an #' informative error. #' @keywords internal #' @export vec_as_subscript <- function( i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env() ) { check_dots_empty0(...) .Call( ffi_as_subscript, i = i, logical = logical, numeric = numeric, character = character, frame = environment() ) } vec_as_subscript_result <- function(i, arg, call, logical, numeric, character) { .Call( ffi_as_subscript_result, i = i, logical = logical, numeric = numeric, character = character, frame = environment() ) } #' @rdname vec_as_subscript #' @export vec_as_subscript2 <- function( i, ..., numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env() ) { check_dots <- function(..., logical = "error", call = caller_env()) { if (!is_string(logical, "error")) { abort( "`vctrs::vec_as_subscript2(logical = 'cast')` is deprecated.", call = caller_env() ) } check_dots_empty0(..., call = call) } check_dots(...) result_get(vec_as_subscript2_result( i, arg, call, numeric = numeric, character = character )) } vec_as_subscript2_result <- function( i, arg, call, numeric = "cast", character = "cast" ) { numeric <- arg_match0(numeric, c("cast", "error")) character <- arg_match0(character, c("cast", "error")) result <- vec_as_subscript_result( i, arg = arg, call = call, logical = "error", numeric = numeric, character = character ) # This should normally be a `vctrs_error_subscript`. Indicate to # message methods that this error refers to a `[[` subscript. if (!is_null(result$err)) { result$err$subscript_scalar <- TRUE } result } subscript_type_opts <- c("logical", "numeric", "character") subscript_type_opts_indefinite_singular <- c( "a logical flag", "a location", "a name" ) subscript_type_opts_indefinite_plural <- c( "logical flags", "locations", "names" ) as_opts_subscript_type <- function(x, arg = NULL) { if (inherits(x, "vctrs_opts_subscript_type")) { return(x) } new_opts( x, subscript_type_opts, subclass = "vctrs_opts_subscript_type", arg = arg ) } as_opts_subscript2_type <- function(x, arg = NULL) { if ("logical" %in% x) { abort("Logical subscripts can't be converted to a single location.") } as_opts_subscript_type(x, arg = arg) } stop_subscript <- function(i, ..., class = NULL, call = caller_env()) { abort( class = c(class, "vctrs_error_subscript"), i = i, ..., call = call ) } new_error_subscript <- function(class = NULL, i, ...) { error_cnd( c(class, "vctrs_error_subscript"), i = i, ... ) } new_error_subscript_type <- function( i, logical = "cast", numeric = "cast", character = "cast", ..., call = NULL, class = NULL ) { new_error_subscript( class = c(class, "vctrs_error_subscript_type"), i = i, logical = logical, numeric = numeric, character = character, ..., call = call ) } #' @export cnd_header.vctrs_error_subscript_type <- function(cnd, ...) { arg <- cnd[["subscript_arg"]] if (is_subscript_arg(arg)) { with <- glue::glue(" with {format_subscript_arg(arg)}") } else { with <- "" } action <- cnd_subscript_action(cnd, assign_to = FALSE) elt <- cnd_subscript_element(cnd) if (cnd_subscript_scalar(cnd)) { glue::glue("Can't {action} {elt[[1]]}{with}.") } else { glue::glue("Can't {action} {elt[[2]]}{with}.") } } #' @export cnd_body.vctrs_error_subscript_type <- function(cnd, ...) { arg <- cnd_subscript_arg(cnd) type <- obj_type_friendly(cnd$i) expected_types <- cnd_subscript_expected_types(cnd) format_error_bullets(c( x = cli::format_inline("{arg} must be {.or {expected_types}}, not {type}.") )) } new_cnd_bullets_subscript_lossy_cast <- function(lossy_err) { function(cnd, ...) { format_error_bullets(c(x = cnd_header(lossy_err))) } } collapse_subscript_type <- function(cnd) { types <- cnd_subscript_expected_types(cnd) if (length(types) == 2) { last <- " or " } else { last <- ", or " } glue::glue_collapse(types, sep = ", ", last = last) } cnd_subscript_expected_types <- function(cnd) { types <- c("logical", "numeric", "character") allowed <- cnd[types] != "error" types[allowed] } new_error_subscript_size <- function(i, ..., class = NULL) { new_error_subscript( class = c(class, "vctrs_error_subscript_size"), i = i, ... ) } #' @export cnd_header.vctrs_error_subscript_size <- function(cnd, ...) { cnd_header.vctrs_error_subscript_type(cnd, ...) } new_error_subscript2_type <- function(i, numeric, character, ...) { new_error_subscript_type( i = i, logical = "error", numeric = numeric, character = character, subscript_scalar = TRUE, ... ) } cnd_body_subscript_dim <- function(cnd, ...) { arg <- append_arg("Subscript", cnd$subscript_arg) dim <- length(dim(cnd$i)) if (dim < 2) { abort( "Internal error: Unexpected dimensionality in `cnd_body_subcript_dim()`." ) } if (dim == 2) { shape <- "a matrix" } else { shape <- "an array" } format_error_bullets(c( x = glue::glue("{arg} must be a simple vector, not {shape}.") )) } cnd_subscript_element <- function(cnd, capital = FALSE) { elt <- cnd$subscript_elt %||% "element" if (!is_string(elt, c("element", "row", "column", "table"))) { abort(paste0( "Internal error: `cnd$subscript_elt` must be one of ", "`element`, `row`, `column` or `table`." )) } if (capital) { switch( elt, element = c("Element", "Elements"), row = c("Row", "Rows"), column = c("Column", "Columns"), table = c("Table", "Tables") ) } else { switch( elt, element = c("element", "elements"), row = c("row", "rows"), column = c("column", "columns"), table = c("table", "tables") ) } } cnd_subscript_element_cli <- function(n, cnd, capital = FALSE) { elt <- cnd$subscript_elt %||% "element" if (!is_string(elt, c("element", "row", "column", "table"))) { abort(paste0( "Internal error: `cnd$subscript_elt` must be one of ", "`element`, `row`, `column` or `table`." )) } if (capital) { elt <- switch( elt, element = "Element{?s}", row = "Row{?s}", column = "Column{?s}", table = "Table{?s}" ) } else { elt <- switch( elt, element = "element{?s}", row = "row{?s}", column = "column{?s}", table = "table{?s}" ) } cli::pluralize("{n} ", elt) } subscript_actions <- c( "select", "subset", "extract", "assign", "rename", "relocate", "remove", "negate" ) cnd_subscript_action <- function(cnd, assign_to = TRUE) { action <- cnd$subscript_action if (is_null(action)) { if (cnd_subscript_scalar(cnd)) { action <- "extract" } else { action <- "subset" } } if (!is_string(action, subscript_actions)) { cli::cli_abort( "`cnd$subscript_action` must be one of {.or {.arg {subscript_actions}}}.", .internal = TRUE ) } if (assign_to && action == "assign") { "assign to" } else { action } } cnd_subscript_arg <- function(cnd, ...) { format_subscript_arg(cnd[["subscript_arg"]], ...) } format_subscript_arg <- function(arg, capitalise = TRUE) { if (is_subscript_arg(arg)) { if (!is_string(arg)) { arg <- as_label(arg) } cli::format_inline("{.arg {arg}}") } else { if (capitalise) { "Subscript" } else { "subscript" } } } is_subscript_arg <- function(x) { !is_null(x) && !is_string(x, "") } cnd_subscript_type <- function(cnd) { type <- cnd$subscript_type if (!is_string(type, c("logical", "numeric", "character"))) { abort( "Internal error: `cnd$subscript_type` must be `logical`, `numeric`, or `character`." ) } type } cnd_subscript_scalar <- function(cnd) { out <- cnd$subscript_scalar %||% FALSE if (!is_bool(out)) { abort("Internal error: `cnd$subscript_scalar` must be a boolean.") } out } vctrs/R/faq.R0000644000176200001440000000155015113325071012505 0ustar liggesusers#' FAQ - How is the compatibility of vector types decided? #' #' @includeRmd man/faq/user/faq-compatibility-types.Rmd description #' #' @name faq-compatibility-types NULL #' FAQ - Error/Warning: Some attributes are incompatible #' #' @description #' #' This error occurs when [vec_ptype2()] or [vec_cast()] are supplied #' vectors of the same classes with different attributes. In this #' case, vctrs doesn't know how to combine the inputs. #' #' To fix this error, the maintainer of the class should implement #' self-to-self coercion methods for [vec_ptype2()] and [vec_cast()]. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @name faq-error-incompatible-attributes NULL #' FAQ - Error: Input must be a vector #' #' @includeRmd man/faq/user/faq-error-scalar-type.Rmd description #' #' @name faq-error-scalar-type #' @aliases faq_error_scalar_type NULL vctrs/R/cast.R0000644000176200001440000001406115120576121012673 0ustar liggesusers#' Cast a vector to a specified type #' #' @description #' #' `vec_cast()` provides directional conversions from one type of #' vector to another. Along with [vec_ptype2()], this generic forms #' the foundation of type coercions in vctrs. #' #' @includeRmd man/faq/developer/links-coercion.Rmd #' #' @inheritParams rlang::args_error_context #' @param x Vectors to cast. #' @param ... For `vec_cast_common()`, vectors to cast. For #' `vec_cast()`, `vec_cast_default()`, and `vec_restore()`, these #' dots are only for future extensions and should be empty. #' @param to,.to Type to cast to. If `NULL`, `x` will be returned as is. #' @param x_arg Argument name for `x`, used in error messages to #' inform the user about the locations of incompatible types #' (see [stop_incompatible_type()]). #' @param to_arg Argument name `to` used in error messages to #' inform the user about the locations of incompatible types #' (see [stop_incompatible_type()]). #' @return A vector the same length as `x` with the same type as `to`, #' or an error if the cast is not possible. An error is generated if #' information is lost when casting between compatible types (i.e. when #' there is no 1-to-1 mapping for a specific value). #' #' @section Dependencies of `vec_cast_common()`: #' #' ## vctrs dependencies #' #' - [vec_ptype2()] #' - [vec_cast()] #' #' #' ## base dependencies #' #' Some functions enable a base-class fallback for #' `vec_cast_common()`. In that case the inputs are deemed compatible #' when they have the same [base type][base::typeof] and inherit from #' the same base class. #' #' @seealso Call [stop_incompatible_cast()] when you determine from the #' attributes that an input can't be cast to the target type. #' @export #' @examples #' # x is a double, but no information is lost #' vec_cast(1, integer()) #' #' # When information is lost the cast fails #' try(vec_cast(c(1, 1.5), integer())) #' try(vec_cast(c(1, 2), logical())) #' #' # You can suppress this error and get the partial results #' allow_lossy_cast(vec_cast(c(1, 1.5), integer())) #' allow_lossy_cast(vec_cast(c(1, 2), logical())) #' #' # By default this suppress all lossy cast errors without #' # distinction, but you can be specific about what cast is allowed #' # by supplying prototypes #' allow_lossy_cast(vec_cast(c(1, 1.5), integer()), to_ptype = integer()) #' try(allow_lossy_cast(vec_cast(c(1, 2), logical()), to_ptype = integer())) #' #' # No sensible coercion is possible so an error is generated #' try(vec_cast(1.5, factor("a"))) #' #' # Cast to common type #' vec_cast_common(factor("a"), factor(c("a", "b"))) vec_cast <- function( x, to, ..., x_arg = caller_arg(x), to_arg = "", call = caller_env() ) { if (!missing(...)) { check_ptype2_dots_empty(...) } return(.Call(ffi_cast, x, to, environment())) UseMethod("vec_cast", to) } vec_cast_dispatch <- function(x, to, ..., x_arg = "", to_arg = "") { UseMethod("vec_cast", to) } vec_cast_dispatch_native <- function( x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { .Call( ffi_cast_dispatch_native, x, to, match_fallback_opts(...), x_arg, to_arg, environment() ) } #' @export #' @rdname vec_cast vec_cast_common <- function(..., .to = NULL, .arg = "", .call = caller_env()) { .External2(ffi_cast_common, list2(...), .to) } vec_cast_common_opts <- function( ..., .to = NULL, .opts = fallback_opts(), .arg = "", .call = caller_env() ) { .External2(ffi_cast_common_opts, list2(...), .to, .opts) } vec_cast_common_params <- function( ..., .to = NULL, .s3_fallback = NULL, .arg = "", .call = caller_env() ) { opts <- fallback_opts( s3_fallback = .s3_fallback ) vec_cast_common_opts( ..., .to = .to, .opts = opts, .arg = .arg, .call = .call ) } vec_cast_common_fallback <- function( ..., .to = NULL, .arg = "", .call = caller_env() ) { vec_cast_common_opts( ..., .to = .to, .opts = enabled_fallback_opts(), .arg = .arg, .call = .call ) } #' @rdname vec_default_ptype2 #' @inheritParams vec_cast #' @export vec_default_cast <- function( x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { if (is_asis(x)) { return(vec_cast_from_asis( x, to, x_arg = x_arg, to_arg = to_arg, call = call )) } if (is_asis(to)) { return(vec_cast_to_asis( x, to, x_arg = x_arg, to_arg = to_arg, call = call )) } if ( inherits(to, "vctrs_vctr") && !inherits(to, c("vctrs_rcrd", "vctrs_list_of")) ) { return(vctr_cast( x, to, x_arg = x_arg, to_arg = to_arg, call = call )) } opts <- match_fallback_opts(...) if (is_common_class_fallback(to) && length(common_class_suffix(x, to))) { return(x) } # Data frames have special bare class and same type fallbacks if (is.data.frame(x) && is.data.frame(to)) { out <- df_cast_opts( x, to, ..., opts = opts, x_arg = x_arg, to_arg = to_arg, call = call ) # Same-type fallback for data frames. If attributes of the empty # data frames are congruent, just reproduce these attributes. This # eschews any constraints on rows and cols that `[` and `[<-` # methods might have. If that is a problem, the class needs to # implement vctrs methods. if (identical(non_df_attrib(x), non_df_attrib(to))) { attributes(out) <- c(df_attrib(out), non_df_attrib(to)) return(out) } # Bare-class fallback for data frames. # FIXME: Should we only allow it when target is a bare df? if (inherits(to, "tbl_df")) { out <- df_as_tibble(out) } return(out) } if (is_same_type(x, to)) { return(x) } withRestarts( stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, `vctrs:::from_dispatch` = match_from_dispatch(...), call = call ), vctrs_restart_cast = function(out) { out } ) } is_bare_df <- function(x) { inherits_only(x, "data.frame") || inherits_only(x, c("tbl_df", "tbl", "data.frame")) } vctrs/R/utils.R0000644000176200001440000001734315113325071013105 0ustar liggesusersstr_dup <- function(x, times) { paste0(rep(x, times = times), collapse = "") } indent <- function(x, n) { pad <- str_dup(" ", n) map_chr(x, gsub, pattern = "(\n+)", replacement = paste0("\\1", pad)) } ones <- function(...) { array(1, dim = c(...)) } ones_list <- function(...) { array(list(1), dim = c(...)) } vec_coerce_bare <- function(x, type) { # FIXME! Unexported wrapper around Rf_coerceVector() coerce <- env_get(ns_env("rlang"), "vec_coerce") coerce(x, type) } # Matches the semantics of c() - based on experimenting with the output # of c(), not reading the source code. outer_names <- function(names, outer, n) { .Call(ffi_outer_names, names, outer, vec_cast(n, int())) } has_inner_names <- function(x) { !all(map_lgl(map(x, vec_names), is.null)) } cat_line <- function(...) { cat(paste0(..., "\n", collapse = "")) } set_partition <- function(x, y) { list( both = intersect(x, y), only_x = setdiff(x, y), only_y = setdiff(y, x) ) } all_equal <- function(x) all(x == x[[1]]) inline_list <- function(title, x, width = getOption("width"), quote = "") { label_width <- width - nchar(title) x <- glue::glue_collapse( encodeString(x, quote = quote), sep = ", ", width = label_width ) paste0(title, x) } has_unique_names <- function(x) { nms <- names(x) if (length(nms) != length(x)) { return(FALSE) } if (any(is.na(nms) | nms == "")) { return(FALSE) } !anyDuplicated(nms) } compact <- function(x) { is_null <- map_lgl(x, is.null) x[!is_null] } paste_line <- function(...) { paste(chr(...), collapse = "\n") } # Experimental result <- function(ok = NULL, err = NULL) { structure( list(ok = ok, err = err), class = "rlang_result" ) } result_get <- function(x) { if (!is_null(x$err)) { cnd_signal(x$err) } x$ok } obj_type <- function(x) { if (vec_is(x)) { vec_ptype_full(x) } else if (is.object(x)) { paste(class(x), collapse = "/") } else if (is_function(x)) { "function" } else { typeof(x) } } new_opts <- function(x, opts, subclass = NULL, arg = NULL) { if (!all(x %in% opts)) { if (is_null(arg)) { arg <- "Argument" } else { arg <- glue::glue("`{arg}`") } opts <- encodeString(opts, quote = "\"") opts <- glue::glue_collapse(opts, sep = ", ", last = " or ") abort(glue::glue("{arg} must be one of {opts}.")) } structure( set_names(opts %in% x, opts), class = c(subclass, "vctrs_opts") ) } glue_data_bullets <- function(.data, ..., .env = caller_env()) { glue_data <- function(...) glue::glue_data(.data, ..., .envir = .env) format_error_bullets(map_chr(chr(...), glue_data)) } unstructure <- function(x) { attributes(x) <- NULL x } # We almost never want `stringsAsFactors = TRUE`, and `FALSE` became # the default in R 4.0.0. This wrapper ensures that our tests are compliant # with versions of R before and after this change. Keeping it in `utils.R` # rather than as a testthat helper ensures that it is sourced before any other # testthat helpers. data.frame <- function(..., stringsAsFactors = NULL) { stringsAsFactors <- stringsAsFactors %||% FALSE base::data.frame(..., stringsAsFactors = stringsAsFactors) } try_catch_callback <- function(data, cnd) { .Call(vctrs_try_catch_callback, data, cnd) } try_catch_hnd <- function(data) { function(cnd) { try_catch_callback(data, cnd) } } try_catch_impl <- function(data, ...) { tryCatch( try_catch_callback(data, NULL), ... ) } ns_methods <- function(name) { ns_env(name)$.__S3MethodsTable__. } s3_find_method <- function(x, generic, ns = "base") { stopifnot( is_string(generic), is_string(ns) ) table <- ns_methods(ns_env(ns)) .Call(vctrs_s3_find_method, generic, x, table) } s3_get_method <- function(class, generic, ns = "base") { stopifnot( is_string(class), is_string(generic), is_string(ns) ) table <- ns_methods(ns_env(ns)) .Call(ffi_s3_get_method, generic, class, table) } s3_method_specific <- function(x, generic, ns = "base", default = TRUE) { classes <- class(x)[[1]] if (default) { classes <- c(classes, "default") } for (class in classes) { method <- s3_get_method(class, generic, ns = ns) if (!is_null(method)) { return(method) } } cli::cli_abort("Can't find {.fn {generic}} method for {.cls {class}}.") } df_has_base_subset <- function(x) { method <- s3_find_method(x, "[", ns = "base") is_null(method) || identical(method, `[.data.frame`) } last <- function(x) { x[[length(x)]] } # Find the longest common suffix of two vectors vec_common_suffix <- function(x, y) { common <- vec_cast_common(x = x, y = y) x <- common$x y <- common$y x_size <- vec_size(x) y_size <- vec_size(y) n <- min(x_size, y_size) if (!n) { return(vec_slice(x, int())) } # Truncate the start of the vectors so they have equal size if (x_size < y_size) { y <- vec_slice(y, seq2(y_size - x_size + 1, y_size)) } else if (y_size < x_size) { x <- vec_slice(x, seq2(x_size - y_size + 1, x_size)) } # Find locations of unequal elements. Elements after the last # location are the common suffix. common <- vec_equal(x, y) i <- which(!common) # Slice the suffix after the last unequal element if (length(i)) { vec_slice(x, seq2(max(i) + 1, n)) } else { x } } import_from <- function(ns, names, env = caller_env()) { objs <- env_get_list(ns_env(ns), names) env_bind(env, !!!objs) } fast_c <- function(x, y) { .Call(vctrs_fast_c, x, y) } # Based on r-lib/bench (itself based on gaborcsardi/prettyunits) #' @export format.vctrs_bytes <- function( x, scientific = FALSE, digits = 3, drop0trailing = TRUE, ... ) { nms <- names(x) bytes <- unclass(x) unit <- map_chr(x, find_unit, byte_units) res <- round(bytes / byte_units[unit], digits = digits) ## Zero bytes res[bytes == 0] <- 0 unit[bytes == 0] <- "B" ## NA and NaN bytes res[is.na(bytes)] <- NA_real_ res[is.nan(bytes)] <- NaN unit[is.na(bytes)] <- "" # Includes NaN as well # Append an extra B to each unit large_units <- unit %in% names(byte_units)[-1] unit[large_units] <- paste0(unit[large_units], "B") res <- format( res, scientific = scientific, digits = digits, drop0trailing = drop0trailing, ... ) stats::setNames(paste0(res, unit), nms) } #' @export print.vctrs_bytes <- function(x, ...) { print(format(x, ...), quote = FALSE) } tolerance <- sqrt(.Machine$double.eps) find_unit <- function(x, units) { if (is.na(x) || is.nan(x) || x <= 0 || is.infinite(x)) { return(NA_character_) } epsilon <- 1 - (x * (1 / units)) names(utils::tail(n = 1, which(epsilon < tolerance))) } byte_units <- c( 'B' = 1, 'K' = 1024, 'M' = 1024^2, 'G' = 1024^3, 'T' = 1024^4, 'P' = 1024^5, 'E' = 1024^6, 'Z' = 1024^7, 'Y' = 1024^8 ) new_vctrs_bytes <- function(x) { structure(x, class = c("vctrs_bytes", "numeric")) } named <- function(x) { if (is_null(names(x))) { names(x) <- names2(x) } x } browser <- function(..., skipCalls = 0, frame = parent.frame()) { if (!identical(stdout(), getConnection(1))) { sink(getConnection(1)) withr::defer(sink(), envir = frame) } # Calling `browser()` on exit avoids RStudio displaying the # `browser2()` location. We still need one `n` to get to the # expected place. Ideally `skipCalls` would not skip but exit the # contexts. on.exit(base::browser(..., skipCalls = skipCalls + 1)) } vec_paste0 <- function(...) { args <- vec_recycle_common(...) exec(paste0, !!!args) } # On R <3.6.0, we used to have a special C version of this to avoid two copies # from being made by `attributes(x) <- attrib`. Now that we require R >=4.0.0, # we don't have the C utility anymore. vec_set_attributes <- function(x, attrib) { attributes(x) <- attrib x } vctrs/R/faq-developer.R0000644000176200001440000000223515132161317014473 0ustar liggesusers#' FAQ - Is my class compatible with vctrs? #' #' @includeRmd man/faq/developer/reference-compatibility.Rmd description #' #' @name reference-faq-compatibility NULL #' FAQ - How does coercion work in vctrs? #' #' @includeRmd man/faq/developer/theory-coercion.Rmd description #' #' @name theory-faq-coercion NULL # Also see the `redirects:` section in `_pkgdown.yml` # for `vector_recycling_rules.html` #' FAQ - How does recycling work in vctrs and the tidyverse? #' #' @includeRmd man/faq/developer/theory-recycling.Rmd description #' #' @name theory-faq-recycling #' @aliases vector_recycling_rules NULL #' FAQ - How to implement ptype2 and cast methods? #' #' @includeRmd man/faq/developer/howto-coercion.Rmd description #' #' @name howto-faq-coercion NULL #' FAQ - How to implement ptype2 and cast methods? (Data frames) #' #' @includeRmd man/faq/developer/howto-coercion-data-frame.Rmd description #' #' @name howto-faq-coercion-data-frame NULL #' FAQ - Why isn't my class treated as a vector? #' #' @includeRmd man/faq/developer/howto-faq-fix-scalar-type-error.Rmd description #' #' @name howto-faq-fix-scalar-type-error #' @aliases howto_faq_fix_scalar_type_error NULL vctrs/R/arith.R0000644000176200001440000000617615065005761013065 0ustar liggesusers#' Arithmetic operations #' #' This generic provides a common double dispatch mechanism for all infix #' operators (`+`, `-`, `/`, `*`, `^`, `%%`, `%/%`, `!`, `&`, `|`). It is used #' to power the default arithmetic and boolean operators for [vctr]s objects, #' overcoming the limitations of the base [Ops] generic. #' #' `vec_arith_base()` is provided as a convenience for writing methods. It #' recycles `x` and `y` to common length then calls the base operator with the #' underlying [vec_data()]. #' #' `vec_arith()` is also used in `diff.vctrs_vctr()` method via `-`. #' #' @param op An arithmetic operator as a string #' @param x,y A pair of vectors. For `!`, unary `+` and unary `-`, `y` will be #' a sentinel object of class `MISSING`, as created by `MISSING()`. #' @inheritParams rlang::args_dots_empty #' #' @seealso [stop_incompatible_op()] for signalling that an arithmetic #' operation is not permitted/supported. #' @seealso See [vec_math()] for the equivalent for the unary mathematical #' functions. #' @export #' @keywords internal #' @examples #' d <- as.Date("2018-01-01") #' dt <- as.POSIXct("2018-01-02 12:00") #' t <- as.difftime(12, unit = "hours") #' #' vec_arith("-", dt, 1) #' vec_arith("-", dt, t) #' vec_arith("-", dt, d) #' #' vec_arith("+", dt, 86400) #' vec_arith("+", dt, t) #' vec_arith("+", t, t) #' #' vec_arith("/", t, t) #' vec_arith("/", t, 2) #' #' vec_arith("*", t, 2) vec_arith <- function(op, x, y, ...) { check_dots_empty0(...) UseMethod("vec_arith", x) } #' @export #' @rdname vec_arith vec_arith.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } # Atomic vectors ---------------------------------------------------------- #' @rdname vec_arith #' @export vec_arith.logical #' @method vec_arith logical #' @export vec_arith.logical <- function(op, x, y, ...) UseMethod("vec_arith.logical", y) #' @method vec_arith.logical default #' @export vec_arith.logical.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } #' @method vec_arith.logical logical #' @export vec_arith.logical.logical <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @method vec_arith.logical numeric #' @export vec_arith.logical.numeric <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @rdname vec_arith #' @export vec_arith.numeric #' @method vec_arith numeric #' @export vec_arith.numeric <- function(op, x, y, ...) UseMethod("vec_arith.numeric", y) #' @method vec_arith.numeric default #' @export vec_arith.numeric.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } #' @method vec_arith.numeric logical #' @export vec_arith.numeric.logical <- function(op, x, y, ...) vec_arith_base(op, x, y) #' @method vec_arith.numeric numeric #' @export vec_arith.numeric.numeric <- function(op, x, y, ...) vec_arith_base(op, x, y) # Helpers ----------------------------------------------------------------- #' @export #' @rdname vec_arith vec_arith_base <- function(op, x, y) { args <- vec_recycle_common(x, y) op_fn <- getExportedValue("base", op) op_fn(vec_data(args[[1L]]), vec_data(args[[2L]])) } #' @export #' @rdname vec_arith MISSING <- function() { structure(list(), class = "MISSING") } vctrs/R/type-date-time.R0000644000176200001440000004140615132161317014574 0ustar liggesusers#' Date, date-time, and duration S3 classes #' #' * A `date` ([Date]) is a double vector. Its value represent the number #' of days since the Unix "epoch", 1970-01-01. It has no attributes. #' * A `datetime` ([POSIXct] is a double vector. Its value represents the #' number of seconds since the Unix "Epoch", 1970-01-01. It has a single #' attribute: the timezone (`tzone`)) #' * A `duration` ([difftime]) #' #' These function help the base `Date`, `POSIXct`, and `difftime` classes fit #' into the vctrs type system by providing constructors, coercion functions, #' and casting functions. #' #' @param x A double vector representing the number of days since UNIX #' epoch for `new_date()`, number of seconds since UNIX epoch for #' `new_datetime()`, and number of `units` for `new_duration()`. #' @param tzone Time zone. A character vector of length 1. Either `""` for #' the local time zone, or a value from [OlsonNames()] #' @param units Units of duration. #' @export #' @keywords internal #' @examples #' new_date(0) #' new_datetime(0, tzone = "UTC") #' new_duration(1, "hours") new_date <- function(x = double()) { .Call(vctrs_new_date, x) } #' @export #' @rdname new_date new_datetime <- function(x = double(), tzone = "") { .Call(vctrs_new_datetime, x, tzone) } #' @export #' @rdname new_date new_duration <- function( x = double(), units = c("secs", "mins", "hours", "days", "weeks") ) { stopifnot(is.double(x)) units <- arg_match0(units, c("secs", "mins", "hours", "days", "weeks")) structure( x, units = units, class = "difftime" ) } #' @export vec_proxy.Date <- function(x, ...) { date_validate(x) } #' @export vec_proxy.POSIXct <- function(x, ...) { datetime_validate(x) } #' @export vec_proxy.POSIXlt <- function(x, ...) { new_data_frame(unclass(x)) } #' @export vec_proxy_equal.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_equal(x, ...) } #' @export vec_proxy_compare.POSIXlt <- function(x, ...) { x <- vec_cast(x, new_datetime(tzone = tzone(x))) vec_proxy_compare(x) } #' @export vec_restore.Date <- function(x, to, ...) { NextMethod() } #' @export vec_restore.POSIXct <- function(x, to, ...) { NextMethod() } #' @export vec_restore.POSIXlt <- function(x, to, ...) { NextMethod() } # Print ------------------------------------------------------------------ #' @export vec_ptype_full.Date <- function(x, ...) { "date" } #' @export vec_ptype_abbr.Date <- function(x, ...) { "date" } #' @export vec_ptype_full.POSIXct <- function(x, ...) { tzone <- if (tzone_is_local(x)) "local" else tzone(x) paste0("datetime<", tzone, ">") } #' @export vec_ptype_full.POSIXlt <- function(x, ...) { tzone <- if (tzone_is_local(x)) "local" else tzone(x) paste0("POSIXlt<", tzone, ">") } #' @export vec_ptype_abbr.POSIXct <- function(x, ...) { "dttm" } #' @export vec_ptype_abbr.POSIXlt <- function(x, ...) { "dttm" } #' @export vec_ptype_full.difftime <- function(x, ...) { paste0("duration<", attr(x, "units"), ">") } #' @export vec_ptype_abbr.difftime <- function(x, ...) { "drtn" } # Ptype ------------------------------------------------------------------- #' @export vec_ptype.POSIXlt <- function(x, ...) { # `vec_ptype2()` pushes towards `POSIXct`. In theory, maybe `vec_ptype()` # should as well so that `vec_c()` and `vec_c(, )` # are consistent and both return `POSIXct`. In practice this broke a number of # packages (datetimeoffset, slider, nanoarrow), and also generally means that # you wouldn't be able to provide a `POSIXlt` as a `ptype` argument to # anything and expect it to return a `POSIXlt` as output. So we've decided to # just leave this inconsistency. Getting `POSIXlt` attributes exactly right # is very tricky, and very R version dependent, so we just consistency align # with `[` here and in tests. x[0] } # Coerce ------------------------------------------------------------------ #' @rdname new_date #' @export vec_ptype2.Date #' @method vec_ptype2 Date #' @export vec_ptype2.Date <- function(x, y, ...) { UseMethod("vec_ptype2.Date") } #' @method vec_ptype2.Date Date #' @export vec_ptype2.Date.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.Date") } #' @method vec_ptype2.Date POSIXct #' @export vec_ptype2.Date.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.POSIXct") } #' @method vec_ptype2.Date POSIXlt #' @export vec_ptype2.Date.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.Date.POSIXlt") } #' @rdname new_date #' @export vec_ptype2.POSIXct #' @method vec_ptype2 POSIXct #' @export vec_ptype2.POSIXct <- function(x, y, ...) { UseMethod("vec_ptype2.POSIXct") } #' @method vec_ptype2.POSIXct POSIXct #' @export vec_ptype2.POSIXct.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.POSIXct") } #' @method vec_ptype2.POSIXct Date #' @export vec_ptype2.POSIXct.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.Date") } #' @method vec_ptype2.POSIXct POSIXlt #' @export vec_ptype2.POSIXct.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXct.POSIXlt") } #' @rdname new_date #' @export vec_ptype2.POSIXlt #' @method vec_ptype2 POSIXlt #' @export vec_ptype2.POSIXlt <- function(x, y, ...) { UseMethod("vec_ptype2.POSIXlt") } #' @method vec_ptype2.POSIXlt POSIXlt #' @export vec_ptype2.POSIXlt.POSIXlt <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.POSIXlt") } #' @method vec_ptype2.POSIXlt Date #' @export vec_ptype2.POSIXlt.Date <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.Date") } #' @method vec_ptype2.POSIXlt POSIXct #' @export vec_ptype2.POSIXlt.POSIXct <- function(x, y, ...) { stop_native_implementation("vec_ptype2.POSIXlt.POSIXct") } #' @rdname new_date #' @export vec_ptype2.difftime #' @method vec_ptype2 difftime #' @export vec_ptype2.difftime <- function(x, y, ...) UseMethod("vec_ptype2.difftime") #' @method vec_ptype2.difftime difftime #' @export vec_ptype2.difftime.difftime <- function(x, y, ...) { new_duration(units = units_union(x, y)) } # Cast -------------------------------------------------------------------- #' @rdname new_date #' @export vec_cast.Date #' @method vec_cast Date #' @export vec_cast.Date <- function(x, to, ...) { UseMethod("vec_cast.Date") } #' @export #' @method vec_cast.Date Date vec_cast.Date.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.Date.Date") } #' @export #' @method vec_cast.Date POSIXct vec_cast.Date.POSIXct <- function(x, to, ...) { # TODO: Mark with `stop_native_implementation()` when we use lazy errors date_cast(x, to, ...) } #' @export #' @method vec_cast.Date POSIXlt vec_cast.Date.POSIXlt <- function(x, to, ...) { # TODO: Mark with `stop_native_implementation()` when we use lazy errors date_cast(x, to, ...) } # TODO: Remove when we have lazy errors date_cast <- function(x, to, ..., x_arg = "", to_arg = "") { out <- as.Date(x, tz = tzone(x)) x_ct <- as.POSIXct(x) out_ct <- as.POSIXct(as.character(out), tz = tzone(x)) lossy <- abs(x_ct - out_ct) > 1e-9 & !is.na(x) maybe_lossy_cast(out, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } #' @rdname new_date #' @export vec_cast.POSIXct #' @method vec_cast POSIXct #' @export vec_cast.POSIXct <- function(x, to, ...) { UseMethod("vec_cast.POSIXct") } #' @export #' @method vec_cast.POSIXct Date vec_cast.POSIXct.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.Date") } #' @export #' @method vec_cast.POSIXct POSIXlt vec_cast.POSIXct.POSIXlt <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.POSIXlt") } #' @export #' @method vec_cast.POSIXct POSIXct vec_cast.POSIXct.POSIXct <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXct.POSIXct") } #' @rdname new_date #' @export vec_cast.POSIXlt #' @method vec_cast POSIXlt #' @export vec_cast.POSIXlt <- function(x, to, ...) { UseMethod("vec_cast.POSIXlt") } #' @export #' @method vec_cast.POSIXlt Date vec_cast.POSIXlt.Date <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.Date") } #' @export #' @method vec_cast.POSIXlt POSIXlt vec_cast.POSIXlt.POSIXlt <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.POSIXlt") } #' @export #' @method vec_cast.POSIXlt POSIXct vec_cast.POSIXlt.POSIXct <- function(x, to, ...) { stop_native_implementation("vec_cast.POSIXlt.POSIXct") } #' @rdname new_date #' @export vec_cast.difftime #' @method vec_cast difftime #' @export vec_cast.difftime <- function(x, to, ...) { UseMethod("vec_cast.difftime") } #' @export #' @method vec_cast.difftime difftime vec_cast.difftime.difftime <- function(x, to, ...) { if (identical(units(x), units(to))) { if (typeof(x) == "integer") { # Catch corrupt difftime objects (#1602) storage.mode(x) <- "double" } x } else { # Hack: I can't see any obvious way of changing the units origin <- as.POSIXct(0, origin = "1970-01-01") difftime(origin, origin - x, units = units(to)) } } # Arithmetic -------------------------------------------------------------- #' @rdname new_date #' @export vec_arith.Date #' @method vec_arith Date #' @export vec_arith.Date <- function(op, x, y, ...) UseMethod("vec_arith.Date", y) #' @rdname new_date #' @export vec_arith.POSIXct #' @method vec_arith POSIXct #' @export vec_arith.POSIXct <- function(op, x, y, ...) UseMethod("vec_arith.POSIXct", y) #' @rdname new_date #' @export vec_arith.POSIXlt #' @method vec_arith POSIXlt #' @export vec_arith.POSIXlt <- function(op, x, y, ...) UseMethod("vec_arith.POSIXlt", y) #' @rdname new_date #' @export vec_arith.difftime #' @method vec_arith difftime #' @export vec_arith.difftime <- function(op, x, y, ...) UseMethod("vec_arith.difftime", y) #' @method vec_arith.Date default #' @export vec_arith.Date.default <- function(op, x, y, ...) stop_incompatible_op(op, x, y) #' @method vec_arith.POSIXct default #' @export vec_arith.POSIXct.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } #' @method vec_arith.POSIXlt default #' @export vec_arith.POSIXlt.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } #' @method vec_arith.difftime default #' @export vec_arith.difftime.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } #' @method vec_arith.Date Date #' @export vec_arith.Date.Date <- function(op, x, y, ...) { switch( op, `-` = difftime(x, y, units = "days"), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct POSIXct #' @export vec_arith.POSIXct.POSIXct <- function(op, x, y, ...) { switch( op, `-` = difftime(x, y, units = "secs"), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXlt POSIXlt #' @export vec_arith.POSIXlt.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXct Date #' @export vec_arith.POSIXct.Date <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date POSIXct #' @export vec_arith.Date.POSIXct <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXlt Date #' @export vec_arith.POSIXlt.Date <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date POSIXlt #' @export vec_arith.Date.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXlt POSIXct #' @export vec_arith.POSIXlt.POSIXct <- vec_arith.POSIXct.POSIXct #' @method vec_arith.POSIXct POSIXlt #' @export vec_arith.POSIXct.POSIXlt <- vec_arith.POSIXct.POSIXct #' @method vec_arith.Date numeric #' @export vec_arith.Date.numeric <- function(op, x, y, ...) { switch( op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.numeric Date #' @export vec_arith.numeric.Date <- function(op, x, y, ...) { switch( op, `+` = vec_restore(vec_arith_base(op, x, y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct numeric #' @export vec_arith.POSIXct.numeric <- vec_arith.Date.numeric #' @method vec_arith.numeric POSIXct #' @export vec_arith.numeric.POSIXct <- vec_arith.numeric.Date #' @method vec_arith.POSIXlt numeric #' @export vec_arith.POSIXlt.numeric <- function(op, x, y, ...) { vec_arith.POSIXct.numeric(op, as.POSIXct(x), y, ...) } #' @method vec_arith.numeric POSIXlt #' @export vec_arith.numeric.POSIXlt <- function(op, x, y, ...) { vec_arith.numeric.POSIXct(op, x, as.POSIXct(y), ...) } #' @method vec_arith.Date difftime #' @export vec_arith.Date.difftime <- function(op, x, y, ...) { y <- vec_cast(y, new_duration(units = "days")) switch( op, `+` = , `-` = vec_restore(vec_arith_base(op, x, lossy_floor(y, x)), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime Date #' @export vec_arith.difftime.Date <- function(op, x, y, ...) { x <- vec_cast(x, new_duration(units = "days")) switch( op, `+` = vec_restore(vec_arith_base(op, lossy_floor(x, y), y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXct difftime #' @export vec_arith.POSIXct.difftime <- function(op, x, y, ...) { y <- vec_cast(y, new_duration(units = "secs")) switch( op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime POSIXct #' @export vec_arith.difftime.POSIXct <- function(op, x, y, ...) { x <- vec_cast(x, new_duration(units = "secs")) switch( op, `+` = vec_restore(vec_arith_base(op, x, y), y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.POSIXlt difftime #' @export vec_arith.POSIXlt.difftime <- function(op, x, y, ...) { vec_arith.POSIXct.difftime(op, as.POSIXct(x), y, ...) } #' @method vec_arith.difftime POSIXlt #' @export vec_arith.difftime.POSIXlt <- function(op, x, y, ...) { vec_arith.difftime.POSIXct(op, x, as.POSIXct(y), ...) } #' @method vec_arith.difftime difftime #' @export vec_arith.difftime.difftime <- function(op, x, y, ...) { # Ensure x and y have same units args <- vec_cast_common(x, y) x <- args[[1L]] y <- args[[2L]] switch( op, `+` = vec_restore(vec_arith_base(op, x, y), x), `-` = vec_restore(vec_arith_base(op, x, y), x), `/` = vec_arith_base(op, x, y), `%/%` = vec_arith_base(op, x, y), `%%` = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime MISSING #' @export vec_arith.difftime.MISSING <- function(op, x, y, ...) { switch( op, `-` = vec_restore(-vec_data(x), x), `+` = x, stop_incompatible_op(op, x, y) ) } #' @method vec_arith.difftime numeric #' @export vec_arith.difftime.numeric <- function(op, x, y, ...) { vec_restore(vec_arith_base(op, x, y), x) } #' @method vec_arith.numeric difftime #' @export vec_arith.numeric.difftime <- function(op, x, y, ...) { switch( op, `/` = stop_incompatible_op(op, x, y), vec_restore(vec_arith_base(op, x, y), y) ) } # Helpers ----------------------------------------------------------------- # The tz attribute for POSIXlt can have 3 components # (time zone name, abbreviated name, abbreviated DST name) tzone <- function(x) { attr(x, "tzone")[[1]] %||% "" } tzone_is_local <- function(x) { identical(tzone(x), "") } tzone_union <- function(x, y) { if (tzone_is_local(x)) { tzone(y) } else { tzone(x) } } units_union <- function(x, y) { if (identical(units(x), units(y))) { units(x) } else { "secs" } } date_validate <- function(x) { .Call(vctrs_date_validate, x) } datetime_validate <- function(x) { .Call(vctrs_datetime_validate, x) } # as.character.Date() calls format() which tries to guess a simplified format. # Supplying a known format is faster and much more memory efficient. date_as_character <- function(x) { format(x, format = "%Y-%m-%d") } # `as.POSIXlt.character()` tries multiple formats. Supplying # a known format is much faster and more memory efficient. chr_date_as_posixlt <- function(x, tzone) { as.POSIXlt.character(x, tz = tzone, format = "%Y-%m-%d") } # `as.POSIXct.default()` for characters goes through `as.POSIXlt.character()` chr_date_as_posixct <- function(x, tzone) { out <- chr_date_as_posixlt(x, tzone) as.POSIXct.POSIXlt(out, tzone) } lossy_floor <- function(x, to, x_arg = "", to_arg = "") { x_floor <- floor(x) lossy <- x != x_floor maybe_lossy_cast(x_floor, x, to, lossy, x_arg = x_arg, to_arg = to_arg) } # Guarantees the presence of a `tzone` attribute # by going through `as.POSIXlt.POSIXct()`. # Useful for testing, since we always try to restore a `tzone`. as_posixlt <- function(x, tz = "") { as.POSIXlt(as.POSIXct(x, tz)) } # Math -------------------------------------------------------------------- #' @export vec_math.Date <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_math.POSIXct <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } #' @export vec_math.POSIXlt <- function(.fn, .x, ...) { stop_unsupported(.x, .fn) } vctrs/R/recode.R0000644000176200001440000001543115065005761013211 0ustar liggesusers#' Recode and replace values #' #' @description #' #' - `vec_recode_values()` constructs an entirely new vector by recoding the #' values from `x` specified in `from` to the corresponding values in `to`. If #' there are values in `x` not matched by `from`, then they are recoded to the #' `default` value. #' #' - `vec_replace_values()` updates an existing vector by replacing the values #' from `x` specified in `from` with the corresponding values in `to`. In this #' case, `to` must have the same type as `x` and values in `x` not matched by #' `from` pass through untouched. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x A vector. #' #' @param from Values to locate in `x` and map to values in `to`. #' #' Extra values present in `from` but not in `x` are ignored. #' #' - If `from_as_list_of_vectors = FALSE`, `from` must be a single vector of #' any size, which will be [cast][vctrs::theory-faq-coercion] to the type of #' `x`. #' #' - If `from_as_list_of_vectors = TRUE`, `from` must be a list of vectors of #' any size, which will individually be [cast][vctrs::theory-faq-coercion] #' to the type of `x`. #' #' @param to Values to map `from` to. #' #' The common type of `to` and `default` will determine the type of the #' output, unless `ptype` is provided. #' #' - If `to_as_list_of_vectors = FALSE`, `to` must be a single vector of size #' 1 or the same size as `from`. #' #' - If `to_as_list_of_vectors = TRUE`, `to` must be a list of vectors. The #' list itself must be size 1 or the same size as `from`. Each individual #' vector in the list must be size 1 or the same size as `x`. #' #' @param default Default value to use when there is a value present in `x` #' that is unmatched by a value in `from`. #' #' By default, a missing value is used as the default value. #' #' If supplied, `default` must be size 1 or the same size as `x`. #' #' Can only be set when `unmatched = "default"`. #' #' @param unmatched Handling of unmatched locations. #' #' One of: #' #' - `"default"` to use `default` in unmatched locations. #' #' - `"error"` to error when there are unmatched locations. #' #' @param from_as_list_of_vectors,to_as_list_of_vectors Boolean values #' determining whether to treat `from` and `to` as vectors or as lists of #' vectors. See their parameter descriptions for more details. #' #' @param x_arg,from_arg,to_arg,default_arg Argument names used in error #' messages. #' #' @param ptype An optional override for the output type, which is usually #' computed as the common type of `to` and `default`. #' #' @returns #' A vector the same size as `x`. #' #' - For `vec_recode_values()`, the type of the output is computed as the common #' type of `to` and `default`, unless overridden by `ptype`. The names of the #' output come from the names of `to` and `default`. #' #' - For `vec_replace_values()`, the type of the output will have the same type #' as `x`. The names of the output will be the same as the names of `x`. #' #' @name vec-recode-and-replace #' #' @examples #' x <- c(1, 2, 3, 1, 2, 4, NA, 5) #' #' # Imagine you have a pre-existing lookup table #' likert <- data.frame( #' from = c(1, 2, 3, 4, 5), #' to = c( #' "Strongly disagree", #' "Disagree", #' "Neutral", #' "Agree", #' "Strongly agree" #' ) #' ) #' vec_recode_values(x, from = likert$from, to = likert$to) #' #' # If you don't map all of the values, a `default` is used #' x <- c(1, 2, 3, 1, 2, 4, NA, 5, 6, 7) #' vec_recode_values(x, from = likert$from, to = likert$to) #' vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown") #' #' # If you want existing `NA`s to pass through, include a mapping for `NA` in #' # your lookup table #' likert <- data.frame( #' from = c(1, 2, 3, 4, 5, NA), #' to = c( #' "Strongly disagree", #' "Disagree", #' "Neutral", #' "Agree", #' "Strongly agree", #' NA #' ) #' ) #' vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown") #' #' # If you believe you've captured all of the cases, you can assert this with #' # `unmatched = "error"`, which will error if you've missed any cases #' # (including `NA`, which must be explicitly handled) #' try(vec_recode_values( #' x, #' from = likert$from, #' to = likert$to, #' unmatched = "error" #' )) #' #' if (require("tibble")) { #' # If you want to partially update `x`, retaining the type of `x` and #' # leaving values not covered by `from` alone, use `vec_replace_values()` #' universities <- c( #' "Duke", #' "Fake U", #' "Duke U", #' NA, #' "Chapel Hill", #' "UNC", #' NA, #' "Duke" #' ) #' #' standardize <- tibble::tribble( #' ~from, ~to, #' "Duke", "Duke University", #' "Duke U", "Duke University", #' "UNC", "UNC Chapel Hill", #' "Chapel Hill", "UNC Chapel Hill", #' ) #' vec_replace_values( #' universities, #' from = standardize$from, #' to = standardize$to #' ) #' #' # In this case, you can use a more powerful feature of #' # `vec_replace_values()`, `from_as_list_of_vectors`, which allows you to #' # provide a list of `from` vectors that each match multiple `from` values #' # to a single `to` value. `tribble()` can help you create these! #' standardize <- tibble::tribble( #' ~from, ~to, #' c("Duke", "Duke U"), "Duke University", #' c("UNC", "Chapel Hill"), "UNC Chapel Hill", #' ) #' #' # Note how `from` is a list column #' standardize #' #' vec_replace_values( #' universities, #' from = standardize$from, #' to = standardize$to, #' from_as_list_of_vectors = TRUE #' ) #' #' # `vec_replace_values()` is also a useful way to map from or to `NA` #' vec_replace_values(universities, from = NA, to = "Unknown") #' vec_replace_values(universities, from = "Fake U", to = NA) #' } NULL #' @rdname vec-recode-and-replace #' @export vec_recode_values <- function( x, ..., from, to, default = NULL, unmatched = "default", from_as_list_of_vectors = FALSE, to_as_list_of_vectors = FALSE, ptype = NULL, x_arg = "x", from_arg = "from", to_arg = "to", default_arg = "default", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_vec_recode_values, x, from, to, default, unmatched, from_as_list_of_vectors, to_as_list_of_vectors, ptype, environment() ) } #' @rdname vec-recode-and-replace #' @export vec_replace_values <- function( x, ..., from, to, from_as_list_of_vectors = FALSE, to_as_list_of_vectors = FALSE, x_arg = "x", from_arg = "from", to_arg = "to", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_vec_replace_values, x, from, to, from_as_list_of_vectors, to_as_list_of_vectors, environment() ) } vctrs/R/parallel.R0000644000176200001440000000541215113325071013533 0ustar liggesusers#' Parallel `any()` and `all()` #' #' @description #' These functions are variants of [any()] and [all()] that work in parallel on #' multiple inputs at once. They work similarly to how [pmin()] and [pmax()] are #' parallel variants of [min()] and [max()]. #' #' @inheritParams rlang::args_error_context #' #' @param ... Logical vectors of equal size. #' #' @param .missing Value to use when a missing value is encountered. One of: #' #' - `NA` to propagate missing values. With this, missings are treated the #' same way as `|` or `&`. #' #' - `FALSE` to treat missing values as `FALSE`. #' #' - `TRUE` to treat missing values as `TRUE`. #' #' @param .size An optional output size. Only useful to specify if it is possible #' for no inputs to be provided. #' #' @param .arg Argument name used in error messages. #' #' @returns A logical vector the same size as the vectors in `...`. #' #' @details #' `vec_pany()` and `vec_pall()` are consistent with [any()] and [all()] when #' there are no inputs to process in parallel: #' #' - `any()` returns `FALSE` with no inputs. Similarly, `vec_pany(.size = 1)` #' returns `FALSE`. #' #' - `all()` returns `TRUE` with no inputs. Similarly, `vec_pall(.size = 1)` #' returns `TRUE`. #' #' @name parallel-operators #' #' @examples #' a <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, NA, NA, NA) #' b <- c(TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA) #' #' # Default behavior treats missings like `|` does #' vec_pany(a, b) #' a | b #' #' # Default behavior treats missings like `&` does #' vec_pall(a, b) #' a & b #' #' # Remove missings from the computation, like `na_rm = TRUE` #' vec_pany(a, b, .missing = FALSE) #' (a & !is.na(a)) | (b & !is.na(b)) #' #' vec_pall(a, b, .missing = TRUE) #' (a | is.na(a)) & (b | is.na(b)) #' #' # `vec_pall()` can be used to implement a `dplyr::filter()` style API #' df <- data_frame(id = seq_along(a), a = a, b = b) #' #' keep_rows <- function(x, ...) { #' vec_slice(x, vec_pall(..., .missing = FALSE)) #' } #' drop_rows <- function(x, ...) { #' vec_slice(x, !vec_pall(..., .missing = FALSE)) #' } #' #' # "Keep / Drop the rows when both a and b are TRUE" #' # These form complements of one another, even with `NA`s. #' keep_rows(df, a, b) #' drop_rows(df, a, b) #' #' # Same empty behavior as `any()` and `all()` #' vec_pany(.size = 1) #' any() #' #' vec_pall(.size = 1) #' all() NULL #' @rdname parallel-operators #' @export vec_pany <- function( ..., .missing = NA, .size = NULL, .arg = "", .error_call = current_env() ) { .Call(ffi_vec_pany, list2(...), .missing, .size, environment()) } #' @rdname parallel-operators #' @export vec_pall <- function( ..., .missing = NA, .size = NULL, .arg = "", .error_call = current_env() ) { .Call(ffi_vec_pall, list2(...), .missing, .size, environment()) } vctrs/R/runs.R0000644000176200001440000000370114713505651012736 0ustar liggesusers#' Runs #' #' @description #' - `vec_identify_runs()` returns a vector of identifiers for the elements of #' `x` that indicate which run of repeated values they fall in. The number of #' runs is also returned as an attribute, `n`. #' #' - `vec_run_sizes()` returns an integer vector corresponding to the size of #' each run. This is identical to the `times` column from `vec_unrep()`, but #' is faster if you don't need the run keys. #' #' - [vec_unrep()] is a generalized [base::rle()]. It is documented alongside #' the "repeat" functions of [vec_rep()] and [vec_rep_each()]; look there for #' more information. #' #' @details #' Unlike [base::rle()], adjacent missing values are considered identical when #' constructing runs. For example, `vec_identify_runs(c(NA, NA))` will return #' `c(1, 1)`, not `c(1, 2)`. #' #' @param x A vector. #' #' @return #' - For `vec_identify_runs()`, an integer vector with the same size as `x`. A #' scalar integer attribute, `n`, is attached. #' #' - For `vec_run_sizes()`, an integer vector with size equal to the number of #' runs in `x`. #' #' @seealso #' [vec_unrep()] for a generalized [base::rle()]. #' #' @name runs #' @examples #' x <- c("a", "z", "z", "c", "a", "a") #' #' vec_identify_runs(x) #' vec_run_sizes(x) #' vec_unrep(x) #' #' y <- c(1, 1, 1, 2, 2, 3) #' #' # With multiple columns, the runs are constructed rowwise #' df <- data_frame( #' x = x, #' y = y #' ) #' #' vec_identify_runs(df) #' vec_run_sizes(df) #' vec_unrep(df) NULL #' @rdname runs #' @export vec_identify_runs <- function(x) { .Call(ffi_vec_identify_runs, x, environment()) } #' @rdname runs #' @export vec_run_sizes <- function(x) { .Call(ffi_vec_run_sizes, x, environment()) } vec_locate_run_bounds <- function(x, which = c("start", "end")) { .Call(ffi_vec_locate_run_bounds, x, which, environment()) } vec_detect_run_bounds <- function(x, which = c("start", "end")) { .Call(ffi_vec_detect_run_bounds, x, which, environment()) } vctrs/R/assert.R0000644000176200001440000003334215120600304013234 0ustar liggesusers#' Assert an argument has known prototype and/or size #' #' @description #' `r lifecycle::badge("questioning")` #' #' * `vec_is()` is a predicate that checks if its input is a vector that #' conforms to a prototype and/or a size. #' #' * `vec_assert()` throws an error when the input is not a vector or #' doesn't conform. #' #' @inheritSection vector-checks Vectors and scalars #' #' @section Error types: #' #' `vec_is()` never throws. #' `vec_assert()` throws the following errors: #' #' * If the input is not a vector, an error of class #' `"vctrs_error_scalar_type"` is raised. #' #' * If the prototype doesn't match, an error of class #' `"vctrs_error_assert_ptype"` is raised. #' #' * If the size doesn't match, an error of class #' `"vctrs_error_assert_size"` is raised. #' #' Both errors inherit from `"vctrs_error_assert"`. #' #' @section Lifecycle: #' #' Both `vec_is()` and `vec_assert()` are questioning because their `ptype` #' arguments have semantics that are challenging to define clearly and are #' rarely useful. #' #' - Use [obj_is_vector()] or [obj_check_vector()] for vector checks #' #' - Use [vec_check_size()] for size checks #' #' - Use [vec_cast()], [inherits()], or simple type predicates like #' [rlang::is_logical()] for specific type checks #' #' @inheritParams rlang::args_error_context #' #' @param x A vector argument to check. #' @param ptype Prototype to compare against. If the prototype has a #' class, its [vec_ptype()] is compared to that of `x` with #' `identical()`. Otherwise, its [typeof()] is compared to that of #' `x` with `==`. #' @param size A single integer size against which to compare. #' @param arg Name of argument being checked. This is used in error #' messages. The label of the expression passed as `x` is taken as #' default. #' #' @return `vec_is()` returns `TRUE` or `FALSE`. `vec_assert()` either #' throws a typed error (see section on error types) or returns `x`, #' invisibly. #' @keywords internal #' @export vec_assert <- function( x, ptype = NULL, size = NULL, arg = caller_arg(x), call = caller_env() ) { if (!obj_is_vector(x)) { stop_scalar_type(x, arg, call = call) } if (!is_null(ptype)) { ptype <- vec_ptype(ptype) x_type <- vec_ptype_finalise(vec_ptype(x)) if (!is_same_type(x_type, ptype)) { msg <- vec_assert_type_explain(x_type, ptype, arg) abort( msg, class = c("vctrs_error_assert_ptype", "vctrs_error_assert"), required = ptype, actual = x_type, call = call ) } } if (!is_null(size)) { size <- vec_cast(size, integer(), x_arg = "size") n_size <- length(size) if (n_size != 1L) { abort(glue::glue("`size` must be length 1, not length {n_size}.")) } x_size <- vec_size(x) if (!identical(x_size, size)) { stop_assert_size( x_size, size, arg, call = call ) } } invisible(x) } # Also thrown from C stop_assert_size <- function(actual, required, arg, call = caller_env()) { arg <- arg_backtick(arg) message <- glue::glue("{arg} must have size {required}, not size {actual}.") stop_assert( message, class = "vctrs_error_assert_size", actual = actual, required = required, call = call ) } stop_assert <- function( message = NULL, class = NULL, ..., call = caller_env() ) { stop_vctrs( message, class = c(class, "vctrs_error_assert"), ..., call = call ) } #' @rdname vec_assert #' @export vec_is <- function(x, ptype = NULL, size = NULL) { if (!obj_is_vector(x)) { return(FALSE) } if (!is_null(ptype)) { ptype <- vec_ptype(ptype) x_type <- vec_ptype_finalise(vec_ptype(x)) if (!is_same_type(x_type, ptype)) { return(FALSE) } } if (!is_null(size)) { size <- vec_recycle(vec_cast(size, integer()), 1L) x_size <- vec_size(x) if (!identical(x_size, size)) { return(FALSE) } } TRUE } #' Vector checks #' #' @description #' #' - `obj_is_vector()` tests if `x` is considered a vector in the vctrs sense. #' See _Vectors and scalars_ below for the exact details. #' #' - `obj_check_vector()` uses `obj_is_vector()` and throws a standardized and #' informative error if it returns `FALSE`. #' #' - `vec_check_size()` tests if `x` has size `size`, and throws an informative #' error if it doesn't. #' #' - `vec_check_recyclable()` tests if `x` can recycle to size `size`, and #' throws an informative error if it can't. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x For `obj_*()` functions, an object. For `vec_*()` functions, a #' vector. #' #' @param size The size to check for compatibility with. #' #' @returns #' - `obj_is_vector()` returns a single `TRUE` or `FALSE`. #' #' - `obj_check_vector()` returns `NULL` invisibly, or errors. #' #' - `vec_check_size()` returns `NULL` invisibly, or errors. #' #' - `vec_check_recyclable()` returns `NULL` invisibly, or errors. #' #' @section Vectors and scalars: #' #' Informally, a vector is a collection that makes sense to use as column in a #' data frame. The following rules define whether or not `x` is considered a #' vector. #' #' If no [vec_proxy()] method has been registered, `x` is a vector if: #' #' - The [base type][typeof] of the object is atomic: `"logical"`, `"integer"`, #' `"double"`, `"complex"`, `"character"`, or `"raw"`. #' #' - `x` is a list, as defined by [obj_is_list()]. #' #' - `x` is a [data.frame]. #' #' If a `vec_proxy()` method has been registered, `x` is a vector if: #' #' - The proxy satisfies one of the above conditions. #' #' - The base type of the proxy is `"list"`, regardless of its class. S3 lists #' are thus treated as scalars unless they implement a `vec_proxy()` method. #' #' Otherwise an object is treated as scalar and cannot be used as a vector. #' In particular: #' #' - `NULL` is not a vector. #' #' - S3 lists like `lm` objects are treated as scalars by default. #' #' - Objects of type [expression] are not treated as vectors. #' #' @section Technical limitations: #' #' - Support for S4 vectors is currently limited to objects that inherit from an #' atomic type. #' #' - Subclasses of [data.frame] that *append* their class to the back of the #' `"class"` attribute are not treated as vectors. If you inherit from an S3 #' class, always prepend your class to the front of the `"class"` attribute #' for correct dispatch. This matches our general principle of allowing #' subclasses but not mixins. #' #' @name vector-checks #' @examples #' obj_is_vector(1) #' #' # Data frames are vectors #' obj_is_vector(data_frame()) #' #' # Bare lists are vectors #' obj_is_vector(list()) #' #' # S3 lists are vectors if they explicitly inherit from `"list"` #' x <- structure(list(), class = c("my_list", "list")) #' obj_is_list(x) #' obj_is_vector(x) #' #' # But if they don't explicitly inherit from `"list"`, they aren't #' # automatically considered to be vectors. Instead, vctrs considers this #' # to be a scalar object, like a linear model returned from `lm()`. #' y <- structure(list(), class = "my_list") #' obj_is_list(y) #' obj_is_vector(y) #' #' # `obj_check_vector()` throws an informative error if the input #' # isn't a vector #' try(obj_check_vector(y)) #' #' # `vec_check_size()` throws an informative error if the size of the #' # input doesn't match `size` #' vec_check_size(1:5, size = 5) #' try(vec_check_size(1:5, size = 4)) #' #' # `vec_check_recyclable()` throws an informative error if the input can't #' # recycle to size `size` #' vec_check_recyclable(1:5, size = 5) #' vec_check_recyclable(1, size = 5) #' try(vec_check_recyclable(1:2, size = 5)) NULL #' @export #' @rdname vector-checks obj_is_vector <- function(x) { .Call(ffi_obj_is_vector, x) } #' @export #' @rdname vector-checks obj_check_vector <- function(x, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_obj_check_vector, x, environment())) } #' @export #' @rdname vector-checks vec_check_size <- function( x, size, ..., arg = caller_arg(x), call = caller_env() ) { check_dots_empty0(...) invisible(.Call(ffi_vec_check_size, x, size, environment())) } #' @export #' @rdname vector-checks vec_check_recyclable <- function( x, size, ..., arg = caller_arg(x), call = caller_env() ) { check_dots_empty0(...) invisible(.Call(ffi_vec_check_recyclable, x, size, environment())) } #' List checks #' #' @description #' - `obj_is_list()` tests if `x` is considered a list in the vctrs sense. It #' returns `TRUE` if all of the following hold: #' - `x` must have list storage, i.e. `typeof(x)` returns `"list"` #' - `x` must not have a `dim` attribute #' - `x` must not have a `class` attribute, or must explicitly inherit from #' `"list"` as the last class #' #' - `list_all_vectors()` takes a list and returns `TRUE` if all elements of #' that list are vectors. #' #' - `list_all_size()` takes a list and returns `TRUE` if all elements of that #' list have the same `size`. #' #' - `list_all_recyclable()` takes a list and returns `TRUE` if all elements of #' that list can recycle to `size`. #' #' - `obj_check_list()`, `list_check_all_vectors()`, `list_check_all_size()`, #' and `list_check_all_recyclable()` use the above functions, but throw a #' standardized and informative error if they return `FALSE`. #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' #' @param x For `vec_*()` functions, an object. For `list_*()` functions, a #' list. #' #' @param size The size to check each element for compatibility with. #' #' @param allow_null Whether `NULL` elements should be skipped over #' automatically or not. #' #' @details #' Notably, data frames and S3 record style classes like POSIXlt are not #' considered lists. #' #' @seealso [list_sizes()] #' @export #' @examples #' obj_is_list(list()) #' obj_is_list(list_of(1)) #' obj_is_list(data.frame()) #' #' list_all_vectors(list(1, mtcars)) #' list_all_vectors(list(1, environment())) #' #' list_all_size(list(1:2, 2:3), 2) #' list_all_size(list(1:2, 2:4), 2) #' #' list_all_recyclable(list(1, 2:3), 2) #' list_all_recyclable(list(1, 2:4), 2) #' #' # `list_`-prefixed functions assume a list: #' try(list_all_vectors(environment())) #' #' # `NULL` elements are not considered vectors and generally have a size of 0 #' try(list_check_all_vectors(list(1, NULL, 2))) #' try(list_check_all_size(list(1, NULL, 2), size = 1)) #' #' # However, it is often useful to perform upfront vector/size checks on a #' # list, excluding `NULL`s, and then filter them out later on #' list_check_all_vectors(list(1, NULL, 2), allow_null = TRUE) #' list_check_all_size(list(1, NULL, 2), size = 1, allow_null = TRUE) #' #' # Performing the checks before removing `NULL`s from the list ensures that #' # any errors report the correct index. Note how the index is incorrect from a #' # user's point of view if we filter out `NULL` too soon. #' xs <- list(1, NULL, 2:3) #' try(list_check_all_size(xs, size = 1, allow_null = TRUE)) #' xs <- vec_slice(xs, !vec_detect_missing(xs)) #' try(list_check_all_size(xs, size = 1)) obj_is_list <- function(x) { .Call(ffi_obj_is_list, x) } #' @rdname obj_is_list #' @export obj_check_list <- function(x, ..., arg = caller_arg(x), call = caller_env()) { check_dots_empty0(...) invisible(.Call(ffi_check_list, x, environment())) } #' @rdname obj_is_list #' @export list_all_vectors <- function(x, ..., allow_null = FALSE) { check_dots_empty0(...) .Call(ffi_list_all_vectors, x, allow_null, environment()) } #' @rdname obj_is_list #' @export list_check_all_vectors <- function( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { check_dots_empty0(...) invisible(.Call(ffi_list_check_all_vectors, x, allow_null, environment())) } #' @rdname obj_is_list #' @export list_all_size <- function(x, size, ..., allow_null = FALSE) { check_dots_empty0(...) .Call(ffi_list_all_size, x, size, allow_null, environment()) } #' @rdname obj_is_list #' @export list_check_all_size <- function( x, size, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { check_dots_empty0(...) invisible(.Call( ffi_list_check_all_size, x, size, allow_null, environment() )) } #' @rdname obj_is_list #' @export list_all_recyclable <- function(x, size, ..., allow_null = FALSE) { check_dots_empty0(...) .Call(ffi_list_all_recyclable, x, size, allow_null, environment()) } #' @rdname obj_is_list #' @export list_check_all_recyclable <- function( x, size, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) { check_dots_empty0(...) invisible(.Call( ffi_list_check_all_recyclable, x, size, allow_null, environment() )) } # Called from C stop_non_list_type <- function(x, arg, call) { arg <- arg_backtick(arg) cli::cli_abort( "{arg} must be a list, not {obj_type_friendly(x)}.", call = call ) } is_same_type <- function(x, ptype) { x <- vec_slice(x, integer()) ptype <- vec_slice(ptype, integer()) # FIXME: Remove row names for matrices and arrays, and handle empty # but existing dimnames x <- vec_set_names(x, NULL) ptype <- vec_set_names(ptype, NULL) identical(x, ptype) } vec_assert_type_explain <- function(x, type, arg) { arg <- str_backtick(arg) x <- paste0("<", vec_ptype_full(x), ">") type <- paste0("<", vec_ptype_full(type), ">") intro <- paste0(arg, " must be a vector with type") intro <- layout_type(intro, type) outro <- paste0("Instead, it has type") outro <- layout_type(outro, x) paste_line( !!!intro, if (str_is_multiline(intro)) "", !!!outro ) } layout_type <- function(start, type) { if (str_is_multiline(type)) { paste_line( paste0(start, ":"), "", paste0(" ", indent(type, 2)) ) } else { paste0(start, " ", type, ".") } } vctrs/R/type-dplyr.R0000644000176200001440000000604615110231275014053 0ustar liggesusers# All methods in this file are conditionally registered in .onLoad() ### `grouped_df` ----------------------------------------------------- group_intersect <- function(x, new) { intersect(dplyr::group_vars(x), names(new)) } vec_restore_grouped_df <- function(x, to, ...) { vars <- group_intersect(to, x) drop <- dplyr::group_by_drop_default(to) dplyr::grouped_df(x, vars, drop = drop) } # `vec_ptype2()` ----------------------------------------------------- vec_ptype2_grouped_df_grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_grouped_df_data.frame <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_data.frame_grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_grouped_df_tbl_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } vec_ptype2_tbl_df_grouped_df <- function(x, y, ...) { gdf_ptype2(x, y, ...) } gdf_ptype2 <- function(x, y, ...) { common <- df_ptype2(x, y, ...) x_vars <- dplyr::group_vars(x) y_vars <- dplyr::group_vars(y) vars <- union(x_vars, y_vars) drop <- dplyr::group_by_drop_default(x) && dplyr::group_by_drop_default(y) dplyr::grouped_df(common, vars, drop = drop) } # `vec_cast()` ------------------------------------------------------- vec_cast_grouped_df_grouped_df <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast_grouped_df_data.frame <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast_data.frame_grouped_df <- function(x, to, ...) { df_cast(x, to, ...) } vec_cast_grouped_df_tbl_df <- function(x, to, ...) { gdf_cast(x, to, ...) } vec_cast_tbl_df_grouped_df <- function(x, to, ...) { tib_cast(x, to, ...) } gdf_cast <- function(x, to, ...) { df <- df_cast(x, to, ...) vars <- dplyr::group_vars(to) drop <- dplyr::group_by_drop_default(to) dplyr::grouped_df(df, vars, drop = drop) } ### `rowwise` -------------------------------------------------------- vec_restore_rowwise_df <- function(x, to, ...) { dplyr::rowwise(x) } # `vec_ptype2()` ----------------------------------------------------- vec_ptype2_rowwise_df_rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_rowwise_df_data.frame <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_data.frame_rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_rowwise_df_tbl_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } vec_ptype2_tbl_df_rowwise_df <- function(x, y, ...) { rww_ptype2(x, y, ...) } rww_ptype2 <- function(x, y, ...) { dplyr::rowwise(df_ptype2(x, y, ...)) } # `vec_cast()` ------------------------------------------------------- vec_cast_rowwise_df_rowwise_df <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast_rowwise_df_data.frame <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast_data.frame_rowwise_df <- function(x, to, ...) { df_cast(x, to, ...) } vec_cast_rowwise_df_tbl_df <- function(x, to, ...) { rww_cast(x, to, ...) } vec_cast_tbl_df_rowwise_df <- function(x, to, ...) { tib_cast(x, to, ...) } rww_cast <- function(x, to, ...) { dplyr::rowwise(df_cast(x, to, ...)) } vctrs/R/equal.R0000644000176200001440000000476515157004241013061 0ustar liggesusers#' Equality proxy #' #' Returns a proxy object (i.e. an atomic vector or data frame of atomic #' vectors). For [vctr]s, this determines the behaviour of `==` and #' `!=` (via [vec_equal()]); [unique()], [duplicated()] (via #' [vec_unique()] and [vec_duplicate_detect()]); [is.na()] and [anyNA()] #' (via [vec_detect_missing()]). #' #' The default method calls [vec_proxy()], as the default underlying #' vector data should be equal-able in most cases. If your class is #' not equal-able, provide a `vec_proxy_equal()` method that throws an #' error. #' #' @section Data frames: #' If the proxy for `x` is a data frame, the proxy function is automatically #' recursively applied on all columns as well. After applying the proxy #' recursively, if there are any data frame columns present in the proxy, then #' they are unpacked. Finally, if the resulting data frame only has a single #' column, then it is unwrapped and a vector is returned as the proxy. #' #' @param x A vector x. #' @inheritParams rlang::args_dots_empty #' #' @return A 1d atomic vector or a data frame. #' @keywords internal #' #' @section Dependencies: #' - [vec_proxy()] called by default #' #' @export vec_proxy_equal <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_proxy_equal, x)) UseMethod("vec_proxy_equal") } #' @export vec_proxy_equal.default <- function(x, ...) { stop_native_implementation("vec_proxy_equal.default") } #' Equality #' #' @description #' `vec_equal()` tests if two vectors are equal. #' #' @details #' Attributes of `x` and `y` are considered equal if they have the same names #' and values, even if the attribute ordering is different. This reflects the #' idea that attributes are treated as a map rather than an ordered list. #' #' @inheritParams vec_compare #' @return A logical vector the same size as the common size of `x` and `y`. #' Will only contain `NA`s if `na_equal` is `FALSE`. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_recycle_common()] #' - [vec_proxy_equal()] #' #' @seealso [vec_detect_missing()] #' #' @export #' @examples #' vec_equal(c(TRUE, FALSE, NA), FALSE) #' vec_equal(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) #' #' vec_equal(5, 1:10) #' vec_equal("d", letters[1:10]) #' #' df <- data.frame(x = c(1, 1, 2, 1), y = c(1, 2, 1, NA)) #' vec_equal(df, data.frame(x = 1, y = 2)) vec_equal <- function(x, y, na_equal = FALSE, .ptype = NULL) { .Call(ffi_vec_equal, x, y, na_equal, .ptype, environment()) } obj_equal <- function(x, y) { .Call(ffi_obj_equal, x, y) } vctrs/R/type-asis.R0000644000176200001440000000516314713505651013671 0ustar liggesusers#' AsIs S3 class #' #' These functions help the base AsIs class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name as-is NULL # ------------------------------------------------------------------------------ # Printing #' @export vec_ptype_full.AsIs <- function(x, ...) { x <- asis_strip(x) paste0("I<", vec_ptype_full(x), ">") } #' @export vec_ptype_abbr.AsIs <- function(x, ...) { x <- asis_strip(x) paste0("I<", vec_ptype_abbr(x), ">") } # ------------------------------------------------------------------------------ # Proxy / restore # Arises with base df ctor: `data.frame(x = I(list(1, 2:3)))` #' @export vec_proxy.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy(x) } #' @export vec_restore.AsIs <- function(x, to, ...) { asis_restore(x) } #' @export vec_proxy_equal.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy_equal(x) } #' @export vec_proxy_compare.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy_compare(x) } #' @export vec_proxy_order.AsIs <- function(x, ...) { x <- asis_strip(x) vec_proxy_order(x) } # ------------------------------------------------------------------------------ # Coercion #' @rdname as-is #' @export vec_ptype2.AsIs #' @method vec_ptype2 AsIs #' @export vec_ptype2.AsIs <- function(x, y, ..., x_arg = "", y_arg = "") { UseMethod("vec_ptype2.AsIs") } #' @method vec_ptype2.AsIs AsIs #' @export vec_ptype2.AsIs.AsIs <- function(x, y, ..., x_arg = "", y_arg = "") { x <- asis_strip(x) y <- asis_strip(y) vec_ptype2_asis(x, y, ..., x_arg = x_arg, y_arg = y_arg) } vec_ptype2_asis_left <- function(x, y, ...) { x <- asis_strip(x) vec_ptype2_asis(x, y, ...) } vec_ptype2_asis_right <- function(x, y, ...) { y <- asis_strip(y) vec_ptype2_asis(x, y, ...) } vec_ptype2_asis <- function(x, y, ...) { out <- vec_ptype2(x, y, ...) asis_restore(out) } # ------------------------------------------------------------------------------ # Casting vec_cast_from_asis <- function(x, to, ..., call = caller_env()) { x <- asis_strip(x) vec_cast(x, to, ..., call = call) } vec_cast_to_asis <- function(x, to, ..., call = caller_env()) { to <- asis_strip(to) out <- vec_cast(x, to, ..., call = call) asis_restore(out) } # ------------------------------------------------------------------------------ is_asis <- function(x) { inherits(x, "AsIs") } asis_strip <- function(x) { class(x) <- setdiff(class(x), "AsIs") x } asis_restore <- function(x) { # Using `oldClass()` here to return `NULL` for atomics # so that their implicit class isn't added class(x) <- c("AsIs", oldClass(x)) x } vctrs/R/import-standalone-linked-version.R0000644000176200001440000000426515065006131020332 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-linked-version.R # last-updated: 2022-05-26 # license: https://unlicense.org # --- # # nocov start check_linked_version <- local({ # Keep in sync with standalone-downstream-deps.R howto_reinstall_msg <- function(pkg) { os <- tolower(Sys.info()[["sysname"]]) if (os == "windows") { url <- "https://github.com/jennybc/what-they-forgot/issues/62" c( i = sprintf("Please update %s to the latest version.", pkg), i = sprintf("Updating packages on Windows requires precautions:\n <%s>", url) ) } else { c( i = sprintf("Please update %s with `install.packages(\"%s\")` and restart R.", pkg, pkg) ) } } function(pkg, with_rlang = requireNamespace("rlang", quietly = TRUE)) { ver <- utils::packageVersion(pkg) ns <- asNamespace(pkg) linked_ver_ptr <- ns[[paste0(pkg, "_linked_version")]] if (is.null(linked_ver_ptr)) { linked_ver <- "" } else { # Construct call to avoid NOTE when argument to `.Call()` is not # statically analysable linked_ver <- do.call(".Call", list(linked_ver_ptr)) } if (nzchar(linked_ver) && ver == linked_ver) { return(invisible(NULL)) } header <- sprintf("The %s package is not properly installed.", pkg) if (nzchar(linked_ver)) { msg <- c(x = sprintf( "The DLL version (%s) does not correspond to the package version (%s).", linked_ver, ver )) } else { # Package does not have a version pointer. This happens when DLL # updating fails for the first version that includes the pointer. msg <- c(x = "The DLL version does not correspond to the package version.") } msg <- c(msg, howto_reinstall_msg(pkg)) if (with_rlang) { msg <- paste(header, rlang::format_error_bullets(msg), sep = "\n") rlang::abort(msg) } else { msg <- paste(c(header, msg), collapse = "\n") stop(msg, call. = FALSE) } } }) # nocov end vctrs/R/hash.R0000644000176200001440000000042115072256373012671 0ustar liggesusers# These return raw vectors of hashes. Vector elements are coded with # 32 bit hashes. Thus, the size of the raw vector of hashes is 4 times # the size of the input. vec_hash <- function(x) { .Call(ffi_vec_hash, x) } obj_hash <- function(x) { .Call(ffi_obj_hash, x) } vctrs/R/utils-cli.R0000644000176200001440000000214615065005761013654 0ustar liggesusersparens <- function(x, left = TRUE) { x_lines <- strsplit(x, "\n") x_lines <- map(x_lines, paren, left = left) map_chr(x_lines, paste0, collapse = "\n") } paren <- function(x, left = TRUE) { if (length(x) <= 1) { if (left) { paste0("( ", x) } else { paste0(x, " )") } } else { if (left) { paste0(c("\u250c ", rep("\u2502 ", length(x) - 2), "\u2514 "), x) } else { paste0(format(x), c(" \u2510", rep(" \u2502", length(x) - 2), " \u2518")) } } } pad_height <- function(x) { pad <- function(x, n) c(x, rep("", n - length(x))) lines <- strsplit(x, "\n") height <- max(map_int(lines, length)) lines <- map(lines, pad, height) map_chr(lines, paste0, "\n", collapse = "") } pad_width <- function(x) { lines <- strsplit(x, "\n", fixed = TRUE) # fix up strsplit bug n <- map_int(lines, length) lines[n == 0] <- "" width <- max(unlist(map(lines, nchar))) lines <- map(lines, format, width = width) map_chr(lines, paste, collapse = "\n") } str_backtick <- function(x) { paste0("`", x, "`") } str_is_multiline <- function(x) { grepl("\n", x) } vctrs/R/bind.R0000644000176200001440000001730015156000460012651 0ustar liggesusers#' Combine many data frames into one data frame #' #' This pair of functions binds together data frames (and vectors), either #' row-wise or column-wise. Row-binding creates a data frame with common type #' across all arguments. Column-binding creates a data frame with common length #' across all arguments. #' #' @section Invariants: #' #' All inputs are first converted to a data frame. The conversion for #' 1d vectors depends on the direction of binding: #' #' * For `vec_rbind()`, each element of the vector becomes a column in #' a single row. #' * For `vec_cbind()`, each element of the vector becomes a row in a #' single column. #' #' Once the inputs have all become data frames, the following #' invariants are observed for row-binding: #' #' * `vec_size(vec_rbind(x, y)) == vec_size(x) + vec_size(y)` #' * `vec_ptype(vec_rbind(x, y)) = vec_ptype_common(x, y)` #' #' Note that if an input is an empty vector, it is first converted to #' a 1-row data frame with 0 columns. Despite being empty, its #' effective size for the total number of rows is 1. #' #' For column-binding, the following invariants apply: #' #' * `vec_size(vec_cbind(x, y)) == vec_size_common(x, y)` #' * `vec_ptype(vec_cbind(x, y)) == vec_cbind(vec_ptype(x), vec_ptype(x))` #' #' @inheritParams vec_c #' @inheritParams rlang::args_error_context #' #' @param ... Data frames or vectors. #' #' When the inputs are named: #' * `vec_rbind()` assigns names to row names unless `.names_to` is #' supplied. In that case the names are assigned in the column #' defined by `.names_to`. #' * `vec_cbind()` creates packed data frame columns with named #' inputs. #' #' `NULL` inputs are silently ignored. Empty (e.g. zero row) inputs #' will not appear in the output, but will affect the derived `.ptype`. #' @param .names_to This controls what to do with names on `...`: #' #' * By default, names on `...` are [zapped][rlang::zap] and do not appear #' anywhere in the output. #' #' * If a string, specifies a column where the names on `...` will be #' copied. These names are often useful to identify rows with #' their original input. If a column name is supplied and `...` is #' not named, an integer column is used instead. #' #' * If `NULL`, the outer names on `...` are instead merged with inner #' row names on each element of `...` and are subject to `.name_spec`. #' @param .name_repair One of `"unique"`, `"universal"`, `"check_unique"`, #' `"unique_quiet"`, or `"universal_quiet"`. See [vec_as_names()] for the #' meaning of these options. #' #' With `vec_rbind()`, the repair function is applied to all inputs #' separately. This is because `vec_rbind()` needs to align their #' columns before binding the rows, and thus needs all inputs to #' have unique names. On the other hand, `vec_cbind()` applies the #' repair function after all inputs have been concatenated together #' in a final data frame. Hence `vec_cbind()` allows the more #' permissive minimal names repair. #' #' @return A data frame, or subclass of data frame. #' #' If `...` is a mix of different data frame subclasses, `vec_ptype2()` #' will be used to determine the output type. For `vec_rbind()`, this #' will determine the type of the container and the type of each column; #' for `vec_cbind()` it only determines the type of the output container. #' If there are no non-`NULL` inputs, the result will be `data.frame()`. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_cast_common()] #' - [vec_proxy()] #' - [vec_init()] #' - [vec_assign()] #' - [vec_restore()] #' #' #' ## base dependencies of `vec_rbind()` #' #' - [base::c()] #' #' If columns to combine inherit from a common class, #' `vec_rbind()` falls back to `base::c()` if there exists a `c()` #' method implemented for this class hierarchy. #' #' @seealso [vec_c()] for combining 1d vectors. #' @examples #' # row binding ----------------------------------------- #' #' # common columns are coerced to common class #' vec_rbind( #' data.frame(x = 1), #' data.frame(x = FALSE) #' ) #' #' # unique columns are filled with NAs #' vec_rbind( #' data.frame(x = 1), #' data.frame(y = "x") #' ) #' #' # null inputs are ignored #' vec_rbind( #' data.frame(x = 1), #' NULL, #' data.frame(x = 2) #' ) #' #' # bare vectors are treated as rows #' vec_rbind( #' c(x = 1, y = 2), #' c(x = 3) #' ) #' #' # default names will be supplied if arguments are not named #' vec_rbind( #' 1:2, #' 1:3, #' 1:4 #' ) #' #' # column binding -------------------------------------- #' #' # each input is recycled to have common length #' vec_cbind( #' data.frame(x = 1), #' data.frame(y = 1:3) #' ) #' #' # bare vectors are treated as columns #' vec_cbind( #' data.frame(x = 1), #' y = letters[1:3] #' ) #' #' # if you supply a named data frame, it is packed in a single column #' data <- vec_cbind( #' x = data.frame(a = 1, b = 2), #' y = 1 #' ) #' data #' #' # Packed data frames are nested in a single column. This makes it #' # possible to access it through a single name: #' data$x #' #' # since the base print method is suboptimal with packed data #' # frames, it is recommended to use tibble to work with these: #' if (rlang::is_installed("tibble")) { #' vec_cbind(x = tibble::tibble(a = 1, b = 2), y = 1) #' } #' #' # duplicate names are flagged #' vec_cbind(x = 1, x = 2) #' #' @name vec_bind NULL #' @export #' @param .name_spec A name specification (as documented in [vec_c()]) for #' combining the outer names on `...` with the inner row names of each element #' of `...`. An outer name will only ever be provided when `.names_to` is set #' to `NULL`, which causes the outer name to be used as part of the row names #' rather than as a new column, but it can still be useful to hardcode this to #' either [rlang::zap()] to always ignore all names, or `"inner"` to always #' ignore outer names, regardless of `.names_to`. #' @rdname vec_bind vec_rbind <- function( ..., .ptype = NULL, .names_to = rlang::zap(), .name_repair = c( "unique", "universal", "check_unique", "unique_quiet", "universal_quiet" ), .name_spec = NULL, .error_call = current_env() ) { .External2(ffi_rbind, list2(...), .ptype, .names_to, .name_repair, .name_spec) } vec_rbind <- fn_inline_formals(vec_rbind, ".name_repair") #' @export #' @rdname vec_bind #' @param .size If, `NULL`, the default, will determine the number of rows in #' `vec_cbind()` output by using the tidyverse [recycling #' rules][theory-faq-recycling]. #' #' Alternatively, specify the desired number of rows, and any inputs of length #' 1 will be recycled appropriately. vec_cbind <- function( ..., .ptype = NULL, .size = NULL, .name_repair = c( "unique", "universal", "check_unique", "minimal", "unique_quiet", "universal_quiet" ), .error_call = current_env() ) { .External2(ffi_cbind, list2(...), .ptype, .size, .name_repair) } vec_cbind <- fn_inline_formals(vec_cbind, ".name_repair") as_df_row <- function(x, quiet = FALSE) { .Call(ffi_as_df_row, x, quiet, environment()) } as_df_col <- function(x, outer_name) { .Call(ffi_as_df_col, x, outer_name, environment()) } #' Frame prototype #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' This is an experimental generic that returns zero-columns variants #' of a data frame. It is needed for [vec_cbind()], to work around the #' lack of colwise primitives in vctrs. Expect changes. #' #' @param x A data frame. #' @inheritParams rlang::args_dots_empty #' #' @keywords internal #' @export vec_cbind_frame_ptype <- function(x, ...) { UseMethod("vec_cbind_frame_ptype") } #' @export vec_cbind_frame_ptype.default <- function(x, ...) { x[0] } #' @export vec_cbind_frame_ptype.sf <- function(x, ...) { data.frame() } vctrs/R/vctrs-package.R0000644000176200001440000000107114713505651014477 0ustar liggesusers#' @description #' `r lifecycle::badge("maturing")` #' #' Defines new notions of prototype and size that are #' used to provide tools for consistent and well-founded type-coercion #' and size-recycling, and are in turn connected to ideas of type- and #' size-stability useful for analysing function interfaces. #' #' @keywords internal #' @import rlang #' @useDynLib vctrs, .registration = TRUE "_PACKAGE" release_extra_revdeps <- function() { # Extra revdeps to run before release. # Recognized by `usethis::use_release_issue()`. c("dplyr", "tidyr", "purrr") } vctrs/R/type-data-frame.R0000644000176200001440000002675415120515501014730 0ustar liggesusers#' Assemble attributes for data frame construction #' #' `new_data_frame()` constructs a new data frame from an existing list. It is #' meant to be performant, and does not check the inputs for correctness in any #' way. It is only safe to use after a call to [df_list()], which collects and #' validates the columns used to construct the data frame. #' #' @seealso #' [df_list()] for a way to safely construct a data frame's underlying #' data structure from individual columns. This can be used to create a #' named list for further use by `new_data_frame()`. #' #' @param x A named list of equal-length vectors. The lengths are not #' checked; it is responsibility of the caller to make sure they are #' equal. #' @param n Number of rows. If `NULL`, will be computed from the length of #' the first element of `x`. #' @param ...,class Additional arguments for creating subclasses. #' #' The following attributes have special behavior: #' - `"names"` is preferred if provided, overriding existing names in `x`. #' - `"row.names"` is preferred if provided, overriding both `n` and the size #' implied by `x`. #' #' @export #' @examples #' new_data_frame(list(x = 1:10, y = 10:1)) new_data_frame <- function(x = list(), n = NULL, ..., class = NULL) { .External(ffi_new_data_frame, x, n, class, ...) } new_data_frame <- fn_inline_formals(new_data_frame, "x") #' Collect columns for data frame construction #' #' `df_list()` constructs the data structure underlying a data #' frame, a named list of equal-length vectors. It is often used in #' combination with [new_data_frame()] to safely and consistently create #' a helper function for data frame subclasses. #' #' @section Properties: #' #' - Inputs are [recycled][theory-faq-recycling] to a common size with #' [vec_recycle_common()]. #' #' - With the exception of data frames, inputs are not modified in any way. #' Character vectors are never converted to factors, and lists are stored #' as-is for easy creation of list-columns. #' #' - Unnamed data frame inputs are automatically unpacked. Named data frame #' inputs are stored unmodified as data frame columns. #' #' - `NULL` inputs are completely ignored. #' #' - The dots are dynamic, allowing for splicing of lists with `!!!` and #' unquoting. #' #' @seealso #' [new_data_frame()] for constructing data frame subclasses from a validated #' input. [data_frame()] for a fast data frame creation helper. #' #' @inheritParams rlang::args_error_context #' #' @param ... Vectors of equal-length. When inputs are named, those names #' are used for names of the resulting list. #' @param .size The common size of vectors supplied in `...`. If `NULL`, this #' will be computed as the common size of the inputs. #' @param .unpack Should unnamed data frame inputs be unpacked? Defaults to #' `TRUE`. #' @param .name_repair One of `"check_unique"`, `"unique"`, `"universal"`, #' `"minimal"`, `"unique_quiet"`, or `"universal_quiet"`. See [vec_as_names()] #' for the meaning of these options. #' #' @export #' @examples #' # `new_data_frame()` can be used to create custom data frame constructors #' new_fancy_df <- function(x = list(), n = NULL, ..., class = NULL) { #' new_data_frame(x, n = n, ..., class = c(class, "fancy_df")) #' } #' #' # Combine this constructor with `df_list()` to create a safe, #' # consistent helper function for your data frame subclass #' fancy_df <- function(...) { #' data <- df_list(...) #' new_fancy_df(data) #' } #' #' df <- fancy_df(x = 1) #' class(df) df_list <- function( ..., .size = NULL, .unpack = TRUE, .name_repair = c( "check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet" ), .error_call = current_env() ) { .Call(ffi_df_list, list2(...), .size, .unpack, .name_repair, environment()) } df_list <- fn_inline_formals(df_list, ".name_repair") #' Construct a data frame #' #' @description #' `data_frame()` constructs a data frame. It is similar to #' [base::data.frame()], but there are a few notable differences that make it #' more in line with vctrs principles. The Properties section outlines these. #' #' @details #' If no column names are supplied, `""` will be used as a default name for all #' columns. This is applied before name repair occurs, so the default name #' repair of `"check_unique"` will error if any unnamed inputs are supplied and #' `"unique"` (or `"unique_quiet"`) will repair the empty string column names #' appropriately. If the column names don't matter, use a `"minimal"` name #' repair for convenience and performance. #' #' @inheritSection df_list Properties #' #' @seealso #' [df_list()] for safely creating a data frame's underlying data structure from #' individual columns. [new_data_frame()] for constructing the actual data #' frame from that underlying data structure. Together, these can be useful #' for developers when creating new data frame subclasses supporting #' standard evaluation. #' #' @inheritParams rlang::args_error_context #' #' @param ... Vectors to become columns in the data frame. When inputs are #' named, those names are used for column names. #' @param .size The number of rows in the data frame. If `NULL`, this will #' be computed as the common size of the inputs. #' @param .name_repair One of `"check_unique"`, `"unique"`, `"universal"`, #' `"minimal"`, `"unique_quiet"`, or `"universal_quiet"`. See [vec_as_names()] #' for the meaning of these options. #' #' @export #' @examples #' data_frame(x = 1, y = 2) #' #' # Inputs are recycled using tidyverse recycling rules #' data_frame(x = 1, y = 1:3) #' #' # Strings are never converted to factors #' class(data_frame(x = "foo")$x) #' #' # List columns can be easily created #' df <- data_frame(x = list(1:2, 2, 3:4), y = 3:1) #' #' # However, the base print method is suboptimal for displaying them, #' # so it is recommended to convert them to tibble #' if (rlang::is_installed("tibble")) { #' tibble::as_tibble(df) #' } #' #' # Named data frame inputs create data frame columns #' df <- data_frame(x = data_frame(y = 1:2, z = "a")) #' #' # The `x` column itself is another data frame #' df$x #' #' # Again, it is recommended to convert these to tibbles for a better #' # print method #' if (rlang::is_installed("tibble")) { #' tibble::as_tibble(df) #' } #' #' # Unnamed data frame input is automatically unpacked #' data_frame(x = 1, data_frame(y = 1:2, z = "a")) data_frame <- function( ..., .size = NULL, .name_repair = c( "check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet" ), .error_call = current_env() ) { .Call(ffi_data_frame, list2(...), .size, .name_repair, environment()) } data_frame <- fn_inline_formals(data_frame, ".name_repair") #' @export vec_ptype_abbr.data.frame <- function(x, ...) { "df" } # For testing # Keep in sync with `enum vctrs_proxy_kind` in `vctrs.h` df_proxy <- function(x, kind) { .Call(ffi_df_proxy, x, kind) } VCTRS_PROXY_KIND_equal <- 0L VCTRS_PROXY_KIND_compare <- 1L VCTRS_PROXY_KIND_order <- 2L df_is_coercible <- function(x, y, opts) { vec_is_coercible( new_data_frame(x), new_data_frame(y), opts = opts ) } # Coercion ---------------------------------------------------------------- #' Coercion between two data frames #' #' `df_ptype2()` and `df_cast()` are the two functions you need to #' call from `vec_ptype2()` and `vec_cast()` methods for data frame #' subclasses. See [?howto-faq-coercion-data-frame][howto-faq-coercion-data-frame]. #' Their main job is to determine the common type of two data frames, #' adding and coercing columns as needed, or throwing an incompatible #' type error when the columns are not compatible. #' #' @param x,y,to Subclasses of data frame. #' @param ... If you call `df_ptype2()` or `df_cast()` from a #' `vec_ptype2()` or `vec_cast()` method, you must forward the dots #' passed to your method on to `df_ptype2()` or `df_cast()`. #' @inheritParams vec_ptype2 #' @inheritParams vec_cast #' #' @return #' * When `x` and `y` are not compatible, an error of class #' `vctrs_error_incompatible_type` is thrown. #' * When `x` and `y` are compatible, `df_ptype2()` returns the common #' type as a bare data frame. `tib_ptype2()` returns the common type #' as a bare tibble. #' #' @export df_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) { .Call( ffi_df_ptype2_opts, x, y, opts = match_fallback_opts(...), environment() ) } #' @rdname df_ptype2 #' @export df_cast <- function(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) { .Call( ffi_df_cast_opts, x, to, opts = match_fallback_opts(...), environment() ) } df_ptype2_opts <- function( x, y, ..., opts, x_arg = "", y_arg = "", call = caller_env() ) { .Call(ffi_df_ptype2_opts, x, y, opts = opts, environment()) } df_cast_opts <- function( x, to, ..., opts = fallback_opts(), x_arg = "", to_arg = "", call = caller_env() ) { .Call( ffi_df_cast_opts, x, to, opts, environment() ) } df_cast_params <- function( x, to, ..., x_arg = "", to_arg = "", s3_fallback = NULL ) { opts <- fallback_opts(s3_fallback = s3_fallback) df_cast_opts(x, to, opts = opts, x_arg = x_arg, to_arg = to_arg) } #' vctrs methods for data frames #' #' These functions help the base data.frame class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name vctrs-data-frame NULL #' @rdname vctrs-data-frame #' @export vec_ptype2.data.frame #' @method vec_ptype2 data.frame #' @export vec_ptype2.data.frame <- function(x, y, ...) { UseMethod("vec_ptype2.data.frame") } #' @method vec_ptype2.data.frame data.frame #' @export vec_ptype2.data.frame.data.frame <- function(x, y, ...) { df_ptype2(x, y, ...) } # Fallback for data frame subclasses (#981) vec_ptype2_df_fallback <- function( x, y, opts, x_arg = "", y_arg = "", call = caller_env() ) { vec_ptype2_params( as_base_df(x), as_base_df(y), s3_fallback = opts$s3_fallback, x_arg = x_arg, y_arg = y_arg, call = call ) } as_base_df <- function(x) { if (inherits(x, "tbl_df")) { new_data_frame(x, class = c("tbl_df", "tbl")) } else { new_data_frame(x) } } # Cast -------------------------------------------------------------------- #' @rdname vctrs-data-frame #' @export vec_cast.data.frame #' @method vec_cast data.frame #' @export vec_cast.data.frame <- function(x, to, ...) { UseMethod("vec_cast.data.frame") } #' @export #' @method vec_cast.data.frame data.frame vec_cast.data.frame.data.frame <- function( x, to, ..., x_arg = "", to_arg = "" ) { df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) } #' @export vec_restore.data.frame <- function(x, to, ...) { .Call(ffi_vec_bare_df_restore, x, to) } # Helpers ----------------------------------------------------------------- df_size <- function(x) { .Call(vctrs_df_size, x) } df_lossy_cast <- function( out, x, to, ..., x_arg = "", to_arg = "", call = caller_env() ) { extra <- setdiff(names(x), names(to)) maybe_lossy_cast( result = out, x = x, to = to, lossy = length(extra) > 0, locations = int(), x_arg = x_arg, to_arg = to_arg, call = call, details = inline_list("Dropped variables: ", extra, quote = "`"), class = "vctrs_error_cast_lossy_dropped" ) } df_attrib <- function(x) { attributes(x)[c("row.names", "names")] } non_df_attrib <- function(x) { attrib <- attributes(x) attrib <- attrib[!names(attrib) %in% c("row.names", "names")] # Sort to allow comparison attrib[order(names(attrib))] } vctrs/R/type-misc.R0000644000176200001440000001035315065005761013660 0ustar liggesusers# `numeric_version` from base ---------------------------------------- #' @export vec_proxy.numeric_version <- function(x, ...) { x } #' @export vec_proxy_equal.numeric_version <- function(x, ...) { proxy_equal_numeric_version(x) } # To generate data agnostic proxies of ``, we enforce a # restriction that each version can have at most 8 components. This allows us # to `vec_compare()` them without needing a "joint" comparison proxy, unlike # what `.encode_numeric_version()` returns. proxy_equal_numeric_version <- function(x, error_call = caller_env()) { N_COMPONENTS <- 8L x <- unclass(x) size <- length(x) sizes <- lengths(x) if (length(sizes) != 0L) { max <- max(sizes) } else { max <- N_COMPONENTS } if (max > N_COMPONENTS) { cli::cli_abort( "`x` can't contain more than {N_COMPONENTS} version components.", call = error_call ) } if (any(sizes != max)) { # Pad with zeros where needed to be able to transpose. # This is somewhat slow if required. pad_sizes <- max - sizes pad_needed <- which(pad_sizes != 0L) x[pad_needed] <- map2( x[pad_needed], pad_sizes[pad_needed], function(elt, pad_size) { c(elt, vec_rep(0L, times = pad_size)) } ) } # Transpose with combination of `vec_interleave()` and `vec_chop()` x <- vec_interleave(!!!x, .ptype = integer()) out <- vec_chop(x, sizes = vec_rep(size, times = max)) n_zeros <- N_COMPONENTS - max if (n_zeros != 0L) { # Pad columns of zeros out to `N_COMPONENTS` columns zero <- list(vec_rep(0L, times = size)) out <- c(out, vec_rep(zero, times = n_zeros)) } # Use a data frame as the proxy names(out) <- paste0("...", seq_len(N_COMPONENTS)) out <- new_data_frame(out, n = size) # A `` internally stored as `integer()` is considered the # `NA` value. We patch that in at the very end if needed. It is hard to create # so should be very uncommon. missing <- sizes == 0L if (any(missing)) { na <- vec_init(out) out <- vec_assign(out, missing, na) } out } # `omit` from base --------------------------------------------------- #' @export vec_proxy.omit <- function(x, ...) { x } #' @export vec_restore.omit <- function(x, ...) { structure(x, class = "omit") } #' @export vec_ptype2.omit.omit <- function(x, y, ...) { x } #' @export vec_ptype2.integer.omit <- function(x, y, ...) { x } #' @export vec_ptype2.omit.integer <- function(x, y, ...) { y } #' @export vec_ptype2.double.omit <- function(x, y, ...) { x } #' @export vec_ptype2.omit.double <- function(x, y, ...) { y } #' @export vec_cast.omit.omit <- function(x, to, ...) { x } #' @export vec_cast.integer.omit <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.omit.integer <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_cast.double.omit <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.omit.double <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } # `exclude` from base ------------------------------------------------ #' @export vec_proxy.exclude <- function(x, ...) { x } #' @export vec_restore.exclude <- function(x, ...) { structure(x, class = "exclude") } #' @export vec_ptype2.exclude.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.integer.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.exclude.integer <- function(x, y, ...) { y } #' @export vec_ptype2.double.exclude <- function(x, y, ...) { x } #' @export vec_ptype2.exclude.double <- function(x, y, ...) { y } #' @export vec_cast.exclude.exclude <- function(x, to, ...) { x } #' @export vec_cast.integer.exclude <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.exclude.integer <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } #' @export vec_cast.double.exclude <- function(x, to, ...) { vec_cast(vec_data(x), to, ...) } #' @export vec_cast.exclude.double <- function(x, to, ..., x_arg = "", to_arg = "") { stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg) } vctrs/R/if-else.R0000644000176200001440000000477115065005761013301 0ustar liggesusers#' Vectorized if-else #' #' @description #' `vec_if_else()` is a vectorized [if-else][if]. Compared to the base R #' equivalent, [ifelse()], this function allows you to handle missing values in #' the `condition` with `missing` and always takes `true`, `false`, and #' `missing` into account when determining what the output type should be. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param condition A logical vector. #' #' @param true,false Vectors to use for `TRUE` and `FALSE` values of #' `condition`. #' #' Both `true` and `false` will be [recycled][theory-faq-recycling] #' to the size of `condition`. #' #' `true`, `false`, and `missing` (if used) will be cast to their common type. #' #' @param missing If not `NULL`, will be used as the value for `NA` values of #' `condition`. Follows the same size and type rules as `true` and `false`. #' #' @param ptype An optional prototype declaring the desired output type. If #' supplied, this overrides the common type of `true`, `false`, and `missing`. #' #' @param condition_arg,true_arg,false_arg,missing_arg Argument names used in #' error messages. #' #' @returns #' A vector with the same size as `condition` and the same type as the common #' type of `true`, `false`, and `missing`. #' #' Where `condition` is `TRUE`, the matching values from `true`, where it is #' `FALSE`, the matching values from `false`, and where it is `NA`, the matching #' values from `missing`, if provided, otherwise a missing value will be used. #' #' @export #' @examples #' x <- c(-5:5, NA) #' vec_if_else(x < 0, NA, x) #' #' # Explicitly handle `NA` values in the `condition` with `missing` #' vec_if_else(x < 0, "negative", "positive", missing = "missing") #' #' # Unlike `ifelse()`, `vec_if_else()` preserves types #' x <- factor(sample(letters[1:5], 10, replace = TRUE)) #' ifelse(x %in% c("a", "b", "c"), x, NA) #' vec_if_else(x %in% c("a", "b", "c"), x, NA) #' #' # `vec_if_else()` also works with data frames #' condition <- c(TRUE, FALSE, NA, TRUE) #' true <- data_frame(x = 1:4, y = 5:8) #' false <- data_frame(x = 9:12, y = 13:16) #' vec_if_else(condition, true, false) vec_if_else <- function( condition, true, false, ..., missing = NULL, ptype = NULL, condition_arg = "condition", true_arg = "true", false_arg = "false", missing_arg = "missing", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_vec_if_else, condition, true, false, missing, ptype, environment() ) } vctrs/R/slice.R0000644000176200001440000003030415132161317013036 0ustar liggesusers#' Get or set observations in a vector #' #' This provides a common interface to extracting and modifying observations #' for all vector types, regardless of dimensionality. They are analogs to `[` #' and `[<-` that match [vec_size()] instead of `length()`. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' #' @param x A vector #' #' @param i An integer, character or logical vector specifying the locations or #' names of the observations to get/set. Specify `TRUE` to index all elements #' (as in `x[]`), or `NULL`, `FALSE` or `integer()` to index none (as in #' `x[NULL]`). #' #' @param value A vector of replacement values #' #' `value` is cast to the type of `x`. #' #' If `slice_value = FALSE`, `value` must be size 1 or the same size as `i` #' after `i` has been converted to a positive integer location vector with #' [vec_as_location()] (which may not be the same size as `i` originally). #' #' If `slice_value = TRUE`, `value` must be size 1 or the same size as `x`. #' #' @param slice_value A boolean. If `TRUE`, the assignment proceeds as if you #' had provided `vec_slice(x, i) <- vec_slice(value, i)`, but is optimized to #' avoid materializing the slice of `value`. #' #' @param x_arg,value_arg Argument names for `x` and `value`. These are used #' in error messages to inform the user about the locations of #' incompatible types and sizes (see [stop_incompatible_type()] and #' [stop_incompatible_size()]). #' #' @return A vector of the same type as `x`. #' #' @section Genericity: #' #' Support for S3 objects depends on whether the object implements a #' [vec_proxy()] method. #' #' * When a `vec_proxy()` method exists, the proxy is sliced or assigned to and #' `vec_restore()` is called on the result. #' #' * Otherwise, `vec_slice()` falls back to the base generic `[` and #' `vec_slice<-()` falls back to the base generic `[<-`. #' #' When `vec_slice<-()` falls back to `[<-`, it is expected that the subclass's #' `[<-` method can handle the following subset of cases that base R's `[<-` #' can also handle: #' #' * An `i` vector of positive integer positions (notably excluding `NA`). #' #' * A `value` vector of length 1 or length `length(i)`. If length 1, it #' should be recycled by the `[<-` method to the length of `i`. #' #' If your `[<-` method eventually calls base R's native `[<-` code, then these #' cases will be handled for you. #' #' Note that S3 lists are treated as scalars by default, and will #' cause an error if they don't implement a [vec_proxy()] method. #' #' @section Differences with base R subsetting: #' #' * `vec_slice()` only slices along one dimension. For #' two-dimensional types, the first dimension is subsetted. #' #' * `vec_slice()` preserves attributes by default. #' #' * `vec_slice<-()` is type-stable and always returns the same type #' as the LHS. #' #' @section Dependencies: #' #' ## vctrs dependencies #' #' - [vec_proxy()] #' - [vec_restore()] #' #' ## base dependencies #' #' - \code{base::`[`} #' - \code{base::`[<-`} #' #' @export #' @keywords internal #' @examples #' x <- sample(10) #' x #' vec_slice(x, 1:3) #' #' # You can assign with the infix variant: #' vec_slice(x, 2) <- 100 #' x #' #' # Or with the regular variant that doesn't modify the original input: #' y <- vec_assign(x, 3, 500) #' y #' x #' #' #' # Slicing objects of higher dimension: #' vec_slice(mtcars, 1:3) #' #' # Type stability -------------------------------------------------- #' #' # The assign variant is type stable. It always returns the same #' # type as the input. #' x <- 1:5 #' vec_slice(x, 2) <- 20.0 #' #' # `x` is still an integer vector because the RHS was cast to the #' # type of the LHS: #' vec_ptype(x) #' #' # Compare to `[<-`: #' x[2] <- 20.0 #' vec_ptype(x) #' #' #' # Note that the types must be coercible for the cast to happen. #' # For instance, you can cast a double vector of whole numbers to an #' # integer vector: #' vec_cast(1, integer()) #' #' # But not fractional doubles: #' try(vec_cast(1.5, integer())) #' #' # For this reason you can't assign fractional values in an integer #' # vector: #' x <- 1:3 #' try(vec_slice(x, 2) <- 1.5) #' #' # Slicing `value` ------------------------------------------------- #' #' # Sometimes both `x` and `value` start from objects that are the same length, #' # and you need to slice `value` by `i` before assigning it to `x`. This comes #' # up when thinking about how `base::ifelse()` and `dplyr::case_when()` work. #' condition <- c(TRUE, FALSE, TRUE, FALSE) #' yes <- 1:4 #' no <- 5:8 #' #' # Create an output container and fill it #' out <- vec_init(integer(), 4) #' out <- vec_assign(out, condition, vec_slice(yes, condition)) #' out <- vec_assign(out, !condition, vec_slice(no, !condition)) #' out #' #' # This is wasteful because you have to materialize the slices of `yes` and #' # `no` before they can be assigned, and you also have to validate `condition` #' # multiple times. Using `slice_value` internally performs #' # `vec_slice(yes, condition)` and `vec_slice(no, !condition)` for you, #' # but does so in a way that avoids the materialization. #' out <- vec_init(integer(), 4) #' out <- vec_assign(out, condition, yes, slice_value = TRUE) #' out <- vec_assign(out, !condition, no, slice_value = TRUE) #' out vec_slice <- function(x, i, ..., error_call = current_env()) { check_dots_empty0(...) .Call(ffi_slice, x, i, environment()) } # Called when `x` has dimensions vec_slice_fallback <- function(x, i) { out <- unclass(vec_proxy(x)) obj_check_vector(out) d <- vec_dim_n(out) if (d == 2) { out <- out[i, , drop = FALSE] } else { miss_args <- rep(list(missing_arg()), d - 1) out <- eval_bare(expr(out[i, !!!miss_args, drop = FALSE])) } vec_restore(out, x) } vec_slice_fallback_integer64 <- function(x, i) { d <- vec_dim_n(x) if (d == 2) { out <- x[i, , drop = FALSE] } else { miss_args <- rep(list(missing_arg()), d - 1) out <- eval_bare(expr(x[i, !!!miss_args, drop = FALSE])) } is_na <- is.na(i) if (!any(is_na)) { return(out) } if (d == 2) { out[is_na, ] <- bit64::NA_integer64_ } else { eval_bare(expr(out[is_na, !!!miss_args] <- bit64::NA_integer64_)) } out } # bit64::integer64() objects do not have support for `NA_integer_` # slicing. This manually replaces the garbage values that are created # any time a slice with `NA_integer_` is made. vec_slice_dispatch_integer64 <- function(x, i) { out <- x[i] is_na <- is.na(i) if (!any(is_na)) { return(out) } out[is_na] <- bit64::NA_integer64_ out } vec_slice_altrep <- function(x, i) { # We have already validated `i`, it is one of: # - Integer vector from `vec_as_location()` # - Integer vector from materializing a `compact_rep()` # - Integer vector from materializing a `compact_seq()` # - Logical vector from materializing a `compact_condition()` # (which `VectorSubset()` will convert to an integer vector) # For the main case we care about (an ALTREP vector with an Extract_Subset # method, like vroom), `.subset()` will: # - Call `do_subset_dflt()` (bypassing S3 dispatch!) # - Call `VectorSubset()` # - Call `ExtractSubset()` # - Call `ALTVEC_EXTRACT_SUBSET()` # - If that returns `NULL`, i.e. if this ALTREP class has not implemented an # ALTREP `Extract_Subset` method, then it will use the `Elt` method to # subset .subset(x, i) } #' @rdname vec_slice #' @export `vec_slice<-` <- function(x, i, value) { x_arg <- "" # Substitution is `*tmp*` delayedAssign("value_arg", as_label(substitute(value))) slice_value <- FALSE .Call(ffi_assign, x, i, value, slice_value, environment()) } #' @rdname vec_slice #' @export vec_assign <- function( x, i, value, ..., slice_value = FALSE, x_arg = "", value_arg = "" ) { check_dots_empty0(...) .Call(ffi_assign, x, i, value, slice_value, environment()) } # Invariants for `[<-` methods: # # - `i` will contain positive integers # - `i` will not contain `NA` (a base `[<-` issue gets in the way if we allow this) # - `value` will be size 1 or size `length(i)` # # Given these invariants, the base `[<-` works the way we want it to in fallback # methods, and we expect any subclasses to also uphold the same behavior as base # `[<-` with these inputs. In other words, we don't expect subclasses to have # vctrs subassign behavior, but we do expect them to match a subset of base R # subassign behavior. vec_assign_fallback <- function(x, i, value, slice_value, index_style) { if (index_style == "condition") { # Convert logical condition `i` to integer location `i`. Must use # `vec_as_location()` rather than `which()` because we want `NA` values to # propagate, which is consistent with how `value`'s size is checked # internally (i.e. when `slice_value = FALSE`, `value` was checked to have # size equal to the number of `TRUE` or `NA` values in `i`). Propagated # `NA`s are later dropped before calling `[<-` to work around a base `[<-` # issue, but we need them to slice both `i` and `value` consistently. i <- vec_as_location(i, n = vec_size(x), missing = "propagate") } if (slice_value && vec_size(value) != 1L) { # `value` has same size as `x` rather than same size as `i`. # We need to pre-slice it down to same size as `i` to match what `[<-` expects. # Effectively we are preemptively doing the RHS of this: # vec_slice(x, i) <- vec_slice(value, i) value <- vec_slice(value, i) } # Work around issue in base `[<-` that errors on `NA_integer_` in subassign # indices. Only do this as necessary, as `vec_slice()` will typically call # back to `[` when slicing `value` here, which is somewhat slow. if (vec_any_missing(i)) { existing <- !is.na(i) i <- vec_slice(i, existing) if (vec_size(value) != 1L) { value <- vec_slice(value, existing) } } d <- vec_dim_n(x) miss_args <- rep(list(missing_arg()), d - 1) eval_bare(expr(x[i, !!!miss_args] <- value)) x } # `start` is 0-based vec_assign_seq <- function( x, value, start, size, increasing = TRUE, slice_value = FALSE ) { .Call(ffi_assign_seq, x, value, start, size, increasing, slice_value) } # `start` is 0-based compact_seq <- function(start, size, increasing = TRUE) { .Call(ffi_compact_seq, start, size, increasing) } vec_assign_compact_condition <- function( x, i, value, slice_value = FALSE ) { .Call(ffi_assign_compact_condition, x, i, value, slice_value) } as_compact_condition <- function(x) { .Call(ffi_as_compact_condition, x) } #' @param assign_names A boolean. If `TRUE`, will assign names from `value` #' onto `x` as well. #' #' @noRd vec_assign_params <- function( x, i, value, ..., assign_names = FALSE, slice_value = FALSE, x_arg = "", value_arg = "" ) { check_dots_empty0(...) .Call( ffi_assign_params, x, i, value, assign_names, slice_value, environment() ) } vec_remove <- function(x, i) { vec_slice(x, -vec_as_location(i, length(x), names(x))) } vec_index <- function(x, i, ...) { i <- maybe_missing(i, TRUE) out <- vec_slice(x, i) if (!dots_n(...)) { return(out) } # Need to unclass to avoid infinite recursion through `[` proxy <- vec_data(out) out <- proxy[, ..., drop = FALSE] vec_restore(out, x) } #' Initialize a vector #' #' @param x Template of vector to initialize. #' @param n Desired size of result. #' @export #' @section Dependencies: #' * vec_slice() #' @examples #' vec_init(1:10, 3) #' vec_init(Sys.Date(), 5) #' #' # The "missing" value for a data frame is a row that is entirely missing #' vec_init(mtcars, 2) #' #' # The "missing" value for a list is `NULL` #' vec_init(list(), 3) vec_init <- function(x, n = 1L) { .Call(ffi_init, x, n, environment()) } # Exposed for testing (`start` is 0-based) vec_slice_seq <- function(x, start, size, increasing = TRUE) { .Call(ffi_slice_seq, x, start, size, increasing) } # Exposed for testing (`i` is 1-based) vec_slice_rep <- function(x, i, n) { .Call(ffi_slice_rep, x, i, n) } # Forwards arguments to `base::rep()` base_vec_rep <- function(x, ...) { i <- rep(seq_len(vec_size(x)), ...) vec_slice(x, i) } # Emulates `length<-` vec_size_assign <- function(x, n) { x_size <- vec_size(x) if (n > x_size) { i <- seq_len(x_size) i <- c(i, vec_init(int(), n - x_size)) } else { i <- seq_len(n) } vec_slice(x, i) } vctrs/R/list-combine.R0000644000176200001440000002032315072256373014336 0ustar liggesusers#' Combine a list of vectors #' #' @description #' `list_combine()` is a more powerful version of [vec_c()]. While `vec_c()` is #' used for sequential combination, `list_combine()` takes a list of `indices` #' that specify where to place each element in the output. #' #' If you have a list of vectors and just need to combine them sequentially, #' you'll still want to use `vec_c(!!!x)`. #' #' @inheritParams rlang::args_dots_empty #' @inheritParams rlang::args_error_context #' @inheritParams name_spec #' @inheritParams vec_as_names #' #' @param x A list of vectors. #' #' If `slice_x = FALSE`, each element must be size 1 or the same size as its #' corresponding index in `indices` after that index has been converted to #' a positive integer location vector with [vec_as_location()]. #' #' If `slice_x = TRUE`, each element must be size 1 or size `size`. #' #' @param indices A list of indices. #' #' Indices can be provided in one of two forms: #' #' - Positive integer vectors of locations less than or equal to `size`. Each #' vector can be any size. #' #' - Logical vectors of size `size` where `TRUE` denotes the location in the #' output to assign to, and the location from the `x` element to pull from. #' Both `NA` and `FALSE` are considered unmatched. #' #' The size of `indices` must match the size of `x`. #' #' @param size The output size. #' #' @param default If `NULL`, a missing value is used for locations unmatched by #' `indices`, otherwise the provided `default` is used. #' #' If provided, `default` must be size 1 or size `size`. #' #' Can only be set when `unmatched = "default"`. #' #' @param unmatched Handling of locations in the output unmatched by `indices`. #' One of: #' #' - `"default"` to use `default` in unmatched locations. #' #' - `"error"` to error when there are unmatched locations. #' #' @param multiple Handling of locations in the output matched by multiple #' `indices`. #' #' - `"last"` uses the value from the last matched index. #' #' - `"first"` uses the value from the first matched index. #' #' Note that `multiple` only applies across `indices`. Within a single index #' if there are overlapping locations, then the last will always win. This can #' only occur with integer `indices`, as you can't overlap within an index #' when using logical `indices`. #' #' @param slice_x A boolean. #' #' If `TRUE`, each element of `x` is sliced by its corresponding index from #' `indices` before being assigned into the output, which is effectively the #' same as `map2(list(x, indices), function(x, index) vec_slice(x, index))`, #' but is optimized to avoid materializing the slices. #' #' See the `slice_value` argument of [vec_assign()] for more examples. #' #' @param ptype If `NULL`, the output type is determined by computing the common #' type across all elements of `x` and `default`. Alternatively, you can #' supply `ptype` to give the output a known type. #' #' @param name_repair How to repair names, see `repair` options in #' [vec_as_names()]. #' #' @param x_arg,indices_arg,default_arg An argument name as a string. This #' argument will be mentioned in error messages as the input that is at the #' origin of a problem. #' #' @returns #' A vector of type `vec_ptype_common(!!!x)`, or `ptype`, if specified. #' #' The size of the output is determined by `size`. #' #' @export #' @examples #' # Combine a list of vectors using #' # a list of `indices` #' x <- list( #' 1:3, #' 4:6, #' 7:8 #' ) #' indices <- list( #' c(1, 3, 7), #' c(8, 6, 5), #' c(2, 4) #' ) #' list_combine(x, indices = indices, size = 8) #' #' # Overlapping `indices` are allowed. #' # The last match "wins" by default. #' x <- list( #' 1:3, #' 4:6 #' ) #' indices <- list( #' c(1, 2, 3), #' c(1, 2, 6) #' ) #' list_combine(x, indices = indices, size = 6) #' #' # Use `multiple` to force the first match to win. #' # This is similar to how `dplyr::case_when()` works. #' list_combine(x, indices = indices, size = 6, multiple = "first") #' #' # Works with data frames as well. #' # Now how index 2 is not assigned to. #' x <- list( #' data.frame(x = 1:2, y = c("a", "b")), #' data.frame(x = 3:4, y = c("c", "d")) #' ) #' indices <- list( #' c(4, 1), #' c(3, NA) #' ) #' list_combine(x, indices = indices, size = 4) #' #' # You can use `size` to combine into a larger object than you have values for #' list_combine(list(1:2, 4:5), indices = list(1:2, 4:5), size = 8) #' #' # Additionally specifying `default` allows you to control the value used in #' # unfilled locations #' list_combine( #' list(1:2, 4:5), #' indices = list(1:2, 4:5), #' size = 8, #' default = 0L #' ) #' #' # Alternatively, if you'd like to assert that you've covered all output #' # locations through `indices`, set `unmatched = "error"`. #' # Here, we've set the size to 5 but missed location 3: #' try(list_combine( #' list(1:2, 4:5), #' indices = list(1:2, 4:5), #' size = 5, #' unmatched = "error" #' )) list_combine <- function( x, ..., indices, size, default = NULL, unmatched = "default", multiple = "last", slice_x = FALSE, ptype = NULL, name_spec = NULL, name_repair = c( "minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet" ), x_arg = "x", indices_arg = "indices", default_arg = "default", error_call = current_env() ) { check_dots_empty0(...) .Call( ffi_list_combine, x, indices, size, default, unmatched, multiple, slice_x, ptype, name_spec, name_repair, environment() ) } list_combine <- fn_inline_formals(list_combine, "name_repair") # ------------------------------------------------------------------------------ stop_combine <- function( message = NULL, class = NULL, ..., call = caller_env() ) { stop_vctrs( message = message, class = c(class, "vctrs_error_combine"), ..., call = call ) } # ------------------------------------------------------------------------------ stop_combine_unmatched <- function(loc, call) { stop_combine( class = "vctrs_error_combine_unmatched", loc = loc, call = call ) } #' @export cnd_header.vctrs_error_combine_unmatched <- function(cnd, ...) { "Each location must be matched." } #' @export cnd_body.vctrs_error_combine_unmatched <- function(cnd, ...) { # cli's pluralization length feature only kicks in on character vectors loc <- as.character(cnd$loc) bullet <- cli::format_inline("Location{?s} {loc} {?is/are} unmatched.") bullet <- c(x = bullet) format_error_bullets(bullet) } # ------------------------------------------------------------------------------ # Called from C base_c_invoke <- function(xs) { local_options("vctrs:::base_c_in_progress" = TRUE) # Remove all `NULL` arguments which prevent dispatch if in first # position and might not be handled correctly by methods xs <- compact(xs) unspecified <- map_lgl(xs, fallback_is_unspecified) if (!any(unspecified)) { return(base_c(xs)) } # First concatenate without the unspecified chunks. This way the # `c()` method doesn't need to handle unspecified inputs correctly, # and we're guaranteed to dispatch on the correct class even if the # first input is unspecified. out <- base_c(xs[!unspecified]) # Create index vector with `NA` locations for unspecified chunks locs <- c_locs(xs) locs[unspecified] <- map(locs[unspecified], rep_along, na_int) locs[!unspecified] <- c_locs(xs[!unspecified]) locs <- vec_c(!!!locs, .ptype = int()) # Expand the concatenated vector with unspecified chunks out[locs] } base_c <- function(xs) { # Dispatch in the base namespace which inherits from the global env exec("c", !!!xs, .env = ns_env("base")) } # FIXME: Should be unnecessary in the future. We currently attach an # attribute to unspecified columns initialised in `df_cast()`. We # can't use an unspecified vector because we (unnecessarily but for # convenience) go through `vec_assign()` before falling back in # `vec_rbind()`. fallback_is_unspecified <- function(x) { is_unspecified(x) || is_true(attr(x, "vctrs:::unspecified")) } c_locs <- function(xs) { locs <- reduce(lengths(xs), .init = list(0), function(output, input) { n <- last(last(output)) c(output, list(seq(n + 1, n + input))) }) locs[-1] } vctrs/R/names.R0000644000176200001440000004520015065005761013050 0ustar liggesusers#' Retrieve and repair names #' #' @description #' #' `vec_as_names()` takes a character vector of names and repairs it #' according to the `repair` argument. It is the r-lib and tidyverse #' equivalent of [base::make.names()]. #' #' vctrs deals with a few levels of name repair: #' #' * `minimal` names exist. The `names` attribute is not `NULL`. The #' name of an unnamed element is `""` and never `NA`. For instance, #' `vec_as_names()` always returns minimal names and data frames #' created by the tibble package have names that are, at least, #' `minimal`. #' #' * `unique` names are `minimal`, have no duplicates, and can be used #' where a variable name is expected. Empty names, `...`, and #' `..` followed by a sequence of digits are banned. #' #' - All columns can be accessed by name via `df[["name"]]` and #' ``df$`name` `` and ``with(df, `name`)``. #' #' * `universal` names are `unique` and syntactic (see Details for #' more). #' #' - Names work everywhere, without quoting: `df$name` and `with(df, #' name)` and `lm(name1 ~ name2, data = df)` and #' `dplyr::select(df, name)` all work. #' #' `universal` implies `unique`, `unique` implies `minimal`. These #' levels are nested. #' #' #' @inheritParams rlang::args_error_context #' @inheritParams rlang::args_dots_empty #' #' @param names A character vector. #' @param repair Either a string or a function. If a string, it must be one of #' `"check_unique"`, `"minimal"`, `"unique"`, `"universal"`, `"unique_quiet"`, #' or `"universal_quiet"`. If a function, it is invoked with a vector of #' minimal names and must return minimal names, otherwise an error is thrown. #' #' * Minimal names are never `NULL` or `NA`. When an element doesn't #' have a name, its minimal name is an empty string. #' #' * Unique names are unique. A suffix is appended to duplicate #' names to make them unique. #' #' * Universal names are unique and syntactic, meaning that you can #' safely use the names as variables without causing a syntax #' error. #' #' The `"check_unique"` option doesn't perform any name repair. #' Instead, an error is raised if the names don't suit the #' `"unique"` criteria. #' #' The options `"unique_quiet"` and `"universal_quiet"` are here to help the #' user who calls this function indirectly, via another function which exposes #' `repair` but not `quiet`. Specifying `repair = "unique_quiet"` is like #' specifying `repair = "unique", quiet = TRUE`. When the `"*_quiet"` options #' are used, any setting of `quiet` is silently overridden. #' @param repair_arg If specified and `repair = "check_unique"`, any errors #' will include a hint to set the `repair_arg`. #' @param quiet By default, the user is informed of any renaming #' caused by repairing the names. This only concerns unique and #' universal repairing. Set `quiet` to `TRUE` to silence the #' messages. #' #' Users can silence the name repair messages by setting the #' `"rlib_name_repair_verbosity"` global option to `"quiet"`. #' #' @section `minimal` names: #' #' `minimal` names exist. The `names` attribute is not `NULL`. The #' name of an unnamed element is `""` and never `NA`. #' #' Examples: #' #' ``` #' Original names of a vector with length 3: NULL #' minimal names: "" "" "" #' #' Original names: "x" NA #' minimal names: "x" "" #' ``` #' #' #' @section `unique` names: #' #' `unique` names are `minimal`, have no duplicates, and can be used #' (possibly with backticks) in contexts where a variable is #' expected. Empty names, `...`, and `..` followed by a sequence of #' digits are banned. If a data frame has `unique` names, you can #' index it by name, and also access the columns by name. In #' particular, `df[["name"]]` and `` df$`name` `` and also ``with(df, #' `name`)`` always work. #' #' There are many ways to make names `unique`. We append a suffix of the form #' `...j` to any name that is `""` or a duplicate, where `j` is the position. #' We also change `..#` and `...` to `...#`. #' #' Example: #' #' ``` #' Original names: "" "x" "" "y" "x" "..2" "..." #' unique names: "...1" "x...2" "...3" "y" "x...5" "...6" "...7" #' ``` #' #' Pre-existing suffixes of the form `...j` are always stripped, prior #' to making names `unique`, i.e. reconstructing the suffixes. If this #' interacts poorly with your names, you should take control of name #' repair. #' #' #' @section `universal` names: #' #' `universal` names are `unique` and syntactic, meaning they: #' #' * Are never empty (inherited from `unique`). #' * Have no duplicates (inherited from `unique`). #' * Are not `...`. Do not have the form `..i`, where `i` is a #' number (inherited from `unique`). #' * Consist of letters, numbers, and the dot `.` or underscore `_` #' characters. #' * Start with a letter or start with the dot `.` not followed by a #' number. #' * Are not a [reserved] word, e.g., `if` or `function` or `TRUE`. #' #' If a vector has `universal` names, variable names can be used #' "as is" in code. They work well with nonstandard evaluation, e.g., #' `df$name` works. #' #' vctrs has a different method of making names syntactic than #' [base::make.names()]. In general, vctrs prepends one or more dots #' `.` until the name is syntactic. #' #' Examples: #' #' ``` #' Original names: "" "x" NA "x" #' universal names: "...1" "x...2" "...3" "x...4" #' #' Original names: "(y)" "_z" ".2fa" "FALSE" #' universal names: ".y." "._z" "..2fa" ".FALSE" #' ``` #' #' @seealso [rlang::names2()] returns the names of an object, after #' making them `minimal`. #' @examples #' # By default, `vec_as_names()` returns minimal names: #' vec_as_names(c(NA, NA, "foo")) #' #' # You can make them unique: #' vec_as_names(c(NA, NA, "foo"), repair = "unique") #' #' # Universal repairing fixes any non-syntactic name: #' vec_as_names(c("_foo", "+"), repair = "universal") #' @export vec_as_names <- function( names, ..., repair = c( "minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet" ), repair_arg = NULL, quiet = FALSE, call = caller_env() ) { check_dots_empty0(...) .Call( ffi_vec_as_names, names, repair, quiet, environment() ) } # TODO! Error calls validate_name_repair_arg <- function(repair) { .Call(vctrs_validate_name_repair_arg, repair) } validate_minimal_names <- function(names, n = NULL) { .Call(vctrs_validate_minimal_names, names, n) } validate_unique <- function(names, arg = "", n = NULL, call = caller_env()) { validate_minimal_names(names, n) empty_names <- detect_empty_names(names) if (has_length(empty_names)) { stop_names_cannot_be_empty(names, call = call) } dot_dot_name <- detect_dot_dot(names) if (has_length(dot_dot_name)) { stop_names_cannot_be_dot_dot(names, call = call) } if (anyDuplicated(names)) { stop_names_must_be_unique(names, arg, call = call) } invisible(names) } detect_empty_names <- function(names) { which(names == "") } detect_dot_dot <- function(names) { grep("^[.][.](?:[.]|[1-9][0-9]*)$", names) } #' Get or set the names of a vector #' #' @description #' These functions work like [rlang::names2()], [names()] and [names<-()], #' except that they return or modify the the rowwise names of the vector. These are: #' * The usual `names()` for atomic vectors and lists #' * The row names for data frames and matrices #' * The names of the first dimension for arrays #' Rowwise names are size consistent: the length of the names always equals #' [vec_size()]. #' #' `vec_names2()` returns the repaired names from a vector, even if it is unnamed. #' See [vec_as_names()] for details on name repair. #' #' `vec_names()` is a bare-bones version that returns `NULL` if the vector is #' unnamed. #' #' `vec_set_names()` sets the names or removes them. #' #' @param x A vector with names #' @param names A character vector, or `NULL`. #' @inheritParams vec_as_names #' #' @return #' `vec_names2()` returns the names of `x`, repaired. #' `vec_names()` returns the names of `x` or `NULL` if unnamed. #' `vec_set_names()` returns `x` with names updated. #' #' @name vec_names #' @export #' @examples #' vec_names2(1:3) #' vec_names2(1:3, repair = "unique") #' vec_names2(c(a = 1, b = 2)) #' #' # `vec_names()` consistently returns the rowwise names of data frames and arrays: #' vec_names(data.frame(a = 1, b = 2)) #' names(data.frame(a = 1, b = 2)) #' vec_names(mtcars) #' names(mtcars) #' vec_names(Titanic) #' names(Titanic) #' #' vec_set_names(1:3, letters[1:3]) #' vec_set_names(data.frame(a = 1:3), letters[1:3]) vec_names2 <- function( x, ..., repair = c( "minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet" ), quiet = FALSE ) { check_dots_empty0(...) repair <- validate_name_repair_arg(repair) if (is_function(repair)) { names <- minimal_names(x) new_names <- validate_minimal_names(repair(names), n = length(names)) if (!quiet) { describe_repair(names, new_names) } return(new_names) } switch( repair, minimal = minimal_names(x), unique = unique_names(x, quiet = quiet), universal = as_universal_names(minimal_names(x), quiet = quiet), check_unique = validate_unique(minimal_names(x)), unique_quiet = unique_names(x, quiet = TRUE), universal_quiet = as_universal_names(minimal_names(x), quiet = TRUE) ) } vec_repair_names <- function( x, repair = c( "minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet" ), ..., quiet = FALSE ) { if (is.data.frame(x)) { x } else { vec_set_names(x, vec_names2(x, ..., repair = repair, quiet = quiet)) } } minimal_names <- function(x) { .Call(ffi_minimal_names, x) } unique_names <- function(x, quiet = FALSE) { .Call(ffi_unique_names, x, quiet) } #' @rdname vec_names #' @export vec_names <- function(x) { .Call(vctrs_names, x) } as_minimal_names <- function(names) { .Call(ffi_as_minimal_names, names) } as_unique_names <- function(names, quiet = FALSE) { .Call(vctrs_as_unique_names, names, quiet) } as_universal_names <- function(names, quiet = FALSE) { new_names <- names new_names[] <- "" naked_names <- strip_pos(two_to_three_dots(names)) empty <- naked_names %in% c("", "...") new_names[!empty] <- make_syntactic(naked_names[!empty]) needs_suffix <- empty | vec_duplicate_detect(new_names) new_names <- append_pos(new_names, needs_suffix = needs_suffix) if (!quiet) { describe_repair(names, new_names) } new_names } two_to_three_dots <- function(names) { sub("(^[.][.][1-9][0-9]*$)", ".\\1", names) } append_pos <- function(names, needs_suffix) { need_append_pos <- which(needs_suffix) names[need_append_pos] <- paste0( names[need_append_pos], "...", need_append_pos ) names } strip_pos <- function(names) { rx <- "([.][.][.][1-9][0-9]*)+$" gsub(rx, "", names) %|% "" } # Makes each individual name syntactic but does not enforce unique-ness make_syntactic <- function(names) { names[is.na(names)] <- "" names[names == ""] <- "." names[names == "..."] <- "...." names <- sub("^_", "._", names) new_names <- make.names(names) X_prefix <- grepl("^X", new_names) & !grepl("^X", names) new_names[X_prefix] <- sub("^X", "", new_names[X_prefix]) dot_suffix <- which(new_names == paste0(names, ".")) new_names[dot_suffix] <- sub("^(.*)[.]$", ".\\1", new_names[dot_suffix]) # Illegal characters have been replaced with '.' via make.names() # however, we have: # * Declined its addition of 'X' prefixes. # * Turned its '.' suffixes to '.' prefixes. regex <- paste0( "^(?[.]{0,2})", "(?[0-9]*)", "(?[^0-9]?.*$)" ) re <- re_match(new_names, pattern = regex) needs_dots <- which(re$numbers != "") needs_third_dot <- (re$leftovers[needs_dots] == "") re$leading_dots[needs_dots] <- ifelse(needs_third_dot, "...", "..") new_names <- paste0(re$leading_dots, re$numbers, re$leftovers) new_names } # From rematch2, except we don't add tbl_df or tbl classes to the return value re_match <- function(text, pattern, perl = TRUE, ...) { stopifnot( is.character(pattern), length(pattern) == 1, !is.na(pattern) ) text <- as.character(text) match <- regexpr(pattern, text, perl = perl, ...) start <- as.vector(match) length <- attr(match, "match.length") end <- start + length - 1L matchstr <- substring(text, start, end) matchstr[start == -1] <- NA_character_ res <- data.frame( stringsAsFactors = FALSE, .text = text, .match = matchstr ) if (!is.null(attr(match, "capture.start"))) { gstart <- attr(match, "capture.start") glength <- attr(match, "capture.length") gend <- gstart + glength - 1L groupstr <- substring(text, gstart, gend) groupstr[gstart == -1] <- NA_character_ dim(groupstr) <- dim(gstart) res <- cbind(groupstr, res, stringsAsFactors = FALSE) } names(res) <- c(attr(match, "capture.names"), ".text", ".match") res } describe_repair <- function(orig_names, names) { names_inform_repair(orig_names, names) } bullets <- function(..., header = NULL) { problems <- c(...) MAX_BULLETS <- 6L if (length(problems) >= MAX_BULLETS) { n_more <- length(problems) - MAX_BULLETS + 1L problems[[MAX_BULLETS]] <- "..." length(problems) <- MAX_BULLETS } info <- paste0("* ", problems, collapse = "\n") if (!is.null(header)) { info <- paste0(header, "\n", info) } info } # Used in names.c set_rownames_dispatch <- function(x, names) { rownames(x) <- names x } # Used in names.c set_names_dispatch <- function(x, names) { names(x) <- names x } #' @rdname vec_names #' @export vec_set_names <- function(x, names) { .Call(ffi_vec_set_names, x, names) } #' Repair names with legacy method #' #' This standardises names with the legacy approach that was used in #' tidyverse packages (such as tibble, tidyr, and readxl) before #' [vec_as_names()] was implemented. This tool is meant to help #' transitioning to the new name repairing standard and will be #' deprecated and removed from the package some time in the future. #' #' @inheritParams vec_as_names #' @param prefix,sep Prefix and separator for repaired names. #' #' @examples #' if (rlang::is_installed("tibble")) { #' #' library(tibble) #' #' # Names repair is turned off by default in tibble: #' try(tibble(a = 1, a = 2)) #' #' # You can turn it on by supplying a repair method: #' tibble(a = 1, a = 2, .name_repair = "universal") #' #' # If you prefer the legacy method, use `vec_as_names_legacy()`: #' tibble(a = 1, a = 2, .name_repair = vec_as_names_legacy) #' #' } #' @keywords internal #' @export vec_as_names_legacy <- function(names, prefix = "V", sep = "") { if (length(names) == 0) { return(character()) } blank <- names == "" names[!blank] <- make.unique(names[!blank], sep = sep) new_nms <- setdiff(paste(prefix, seq_along(names), sep = sep), names) names[blank] <- new_nms[seq_len(sum(blank))] names } #' Name specifications #' #' @description #' #' A name specification describes how to combine an inner and outer #' names. This sort of name combination arises when concatenating #' vectors or flattening lists. There are two possible cases: #' #' * Named vector: #' #' ``` #' vec_c(outer = c(inner1 = 1, inner2 = 2)) #' ``` #' #' * Unnamed vector: #' #' ``` #' vec_c(outer = 1:2) #' ``` #' #' In r-lib and tidyverse packages, these cases are errors by default, #' because there's no behaviour that works well for every case. #' Instead, you can provide a name specification that describes how to #' combine the inner and outer names of inputs. Name specifications #' can refer to: #' #' * `outer`: The external name recycled to the size of the input #' vector. #' #' * `inner`: Either the names of the input vector, or a sequence of #' integer from 1 to the size of the vector if it is unnamed. #' #' @param name_spec,.name_spec A name specification for combining #' inner and outer names. This is relevant for inputs passed with a #' name, when these inputs are themselves named, like `outer = #' c(inner = 1)`, or when they have length greater than 1: `outer = #' 1:2`. By default, these cases trigger an error. You can resolve #' the error by providing a specification that describes how to #' combine the names or the indices of the inner vector with the #' name of the input. This specification can be: #' #' * A function of two arguments. The outer name is passed as a #' string to the first argument, and the inner names or positions #' are passed as second argument. #' #' * An anonymous function as a purrr-style formula. #' #' * A glue specification of the form `"{outer}_{inner}"`. #' #' * `"inner"`, in which case outer names are ignored, and inner #' names are used if they exist. Note that outer names may still #' be used to provide informative error messages. #' #' * An [rlang::zap()] object, in which case both outer and inner #' names are ignored and the result is unnamed. #' #' See the [name specification topic][name_spec]. #' #' @examples #' # By default, named inputs must be length 1: #' vec_c(name = 1) # ok #' try(vec_c(name = 1:3)) # bad #' #' # They also can't have internal names, even if scalar: #' try(vec_c(name = c(internal = 1))) # bad #' #' # Pass a name specification to work around this. A specification #' # can be a glue string referring to `outer` and `inner`: #' vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}") #' vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}_{inner}") #' #' # They can also be functions: #' my_spec <- function(outer, inner) paste(outer, inner, sep = "_") #' vec_c(name = 1:3, other = 4:5, .name_spec = my_spec) #' #' # Or purrr-style formulas for anonymous functions: #' vec_c(name = 1:3, other = 4:5, .name_spec = ~ paste0(.x, .y)) #' #' # Or the string `"inner"` to only use inner names #' vec_c(name = 1:3, outer = 4:5, .name_spec = "inner") #' vec_c(name = c(a = 1, b = 2, c = 3), outer = 4:5, .name_spec = "inner") #' # This can be useful when you want outer names mentioned in error messages, #' # but you don't want them interfering with the result #' try(vec_c(x = c(a = 1), y = c(b = "2"), .name_spec = "inner")) #' #' # Or `rlang::zap()` to ignore both outer and inner names entirely #' vec_c(name = c(a = 1, b = 2), outer = c(c = 3), .name_spec = rlang::zap()) #' @name name_spec NULL apply_name_spec <- function(name_spec, outer, inner, n = length(inner)) { .Call(ffi_apply_name_spec, name_spec, outer, inner, n) } glue_as_name_spec <- function(`_spec`) { function(inner, outer) { glue::glue(`_spec`) } } # Evaluate glue specs in a child of base for now environment(glue_as_name_spec) <- baseenv() vctrs/R/import-standalone-purrr.R0000644000176200001440000001277714751701606016574 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-purrr.R # last-updated: 2023-02-23 # license: https://unlicense.org # --- # # This file provides a minimal shim to provide a purrr-like API on top of # base R functions. They are not drop-in replacements but allow a similar style # of programming. # # ## Changelog # # 2023-02-23: # * Added `list_c()` # # 2022-06-07: # * `transpose()` is now more consistent with purrr when inner names # are not congruent (#1346). # # 2021-12-15: # * `transpose()` now supports empty lists. # # 2021-05-21: # * Fixed "object `x` not found" error in `imap()` (@mgirlich) # # 2020-04-14: # * Removed `pluck*()` functions # * Removed `*_cpl()` functions # * Used `as_function()` to allow use of `~` # * Used `.` prefix for helpers # # nocov start map <- function(.x, .f, ...) { .f <- as_function(.f, env = global_env()) lapply(.x, .f, ...) } walk <- function(.x, .f, ...) { map(.x, .f, ...) invisible(.x) } map_lgl <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, logical(1), ...) } map_int <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, integer(1), ...) } map_dbl <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, double(1), ...) } map_chr <- function(.x, .f, ...) { .rlang_purrr_map_mold(.x, .f, character(1), ...) } .rlang_purrr_map_mold <- function(.x, .f, .mold, ...) { .f <- as_function(.f, env = global_env()) out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE) names(out) <- names(.x) out } map2 <- function(.x, .y, .f, ...) { .f <- as_function(.f, env = global_env()) out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE) if (length(out) == length(.x)) { set_names(out, names(.x)) } else { set_names(out, NULL) } } map2_lgl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "logical") } map2_int <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "integer") } map2_dbl <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "double") } map2_chr <- function(.x, .y, .f, ...) { as.vector(map2(.x, .y, .f, ...), "character") } imap <- function(.x, .f, ...) { map2(.x, names(.x) %||% seq_along(.x), .f, ...) } pmap <- function(.l, .f, ...) { .f <- as.function(.f) args <- .rlang_purrr_args_recycle(.l) do.call("mapply", c( FUN = list(quote(.f)), args, MoreArgs = quote(list(...)), SIMPLIFY = FALSE, USE.NAMES = FALSE )) } .rlang_purrr_args_recycle <- function(args) { lengths <- map_int(args, length) n <- max(lengths) stopifnot(all(lengths == 1L | lengths == n)) to_recycle <- lengths == 1L args[to_recycle] <- map(args[to_recycle], function(x) rep.int(x, n)) args } keep <- function(.x, .f, ...) { .x[.rlang_purrr_probe(.x, .f, ...)] } discard <- function(.x, .p, ...) { sel <- .rlang_purrr_probe(.x, .p, ...) .x[is.na(sel) | !sel] } map_if <- function(.x, .p, .f, ...) { matches <- .rlang_purrr_probe(.x, .p) .x[matches] <- map(.x[matches], .f, ...) .x } .rlang_purrr_probe <- function(.x, .p, ...) { if (is_logical(.p)) { stopifnot(length(.p) == length(.x)) .p } else { .p <- as_function(.p, env = global_env()) map_lgl(.x, .p, ...) } } compact <- function(.x) { Filter(length, .x) } transpose <- function(.l) { if (!length(.l)) { return(.l) } inner_names <- names(.l[[1]]) if (is.null(inner_names)) { fields <- seq_along(.l[[1]]) } else { fields <- set_names(inner_names) .l <- map(.l, function(x) { if (is.null(names(x))) { set_names(x, inner_names) } else { x } }) } # This way missing fields are subsetted as `NULL` instead of causing # an error .l <- map(.l, as.list) map(fields, function(i) { map(.l, .subset2, i) }) } every <- function(.x, .p, ...) { .p <- as_function(.p, env = global_env()) for (i in seq_along(.x)) { if (!rlang::is_true(.p(.x[[i]], ...))) return(FALSE) } TRUE } some <- function(.x, .p, ...) { .p <- as_function(.p, env = global_env()) for (i in seq_along(.x)) { if (rlang::is_true(.p(.x[[i]], ...))) return(TRUE) } FALSE } negate <- function(.p) { .p <- as_function(.p, env = global_env()) function(...) !.p(...) } reduce <- function(.x, .f, ..., .init) { f <- function(x, y) .f(x, y, ...) Reduce(f, .x, init = .init) } reduce_right <- function(.x, .f, ..., .init) { f <- function(x, y) .f(y, x, ...) Reduce(f, .x, init = .init, right = TRUE) } accumulate <- function(.x, .f, ..., .init) { f <- function(x, y) .f(x, y, ...) Reduce(f, .x, init = .init, accumulate = TRUE) } accumulate_right <- function(.x, .f, ..., .init) { f <- function(x, y) .f(y, x, ...) Reduce(f, .x, init = .init, right = TRUE, accumulate = TRUE) } detect <- function(.x, .f, ..., .right = FALSE, .p = is_true) { .p <- as_function(.p, env = global_env()) .f <- as_function(.f, env = global_env()) for (i in .rlang_purrr_index(.x, .right)) { if (.p(.f(.x[[i]], ...))) { return(.x[[i]]) } } NULL } detect_index <- function(.x, .f, ..., .right = FALSE, .p = is_true) { .p <- as_function(.p, env = global_env()) .f <- as_function(.f, env = global_env()) for (i in .rlang_purrr_index(.x, .right)) { if (.p(.f(.x[[i]], ...))) { return(i) } } 0L } .rlang_purrr_index <- function(x, right = FALSE) { idx <- seq_along(x) if (right) { idx <- rev(idx) } idx } list_c <- function(x) { inject(c(!!!x)) } # nocov end vctrs/R/slice-chop.R0000644000176200001440000000630715072256373014005 0ustar liggesusers#' Chopping #' #' @description #' `vec_chop()` provides an efficient method to repeatedly slice a vector. It #' captures the pattern of `map(indices, vec_slice, x = x)`. When no indices #' are supplied, it is generally equivalent to [as.list()]. #' #' @inheritParams rlang::args_dots_empty #' #' @param x A vector #' #' @param indices A list of positive integer vectors to slice `x` with, or #' `NULL`. Can't be used if `sizes` is already specified. If both `indices` #' and `sizes` are `NULL`, `x` is split into its individual elements, #' equivalent to using an `indices` of `as.list(vec_seq_along(x))`. #' #' @param sizes An integer vector of non-negative sizes representing sequential #' indices to slice `x` with, or `NULL`. Can't be used if `indices` is already #' specified. #' #' For example, `sizes = c(2, 4)` is equivalent to `indices = list(1:2, 3:6)`, #' but is typically faster. #' #' `sum(sizes)` must be equal to `vec_size(x)`, i.e. `sizes` must completely #' partition `x`, but an individual size is allowed to be `0`. #' #' @returns #' A list where each element has the same type as `x`. The size of the list is #' equal to `vec_size(indices)`, `vec_size(sizes)`, or `vec_size(x)` depending #' on whether or not `indices` or `sizes` is provided. #' #' @section Dependencies: #' - [vec_slice()] #' #' @export #' @examples #' vec_chop(1:5) #' #' # These two are equivalent #' vec_chop(1:5, indices = list(1:2, 3:5)) #' vec_chop(1:5, sizes = c(2, 3)) #' #' # Can also be used on data frames #' vec_chop(mtcars, indices = list(1:3, 4:6)) #' #' # If you know your input is sorted and you'd like to split on the groups, #' # `vec_run_sizes()` can be efficiently combined with `sizes` #' df <- data_frame( #' g = c(2, 5, 5, 6, 6, 6, 6, 8, 9, 9), #' x = 1:10 #' ) #' vec_chop(df, sizes = vec_run_sizes(df$g)) #' #' # If you have a list of homogeneous vectors, sometimes it can be useful to #' # combine, apply a function to the flattened vector, and chop according #' # to the original indices. This can be done efficiently with `list_sizes()`. #' x <- list(c(1, 2, 1), c(3, 1), 5, double()) #' x_flat <- vec_c(!!!x) #' x_flat <- x_flat + max(x_flat) #' vec_chop(x_flat, sizes = list_sizes(x)) vec_chop <- function(x, ..., indices = NULL, sizes = NULL) { if (!missing(...)) { indices <- check_dots_chop(..., indices = indices) } .Call(ffi_vec_chop, x, indices, sizes) } check_dots_chop <- function(..., indices = NULL, call = caller_env()) { if (!is_null(indices)) { # Definitely can't supply both `indices` and `...` check_dots_empty0(..., call = call) } if (dots_n(...) != 1L) { # Backwards compatible case doesn't allow for length >1 `...`. # This must be an error case. check_dots_empty0(..., call = call) } # TODO: Soft-deprecate this after dplyr/tidyr have updated all `vec_chop()` # calls to be explicit about `indices =` # Assume this is an old style `vec_chop(x, indices)` call, before we # added the `...` indices <- list(...)[[1L]] indices } # Exposed for testing (`starts` is 0-based) vec_chop_seq <- function(x, starts, sizes, increasings = TRUE) { args <- vec_recycle_common(starts, sizes, increasings) .Call(ffi_vec_chop_seq, x, args[[1]], args[[2]], args[[3]]) } vctrs/R/print-str.R0000644000176200001440000000631715065005761013715 0ustar liggesusers# print ------------------------------------------------------------------- #' `print()` and `str()` generics. #' #' These are constructed to be more easily extensible since you can override #' the `_header()`, `_data()` or `_footer()` components individually. The #' default methods are built on top of `format()`. #' #' @param x A vector #' @param ... Additional arguments passed on to methods. See [print()] and #' [str()] for commonly used options #' @keywords internal #' @export obj_print <- function(x, ...) { obj_print_header(x, ...) obj_print_data(x, ...) obj_print_footer(x, ...) invisible(x) } #' @export #' @rdname obj_print obj_print_header <- function(x, ...) { UseMethod("obj_print_header") } #' @export obj_print_header.default <- function(x, ...) { cat_line("<", vec_ptype_full(x), "[", vec_size(x), "]>") invisible(x) } #' @export #' @rdname obj_print obj_print_data <- function(x, ...) { UseMethod("obj_print_data") } #' @export obj_print_data.default <- function(x, ...) { if (length(x) == 0) { return(invisible(x)) } out <- stats::setNames(format(x), names(x)) print(out, quote = FALSE) invisible(x) } #' @export #' @rdname obj_print obj_print_footer <- function(x, ...) { UseMethod("obj_print_footer") } #' @export obj_print_footer.default <- function(x, ...) { invisible(x) } # str --------------------------------------------------------------------- #' @export #' @rdname obj_print obj_str <- function(x, ...) { obj_str_header(x, ...) obj_str_data(x, ...) obj_str_footer(x, ...) } #' @export #' @rdname obj_print obj_str_header <- function(x, ...) { UseMethod("obj_str_header") } #' @export obj_str_header.default <- function(x, ...) { invisible(x) } #' @export #' @rdname obj_print obj_str_data <- function(x, ...) { UseMethod("obj_str_data") } #' @export obj_str_data.default <- function(x, ...) { if (is.list(x)) { obj_str_recursive(x, ...) } else { obj_str_leaf(x, ...) } } obj_str_recursive <- function(x, ..., indent.str = "", nest.lev = 0) { if (nest.lev != 0L) { cat(" ") } cat_line(glue::glue("{vec_ptype_abbr(x)} [1:{vec_size(x)}] ")) utils::str( vec_data(x), no.list = TRUE, ..., nest.lev = nest.lev + 1L, indent.str = indent.str ) } obj_str_leaf <- function(x, ..., indent.str = "", width = getOption("width")) { width <- width - nchar(indent.str) - 2 # Avoid spending too much time formatting elements that won't see length <- ceiling(width / 2) if (length(x) > length) { out <- x[seq2(1, length)] } else { out <- x } title <- glue::glue(" {vec_ptype_abbr(x)} [1:{length(x)}] ") cat_line(inline_list(title, format(out), width = width)) invisible(x) } #' @export #' @rdname obj_print obj_str_footer <- function(x, ...) { UseMethod("obj_str_footer") } #' @export obj_str_footer.default <- function(x, ..., indent.str = "", nest.lev = 0) { attr <- attributes(x) attr[["class"]] <- NULL attr[["names"]] <- NULL if (length(attr) == 0) { return(invisible(x)) } if (!is.list(x)) { indent.str <- paste0(" ", indent.str) } utils::str( attr, no.list = TRUE, ..., comp.str = "@ ", nest.lev = nest.lev + 1L, indent.str = indent.str ) invisible(x) } vctrs/R/compare.R0000644000176200001440000001063115120515501013361 0ustar liggesusers# proxies ----------------------------------------------------------------- #' Comparison and order proxy #' #' @description #' `vec_proxy_compare()` and `vec_proxy_order()` return proxy objects, i.e. #' an atomic vector or data frame of atomic vectors. #' #' For [`vctrs_vctr`][vctr] objects: #' #' - `vec_proxy_compare()` determines the behavior of `<`, `>`, `>=` #' and `<=` (via [vec_compare()]); and [min()], [max()], [median()], and #' [quantile()]. #' #' - `vec_proxy_order()` determines the behavior of `order()` and `sort()` #' (via `xtfrm()`). #' #' @details #' The default method of `vec_proxy_compare()` assumes that all classes built #' on top of atomic vectors or records are comparable. Internally the default #' calls [vec_proxy_equal()]. If your class is not comparable, you will need #' to provide a `vec_proxy_compare()` method that throws an error. #' #' The behavior of `vec_proxy_order()` is identical to `vec_proxy_compare()`, #' with the exception of lists. Lists are not comparable, as comparing #' elements of different types is undefined. However, to allow ordering of #' data frames containing list-columns, the ordering proxy of a list is #' generated as an integer vector that can be used to order list elements #' by first appearance. #' #' If a class implements a `vec_proxy_compare()` method, it usually doesn't need #' to provide a `vec_proxy_order()` method, because the latter is implemented #' by forwarding to `vec_proxy_compare()` by default. Classes inheriting from #' list are an exception: due to the default `vec_proxy_order()` implementation, #' `vec_proxy_compare()` and `vec_proxy_order()` should be provided for such #' classes (with identical implementations) to avoid mismatches between #' comparison and sorting. #' #' @inheritSection vec_proxy_equal Data frames #' #' @param x A vector x. #' @inheritParams rlang::args_dots_empty #' @return A 1d atomic vector or a data frame. #' #' @section Dependencies: #' - [vec_proxy_equal()] called by default in `vec_proxy_compare()` #' - [vec_proxy_compare()] called by default in `vec_proxy_order()` #' #' @keywords internal #' @export #' @examples #' # Lists are not comparable #' x <- list(1:2, 1, 1:2, 3) #' try(vec_compare(x, x)) #' #' # But lists are orderable by first appearance to allow for #' # ordering data frames with list-cols #' df <- new_data_frame(list(x = x)) #' vec_sort(df) vec_proxy_compare <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_proxy_compare, x)) UseMethod("vec_proxy_compare") } #' @export vec_proxy_compare.default <- function(x, ...) { stop_native_implementation("vec_proxy_compare.default") } #' @rdname vec_proxy_compare #' @export vec_proxy_order <- function(x, ...) { check_dots_empty0(...) return(.Call(vctrs_proxy_order, x)) UseMethod("vec_proxy_order") } #' @export vec_proxy_order.default <- function(x, ...) { stop_native_implementation("vec_proxy_order.default") } # compare ----------------------------------------------------------------- #' Compare two vectors #' #' @section S3 dispatch: #' `vec_compare()` is not generic for performance; instead it uses #' [vec_proxy_compare()] to create a proxy that is used in the comparison. #' #' @param x,y Vectors with compatible types and lengths. #' @param na_equal Should `NA` values be considered equal? #' @param .ptype Override to optionally specify common type #' @return An integer vector with values -1 for `x < y`, 0 if `x == y`, #' and 1 if `x > y`. If `na_equal` is `FALSE`, the result will be `NA` #' if either `x` or `y` is `NA`. #' #' @section Dependencies: #' - [vec_cast_common()] with fallback #' - [vec_recycle_common()] #' - [vec_proxy_compare()] #' #' @export #' @examples #' vec_compare(c(TRUE, FALSE, NA), FALSE) #' vec_compare(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) #' #' vec_compare(1:10, 5) #' vec_compare(runif(10), 0.5) #' vec_compare(letters[1:10], "d") #' #' df <- data.frame(x = c(1, 1, 1, 2), y = c(0, 1, 2, 1)) #' vec_compare(df, data.frame(x = 1, y = 1)) vec_compare <- function(x, y, na_equal = FALSE, .ptype = NULL) { obj_check_vector(x) obj_check_vector(y) check_bool(na_equal) args <- vec_recycle_common(x, y) args <- vec_cast_common_params(!!!args, .to = .ptype) .Call( ffi_vec_compare, vec_proxy_compare(args[[1]]), vec_proxy_compare(args[[2]]), na_equal ) } # Helpers ----------------------------------------------------------------- # Used for testing cmp <- function(x, y) (x > y) - (x < y) vctrs/R/split.R0000644000176200001440000000215715065005761013104 0ustar liggesusers#' Split a vector into groups #' #' This is a generalisation of [split()] that can split by any type of vector, #' not just factors. Instead of returning the keys in the character names, #' the are returned in a separate parallel vector. #' #' @param x Vector to divide into groups. #' @param by Vector whose unique values defines the groups. #' @return A data frame with two columns and size equal to #' `vec_size(vec_unique(by))`. The `key` column has the same type as #' `by`, and the `val` column is a list containing elements of type #' `vec_ptype(x)`. #' #' Note for complex types, the default `data.frame` print method will be #' suboptimal, and you will want to coerce into a tibble to better #' understand the output. #' @export #' #' @section Dependencies: #' - [vec_group_loc()] #' - [vec_chop()] #' #' @examples #' vec_split(mtcars$cyl, mtcars$vs) #' vec_split(mtcars$cyl, mtcars[c("vs", "am")]) #' #' if (require("tibble")) { #' as_tibble(vec_split(mtcars$cyl, mtcars[c("vs", "am")])) #' as_tibble(vec_split(mtcars, mtcars[c("vs", "am")])) #' } vec_split <- function(x, by) { .Call(vctrs_split, x, by) } vctrs/R/missing.R0000644000176200001440000000337014713505651013422 0ustar liggesusers#' Missing values #' #' @description #' - `vec_detect_missing()` returns a logical vector the same size as `x`. For #' each element of `x`, it returns `TRUE` if the element is missing, and `FALSE` #' otherwise. #' #' - `vec_any_missing()` returns a single `TRUE` or `FALSE` depending on whether #' or not `x` has _any_ missing values. #' #' ## Differences with [is.na()] #' #' Data frame rows are only considered missing if every element in the row is #' missing. Similarly, [record vector][new_rcrd()] elements are only considered #' missing if every field in the record is missing. Put another way, rows with #' _any_ missing values are considered [incomplete][vec_detect_complete()], but #' only rows with _all_ missing values are considered missing. #' #' List elements are only considered missing if they are `NULL`. #' #' @param x A vector #' #' @return #' - `vec_detect_missing()` returns a logical vector the same size as `x`. #' #' - `vec_any_missing()` returns a single `TRUE` or `FALSE`. #' #' @section Dependencies: #' - [vec_proxy_equal()] #' #' @name missing #' @seealso [vec_detect_complete()] #' #' @examples #' x <- c(1, 2, NA, 4, NA) #' #' vec_detect_missing(x) #' vec_any_missing(x) #' #' # Data frames are iterated over rowwise, and only report a row as missing #' # if every element of that row is missing. If a row is only partially #' # missing, it is said to be incomplete, but not missing. #' y <- c("a", "b", NA, "d", "e") #' df <- data_frame(x = x, y = y) #' #' df$missing <- vec_detect_missing(df) #' df$incomplete <- !vec_detect_complete(df) #' df NULL #' @rdname missing #' @export vec_detect_missing <- function(x) { .Call(ffi_vec_detect_missing, x) } #' @rdname missing #' @export vec_any_missing <- function(x) { .Call(ffi_vec_any_missing, x) } vctrs/R/encoding.R0000644000176200001440000000022115156537555013541 0ustar liggesusersobj_encode_utf8 <- function(x) { .Call(ffi_obj_encode_utf8, x) } chr_is_ascii_or_utf8 <- function(x) { .Call(ffi_chr_is_ascii_or_utf8, x) } vctrs/R/type-table.R0000644000176200001440000000314415113335375014015 0ustar liggesusers#' Table S3 class #' #' These functions help the base table class fit into the vctrs type system #' by providing coercion and casting functions. #' #' @keywords internal #' @name table NULL #' @export vec_restore.table <- function(x, to, ...) { new_table(x, dim = dim(x), dimnames = dimnames(x)) } # Print ------------------------------------------------------------------- #' @export vec_ptype_full.table <- function(x, ...) { paste0("table", vec_ptype_shape(x)) } #' @export vec_ptype_abbr.table <- function(x, ...) { "table" } # Coercion ---------------------------------------------------------------- #' @export vec_ptype2.table.table <- function(x, y, ..., x_arg = "", y_arg = "") { ptype <- vec_ptype2(unstructure(x), unstructure(y)) vec_shaped_ptype(new_table(ptype), x, y, x_arg = x_arg, y_arg = y_arg) } #' @export vec_cast.table.table <- function(x, to, ...) { out <- vec_cast(unstructure(x), unstructure(to)) out <- new_table(out, dim = dim(x), dimnames = dimnames(x)) shape_broadcast(out, to, ...) } # ------------------------------------------------------------------------------ new_table <- function(x = integer(), dim = NULL, dimnames = NULL) { if (is_null(dim)) { dim <- length(x) } else if (!is.integer(dim)) { abort("`dim` must be an integer vector.") } n_elements <- prod(dim) n_x <- length(x) if (n_elements != n_x) { abort(glue::glue( "Length implied by `dim`, {n_elements}, must match the length of `x`, {n_x}." )) } structure(x, dim = dim, dimnames = dimnames, class = "table") } is_bare_table <- function(x) { identical(class(x), "table") } vctrs/LICENSE.note0000644000176200001440000004136714276722575013420 0ustar liggesusersThe implementation of vec_order() is based on data.table’s forder() and their earlier contribution to R’s order(). This warrants placing specific files in the vctrs package under the MPL-2.0 license used by data.table. Files named with the pattern of `src/order-*.c` and `src/order-*.h` are additionally under the MPL-2.0 license. MPL-2.0 License ---------------------------------------------------------------- Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. * * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. * * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. vctrs/vignettes/0000755000176200001440000000000015157322654013435 5ustar liggesusersvctrs/vignettes/stability.Rmd0000644000176200001440000003142614376223322016105 0ustar liggesusers--- title: "Type and size stability" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Type and size stability} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the "shape" of the output you only need to know the "shape"s of the inputs. This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can't predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` ## Definitions We say a function is __type-stable__ iff: 1. You can predict the output type knowing only the input types. 1. The order of arguments in ... does not affect the output type. Similarly, a function is __size-stable__ iff: 1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size. Very few base R functions are size-stable, so I'll also define a slightly weaker condition. I'll call a function __length-stable__ iff: 1. You can predict the output _length_ knowing only the input _lengths_, or there is a single numeric input that specifies the output _length_. (But note that length-stable is not a particularly robust definition because `length()` returns a value for things that are not vectors.) We'll call functions that don't obey these principles __type-unstable__ and __size-unstable__ respectively. On top of type- and size-stability it's also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions. The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again. ### Examples To make these ideas concrete, let's apply them to a few base functions: 1. `mean()` is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error). 1. Surprisingly, `median()` is type-unstable: ```{r} vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ``` It is, however, size-stable, since it always returns a vector of length 1. 1. `sapply()` is type-unstable because you can't predict the output type only knowing the input types: ```{r} vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ``` It's not quite size-stable; `vec_size(sapply(x, f))` is `vec_size(x)` for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns). 1. `vapply()` is a type-stable version of `sapply()` because `vec_ptype_show(vapply(x, fn, template))` is always `vec_ptype_show(template)`. It is size-unstable for the same reasons as `sapply()`. 1. `c()` is type-unstable because `c(x, y)` doesn't always output the same type as `c(y, x)`. ```{r} vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ``` `c()` is *almost always* length-stable because `length(c(x, y))` *almost always* equals `length(x) + length(y)`. One common source of instability here is dealing with non-vectors (see the later section "Non-vectors"): ```{r} env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ``` 1. `paste(x1, x2)` is length-stable because `length(paste(x1, x2))` equals `max(length(x1), length(x2))`. However, it doesn't follow the usual arithmetic recycling rules because `paste(1:2, 1:3)` doesn't generate a warning. 1. `ifelse()` is length-stable because `length(ifelse(cond, true, false))` is always `length(cond)`. `ifelse()` is type-unstable because the output type depends on the value of `cond`: ```{r} vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ``` 1. `read.csv(file)` is type-unstable and size-unstable because, while you know it will return a data frame, you don't know what columns it will return or how many rows it will have. Similarly, `df[[i]]` is not type-stable because the result depends on the _value_ of `i`. There are many important functions that can not be made type-stable or size-stable! With this understanding of type- and size-stability in hand, we'll use them to analyse some base R functions in greater depth and then propose alternatives with better properties. ## `c()` and `vctrs::vec_c()` In this section we'll compare and contrast `c()` and `vec_c()`. `vec_c()` is both type- and size-stable because it possesses the following invariants: * `vec_ptype(vec_c(x, y))` equals `vec_ptype_common(x, y)`. * `vec_size(vec_c(x, y))` equals `vec_size(x) + vec_size(y)`. `c()` has another undesirable property in that it's not consistent with `unlist()`; i.e., `unlist(list(x, y))` does not always equal `c(x, y)`; i.e., base R has multiple sets of type-coercion rules. I won't consider this problem further here. I have two goals here: * To fully document the quirks of `c()`, hence motivating the development of an alternative. * To discuss non-obvious consequences of the type- and size-stability above. ### Atomic vectors If we only consider atomic vectors, `c()` is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical. ```{r} c(FALSE, 1L, 2.5) ``` `vec_c()` obeys similar rules: ```{r} vec_c(FALSE, 1L, 2.5) ``` But it does not automatically coerce to character vectors or lists: ```{r, error = TRUE} c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ``` ### Incompatible vectors and non-vectors In general, most base methods do not throw an error: ```{r} c(10.5, factor("x")) ``` If the inputs aren't vectors, `c()` automatically puts them in a list: ```{r} c(mean, globalenv()) ``` For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list: ```{r, error = TRUE} c(getRversion(), "x") c("x", getRversion()) ``` `vec_c()` throws an error if the inputs are not vectors or not automatically coercible: ```{r, error = TRUE} vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ``` ### Factors Combining two factors returns an integer vector: ```{r} fa <- factor("a") fb <- factor("b") c(fa, fb) ``` (This is documented in `c()` but is still undesirable.) `vec_c()` returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with `dplyr::bind_rows()`, which is stricter and is a common source of user difficulty.) ```{r} vec_c(fa, fb) vec_c(fb, fa) ``` ### Date-times `c()` strips the time zone associated with date-times: ```{r} datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ``` This behaviour is documented in `?DateTimeClasses` but is the source of considerable user pain. `vec_c()` preserves time zones: ```{r} vec_c(datetime_nz) ``` What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there's no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone: ```{r} datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ``` ### Dates and date-times Combining dates and date-times with `c()` gives silently incorrect results: ```{r} date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ``` This behaviour arises because neither `c.Date()` nor `c.POSIXct()` check that all inputs are of the same type. `vec_c()` uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time). ```{r} vec_c(date, datetime) vec_c(date, datetime_nz) ``` ### Missing values If a missing value comes at the beginning of the inputs, `c()` falls back to the internal behaviour, which strips all attributes: ```{r} c(NA, fa) c(NA, date) c(NA, datetime) ``` `vec_c()` takes a different approach treating a logical vector consisting only of `NA` as the `unspecified()` class which can be converted to any other 1d type: ```{r} vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ``` ### Data frames Because it is *almost always* length-stable, `c()` combines data frames column wise (into a list): ```{r} df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ``` `vec_c()` is size-stable, which implies it will row-bind data frames: ```{r} vec_c(df1, df2) ``` ### Matrices and arrays The same reasoning applies to matrices: ```{r} m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ``` One difference is that `vec_c()` will "broadcast" a vector to match the dimensions of a matrix: ```{r} c(m, 1) vec_c(m, 1) ``` ### Implementation The basic implementation of `vec_c()` is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with `vec_init()`, and then insert each input into the correct place in the output. ```{r, eval = FALSE} vec_c <- function(...) { args <- compact(list2(...)) ptype <- vec_ptype_common(!!!args) if (is.null(ptype)) return(NULL) ns <- map_int(args, vec_size) out <- vec_init(ptype, sum(ns)) pos <- 1 for (i in seq_along(ns)) { n <- ns[[i]] x <- vec_cast(args[[i]], to = ptype) vec_slice(out, pos:(pos + n - 1)) <- x pos <- pos + n } out } ``` (The real `vec_c()` is a bit more complicated in order to handle inner and outer names). ## `ifelse()` One of the functions that motivate the development of vctrs is `ifelse()`. It has the surprising property that the result value is "A vector of the same length and attributes (including dimensions and class) as `test`". To me, it seems more reasonable for the type of the output to be controlled by the type of the `yes` and `no` arguments. In `dplyr::if_else()` I swung too far towards strictness: it throws an error if `yes` and `no` are not the same type. This is annoying in practice because it requires typed missing values (`NA_character_` etc), and because the checks are only on the class (not the full prototype), it's easy to create invalid output. I found it much easier to understand what `ifelse()` _should_ do once I internalised the ideas of type- and size-stability: * The first argument must be logical. * `vec_ptype(if_else(test, yes, no))` equals `vec_ptype_common(yes, no)`. Unlike `ifelse()` this implies that `if_else()` must always evaluate both `yes` and `no` in order to figure out the correct type. I think this is consistent with `&&` (scalar operation, short circuits) and `&` (vectorised, evaluates both sides). * `vec_size(if_else(test, yes, no))` equals `vec_size_common(test, yes, no)`. I think the output could have the same size as `test` (i.e., the same behaviour as `ifelse`), but I _think_ as a general rule that your inputs should either be mutually recycling or not. This leads to the following implementation: ```{r} if_else <- function(test, yes, no) { if (!is_logical(test)) { abort("`test` must be a logical vector.") } c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ``` By using `vec_size()` and `vec_slice()`, this definition of `if_else()` automatically works with data.frames and matrices: ```{r} if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) ``` vctrs/vignettes/s3-vector.Rmd0000644000176200001440000013076115132202160015714 0ustar liggesusers--- title: "S3 vectors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{S3 vectors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ``` This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you'll also need to provide methods that actually make the vector useful. I assume that you're already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of [the S3 chapter](https://adv-r.hadley.nz/s3.html) of *Advanced R*. This article refers to "vectors of numbers" as *double vectors*. Here, "double" stands for ["double precision floating point number"](https://en.wikipedia.org/wiki/Double-precision_floating-point_format), see also `double()`. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` This vignette works through five big topics: - The basics of creating a new vector class with vctrs. - The coercion and casting system. - The record and list-of types. - Equality and comparison proxies. - Arithmetic operators. They're collectively demonstrated with a number of simple S3 classes: - Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting. - Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions. - Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care. - Rational: a pair of integer vectors that defines a rational number like `2 / 3`. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for `+`, `-`, and friends. - Polynomial: a list of integer vectors that define polynomials like `1 + x - x^3`. Sorting such vectors correctly requires a custom equality method. - Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties. - Period and frequency: a pair of classes represent a period, or its inverse, frequency. This allows us to explore more arithmetic operators. ## Basics In this section you'll learn how to create a new vctrs class by calling `new_vctr()`. This creates an object with class `vctrs_vctr` which has a number of methods. These are designed to make your life as easy as possible. For example: - The `print()` and `str()` methods are defined in terms of `format()` so you get a pleasant, consistent display as soon as you've made your `format()` method. - You can immediately put your new vector class in a data frame because `as.data.frame.vctrs_vctr()` does the right thing. - Subsetting (`[`, `[[`, and `$`), `length<-`, and `rep()` methods automatically preserve attributes because they use `vec_restore()`. A default `vec_restore()` works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data. - Default subset-assignment methods (`[<-`, `[[<-`, and `$<-`) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages. ### Percent class In this section, I'll show you how to make a `percent` class, i.e., a double vector that is printed as a percentage. We start by defining a low-level [constructor](https://adv-r.hadley.nz/s3.html#s3-constrcutor) to check types and/or sizes and call `new_vctr()`. `percent` is built on a double vector of any length and doesn't have any attributes. ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ``` Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as [blob](https://blob.tidyverse.org/)), it's fine to use the package name without prefix as the class name. We then follow up with a user friendly [helper](https://adv-r.hadley.nz/s3.html#helpers). Here we'll use `vec_cast()` to allow it to accept anything coercible to a double: ```{r} percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype. ```{r} new_percent() percent() ``` For the convenience of your users, consider implementing an `is_percent()` function: ```{r} is_percent <- function(x) { inherits(x, "vctrs_percent") } ``` ### `format()` method The first method for every class should almost always be a `format()` method. This should return a character vector the same length as `x`. The easiest way to do this is to rely on one of R's low-level formatting functions like `formatC()`: ```{r} format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ``` ```{r, include = FALSE} # As of R 3.5, print.vctr can not find format.percent since it's not in # its lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ``` ```{r} x ``` (Note the use of `vec_data()` so `format()` doesn't get stuck in an infinite loop, and that I take a little care to not convert `NA` to `"NA"`; this leads to better printing.) The format method is also used by data frames, tibbles, and `str()`: ```{r} data.frame(x) ``` For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in `str()`: ```{r} vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ``` If you need more control over printing in tibbles, implement a method for `pillar::pillar_shaft()`. See `vignette("pillar", package = "vctrs")` for details. ## Casting and coercion The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens *implicitly* (e.g in `c()`) we call it **coercion**; when the change happens *explicitly* (e.g. with `as.integer(x)`), we call it **casting**. One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it's possible to make accurate predictions about what (e.g.) `c(x, y)` should do when `x` and `y` have different prototypes. vctrs achieves this goal through two generics: - `vec_ptype2(x, y)` defines possible set of coercions. It returns a prototype if `x` and `y` can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes. - `vec_cast(x, to)` defines the possible sets of casts. It returns `x` translated to have prototype `to`, or throws an error if the conversion isn't possible. The set of possible casts is a superset of possible coercions because they're requested explicitly. ### Double dispatch Both generics use [**double dispatch**](https://en.wikipedia.org/wiki/Double_dispatch) which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means: - You end up with method names with two classes, like `vec_ptype2.foo.bar()`. - You don't need to implement default methods (they would never be called if you do). - You can't call `NextMethod()`. ### Percent class {#percent} We'll make our percent class coercible back and forth with double vectors. `vec_ptype2()` provides a user friendly error message if the coercion doesn't exist and makes sure `NA` is handled in a standard way. `NA` is technically a logical vector, but we want to stand in for a missing value of any type. ```{r, error = TRUE} vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ``` By default and in simple cases, an object of the same class is compatible with itself: ```{r} vec_ptype2(percent(), percent()) ``` However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we'll start by saying that a `vctrs_percent` combined with a `vctrs_percent` yields a `vctrs_percent`, which we indicate by returning a prototype generated by the constructor. ```{r} vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ``` Next we define methods that say that combining a `percent` and double should yield a `double`. We avoid returning a `percent` here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers. Because double dispatch is a bit of a hack, we need to provide two methods. It's your responsibility to ensure that each member of the pair returns the same result: if they don't you will get weird and unpredictable behaviour. The double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. If we implemented `vec_ptype2.vctrs_percent.numeric()`, it would never be called. ```{r} vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ``` We can check that we've implemented this correctly with `vec_ptype_show()`: ```{r} vec_ptype_show(percent(), double(), percent()) ``` The `vec_ptype2()` methods define which input is the richer type that vctrs should coerce to. However, they don't perform any conversion. This is the job of `vec_cast()`, which we implement next. We'll provide a method to cast a percent to a percent: ```{r} vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ``` And then for converting back and forth between doubles. To convert a double to a percent we use the `percent()` helper (not the constructor; this is unvalidated user input). To convert a `percent` to a double, we strip the attributes. Note that for historical reasons the order of argument in the signature is the opposite as for `vec_ptype2()`. The class for `to` comes first, and the class for `x` comes second. Again, the double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. Implementing `vec_cast.vctrs_percent.numeric()` has no effect. ```{r} vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ``` Then we can check this works with `vec_cast()`: ```{r} vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ``` Once you've implemented `vec_ptype2()` and `vec_cast()`, you get `vec_c()`, `[<-`, and `[[<-` implementations for free. ```{r, error = TRUE} vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ``` You'll also get mostly correct behaviour for `c()`. The exception is when you use `c()` with a base R class: ```{r, error = TRUE} # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ``` Unfortunately there's no way to fix this problem with the current design of `c()`. Again, as a convenience, consider providing an `as_percent()` function that makes use of the casts defined in your `vec_cast.vctrs_percent()` methods: ```{r} as_percent <- function(x) { vec_cast(x, new_percent()) } ``` Occasionally, it is useful to provide conversions that go beyond what's allowed in casting. For example, we could offer a parsing method for character vectors. In this case, `as_percent()` should be generic, the default method should cast, and then additional methods should implement more flexible conversion: ```{r} as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ``` ### Decimal class Now that you've seen the basics with a very simple S3 class, we'll gradually explore more complicated scenarios. This section creates a `decimal` class that prints with the specified number of decimal places. This is very similar to `percent` but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1). We start off as before, defining a low-level constructor, a user-friendly constructor, a `format()` method, and a `vec_ptype_abbr()`. Note that additional object attributes are simply passed along to `new_vctr()`: ```{r} new_decimal <- function(x = double(), digits = 2L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_integer(digits)) { abort("`digits` must be an integer vector.") } vec_check_size(digits, size = 1L) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ``` Note that I provide a little helper to extract the `digits` attribute. This makes the code a little easier to read and should not be exported. By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You'll see what to do if the attributes are data dependent in the next section. ```{r} x[1:2] x[[1]] ``` For the sake of exposition, we'll assume that `digits` is an important attribute of the class and should be included in the full type: ```{r} vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ``` Now consider `vec_cast()` and `vec_ptype2()`. Casting and coercing from one decimal to another requires a little thought as the values of the `digits` attribute might be different, and we need some way to reconcile them. Here I've decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error. ```{r} vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ``` Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal). ```{r} vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ``` If type `x` has greater resolution than `y`, there will be some inputs that lose precision. These should generate errors using `stop_lossy_cast()`. You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution. ```{r, error = TRUE} vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ``` ### Cached sum class {#cached-sum} The next level up in complexity is an object that has data-dependent attributes. To explore this idea we'll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors: ```{r} new_cached_sum <- function(x = double(), sum = 0L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_double(sum)) { abort("`sum` must be a double vector.") } vec_check_size(sum, size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ``` For this class, we can use the default `format()` method, and instead, we'll customise the `obj_print_footer()` method. This is a good place to display user facing attributes. ```{r} obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ``` We'll also override `sum()` and `mean()` to use the attribute. This is easiest to do with `vec_math()`, which you'll learn about later. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ``` As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they'll work, but return the incorrect result: ```{r} x[1:2] ``` To fix this, you need to provide a `vec_restore()` method. Note that this method dispatches on the `to` argument. ```{r} vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ``` This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with `vec_data()` and then reapplying them again with `vec_restore()`. The default `vec_restore()` method copies over all attributes, which is not appropriate when the attributes depend on the data. Note that `vec_restore.class` is subtly different from `vec_cast.class.class()`. `vec_restore()` is used when restoring attributes that have been lost; `vec_cast()` is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with `new_vctr()`. `vec_restore.factor()` would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with `vec_cast()`. ## Record-style objects Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is `POSIXlt`, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override `length()` and subsetting methods to conceal this implementation detail. ```{r} x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ``` vctrs makes it easy to create new record-style classes using `new_rcrd()`, which has a wide selection of default methods. ### Rational class A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short). As usual we start with low-level and user-friendly constructors. The low-level constructor calls `new_rcrd()`, which needs a named list of equal-length vectors. ```{r} new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer vector.") } if (!is_integer(d)) { abort("`d` must be an integer vector.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ``` Our user friendly constructor casts `n` and `d` to integers and recycles them to the same length. ```{r} rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ``` Behind the scenes, `x` is a named list with two elements. But those details are hidden so that it behaves like a vector: ```{r} names(x) length(x) ``` To access the underlying fields we need to use `field()` and `fields()`: ```{r} fields(x) field(x, "n") ``` Notice that we can't `print()` or `str()` the new rational vector `x` yet. Printing causes an error: ```{r, error = TRUE} x str(x) ``` This is because we haven't defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call `vec_data(x)`. ```{r} vec_data(x) str(vec_data(x)) ``` It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way: ```{r} format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ``` vctrs uses the `format()` method in `str()`, hiding the underlying implementation details from the user: ```{r} str(x) ``` For `rational`, `vec_ptype2()` and `vec_cast()` follow the same pattern as `percent()`. We allow coercion from integer and to doubles. ```{r} vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ``` ### Decimal2 class The previous implementation of `decimal` was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems. A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a `scale`). The following code is a very quick sketch of how you might start creating such a class: ```{r} new_decimal2 <- function(l, r, scale = 2L) { if (!is_integer(l)) { abort("`l` must be an integer vector.") } if (!is_integer(r)) { abort("`r` must be an integer vector.") } if (!is_integer(scale)) { abort("`scale` must be an integer vector.") } vec_check_size(scale, size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ``` ## Equality and comparison vctrs provides four "proxy" generics. Two of these let you control how your class determines equality and comparison: - `vec_proxy_equal()` returns a data vector suitable for comparison. It underpins `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()`. - `vec_proxy_compare()` specifies how to compare the elements of your vector. This proxy is used in `<`, `<=`, `>=`, `>`, `min()`, `max()`, `median()`, and `quantile()`. Two other proxy generic are used for sorting for unordered data types and for accessing the raw data for exotic storage formats: - `vec_proxy_order()` specifies how to sort the elements of your vector. It is used in `xtfrm()`, which in turn is called by the `order()` and `sort()` functions. This proxy was added to implement the behaviour of lists, which are sortable (their order proxy sorts by first occurrence) but not comparable (comparison operators cause an error). Its default implementation for other classes calls `vec_proxy_compare()` and you normally don't need to implement this proxy. - `vec_proxy()` returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn't need to implement `vec_proxy()`. The default behavior is as follows: - `vec_proxy_equal()` calls `vec_proxy()` - `vec_proxy_compare()` calls `vec_proxy_equal()` - `vec_proxy_order()` calls `vec_proxy_compare()` You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work. These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C. ### Rational class Let's explore these ideas by with the rational class we started on above. By default, `vec_proxy()` converts a record to a data frame, and the default comparison works column by column: ```{r} x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ``` This makes sense as a default but isn't correct here because `rational(1, 1)` represents the same number as `rational(2, 2)`, so they should be equal. We can fix that by implementing a `vec_proxy_equal()` method that divides `n` and `d` by their greatest common divisor: ```{r} # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ``` `vec_proxy_equal()` is also used by `unique()`: ```{r} unique(x) ``` We now need to fix the comparison operations similarly, since comparison currently happens lexicographically by `n`, then by `d`: ```{r} rational(1, 2) < rational(2, 3) rational(2, 4) < rational(2, 3) ``` The easiest fix is to convert the fraction to a floating point number and use this as a proxy: ```{r} vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational(2, 4) < rational(2, 3) ``` This also fixes `sort()`, because the default implementation of `vec_proxy_order()` calls `vec_proxy_compare()`. ```{r} sort(x) ``` (We could have used the same approach in `vec_proxy_equal()`, but when working with floating point numbers it's not necessarily true that `x == y` implies that `d * x == d * y`.) ### Polynomial class A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like `1 + 3x - 2x^2`) using a list of integer vectors (like `c(1, 3, -2)`). Note the use of `new_list_of()` in the constructor. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) new_poly(x) } new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly_list") } vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly" format.vctrs_poly_list <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly_list <- function(x, ...) { if (length(x) != 0) { print(format(x), quote = FALSE) } } p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` The resulting objects will inherit from the `vctrs_list_of` class, which provides tailored methods for `$`, `[[`, the corresponding assignment operators, and other methods. ```{r} class(p) p[2] p[[2]] ``` The class implements the list interface: ```{r} obj_is_list(p) ``` This is fine for the internal implementation of this class but it would be more appropriate if it behaved like an atomic vector rather than a list. #### Make an atomic polynomial vector An atomic vector is a vector like integer or character for which `[[` returns the same type. Unlike lists, you can't reach inside an atomic vector. To make the polynomial class an atomic vector, we'll wrap the internal `list_of()` class within a record vector. Usually records are used because they can store several fields of data for each observation. Here we have only one, but we use the class anyway to inherit its atomicity. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) x <- new_poly(x) new_rcrd(list(data = x), class = "vctrs_poly") } format.vctrs_poly <- function(x, ...) { format(field(x, "data")) } ``` The new `format()` method delegates to the one we wrote for the internal list. The vector looks just like before: ```{r} p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` Making the class atomic means that `obj_is_list()` now returns `FALSE`. This prevents recursive algorithms that traverse lists from reaching too far inside the polynomial internals. ```{r} obj_is_list(p) ``` Most importantly, it prevents users from reaching into the internals with `[[`: ```{r} p[[2]] ``` #### Implementing equality and comparison Equality works out of the box because we can tell if two integer vectors are equal: ```{r} p == poly(c(1, 0, 1)) ``` We can't compare individual elements, because the data is stored in a list and by default lists are not comparable: ```{r, error = TRUE} p < p[2] ``` To enable comparison, we implement a `vec_proxy_compare()` method: ```{r} vec_proxy_compare.vctrs_poly <- function(x, ...) { # Get the list inside the record vector x_raw <- vec_data(field(x, "data")) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } p < p[2] ``` Often, this is sufficient to also implement `sort()`. However, for lists, there is already a default `vec_proxy_order()` method that sorts by first occurrence: ```{r} sort(p) sort(p[c(1:3, 1:2)]) ``` To ensure consistency between ordering and comparison, we forward `vec_proxy_order()` to `vec_proxy_compare()`: ```{r} vec_proxy_order.vctrs_poly <- function(x, ...) { vec_proxy_compare(x, ...) } sort(p) ``` ## Arithmetic vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once: - `vec_math(fn, x, ...)` specifies the behaviour of mathematical functions like `abs()`, `sum()`, and `mean()`. (Note that `var()` and `sd()` can't be overridden, see `?vec_math()` for the complete list supported by `vec_math()`.) - `vec_arith(op, x, y)` specifies the behaviour of the arithmetic operations like `+`, `-`, and `%%`. (See `?vec_arith()` for the complete list.) Both generics define the behaviour for multiple functions because `sum.vctrs_vctr(x)` calls `vec_math.vctrs_vctr("sum", x)`, and `x + y` calls `vec_math.x_class.y_class("+", x, y)`. They're accompanied by `vec_math_base()` and `vec_arith_base()` which make it easy to call the underlying base R functions. `vec_arith()` uses double dispatch and needs the following standard boilerplate: ```{r} vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Correctly exporting `vec_arith()` methods from a package is currently a little awkward. See the instructions in the Arithmetic section of the "Implementing a vctrs S3 class in a package" section below. ### Cached sum class I showed an example of `vec_math()` to define `sum()` and `mean()` methods for `cached_sum`. Now let's talk about exactly how it works. Most `vec_math()` functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to `vec_math_base()` for those that you don't care about. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ``` ### Meter class To explore the infix arithmetic operators exposed by `vec_arith()` I'll create a new class that represents a measurement in `meter`s: ```{r} new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ``` Because `meter` is built on top of a double vector, basic mathematic operations work: ```{r} sum(x) mean(x) ``` But we can't do arithmetic: ```{r, error = TRUE} x + 1 meter(10) + meter(1) meter(10) * 3 ``` To allow these infix functions to work, we'll need to provide `vec_arith()` generic. But before we do that, let's think about what combinations of inputs we should support: - It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can't multiply meters (because that would yield an area). - For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don't make much sense as we, strictly speaking, are dealing with objects of different nature. `vec_arith()` is another function that uses double dispatch, so as usual we start with a template. ```{r} vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Then write the method for two meter objects. We use a switch statement to cover the cases we care about and `stop_incompatible_op()` to throw an informative error message for everything else. ```{r, error = TRUE} vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ``` Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while `meter(10) / 2` makes sense, `2 / meter(10)` does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over `numeric` here instead of `double`. ```{r, error = TRUE} vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ``` For completeness, we also need `vec_arith.vctrs_meter.MISSING` for the unary `+` and `-` operators: ```{r} vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ``` ## Implementing a vctrs S3 class in a package Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things: - Register the S3 methods by listing them in the `NAMESPACE` file. - Create documentation around your methods, for the sake of your user and to satisfy `R CMD check`. Let's assume that the `percent` class is implemented in the pizza package in the file `R/percent.R`. Here we walk through the major sections of this hypothetical file. You've seen all of this code before, but now it's augmented by the roxygen2 directives that produce the correct `NAMESPACE` entries and help topics. ### Getting started First, the pizza package needs to include vctrs in the `Imports` section of its `DESCRIPTION` (perhaps by calling `usethis::use_package("vctrs")`. While vctrs is under very active development, it probably makes sense to state a minimum version. Imports: a_package, another_package, ... vctrs (>= x.y.z), ... Then we make all vctrs functions available within the pizza package by including the directive `#' @import vctrs` somewhere. Usually, it's not good practice to `@import` the entire namespace of a package, but vctrs is deliberately designed with this use case in mind. Where should we put `#' @import vctrs`? There are two natural locations: - With package-level docs in `R/pizza-doc.R`. You can use `usethis::use_package_doc()` to initiate this package-level documentation. - In `R/percent.R`. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package. We also must use one of these locations to dump some internal documentation that's needed to avoid `R CMD check` complaints. We don't expect any human to ever read this documentation. Here's how this dummy documentation should look, combined with the `#' @import vctrs` directive described above. ```{r eval = FALSE} #' Internal vctrs methods #' #' @import vctrs #' @keywords internal #' @name pizza-vctrs NULL ``` This should appear in `R/pizza-doc.R` (package-level docs) or in `R/percent.R` (class-focused file). Remember to call `devtools::document()` regularly, as you develop, to regenerate `NAMESPACE` and the `.Rd` files. From this point on, the code shown is expected to appear in `R/percent.R`. ### Low-level and user-friendly constructors Next we add our constructor: ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "pizza_percent") } ``` Note that the name of the package must be included in the class name (`pizza_percent`), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class. We can also add a call to `setOldClass()` for compatibility with S4: ```{r} # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ``` Because we've used a function from the methods package, you'll also need to add methods to `Imports`, with (e.g.) `usethis::use_package("methods")`. This is a "free" dependency because methods is bundled with every R install. Next we implement, export, and document a user-friendly helper: `percent()`. ```{r} #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` (Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do `pizza::percent()`; it would be redundant to have `pizza::pizza_percent()`.) ### Other helpers It's a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor `percent()`: ```{r} #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ``` You'll also need to update the `percent()` documentation to reflect that `x` now means two different things: ```{r} #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ``` Next we provide the key methods to make printing work. These are S3 methods, so they don't need to be documented, but they do need to be exported. ```{r eval = FALSE} #' @export format.pizza_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } #' @export vec_ptype_abbr.pizza_percent <- function(x, ...) { "prcnt" } ``` Finally, we implement methods for `vec_ptype2()` and `vec_cast()`. ```{r, eval = FALSE} #' @export vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() #' @export vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() #' @export vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x #' @export vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) #' @export vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ``` ### Arithmetic Writing double dispatch methods for `vec_arith()` is currently more awkward than writing them for `vec_ptype2()` or `vec_cast()`. We plan to improve this in the future. For now, you can use the following instructions. If you define a new type and want to write `vec_arith()` methods for it, you'll need to provide a new single dispatch S3 generic for it of the following form: ```{r, eval=FALSE} #' @export #' @method vec_arith my_type vec_arith.my_type <- function(op, x, y, ...) { UseMethod("vec_arith.my_type", y) } ``` Note that this actually functions as both an S3 method for `vec_arith()` and an S3 generic called `vec_arith.my_type()` that dispatches off `y`. roxygen2 only recognizes it as an S3 generic, so you have to register the S3 method part of this with an explicit `@method` call. After that, you can define double dispatch methods, but you still need an explicit `@method` tag to ensure it is registered with the correct generic: ```{r, eval=FALSE} #' @export #' @method vec_arith.my_type my_type vec_arith.my_type.my_type <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.my_type integer vec_arith.my_type.integer <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.integer my_type vec_arith.integer.my_type <- function(op, x, y, ...) { # implementation here } ``` vctrs provides the hybrid S3 generics/methods for most of the base R types, like `vec_arith.integer()`. If you don't fully import vctrs with `@import vctrs`, then you will need to explicitly import the generic you are registering double dispatch methods for with `@importFrom vctrs vec_arith.integer`. ### Testing It's good practice to test your new class. Specific recommendations: - `R/percent.R` is the type of file where you really do want 100% test coverage. You can use `devtools::test_coverage_file()` to check this. - Make sure to test behaviour with zero-length inputs and missing values. - Use `testthat::verify_output()` to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about `verify_output()` in the [testthat v2.3.0 blog post](https://tidyverse.org/blog/2019/11/testthat-2-3-0/); it's an example of a so-called [golden test](https://ro-che.info/articles/2017-12-04-golden-tests). - Check for method symmetry; use `expect_s3_class()`, probably with `exact = TRUE`, to ensure that `vec_c(x, y)` and `vec_c(y, x)` return the same type of output for the important `x`s and `y`s in your domain. - Use `testthat::expect_error()` to check that inputs that can't be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include `vctrs_error_assert_ptype`, `vctrs_error_assert_size`, and `vctrs_error_incompatible_type`. ```{r, eval = FALSE} expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") ``` If your tests pass when run by `devtools::test()`, but fail when run in `R CMD check`, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated `NAMESPACE`. ### Existing classes Before you build your own class, you might want to consider using, or subclassing existing classes. You can check [awesome-vctrs](https://github.com/krlmlr/awesome-vctrs) for a curated list of R vector classes, some of which are built with vctrs. If you've built or extended a class, consider adding it to that list so other people can use it. vctrs/vignettes/type-size.Rmd0000644000176200001440000003155214511320527016026 0ustar liggesusers--- title: "Prototypes and sizes" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Prototypes and sizes} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Rather than using `class()` and `length()`, vctrs has notions of prototype (`vec_ptype_show()`) and size (`vec_size()`). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules. Size and prototype are motivated by thinking about the optimal behaviour for `c()` and `rbind()`, particularly inspired by data frames with columns that are matrices or data frames. ```{r} library(vctrs) ``` ## Prototype The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the `class()` of an object is inadequate for this purpose: * The `class()` doesn't include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a `POSIXct`. You cannot combine two factors or two `POSIXct`s without thinking about the attributes. * The `class()` of a matrix is "matrix" and doesn't include the type of the underlying vector or the dimensionality. Instead, vctrs takes advantage of R's vectorised nature and uses a __prototype__, a 0-observation slice of the vector (this is basically `x[0]` but with some subtleties we'll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data. Conveniently, you can create many prototypes using existing base functions (e.g, `double()` and `factor(levels = c("a", "b"))`). vctrs provides a few helpers (e.g. `new_date()`, `new_datetime()`, and `new_duration()`) where the equivalents in base R are missing. ### Base prototypes `vec_ptype()` creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides `vec_ptype_show()`, which prints the prototype in a friendly way (and returns nothing). Using `vec_ptype_show()` allows us to see the prototypes base R classes: * Atomic vectors have no attributes and just display the underlying `typeof()`: ```{r} vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ``` * The prototype of matrices and arrays include the base type and the dimensions after the first: ```{r} vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ``` * The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it's highly likely that their levels are also equal. ```{r} vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ``` While `vec_ptype_show()` prints only the hash, the prototype object itself does contain all levels: ```{r} vec_ptype(factor("a")) ``` * Base R has three key date time classes: dates, date-times (`POSIXct`), and durations (`difftime)`. Date-times have a timezone, and durations have a unit. ```{r} vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ``` * Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column: ```{r} vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ``` Data frames can have columns that are themselves data frames, making this a "recursive" type: ```{r} df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ``` ### Coercing to common type It's often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via `vec_ptype_common()`. `vec_ptype_common()` possesses the following invariants: * `class(vec_ptype_common(x, y))` equals `class(vec_ptype_common(y, x))`. * `class(vec_ptype_common(x, vec_ptype_common(y, z))` equals `class(vec_ptype_common(vec_ptype_common(x, y), z))`. * `vec_ptype_common(x, NULL) == vec_ptype(x)`. i.e., `vec_ptype_common()` is both commutative and associative (with respect to class) and has an identity element, `NULL`; i.e., it's a __commutative monoid__. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects. Like with `vec_ptype()`, the easiest way to explore `vec_ptype_common()` is with `vec_ptype_show()`: when given multiple inputs, it will print their common prototype. (In other words: program with `vec_ptype_common()` but play with `vec_ptype_show()`.) * The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically: ```{r, error = TRUE} vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ``` * Matrices and arrays are automatically broadcast to higher dimensions: ```{r} vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ``` Provided that the dimensions follow the vctrs recycling rules: ```{r, error = TRUE} vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ``` * Factors combine levels in the order in which they appear. ```{r} fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ``` * Combining a date and date-time yields a date-time: ```{r} vec_ptype_show(new_date(), new_datetime()) ``` When combining two date times, the timezone is taken from the first input: ```{r} vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ``` Unless it's the local timezone, in which case any explicit time zone will win: ```{r} vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ``` * The common type of two data frames is the common type of each column that occurs in both data frames: ```{r} vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ``` And the union of the columns that only occur in one: ```{r} vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ``` Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled. ### Casting to specified type `vec_ptype_common()` finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That's the job of `vec_cast_common()`: ```{r} str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ``` Alternatively, you can cast to a specific prototype using `vec_cast()`: ```{r, error = TRUE} # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ``` If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error. ```{r, error = TRUE} vec_cast(c(1.5, 2), integer()) ``` You can suppress the lossy cast errors with `allow_lossy_cast()`: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ``` This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ``` The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound. ## Size `vec_size()` was motivated by the need to have an invariant that describes the number of "observations" in a data structure. This is particularly important for data frames, as it's useful to have some function such that `f(data.frame(x))` equals `f(x)`. No base function has this property: * `length(data.frame(x))` equals `1` because the length of a data frame is the number of columns. * `nrow(data.frame(x))` does not equal `nrow(x)` because `nrow()` of a vector is `NULL`. * `NROW(data.frame(x))` equals `NROW(x)` for vector `x`, so is almost what we want. But because `NROW()` is defined in terms of `length()`, it returns a value for every object, even types that can't go in a data frame, e.g. `data.frame(mean)` errors even though `NROW(mean)` is `1`. We define `vec_size()` as follows: * It is the length of 1d vectors. * It is the number of rows of data frames, matrices, and arrays. * It throws error for non vectors. Given `vec_size()`, we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns. ### Slicing `vec_slice()` is to `vec_size()` as `[` is to `length()`; i.e., it allows you to select observations regardless of the dimensionality of the underlying object. `vec_slice(x, i)` is equivalent to: * `x[i]` when `x` is a vector. * `x[i, , drop = FALSE]` when `x` is a data frame. * `x[i, , , drop = FALSE]` when `x` is a 3d array. ```{r} x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ``` `vec_slice(data.frame(x), i)` equals `data.frame(vec_slice(x, i))` (modulo variable and row names). Prototypes are generated with `vec_slice(x, 0L)`; given a prototype, you can initialize a vector of given size (filled with `NA`s) with `vec_init()`. ### Common sizes: recycling rules Closely related to the definition of size are the __recycling rules__. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in `vec_size_common()`, which gives the common size of a set of vectors: ```{r} vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ``` vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like `dest == c("IAH", "HOU"))`, at the cost of occasionally requiring an explicit calls to `rep()`. ```{r, echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates an error"} knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ``` You can apply the recycling rules in two ways: * If you have a vector and desired size, use `vec_recycle()`: ```{r} vec_recycle(1:3, 3) vec_recycle(1, 10) ``` * If you have multiple vectors and you want to recycle them to the same size, use `vec_recycle_common()`: ```{r} vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ``` ## Appendix: recycling in base R The recycling rules in base R are described in [The R Language Definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Recycling-rules) but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions. Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer: ```{r} rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ``` If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning: ```{r} invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ``` But some functions recycle silently: ```{r} length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ``` And `data.frame()` throws an error: ```{r, error = TRUE} data.frame(1:2, 1:3) ``` The R language definition states that "any arithmetic operation involving a zero-length vector has a zero-length result". But outside of arithmetic, this rule is not consistently followed: ```{r, error = TRUE} # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) ``` vctrs/vignettes/pillar.Rmd0000644000176200001440000002126014315060310015344 0ustar liggesusers--- title: "Printing vectors nicely in tibbles" author: "Kirill Müller, Hadley Wickham" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Printing vectors nicely in tibbles} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` You can get basic control over how a vector is printed in a tibble by providing a `format()` method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics: * `vctrs::vec_ptype_abbr()` determines what goes into the column header. * `pillar::pillar_shaft()` determines what goes into the body, or the shaft, of the column. Technically a [*pillar*](https://en.wikipedia.org/wiki/Column#Nomenclature) is composed of a *shaft* (decorated with an *ornament*), with a *capital* above and a *base* below. Multiple pillars form a *colonnade*, which can be stacked in multiple *tiers*. This is the motivation behind the names in our API. This short vignette shows the basics of column styling using a `"latlon"` vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you'll need to create documentation and the `NAMESPACE` file. In this vignette, we'll attach pillar and vctrs: ```{r setup} library(vctrs) library(pillar) ``` You don't need to do this in a package. Instead, you'll need to _import_ the packages by then to the `Imports:` section of your `DESCRIPTION`. The following helper does this for you: ```{r, eval = FALSE} usethis::use_package("vctrs") usethis::use_package("pillar") ``` ## Prerequisites To illustrate the basic ideas we're going to create a `"latlon"` class that encodes geographic coordinates in a record. We'll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using `vctrs_rcrd()`, we already get the infrastructure to make this class fully compatible with data frames for free. See `vignette("s3-vector", package = "vctrs")` for details on the record data type. ```{r} #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ``` ## Using in a tibble Columns of this class can be used in a tibble right away because we've made a class using the vctrs infrastructure and have provided a `format()` method: ```{r} library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ``` This output is ok, but we could improve it by: 1. Using a more description type abbreviation than ``. 1. Using a dash of colour to highlight the most important parts of the value. 1. Providing a narrower view when horizontal space is at a premium. The following sections show how to enhance the rendering. ## Fixing the data type Instead of `` we'd prefer to use ``. We can do that by implementing the `vec_ptype_abbr()` method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that's under 6 characters. ```{r} #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ``` ## Custom rendering The `format()` method is used by default for rendering. For custom formatting you need to implement the `pillar_shaft()` method. This function should always return a pillar shaft object, created by `new_pillar_shaft_simple()` or similar. `new_pillar_shaft_simple()` accepts ANSI escape codes for colouring, and pillar includes some built in styles like `style_subtle()`. We can use subtle style for the degree and minute separators to make the data more obvious. First we define a degree formatter that makes use of `style_subtle()`: ```{r} deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ``` And then we pass that to our `format()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ``` Currently, ANSI escapes are not rendered in vignettes, so this result doesn't look any different, but if you run the code yourself you'll see an improved display. ```{r} data ``` As well as the functions in pillar, the [cli](https://cli.r-lib.org/) package provides a variety of tools for styling text. ## Truncation Tibbles can automatically compacts columns when there's no enough horizontal space to display everything: ```{r} print(data, width = 30) ``` Currently the latlon class isn't ever compacted because we haven't specified a minimum width when constructing the shaft. Let's fix that and re-print the data: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ``` ## Adaptive rendering Truncation may be useful for character data, but for lat-lon data it'd be nicer to show full degrees and remove the minutes. We'll first write a function that does this: ```{r} deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ``` Then use it as part of more sophisticated implementation of the `pillar_shaft()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ``` Now the `pillar_shaft()` method returns an object of class `"pillar_shaft_latlon"` created by `new_pillar_shaft()`. This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (`get_max_extent()` is a helper that computes the maximum display width occupied by the values in a character vector.) All that's left to do is to implement a `format()` method for our new `"pillar_shaft_latlon"` class. This method will be called with a `width` argument, which then determines which of the formats to choose. The formatting of our choice is passed to the `new_ornament()` function: ```{r} #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ``` ## Testing If you want to test the output of your code, you can compare it with a known state recorded in a text file. The `testthat::expect_snapshot()` function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won't make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN. Use this testthat expectation in one of your test files to create a snapshot test: ```{r eval = FALSE} expect_snapshot(pillar_shaft(data$loc)) ``` See for more information. vctrs/src/0000755000176200001440000000000015157322654012214 5ustar liggesusersvctrs/src/order-collate.h0000644000176200001440000000220315156537555015125 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_COLLATE_H #define VCTRS_ORDER_COLLATE_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- /* * `proxy_apply_chr_proxy_collate()` iterates over `proxy`, applying * `chr_proxy_collate` on any character vectors that it detects. * * It expects that: * - If `proxy` is a data frame, it has been flattened by its corresponding * `vec_proxy_*()` function. * - All character vectors in `proxy` have already been reencoded to UTF-8 * by `obj_encode_utf8()`. */ SEXP proxy_apply_chr_proxy_collate(SEXP proxy, SEXP chr_proxy_collate); // ----------------------------------------------------------------------------- #endif vctrs/src/type-factor.c0000644000176200001440000002267215156543701014623 0ustar liggesusers#include "vctrs.h" static SEXP levels_union(SEXP x, SEXP y); // [[ include("type-factor.h") ]] SEXP fct_ptype2( SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ) { SEXP x_levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); SEXP y_levels = PROTECT(Rf_getAttrib(y, R_LevelsSymbol)); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_factor_levels(x, p_x_arg); } if (TYPEOF(y_levels) != STRSXP) { stop_corrupt_factor_levels(y, p_y_arg); } // Use `x_levels` if they are equal pointers, otherwise compute union SEXP levels = (x_levels == y_levels) ? x_levels : levels_union(x_levels, y_levels); PROTECT(levels); SEXP out = new_empty_factor(levels); UNPROTECT(3); return out; } static bool ord_ptype2_validate( r_obj* x_levels, r_obj* y_levels, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ) { if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_ordered_levels(x, p_x_arg); } if (TYPEOF(y_levels) != STRSXP) { stop_corrupt_ordered_levels(y, p_y_arg); } return obj_equal(x_levels, y_levels); } // [[ include("type-factor.h") ]] r_obj* ord_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { r_obj* x_levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); r_obj* y_levels = PROTECT(Rf_getAttrib(y, R_LevelsSymbol)); SEXP out; if (ord_ptype2_validate(x_levels, y_levels, x, y, p_x_arg, p_y_arg)) { out = new_empty_ordered(x_levels); } else { out = vec_ptype2_default( x, y, p_x_arg, p_y_arg, call, s3_fallback ); } UNPROTECT(2); return out; } static SEXP levels_union(SEXP x, SEXP y) { SEXP args = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(args, 0, x); SET_VECTOR_ELT(args, 1, y); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = R_NilValue }; // Combine with known ptype // No name repair because this is just combining factor levels SEXP xy = PROTECT(vec_c( args, r_globals.empty_chr, R_NilValue, &name_repair_opts, vec_args.empty, r_lazy_null )); SEXP out = vec_unique(xy); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- static void init_factor(SEXP x, SEXP levels); static void init_ordered(SEXP x, SEXP levels); // [[ include("vctrs.h") ]] SEXP fct_as_character(SEXP x, struct vctrs_arg* x_arg) { SEXP levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_factor_levels(x, x_arg); } UNPROTECT(1); return Rf_asCharacterFactor(x); } // [[ include("vctrs.h") ]] SEXP ord_as_character(SEXP x, struct vctrs_arg* x_arg) { return fct_as_character(x, x_arg); } static SEXP chr_as_factor_from_self(SEXP x, bool ordered); static SEXP chr_as_factor_impl(SEXP x, SEXP levels, bool* lossy, bool ordered); // [[ include("vctrs.h") ]] SEXP chr_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg) { SEXP levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_factor_levels(to, to_arg); } SEXP out; // When `to` has no levels, it is treated as a template and the // levels come from `x` if (vec_size(levels) == 0) { out = chr_as_factor_from_self(x, false); } else { out = chr_as_factor_impl(x, levels, lossy, false); } UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP chr_as_ordered(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg) { SEXP levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(levels) != STRSXP) { stop_corrupt_ordered_levels(to, to_arg); } SEXP out; // When `to` has no levels, it is treated as a template and the // levels come from `x` if (vec_size(levels) == 0) { out = chr_as_factor_from_self(x, true); } else { out = chr_as_factor_impl(x, levels, lossy, true); } UNPROTECT(1); return out; } static SEXP chr_as_factor_impl(SEXP x, SEXP levels, bool* lossy, bool ordered) { SEXP out = PROTECT(vec_match(x, levels)); const int* p_out = INTEGER(out); R_len_t size = vec_size(x); const SEXP* p_x = STRING_PTR_RO(x); // Detect lossy no-matches, but allow `NA` values from `x` for (R_len_t i = 0; i < size; ++i) { if (p_out[i] == NA_INTEGER && p_x[i] != NA_STRING) { *lossy = true; UNPROTECT(1); return R_NilValue; } } if (ordered) { init_ordered(out, levels); } else { init_factor(out, levels); } UNPROTECT(1); return out; } static SEXP remove_na_levels(SEXP levels); // Factor levels are added in order of appearance. // `NA` values in `x` are not considered factor levels. static SEXP chr_as_factor_from_self(SEXP x, bool ordered) { SEXP levels = PROTECT(vec_unique(x)); levels = PROTECT(remove_na_levels(levels)); // `NA` values in `x` correctly become `NA` values in the result SEXP out = PROTECT(vec_match(x, levels)); if (ordered) { init_ordered(out, levels); } else { init_factor(out, levels); } UNPROTECT(3); return out; } static SEXP remove_na_levels(SEXP levels) { R_len_t size = vec_size(levels); const SEXP* p_levels = STRING_PTR_RO(levels); // There might only ever be 1 `NA` level. // Remove it if it exists. for (R_len_t i = 0; i < size; ++i) { if (p_levels[i] == NA_STRING) { int na_loc = (i + 1) * -1; SEXP na_loc_obj = PROTECT(r_int(na_loc)); SEXP out = vec_slice(levels, na_loc_obj); UNPROTECT(1); return out; } } return levels; } static SEXP fct_as_factor_impl(SEXP x, SEXP x_levels, SEXP to_levels, bool* lossy, bool ordered); // [[ include("vctrs.h") ]] SEXP fct_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg) { SEXP x_levels = PROTECT(Rf_getAttrib(x, R_LevelsSymbol)); SEXP to_levels = PROTECT(Rf_getAttrib(to, R_LevelsSymbol)); if (TYPEOF(x_levels) != STRSXP) { stop_corrupt_factor_levels(x, x_arg); } if (TYPEOF(to_levels) != STRSXP) { stop_corrupt_factor_levels(to, to_arg); } SEXP out = fct_as_factor_impl(x, x_levels, to_levels, lossy, false); UNPROTECT(2); return out; } // [[ include("factor.h") ]] SEXP ord_as_ordered(const struct cast_opts* p_opts) { SEXP x_levels = PROTECT(Rf_getAttrib(p_opts->x, R_LevelsSymbol)); SEXP to_levels = PROTECT(Rf_getAttrib(p_opts->to, R_LevelsSymbol)); SEXP out; if (ord_ptype2_validate( x_levels, to_levels, p_opts->x, p_opts->to, p_opts->p_x_arg, p_opts->p_to_arg )) { out = p_opts->x; } else { out = vec_cast_default( p_opts->x, p_opts->to, p_opts->p_x_arg, p_opts->p_to_arg, p_opts->call, p_opts->s3_fallback ); } UNPROTECT(2); return out; } static SEXP fct_as_factor_impl(SEXP x, SEXP x_levels, SEXP to_levels, bool* lossy, bool ordered) { // Early exit if levels are identical if (x_levels == to_levels) { return x; } R_len_t x_levels_size = vec_size(x_levels); R_len_t to_levels_size = vec_size(to_levels); // Early exit if `to` has no levels. In this case it is being used as // a template if (to_levels_size == 0) { return x; } // Always lossy if there are more levels in `x` than in `to` if (x_levels_size > to_levels_size) { *lossy = true; return R_NilValue; } R_len_t x_size = vec_size(x); const SEXP* p_x_levels = STRING_PTR_RO(x_levels); const SEXP* p_to_levels = STRING_PTR_RO(to_levels); bool is_contiguous_subset = true; for (R_len_t i = 0; i < x_levels_size; ++i) { if (p_x_levels[i] != p_to_levels[i]) { is_contiguous_subset = false; break; } } // No recoding required if contiguous subset. // Duplicate, strip non-factor attributes, and re-initialize with new levels. // Using `r_clone_referenced()` avoids an immediate copy using ALTREP wrappers. if (is_contiguous_subset) { SEXP out = PROTECT(r_clone_referenced(x)); r_attrib_zap_all(out); if (ordered) { init_ordered(out, to_levels); } else { init_factor(out, to_levels); } UNPROTECT(1); return out; } const int* p_x = INTEGER_RO(x); SEXP out = PROTECT(Rf_allocVector(INTSXP, x_size)); int* p_out = INTEGER(out); if (ordered) { init_ordered(out, to_levels); } else { init_factor(out, to_levels); } SEXP recode = PROTECT(vec_match(x_levels, to_levels)); const int* p_recode = INTEGER_RO(recode); // Detect if there are any levels in `x` that aren't in `to` for (R_len_t i = 0; i < x_levels_size; ++i) { if (p_recode[i] == NA_INTEGER) { *lossy = true; UNPROTECT(2); return R_NilValue; } } // Recode `x` int values into `to` level ordering for (R_len_t i = 0; i < x_size; ++i) { const int elt = p_x[i]; if (elt == NA_INTEGER) { p_out[i] = NA_INTEGER; continue; } p_out[i] = p_recode[elt - 1]; } UNPROTECT(2); return out; } static void init_factor(SEXP x, SEXP levels) { if (TYPEOF(x) != INTSXP) { r_stop_internal("Only integers can be made into factors."); } Rf_setAttrib(x, R_LevelsSymbol, levels); Rf_setAttrib(x, R_ClassSymbol, classes_factor); } static void init_ordered(SEXP x, SEXP levels) { if (TYPEOF(x) != INTSXP) { r_stop_internal("Only integers can be made into ordered factors."); } Rf_setAttrib(x, R_LevelsSymbol, levels); Rf_setAttrib(x, R_ClassSymbol, classes_ordered); } vctrs/src/equal.h0000644000176200001440000002031315156561441013471 0ustar liggesusers#ifndef VCTRS_EQUAL_H #define VCTRS_EQUAL_H #include "vctrs-core.h" #include "missing.h" #include "poly-op.h" SEXP vec_equal( SEXP x, SEXP y, bool na_equal, SEXP ptype, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy error_call ); // obj_equal() never propagates missingness, so it can return a `bool` bool obj_equal(r_obj* x, r_obj* y); bool obj_equal_utf8(r_obj* x, r_obj* y); // ----------------------------------------------------------------------------- static inline int lgl_equal_na_equal(int x, int y) { return x == y; } static inline int int_equal_na_equal(int x, int y) { return x == y; } static inline int dbl_equal_na_equal(double x, double y) { switch (dbl_classify(x)) { case VCTRS_DBL_number: return isnan(y) ? false : x == y; case VCTRS_DBL_missing: return dbl_classify(y) == VCTRS_DBL_missing; case VCTRS_DBL_nan: return dbl_classify(y) == VCTRS_DBL_nan; default: r_stop_unreachable(); } } static inline int cpl_equal_na_equal(Rcomplex x, Rcomplex y) { return dbl_equal_na_equal(x.r, y.r) && dbl_equal_na_equal(x.i, y.i); } static inline int chr_equal_na_equal(SEXP x, SEXP y) { return x == y; } static inline int raw_equal_na_equal(Rbyte x, Rbyte y) { return x == y; } static inline int list_equal_na_equal(SEXP x, SEXP y) { return obj_equal_utf8(x, y); } // ----------------------------------------------------------------------------- #define P_EQUAL_NA_EQUAL(CTYPE, EQUAL_NA_EQUAL) do { \ return EQUAL_NA_EQUAL(((const CTYPE*) p_x)[i], ((const CTYPE*) p_y)[j]); \ } while (0) static r_no_return inline int p_nil_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { r_stop_internal("Can't compare NULL for equality."); } static inline int p_lgl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(int, lgl_equal_na_equal); } static inline int p_int_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(int, int_equal_na_equal); } static inline int p_dbl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(double, dbl_equal_na_equal); } static inline int p_cpl_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(Rcomplex, cpl_equal_na_equal); } static inline int p_chr_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(SEXP, chr_equal_na_equal); } static inline int p_raw_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(Rbyte, raw_equal_na_equal); } static inline int p_list_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_EQUAL(SEXP, list_equal_na_equal); } #undef P_EQUAL_NA_EQUAL // No support for df-cols, as they should be flattened static inline bool p_col_equal_na_equal( const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type ) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_equal_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_equal_na_equal(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_col_equal_na_equal", type); } } static inline int p_df_equal_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; struct poly_df_data* p_y_data = (struct poly_df_data*) p_y; r_ssize n_col = p_x_data->n_col; if (n_col != p_y_data->n_col) { r_stop_internal("`x` and `y` must have the same number of columns."); } enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_x_col_ptr = p_x_data->v_col_ptr; const void** v_y_col_ptr = p_y_data->v_col_ptr; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { if (!p_col_equal_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j, v_col_type[col])) { return false; } } return true; } // ----------------------------------------------------------------------------- static inline int lgl_equal_na_propagate(int x, int y) { if (lgl_is_missing(x) || lgl_is_missing(y)) { return NA_LOGICAL; } else { return lgl_equal_na_equal(x, y); } } static inline int int_equal_na_propagate(int x, int y) { if (int_is_missing(x) || int_is_missing(y)) { return NA_LOGICAL; } else { return int_equal_na_equal(x, y); } } static inline int dbl_equal_na_propagate(double x, double y) { if (dbl_is_missing(x) || dbl_is_missing(y)) { return NA_LOGICAL; } else { // Faster than `dbl_equal_na_equal()`, // which has unneeded missing value checks return x == y; } } static inline int cpl_equal_na_propagate(Rcomplex x, Rcomplex y) { int real_equal = dbl_equal_na_propagate(x.r, y.r); int imag_equal = dbl_equal_na_propagate(x.i, y.i); if (real_equal == NA_LOGICAL || imag_equal == NA_LOGICAL) { return NA_LOGICAL; } else { return real_equal && imag_equal; } } static inline int chr_equal_na_propagate(SEXP x, SEXP y) { if (chr_is_missing(x) || chr_is_missing(y)) { return NA_LOGICAL; } else { return chr_equal_na_equal(x, y); } } static inline int raw_equal_na_propagate(Rbyte x, Rbyte y) { return raw_equal_na_equal(x, y); } static inline int list_equal_na_propagate(SEXP x, SEXP y) { if (list_is_missing(x) || list_is_missing(y)) { return NA_LOGICAL; } else { return list_equal_na_equal(x, y); } } // ----------------------------------------------------------------------------- #define P_EQUAL_NA_PROPAGATE(CTYPE, EQUAL_NA_PROPAGATE) do { \ return EQUAL_NA_PROPAGATE(((const CTYPE*) p_x)[i], ((const CTYPE*) p_y)[j]); \ } while (0) static r_no_return inline int p_nil_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { r_stop_internal("Can't compare NULL for equality."); } static inline int p_lgl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(int, lgl_equal_na_propagate); } static inline int p_int_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(int, int_equal_na_propagate); } static inline int p_dbl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(double, dbl_equal_na_propagate); } static inline int p_cpl_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(Rcomplex, cpl_equal_na_propagate); } static inline int p_chr_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(SEXP, chr_equal_na_propagate); } static inline int p_raw_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(Rbyte, raw_equal_na_propagate); } static inline int p_list_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_EQUAL_NA_PROPAGATE(SEXP, list_equal_na_propagate); } #undef P_EQUAL_NA_PROPAGATE static inline bool p_equal_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_equal_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_equal_na_propagate(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_equal_na_propagate", type); } } // ----------------------------------------------------------------------------- #endif vctrs/src/expand.h0000644000176200001440000000062314362266120013635 0ustar liggesusers#ifndef VCTRS_EXPAND_H #define VCTRS_EXPAND_H #include "vctrs-core.h" #include "names.h" enum vctrs_expand_vary { VCTRS_EXPAND_VARY_slowest = 0, VCTRS_EXPAND_VARY_fastest = 1 }; r_obj* vec_expand_grid(r_obj* xs, enum vctrs_expand_vary vary, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call); #endif vctrs/src/altrep-lazy-character.c0000644000176200001440000000744215113325071016551 0ustar liggesusers#include "vctrs.h" #include "R_ext/Altrep.h" // Initialised at load time R_altrep_class_t altrep_lazy_character_class; r_obj* ffi_altrep_lazy_character_is_materialized(r_obj* x) { return r_lgl(R_altrep_data2(x) != r_null); } r_obj* ffi_altrep_new_lazy_character(r_obj* fn) { r_obj* out = R_new_altrep(altrep_lazy_character_class, fn, r_null); r_mark_shared(out); return out; } // ----------------------------------------------------------------------------- // ALTVEC r_obj* altrep_lazy_character_Materialize(r_obj* vec) { r_obj* out = R_altrep_data2(vec); if (out != r_null) { return out; } r_obj* fn = R_altrep_data1(vec); r_obj* call = KEEP(r_new_call(fn, r_null)); // `fn()` evaluated in the global environment out = r_eval(call, r_envs.global); if (r_typeof(out) != R_TYPE_character) { r_stop_internal("`fn` must evaluate to a character vector."); } R_set_altrep_data2(vec, out); UNPROTECT(1); return out; } void* altrep_lazy_character_Dataptr(r_obj* vec, Rboolean writeable) { if (writeable) { r_stop_internal("Can't get writeable `DATAPTR()` to ``"); } else { // R promises not to write to this array, but we still have to return a // `void*` pointer rather than a `const void*` pointer. `STRING_PTR()` is // non-API so we use `STRING_PTR_RO()` and cast. This is really a bad ALTREP // API. It should have been separated into `void* Dataptr()` and `const // void* Dataptr_ro()`. return (void*) STRING_PTR_RO(altrep_lazy_character_Materialize(vec)); } } const void* altrep_lazy_character_Dataptr_or_null(r_obj* vec) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { return NULL; } else { return r_chr_cbegin(out); } } // ----------------------------------------------------------------------------- // ALTREP R_xlen_t altrep_lazy_character_Length(r_obj* vec) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { out = altrep_lazy_character_Materialize(vec); } return r_length(out); } // What gets printed when .Internal(inspect()) is used Rboolean altrep_lazy_character_Inspect(r_obj* x, int pre, int deep, int pvec, void (*inspect_subtree)(r_obj*, int, int, int)) { Rprintf("vctrs_altrep_lazy_character (materialized=%s)\n", R_altrep_data2(x) != r_null ? "T" : "F"); return TRUE; } // ----------------------------------------------------------------------------- // ALTSTRING r_obj* altrep_lazy_character_Elt(r_obj* vec, R_xlen_t i) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { out = altrep_lazy_character_Materialize(vec); } return STRING_ELT(out, i); } void altrep_lazy_character_Set_elt(r_obj* vec, R_xlen_t i, r_obj* value) { r_obj* out = R_altrep_data2(vec); if (out == r_null) { out = altrep_lazy_character_Materialize(vec); } SET_STRING_ELT(out, i, value); } // ----------------------------------------------------------------------------- void vctrs_init_altrep_lazy_character(DllInfo* dll) { altrep_lazy_character_class = R_make_altstring_class("altrep_lazy_character", "vctrs", dll); // ALTVEC R_set_altvec_Dataptr_method(altrep_lazy_character_class, altrep_lazy_character_Dataptr); R_set_altvec_Dataptr_or_null_method(altrep_lazy_character_class, altrep_lazy_character_Dataptr_or_null); // ALTREP R_set_altrep_Length_method(altrep_lazy_character_class, altrep_lazy_character_Length); R_set_altrep_Inspect_method(altrep_lazy_character_class, altrep_lazy_character_Inspect); // ALTSTRING R_set_altstring_Elt_method(altrep_lazy_character_class, altrep_lazy_character_Elt); R_set_altstring_Set_elt_method(altrep_lazy_character_class, altrep_lazy_character_Set_elt); } vctrs/src/missing.c0000644000176200001440000003132315047425317014031 0ustar liggesusers#include "vctrs.h" #include "decl/missing-decl.h" // [[ register() ]] r_obj* ffi_vec_detect_missing(r_obj* x) { return vec_detect_missing(x); } // [[ include("missing.h") ]] r_obj* vec_detect_missing(r_obj* x) { r_obj* proxy = KEEP(vec_proxy_equal(x)); r_obj* out = proxy_detect_missing(proxy); FREE(1); return out; } static inline r_obj* proxy_detect_missing(r_obj* proxy) { const enum vctrs_type type = vec_proxy_typeof(proxy); switch (type) { case VCTRS_TYPE_logical: return lgl_detect_missing(proxy); case VCTRS_TYPE_integer: return int_detect_missing(proxy); case VCTRS_TYPE_double: return dbl_detect_missing(proxy); case VCTRS_TYPE_complex: return cpl_detect_missing(proxy); case VCTRS_TYPE_raw: return raw_detect_missing(proxy); case VCTRS_TYPE_character: return chr_detect_missing(proxy); case VCTRS_TYPE_list: return list_detect_missing(proxy); case VCTRS_TYPE_dataframe: return df_detect_missing(proxy); case VCTRS_TYPE_null: return r_globals.empty_lgl; case VCTRS_TYPE_scalar: stop_scalar_type(proxy, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_detect_missing", type); } r_stop_unreachable(); } // ----------------------------------------------------------------------------- #define DETECT_MISSING(CTYPE, CBEGIN, IS_MISSING) do { \ const r_ssize size = vec_size(x); \ \ r_obj* out = KEEP(r_new_logical(size)); \ int* v_out = r_lgl_begin(out); \ \ CTYPE const* v_x = CBEGIN(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ v_out[i] = IS_MISSING(v_x[i]); \ } \ \ FREE(1); \ return out; \ } while (0) static inline r_obj* lgl_detect_missing(r_obj* x) { DETECT_MISSING(int, r_lgl_cbegin, lgl_is_missing); } static inline r_obj* int_detect_missing(r_obj* x) { DETECT_MISSING(int, r_int_cbegin, int_is_missing); } static inline r_obj* dbl_detect_missing(r_obj* x) { DETECT_MISSING(double, r_dbl_cbegin, dbl_is_missing); } static inline r_obj* cpl_detect_missing(r_obj* x) { DETECT_MISSING(r_complex, r_cpl_cbegin, cpl_is_missing); } static inline r_obj* raw_detect_missing(r_obj* x) { DETECT_MISSING(unsigned char, r_uchar_cbegin, raw_is_missing); } static inline r_obj* chr_detect_missing(r_obj* x) { DETECT_MISSING(r_obj*, r_chr_cbegin, chr_is_missing); } static inline r_obj* list_detect_missing(r_obj* x) { DETECT_MISSING(r_obj*, r_list_cbegin, list_is_missing); } #undef DETECT_MISSING // ----------------------------------------------------------------------------- static inline r_obj* df_detect_missing(r_obj* x) { int n_prot = 0; const r_ssize n_col = r_length(x); const r_ssize size = vec_size(x); r_obj* const* v_x = r_list_cbegin(x); // A location vector to track rows where we still need to check for missing // values. After we iterate through all columns, `v_loc` points to the missing // rows. r_ssize loc_size = size; r_obj* loc_shelter = KEEP_N(r_alloc_raw(loc_size * sizeof(r_ssize)), &n_prot); r_ssize* v_loc = (r_ssize*) r_raw_begin(loc_shelter); for (r_ssize i = 0; i < loc_size; ++i) { v_loc[i] = i; } for (r_ssize i = 0; i < n_col; ++i) { r_obj* col = v_x[i]; loc_size = col_detect_missing(col, v_loc, loc_size); // If all rows have at least one non-missing value, break if (loc_size == 0) { break; } } r_obj* out = KEEP_N(r_new_logical(size), &n_prot); int* v_out = r_lgl_begin(out); r_p_lgl_fill(v_out, 0, size); for (r_ssize i = 0; i < loc_size; ++i) { const r_ssize loc = v_loc[i]; v_out[loc] = 1; } FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static inline r_ssize col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { const enum vctrs_type type = vec_proxy_typeof(x); switch (type) { case VCTRS_TYPE_logical: return lgl_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_integer: return int_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_double: return dbl_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_complex: return cpl_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_raw: return raw_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_character: return chr_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_list: return list_col_detect_missing(x, v_loc, loc_size); case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should have been flattened by now."); case VCTRS_TYPE_null: r_abort("Unexpected `NULL` column found in a data frame."); case VCTRS_TYPE_scalar: stop_scalar_type(x, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_detect_missing", type); } } // ----------------------------------------------------------------------------- /* * The data frame algorithm for `vec_detect_missing()` is fast because this * inner for loop doesn't have any `if` branches in it. We utilize the fact that * this is a no-op when the element isn't missing: * `new_loc_size += IS_MISSING(v_x[loc])` * This is faster than doing `if (IS_MISSING())` at each iteration, especially * when there is a moderate amount of missing values, which makes that branch * fairly unpredictable. * * `r_ssize* v_loc` is a location vector that tracks which rows we still need * to check for missingness. It is "narrowed" after each column is processed to * only point to the rows that might still be missing. After all columns are * processed, it points to exactly where the missing rows are. Here is some * pseudo R code that demonstrates how `v_loc` changes: * * ``` * df <- data.frame( * x = c(1, NA, NA, 2, NA, 3), * y = c(NA, NA, 1, 2, NA, 4) * ) * df * #> x y * #> 1 1 NA * #> 2 NA NA * #> 3 NA 1 * #> 4 2 2 * #> 5 NA NA * #> 6 3 4 * * # Initially any row could be missing * loc_size <- 6 * loc <- 1:6 * * # After processing the first column, only rows 2, 3, and 5 could be missing * loc_size <- 3 * loc <- c(2, 3, 5) * * # After processing the second column, only 2 and 5 could be missing * # This is the last column, so these are the missing rows * loc_size <- 2 * loc <- c(2, 5) * ``` * * For more details, see: https://github.com/r-lib/vctrs/pull/1584 */ #define COL_DETECT_MISSING(CTYPE, CBEGIN, IS_MISSING) do { \ CTYPE const* v_x = CBEGIN(x); \ r_ssize new_loc_size = 0; \ \ for (r_ssize i = 0; i < loc_size; ++i) { \ const r_ssize loc = v_loc[i]; \ v_loc[new_loc_size] = loc; \ new_loc_size += IS_MISSING(v_x[loc]); \ } \ \ return new_loc_size; \ } while (0) static inline r_ssize lgl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(int, r_lgl_cbegin, lgl_is_missing); } static inline r_ssize int_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(int, r_int_cbegin, int_is_missing); } static inline r_ssize dbl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(double, r_dbl_cbegin, dbl_is_missing); } static inline r_ssize cpl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(r_complex, r_cpl_cbegin, cpl_is_missing); } static inline r_ssize raw_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(unsigned char, r_uchar_cbegin, raw_is_missing); } static inline r_ssize chr_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(r_obj*, r_chr_cbegin, chr_is_missing); } static inline r_ssize list_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size) { COL_DETECT_MISSING(r_obj*, r_list_cbegin, list_is_missing); } #undef COL_DETECT_MISSING // ----------------------------------------------------------------------------- r_obj* ffi_vec_any_missing(r_obj* x) { return r_lgl(vec_any_missing(x)); } bool vec_any_missing(r_obj* x) { return vec_first_missing(x) != vec_size(x); } r_ssize vec_first_missing(r_obj* x) { r_obj* proxy = KEEP(vec_proxy_equal(x)); r_ssize out = proxy_first_missing(proxy); FREE(1); return out; } static inline r_ssize proxy_first_missing(r_obj* proxy) { const enum vctrs_type type = vec_proxy_typeof(proxy); switch (type) { case VCTRS_TYPE_logical: return lgl_first_missing(proxy); case VCTRS_TYPE_integer: return int_first_missing(proxy); case VCTRS_TYPE_double: return dbl_first_missing(proxy); case VCTRS_TYPE_complex: return cpl_first_missing(proxy); case VCTRS_TYPE_raw: return raw_first_missing(proxy); case VCTRS_TYPE_character: return chr_first_missing(proxy); case VCTRS_TYPE_list: return list_first_missing(proxy); case VCTRS_TYPE_dataframe: return df_first_missing(proxy); case VCTRS_TYPE_null: return 0; case VCTRS_TYPE_scalar: stop_scalar_type(proxy, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_first_missing", type); } r_stop_unreachable(); } // ----------------------------------------------------------------------------- #define FIRST_MISSING(CTYPE, CBEGIN, IS_MISSING) do { \ const r_ssize size = r_length(x); \ \ CTYPE const* v_x = CBEGIN(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ if (IS_MISSING(v_x[i])) { \ return i; \ } \ } \ \ return size; \ } while (0) static inline r_ssize lgl_first_missing(r_obj* x) { FIRST_MISSING(int, r_lgl_cbegin, lgl_is_missing); } static inline r_ssize int_first_missing(r_obj* x) { FIRST_MISSING(int, r_int_cbegin, int_is_missing); } static inline r_ssize dbl_first_missing(r_obj* x) { FIRST_MISSING(double, r_dbl_cbegin, dbl_is_missing); } static inline r_ssize cpl_first_missing(r_obj* x) { FIRST_MISSING(r_complex, r_cpl_cbegin, cpl_is_missing); } static inline r_ssize raw_first_missing(r_obj* x) { FIRST_MISSING(unsigned char, r_uchar_cbegin, raw_is_missing); } static inline r_ssize chr_first_missing(r_obj* x) { FIRST_MISSING(r_obj*, r_chr_cbegin, chr_is_missing); } static inline r_ssize list_first_missing(r_obj* x) { FIRST_MISSING(r_obj*, r_list_cbegin, list_is_missing); } #undef FIRST_MISSING // ----------------------------------------------------------------------------- static inline r_ssize df_first_missing(r_obj* x) { const r_ssize n_cols = r_length(x); const r_ssize size = vec_size(x); r_ssize i = 0; if (n_cols > 0) { // First perform a very cheap check to see if there is at least 1 missing // value in the first column. If not, then we are done. If there is at least // 1 missing value, we start the loop below from there by updating `i`. This // avoids the more expensive rowwise poly-op loop when there aren't any // missing values. r_obj* col = r_list_get(x, 0); i = vec_first_missing(col); if (i == size) { return size; } } int n_prot = 0; struct poly_vec* p_poly_x = new_poly_vec(x, VCTRS_TYPE_dataframe); KEEP_N(p_poly_x->shelter, &n_prot); const void* v_x = p_poly_x->p_vec; r_ssize out = size; for (; i < size; ++i) { if (p_df_is_missing(v_x, i)) { out = i; break; } } FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static inline const unsigned char* r_uchar_cbegin(r_obj* x) { // TODO: Move to the rlang library return (const unsigned char*) r_raw_cbegin(x); } vctrs/src/arg.h0000644000176200001440000000250715075563301013135 0ustar liggesusers#ifndef VCTRS_ARG_H #define VCTRS_ARG_H #include "vctrs-core.h" // Materialise an argument tag as a CHARSXP. r_obj* vctrs_arg(struct vctrs_arg* arg); // Materialise an argument tag as a vmax-protected C string. const char* vec_arg_format(struct vctrs_arg* p_arg); // Simple wrapper around a string struct vctrs_arg new_wrapper_arg(struct vctrs_arg* parent, const char* arg); struct vctrs_arg new_lazy_arg(struct r_lazy* data); // Wrapper around a counter representing the current position of the // argument struct arg_data_counter { struct vctrs_arg* p_parent; r_ssize* i; r_obj** names; }; struct vctrs_arg new_counter_arg(struct vctrs_arg* parent, struct arg_data_counter* data); struct arg_data_counter new_counter_arg_data(struct vctrs_arg* p_parent, r_ssize* i, r_obj** names); struct vctrs_arg* new_subscript_arg_vec(struct vctrs_arg* parent, r_obj* x, r_ssize* p_i); struct vctrs_arg* new_subscript_arg(struct vctrs_arg* parent, r_obj* names, r_ssize n, r_ssize* p_i); #endif vctrs/src/cast.h0000644000176200001440000000462315120513137013310 0ustar liggesusers#ifndef VCTRS_CAST_H #define VCTRS_CAST_H #include "vctrs-core.h" #include "ptype2.h" struct cast_opts { r_obj* x; r_obj* to; struct vctrs_arg* p_x_arg; struct vctrs_arg* p_to_arg; struct r_lazy call; enum s3_fallback s3_fallback; }; struct cast_common_opts { struct vctrs_arg* p_arg; struct r_lazy call; enum s3_fallback s3_fallback; }; r_obj* vec_cast_opts(const struct cast_opts* opts); static inline r_obj* vec_cast(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call) { struct cast_opts opts = { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg, .call = call }; return vec_cast_opts(&opts); } static inline r_obj* vec_cast_params(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, enum s3_fallback s3_fallback) { const struct cast_opts opts = { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg, .call = call, .s3_fallback = s3_fallback }; return vec_cast_opts(&opts); } r_obj* vec_cast_common(r_obj* xs, r_obj* to, struct vctrs_arg* p_arg, struct r_lazy call); r_obj* vec_cast_common_opts(r_obj* xs, r_obj* to, const struct cast_common_opts* opts); r_obj* vec_cast_common_params(r_obj* xs, r_obj* to, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call); struct cast_opts new_cast_opts(r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, r_obj* opts); r_obj* vec_cast_e(const struct cast_opts* opts, ERR* err); r_obj* vec_cast_default(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, enum s3_fallback s3_fallback); #endif vctrs/src/utils.c0000644000176200001440000016014315157242340013516 0ustar liggesusers#include "vctrs-core.h" #include "vctrs.h" #include "type-data-frame.h" #include "vec-bool.h" #include // Initialised at load time SEXP vctrs_method_table = NULL; SEXP base_method_table = NULL; SEXP s4_c_method_table = NULL; SEXP strings_tbl = NULL; SEXP strings_tbl_df = NULL; SEXP strings_data_frame = NULL; SEXP strings_date = NULL; SEXP strings_posixct = NULL; SEXP strings_posixlt = NULL; SEXP strings_posixt = NULL; SEXP strings_factor = NULL; SEXP strings_ordered = NULL; SEXP strings_list = NULL; SEXP classes_data_frame = NULL; SEXP classes_factor = NULL; SEXP classes_ordered = NULL; SEXP classes_date = NULL; SEXP classes_posixct = NULL; SEXP classes_tibble = NULL; SEXP classes_vctrs_group_rle = NULL; static SEXP syms_as_data_frame2 = NULL; static SEXP fns_as_data_frame2 = NULL; static SEXP vctrs_eval_mask_n_impl(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args, SEXP env); /** * Evaluate with masked arguments * * This takes two arrays of argument (`args`) and argument names * `syms`). The names should correspond to formal arguments of `fn`. * Elements of `args` are assigned to their corresponding name in * `syms` directly in the current environment, i.e. the environment of * the closure wrapping the `.Call()` invokation. Since masked * evaluation causes side effects and variable assignments in that * frame environment, the native code invokation must be tailing: no * further R code (including `on.exit()` expressions) should be * evaluated in that closure wrapper. * * A call to `fn` is constructed with the * CARs and TAGs assigned symmetrically to the elements of * `syms`. This way the arguments are masked by symbols corresponding * to the formal parameters. * * @param fn The function to call. * @param syms A null-terminated array of symbols. The arguments * `args` are assigned to these symbols. The assignment occurs in a * child of `env` and the dispatch call refers to these symbols. * @param args A null-terminated array of arguments passed to the method. * @param env The environment in which to evaluate. */ SEXP vctrs_eval_mask_n(SEXP fn, SEXP* syms, SEXP* args) { return vctrs_eval_mask_n_impl(R_NilValue, fn, syms, args, vctrs_ns_env); } SEXP vctrs_eval_mask1(SEXP fn, SEXP x_sym, SEXP x) { SEXP syms[2] = { x_sym, NULL }; SEXP args[2] = { x, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask2(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y) { SEXP syms[3] = { x_sym, y_sym, NULL }; SEXP args[3] = { x, y, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask3(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[4] = { x_sym, y_sym, z_sym, NULL }; SEXP args[4] = { x, y, z, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask4(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4) { SEXP syms[5] = { x1_sym, x2_sym, x3_sym, x4_sym, NULL }; SEXP args[5] = { x1, x2, x3, x4, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask5(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5) { SEXP syms[6] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, NULL }; SEXP args[6] = { x1, x2, x3, x4, x5, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask6(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6) { SEXP syms[7] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, NULL }; SEXP args[7] = { x1, x2, x3, x4, x5, x6, NULL }; return vctrs_eval_mask_n(fn, syms, args); } SEXP vctrs_eval_mask7(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6, SEXP x7_sym, SEXP x7) { SEXP syms[8] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, NULL }; SEXP args[8] = { x1, x2, x3, x4, x5, x6, x7, NULL }; return vctrs_eval_mask_n(fn, syms, args); } r_obj* vctrs_eval_mask8(r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7, r_obj* x8_sym, r_obj* x8) { r_obj* syms[9] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, x8_sym, NULL }; r_obj* args[9] = { x1, x2, x3, x4, x5, x6, x7, x8, NULL }; return vctrs_eval_mask_n(fn, syms, args); } /** * Dispatch in the current environment * * Like `vctrs_eval_mask_n()`, the arguments `args` are are assigned * to the symbols `syms`. In addition, the function `fn` is assigned * to `fn_sym`. The mask is the current environment which has hygiene * implications regarding the closure wrapping `.Call()`, as * documented in `vctrs_eval_mask_n()`. * * @param fn_sym A symbol to which `fn` is assigned. * @inheritParams vctrs_eval_mask_n */ SEXP vctrs_dispatch_n(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args) { SEXP mask = PROTECT(r_peek_frame()); SEXP out = vctrs_eval_mask_n_impl(fn_sym, fn, syms, args, mask); UNPROTECT(1); return out; } SEXP vctrs_dispatch1(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x) { SEXP syms[2] = { x_sym, NULL }; SEXP args[2] = { x, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch2(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y) { SEXP syms[3] = { x_sym, y_sym, NULL }; SEXP args[3] = { x, y, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch3(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[4] = { x_sym, y_sym, z_sym, NULL }; SEXP args[4] = { x, y, z, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch4(SEXP fn_sym, SEXP fn, SEXP w_sym, SEXP w, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z) { SEXP syms[5] = { w_sym, x_sym, y_sym, z_sym, NULL }; SEXP args[5] = { w, x, y, z, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch6(SEXP fn_sym, SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6) { SEXP syms[7] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, NULL }; SEXP args[7] = { x1, x2, x3, x4, x5, x6, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } static SEXP vctrs_eval_mask_n_impl(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args, SEXP env) { SEXP mask = PROTECT(r_alloc_empty_environment(env)); if (fn_sym != R_NilValue) { Rf_defineVar(fn_sym, fn, mask); fn = fn_sym; } SEXP body = PROTECT(r_call_n(fn, syms, syms)); SEXP call_fn = PROTECT(r_new_function(R_NilValue, body, mask)); SEXP call = PROTECT(Rf_lang1(call_fn)); while (*syms) { Rf_defineVar(*syms, *args, mask); ++syms; ++args; } SEXP out = Rf_eval(call, env); UNPROTECT(4); return out; } // [[ register() ]] SEXP vctrs_maybe_shared_col(SEXP x, SEXP i) { int i_ = r_int_get(i, 0) - 1; SEXP col = VECTOR_ELT(x, i_); bool out = MAYBE_SHARED(col); return Rf_ScalarLogical(out); } // [[ register() ]] SEXP vctrs_new_df_unshared_col(void) { SEXP col = PROTECT(Rf_allocVector(INTSXP, 1)); INTEGER(col)[0] = 1; SEXP out = PROTECT(Rf_allocVector(VECSXP, 1)); // In R 4.0.0, `SET_VECTOR_ELT()` bumps the REFCNT of // `col`. Because of this, `col` is now referenced (refcnt > 0), // but it isn't shared (refcnt > 1). SET_VECTOR_ELT(out, 0, col); SEXP names = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(names, 0, Rf_mkChar("x")); Rf_setAttrib(out, R_NamesSymbol, names); init_data_frame(out, 1); UNPROTECT(3); return out; } // [[ include("utils.h") ]] SEXP map(SEXP x, SEXP (*fn)(SEXP)) { R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SET_VECTOR_ELT(out, i, fn(VECTOR_ELT(x, i))); } SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(2); return out; } // [[ include("utils.h") ]] SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data) { R_len_t n = Rf_length(x); SEXP out = PROTECT(Rf_allocVector(VECSXP, n)); for (R_len_t i = 0; i < n; ++i) { SET_VECTOR_ELT(out, i, fn(VECTOR_ELT(x, i), data)); } SEXP nms = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); Rf_setAttrib(out, R_NamesSymbol, nms); UNPROTECT(2); return out; } // [[ include("utils.h") ]] SEXP bare_df_map(SEXP df, SEXP (*fn)(SEXP)) { SEXP out = PROTECT(map(df, fn)); // Shallow ownership over `out` because `map()` generates a fresh // list. We only care about "restoring" that bare list to the type of `df`, // not the columns, so not recursive. struct vec_restore_opts opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; out = vec_bare_df_restore(out, df, &opts); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP df_map(SEXP df, SEXP (*fn)(SEXP)) { SEXP out = PROTECT(map(df, fn)); // Shallow ownership over `out` because `map()` generates a fresh // list. We only care about "restoring" that bare list to the type of `df`, // not the contents, so not recursive. struct vec_restore_opts opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; out = vec_df_restore(out, df, &opts); UNPROTECT(1); return out; } #define RESIZE(CONST_DEREF, DEREF, CTYPE, SEXPTYPE) do { \ if (x_size == size) { \ return x; \ } \ \ const CTYPE* p_x = CONST_DEREF(x); \ \ SEXP out = PROTECT(Rf_allocVector(SEXPTYPE, size)); \ CTYPE* p_out = DEREF(out); \ \ r_ssize copy_size = (size > x_size) ? x_size : size; \ \ r_memcpy(p_out, p_x, copy_size * sizeof(CTYPE)); \ \ UNPROTECT(1); \ return out; \ } while (0) #define RESIZE_BARRIER(CONST_DEREF, SEXPTYPE, SET) do { \ if (x_size == size) { \ return x; \ } \ \ const SEXP* p_x = CONST_DEREF(x); \ \ SEXP out = PROTECT(Rf_allocVector(SEXPTYPE, size)); \ \ r_ssize copy_size = (size > x_size) ? x_size : size; \ \ for (r_ssize i = 0; i < copy_size; ++i) { \ SET(out, i, p_x[i]); \ } \ \ UNPROTECT(1); \ return out; \ } while (0) // Faster than `Rf_xlengthgets()` because that fills the new extended // locations with `NA`, which we don't need. // [[ include("utils.h") ]] SEXP int_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE(INTEGER_RO, INTEGER, int, INTSXP); } // [[ include("utils.h") ]] SEXP raw_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE(RAW_RO, RAW, Rbyte, RAWSXP); } // [[ include("utils.h") ]] SEXP chr_resize(SEXP x, r_ssize x_size, r_ssize size) { RESIZE_BARRIER(STRING_PTR_RO, STRSXP, SET_STRING_ELT); } #undef RESIZE #undef RESIZE_BARRIER inline void never_reached(const char* fn) { Rf_error("Internal error in `%s()`: Reached the unreachable.", fn); } static char s3_buf[200]; SEXP s3_paste_method_sym(const char* generic, const char* cls) { int gen_len = strlen(generic); int cls_len = strlen(cls); int dot_len = 1; if (gen_len + cls_len + dot_len >= sizeof(s3_buf)) { r_stop_internal("Generic or class name is too long."); } char* buf = s3_buf; r_memcpy(buf, generic, gen_len); buf += gen_len; *buf = '.'; ++buf; r_memcpy(buf, cls, cls_len); buf += cls_len; *buf = '\0'; return Rf_install(s3_buf); } // First check in global env, then in method table SEXP s3_get_method(const char* generic, const char* cls, SEXP table) { SEXP sym = s3_paste_method_sym(generic, cls); return s3_sym_get_method(sym, table); } SEXP s3_sym_get_method(SEXP sym, SEXP table) { // `r_env_get()` errors on missing bindings, // so we have to check with `r_env_has()` if (r_env_has(R_GlobalEnv, sym)) { SEXP method = r_env_get(R_GlobalEnv, sym); if (r_is_function(method)) { return method; } } if (r_env_has(table, sym)) { SEXP method = r_env_get(table, sym); if (r_is_function(method)) { return method; } } return R_NilValue; } // [[ register() ]] SEXP vctrs_s3_find_method(SEXP generic, SEXP x, SEXP table) { return s3_find_method(r_chr_get_c_string(generic, 0), x, table); } // [[ register() ]] r_obj* ffi_s3_get_method(r_obj* generic, r_obj* cls, r_obj* table) { if (!r_is_string(generic)) { r_stop_internal("`generic` must be a string"); } if (!r_is_string(cls)) { r_stop_internal("`cls` must be a string"); } return s3_get_method(r_chr_get_c_string(generic, 0), r_chr_get_c_string(cls, 0), table); } // [[ include("utils.h") ]] SEXP s3_find_method(const char* generic, SEXP x, SEXP table) { if (!r_is_object(x)) { return R_NilValue; } SEXP cls = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP method = s3_class_find_method(generic, cls, table); UNPROTECT(1); return method; } // [[ include("utils.h") ]] SEXP s3_class_find_method(const char* generic, SEXP cls, SEXP table) { // Avoid corrupt objects where `x` passes `r_is_object()`, but the class is NULL if (cls == R_NilValue) { return R_NilValue; } SEXP const* p_cls = STRING_PTR_RO(cls); int n_cls = Rf_length(cls); for (int i = 0; i < n_cls; ++i) { SEXP method = s3_get_method(generic, CHAR(p_cls[i]), table); if (method != R_NilValue) { return method; } } return R_NilValue; } // [[ include("utils.h") ]] SEXP s3_get_class(SEXP x) { SEXP cls = R_NilValue; if (r_is_object(x)) { cls = Rf_getAttrib(x, R_ClassSymbol); } // This handles unclassed objects as well as gremlins objects where // `x` passes `r_is_object()`, but the class is NULL if (cls == R_NilValue) { cls = s3_bare_class(x); } if (!Rf_length(cls)) { r_stop_internal("Class must have length."); } return cls; } SEXP s3_get_class0(SEXP x) { SEXP cls = PROTECT(s3_get_class(x)); SEXP out = STRING_ELT(cls, 0); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP s3_find_method_xy(const char* generic, SEXP x, SEXP y, SEXP table, SEXP* method_sym_out) { SEXP x_class = PROTECT(s3_get_class0(x)); SEXP y_class = PROTECT(s3_get_class0(y)); SEXP method_sym = R_NilValue; method_sym = s3_paste_method_sym(generic, CHAR(x_class)); method_sym = s3_paste_method_sym(CHAR(PRINTNAME(method_sym)), CHAR(y_class)); SEXP method = s3_sym_get_method(method_sym, table); if (method == R_NilValue) { *method_sym_out = R_NilValue; } else { *method_sym_out = method_sym; } UNPROTECT(2); return method; } // [[ include("utils.h") ]] SEXP s3_find_method2(const char* generic, SEXP x, SEXP table, SEXP* method_sym_out) { SEXP cls = PROTECT(s3_get_class0(x)); SEXP method_sym = s3_paste_method_sym(generic, CHAR(cls)); SEXP method = s3_sym_get_method(method_sym, table); if (method == R_NilValue) { *method_sym_out = R_NilValue; } else { *method_sym_out = method_sym; } UNPROTECT(1); return method; } // [[ include("utils.h") ]] SEXP s3_bare_class(SEXP x) { switch (TYPEOF(x)) { case NILSXP: return chrs_null; case LGLSXP: return chrs_logical; case INTSXP: return chrs_integer; case REALSXP: return chrs_double; case CPLXSXP: return chrs_complex; case STRSXP: return chrs_character; case RAWSXP: return chrs_raw; case VECSXP: return chrs_list; case EXPRSXP: return chrs_expression; case CLOSXP: case SPECIALSXP: case BUILTINSXP: return chrs_function; default: stop_unimplemented_vctrs_type("base_dispatch_class_str", vec_typeof(x)); } } static SEXP s4_get_method(const char* cls, SEXP table) { SEXP sym = Rf_install(cls); // `r_env_get()` errors on missing bindings, // so we have to check with `r_env_has()` if (r_env_has(table, sym)) { SEXP method = r_env_get(table, sym); if (r_is_function(method)) { return method; } } return R_NilValue; } // For S4 objects, the `table` is specific to the generic SEXP s4_find_method(SEXP x, SEXP table) { if (!r_is_s4(x)) { return R_NilValue; } SEXP cls = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP out = s4_class_find_method(cls, table); UNPROTECT(1); return out; } SEXP s4_class_find_method(SEXP cls, SEXP table) { // Avoid corrupt objects where `x` passes `r_is_object()`, but the class is NULL if (cls == R_NilValue) { return R_NilValue; } SEXP const* p_class = STRING_PTR_RO(cls); int n_class = Rf_length(cls); for (int i = 0; i < n_class; ++i) { SEXP method = s4_get_method(CHAR(p_class[i]), table); if (method != R_NilValue) { return method; } } return R_NilValue; } // [[ include("utils.h") ]] bool vec_implements_ptype2(SEXP x) { switch (vec_typeof(x)) { case VCTRS_TYPE_scalar: return false; case VCTRS_TYPE_s3: { SEXP method_sym = R_NilValue; SEXP method = s3_find_method_xy("vec_ptype2", x, x, vctrs_method_table, &method_sym); if (method != R_NilValue) { return true; } method = s3_find_method2("vec_ptype2", x, vctrs_method_table, &method_sym); return method != R_NilValue; } default: return true; } } // [[ register() ]] SEXP vctrs_implements_ptype2(SEXP x) { return r_lgl(vec_implements_ptype2(x)); } // [[ include("utils.h") ]] SEXP list_first_non_null(SEXP xs, R_len_t* non_null_i) { SEXP x = R_NilValue; R_len_t n = Rf_length(xs); R_len_t i = 0; for (; i < n; ++i) { x = VECTOR_ELT(xs, i); if (x != R_NilValue) { break; } } if (non_null_i) { *non_null_i = i; } return x; } // [[ include("utils.h") ]] SEXP node_compact_d(SEXP node) { SEXP handle = PROTECT(Rf_cons(R_NilValue, node)); SEXP prev = handle; while (node != R_NilValue) { if (CAR(node) == R_NilValue) { SETCDR(prev, CDR(node)); } else { prev = node; } node = CDR(node); } UNPROTECT(1); return CDR(handle); } // [[ include("utils.h") ]] SEXP new_empty_factor(SEXP levels) { if (TYPEOF(levels) != STRSXP) { r_stop_internal("`level` must be a character vector."); } SEXP out = PROTECT(Rf_allocVector(INTSXP, 0)); Rf_setAttrib(out, R_LevelsSymbol, levels); Rf_setAttrib(out, R_ClassSymbol, classes_factor); UNPROTECT(1); return out; } // [[ include("utils.h") ]] SEXP new_empty_ordered(SEXP levels) { SEXP out = PROTECT(Rf_allocVector(INTSXP, 0)); Rf_setAttrib(out, R_LevelsSymbol, levels); Rf_setAttrib(out, R_ClassSymbol, classes_ordered); UNPROTECT(1); return out; } // [[ include("utils.h") ]] bool list_has_inner_vec_names(SEXP x, R_len_t size) { for (R_len_t i = 0; i < size; ++i) { SEXP elt = VECTOR_ELT(x, i); if (vec_names(elt) != R_NilValue) { return true; } } return false; } /** * Pluck elements `i` from a list of lists. * @return A list of the same length as `xs`. */ // [[ include("utils.h") ]] r_obj* list_pluck(r_obj* xs, r_ssize i) { r_ssize n = r_length(xs); r_obj* const * v_xs = r_list_cbegin(xs); r_obj* out = KEEP(r_new_list(n)); for (r_ssize j = 0; j < n; ++j) { r_obj* x = v_xs[j]; if (x != r_null) { r_list_poke(out, j, r_list_get(x, i)); } } FREE(1); return out; } // Initialised at load time const char* compact_seq_class_string = NULL; SEXP compact_seq_classes = NULL; // p[0] = Start value // p[1] = Sequence size. Always >= 0. // p[2] = Step size to increment/decrement `start` with void init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing) { int step = increasing ? 1 : -1; p[0] = start; p[1] = size; p[2] = step; } // Exported for testing with `list_combine()` r_obj* ffi_compact_seq(r_obj* ffi_start, r_obj* ffi_size, r_obj* ffi_increasing) { return compact_seq( r_arg_as_ssize(ffi_start, "start"), r_arg_as_ssize(ffi_size, "size"), r_arg_as_bool(ffi_increasing, "increasing") ); } // Returns a compact sequence that `vec_slice()` understands // The sequence is generally generated as `[start, start +/- size)` // If `size == 0` a 0-length sequence is generated // `start` is 0-based SEXP compact_seq(R_len_t start, R_len_t size, bool increasing) { if (start < 0) { r_stop_internal("`start` must not be negative."); } if (size < 0) { r_stop_internal("`size` must not be negative."); } if (!increasing && size > start + 1) { r_stop_internal("`size` must not be larger than `start` for decreasing sequences."); } SEXP info = PROTECT(Rf_allocVector(INTSXP, 3)); int* p = INTEGER(info); init_compact_seq(p, start, size, increasing); r_attrib_poke_class(info, compact_seq_classes); UNPROTECT(1); return info; } bool is_compact_seq(SEXP x) { return r_inherits(x, compact_seq_class_string); } // Materialize a 1-based sequence SEXP compact_seq_materialize(SEXP x) { int* p = INTEGER(x); R_len_t start = p[0] + 1; R_len_t size = p[1]; R_len_t step = p[2]; SEXP out = PROTECT(Rf_allocVector(INTSXP, size)); int* out_data = INTEGER(out); for (R_len_t i = 0; i < size; ++i, ++out_data, start += step) { *out_data = start; } UNPROTECT(1); return out; } // Initialised at load time const char* compact_rep_class_string = NULL; SEXP compact_rep_classes = NULL; void init_compact_rep(int* p, R_len_t i, R_len_t n) { p[0] = i; p[1] = n; } // Returns a compact repetition that `vec_slice()` understands // `i` should be an R-based index SEXP compact_rep(R_len_t i, R_len_t n) { if (n < 0) { r_stop_internal("Negative `n` in `compact_rep()`."); } SEXP rep = PROTECT(Rf_allocVector(INTSXP, 2)); int* p = INTEGER(rep); init_compact_rep(p, i, n); r_attrib_poke_class(rep, compact_rep_classes); UNPROTECT(1); return rep; } bool is_compact_rep(SEXP x) { return r_inherits(x, compact_rep_class_string); } SEXP compact_rep_materialize(SEXP x) { int i = r_int_get(x, 0); int n = r_int_get(x, 1); SEXP out = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill(out, i, n); UNPROTECT(1); return out; } // Initialised at load time const char* compact_condition_class_string = NULL; SEXP compact_condition_classes = NULL; /** * Compact condition index * * A condition index usable with `VCTRS_INDEX_STYLE_condition` * that is backed by a RAWSXP bool array rather than a LGLSXP. * * Extremely useful when you only have `true` and `false` values * and you construct the index at the C level (like `default` * locations in `list_combine()`). * * Using a bool array is 4x less memory than a LGLSXP, and is * faster due to being able to load more of the array into a * single cache line. */ r_obj* new_compact_condition(R_xlen_t size) { if (size < 0) { r_stop_internal("Negative `size` in `compact_condition()`."); } r_obj* out = KEEP(r_alloc_raw(size * sizeof(bool))); r_attrib_poke_class(out, compact_condition_classes); FREE(1); return out; } bool is_compact_condition(r_obj* x) { return r_inherits(x, compact_condition_class_string); } r_ssize compact_condition_size(r_obj* x) { // Should always be the same as the length, but you never know return r_length(x) / sizeof(bool); } // Materializes as its corresponding logical index. // Maintains `index_style` of `VCTRS_INDEX_STYLE_condition`. r_obj* compact_condition_materialize(r_obj* x) { const bool* v_x = compact_condition_cbegin(x); const r_ssize size = compact_condition_size(x); r_obj* out = KEEP(r_alloc_logical(size)); int* v_out = r_lgl_begin(out); for (r_ssize i = 0; i < size; ++i) { v_out[i] = v_x[i]; } FREE(1); return out; } // Materializes as its corresponding `VCTRS_INDEX_STYLE_location` index r_obj* compact_condition_materialize_location(r_obj* x) { const bool* v_x = compact_condition_cbegin(x); const r_ssize size = compact_condition_size(x); return p_bool_which(v_x, size); } bool* compact_condition_begin(r_obj* x) { return (bool*) r_raw_begin(x); } const bool* compact_condition_cbegin(r_obj* x) { return (const bool*) r_raw_cbegin(x); } r_ssize compact_condition_sum(r_obj* x) { const bool* v_x = compact_condition_cbegin(x); const r_ssize size = compact_condition_size(x); return p_bool_sum(v_x, size); } // Exported for testing with `vec_assign_compact_condition()` r_obj* ffi_as_compact_condition(r_obj* x) { if (r_typeof(x) != R_TYPE_logical) { r_stop_internal("`x` must be a logical condition vector."); } const r_ssize size = r_length(x); const int* v_x = r_lgl_cbegin(x); r_obj* out = KEEP(new_compact_condition(size)); bool* v_out = compact_condition_begin(out); for (r_ssize i = 0; i < size; ++i) { const int elt = v_x[i]; if (elt == r_globals.na_int) { r_stop_internal("Can't use `NA` when creating a `compact_condition`."); } v_out[i] = elt; } FREE(1); return out; } // Materialize the subscript as its corresponding `index_style` // // - integer -> `VCTRS_INDEX_STYLE_location` // - compact_rep -> `VCTRS_INDEX_STYLE_location` // - compact_seq -> `VCTRS_INDEX_STYLE_location` // // - logical -> `VCTRS_INDEX_STYLE_condition` // - compact_condition -> `VCTRS_INDEX_STYLE_condition` SEXP vec_subscript_materialize(SEXP x) { if (is_compact_rep(x)) { return compact_rep_materialize(x); } else if (is_compact_seq(x)) { return compact_seq_materialize(x); } else if (is_compact_condition(x)) { return compact_condition_materialize(x); } else { return x; } } R_len_t vec_subscript_size(SEXP x) { if (is_compact_rep(x)) { return r_int_get(x, 1); } else if (is_compact_seq(x)) { return r_int_get(x, 1); } else if (is_compact_condition(x)) { return compact_condition_size(x); } else { return vec_size(x); } } r_ssize vec_condition_subscript_sum(r_obj* x, bool na_true) { if (is_compact_condition(x)) { return compact_condition_sum(x); } else { return r_lgl_sum(x, na_true); } } static SEXP syms_colnames = NULL; static SEXP fns_colnames = NULL; // [[ include("utils.h") ]] SEXP colnames(SEXP x) { return vctrs_dispatch1(syms_colnames, fns_colnames, syms_x, x); } r_obj* colnames2(r_obj* x) { r_obj* names = colnames(x); if (names == r_null) { return r_alloc_character(Rf_ncols(x)); } else { return names; } } // [[ include("utils.h") ]] bool is_integer64(SEXP x) { return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); } // [[ include("utils.h") ]] bool lgl_any_na(SEXP x) { R_xlen_t size = Rf_xlength(x); const int* p_x = LOGICAL_RO(x); for (R_xlen_t i = 0; i < size; ++i) { if (p_x[i] == NA_LOGICAL) { return true; } } return false; } void* r_vec_deref_barrier(SEXP x) { switch (TYPEOF(x)) { case STRSXP: case VECSXP: return (void*) x; default: return r_vec_begin(x); } } const void* r_vec_deref_barrier_const(SEXP x) { switch (TYPEOF(x)) { case STRSXP: case VECSXP: return (const void*) x; default: return r_vec_cbegin(x); } } #define FILL(CTYPE, DEST, DEST_I, SRC, SRC_I, N) \ do { \ CTYPE* p_dest = (CTYPE*) DEST; \ p_dest += DEST_I; \ CTYPE* end = p_dest + N; \ CTYPE value = ((const CTYPE*) SRC)[SRC_I]; \ \ while (p_dest != end) { \ *p_dest++ = value; \ } \ } while (false) #define FILL_BARRIER(GET, SET, DEST, DEST_I, SRC, SRC_I, N) \ do { \ SEXP out = (SEXP) DEST; \ SEXP value = GET((SEXP) SRC, SRC_I); \ \ for (r_ssize i = 0; i < N; ++i) { \ SET(out, DEST_I + i, value); \ } \ } while (false) void r_vec_fill(SEXPTYPE type, void* dest, r_ssize dest_i, const void* src, r_ssize src_i, r_ssize n) { switch (type) { case INTSXP: FILL(int, dest, dest_i, src, src_i, n); return; case STRSXP: FILL_BARRIER(STRING_ELT, SET_STRING_ELT, dest, dest_i, src, src_i, n); return; default: stop_unimplemented_type("r_vec_fill", type); } } #undef FILL_BARRIER #undef FILL #define FILL() { \ for (R_len_t i = 0; i < n; ++i) { \ p_x[i] = value; \ } \ } void r_p_lgl_fill(int* p_x, int value, R_len_t n) { FILL(); } void r_p_int_fill(int* p_x, int value, R_len_t n) { FILL(); } void r_p_chr_fill(SEXP* p_x, SEXP value, R_len_t n) { FILL(); } #undef FILL void r_lgl_fill(SEXP x, int value, R_len_t n) { r_p_lgl_fill(LOGICAL(x), value, n); } void r_int_fill(SEXP x, int value, R_len_t n) { r_p_int_fill(INTEGER(x), value, n); } void r_int_fill_seq(SEXP x, int start, R_len_t n) { int* data = INTEGER(x); for (R_len_t i = 0; i < n; ++i, ++data, ++start) { *data = start; } } SEXP r_seq(R_len_t from, R_len_t to) { R_len_t n = to - from; if (n < 0) { r_stop_internal("Negative length."); } SEXP seq = PROTECT(Rf_allocVector(INTSXP, n)); r_int_fill_seq(seq, from, n); UNPROTECT(1); return seq; } #define FIND(CTYPE, CONST_DEREF) \ R_len_t n = Rf_length(x); \ const CTYPE* data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < n; ++i) { \ if (data[i] == value) { \ return i; \ } \ } \ return -1 R_len_t r_chr_find(SEXP x, SEXP value) { FIND(SEXP, STRING_PTR_RO); } #undef FIND bool r_int_any_na(SEXP x) { int* data = INTEGER(x); R_len_t n = Rf_length(x); for (R_len_t i = 0; i < n; ++i, ++data) { if (*data == NA_INTEGER) { return true; } } return false; } // Treats missing values as `false` bool r_lgl_any(r_obj* x) { if (r_typeof(x) != R_TYPE_logical) { r_stop_internal("`x` must be a logical vector."); } const int* v_x = r_lgl_cbegin(x); r_ssize size = r_length(x); for (r_ssize i = 0; i < size; ++i) { if (v_x[i] == 1) { return true; } } return false; } // Like `!x` at the R level r_obj* r_lgl_invert(r_obj* x) { const r_ssize size = r_length(x); const int* v_x = r_lgl_cbegin(x); r_obj* out = KEEP(r_alloc_logical(size)); int* v_out = r_lgl_begin(out); for (r_ssize i = 0; i < size; ++i) { const int elt = v_x[i]; v_out[i] = (elt == r_globals.na_lgl) ? r_globals.na_lgl : !elt; } FREE(1); return out; } int r_chr_max_len(SEXP x) { R_len_t n = Rf_length(x); SEXP const* p = STRING_PTR_RO(x); int max = 0; for (R_len_t i = 0; i < n; ++i, ++p) { int len = strlen(CHAR(*p)); max = len > max ? len : max; } return max; } /** * Create a character vector of sequential integers * * @param n The sequence is from 1 to `n`. * @param buf,len A memory buffer of size `len`. * @param prefix A null-terminated string that is prefixed to the * sequence. */ SEXP r_chr_iota(R_len_t n, char* buf, int len, const char* prefix) { int prefix_len = strlen(prefix); if (len - 1 < prefix_len) { r_stop_internal("Prefix is larger than iota buffer."); } r_memcpy(buf, prefix, prefix_len); len -= prefix_len; char* beg = buf + prefix_len; SEXP out = PROTECT(Rf_allocVector(STRSXP, n)); for (R_len_t i = 0; i < n; ++i) { int written = snprintf(beg, len, "%d", i + 1); if (written >= len) { UNPROTECT(1); return R_NilValue; } SET_STRING_ELT(out, i, Rf_mkChar(buf)); } UNPROTECT(1); return out; } static SEXP new_env_call = NULL; static SEXP new_env__parent_node = NULL; static SEXP new_env__size_node = NULL; // [[ include("utils.h") ]] SEXP r_protect(SEXP x) { return Rf_lang2(fns_quote, x); } // [[ include("utils.h") ]] int r_bool_as_int(SEXP x) { if (!r_is_bool(x)) { Rf_errorcall(R_NilValue, "Input must be a single `TRUE` or `FALSE`."); } return LOGICAL(x)[0]; } bool r_is_number(SEXP x) { return TYPEOF(x) == INTSXP && Rf_length(x) == 1 && INTEGER(x)[0] != NA_INTEGER; } bool r_is_positive_number(SEXP x) { return r_is_number(x) && INTEGER(x)[0] > 0; } /** * Create a call or pairlist * * @param tags Optional. If not `NULL`, a null-terminated array of symbols. * @param cars Mandatory. A null-terminated array of CAR values. * @param fn The first CAR value of the language list. * * [[ include("utils.h") ]] */ SEXP _r_pairlist(SEXP* tags, SEXP* cars) { if (!cars) { r_stop_internal("NULL `cars`."); } SEXP list = PROTECT(Rf_cons(R_NilValue, R_NilValue)); SEXP node = list; while (*cars) { SEXP next_node = Rf_cons(*cars, R_NilValue); SETCDR(node, next_node); node = next_node; if (tags) { SET_TAG(next_node, *tags); ++tags; } ++cars; } UNPROTECT(1); return CDR(list); } SEXP r_call_n(SEXP fn, SEXP* tags, SEXP* cars) { return Rf_lcons(fn, _r_pairlist(tags, cars)); } bool r_has_name_at(SEXP names, R_len_t i) { if (TYPEOF(names) != STRSXP) { return false; } R_len_t n = Rf_length(names); if (n <= i) { r_stop_internal("Names shorter than expected: (%d/%d).", i + 1, n); } SEXP elt = STRING_ELT(names, i); return elt != NA_STRING && elt != strings_empty; } bool r_is_minimal_names(SEXP x) { if (TYPEOF(x) != STRSXP) { return false; } R_len_t n = Rf_length(x); const SEXP* p = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP elt = *p; if (elt == NA_STRING || elt == strings_empty) { return false; } } return true; } bool r_is_empty_names(SEXP x) { if (TYPEOF(x) != STRSXP) { if (x == R_NilValue) { return true; } else { return false; } } R_len_t n = Rf_length(x); const SEXP* p = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP elt = *p; if (elt != NA_STRING && elt != strings_empty) { return false; } } return true; } SEXP r_clone_referenced(SEXP x) { if (MAYBE_REFERENCED(x)) { return Rf_shallow_duplicate(x); } else { return x; } } bool r_is_names(SEXP names) { if (names == R_NilValue) { return false; } R_len_t n = Rf_length(names); const SEXP* p = STRING_PTR_RO(names); for (R_len_t i = 0; i < n; ++i, ++p) { SEXP nm = *p; if (nm == strings_empty || nm == NA_STRING) { return false; } } return true; } bool r_chr_has_string(SEXP x, SEXP str) { R_len_t n = Rf_length(x); const SEXP* xp = STRING_PTR_RO(x); for (R_len_t i = 0; i < n; ++i, ++xp) { if (*xp == str) { return true; } } return false; } SEXP r_as_data_frame(SEXP x) { if (is_bare_data_frame(x)) { return x; } else { return vctrs_dispatch1(syms_as_data_frame2, fns_as_data_frame2, syms_x, x); } } static SEXP syms_try_catch_hnd = NULL; static inline SEXP try_catch_hnd(SEXP ptr) { SEXP call = PROTECT(Rf_lang2(syms_try_catch_hnd, ptr)); SEXP out = Rf_eval(call, vctrs_ns_env); UNPROTECT(1); return out; } struct r_try_catch_data { void (*fn)(void*); void* fn_data; SEXP cnd_sym; void (*hnd)(void*); void* hnd_data; ERR err; }; // [[ register() ]] SEXP vctrs_try_catch_callback(SEXP ptr, SEXP cnd) { struct r_try_catch_data* data = (struct r_try_catch_data*) R_ExternalPtrAddr(ptr); if (cnd == R_NilValue) { if (data->fn) { data->fn(data->fn_data); } } else { data->err = cnd; if (data->hnd) { data->hnd(data->hnd_data); } } return R_NilValue; } static SEXP syms_try_catch_impl = NULL; // [[ include("utils.h") ]] ERR r_try_catch(void (*fn)(void*), void* fn_data, SEXP cnd_sym, void (*hnd)(void*), void* hnd_data) { struct r_try_catch_data data = { .fn = fn, .fn_data = fn_data, .cnd_sym = cnd_sym, .hnd = hnd, .hnd_data = hnd_data, .err = NULL }; SEXP xptr = PROTECT(R_MakeExternalPtr(&data, R_NilValue, R_NilValue)); SEXP hnd_fn = PROTECT(try_catch_hnd(xptr)); SEXP syms[3] = { syms_data, cnd_sym, NULL }; SEXP args[3] = { xptr, hnd_fn, NULL }; SEXP call = PROTECT(r_call_n(syms_try_catch_impl, syms, args)); Rf_eval(call, vctrs_ns_env); UNPROTECT(3); return data.err; } SEXP (*rlang_sym_as_character)(SEXP x); // [[ include("utils.h") ]] SEXP chr_c(SEXP x, SEXP y) { r_ssize x_n = r_length(x); r_ssize y_n = r_length(y); if (x_n == 0) { return y; } if (y_n == 0) { return x; } r_ssize out_n = r_ssize_add(x_n, y_n); SEXP out = PROTECT(r_alloc_vector(STRSXP, out_n)); const SEXP* p_x = STRING_PTR_RO(x); const SEXP* p_y = STRING_PTR_RO(y); for (r_ssize i = 0; i < x_n; ++i) { SET_STRING_ELT(out, i, p_x[i]); } for (r_ssize i = 0, j = x_n; i < y_n; ++i, ++j) { SET_STRING_ELT(out, j, p_y[i]); } UNPROTECT(1); return out; } // [[ register() ]] SEXP vctrs_fast_c(SEXP x, SEXP y) { SEXPTYPE x_type = TYPEOF(x); if (x_type != TYPEOF(y)) { Rf_error("`x` and `y` must have the same types."); } switch (x_type) { case STRSXP: return chr_c(x, y); default: stop_unimplemented_type("vctrs_fast_c", x_type); } } bool vctrs_debug_verbose = false; SEXP vctrs_ns_env = NULL; SEXP vctrs_shared_empty_str = NULL; SEXP vctrs_shared_empty_date = NULL; Rcomplex vctrs_shared_na_cpl; SEXP vctrs_shared_missing_lgl = NULL; SEXP vctrs_shared_missing_int = NULL; SEXP vctrs_shared_missing_dbl = NULL; SEXP vctrs_shared_missing_cpl = NULL; SEXP vctrs_shared_missing_raw = NULL; SEXP vctrs_shared_missing_chr = NULL; SEXP vctrs_shared_missing_list = NULL; SEXP vctrs_shared_zero_int = NULL; SEXP strings2 = NULL; SEXP strings_empty = NULL; SEXP strings_dots = NULL; SEXP strings_none = NULL; SEXP strings_minimal = NULL; SEXP strings_unique = NULL; SEXP strings_universal = NULL; SEXP strings_check_unique = NULL; SEXP strings_unique_quiet = NULL; SEXP strings_universal_quiet = NULL; SEXP strings_key = NULL; SEXP strings_loc = NULL; SEXP strings_val = NULL; SEXP strings_group = NULL; SEXP strings_length = NULL; SEXP strings_vctrs_vctr = NULL; SEXP strings_times = NULL; SEXP strings_needles = NULL; SEXP strings_haystack = NULL; SEXP chrs_subset = NULL; SEXP chrs_extract = NULL; SEXP chrs_assign = NULL; SEXP chrs_rename = NULL; SEXP chrs_remove = NULL; SEXP chrs_negate = NULL; SEXP chrs_null = NULL; SEXP chrs_logical = NULL; SEXP chrs_integer = NULL; SEXP chrs_double = NULL; SEXP chrs_complex = NULL; SEXP chrs_character = NULL; SEXP chrs_raw = NULL; SEXP chrs_list = NULL; SEXP chrs_expression = NULL; SEXP chrs_numeric = NULL; SEXP chrs_function = NULL; SEXP chrs_empty = NULL; SEXP chrs_cast = NULL; SEXP chrs_error = NULL; SEXP chrs_combine = NULL; SEXP chrs_convert = NULL; SEXP chrs_asc = NULL; SEXP chrs_desc = NULL; SEXP chrs_largest = NULL; SEXP chrs_smallest = NULL; SEXP chrs_which = NULL; SEXP syms_i = NULL; SEXP syms_j = NULL; SEXP syms_n = NULL; SEXP syms_x = NULL; SEXP syms_y = NULL; SEXP syms_x_size = NULL; SEXP syms_y_size = NULL; SEXP syms_to = NULL; SEXP syms_dots = NULL; SEXP syms_bracket = NULL; SEXP syms_x_arg = NULL; SEXP syms_y_arg = NULL; SEXP syms_to_arg = NULL; SEXP syms_times_arg = NULL; SEXP syms_subscript_arg = NULL; SEXP syms_needles_arg = NULL; SEXP syms_haystack_arg = NULL; SEXP syms_out = NULL; SEXP syms_value = NULL; SEXP syms_quiet = NULL; SEXP syms_dot_name_spec = NULL; SEXP syms_outer = NULL; SEXP syms_inner = NULL; SEXP syms_tilde = NULL; SEXP syms_dot_environment = NULL; SEXP syms_ptype = NULL; SEXP syms_missing = NULL; SEXP syms_size = NULL; SEXP syms_subscript_action = NULL; SEXP syms_subscript_type = NULL; SEXP syms_repair = NULL; SEXP syms_tzone = NULL; SEXP syms_data = NULL; SEXP syms_vctrs_error_incompatible_type = NULL; SEXP syms_vctrs_error_cast_lossy = NULL; SEXP syms_cnd_signal = NULL; SEXP syms_logical = NULL; SEXP syms_numeric = NULL; SEXP syms_character = NULL; SEXP syms_body = NULL; SEXP syms_parent = NULL; SEXP syms_s3_methods_table = NULL; SEXP syms_from_dispatch = NULL; SEXP syms_df_fallback = NULL; SEXP syms_s3_fallback = NULL; SEXP syms_stop_incompatible_type = NULL; SEXP syms_stop_incompatible_size = NULL; SEXP syms_stop_assert_size = NULL; SEXP syms_stop_matches_overflow = NULL; SEXP syms_stop_matches_nothing = NULL; SEXP syms_stop_matches_remaining = NULL; SEXP syms_stop_matches_incomplete = NULL; SEXP syms_stop_matches_multiple = NULL; SEXP syms_warn_matches_multiple = NULL; SEXP syms_stop_matches_relationship_one_to_one = NULL; SEXP syms_stop_matches_relationship_one_to_many = NULL; SEXP syms_stop_matches_relationship_many_to_one = NULL; SEXP syms_warn_matches_relationship_many_to_many = NULL; SEXP syms_stop_combine_unmatched = NULL; SEXP syms_action = NULL; SEXP syms_vctrs_common_class_fallback = NULL; SEXP syms_fallback_class = NULL; SEXP syms_abort = NULL; SEXP syms_message = NULL; SEXP syms_chr_proxy_collate = NULL; SEXP syms_actual = NULL; SEXP syms_required = NULL; SEXP syms_call = NULL; SEXP syms_dot_call = NULL; SEXP syms_which = NULL; SEXP syms_slice_value = NULL; SEXP syms_index_style = NULL; SEXP syms_loc = NULL; SEXP fns_bracket = NULL; SEXP fns_quote = NULL; SEXP fns_names = NULL; SEXP rlang_result_names = NULL; SEXP rlang_result_class = NULL; SEXP r_new_shared_vector(SEXPTYPE type, R_len_t n) { SEXP out = Rf_allocVector(type, n); R_PreserveObject(out); MARK_NOT_MUTABLE(out); return out; } SEXP r_new_shared_character(const char* name) { SEXP out = Rf_mkString(name); R_PreserveObject(out); MARK_NOT_MUTABLE(out); return out; } void c_print_backtrace(void) { #if defined(RLIB_DEBUG) #include #include void *buffer[500]; int nptrs = backtrace(buffer, 100); char **strings = backtrace_symbols(buffer, nptrs); for (int j = 0; j < nptrs; ++j) { Rprintf("%s\n", strings[j]); } free(strings); #else Rprintf("vctrs must be compliled with -DRLIB_DEBUG."); #endif } void vctrs_init_utils(SEXP ns) { vctrs_ns_env = ns; vctrs_debug_verbose = r_is_true(Rf_GetOption1(Rf_install("vctrs:::debug"))); vctrs_method_table = r_env_get(ns, Rf_install(".__S3MethodsTable__.")); base_method_table = r_env_get(R_BaseNamespace, Rf_install(".__S3MethodsTable__.")); s4_c_method_table = r_parse_eval("environment(methods::getGeneric('c'))$.MTable", R_GlobalEnv); R_PreserveObject(s4_c_method_table); vctrs_shared_empty_str = Rf_mkString(""); R_PreserveObject(vctrs_shared_empty_str); // Holds the CHARSXP objects because unlike symbols they can be // garbage collected strings2 = r_new_shared_vector(STRSXP, 25); strings_dots = Rf_mkChar("..."); SET_STRING_ELT(strings2, 0, strings_dots); strings_empty = Rf_mkChar(""); SET_STRING_ELT(strings2, 1, strings_empty); strings_date = Rf_mkChar("Date"); SET_STRING_ELT(strings2, 2, strings_date); strings_posixct = Rf_mkChar("POSIXct"); SET_STRING_ELT(strings2, 3, strings_posixct); strings_posixlt = Rf_mkChar("POSIXlt"); SET_STRING_ELT(strings2, 4, strings_posixlt); strings_posixt = Rf_mkChar("POSIXt"); SET_STRING_ELT(strings2, 5, strings_posixt); strings_none = Rf_mkChar("none"); SET_STRING_ELT(strings2, 6, strings_none); strings_minimal = Rf_mkChar("minimal"); SET_STRING_ELT(strings2, 7, strings_minimal); strings_unique = Rf_mkChar("unique"); SET_STRING_ELT(strings2, 8, strings_unique); strings_universal = Rf_mkChar("universal"); SET_STRING_ELT(strings2, 9, strings_universal); strings_check_unique = Rf_mkChar("check_unique"); SET_STRING_ELT(strings2, 10, strings_check_unique); strings_unique_quiet = Rf_mkChar("unique_quiet"); SET_STRING_ELT(strings2, 23, strings_unique_quiet); strings_universal_quiet = Rf_mkChar("universal_quiet"); SET_STRING_ELT(strings2, 24, strings_universal_quiet); strings_key = Rf_mkChar("key"); SET_STRING_ELT(strings2, 11, strings_key); strings_loc = Rf_mkChar("loc"); SET_STRING_ELT(strings2, 12, strings_loc); strings_val = Rf_mkChar("val"); SET_STRING_ELT(strings2, 13, strings_val); strings_group = Rf_mkChar("group"); SET_STRING_ELT(strings2, 14, strings_group); strings_length = Rf_mkChar("length"); SET_STRING_ELT(strings2, 15, strings_length); strings_factor = Rf_mkChar("factor"); SET_STRING_ELT(strings2, 16, strings_factor); strings_ordered = Rf_mkChar("ordered"); SET_STRING_ELT(strings2, 17, strings_ordered); strings_list = Rf_mkChar("list"); SET_STRING_ELT(strings2, 18, strings_list); strings_vctrs_vctr = Rf_mkChar("vctrs_vctr"); SET_STRING_ELT(strings2, 19, strings_vctrs_vctr); strings_times = Rf_mkChar("times"); SET_STRING_ELT(strings2, 20, strings_times); strings_needles = Rf_mkChar("needles"); SET_STRING_ELT(strings2, 21, strings_needles); strings_haystack = Rf_mkChar("haystack"); SET_STRING_ELT(strings2, 22, strings_haystack); classes_data_frame = r_new_shared_vector(STRSXP, 1); strings_data_frame = Rf_mkChar("data.frame"); SET_STRING_ELT(classes_data_frame, 0, strings_data_frame); classes_factor = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(classes_factor, 0, strings_factor); classes_ordered = r_new_shared_vector(STRSXP, 2); SET_STRING_ELT(classes_ordered, 0, strings_ordered); SET_STRING_ELT(classes_ordered, 1, strings_factor); classes_date = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(classes_date, 0, strings_date); classes_posixct = r_new_shared_vector(STRSXP, 2); SET_STRING_ELT(classes_posixct, 0, strings_posixct); SET_STRING_ELT(classes_posixct, 1, strings_posixt); chrs_subset = r_new_shared_character("subset"); chrs_extract = r_new_shared_character("extract"); chrs_assign = r_new_shared_character("assign"); chrs_rename = r_new_shared_character("rename"); chrs_remove = r_new_shared_character("remove"); chrs_negate = r_new_shared_character("negate"); chrs_null = r_new_shared_character("NULL"); chrs_logical = r_new_shared_character("logical"); chrs_integer = r_new_shared_character("integer"); chrs_double = r_new_shared_character("double"); chrs_complex = r_new_shared_character("complex"); chrs_character = r_new_shared_character("character"); chrs_raw = r_new_shared_character("raw"); chrs_list = r_new_shared_character("list"); chrs_expression = r_new_shared_character("expression"); chrs_numeric = r_new_shared_character("numeric"); chrs_function = r_new_shared_character("function"); chrs_empty = r_new_shared_character(""); chrs_cast = r_new_shared_character("cast"); chrs_error = r_new_shared_character("error"); chrs_combine = r_new_shared_character("combine"); chrs_convert = r_new_shared_character("convert"); chrs_asc = r_new_shared_character("asc"); chrs_desc = r_new_shared_character("desc"); chrs_largest = r_new_shared_character("largest"); chrs_smallest = r_new_shared_character("smallest"); chrs_which = r_new_shared_character("which"); classes_tibble = r_new_shared_vector(STRSXP, 3); strings_tbl_df = Rf_mkChar("tbl_df"); SET_STRING_ELT(classes_tibble, 0, strings_tbl_df); strings_tbl = Rf_mkChar("tbl"); SET_STRING_ELT(classes_tibble, 1, strings_tbl); SET_STRING_ELT(classes_tibble, 2, strings_data_frame); classes_vctrs_group_rle = r_new_shared_vector(STRSXP, 3); SET_STRING_ELT(classes_vctrs_group_rle, 0, Rf_mkChar("vctrs_group_rle")); SET_STRING_ELT(classes_vctrs_group_rle, 1, Rf_mkChar("vctrs_rcrd")); SET_STRING_ELT(classes_vctrs_group_rle, 2, Rf_mkChar("vctrs_vctr")); vctrs_shared_empty_date = r_new_shared_vector(REALSXP, 0); Rf_setAttrib(vctrs_shared_empty_date, R_ClassSymbol, classes_date); vctrs_shared_na_cpl.i = NA_REAL; vctrs_shared_na_cpl.r = NA_REAL; vctrs_shared_missing_lgl = r_new_shared_vector(LGLSXP, 1); LOGICAL(vctrs_shared_missing_lgl)[0] = NA_LOGICAL; vctrs_shared_missing_int = r_new_shared_vector(INTSXP, 1); INTEGER(vctrs_shared_missing_int)[0] = NA_INTEGER; vctrs_shared_missing_dbl = r_new_shared_vector(REALSXP, 1); REAL(vctrs_shared_missing_dbl)[0] = NA_REAL; vctrs_shared_missing_cpl = r_new_shared_vector(CPLXSXP, 1); COMPLEX(vctrs_shared_missing_cpl)[0] = vctrs_shared_na_cpl; // No actual `NA` value for raw, but we always use `0` vctrs_shared_missing_raw = r_new_shared_vector(RAWSXP, 1); RAW(vctrs_shared_missing_raw)[0] = 0; vctrs_shared_missing_chr = r_new_shared_vector(STRSXP, 1); SET_STRING_ELT(vctrs_shared_missing_chr, 0, NA_STRING); vctrs_shared_missing_list = r_new_shared_vector(VECSXP, 1); SET_VECTOR_ELT(vctrs_shared_missing_list, 0, R_NilValue); vctrs_shared_zero_int = r_new_shared_vector(INTSXP, 1); INTEGER(vctrs_shared_zero_int)[0] = 0; syms_i = Rf_install("i"); syms_j = Rf_install("j"); syms_n = Rf_install("n"); syms_x = Rf_install("x"); syms_y = Rf_install("y"); syms_x_size = Rf_install("x_size"); syms_y_size = Rf_install("y_size"); syms_to = Rf_install("to"); syms_dots = Rf_install("..."); syms_bracket = Rf_install("["); syms_x_arg = Rf_install("x_arg"); syms_y_arg = Rf_install("y_arg"); syms_to_arg = Rf_install("to_arg"); syms_times_arg = Rf_install("times_arg"); syms_subscript_arg = Rf_install("subscript_arg"); syms_needles_arg = Rf_install("needles_arg"); syms_haystack_arg = Rf_install("haystack_arg"); syms_out = Rf_install("out"); syms_value = Rf_install("value"); syms_quiet = Rf_install("quiet"); syms_dot_name_spec = Rf_install(".name_spec"); syms_outer = Rf_install("outer"); syms_inner = Rf_install("inner"); syms_tilde = Rf_install("~"); syms_dot_environment = Rf_install(".Environment"); syms_ptype = Rf_install("ptype"); syms_missing = R_MissingArg; syms_size = Rf_install("size"); syms_subscript_action = Rf_install("subscript_action"); syms_subscript_type = Rf_install("subscript_type"); syms_repair = Rf_install("repair"); syms_tzone = Rf_install("tzone"); syms_data = Rf_install("data"); syms_try_catch_impl = Rf_install("try_catch_impl"); syms_try_catch_hnd = Rf_install("try_catch_hnd"); syms_vctrs_error_incompatible_type = Rf_install("vctrs_error_incompatible_type"); syms_vctrs_error_cast_lossy = Rf_install("vctrs_error_cast_lossy"); syms_cnd_signal = Rf_install("cnd_signal"); syms_logical = Rf_install("logical"); syms_numeric = Rf_install("numeric"); syms_character = Rf_install("character"); syms_body = Rf_install("body"); syms_parent = Rf_install("parent"); syms_s3_methods_table = Rf_install(".__S3MethodsTable__."); syms_from_dispatch = Rf_install("vctrs:::from_dispatch"); syms_s3_fallback = Rf_install("vctrs:::s3_fallback"); syms_stop_incompatible_type = Rf_install("stop_incompatible_type"); syms_stop_incompatible_size = Rf_install("stop_incompatible_size"); syms_stop_assert_size = Rf_install("stop_assert_size"); syms_stop_matches_overflow = Rf_install("stop_matches_overflow"); syms_stop_matches_nothing = Rf_install("stop_matches_nothing"); syms_stop_matches_remaining = Rf_install("stop_matches_remaining"); syms_stop_matches_incomplete = Rf_install("stop_matches_incomplete"); syms_stop_matches_multiple = Rf_install("stop_matches_multiple"); syms_warn_matches_multiple = Rf_install("warn_matches_multiple"); syms_stop_matches_relationship_one_to_one = Rf_install("stop_matches_relationship_one_to_one"); syms_stop_matches_relationship_one_to_many = Rf_install("stop_matches_relationship_one_to_many"); syms_stop_matches_relationship_many_to_one = Rf_install("stop_matches_relationship_many_to_one"); syms_warn_matches_relationship_many_to_many = Rf_install("warn_matches_relationship_many_to_many"); syms_stop_combine_unmatched = Rf_install("stop_combine_unmatched"); syms_action = Rf_install("action"); syms_vctrs_common_class_fallback = Rf_install(c_strs_vctrs_common_class_fallback); syms_fallback_class = Rf_install("fallback_class"); syms_abort = Rf_install("abort"); syms_message = Rf_install("message"); syms_chr_proxy_collate = Rf_install("chr_proxy_collate"); syms_actual = Rf_install("actual"); syms_required = Rf_install("required"); syms_call = Rf_install("call"); syms_dot_call = Rf_install(".call"); syms_which = Rf_install("which"); syms_slice_value = Rf_install("slice_value"); syms_index_style = Rf_install("index_style"); syms_loc = Rf_install("loc"); fns_bracket = r_env_get(R_BaseEnv, syms_bracket); fns_quote = r_env_get(R_BaseEnv, Rf_install("quote")); fns_names = r_env_get(R_BaseEnv, Rf_install("names")); new_env_call = r_parse_eval("as.call(list(new.env, TRUE, NULL, NULL))", R_BaseEnv); R_PreserveObject(new_env_call); new_env__parent_node = CDDR(new_env_call); new_env__size_node = CDR(new_env__parent_node); rlang_sym_as_character = (SEXP (*)(SEXP)) R_GetCCallable("rlang", "rlang_sym_as_character"); syms_as_data_frame2 = Rf_install("as.data.frame2"); syms_colnames = Rf_install("colnames"); fns_as_data_frame2 = r_env_get(ns, syms_as_data_frame2); fns_colnames = r_env_get(R_BaseEnv, syms_colnames); compact_seq_class_string = "vctrs_compact_seq"; compact_seq_classes = Rf_allocVector(STRSXP, 1); R_PreserveObject(compact_seq_classes); SET_STRING_ELT(compact_seq_classes, 0, Rf_mkChar(compact_seq_class_string)); compact_rep_class_string = "vctrs_compact_rep"; compact_rep_classes = Rf_allocVector(STRSXP, 1); R_PreserveObject(compact_rep_classes); SET_STRING_ELT(compact_rep_classes, 0, Rf_mkChar(compact_rep_class_string)); compact_condition_class_string = "vctrs_compact_condition"; compact_condition_classes = Rf_allocVector(STRSXP, 1); R_PreserveObject(compact_condition_classes); SET_STRING_ELT(compact_condition_classes, 0, Rf_mkChar(compact_condition_class_string)); rlang_result_names = Rf_allocVector(STRSXP, 2); R_PreserveObject(rlang_result_names); SET_STRING_ELT(rlang_result_names, 0, Rf_mkChar("ok")); SET_STRING_ELT(rlang_result_names, 1, Rf_mkChar("err")); rlang_result_class = Rf_allocVector(STRSXP, 1); R_PreserveObject(rlang_result_class); SET_STRING_ELT(rlang_result_class, 0, Rf_mkChar("rlang_result")); // We assume the following in `union vctrs_dbl_indicator` VCTRS_ASSERT(sizeof(double) == sizeof(int64_t)); VCTRS_ASSERT(sizeof(double) == 2 * sizeof(int)); // We assume the following in `vec_order()` VCTRS_ASSERT(sizeof(int) == sizeof(int32_t)); VCTRS_ASSERT(sizeof(double) == sizeof(int64_t)); } vctrs/src/typeof2.c0000644000176200001440000004620314315060310013734 0ustar liggesusers#include "vctrs.h" /** * Type for symmetric binary dispatch. * * Permuting `x` and `y` does not change the typeof2. * * After adding entries in `vec_typeof2()`, adjust the list of types * in helper-types.R. This will ensure the consistency of the new * entries. */ /** * [[ include("utils.h") ]] * * @param left Output parameter. Set to 1 when the common type comes * from the left, 0 when it comes from the right, and -1 when it * comes from both sides. This means that "left" is the default * when coerced to a boolean value. */ enum vctrs_type2 vec_typeof2_impl(enum vctrs_type type_x, enum vctrs_type type_y, int* left) { switch (type_x) { case VCTRS_TYPE_null: { switch (type_y) { case VCTRS_TYPE_null: *left = -1; return VCTRS_TYPE2_null_null; case VCTRS_TYPE_unspecified: *left = 0; return VCTRS_TYPE2_null_unspecified; case VCTRS_TYPE_logical: *left = 0; return VCTRS_TYPE2_null_logical; case VCTRS_TYPE_integer: *left = 0; return VCTRS_TYPE2_null_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_null_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_null_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_null_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_null_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_null_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_null_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_null_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_null_scalar; } } case VCTRS_TYPE_unspecified: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_unspecified; case VCTRS_TYPE_unspecified: *left = -1; return VCTRS_TYPE2_unspecified_unspecified; case VCTRS_TYPE_logical: *left = 0; return VCTRS_TYPE2_unspecified_logical; case VCTRS_TYPE_integer: *left = 0; return VCTRS_TYPE2_unspecified_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_unspecified_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_unspecified_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_unspecified_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_unspecified_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_unspecified_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_unspecified_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_unspecified_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_unspecified_scalar; } } case VCTRS_TYPE_logical: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_logical; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_logical; case VCTRS_TYPE_logical: *left = -1; return VCTRS_TYPE2_logical_logical; case VCTRS_TYPE_integer: *left = 0; return VCTRS_TYPE2_logical_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_logical_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_logical_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_logical_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_logical_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_logical_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_logical_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_logical_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_logical_scalar; } } case VCTRS_TYPE_integer: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_integer; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_integer; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_integer; case VCTRS_TYPE_integer: *left = -1; return VCTRS_TYPE2_integer_integer; case VCTRS_TYPE_double: *left = 0; return VCTRS_TYPE2_integer_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_integer_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_integer_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_integer_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_integer_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_integer_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_integer_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_integer_scalar; } } case VCTRS_TYPE_double: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_double; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_double; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_double; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_double; case VCTRS_TYPE_double: *left = -1; return VCTRS_TYPE2_double_double; case VCTRS_TYPE_complex: *left = 0; return VCTRS_TYPE2_double_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_double_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_double_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_double_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_double_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_double_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_double_scalar; } } case VCTRS_TYPE_complex: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_complex; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_complex; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_complex; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_complex; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_complex; case VCTRS_TYPE_complex: *left = -1; return VCTRS_TYPE2_complex_complex; case VCTRS_TYPE_character: *left = 0; return VCTRS_TYPE2_complex_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_complex_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_complex_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_complex_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_complex_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_complex_scalar; } } case VCTRS_TYPE_character: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_character; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_character; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_character; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_character; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_character; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_character; case VCTRS_TYPE_character: *left = -1; return VCTRS_TYPE2_character_character; case VCTRS_TYPE_raw: *left = 0; return VCTRS_TYPE2_character_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_character_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_character_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_character_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_character_scalar; } } case VCTRS_TYPE_raw: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_raw; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_raw; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_raw; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_raw; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_raw; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_raw; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_raw; case VCTRS_TYPE_raw: *left = -1; return VCTRS_TYPE2_raw_raw; case VCTRS_TYPE_list: *left = 0; return VCTRS_TYPE2_raw_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_raw_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_raw_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_raw_scalar; } } case VCTRS_TYPE_list: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_list; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_list; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_list; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_list; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_list; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_list; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_list; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_list; case VCTRS_TYPE_list: *left = -1; return VCTRS_TYPE2_list_list; case VCTRS_TYPE_dataframe: *left = 0; return VCTRS_TYPE2_list_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_list_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_list_scalar; } } case VCTRS_TYPE_dataframe: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_dataframe; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_dataframe; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_dataframe; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_dataframe; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_dataframe; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_dataframe; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_dataframe; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_dataframe; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_list_dataframe; case VCTRS_TYPE_dataframe: *left = -1; return VCTRS_TYPE2_dataframe_dataframe; case VCTRS_TYPE_s3: *left = 0; return VCTRS_TYPE2_dataframe_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_dataframe_scalar; } } case VCTRS_TYPE_s3: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_s3; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_s3; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_s3; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_s3; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_s3; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_s3; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_s3; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_s3; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_list_s3; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_dataframe_s3; case VCTRS_TYPE_s3: *left = -1; return VCTRS_TYPE2_S3_s3; case VCTRS_TYPE_scalar: *left = 0; return VCTRS_TYPE2_S3_scalar; } } case VCTRS_TYPE_scalar: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_null_scalar; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_unspecified_scalar; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_logical_scalar; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_integer_scalar; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_double_scalar; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_complex_scalar; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_character_scalar; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_raw_scalar; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_list_scalar; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_dataframe_scalar; case VCTRS_TYPE_s3: *left = 1; return VCTRS_TYPE2_S3_scalar; case VCTRS_TYPE_scalar: *left = -1; return VCTRS_TYPE2_scalar_scalar; } }} r_stop_unreachable(); } enum vctrs_type2 vec_typeof2(r_obj* x, r_obj* y) { int _; return vec_typeof2_impl(vec_typeof(x), vec_typeof(y), &_); } const char* vctrs_type2_as_str(enum vctrs_type2 type) { switch (type) { case VCTRS_TYPE2_null_null: return "VCTRS_TYPE2_null_null"; case VCTRS_TYPE2_null_logical: return "VCTRS_TYPE2_null_logical"; case VCTRS_TYPE2_null_integer: return "VCTRS_TYPE2_null_integer"; case VCTRS_TYPE2_null_double: return "VCTRS_TYPE2_null_double"; case VCTRS_TYPE2_null_complex: return "VCTRS_TYPE2_null_complex"; case VCTRS_TYPE2_null_character: return "VCTRS_TYPE2_null_character"; case VCTRS_TYPE2_null_raw: return "VCTRS_TYPE2_null_raw"; case VCTRS_TYPE2_null_list: return "VCTRS_TYPE2_null_list"; case VCTRS_TYPE2_null_dataframe: return "VCTRS_TYPE2_null_dataframe"; case VCTRS_TYPE2_null_s3: return "VCTRS_TYPE2_null_s3"; case VCTRS_TYPE2_null_unspecified: return "VCTRS_TYPE2_null_unspecified"; case VCTRS_TYPE2_null_scalar: return "VCTRS_TYPE2_null_scalar"; case VCTRS_TYPE2_unspecified_logical: return "VCTRS_TYPE2_unspecified_logical"; case VCTRS_TYPE2_unspecified_integer: return "VCTRS_TYPE2_unspecified_integer"; case VCTRS_TYPE2_unspecified_double: return "VCTRS_TYPE2_unspecified_double"; case VCTRS_TYPE2_unspecified_complex: return "VCTRS_TYPE2_unspecified_complex"; case VCTRS_TYPE2_unspecified_character: return "VCTRS_TYPE2_unspecified_character"; case VCTRS_TYPE2_unspecified_raw: return "VCTRS_TYPE2_unspecified_raw"; case VCTRS_TYPE2_unspecified_list: return "VCTRS_TYPE2_unspecified_list"; case VCTRS_TYPE2_unspecified_dataframe: return "VCTRS_TYPE2_unspecified_dataframe"; case VCTRS_TYPE2_unspecified_s3: return "VCTRS_TYPE2_unspecified_s3"; case VCTRS_TYPE2_unspecified_unspecified: return "VCTRS_TYPE2_unspecified_unspecified"; case VCTRS_TYPE2_unspecified_scalar: return "VCTRS_TYPE2_unspecified_scalar"; case VCTRS_TYPE2_logical_logical: return "VCTRS_TYPE2_logical_logical"; case VCTRS_TYPE2_logical_integer: return "VCTRS_TYPE2_logical_integer"; case VCTRS_TYPE2_logical_double: return "VCTRS_TYPE2_logical_double"; case VCTRS_TYPE2_logical_complex: return "VCTRS_TYPE2_logical_complex"; case VCTRS_TYPE2_logical_character: return "VCTRS_TYPE2_logical_character"; case VCTRS_TYPE2_logical_raw: return "VCTRS_TYPE2_logical_raw"; case VCTRS_TYPE2_logical_list: return "VCTRS_TYPE2_logical_list"; case VCTRS_TYPE2_logical_dataframe: return "VCTRS_TYPE2_logical_dataframe"; case VCTRS_TYPE2_logical_s3: return "VCTRS_TYPE2_logical_s3"; case VCTRS_TYPE2_logical_scalar: return "VCTRS_TYPE2_logical_scalar"; case VCTRS_TYPE2_integer_integer: return "VCTRS_TYPE2_integer_integer"; case VCTRS_TYPE2_integer_double: return "VCTRS_TYPE2_integer_double"; case VCTRS_TYPE2_integer_complex: return "VCTRS_TYPE2_integer_complex"; case VCTRS_TYPE2_integer_character: return "VCTRS_TYPE2_integer_character"; case VCTRS_TYPE2_integer_raw: return "VCTRS_TYPE2_integer_raw"; case VCTRS_TYPE2_integer_list: return "VCTRS_TYPE2_integer_list"; case VCTRS_TYPE2_integer_dataframe: return "VCTRS_TYPE2_integer_dataframe"; case VCTRS_TYPE2_integer_s3: return "VCTRS_TYPE2_integer_s3"; case VCTRS_TYPE2_integer_scalar: return "VCTRS_TYPE2_integer_scalar"; case VCTRS_TYPE2_double_double: return "VCTRS_TYPE2_double_double"; case VCTRS_TYPE2_double_complex: return "VCTRS_TYPE2_double_complex"; case VCTRS_TYPE2_double_character: return "VCTRS_TYPE2_double_character"; case VCTRS_TYPE2_double_raw: return "VCTRS_TYPE2_double_raw"; case VCTRS_TYPE2_double_list: return "VCTRS_TYPE2_double_list"; case VCTRS_TYPE2_double_dataframe: return "VCTRS_TYPE2_double_dataframe"; case VCTRS_TYPE2_double_s3: return "VCTRS_TYPE2_double_s3"; case VCTRS_TYPE2_double_scalar: return "VCTRS_TYPE2_double_scalar"; case VCTRS_TYPE2_complex_complex: return "VCTRS_TYPE2_complex_complex"; case VCTRS_TYPE2_complex_character: return "VCTRS_TYPE2_complex_character"; case VCTRS_TYPE2_complex_raw: return "VCTRS_TYPE2_complex_raw"; case VCTRS_TYPE2_complex_list: return "VCTRS_TYPE2_complex_list"; case VCTRS_TYPE2_complex_dataframe: return "VCTRS_TYPE2_complex_dataframe"; case VCTRS_TYPE2_complex_s3: return "VCTRS_TYPE2_complex_s3"; case VCTRS_TYPE2_complex_scalar: return "VCTRS_TYPE2_complex_scalar"; case VCTRS_TYPE2_character_character: return "VCTRS_TYPE2_character_character"; case VCTRS_TYPE2_character_raw: return "VCTRS_TYPE2_character_raw"; case VCTRS_TYPE2_character_list: return "VCTRS_TYPE2_character_list"; case VCTRS_TYPE2_character_dataframe: return "VCTRS_TYPE2_character_dataframe"; case VCTRS_TYPE2_character_s3: return "VCTRS_TYPE2_character_s3"; case VCTRS_TYPE2_character_scalar: return "VCTRS_TYPE2_character_scalar"; case VCTRS_TYPE2_raw_raw: return "VCTRS_TYPE2_raw_raw"; case VCTRS_TYPE2_raw_list: return "VCTRS_TYPE2_raw_list"; case VCTRS_TYPE2_raw_dataframe: return "VCTRS_TYPE2_raw_dataframe"; case VCTRS_TYPE2_raw_s3: return "VCTRS_TYPE2_raw_s3"; case VCTRS_TYPE2_raw_scalar: return "VCTRS_TYPE2_raw_scalar"; case VCTRS_TYPE2_list_list: return "VCTRS_TYPE2_list_list"; case VCTRS_TYPE2_list_dataframe: return "VCTRS_TYPE2_list_dataframe"; case VCTRS_TYPE2_list_s3: return "VCTRS_TYPE2_list_s3"; case VCTRS_TYPE2_list_scalar: return "VCTRS_TYPE2_list_scalar"; case VCTRS_TYPE2_dataframe_dataframe: return "VCTRS_TYPE2_dataframe_dataframe"; case VCTRS_TYPE2_dataframe_s3: return "VCTRS_TYPE2_dataframe_s3"; case VCTRS_TYPE2_dataframe_scalar: return "VCTRS_TYPE2_dataframe_scalar"; case VCTRS_TYPE2_S3_s3: return "VCTRS_TYPE2_S3_s3"; case VCTRS_TYPE2_S3_scalar: return "VCTRS_TYPE2_S3_scalar"; case VCTRS_TYPE2_scalar_scalar: return "VCTRS_TYPE2_scalar_scalar"; } r_stop_unreachable(); } r_obj* ffi_typeof2(r_obj* x, r_obj* y) { enum vctrs_type2 type = vec_typeof2(x, y); return r_chr(vctrs_type2_as_str(type)); } vctrs/src/if-else.h0000644000176200001440000000054415065005761013707 0ustar liggesusers#ifndef VCTRS_IF_ELSE_H #define VCTRS_IF_ELSE_H #include "vctrs-core.h" r_obj* vec_if_else( r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_obj* ptype, struct vctrs_arg* p_condition_arg, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call ); #endif vctrs/src/arg.c0000644000176200001440000001604115113325071013117 0ustar liggesusers#include "vctrs.h" #include "decl/arg-decl.h" // Materialising argument tags ------------------------------------------ #define DEFAULT_ARG_BUF_SIZE 100 /** * This takes a `struct vctrs_arg{}` linked list and calls the * recursive function `fill_arg_buffer()`. It allocates a buffer in a * RAWSXP of size 100, which is grown by a factor of 1.5 each time the * `fill()` methods return a negative value. Returns a character * vector of size 1 containing the materialised argument tag. */ r_obj* vctrs_arg(struct vctrs_arg* arg) { if (!arg) { return chrs_empty; } r_ssize next_size = DEFAULT_ARG_BUF_SIZE; r_ssize size; r_obj* buf_holder = KEEP(r_null); char* buf; do { size = next_size; FREE(1); buf_holder = KEEP(r_alloc_raw(size)); buf = (char*) r_raw_begin(buf_holder); // Reallocate a larger buffer at the next iteration if the current // buffer turns out too small next_size *= 1.5; } while (fill_arg_buffer(arg, buf, 0, size) < 0); r_obj* out = r_chr(buf); FREE(1); return out; } // vmax-protected const char* vec_arg_format(struct vctrs_arg* p_arg) { r_obj* arg = KEEP(vctrs_arg(p_arg)); const char* out = r_format_error_arg(arg); FREE(1); return out; } /** * Takes a `struct vctrs_arg{}` linked list and a buffer and calls the * `fill()` method on each of those, recursively. Unless an error * occurred, it returns the current size written to the buffer so we * can track the remaining memory available in the buffer after * recursion. */ static int fill_arg_buffer(struct vctrs_arg* arg, char* buf, r_ssize cur_size, r_ssize tot_size) { if (arg->parent) { cur_size = fill_arg_buffer(arg->parent, buf, cur_size, tot_size); if (cur_size < 0) { return cur_size; } } r_ssize written = arg->fill(arg->data, buf + cur_size, tot_size - cur_size); if (written < 0) { return written; } else { return cur_size + written; } } static r_ssize str_arg_fill(const char* data, char* buf, r_ssize remaining) { size_t len = strlen(data); if (len >= remaining) { return -1; } r_memcpy(buf, data, len); buf[len] = '\0'; return len; } // Objects ------------------------------------------------------------- // Simple wrapper around a `const char*` argument tag struct vctrs_arg new_wrapper_arg(struct vctrs_arg* parent, const char* arg) { return (struct vctrs_arg) { .parent = parent, .fill = &wrapper_arg_fill, .data = (void*) arg }; } static r_ssize wrapper_arg_fill(void* data, char* buf, r_ssize remaining) { return str_arg_fill((const char*) data, buf, remaining); } // Wrapper that accesses a symbol in an environment, for lazy evaluation struct vctrs_arg new_lazy_arg(struct r_lazy* arg) { return (struct vctrs_arg) { .parent = NULL, .fill = &lazy_arg_fill, .data = arg }; } static r_ssize lazy_arg_fill(void* data_, char* buf, r_ssize remaining) { struct r_lazy* data = (struct r_lazy*) data_; r_obj* arg = KEEP(r_lazy_eval(*data)); const char* arg_str = ""; if (r_is_string(arg)) { arg_str = r_chr_get_c_string(arg, 0); } else if (arg != r_null) { r_abort("`arg` must be a string."); } r_ssize out = str_arg_fill(arg_str, buf, remaining); FREE(1); return out; } // Wrapper around a subscript, either numeric or character struct subscript_arg_data { struct vctrs_arg self; r_obj* names; r_ssize n; r_ssize* p_i; }; struct vctrs_arg* new_subscript_arg_vec(struct vctrs_arg* parent, r_obj* x, r_ssize* p_i) { r_obj* names = KEEP(vec_names(x)); struct vctrs_arg* p_arg = new_subscript_arg(parent, names, vec_size(x), p_i); FREE(1); return p_arg; } struct vctrs_arg* new_subscript_arg(struct vctrs_arg* parent, r_obj* names, r_ssize n, r_ssize* p_i) { r_obj* shelter = KEEP(r_alloc_list(2)); r_list_poke(shelter, 0, r_alloc_raw(sizeof(struct subscript_arg_data))); r_list_poke(shelter, 1, names); struct subscript_arg_data* p_data = r_raw_begin(r_list_get(shelter, 0)); p_data->self = (struct vctrs_arg) { .shelter = shelter, .parent = parent, .fill = &subscript_arg_fill, .data = p_data }; p_data->names = names; p_data->n = n; p_data->p_i = p_i; FREE(1); return (struct vctrs_arg*) p_data; } static r_ssize subscript_arg_fill(void* p_data_, char* buf, r_ssize remaining) { struct subscript_arg_data* p_data = (struct subscript_arg_data*) p_data_; r_ssize i = *p_data->p_i; r_obj* names = p_data->names; r_ssize n = p_data->n; if (i >= n) { r_stop_internal("`i = %" R_PRI_SSIZE "` can't be greater than `vec_size(x) = %" R_PRI_SSIZE "`.", i, n); } int len = 0; bool child = !is_empty_arg(p_data->self.parent); if (child) { if (r_has_name_at(names, i)) { len = snprintf(buf, remaining, "$%s", r_chr_get_c_string(names, i)); } else { len = snprintf(buf, remaining, "[[%td]]", i + 1); } } else { if (r_has_name_at(names, i)) { len = snprintf(buf, remaining, "%s", r_chr_get_c_string(names, i)); } else { len = snprintf(buf, remaining, "..%td", i + 1); } } if (len >= remaining) { return -1; } else { return len; } } // Wrapper around a counter representing the current position of the // argument struct vctrs_arg new_counter_arg(struct vctrs_arg* parent, struct arg_data_counter* data) { return (struct vctrs_arg) { .parent = parent, .fill = &counter_arg_fill, .data = (void*) data }; } struct arg_data_counter new_counter_arg_data(struct vctrs_arg* p_parent, r_ssize* i, r_obj** names) { return (struct arg_data_counter) { .p_parent = p_parent, .i = i, .names = names }; } static r_ssize counter_arg_fill(void* data_, char* buf, r_ssize remaining) { struct arg_data_counter* data = (struct arg_data_counter*) data_; r_ssize i = *data->i; r_obj* names = *data->names; int len; bool child = !is_empty_arg(data->p_parent); // FIXME: Check for syntactic names if (child) { if (r_has_name_at(names, i)) { len = snprintf(buf, remaining, "$%s", r_chr_get_c_string(names, i)); } else { len = snprintf(buf, remaining, "[[%" R_PRI_SSIZE "]]", i + 1); } } else { if (r_has_name_at(names, i)) { len = snprintf(buf, remaining, "%s", r_chr_get_c_string(names, i)); } else { len = snprintf(buf, remaining, "..%" R_PRI_SSIZE, i + 1); } } if (len >= remaining) { return -1; } else { return len; } } static bool is_empty_arg(struct vctrs_arg* arg) { if (!arg) { return true; } char tmp[1]; return arg->fill(arg->data, tmp, 1) == 0; } vctrs/src/growable.c0000644000176200001440000000051114315060310014136 0ustar liggesusers#include "vctrs.h" struct growable new_growable(SEXPTYPE type, int capacity) { struct growable g; g.x = Rf_allocVector(type, capacity); g.type = type; g.array = r_vec_unwrap(type, g.x); g.n = 0; g.capacity = capacity; return g; } SEXP growable_values(struct growable* g) { return Rf_lengthgets(g->x, g->n); } vctrs/src/hash.c0000644000176200001440000002335315156537736013321 0ustar liggesusers#include "hash.h" #include "vctrs.h" #include "decl/hash-decl.h" // ---------------------------------------------------------------------------- // Object r_obj* ffi_obj_hash(r_obj* x) { uint32_t hash = 0; hash = hash_combine(hash, obj_hash(x)); r_obj* out = KEEP(r_alloc_raw(sizeof(uint32_t))); r_memcpy(r_raw_begin(out), &hash, sizeof(uint32_t)); FREE(1); return out; } uint32_t obj_hash(r_obj* x) { uint32_t hash = sexp_hash(x); if (r_attrib_has_any(x)) { hash = hash_combine(hash, attrib_hash(x)); } return hash; } static inline uint32_t sexp_hash(r_obj* x) { switch (TYPEOF(x)) { // `NULL` case NILSXP: return 0; // Atomics case LGLSXP: return lgl_hash(x); case INTSXP: return int_hash(x); case REALSXP: return dbl_hash(x); case CPLXSXP: return cpl_hash(x); case RAWSXP: return raw_hash(x); case STRSXP: return chr_hash(x); case VECSXP: return list_hash(x); // Expressions case EXPRSXP: return expr_hash(x); // Node-like case DOTSXP: case LANGSXP: case LISTSXP: case BCODESXP: return node_hash(x); // Functions case CLOSXP: return fn_hash(x); // Pointer based hashing case SYMSXP: case SPECIALSXP: case BUILTINSXP: case ENVSXP: case EXTPTRSXP: return uint64_hash((uintptr_t) x); default: Rf_errorcall(R_NilValue, "Unsupported type %s", Rf_type2char(TYPEOF(x))); } } #define HASH(CTYPE, CONST_DEREF, HASHER) \ uint32_t hash = 0; \ const r_ssize size = r_length(x); \ CTYPE const* v_x = CONST_DEREF(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ hash = hash_combine(hash, HASHER(v_x[i])); \ } \ \ return hash static inline uint32_t lgl_hash(r_obj* x) { HASH(int, r_lgl_cbegin, lgl_hash_scalar); } static inline uint32_t int_hash(r_obj* x) { HASH(int, r_int_cbegin, int_hash_scalar); } static inline uint32_t dbl_hash(r_obj* x) { HASH(double, r_dbl_cbegin, dbl_hash_scalar); } static inline uint32_t cpl_hash(r_obj* x) { HASH(r_complex, r_cpl_cbegin, cpl_hash_scalar); } static inline uint32_t raw_hash(r_obj* x) { HASH(Rbyte, r_raw_cbegin, raw_hash_scalar); } static inline uint32_t chr_hash(r_obj* x) { HASH(r_obj*, r_chr_cbegin, chr_hash_scalar); } static inline uint32_t list_hash(r_obj* x) { HASH(r_obj*, r_list_cbegin, list_hash_scalar); } #undef HASH static inline uint32_t expr_hash(r_obj* x) { uint32_t hash = 0; r_ssize n = Rf_xlength(x); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = r_list_get(x, i); hash = hash_combine(hash, obj_hash(elt)); } return hash; } static inline uint32_t node_hash(r_obj* x) { uint32_t hash = 0; hash = hash_combine(hash, obj_hash(r_node_tag(x))); hash = hash_combine(hash, obj_hash(r_node_car(x))); hash = hash_combine(hash, obj_hash(r_node_cdr(x))); return hash; } static inline uint32_t fn_hash(r_obj* x) { uint32_t hash = 0; hash = hash_combine(hash, obj_hash(r_fn_body(x))); hash = hash_combine(hash, obj_hash(r_fn_env(x))); hash = hash_combine(hash, obj_hash(r_fn_formals(x))); return hash; } // Same idea as `node_hash()` where the `tag` and `value` of each node are // hashed, but using the `r_attrib_map()` API since we can't directly access an // object's attribute pairlist static inline uint32_t attrib_hash(r_obj* x) { uint32_t hash = 0; r_attrib_map(x, attrib_hash_cb, &hash); return hash; } static inline r_obj* attrib_hash_cb(r_obj* tag, r_obj* value, void* data) { uint32_t* p_hash = (uint32_t*) data; *p_hash = hash_combine(*p_hash, obj_hash(tag)); *p_hash = hash_combine(*p_hash, obj_hash(value)); // Return C `NULL` to continue iteration through the attributes return NULL; } // ---------------------------------------------------------------------------- // Vector hash r_obj* ffi_vec_hash(r_obj* x) { x = KEEP(vec_proxy_equal(x)); const r_ssize size = vec_size(x); r_obj* out = KEEP(r_alloc_raw(size * sizeof(uint32_t))); uint32_t* v_out = (uint32_t*) r_raw_begin(out); r_memset(v_out, 0, size * sizeof(uint32_t)); vec_hash_fill(x, size, true, v_out); FREE(2); return out; } // Not compatible with hash_scalar. When `@na_equal` is false, missing // values are propagated and encoded as `1`. void vec_hash_fill(r_obj* x, r_ssize size, bool na_equal, uint32_t* v_out) { if (has_dim(x)) { // The conversion to data frame is only a stopgap, in the long // term, we'll hash arrays natively x = KEEP(r_as_data_frame(x)); vec_hash_fill(x, size, na_equal, v_out); FREE(1); return; } if (na_equal) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_integer: int_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_double: dbl_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_complex: cpl_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_character: chr_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_raw: raw_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_list: list_hash_fill_na_equal(x, size, v_out); return; case VCTRS_TYPE_dataframe: df_hash_fill(x, size, na_equal, v_out); return; default: break; } } else { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_integer: int_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_double: dbl_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_complex: cpl_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_character: chr_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_raw: raw_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_list: list_hash_fill_na_propagate(x, size, v_out); return; case VCTRS_TYPE_dataframe: df_hash_fill(x, size, na_equal, v_out); return; default: break; } } stop_unimplemented_vctrs_type("vec_hash_fill", vec_proxy_typeof(x)); } #define HASH_FILL(CTYPE, CONST_DEREF, HASHER) \ CTYPE const* v_x = CONST_DEREF(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ const uint32_t hash = v_out[i]; \ CTYPE elt = v_x[i]; \ v_out[i] = hash_combine(hash, HASHER(elt)); \ } // Incomplete rows in data frames propagate an `NA` hash #define HASH_FILL_NA_PROPAGATE(CTYPE, CONST_DEREF, HASHER, IS_MISSING) \ CTYPE const* v_x = CONST_DEREF(x); \ \ for (r_ssize i = 0; i < size; ++i) { \ const uint32_t hash = v_out[i]; \ if (hash == HASH_MISSING) { \ continue; \ } \ CTYPE elt = v_x[i]; \ v_out[i] = IS_MISSING(elt) ? HASH_MISSING : hash_combine(hash, HASHER(elt)); \ } static inline void lgl_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(int, r_lgl_cbegin, lgl_hash_scalar); } static inline void int_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(int, r_int_cbegin, int_hash_scalar); } static inline void dbl_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(double, r_dbl_cbegin, dbl_hash_scalar); } static inline void cpl_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(r_complex, r_cpl_cbegin, cpl_hash_scalar); } static inline void chr_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(r_obj*, r_chr_cbegin, chr_hash_scalar); } static inline void raw_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(Rbyte, r_raw_cbegin, raw_hash_scalar); } static inline void list_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL(r_obj*, r_list_cbegin, list_hash_scalar); } static inline void lgl_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(int, r_lgl_cbegin, lgl_hash_scalar, lgl_is_missing); } static inline void int_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(int, r_int_cbegin, int_hash_scalar, int_is_missing); } static inline void dbl_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(double, r_dbl_cbegin, dbl_hash_scalar, dbl_is_missing); } static inline void cpl_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(r_complex, r_cpl_cbegin, cpl_hash_scalar, cpl_is_missing); } static inline void chr_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(r_obj*, r_chr_cbegin, chr_hash_scalar, chr_is_missing); } static inline void raw_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(Rbyte, r_raw_cbegin, raw_hash_scalar, raw_is_missing); } static inline void list_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out) { HASH_FILL_NA_PROPAGATE(r_obj*, r_list_cbegin, list_hash_scalar, list_is_missing); } #undef HASH_FILL_NA_PROPAGATE #undef HASH_FILL static inline void df_hash_fill(r_obj* x, r_ssize size, bool na_equal, uint32_t* v_out) { const r_ssize n_col = r_length(x); r_obj* const* v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n_col; ++i) { vec_hash_fill(v_x[i], size, na_equal, v_out); } } vctrs/src/type-data-frame.c0000644000176200001440000006233515156543676015361 0ustar liggesusers#include "utils-dispatch.h" #include "vctrs.h" #include "type-data-frame.h" #include "decl/type-data-frame-decl.h" bool is_data_frame(r_obj* x) { return r_typeof(x) == R_TYPE_list && class_type_is_data_frame(class_type(x)); } bool is_native_df(r_obj* x) { enum vctrs_class_type type = class_type(x); return type == VCTRS_CLASS_bare_data_frame || type == VCTRS_CLASS_bare_tibble; } bool is_bare_data_frame(r_obj* x) { return class_type(x) == VCTRS_CLASS_bare_data_frame; } bool is_bare_tibble(r_obj* x) { return class_type(x) == VCTRS_CLASS_bare_tibble; } r_obj* new_data_frame(r_obj* x, r_ssize n) { x = KEEP(r_clone_referenced(x)); init_data_frame(x, n); FREE(1); return x; } // [[ register() ]] r_obj* ffi_new_data_frame(r_obj* args) { args = r_node_cdr(args); r_obj* x = r_node_car(args); args = r_node_cdr(args); r_obj* n = r_node_car(args); args = r_node_cdr(args); r_obj* cls = r_node_car(args); args = r_node_cdr(args); r_obj* attrib = args; if (r_typeof(x) != R_TYPE_list) { r_abort_call(r_null, "`x` must be a list"); } // Input comes from the R side, so we must always clone it. This is also // important because we pull attributes from `x` later on, but clear the ones // on `out` right now because we build it from scratch. r_obj* out = KEEP(r_clone(x)); r_attrib_zap_all(out); r_obj* names = NULL; r_obj* row_names = NULL; for (r_obj* node = attrib; node != r_null; node = r_node_cdr(node)) { r_obj* tag = r_node_tag(node); r_obj* value = r_node_car(node); // We might add dynamic dots later on if (tag == r_syms.class_) { r_stop_internal("Can't supply `class` in `...`."); } if (tag == r_syms.names) { names = value; continue; } if (tag == r_syms.row_names) { // We used to validate a user supplied `n` against a user supplied // `row.names`, but that requires extracting out the `rownames_size()`, // which can materialize ALTREP row name objects and is prohibitively // expensive (tidyverse/dplyr#6596). So instead we say that user supplied // `row.names` overrides both the implied size of `x` and a user supplied // `n`, even if they are incompatible. row_names = value; continue; } r_attrib_poke(out, tag, value); } // `names` handling: // - If `...` had `names`, use them. // - If `x` is empty, use `character()` names (supports empty `list()` case). // - Use `r_names(x)`. // // Note that this means `new_data_frame(list(1))` purposefully constructs a // corrupt data frame with no names, which is questionable, but we have a test // for this and I think we use it for testing other edge cases. if (names == NULL) { if (r_length(x) == 0) { names = r_globals.empty_chr; } else { names = r_names(x); } } r_attrib_poke_names(out, names); // `row.names` handling: // - If `...` had `row.names`, use them. // - If `n` was provided, use it (overridden by `row.names` in `...`). // - Use `df_raw_size_from_list(x)`. // // Note that this means `new_data_frame(new_data_frame(n = 10L))` will return // a zero row data frame. `new_data_frame()` always treats `x` as a bare list, // even if it might contain some extra info about the `row.names` in the zero // column data frame case. if (row_names == NULL) { const r_ssize size = n != r_null ? df_size_from_n(n) : df_raw_size_from_list(x); row_names = new_compact_rownames(size); } KEEP(row_names); attrib_append_row_names(out, row_names); FREE(1); if (cls == r_null) { cls = classes_data_frame; } else { cls = c_data_frame_class(cls); } r_attrib_poke_class(out, cls); FREE(1); return out; } // This utility appends a `row.names` attribute to `x`'s attribute // pairlist without expensive materialization. // // SAFETY: For simplicity, this does not check if a `row.names` attribute // already exists, as `new_data_frame()` clears all attributes up front. // // In R < 4.6.0, `Rf_setAttrib()` would materialize duckplyr's // ALTREP row names. It no longer does this thanks to: // https://github.com/r-devel/r-dev-day/issues/148 static void attrib_append_row_names(r_obj* x, r_obj* row_names) { #if R_VERSION >= R_Version(4, 6, 0) r_attrib_poke(x, r_syms.row_names, row_names); #else r_obj* attrib = ATTRIB(x); attrib = KEEP(r_new_node(row_names, attrib)); r_node_poke_tag(attrib, r_syms.row_names); SET_ATTRIB(x, attrib); FREE(1); #endif } static r_ssize df_size_from_n(r_obj* n) { if (r_typeof(n) != R_TYPE_integer || r_length(n) != 1) { r_abort("`n` must be an integer of size 1."); } r_ssize out = r_int_get(n, 0); if (out == r_globals.na_int) { r_abort("`n` can't be missing."); } if (out < 0) { r_abort("`n` can't be negative."); } return out; } static r_obj* c_data_frame_class(r_obj* cls) { if (r_typeof(cls) != R_TYPE_character) { r_abort_call(r_null, "`class` must be NULL or a character vector"); } return chr_c(cls, classes_data_frame); } // [[ register() ]] r_obj* ffi_data_frame(r_obj* x, r_obj* size, r_obj* name_repair, r_obj* frame) { struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, lazy_args.dot_name_repair, false, error_call); KEEP(name_repair_opts.shelter); r_ssize c_size = 0; if (size == r_null) { c_size = vec_size_common(x, 0, vec_args.empty, error_call); } else { c_size = vec_as_short_length(size, vec_args.dot_size, error_call); } r_obj* out = data_frame(x, c_size, &name_repair_opts, error_call); FREE(1); return out; } static r_obj* data_frame(r_obj* x, r_ssize size, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call) { const bool unpack = true; r_obj* out = KEEP(df_list(x, size, unpack, p_name_repair_opts, error_call)); out = new_data_frame(out, size); FREE(1); return out; } // [[ register() ]] r_obj* ffi_df_list(r_obj* x, r_obj* size, r_obj* unpack, r_obj* name_repair, r_obj* frame) { struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts(name_repair, lazy_args.dot_name_repair, false, error_call); KEEP(name_repair_opts.shelter); r_ssize c_size = 0; if (size == r_null) { c_size = vec_size_common(x, 0, vec_args.empty, error_call); } else { c_size = vec_as_short_length(size, vec_args.dot_size, error_call); } const bool c_unpack = r_arg_as_bool(unpack, ".unpack"); r_obj* out = df_list(x, c_size, c_unpack, &name_repair_opts, error_call); FREE(1); return out; } static r_obj* df_list(r_obj* x, r_ssize size, bool unpack, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call) { if (r_typeof(x) != R_TYPE_list) { r_stop_internal("`x` must be a list."); } x = KEEP(vec_recycle_common(x, size, vec_args.empty, error_call)); r_ssize n_cols = r_length(x); // Unnamed columns are auto-named with `""` if (r_names(x) == r_null) { r_obj* names = KEEP(r_new_character(n_cols)); r_attrib_poke_names(x, names); FREE(1); } x = KEEP(df_list_drop_null(x)); if (unpack) { x = df_list_unpack(x); } KEEP(x); r_obj* names = KEEP(r_names(x)); names = KEEP(vec_as_names(names, p_name_repair_opts)); r_attrib_poke_names(x, names); FREE(5); return x; } static r_obj* df_list_drop_null(r_obj* x) { r_ssize n_cols = r_length(x); r_ssize count = 0; for (r_ssize i = 0; i < n_cols; ++i) { count += r_list_get(x, i) == r_null; } if (count == 0) { return x; } r_obj* names = KEEP(r_names(x)); r_obj* const * p_names = r_chr_cbegin(names); r_ssize n_out = n_cols - count; r_obj* out = KEEP(r_alloc_list(n_out)); r_obj* out_names = KEEP(r_alloc_character(n_out)); r_ssize out_i = 0; for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = r_list_get(x, i); if (col != r_null) { r_list_poke(out, out_i, col); r_chr_poke(out_names, out_i, p_names[i]); ++out_i; } } r_attrib_poke_names(out, out_names); FREE(3); return out; } static r_obj* df_list_unpack(r_obj* x) { r_obj* names = KEEP(r_names(x)); r_obj* const * p_names = r_chr_cbegin(names); bool any_needs_unpack = false; r_ssize n_cols = r_length(x); r_ssize i = 0; for (; i < n_cols; ++i) { // Only unpack unnamed data frames if (p_names[i] != strings_empty) { continue; } r_obj* col = r_list_get(x, i); if (is_data_frame(col)) { any_needs_unpack = true; break; } } if (!any_needs_unpack) { FREE(1); return x; } r_obj* unpack = KEEP(r_new_logical(n_cols)); int* p_unpack = LOGICAL(unpack); for (r_ssize j = 0; j < n_cols; ++j) { p_unpack[j] = 0; } r_ssize width = i; for (; i < n_cols; ++i) { // Only unpack unnamed data frames if (p_names[i] != strings_empty) { ++width; continue; } r_obj* col = r_list_get(x, i); if (is_data_frame(col)) { width += r_length(col); p_unpack[i] = 1; } else { ++width; } } r_obj* out = KEEP(r_new_list(width)); r_obj* out_names = KEEP(r_new_character(width)); r_ssize loc = 0; // Unpack loop for (r_ssize i = 0; i < n_cols; ++i) { if (!p_unpack[i]) { r_list_poke(out, loc, r_list_get(x, i)); r_chr_poke(out_names, loc, p_names[i]); ++loc; continue; } r_obj* col = r_list_get(x, i); r_obj* col_names = KEEP(r_names(col)); if (r_typeof(col_names) != R_TYPE_character) { r_stop_internal( "Encountered corrupt data frame. " "Data frames must have character column names." ); } r_obj* const * p_col_names = r_chr_cbegin(col_names); r_ssize col_i = 0; r_ssize stop = loc + r_length(col); for (; loc < stop; ++loc, ++col_i) { r_list_poke(out, loc, r_list_get(col, col_i)); r_chr_poke(out_names, loc, p_col_names[col_i]); } loc = stop; FREE(1); } r_attrib_poke_names(out, out_names); FREE(4); return out; } enum rownames_type rownames_type(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_character: return ROWNAMES_TYPE_identifiers; case R_TYPE_integer: if (r_length(x) == 2 && r_int_begin(x)[0] == r_globals.na_int) { return ROWNAMES_TYPE_automatic_compact; } else { return ROWNAMES_TYPE_automatic; } default: r_stop_internal("Unexpected type `%s`.", Rf_type2char(r_typeof(x))); } } static r_ssize compact_rownames_length(r_obj* x) { return abs(r_int_get(x, 1)); } // [[ include("type-data-frame.h") ]] r_ssize rownames_size(r_obj* rn) { switch (rownames_type(rn)) { case ROWNAMES_TYPE_identifiers: case ROWNAMES_TYPE_automatic: return r_length(rn); case ROWNAMES_TYPE_automatic_compact: return compact_rownames_length(rn); } never_reached("rownames_size"); } // [[ include("type-data-frame.h") ]] void init_data_frame(r_obj* x, r_ssize n) { r_attrib_poke(x, r_syms.class_, classes_data_frame); init_bare_data_frame(x, n); } // [[ include("type-data-frame.h") ]] void init_tibble(r_obj* x, r_ssize n) { r_attrib_poke(x, r_syms.class_, classes_tibble); init_bare_data_frame(x, n); } static void init_bare_data_frame(r_obj* x, r_ssize n) { if (r_length(x) == 0) { r_attrib_poke(x, r_syms.names, r_globals.empty_chr); } init_compact_rownames(x, n); } // [[ include("type-data-frame.h") ]] void init_compact_rownames(r_obj* x, r_ssize n) { r_obj* rn = KEEP(new_compact_rownames(n)); r_attrib_poke(x, r_syms.row_names, rn); FREE(1); } static r_obj* new_compact_rownames(r_ssize n) { if (n <= 0) { return r_globals.empty_int; } r_obj* out = r_alloc_integer(2); int* out_data = r_int_begin(out); out_data[0] = r_globals.na_int; out_data[1] = -n; return out; } // vctrs type methods ------------------------------------------------ // [[ register() ]] r_obj* ffi_df_ptype2_opts(r_obj* x, r_obj* y, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; const enum s3_fallback s3_fallback = s3_fallback_from_opts(opts); return df_ptype2( x, y, &x_arg, &y_arg, call, s3_fallback ); } r_obj* df_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { r_obj* x_names = KEEP(r_names(x)); r_obj* y_names = KEEP(r_names(y)); r_obj* out = r_null; if (obj_equal(x_names, y_names)) { out = df_ptype2_loop( x, y, x_names, p_x_arg, p_y_arg, call, s3_fallback ); } else { out = df_ptype2_match( x, y, x_names, y_names, p_x_arg, p_y_arg, call, s3_fallback ); } FREE(2); return out; } static r_obj* df_ptype2_match( r_obj* x, r_obj* y, r_obj* x_names, r_obj* y_names, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { r_obj* x_dups_pos = KEEP(vec_match(x_names, y_names)); r_obj* y_dups_pos = KEEP(vec_match(y_names, x_names)); int* x_dups_pos_data = r_int_begin(x_dups_pos); int* y_dups_pos_data = r_int_begin(y_dups_pos); r_ssize x_len = r_length(x_names); r_ssize y_len = r_length(y_names); // Count columns that are only in `y` r_ssize rest_len = 0; for (r_ssize i = 0; i < y_len; ++i) { if (y_dups_pos_data[i] == r_globals.na_int) { ++rest_len; } } r_ssize out_len = x_len + rest_len; r_obj* out = KEEP(r_alloc_list(out_len)); r_obj* nms = KEEP(r_alloc_character(out_len)); r_attrib_poke(out, r_syms.names, nms); r_ssize i = 0; r_ssize y_col_arg_loc = 0; struct vctrs_arg* p_x_col_arg = new_subscript_arg(p_x_arg, x_names, x_len, &i); KEEP(p_x_col_arg->shelter); struct vctrs_arg* p_y_col_arg = new_subscript_arg(p_y_arg, y_names, y_len, &y_col_arg_loc); KEEP(p_y_col_arg->shelter); // Fill in prototypes of all the columns that are in `x`, in order for (; i < x_len; ++i) { r_ssize dup = x_dups_pos_data[i]; r_obj* x_col = r_list_get(x, i); r_obj* type; if (dup == r_globals.na_int) { type = vec_ptype_or_s3_fallback( x_col, p_x_col_arg, vec_typeof(x_col), call, s3_fallback ); } else { // 1-based index --dup; y_col_arg_loc = dup; r_obj* y_col = r_list_get(y, dup); int _; type = vec_ptype2( x_col, y_col, p_x_col_arg, p_y_col_arg, call, s3_fallback, &_ ); } r_list_poke(out, i, type); r_chr_poke(nms, i, r_chr_get(x_names, i)); } // Fill in prototypes of the columns that are only in `y` for (r_ssize j = 0; i < out_len; ++j) { r_ssize dup = y_dups_pos_data[j]; if (dup == r_globals.na_int) { y_col_arg_loc = j; r_obj* y_col = r_list_get(y, j); r_obj* type = vec_ptype_or_s3_fallback( y_col, p_y_col_arg, vec_typeof(y_col), call, s3_fallback ); r_list_poke(out, i, type); r_chr_poke(nms, i, r_chr_get(y_names, j)); ++i; } } init_data_frame(out, 0); FREE(6); return out; } static r_obj* df_ptype2_loop( r_obj* x, r_obj* y, r_obj* names, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { const r_ssize n_cols = r_length(names); r_obj* out = KEEP(r_alloc_list(n_cols)); r_attrib_poke(out, r_syms.names, names); r_ssize i = 0; struct vctrs_arg* p_x_col_arg = new_subscript_arg_vec(p_x_arg, out, &i); KEEP(p_x_col_arg->shelter); struct vctrs_arg* p_y_col_arg = new_subscript_arg_vec(p_y_arg, out, &i); KEEP(p_y_col_arg->shelter); for (; i < n_cols; ++i) { r_obj* x_col = r_list_get(x, i); r_obj* y_col = r_list_get(y, i); int _; r_obj* type = vec_ptype2( x_col, y_col, p_x_col_arg, p_y_col_arg, call, s3_fallback, &_ ); r_list_poke(out, i, type); } init_data_frame(out, 0); FREE(3); return out; } // [[ register() ]] r_obj* ffi_df_cast_opts(r_obj* x, r_obj* to, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy to_arg_ = { .x = syms.to_arg, .env = frame }; struct vctrs_arg to_arg = new_lazy_arg(&to_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct cast_opts c_opts = new_cast_opts(x, to, &x_arg, &to_arg, call, opts); return df_cast_opts(&c_opts); } // Take all columns of `to` and preserve the order. Common columns are // cast to their types in `to`. Extra `x` columns are dropped and // cause a lossy cast. Extra `to` columns are filled with missing // values. // [[ include("cast.h") ]] r_obj* df_cast_opts(const struct cast_opts* opts) { r_obj* x_names = KEEP(r_names(opts->x)); r_obj* to_names = KEEP(r_names(opts->to)); if (x_names == r_null || to_names == r_null) { r_stop_internal("Data frame must have names."); } r_obj* out = r_null; if (obj_equal(x_names, to_names)) { out = df_cast_loop(opts, x_names); } else { out = df_cast_match(opts, x_names, to_names); } FREE(2); return out; } static r_obj* df_cast_match(const struct cast_opts* opts, r_obj* x_names, r_obj* to_names) { r_obj* x = opts->x; r_obj* to = opts->to; r_obj* to_dups_pos = KEEP(vec_match(to_names, x_names)); int* to_dups_pos_data = r_int_begin(to_dups_pos); r_ssize to_len = r_length(to_dups_pos); r_obj* out = KEEP(r_alloc_list(to_len)); r_attrib_poke(out, r_syms.names, to_names); r_ssize size = df_size(x); r_ssize common_len = 0; r_ssize i = 0; r_ssize x_arg_loc = 0; struct vctrs_arg* named_x_arg = new_subscript_arg(opts->p_x_arg, x_names, r_length(x_names), &x_arg_loc); KEEP(named_x_arg->shelter); struct vctrs_arg* named_to_arg = new_subscript_arg(opts->p_to_arg, to_names, to_len, &i); KEEP(named_to_arg->shelter); for (; i < to_len; ++i) { r_ssize pos = to_dups_pos_data[i]; if (pos == r_globals.na_int) { r_obj* to_col = r_list_get(to, i); // FIXME: Need to initialise the vector because we currently use // `vec_assign()` in `vec_rbind()` before falling back. Attach // an attribute to recognise unspecified vectors in // `base_c_invoke()`. if (opts->s3_fallback && vec_is_common_class_fallback(to_col)) { r_obj* col = KEEP(vec_init(to_col, size)); r_attrib_poke(col, r_sym("vctrs:::unspecified"), r_true); r_list_poke(out, i, col); FREE(1); } else { r_obj* col = vec_init(to_col, size); r_list_poke(out, i, col); } } else { --pos; // 1-based index ++common_len; x_arg_loc = pos; struct cast_opts col_opts = { .x = r_list_get(x, pos), .to = r_list_get(to, i), .p_x_arg = named_x_arg, .p_to_arg = named_to_arg, .call = opts->call, .s3_fallback = opts->s3_fallback }; r_obj* col = vec_cast_opts(&col_opts); r_list_poke(out, i, col); } } // Restore data frame size before calling `vec_restore()`. `x` and // `to` might not have any columns to compute the original size. init_data_frame(out, size); r_attrib_poke(out, r_syms.row_names, df_rownames(x)); r_ssize extra_len = r_length(x) - common_len; if (extra_len) { r_obj* ffi_x_arg = KEEP(vctrs_arg(opts->p_x_arg)); r_obj* ffi_to_arg = KEEP(vctrs_arg(opts->p_to_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(opts->call)); out = vctrs_dispatch6(syms_df_lossy_cast, fns_df_lossy_cast, syms_out, out, syms_x, x, syms_to, to, syms_x_arg, ffi_x_arg, syms_to_arg, ffi_to_arg, syms_call, ffi_call); FREE(3); } FREE(4); return out; } static r_obj* df_cast_loop(const struct cast_opts* opts, r_obj* names) { r_obj* x = opts->x; r_obj* to = opts->to; r_ssize len = r_length(names); r_obj* out = KEEP(r_alloc_list(len)); r_attrib_poke(out, r_syms.names, names); r_ssize size = df_size(x); r_ssize i = 0; struct vctrs_arg* named_x_arg = new_subscript_arg(opts->p_x_arg, names, len, &i); KEEP(named_x_arg->shelter); struct vctrs_arg* named_to_arg = new_subscript_arg(opts->p_to_arg, names, len, &i); KEEP(named_to_arg->shelter); for (; i < len; ++i) { struct cast_opts col_opts = { .x = r_list_get(x, i), .to = r_list_get(to, i), .p_x_arg = named_x_arg, .p_to_arg = named_to_arg, .call = opts->call, .s3_fallback = opts->s3_fallback }; r_obj* col = vec_cast_opts(&col_opts); r_list_poke(out, i, col); } // Restore data frame size before calling `vec_restore()`. `x` and // `to` might not have any columns to compute the original size. init_data_frame(out, size); r_attrib_poke(out, r_syms.row_names, df_rownames(x)); FREE(3); return out; } // If negative index, value is appended r_obj* df_poke(r_obj* x, r_ssize i, r_obj* value) { if (i >= 0) { r_list_poke(x, i, value); return x; } r_ssize ncol = r_length(x); r_obj* tmp = KEEP(r_resize(x, ncol + 1)); Rf_copyMostAttrib(x, tmp); x = tmp; r_list_poke(x, ncol, value); FREE(1); return x; } r_obj* df_poke_at(r_obj* x, r_obj* name, r_obj* value) { r_obj* names = KEEP(r_names(x)); r_ssize i = r_chr_find(names, name); FREE(1); x = KEEP(df_poke(x, i, value)); if (i < 0) { r_obj* names = KEEP(r_names(x)); r_chr_poke(names, r_length(x) - 1, name); FREE(1); } FREE(1); return x; } static inline r_ssize df_flat_width(r_obj* x) { r_ssize n = r_length(x); r_ssize out = n; r_obj* const * v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n; ++i) { r_obj* col = v_x[i]; if (is_data_frame(col)) { out = out + df_flat_width(col) - 1; } } return out; } struct flatten_info { bool flatten; r_ssize width; }; static inline struct flatten_info df_flatten_info(r_obj* x) { bool flatten = false; r_ssize n = r_length(x); r_ssize width = n; r_obj* const * v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n; ++i) { r_obj* col = v_x[i]; if (is_data_frame(col)) { flatten = true; width = width + df_flat_width(col) - 1; } } return (struct flatten_info){flatten, width}; } // [[ register() ]] r_obj* ffi_df_flatten_info(r_obj* x) { struct flatten_info info = df_flatten_info(x); r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, r_lgl(info.flatten)); r_list_poke(out, 1, r_int(info.width)); FREE(1); return out; } // Might return duplicate names. Currently only used for equality // proxy so this doesn't matter. A less bare bone version would repair // names. // // [[ register() ]] r_obj* df_flatten(r_obj* x) { struct flatten_info info = df_flatten_info(x); if (!info.flatten) { return x; } r_obj* out = KEEP(r_alloc_list(info.width)); r_obj* out_names = KEEP(r_alloc_character(info.width)); r_attrib_poke_names(out, out_names); df_flatten_loop(x, out, out_names, 0); init_data_frame(out, df_size(x)); FREE(2); return out; } static r_ssize df_flatten_loop(r_obj* x, r_obj* out, r_obj* out_names, r_ssize counter) { r_ssize n = r_length(x); r_obj* x_names = KEEP(r_names(x)); for (r_ssize i = 0; i < n; ++i) { r_obj* col = r_list_get(x, i); if (is_data_frame(col)) { counter = df_flatten_loop(col, out, out_names, counter); } else { r_list_poke(out, counter, col); r_chr_poke(out_names, counter, r_chr_get(x_names, i)); ++counter; } } FREE(1); return counter; } r_obj* df_repair_names(r_obj* x, struct name_repair_opts* name_repair) { r_obj* nms = KEEP(r_names(x)); r_obj* repaired = KEEP(vec_as_names(nms, name_repair)); // Should this go through proxy and restore so that classes can // update metadata and check invariants when special columns are // renamed? if (nms != repaired) { x = KEEP(r_clone_referenced(x)); r_attrib_poke_names(x, repaired); FREE(1); } FREE(2); return x; } void vctrs_init_type_data_frame(r_obj* ns) { syms_df_lossy_cast = r_sym("df_lossy_cast"); fns_df_lossy_cast = r_eval(syms_df_lossy_cast, ns); } static r_obj* syms_df_lossy_cast = NULL; static r_obj* fns_df_lossy_cast = NULL; vctrs/src/fields.c0000644000176200001440000000704515072256373013634 0ustar liggesusers#include "vctrs.h" // SEXP x and y must be CHARSXP // x_utf* is pointer to const char* which is lazily initialised: // This makes this function also suitable for use when repeated // comparing varying y to constant x bool equal_string(SEXP x, const char** x_utf8, SEXP y) { // Try fast pointer comparison if (x == y) return true; if (*x_utf8 == NULL) *x_utf8 = Rf_translateCharUTF8(x); // Try slower conversion to common encoding const char* y_utf = Rf_translateCharUTF8(y); return (strcmp(y_utf, *x_utf8) == 0); } int find_offset(SEXP x, SEXP index) { if (Rf_length(index) != 1) { Rf_errorcall(R_NilValue, "Invalid index: must have length 1"); } int n = Rf_length(x); if (TYPEOF(index) == INTSXP) { int val = INTEGER(index)[0]; if (val == NA_INTEGER) Rf_errorcall(R_NilValue, "Invalid index: NA_integer_"); val--; if (val < 0 || val >= n) Rf_errorcall(R_NilValue, "Invalid index: out of bounds"); return val; } else if (TYPEOF(index) == REALSXP) { double val = REAL(index)[0]; if (R_IsNA(val)) Rf_errorcall(R_NilValue, "Invalid index: NA_real_"); val--; if (val < 0 || val >= n) Rf_errorcall(R_NilValue, "Invalid index: out of bounds"); if (val > R_LEN_T_MAX) { Rf_errorcall(R_NilValue, "Invalid index: too large"); } return (int) val; } else if (TYPEOF(index) == STRSXP) { SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); if (names == R_NilValue) Rf_errorcall(R_NilValue, "Corrupt x: no names"); SEXP val_0 = STRING_ELT(index, 0); if (val_0 == NA_STRING) Rf_errorcall(R_NilValue, "Invalid index: NA_character_"); const char* val_0_chr = Rf_translateCharUTF8(val_0); if (val_0_chr[0] == '\0') Rf_errorcall(R_NilValue, "Invalid index: empty string"); for (int j = 0; j < Rf_length(names); ++j) { SEXP name_j = STRING_ELT(names, j); if (name_j == NA_STRING) Rf_errorcall(R_NilValue, "Corrupt x: element %i is unnamed", j + 1); if (equal_string(val_0, &val_0_chr, name_j)) { UNPROTECT(1); return j; } } Rf_errorcall(R_NilValue, "Invalid index: field name '%s' not found", val_0_chr); } else { Rf_errorcall(R_NilValue, "Invalid index: must be a character or numeric vector"); } } // Lists ------------------------------------------------------------------- SEXP vctrs_list_get(SEXP x, SEXP index) { int idx = find_offset(x, index); return VECTOR_ELT(x, idx); } SEXP vctrs_list_set(SEXP x, SEXP index, SEXP value) { int idx = find_offset(x, index); SEXP out = PROTECT(Rf_shallow_duplicate(x)); SET_VECTOR_ELT(out, idx, value); UNPROTECT(1); return out; } // Records ------------------------------------------------------------------ void check_rcrd(SEXP x) { if (!Rf_isVectorList(x)) Rf_errorcall(R_NilValue, "Corrupt rcrd: not a list"); if (Rf_length(x) == 0) Rf_errorcall(R_NilValue, "Corrupt rcrd: length 0"); } SEXP vctrs_fields(SEXP x) { check_rcrd(x); return Rf_getAttrib(x, R_NamesSymbol); } SEXP vctrs_n_fields(SEXP x) { check_rcrd(x); return Rf_ScalarInteger(Rf_length(x)); } SEXP vctrs_field_get(SEXP x, SEXP index) { check_rcrd(x); return vctrs_list_get(x, index); } SEXP vctrs_field_set(SEXP x, SEXP index, SEXP value) { check_rcrd(x); if (!obj_is_vector(value, VCTRS_ALLOW_NULL_no)) { Rf_errorcall(R_NilValue, "Invalid value: not a vector."); } if (vec_size(value) != vec_size(x)) { Rf_errorcall(R_NilValue, "Invalid value: incorrect length."); } return vctrs_list_set(x, index, value); } vctrs/src/list-unchop.c0000644000176200001440000000252115072256373014625 0ustar liggesusers#include "vctrs.h" #include "decl/list-unchop-decl.h" r_obj* ffi_list_unchop( r_obj* x, r_obj* indices, r_obj* ptype, r_obj* name_spec, r_obj* name_repair, r_obj* frame ) { struct r_lazy error_arg_lazy = { .x = r_syms.error_arg, .env = frame }; struct vctrs_arg error_arg = new_lazy_arg(&error_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts( name_repair, r_lazy_null, false, error_call ); KEEP(name_repair_opts.shelter); r_obj* out = list_unchop( x, indices, ptype, name_spec, &name_repair_opts, &error_arg, error_call ); FREE(1); return out; } // `list_unchop()` is a thin wrapper around `list_combine()` // with less options, but allows for `indices = NULL` to mean // "sequential" `indices`, i.e. `vec_c()`. // // At the C level, use `vec_c()` directly for simple combinations, // or `list_combine()` for more complex combinations. static r_obj* list_unchop( r_obj* xs, r_obj* indices, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call ) { return list_combine_for_list_unchop( xs, indices, ptype, name_spec, name_repair, p_error_arg, error_call ); } vctrs/src/split.c0000644000176200001440000000101514402367170013502 0ustar liggesusers#include "vctrs.h" // [[ register() ]] SEXP vec_split(SEXP x, SEXP by) { if (vec_size(x) != vec_size(by)) { Rf_errorcall(R_NilValue, "`x` and `by` must have the same size."); } SEXP out = PROTECT(vec_group_loc(by)); SEXP indices = VECTOR_ELT(out, 1); SEXP val = vec_chop_unsafe(x, indices, r_null); SET_VECTOR_ELT(out, 1, val); SEXP names = PROTECT(Rf_getAttrib(out, R_NamesSymbol)); SET_STRING_ELT(names, 1, strings_val); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(2); return out; } vctrs/src/type-tibble.h0000644000176200001440000000051515156001116014570 0ustar liggesusers#ifndef VCTRS_TYPE_TIBBLE_H #define VCTRS_TYPE_TIBBLE_H #include "vctrs-core.h" #include "cast.h" #include "ptype2.h" r_obj* tib_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); SEXP tib_cast(const struct cast_opts* opts); #endif vctrs/src/c.h0000644000176200001440000000040515056611175012603 0ustar liggesusers#ifndef VCTRS_C_H #define VCTRS_C_H #include "vctrs-core.h" #include "names.h" r_obj* vec_c( r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call ); #endif vctrs/src/list-combine.h0000644000176200001440000000375215057550670014761 0ustar liggesusers#ifndef VCTRS_LIST_COMBINE_H #define VCTRS_LIST_COMBINE_H #include "vctrs-core.h" #include "names.h" #include "slice-assign.h" enum list_combine_unmatched { LIST_COMBINE_UNMATCHED_default = 0, LIST_COMBINE_UNMATCHED_error = 1, }; enum list_combine_multiple { LIST_COMBINE_MULTIPLE_last = 0, LIST_COMBINE_MULTIPLE_first = 1, }; r_obj* list_combine( r_obj* xs, r_obj* indices, r_ssize size, r_obj* default_, enum list_combine_unmatched unmatched, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_indices_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ); // For `list_unchop()` r_obj* list_combine_for_list_unchop( r_obj* xs, r_obj* indices, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ); // For `vec_c()` r_obj* list_combine_for_vec_c( r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ); enum list_combine_unmatched parse_list_combine_unmatched(r_obj* unmatched, struct r_lazy error_call); enum list_combine_multiple parse_list_combine_multiple(r_obj* multiple, struct r_lazy error_call); // TODO: Exposed for `bind.c`. Can we remove? bool needs_df_list_combine_common_class_fallback(r_obj* x); // TODO: Exposed for `bind.c`. Can we remove? void df_list_combine_common_class_fallback( r_obj* out, r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_indices_arg, struct r_lazy error_call ); #endif vctrs/src/order-sortedness.c0000644000176200001440000003327515120272011015651 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" // ----------------------------------------------------------------------------- static inline int dbl_cmp(double x, double y, enum vctrs_dbl x_type, enum vctrs_dbl y_type, int direction, int na_order, int na_nan_order); /* * Check if a double vector is ordered, handling `decreasing`, `na_last`, and * `nan_distinct`. * * If the double vector is in the expected ordering, no sorting needs to * occur. In these cases, if `p_x` is in exactly the expected ordering. * If `p_x` is in exactly the opposite ordering, the the ordering will later * be reversed (this only happens if it is strictly opposite of expected * ordering, ties would prevent the reversal from being stable). Group * information is also pushed in these cases for use in the next columns. */ enum vctrs_sortedness dbl_sortedness(const double* p_x, r_ssize size, bool decreasing, bool na_last, bool nan_distinct, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; const int na_nan_order = nan_distinct ? na_order : 0; double previous = p_x[0]; enum vctrs_dbl previous_type = dbl_classify(previous); r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { double current = p_x[i]; enum vctrs_dbl current_type = dbl_classify(current); int cmp = dbl_cmp( current, previous, current_type, previous_type, direction, na_order, na_nan_order ); if (cmp >= 0) { break; } previous = current; previous_type = current_type; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { double current = p_x[i]; enum vctrs_dbl current_type = dbl_classify(current); int cmp = dbl_cmp( current, previous, current_type, previous_type, direction, na_order, na_nan_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; previous_type = current_type; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } static inline int dbl_cmp_numbers(double x, double y, int direction); /* * Compare two doubles, handling `na_order`, `direction`, and `na_nan_order` */ static inline int dbl_cmp(double x, double y, enum vctrs_dbl x_type, enum vctrs_dbl y_type, int direction, int na_order, int na_nan_order) { switch (x_type) { case VCTRS_DBL_number: switch (y_type) { case VCTRS_DBL_number: return dbl_cmp_numbers(x, y, direction); case VCTRS_DBL_missing: return -na_order; case VCTRS_DBL_nan: return -na_order; } case VCTRS_DBL_missing: switch (y_type) { case VCTRS_DBL_number: return na_order; case VCTRS_DBL_missing: return 0; case VCTRS_DBL_nan: return na_nan_order; } case VCTRS_DBL_nan: switch (y_type) { case VCTRS_DBL_number: return na_order; case VCTRS_DBL_missing: return -na_nan_order; case VCTRS_DBL_nan: return 0; } } never_reached("dbl_cmp"); } static inline int dbl_cmp_numbers(double x, double y, int direction) { const int cmp = (x > y) - (x < y); return cmp * direction; } // ----------------------------------------------------------------------------- static inline int int_cmp(int x, int y, const int direction, const int na_order); // Very similar to `dbl_sortedness()` enum vctrs_sortedness int_sortedness(const int* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; int previous = p_x[0]; r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { int current = p_x[i]; int cmp = int_cmp( current, previous, direction, na_order ); if (cmp >= 0) { break; } previous = current; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { int current = p_x[i]; int cmp = int_cmp( current, previous, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } // Very similar to `dbl_cmp()` static inline int int_cmp(int x, int y, const int direction, const int na_order) { if (x == NA_INTEGER) { if (y == NA_INTEGER) { return 0; } else { return na_order; } } if (y == NA_INTEGER) { return -na_order; } int cmp = (x > y) - (x < y); return cmp * direction; } // ----------------------------------------------------------------------------- /* * Check if the data is already in the "expected" ordering as defined by * `decreasing` and `na_last`. If the data is in the expected ordering, or if it * is in the strictly opposite of the expected ordering (with no ties), then * groups are pushed, and a `vctrs_sortedness` value is returned indicating how * to finalize the order. */ enum vctrs_sortedness chr_sortedness(const SEXP* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos) { if (size == 0) { return VCTRS_SORTEDNESS_sorted; } if (size == 1) { groups_size_maybe_push(1, p_group_infos); return VCTRS_SORTEDNESS_sorted; } const int direction = decreasing ? -1 : 1; const int na_order = na_last ? 1 : -1; SEXP previous = p_x[0]; const char* previous_string = CHAR(previous); r_ssize count = 0; // Check for strictly opposite of expected order // (ties are not allowed so we can reverse the vector stably) for (r_ssize i = 1; i < size; ++i, ++count) { SEXP current = p_x[i]; const char* current_string = CHAR(current); int cmp = str_cmp_maybe_na( current, previous, current_string, previous_string, direction, na_order ); if (cmp >= 0) { break; } previous = current; previous_string = current_string; } // Was in strictly opposite of expected order. if (count == size - 1) { // Each group is size 1 since this is strict ordering for (r_ssize j = 0; j < size; ++j) { groups_size_maybe_push(1, p_group_infos); } return VCTRS_SORTEDNESS_reversed; } // Was partially in expected order. Need to sort. if (count != 0) { return VCTRS_SORTEDNESS_unsorted; } // Retain the original `n_groups` to be able to reset the group sizes if // it turns out we don't have expected ordering struct group_info* p_group_info = groups_current(p_group_infos); r_ssize original_n_groups = p_group_info->n_groups; r_ssize group_size = 1; // Check for expected ordering - allowing ties since we don't have to // reverse the ordering. for (r_ssize i = 1; i < size; ++i) { SEXP current = p_x[i]; const char* current_string = CHAR(current); int cmp = str_cmp_maybe_na( current, previous, current_string, previous_string, direction, na_order ); // Not expected ordering if (cmp < 0) { p_group_info->n_groups = original_n_groups; return VCTRS_SORTEDNESS_unsorted; } previous = current; previous_string = current_string; // Continue group run if (cmp == 0) { ++group_size; continue; } // Expected ordering groups_size_maybe_push(group_size, p_group_infos); group_size = 1; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); // Expected ordering return VCTRS_SORTEDNESS_sorted; } // ----------------------------------------------------------------------------- static inline void int_incr(r_ssize size, int* p_x); static inline void ord_reverse(r_ssize size, int* p_o); /* * Resolve ordering based on the sortedness and whether or not `p_o` has * been initialized. For a vector / first column, this function has to * initialize the ordering (for reversed ordering this is faster than * initializing the order sequentially then reversing it). * * `size` will correspond to the size of `x` for the first column, but will * correspond to the size of the current group for subsequent columns. */ void ord_resolve_sortedness(enum vctrs_sortedness sortedness, r_ssize size, int* p_o) { switch (sortedness) { case VCTRS_SORTEDNESS_sorted: int_incr(size, p_o); return; case VCTRS_SORTEDNESS_reversed: ord_reverse(size, p_o); return; case VCTRS_SORTEDNESS_unsorted: Rf_errorcall(R_NilValue, "Internal error: Unsorted case should be handled elsewhere."); } never_reached("ord_resolve_sortedness"); } // Initialize with sequential 1-based ordering static inline void int_incr(r_ssize size, int* p_x) { for (r_ssize i = 0; i < size; ++i) { p_x[i] = i + 1; } } // Used when in strictly opposite of expected order and uninitialized. static inline void ord_reverse(r_ssize size, int* p_o) { const r_ssize half = size / 2; for (r_ssize i = 0; i < half; ++i) { r_ssize swap = size - 1 - i; p_o[i] = swap + 1; p_o[swap] = i + 1; } // Initialize center value if odd number if (size % 2 != 0) { p_o[half] = half + 1; } } static inline void ord_reverse_chunk(r_ssize size, int* p_o); void ord_resolve_sortedness_chunk(enum vctrs_sortedness sortedness, r_ssize size, int* p_o) { switch (sortedness) { case VCTRS_SORTEDNESS_sorted: return; case VCTRS_SORTEDNESS_reversed: ord_reverse_chunk(size, p_o); return; case VCTRS_SORTEDNESS_unsorted: Rf_errorcall(R_NilValue, "Internal error: Unsorted case should be handled elsewhere."); } never_reached("ord_resolve_sortedness_chunk"); } // Used when in strictly opposite of expected order and initialized. // No need to alter "center" value here, it will be initialized to a value // already and it won't be swapped. static inline void ord_reverse_chunk(r_ssize size, int* p_o) { const r_ssize half = size / 2; for (r_ssize i = 0; i < half; ++i) { r_ssize swap = size - 1 - i; const int temp = p_o[i]; p_o[i] = p_o[swap]; p_o[swap] = temp; } } vctrs/src/type-info.c0000644000176200001440000001032115072256373014267 0ustar liggesusers#include "vctrs.h" #include "decl/type-info-decl.h" struct vctrs_proxy_info vec_proxy_info(r_obj* x) { struct vctrs_proxy_info info; // Avoid `KEEP(x_proxy_method)` if not required! This does help with // performance, since this is called in such a tight loop. // // `vec_proxy_method()` itself may also return `r_null` r_obj* x_proxy_method = r_is_object(x) ? vec_proxy_method(x) : r_null; if (x_proxy_method == r_null) { info.inner = x; info.type = vec_base_typeof(x, false); info.had_proxy_method = false; } else { KEEP(x_proxy_method); info.inner = KEEP(vec_proxy_invoke(x, x_proxy_method)); info.type = vec_base_typeof(info.inner, true); info.had_proxy_method = true; FREE(2); } return info; } // Type info of `x` // // Does not take the proxy, so can return `VCTRS_TYPE_s3`, unlike `vec_proxy_info()`. // // [[ register() ]] r_obj* ffi_type_info(r_obj* x) { r_obj* out = KEEP(Rf_mkNamed(R_TYPE_list, (const char*[]) { "type", "had_proxy_method", "" })); const enum vctrs_type type = vec_typeof(x); r_list_poke(out, 0, r_chr(vec_type_as_str(type))); r_list_poke(out, 1, r_lgl(vec_proxy_method(x) != r_null)); FREE(1); return out; } // [[ register() ]] r_obj* ffi_proxy_info(r_obj* x) { struct vctrs_proxy_info info = vec_proxy_info(x); KEEP(info.inner); r_obj* out = KEEP(Rf_mkNamed(R_TYPE_list, (const char*[]) { "type", "had_proxy_method", "proxy", "" })); r_list_poke(out, 0, r_chr(vec_type_as_str(info.type))); r_list_poke(out, 1, r_lgl(info.had_proxy_method)); r_list_poke(out, 2, info.inner); FREE(2); return out; } static enum vctrs_type vec_base_typeof(r_obj* x, bool proxied) { switch (r_typeof(x)) { // Atomic types are always vectors case R_TYPE_null: return VCTRS_TYPE_null; case R_TYPE_logical: return VCTRS_TYPE_logical; case R_TYPE_integer: return VCTRS_TYPE_integer; case R_TYPE_double: return VCTRS_TYPE_double; case R_TYPE_complex: return VCTRS_TYPE_complex; case R_TYPE_character: return VCTRS_TYPE_character; case R_TYPE_raw: return VCTRS_TYPE_raw; case R_TYPE_list: // Bare lists and data frames are vectors if (!r_is_object(x)) return VCTRS_TYPE_list; if (is_data_frame(x)) return VCTRS_TYPE_dataframe; // S3 lists are only vectors if they are proxied if (proxied || r_inherits(x, "list")) return VCTRS_TYPE_list; // fallthrough default: return VCTRS_TYPE_scalar; } } enum vctrs_type vec_proxy_typeof(r_obj* x) { return vec_base_typeof(x, true); } // [[ register() ]] r_obj* vctrs_typeof(r_obj* x, r_obj* dispatch) { enum vctrs_type type; if (r_lgl_get(dispatch, 0)) { type = vec_proxy_info(x).type; } else { type = vec_typeof(x); } return r_chr(vec_type_as_str(type)); } enum vctrs_type vec_typeof(r_obj* x) { // Check for unspecified vectors before `vec_base_typeof()` which // allows vectors of `NA` to pass through as `VCTRS_TYPE_logical` if (vec_is_unspecified(x)) { return VCTRS_TYPE_unspecified; } if (!r_is_object(x) || r_class(x) == r_null) { return vec_base_typeof(x, false); } // Bare data frames are treated as a base atomic type. Subclasses of // data frames are treated as S3 to give them a chance to be proxied // or implement their own methods for cast, type2, etc. if (is_bare_data_frame(x)) { return VCTRS_TYPE_dataframe; } return VCTRS_TYPE_s3; } r_no_return void stop_unimplemented_vctrs_type(const char* fn, enum vctrs_type type) { r_stop_internal("Unsupported vctrs type `%s`.", vec_type_as_str(type)); } const char* vec_type_as_str(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return "null"; case VCTRS_TYPE_unspecified: return "unspecified"; case VCTRS_TYPE_logical: return "logical"; case VCTRS_TYPE_integer: return "integer"; case VCTRS_TYPE_double: return "double"; case VCTRS_TYPE_complex: return "complex"; case VCTRS_TYPE_character: return "character"; case VCTRS_TYPE_raw: return "raw"; case VCTRS_TYPE_list: return "list"; case VCTRS_TYPE_dataframe: return "dataframe"; case VCTRS_TYPE_s3: return "s3"; case VCTRS_TYPE_scalar: return "scalar"; } never_reached("vec_type_as_str"); } void vctrs_init_type_info(r_obj* ns) { } vctrs/src/order-groups.h0000644000176200001440000001241414422506663015015 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_GROUPS_H #define VCTRS_ORDER_GROUPS_H #include "vctrs-core.h" // ----------------------------------------------------------------------------- // This seems to be a reasonable default to start with for tracking group sizes // and is what base R uses. It is expanded by 2x every time we need to // reallocate. It is also capped to the size of `x`. #define GROUP_DATA_SIZE_DEFAULT 100000 // ----------------------------------------------------------------------------- /* * Info related to 1 column / vector worth of groupings * * @member self A RAWSXP for the struct memory. * @member data An integer vector of group sizes. * @member p_data A pointer to `data`. * @member data_pi The protection index for `data` which allows us to * `REPROTECT()` on the fly. * @member data_size The current allocated size of `data`. * @member n_groups The current number of groups seen so far. * Always `<= data_size`. * @member max_group_size The maximum group size seen so far. */ struct group_info { SEXP self; SEXP data; int* p_data; PROTECT_INDEX data_pi; r_ssize data_size; r_ssize n_groups; r_ssize max_group_size; }; #define PROTECT_GROUP_INFO(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->data, &(p_info)->data_pi); \ *(p_n) += 2; \ } while(0) // ----------------------------------------------------------------------------- /* * `group_infos` contains information about 2 `group_info` structs. It contains * a pointer which points to 2 `group_info` pointers. * * For a single atomic vector, `current = 0` is always set and only one of the * structs is ever used. * * For a data frame with multiple columns, after every column `current` is * flipped between 0 and 1, giving us a chance to read the group information * off the previous column (which allows us to chunk the current column into * groups) while also updating the group information of the chunks of * the current one. * * @member self A RAWSXP for the struct memory. * @member p_p_group_info_data A RAWSXP for the p_p_group_info array memory. * @member p_p_group_info A pointer to two `group_info` pointers. * @member max_data_size The maximum data size that can be allocated when * reallocating an individual `p_group_info`. This is set to the size of * `x`. * @member current The current `group_info` pointer we are using. This is * either 0 or 1. * @member force_groups Was group information requested by the user? If so, we * always have to track group information. * @member ignore_groups Should group tracking be ignored? This is the default * for atomic vectors unless groups information is explicitly requested. For * data frames, this is true over all columns except the last one (for * performance) unless `force_groups` is true. */ struct group_infos { SEXP self; SEXP p_p_group_info_data; struct group_info** p_p_group_info; r_ssize max_data_size; int current; bool force_groups; bool ignore_groups; }; #define PROTECT_GROUP_INFOS(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT((p_info)->p_p_group_info_data); \ *(p_n) += 2; \ PROTECT_GROUP_INFO((p_info)->p_p_group_info[0], (p_n)); \ PROTECT_GROUP_INFO((p_info)->p_p_group_info[1], (p_n)); \ } while(0) // ----------------------------------------------------------------------------- struct group_info* new_group_info(void); struct group_infos* new_group_infos(struct group_info* p_group_info0, struct group_info* p_group_info1, r_ssize max_data_size, bool force_groups, bool ignore_groups); void groups_swap(struct group_infos* p_group_infos); // ----------------------------------------------------------------------------- /* * Extract the current `group_info*` */ static inline struct group_info* groups_current(struct group_infos* p_group_infos) { return p_group_infos->p_p_group_info[p_group_infos->current]; } // ----------------------------------------------------------------------------- void groups_size_push(r_ssize size, struct group_infos* p_group_infos); /* * Inline version of `groups_size_push()` that only attempts to push if * we aren't ignoring groups. Important for this to be inline for performance, * especially with atomic vectors where order generally isn't required. */ static inline void groups_size_maybe_push(r_ssize size, struct group_infos* p_group_infos) { if (p_group_infos->ignore_groups) { return; } else { groups_size_push(size, p_group_infos); } } // ----------------------------------------------------------------------------- #endif vctrs/src/globals.c0000644000176200001440000000721715113325071013776 0ustar liggesusers#include "vctrs.h" struct syms syms; struct strings strings; struct chrs chrs; struct fns fns; struct vec_args vec_args; struct lazy_args lazy_args; struct lazy_calls lazy_calls; struct r_dyn_array* globals_shelter = NULL; #define INIT_ARG(ARG) \ static struct vctrs_arg ARG; ARG = new_wrapper_arg(NULL, #ARG); \ vec_args.ARG = &ARG #define INIT_ARG2(ARG, STR) \ static struct vctrs_arg ARG; ARG = new_wrapper_arg(NULL, STR); \ vec_args.ARG = &ARG // Defines both a string and a length 1 character vector #define INIT_STRING(ARG) \ strings.ARG = r_str(#ARG); \ r_dyn_list_push_back(globals_shelter, strings.ARG); \ chrs.ARG = r_chr(#ARG); \ r_dyn_list_push_back(globals_shelter, chrs.ARG); #define INIT_LAZY_ARG(ARG) \ lazy_args.ARG = (struct r_lazy) { .x = r_chr(#ARG), .env = r_null }; \ r_dyn_list_push_back(globals_shelter, lazy_calls.ARG.x) #define INIT_LAZY_ARG_2(ARG, STR) \ lazy_args.ARG = (struct r_lazy) { .x = r_chr(STR), .env = r_null }; \ r_dyn_list_push_back(globals_shelter, lazy_args.ARG.x) #define INIT_CALL(ARG) \ lazy_calls.ARG = (struct r_lazy) { .x = r_parse(#ARG "()"), .env = r_null }; \ r_dyn_list_push_back(globals_shelter, lazy_calls.ARG.x) void vctrs_init_globals(r_obj* ns) { size_t n_strings = sizeof(struct lazy_calls) / sizeof(struct r_lazy); size_t n_lazy_calls = sizeof(struct strings) / sizeof(r_obj*); size_t n_globals = n_strings + n_lazy_calls; globals_shelter = r_new_dyn_vector(R_TYPE_list, n_globals); r_preserve(globals_shelter->shelter); // Symbols ----------------------------------------------------------- syms.arg = r_sym("arg"); syms.condition_arg = r_sym("condition_arg"); syms.conditions_arg = r_sym("conditions_arg"); syms.default_arg = r_sym("default_arg"); syms.dot_arg = r_sym(".arg"); syms.dot_call = r_sym(".call"); syms.dot_error_arg = r_sym(".error_arg"); syms.dot_error_call = r_sym(".error_call"); syms.false_arg = r_sym("false_arg"); syms.from_arg = r_sym("from_arg"); syms.haystack_arg = r_sym("haystack_arg"); syms.missing_arg = r_sym("missing_arg"); syms.indices_arg = r_sym("indices_arg"); syms.needles_arg = r_sym("needles_arg"); syms.recurse = r_sym("recurse"); syms.repair_arg = r_sym("repair_arg"); syms.times_arg = r_sym("times_arg"); syms.to_arg = r_sym("to_arg"); syms.true_arg = r_sym("true_arg"); syms.value_arg = r_sym("value_arg"); syms.values_arg = r_sym("values_arg"); syms.x_arg = r_sym("x_arg"); syms.y_arg = r_sym("y_arg"); // Strings and characters -------------------------------------------- INIT_STRING(AsIs); INIT_STRING(repair); INIT_STRING(location); INIT_STRING(condition); // Args -------------------------------------------------------------- INIT_ARG2(dot_name_repair, ".name_repair"); INIT_ARG2(dot_ptype, ".ptype"); INIT_ARG2(dot_size, ".size"); INIT_ARG2(empty, ""); INIT_ARG(i); INIT_ARG(max_fill); INIT_ARG(n); INIT_ARG(value); INIT_ARG(x); INIT_ARG(y); INIT_ARG(indices); INIT_ARG(sizes); INIT_ARG(ptype); INIT_ARG(size); // Lazy args --------------------------------------------------------- INIT_LAZY_ARG_2(dot_name_repair, ".name_repair"); // Calls ------------------------------------------------------------- INIT_CALL(vec_assign); INIT_CALL(vec_assign_seq); INIT_CALL(vec_init); INIT_CALL(vec_ptype_finalise); INIT_CALL(vec_recycle); INIT_CALL(vec_size); } vctrs/src/proxy-restore.h0000644000176200001440000000122415056611175015223 0ustar liggesusers#ifndef VCTRS_PROXY_RESTORE_H #define VCTRS_PROXY_RESTORE_H #include "vctrs-core.h" struct vec_restore_opts { // The kind of ownership we have over the `proxy` enum vctrs_ownership ownership; // Whether the `proxy` was proxied recursively or not bool recursively_proxied; }; r_obj* vec_restore_opts( r_obj* x, r_obj* to, const struct vec_restore_opts* p_opts ); r_obj* vec_restore_default(r_obj* x, r_obj* to, enum vctrs_ownership ownership); r_obj* vec_df_restore( r_obj* x, r_obj* to, const struct vec_restore_opts* p_opts ); r_obj* vec_bare_df_restore( r_obj* x, r_obj* to, const struct vec_restore_opts* p_opts ); #endif vctrs/src/group.c0000644000176200001440000003044115156537555013525 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/group-decl.h" // [[ register() ]] SEXP vctrs_group_id(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_out = INTEGER(out); vctrs_group_id_loop(d, n, p_out); SEXP n_groups = PROTECT_N(Rf_ScalarInteger(d->used), &nprot); Rf_setAttrib(out, syms_n, n_groups); UNPROTECT(nprot); return out; } #define VCTRS_GROUP_ID_LOOP(DICT_HASH_SCALAR) \ do { \ int g = 1; \ \ for (int i = 0; i < n; ++i) { \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ R_len_t key = d->key[hash]; \ \ if (key == DICT_EMPTY) { \ dict_put(d, hash, i); \ p_out[i] = g; \ ++g; \ } else { \ p_out[i] = p_out[key]; \ } \ } \ } \ while (0) static inline void vctrs_group_id_loop(struct dictionary* d, R_len_t n, int* p_out) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_GROUP_ID_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: VCTRS_GROUP_ID_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: VCTRS_GROUP_ID_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: VCTRS_GROUP_ID_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: VCTRS_GROUP_ID_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: VCTRS_GROUP_ID_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: VCTRS_GROUP_ID_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: VCTRS_GROUP_ID_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: VCTRS_GROUP_ID_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vctrs_group_id_loop", d->p_poly_vec->type); } } #undef VCTRS_GROUP_ID_LOOP // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_group_rle(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP g = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_g = INTEGER(g); SEXP l = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_l = INTEGER(l); if (n == 0) { SEXP out = PROTECT_N(new_group_rle(g, l, 0), &nprot); UNPROTECT(nprot); return out; } const R_len_t size = vctrs_group_rle_loop(d, n, p_g, p_l); g = PROTECT_N(Rf_lengthgets(g, size), &nprot); l = PROTECT_N(Rf_lengthgets(l, size), &nprot); SEXP out = new_group_rle(g, l, d->used); UNPROTECT(nprot); return out; } #define VCTRS_GROUP_RLE_LOOP(DICT_HASH_SCALAR, P_EQUAL_NA_EQUAL) \ do { \ R_len_t loc = 0; \ const void* p_vec = d->p_poly_vec->p_vec; \ \ /* Integer vector that maps `hash` values to locations in `g` */ \ SEXP map = PROTECT(Rf_allocVector(INTSXP, d->size)); \ int* p_map = INTEGER(map); \ \ /* Initialize first value */ \ uint32_t hash = DICT_HASH_SCALAR(d, 0); \ dict_put(d, hash, 0); \ p_map[hash] = 0; \ *p_g = 1; \ *p_l = 1; \ ++loc; \ \ for (R_len_t i = 1; i < n; ++i) { \ if (P_EQUAL_NA_EQUAL(p_vec, i - 1, p_vec, i)) { \ ++(*p_l); \ continue; \ } \ \ ++p_l; \ *p_l = 1; \ \ /* Check if we have seen this value before */ \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ p_map[hash] = loc; \ p_g[loc] = d->used; \ } else { \ p_g[loc] = p_g[p_map[hash]]; \ } \ \ ++loc; \ } \ \ UNPROTECT(1); \ return loc; \ } \ while (0) static inline R_len_t vctrs_group_rle_loop(struct dictionary* d, R_len_t n, int* p_g, int* p_l) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_GROUP_RLE_LOOP(nil_dict_hash_scalar, p_nil_equal_na_equal); break; case VCTRS_TYPE_logical: VCTRS_GROUP_RLE_LOOP(lgl_dict_hash_scalar, p_lgl_equal_na_equal); break; case VCTRS_TYPE_integer: VCTRS_GROUP_RLE_LOOP(int_dict_hash_scalar, p_int_equal_na_equal); break; case VCTRS_TYPE_double: VCTRS_GROUP_RLE_LOOP(dbl_dict_hash_scalar, p_dbl_equal_na_equal); break; case VCTRS_TYPE_complex: VCTRS_GROUP_RLE_LOOP(cpl_dict_hash_scalar, p_cpl_equal_na_equal); break; case VCTRS_TYPE_character: VCTRS_GROUP_RLE_LOOP(chr_dict_hash_scalar, p_chr_equal_na_equal); break; case VCTRS_TYPE_raw: VCTRS_GROUP_RLE_LOOP(raw_dict_hash_scalar, p_raw_equal_na_equal); break; case VCTRS_TYPE_list: VCTRS_GROUP_RLE_LOOP(list_dict_hash_scalar, p_list_equal_na_equal); break; case VCTRS_TYPE_dataframe: VCTRS_GROUP_RLE_LOOP(df_dict_hash_scalar, p_df_equal_na_equal); break; default: stop_unimplemented_vctrs_type("vctrs_group_rle_loop", d->p_poly_vec->type); } } #undef VCTRS_GROUP_RLE_LOOP static inline SEXP new_group_rle(SEXP g, SEXP l, R_len_t n) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, g); SET_VECTOR_ELT(out, 1, l); SEXP names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, strings_group); SET_STRING_ELT(names, 1, strings_length); Rf_setAttrib(out, R_NamesSymbol, names); SEXP n_groups = PROTECT(Rf_ScalarInteger(n)); Rf_setAttrib(out, syms_n, n_groups); Rf_setAttrib(out, R_ClassSymbol, classes_vctrs_group_rle); UNPROTECT(3); return out; } // ----------------------------------------------------------------------------- // [[ include("vctrs.h"); register() ]] SEXP vec_group_loc(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); SEXP proxy = PROTECT_N(vec_proxy_equal(x), &nprot); proxy = PROTECT_N(obj_encode_utf8(proxy), &nprot); struct dictionary* d = new_dictionary(proxy); PROTECT_DICT(d, &nprot); SEXP groups = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_groups = INTEGER(groups); // Identify groups vec_group_loc_loop(d, n, p_groups); const int n_groups = d->used; // Location of first occurence of each group in `x` SEXP key_loc = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_key_loc = INTEGER(key_loc); int key_loc_current = 0; // Count of the number of elements in each group SEXP counts = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_counts = INTEGER(counts); r_memset(p_counts, 0, n_groups * sizeof(int)); for (int i = 0; i < n; ++i) { const int group = p_groups[i]; if (group == key_loc_current) { p_key_loc[key_loc_current] = i + 1; ++key_loc_current; } ++p_counts[group]; } SEXP out_loc = PROTECT_N(Rf_allocVector(VECSXP, n_groups), &nprot); // Direct pointer to the location vectors we store in `out_loc` int** p_elt_loc = (int**) R_alloc(n_groups, sizeof(int*)); // Initialize `out_loc` to a list of integers with sizes corresponding // to the number of elements in that group for (int i = 0; i < n_groups; ++i) { SEXP elt_loc = Rf_allocVector(INTSXP, p_counts[i]); p_elt_loc[i] = INTEGER(elt_loc); SET_VECTOR_ELT(out_loc, i, elt_loc); } // The current location we are updating, each group has its own counter SEXP locations = PROTECT_N(Rf_allocVector(INTSXP, n_groups), &nprot); int* p_locations = INTEGER(locations); r_memset(p_locations, 0, n_groups * sizeof(int)); // Fill in the location values for each group for (int i = 0; i < n; ++i) { const int group = p_groups[i]; const int location = p_locations[group]; p_elt_loc[group][location] = i + 1; ++p_locations[group]; } SEXP out_key = PROTECT_N(vec_slice(x, key_loc), &nprot); // Construct output data frame SEXP out = PROTECT_N(Rf_allocVector(VECSXP, 2), &nprot); SET_VECTOR_ELT(out, 0, out_key); SET_VECTOR_ELT(out, 1, out_loc); SEXP names = PROTECT_N(Rf_allocVector(STRSXP, 2), &nprot); SET_STRING_ELT(names, 0, strings_key); SET_STRING_ELT(names, 1, strings_loc); Rf_setAttrib(out, R_NamesSymbol, names); out = new_data_frame(out, n_groups); UNPROTECT(nprot); return out; } // This is essentially `vec_group_id()` #define VEC_GROUP_LOC_LOOP(DICT_HASH_SCALAR) \ do { \ int g = 0; \ \ for (int i = 0; i < n; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(d, i); \ const R_len_t key = d->key[hash]; \ \ if (key == DICT_EMPTY) { \ dict_put(d, hash, i); \ p_groups[i] = g; \ ++g; \ } else { \ p_groups[i] = p_groups[key]; \ } \ } \ } \ while (0) static inline void vec_group_loc_loop(struct dictionary* d, R_len_t n, int* p_groups) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_GROUP_LOC_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: VEC_GROUP_LOC_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: VEC_GROUP_LOC_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: VEC_GROUP_LOC_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: VEC_GROUP_LOC_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: VEC_GROUP_LOC_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: VEC_GROUP_LOC_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: VEC_GROUP_LOC_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: VEC_GROUP_LOC_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vec_group_loc_loop", d->p_poly_vec->type); } } #undef VEC_GROUP_LOC_LOOP vctrs/src/rep.c0000644000176200001440000002377615113325071013151 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/rep-decl.h" r_obj* vec_rep(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg) { check_rep_times(times, error_call, p_times_arg); if (times == 1) { return x; } const r_ssize times_ = (r_ssize) times; const r_ssize x_size = vec_size(x); if (x_size == 1) { return vec_recycle(x, times_, p_x_arg, error_call); } if (multiply_would_overflow(x_size, times_)) { stop_rep_size_oob(error_call); }; const r_ssize size = x_size * times_; r_obj* subscript = KEEP(r_alloc_integer(size)); int* v_subscript = r_int_begin(subscript); r_ssize k = 0; for (r_ssize i = 0; i < times_; ++i) { for (r_ssize j = 1; j <= x_size; ++j, ++k) { v_subscript[k] = j; } } r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } r_obj* ffi_vec_rep(r_obj* x, r_obj* ffi_times, r_obj* frame) { struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy times_arg_lazy = { .x = syms.times_arg, .env = frame }; struct vctrs_arg times_arg = new_lazy_arg(×_arg_lazy); ffi_times = KEEP(vec_cast(ffi_times, r_globals.empty_int, ×_arg, vec_args.empty, error_call)); if (vec_size(ffi_times) != 1) { stop_rep_times_size(error_call, ×_arg); } const int times = r_int_get(ffi_times, 0); r_obj* out = vec_rep(x, times, error_call, &x_arg, ×_arg); FREE(1); return out; } // ----------------------------------------------------------------------------- r_obj* vec_rep_each(r_obj* x, r_obj* times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg) { times = KEEP(vec_cast(times, r_globals.empty_int, p_times_arg, vec_args.empty, error_call)); const r_ssize times_size = vec_size(times); r_obj* out; if (times_size == 1) { const int times_ = r_int_get(times, 0); if (times_ == 1) { out = x; } else if (times_ == 0) { out = vec_slice_unsafe(x, r_globals.empty_int); } else { out = vec_rep_each_uniform(x, times_, error_call, p_times_arg); } } else { out = vec_rep_each_impl(x, times, times_size, error_call, p_times_arg); } FREE(1); return out; } r_obj* ffi_vec_rep_each(r_obj* x, r_obj* times, r_obj* frame) { struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.times_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy times_arg_lazy = { .x = syms.times_arg, .env = frame }; struct vctrs_arg times_arg = new_lazy_arg(×_arg_lazy); return vec_rep_each(x, times, error_call, &x_arg, ×_arg); } // ----------------------------------------------------------------------------- static r_obj* vec_rep_each_uniform(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_times_arg) { check_rep_each_times(times, 1, error_call, p_times_arg); const r_ssize times_ = (r_ssize) times; const r_ssize x_size = vec_size(x); if (multiply_would_overflow(x_size, times_)) { stop_rep_size_oob(error_call); }; const r_ssize size = x_size * times_; r_obj* subscript = KEEP(r_alloc_integer(size)); int* v_subscript = r_int_begin(subscript); r_ssize k = 0; for (r_ssize i = 1; i <= x_size; ++i) { for (r_ssize j = 0; j < times_; ++j, ++k) { v_subscript[k] = i; } } r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } static r_obj* vec_rep_each_impl(r_obj* x, r_obj* times, const r_ssize times_size, struct r_lazy error_call, struct vctrs_arg* p_times_arg) { const r_ssize x_size = vec_size(x); if (x_size != times_size) { stop_recycle_incompatible_size(times_size, x_size, p_times_arg, error_call); } const int* v_times = r_int_cbegin(times); r_ssize size = 0; for (r_ssize i = 0; i < times_size; ++i) { const int elt_times = v_times[i]; check_rep_each_times(elt_times, i + 1, error_call, p_times_arg); const r_ssize elt_times_ = (r_ssize) elt_times; if (plus_would_overflow(size, elt_times_)) { stop_rep_size_oob(error_call); } size += elt_times_; } r_obj* subscript = KEEP(r_alloc_integer(size)); int* v_subscript = r_int_begin(subscript); r_ssize k = 0; for (r_ssize i = 1; i <= x_size; ++i) { const r_ssize elt_times = (r_ssize) v_times[i - 1]; for (r_ssize j = 0; j < elt_times; ++j, ++k) { v_subscript[k] = i; } } r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } // ----------------------------------------------------------------------------- // TODO: Modify for long vectors with `R_XLEN_T_MAX` and `R_xlen_t`. static inline bool times_is_oob(int times) { return times > R_LEN_T_MAX; } // Only useful for positive or zero inputs static inline bool multiply_would_overflow(r_ssize x, r_ssize y) { return (double) x * y > R_LEN_T_MAX; } // Only useful for positive or zero inputs static inline bool plus_would_overflow(r_ssize x, r_ssize y) { return x > R_LEN_T_MAX - y; } // ----------------------------------------------------------------------------- static inline void check_rep_times(int times, struct r_lazy call, struct vctrs_arg* p_times_arg) { if (times < 0) { if (times == r_globals.na_int) { stop_rep_times_missing(call, p_times_arg); } else { stop_rep_times_negative(call, p_times_arg); } } else if (times_is_oob(times)) { stop_rep_times_oob(times, call, p_times_arg); } } static inline void stop_rep_times_negative(struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s must be a positive number.", vec_arg_format(p_times_arg)); } static inline void stop_rep_times_missing(struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s can't be missing.", vec_arg_format(p_times_arg)); } // Not currently thrown since `r_ssize == int`, but might be once // long vectors are supported static inline void stop_rep_times_oob(int times, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call( call, "%s must be less than %i, not %i.", vec_arg_format(p_times_arg), R_LEN_T_MAX, times ); } // ----------------------------------------------------------------------------- static inline void check_rep_each_times(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { if (times < 0) { if (times == r_globals.na_int) { stop_rep_each_times_missing(i, call, p_times_arg); } else { stop_rep_each_times_negative(i, call, p_times_arg); } } else if (times_is_oob(times)) { stop_rep_each_times_oob(times, i, call, p_times_arg); } } static inline void stop_rep_each_times_negative(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s must be a vector of positive numbers. Location %i is negative.", vec_arg_format(p_times_arg), i); } static inline void stop_rep_each_times_missing(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s can't be missing. Location %i is missing.", vec_arg_format(p_times_arg), i); } // Not currently thrown since `r_ssize == int`, but might be once // long vectors are supported static inline void stop_rep_each_times_oob(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call( call, "%s must be less than %i, not %i. ", "Location %i is too large.", vec_arg_format(p_times_arg), R_LEN_T_MAX, times, i ); } static inline void stop_rep_size_oob(struct r_lazy call) { r_abort_lazy_call( call, "Long vectors are not yet supported. " "Requested output size must be less than %i.", R_LEN_T_MAX ); } static inline void stop_rep_times_size(struct r_lazy call, struct vctrs_arg* p_times_arg) { r_abort_lazy_call(call, "%s must be a single number.", vec_arg_format(p_times_arg)); } // ----------------------------------------------------------------------------- static r_obj* vec_unrep(r_obj* x, struct r_lazy error_call) { r_obj* times = KEEP(vec_run_sizes(x, error_call)); const int* v_times = r_int_cbegin(times); const r_ssize size = r_length(times); r_obj* loc = KEEP(r_alloc_integer(size)); int* v_loc = r_int_begin(loc); r_ssize current = 1; for (r_ssize i = 0; i < size; ++i) { v_loc[i] = current; current += v_times[i]; } r_obj* out = KEEP(r_new_list(2)); r_list_poke(out, 0, vec_slice_unsafe(x, loc)); r_list_poke(out, 1, times); r_obj* names = r_new_character(2); r_attrib_poke_names(out, names); r_chr_poke(names, 0, strings_key); r_chr_poke(names, 1, strings_times); init_data_frame(out, size); FREE(3); return out; } r_obj* ffi_vec_unrep(r_obj* x, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; return vec_unrep(x, error_call); } // ----------------------------------------------------------------------------- void vctrs_init_rep(r_obj* ns) { } vctrs/src/vec-int.h0000644000176200001440000000050615065005761013726 0ustar liggesusers#ifndef VCTRS_VEC_INT_H #define VCTRS_VEC_INT_H #include static inline r_ssize r_int_count_complete(r_obj* x) { const int* v_x = r_int_cbegin(x); const r_ssize size = r_length(x); r_ssize out = 0; for (r_ssize i = 0; i < size; ++i) { out += (v_x[i] != r_globals.na_int); } return out; } #endif vctrs/src/match-joint.h0000644000176200001440000000050614350637775014612 0ustar liggesusers#ifndef VCTRS_MATCH_JOINT_H #define VCTRS_MATCH_JOINT_H #include "vctrs-core.h" r_obj* vec_joint_xtfrm(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, bool nan_distinct, r_obj* chr_proxy_collate); #endif vctrs/src/lazy.h0000644000176200001440000000360515156001116013332 0ustar liggesusers#ifndef VCTRS_LAZY_H #define VCTRS_LAZY_H #include "vctrs-core.h" #include "utils.h" // ----------------------------------------------------------------------------- /* * @member self A RAWSXP for the struct memory. * @member data The RAWSXP that gets allocated lazily. * @member p_data A void pointer to the RAWSXP. * @member data_pi A protection index to `data` so it can reprotect itself * upon allocation. * @member size The total size of the RAWSXP to allocate. * This is computed as `size * n_bytes` in `new_lazy_raw()`, where `n_bytes` * is from `sizeof()`. */ struct lazy_raw { SEXP self; SEXP data; void* p_data; PROTECT_INDEX data_pi; r_ssize size; }; /* * @param size The size of the type you want to interpret the memory as. * @param n_bytes A `sizeof()` result for the type you are allocating * memory for. */ static inline struct lazy_raw* new_lazy_raw(r_ssize size, size_t n_bytes) { SEXP self = PROTECT(r_new_raw(sizeof(struct lazy_raw))); struct lazy_raw* p_out = (struct lazy_raw*) RAW(self); p_out->self = self; p_out->data = R_NilValue; p_out->size = size * n_bytes; UNPROTECT(1); return p_out; } /* * Allocate the lazy vector if it hasn't already been allocated. * This reprotects itself using the protection index. */ static inline void* init_lazy_raw(struct lazy_raw* p_x) { if (p_x->data != R_NilValue) { return p_x->p_data; } p_x->data = Rf_allocVector(RAWSXP, p_x->size); REPROTECT(p_x->data, p_x->data_pi); p_x->p_data = (void*) RAW(p_x->data); return p_x->p_data; } // ----------------------------------------------------------------------------- #define PROTECT_LAZY_VEC(p_info, p_n) do { \ PROTECT((p_info)->self); \ PROTECT_WITH_INDEX((p_info)->data, &(p_info)->data_pi); \ *(p_n) += 2; \ } while (0) #endif vctrs/src/parallel.c0000644000176200001440000002442415113325071014146 0ustar liggesusers#include "parallel.h" #include "assert.h" #include "slice-assign.h" enum vec_parallel_variant { VEC_PARALLEL_VARIANT_all, VEC_PARALLEL_VARIANT_any }; #include "decl/parallel-decl.h" r_obj* ffi_vec_pany(r_obj* ffi_xs, r_obj* ffi_missing, r_obj* ffi_size, r_obj* ffi_frame) { struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = ffi_frame }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); const struct r_lazy error_call = { .x = syms.dot_error_call, .env = ffi_frame }; const enum vec_parallel_missing missing = parse_vec_parallel_missing(ffi_missing, error_call); const r_ssize size = (ffi_size == r_null) ? -1 : r_arg_as_ssize(ffi_size, ".size"); return vec_pany(ffi_xs, missing, size, &xs_arg, error_call); } r_obj* ffi_vec_pall(r_obj* ffi_xs, r_obj* ffi_missing, r_obj* ffi_size, r_obj* ffi_frame) { struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = ffi_frame }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); const struct r_lazy error_call = { .x = syms.dot_error_call, .env = ffi_frame }; const enum vec_parallel_missing missing = parse_vec_parallel_missing(ffi_missing, error_call); const r_ssize size = (ffi_size == r_null) ? -1 : r_arg_as_ssize(ffi_size, ".size"); return vec_pall(ffi_xs, missing, size, &xs_arg, error_call); } r_obj* vec_pany( r_obj* xs, enum vec_parallel_missing missing, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ) { return vec_parallel(xs, missing, size, p_xs_arg, error_call, VEC_PARALLEL_VARIANT_any); } r_obj* vec_pall( r_obj* xs, enum vec_parallel_missing missing, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ) { return vec_parallel(xs, missing, size, p_xs_arg, error_call, VEC_PARALLEL_VARIANT_all); } static r_obj* vec_parallel( r_obj* xs, enum vec_parallel_missing missing, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call, enum vec_parallel_variant parallel ) { // Input must be a list obj_check_list(xs, p_xs_arg, error_call); // Every element of that list must be a bare logical vector list_check_all_condition_indices(xs, p_xs_arg, error_call); // Every element of that list must be the same size size = compute_size(size, xs); list_check_all_size(xs, size, VCTRS_ALLOW_NULL_no, p_xs_arg, error_call); r_obj* out = KEEP(r_alloc_logical(size)); int* v_out = r_lgl_begin(out); const r_ssize xs_size = r_length(xs); r_obj* const* v_xs = r_list_cbegin(xs); if (xs_size == 0) { // Zero input case is special, fill with values that match `any()` and `all()` switch (parallel) { case VEC_PARALLEL_VARIANT_all: r_p_lgl_fill(v_out, 1, size); break; case VEC_PARALLEL_VARIANT_any: r_p_lgl_fill(v_out, 0, size); break; default: r_stop_unreachable(); } } else { // Initialize output with first input r_obj* x = v_xs[0]; const int* v_x = r_lgl_begin(x); vec_parallel_init(v_x, missing, size, v_out); // Combine with remaining inputs for (r_ssize i = 1; i < xs_size; ++i) { r_obj* x = v_xs[i]; const int* v_x = r_lgl_begin(x); switch (parallel) { case VEC_PARALLEL_VARIANT_all: vec_pall_fill(v_x, missing, size, v_out); break; case VEC_PARALLEL_VARIANT_any: vec_pany_fill(v_x, missing, size, v_out); break; default: r_stop_unreachable(); } } } FREE(1); return out; } // ----------------------------------------------------------------------------- // Same, regardless of variant static inline void vec_parallel_init(const int* v_x, enum vec_parallel_missing missing, r_ssize size, int* v_out) { switch (missing) { case VEC_PARALLEL_MISSING_na: vec_parallel_init_missing_as_na(v_x, size, v_out); break; case VEC_PARALLEL_MISSING_false: vec_parallel_init_missing_as_false(v_x, size, v_out); break; case VEC_PARALLEL_MISSING_true: vec_parallel_init_missing_as_true(v_x, size, v_out); break; default: r_stop_unreachable(); } } // Propagates `NA` static inline void vec_parallel_init_missing_as_na(const int* v_x, r_ssize size, int* v_out) { r_memcpy(v_out, v_x, sizeof(*v_out) * size); } // Turns `NA` into `FALSE` static inline void vec_parallel_init_missing_as_false(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt = v_x[i]; v_out[i] = (elt != r_globals.na_lgl) * elt; } } // Turns `NA` into `TRUE` static inline void vec_parallel_init_missing_as_true(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { v_out[i] = (bool) v_x[i]; } } // ----------------------------------------------------------------------------- static inline void vec_pany_fill(const int* v_x, enum vec_parallel_missing missing, r_ssize size, int* v_out) { switch (missing) { case VEC_PARALLEL_MISSING_na: vec_pany_fill_missing_as_na(v_x, size, v_out); break; case VEC_PARALLEL_MISSING_false: vec_pany_fill_missing_as_false(v_x, size, v_out); break; case VEC_PARALLEL_MISSING_true: vec_pany_fill_missing_as_true(v_x, size, v_out); break; default: r_stop_unreachable(); } } static inline void vec_pall_fill(const int* v_x, enum vec_parallel_missing missing, r_ssize size, int* v_out) { switch (missing) { case VEC_PARALLEL_MISSING_na: vec_pall_fill_missing_as_na(v_x, size, v_out); break; case VEC_PARALLEL_MISSING_false: vec_pall_fill_missing_as_false(v_x, size, v_out); break; case VEC_PARALLEL_MISSING_true: vec_pall_fill_missing_as_true(v_x, size, v_out); break; default: r_stop_unreachable(); } } /* * Each of these implementations has been highly optimized to be completely * branchless. Additionally, we are careful to ensure that the access of both * `v_out[i]` and `v_x[i]` is mandatory at each iteration rather than * conditional (i.e. `v_out[i] && v_x[i]` vs `elt_out && elt_x`). Conditional * access of `v_x[i]` in particular can destroy performance here, as it prevents * the compiler from heavily optimizing the actual computation. * * Additionally, the implementations of pall/pany have been designed to be as * symmetrical as possible to increase code clarity. For example, * `vec_pall_fill_*()` and `vec_pany_fill_*()` are symmetrical. * * A nice property of these implementations is that they don't rely on * assumptions about two's complement, bitwise operations, or the underlying * value of `NA_LOGICAL` in any way, making them as portable as possible. */ /* * F || F == F * F || T == T * F || N == N * * T || F == T * T || T == T * T || N == T * * N || F == N * N || T == T * N || N == N */ static inline void vec_pany_fill_missing_as_na(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt_out = v_out[i]; const int elt_x = v_x[i]; const bool any_true = (elt_out == 1) || (elt_x == 1); const bool equal = elt_out == elt_x; v_out[i] = any_true + !any_true * (equal * elt_out + !equal * r_globals.na_lgl); } } /* * F && F == F * F && T == F * F && N == F * * T && F == F * T && T == T * T && N == N * * N && F == F * N && T == N * N && N == N */ static inline void vec_pall_fill_missing_as_na(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt_out = v_out[i]; const int elt_x = v_x[i]; const bool any_false = !elt_out || !elt_x; const bool equal = elt_out == elt_x; v_out[i] = !any_false * (equal * elt_out + !equal * r_globals.na_lgl); } } /* * Never need to worry about `N || *`, because the initialization loop * turns the first input's `N`s into `F`s. * * Treat `N == F` * * F || F == F * F || T == T * F || N == F * * T || F == T * T || T == T * T || N == T */ static inline void vec_pany_fill_missing_as_false(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt_out = v_out[i]; const int elt_x = v_x[i]; v_out[i] = elt_out || (elt_x == 1); } } /* * Never need to worry about `N && *`, because the initialization loop * turns the first input's `N`s into `F`s. * * Treat `N == F` * * F && F == F * F && T == F * F && N == F * * T && F == F * T && T == T * T && N == F */ static inline void vec_pall_fill_missing_as_false(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt_out = v_out[i]; const int elt_x = v_x[i]; v_out[i] = elt_out && (elt_x == 1); } } /* * Never need to worry about `N || *`, because the initialization loop * turns the first input's `N`s into `T`s. * * Treat `N == T` * * F || F == F * F || T == T * F || N == T * * T || F == T * T || T == T * T || N == T */ static inline void vec_pany_fill_missing_as_true(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt_out = v_out[i]; const int elt_x = v_x[i]; v_out[i] = elt_out || elt_x; } } /* * Never need to worry about `N && *`, because the initialization loop * turns the first input's `N`s into `T`s. * * Treat `N == T` * * F && F == F * F && T == F * F && N == F * * T && F == F * T && T == T * T && N == T */ static inline void vec_pall_fill_missing_as_true(const int* v_x, r_ssize size, int* v_out) { for (r_ssize i = 0; i < size; ++i) { const int elt_out = v_out[i]; const int elt_x = v_x[i]; v_out[i] = elt_out && elt_x; } } // ----------------------------------------------------------------------------- static bool r_is_scalar_logical(r_obj* x) { return r_typeof(x) == R_TYPE_logical && r_length(x) == 1; } static enum vec_parallel_missing parse_vec_parallel_missing(r_obj* missing, struct r_lazy error_call) { if (!r_is_scalar_logical(missing)) { r_abort_lazy_call(error_call, "`.missing` must be `NA`, `FALSE`, or `TRUE`."); } const int c_missing = r_lgl_get(missing, 0); if (c_missing == r_globals.na_lgl) { return VEC_PARALLEL_MISSING_na; } else if (c_missing == 0) { return VEC_PARALLEL_MISSING_false; } else if (c_missing == 1) { return VEC_PARALLEL_MISSING_true; } else { r_stop_internal("Unexpected `missing` value, %i.", c_missing); } } // Figure out the output size // - `size` if supplied // - Size of 1st `conditions` element if one exists // - Size 0 if `conditions` is empty static r_ssize compute_size(r_ssize size, r_obj* xs) { if (size != -1) { return size; } if (r_length(xs) == 0) { return 0; } return r_length(r_list_get(xs, 0)); } vctrs/src/bind.c0000644000176200001440000005200215157322033013262 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/bind-decl.h" enum names_to_style { // Outer names are zapped NAMES_TO_STYLE_zap, // Outer names are promoted to a column. Indices are used if no names are provided. NAMES_TO_STYLE_column, // Outer names are merged with inner names subject to `.name_spec` NAMES_TO_STYLE_name_spec }; // [[ register(external = TRUE) ]] r_obj* ffi_rbind(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* frame) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* names_to = r_node_car(args); args = r_node_cdr(args); r_obj* name_repair = r_node_car(args); args = r_node_cdr(args); r_obj* name_spec = r_node_car(args); struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = validate_bind_name_repair( name_repair, false ); KEEP(name_repair_opts.shelter); name_repair_opts.call = error_call; r_obj* out = vec_rbind( xs, ptype, names_to, &name_repair_opts, name_spec, error_call ); FREE(1); return out; } static r_obj* vec_rbind(r_obj* xs, r_obj* ptype, r_obj* names_to, struct name_repair_opts* name_repair, r_obj* name_spec, struct r_lazy error_call) { // In case `.arg` is added later on struct vctrs_arg* p_arg = vec_args.empty; int n_prot = 0; const r_ssize xs_size = r_length(xs); // We need to own `xs`, we are going to modify it via `as_df_row()` xs = KEEP_N(r_clone_referenced(xs), &n_prot); for (r_ssize i = 0; i < xs_size; ++i) { r_obj* elt = r_list_get(xs, i); r_list_poke(xs, i, as_df_row(elt, name_repair, error_call)); } // The common type holds information about common column names, // types, etc. Each element of `xs` needs to be cast to that type // before assignment. ptype = vec_ptype_common( xs, ptype, PTYPE_FINALISE_DEFAULT, S3_FALLBACK_true, p_arg, error_call ); KEEP_N(ptype, &n_prot); if (ptype == r_null) { FREE(n_prot); return new_data_frame(r_globals.empty_list, 0); } if (r_typeof(ptype) == R_TYPE_logical && r_length(ptype) == 0) { ptype = as_df_row_impl( vctrs_shared_missing_lgl, name_repair, error_call ); KEEP_N(ptype, &n_prot); } if (!is_data_frame(ptype)) { r_abort_lazy_call(error_call, "Can't bind objects that are not coercible to a data frame."); } bool assign_names = !r_inherits(name_spec, "rlang_zap"); enum names_to_style names_to_style; if (names_to == r_null) { names_to_style = NAMES_TO_STYLE_name_spec; } else if (r_inherits(names_to, "rlang_zap")) { names_to_style = NAMES_TO_STYLE_zap; } else if (r_is_string(names_to)) { names_to_style = NAMES_TO_STYLE_column; } else { r_abort_lazy_call( error_call, "%s must be `NULL`, a string, or an `rlang::zap()` object.", r_c_str_format_error_arg(".names_to") ); } r_ssize names_to_loc = 0; if (names_to_style == NAMES_TO_STYLE_column) { // Add that column name to `ptype` names_to = r_chr_get(names_to, 0); r_obj* ptype_nms = KEEP(r_names(ptype)); names_to_loc = r_chr_find(ptype_nms, names_to); FREE(1); if (names_to_loc < 0) { ptype = cbind_names_to( r_names(xs) != r_null, names_to, ptype, error_call ); KEEP_N(ptype, &n_prot); names_to_loc = 0; } } // Must happen after the `names_to` column has been added to `ptype` xs = vec_cast_common_params( xs, ptype, S3_FALLBACK_true, vec_args.empty, error_call ); KEEP_N(xs, &n_prot); // Find individual input sizes and total size of output r_ssize n_rows = 0; r_obj* sizes_shelter = KEEP_N(r_alloc_integer(xs_size), &n_prot); int* sizes = r_int_begin(sizes_shelter); for (r_ssize i = 0; i < xs_size; ++i) { r_obj* elt = r_list_get(xs, i); r_ssize size = (elt == r_null) ? 0 : vec_size(elt); n_rows += size; sizes[i] = size; } r_obj* proxy = KEEP_N(vec_proxy_recurse(ptype), &n_prot); if (!is_data_frame(proxy)) { r_abort_lazy_call(error_call, "Can't fill a data frame that doesn't have a data frame proxy."); } r_keep_loc out_pi; r_obj* out = vec_init(proxy, n_rows); KEEP_HERE(out, &out_pi); ++n_prot; // - We own the `out` container // - We own the `out` columns recursively // - We call `vec_proxy_recurse()` so must restore recursively const struct vec_restore_opts bind_restore_opts = { .ownership = VCTRS_OWNERSHIP_deep, .recursively_proxied = true }; const struct vec_proxy_assign_opts bind_proxy_assign_opts = { .ownership = VCTRS_OWNERSHIP_deep, .recursively_proxied = true, .slice_value = ASSIGNMENT_SLICE_VALUE_no, .index_style = VCTRS_INDEX_STYLE_location, .assign_names = assign_names, // Unlike in `vec_c()` we don't need to ignore outer names because // `df_assign()` doesn't deal with those .ignore_outer_names = false }; r_obj* loc = KEEP_N(compact_seq(0, 0, true), &n_prot); int* p_loc = r_int_begin(loc); r_obj* row_names = r_null; r_keep_loc rownames_pi; KEEP_HERE(row_names, &rownames_pi); ++n_prot; const void* p_names_to_index = NULL; r_obj* names_to_out = r_null; void* p_names_to_out = NULL; enum r_type names_to_type = 99; // Set by the switch r_obj* xs_names; bool xs_is_named; r_obj* const* p_xs_names; switch (names_to_style) { case NAMES_TO_STYLE_zap: { xs_names = r_null; xs_is_named = false; p_xs_names = NULL; break; } case NAMES_TO_STYLE_column: { xs_names = r_null; xs_is_named = false; p_xs_names = NULL; r_obj* names_to_index = r_null; if (r_names(xs) == r_null) { names_to_index = KEEP_N(r_alloc_integer(xs_size), &n_prot); r_int_fill_seq(names_to_index, 1, xs_size); } else { names_to_index = KEEP_N(r_names(xs), &n_prot); } names_to_type = r_typeof(names_to_index); names_to_out = KEEP_N(r_alloc_vector(names_to_type, n_rows), &n_prot); p_names_to_index = r_vec_deref_barrier_const(names_to_index); p_names_to_out = r_vec_deref_barrier(names_to_out); break; } case NAMES_TO_STYLE_name_spec: { xs_names = KEEP_N(r_names(xs), &n_prot); xs_is_named = xs_names != r_null; p_xs_names = xs_is_named ? r_chr_cbegin(xs_names) : NULL; break; } default: { r_stop_unreachable(); } } // Compact sequences use 0-based counters r_ssize counter = 0; for (r_ssize i = 0; i < xs_size; ++i) { const r_ssize size = sizes[i]; if (!size) { continue; } r_obj* x = r_list_get(xs, i); // Update `loc` to assign within `out[counter:counter + size, ]` init_compact_seq(p_loc, counter, size, true); // Total ownership of `out` because it was freshly created with `vec_init()` out = df_assign(out, loc, x, &bind_proxy_assign_opts); KEEP_AT(out, out_pi); if (assign_names) { r_obj* outer = xs_is_named ? p_xs_names[i] : r_null; r_obj* inner = KEEP(vec_names(x)); r_obj* x_nms = KEEP(apply_name_spec(name_spec, outer, inner, size)); if (x_nms != r_null) { R_LAZY_ALLOC(row_names, rownames_pi, R_TYPE_character, n_rows); // If there is no name to assign, skip the assignment since // `out_names` already contains empty strings if (inner != chrs_empty) { row_names = chr_assign( row_names, loc, x_nms, VCTRS_OWNERSHIP_deep, ASSIGNMENT_SLICE_VALUE_no, VCTRS_INDEX_STYLE_location ); KEEP_AT(row_names, rownames_pi); } } FREE(2); } // Assign current name to group vector, if supplied if (names_to_style == NAMES_TO_STYLE_column) { r_vec_fill(names_to_type, p_names_to_out, counter, p_names_to_index, i, size); } counter += size; } if (row_names != r_null) { r_attrib_poke(out, r_syms.row_names, row_names); } if (needs_df_list_combine_common_class_fallback(out)) { // We take the common `ptype` with `S3_FALLBACK_true`, so we // may have common class fallback columns that we need to sequentially // combine (i.e. with a fallback `vec_c()` style operation). // This is not ideal! const bool has_indices = false; r_obj* indices = r_null; const enum vctrs_index_style indices_style = VCTRS_INDEX_STYLE_location; const bool has_default = false; r_obj* default_ = r_null; struct vctrs_arg* p_indices_arg = vec_args.empty; const enum list_combine_multiple multiple = LIST_COMBINE_MULTIPLE_last; const enum assignment_slice_value slice_xs = ASSIGNMENT_SLICE_VALUE_no; df_list_combine_common_class_fallback( out, xs, has_indices, indices, indices_style, n_rows, has_default, default_, multiple, slice_xs, ptype, name_spec, name_repair, p_indices_arg, error_call ); } out = vec_restore_opts(out, ptype, &bind_restore_opts); KEEP_AT(out, out_pi); if (names_to_style == NAMES_TO_STYLE_column) { out = df_poke(out, names_to_loc, names_to_out); KEEP_AT(out, out_pi); } FREE(n_prot); return out; } static r_obj* as_df_row(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call) { if (vec_is_unspecified(x) && r_names(x) == r_null) { return x; } else { return as_df_row_impl(x, name_repair, error_call); } } static r_obj* as_df_row_impl(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call) { if (x == r_null) { return x; } if (is_data_frame(x)) { return df_repair_names(x, name_repair); } int nprot = 0; r_obj* dim = KEEP_N(vec_bare_dim(x), &nprot); r_ssize ndim = (dim == r_null) ? 1 : r_length(dim); if (ndim > 2) { r_abort_lazy_call(error_call, "Can't bind arrays."); } if (ndim == 2) { r_obj* out = KEEP_N(r_as_data_frame(x), &nprot); r_attrib_poke_names(out, vec_as_names(KEEP_N(colnames2(x), &nprot), name_repair)); FREE(nprot); return out; } // Take names before removing dimensions so we get colnames if needed r_obj* nms = KEEP_N(vec_names2(x), &nprot); nms = KEEP_N(vec_as_names(nms, name_repair), &nprot); if (dim != r_null) { x = KEEP_N(r_clone_referenced(x), &nprot); r_attrib_poke(x, r_syms.dim, r_null); r_attrib_poke(x, r_syms.dim_names, r_null); } // Remove names first as they are promoted to data frame column names. // Can be a user side object, so use `VCTRS_OWNERSHIP_foreign`. x = KEEP_N(vec_set_names(x, r_null, VCTRS_OWNERSHIP_foreign), &nprot); x = KEEP_N(vec_chop_unsafe(x, r_null, r_null), &nprot); r_attrib_poke_names(x, nms); x = new_data_frame(x, 1); FREE(nprot); return x; } // [[ register() ]] r_obj* ffi_as_df_row(r_obj* x, r_obj* quiet, r_obj* frame) { struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_unique, .fn = r_null, .quiet = r_lgl_get(quiet, 0) }; struct r_lazy error_call = { .x = frame, .env = r_null }; return as_df_row(x, &name_repair_opts, error_call); } static r_obj* cbind_names_to(bool has_names, r_obj* names_to, r_obj* ptype, struct r_lazy error_call) { r_obj* index_ptype = has_names ? r_globals.empty_chr : r_globals.empty_int; r_obj* tmp = KEEP(r_alloc_list(2)); r_list_poke(tmp, 0, index_ptype); r_list_poke(tmp, 1, ptype); r_obj* tmp_nms = KEEP(r_alloc_character(2)); r_chr_poke(tmp_nms, 0, names_to); r_chr_poke(tmp_nms, 1, strings_empty); r_attrib_poke_names(tmp, tmp_nms); r_obj* out = vec_cbind(tmp, r_null, r_null, NULL, error_call); FREE(2); return out; } // [[ register(external = TRUE) ]] r_obj* ffi_cbind(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* frame) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* size = r_node_car(args); args = r_node_cdr(args); r_obj* name_repair = r_node_car(args); struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = validate_bind_name_repair( name_repair, true ); KEEP(name_repair_opts.shelter); name_repair_opts.call = error_call; r_obj* out = vec_cbind(xs, ptype, size, &name_repair_opts, error_call); FREE(1); return out; } static r_obj* vec_cbind(r_obj* xs, r_obj* ptype, r_obj* size, struct name_repair_opts* name_repair, struct r_lazy error_call) { // In case `.arg` is added later on struct vctrs_arg* p_arg = vec_args.empty; r_ssize n = r_length(xs); // Will contain two things at different points: // - The list of container types we compute the ptype from // - The list of `xs` converted to data frames / validated // that we then cbind together r_obj* xs_data_frames = KEEP(r_alloc_list(n)); // Find the common container type of inputs, `rownames` is learned // on the fly to be the first `x` we encounter that is a data frame // with character row names (it does not need protection because // it is an attribute on `x`) r_obj* rownames = r_null; for (r_ssize i = 0; i < n; ++i) { r_obj* x = r_list_get(xs, i); r_list_poke(xs_data_frames, i, cbind_container_type(x, &rownames)); } ptype = KEEP(cbind_container_type(ptype, &rownames)); r_obj* type = KEEP(vec_ptype_common( xs_data_frames, ptype, PTYPE_FINALISE_DEFAULT, S3_FALLBACK_false, p_arg, error_call )); if (type == r_null) { type = new_data_frame(r_globals.empty_list, 0); } else if (!is_data_frame(type)) { type = r_as_data_frame(type); } FREE(1); KEEP(type); r_ssize nrow; if (size == r_null) { nrow = vec_size_common(xs, 0, p_arg, error_call); } else { nrow = vec_as_short_length(size, vec_args.dot_size, error_call); } if (rownames != r_null && r_length(rownames) != nrow) { rownames = KEEP(vec_recycle(rownames, nrow, vec_args.empty, error_call)); rownames = vec_as_unique_names(rownames, false); FREE(1); } KEEP(rownames); // Convert inputs to data frames, validate, and collect total number of columns. // Converted inputs are stored in `xs_data_frames` so we can reuse that list // rather than cloning `xs`. r_keep_loc xs_names_pi; r_obj* xs_names = r_names(xs); KEEP_HERE(xs_names, &xs_names_pi); const bool has_xs_names = xs_names != r_null; r_obj* const* v_xs_names = has_xs_names ? r_chr_cbegin(xs_names) : NULL; // We don't own `xs_names` so if we have to modify it, then we need to // clone it first bool has_cloned_xs_names = false; r_ssize ncol = 0; for (r_ssize i = 0; i < n; ++i) { r_obj* x = r_list_get(xs, i); if (x == r_null) { r_list_poke(xs_data_frames, i, r_null); continue; } x = KEEP(vec_recycle(x, nrow, vec_args.empty, r_lazy_null)); r_obj* outer_name = has_xs_names ? v_xs_names[i] : strings_empty; bool allow_packing; x = KEEP(as_df_col(x, outer_name, &allow_packing, error_call)); // Remove outer name of column vectors because they shouldn't be repacked if (has_xs_names && !allow_packing) { if (!has_cloned_xs_names) { has_cloned_xs_names = true; xs_names = r_clone_referenced(xs_names); KEEP_AT(xs_names, xs_names_pi); v_xs_names = r_chr_cbegin(xs_names); } r_chr_poke(xs_names, i, strings_empty); } r_list_poke(xs_data_frames, i, x); FREE(2); // Named inputs are packed in a single column r_ssize x_ncol = outer_name == strings_empty ? r_length(x) : 1; ncol += x_ncol; } // Fill in columns r_keep_loc out_pi; r_obj* out = r_alloc_list(ncol); KEEP_HERE(out, &out_pi); init_data_frame(out, nrow); // We own the data frame list, but not the columns! const enum vctrs_ownership cbind_out_ownership = VCTRS_OWNERSHIP_shallow; // We restore this bare data frame to `type`. // Not restoring recursively, we don't proxy the columns and we // don't own them. const struct vec_restore_opts cbind_restore_opts = { .ownership = cbind_out_ownership, .recursively_proxied = false }; r_keep_loc names_pi; r_obj* names = r_alloc_character(ncol); KEEP_HERE(names, &names_pi); r_obj* idx = KEEP(compact_seq(0, 0, true)); int* idx_ptr = r_int_begin(idx); r_ssize counter = 0; for (r_ssize i = 0; i < n; ++i) { r_obj* x = r_list_get(xs_data_frames, i); if (x == r_null) { continue; } r_obj* outer_name = has_xs_names ? v_xs_names[i] : strings_empty; if (outer_name != strings_empty) { r_list_poke(out, counter, x); r_chr_poke(names, counter, outer_name); ++counter; continue; } r_ssize xn = r_length(x); init_compact_seq(idx_ptr, counter, xn, true); out = list_assign( out, idx, x, cbind_out_ownership, ASSIGNMENT_SLICE_VALUE_no, VCTRS_INDEX_STYLE_location ); KEEP_AT(out, out_pi); r_obj* xnms = KEEP(r_names(x)); if (xnms != r_null) { names = chr_assign( names, idx, xnms, VCTRS_OWNERSHIP_deep, ASSIGNMENT_SLICE_VALUE_no, VCTRS_INDEX_STYLE_location ); KEEP_AT(names, names_pi); } FREE(1); counter += xn; } names = KEEP(vec_as_names(names, name_repair)); r_attrib_poke(out, r_syms.names, names); if (rownames != r_null) { r_attrib_poke(out, r_syms.row_names, rownames); } out = vec_restore_opts(out, type, &cbind_restore_opts); FREE(9); return out; } r_obj* vec_cbind_frame_ptype(r_obj* x) { return vctrs_dispatch1(syms_vec_cbind_frame_ptype, fns_vec_cbind_frame_ptype, syms_x, x); } static r_obj* cbind_container_type(r_obj* x, void* data) { if (is_data_frame(x)) { r_obj* rn = df_rownames(x); if (rownames_type(rn) == ROWNAMES_TYPE_identifiers) { r_obj** learned_rn_p = (r_obj**) data; r_obj* learned_rn = *learned_rn_p; if (learned_rn == r_null) { *learned_rn_p = rn; } } return vec_cbind_frame_ptype(x); } else { return r_null; } } // [[ register() ]] r_obj* ffi_as_df_col(r_obj* x, r_obj* outer, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; bool allow_pack; return as_df_col(x, r_chr_get(outer, 0), &allow_pack, error_call); } static r_obj* as_df_col(r_obj* x, r_obj* outer, bool* allow_pack, struct r_lazy error_call) { if (is_data_frame(x)) { *allow_pack = true; return r_clone(x); } r_ssize ndim = vec_bare_dim_n(x); if (ndim > 2) { r_abort_lazy_call(error_call, "Can't bind arrays."); } if (ndim > 0) { *allow_pack = true; return shaped_as_df_col(x, outer); } *allow_pack = false; return vec_as_df_col(x, outer); } static r_obj* shaped_as_df_col(r_obj* x, r_obj* outer) { // If packed, store array as a column if (outer != strings_empty) { return x; } // If unpacked, transform to data frame first. We repair names // after unpacking and concatenation. r_obj* out = KEEP(r_as_data_frame(x)); // Remove names if they were repaired by `as.data.frame()` if (colnames(x) == r_null) { r_attrib_poke_names(out, r_null); } FREE(1); return out; } static r_obj* vec_as_df_col(r_obj* x, r_obj* outer) { r_obj* out = KEEP(r_alloc_list(1)); r_list_poke(out, 0, x); if (outer != strings_empty) { r_obj* names = KEEP(r_str_as_character(outer)); r_attrib_poke_names(out, names); FREE(1); } init_data_frame(out, r_length(x)); FREE(1); return out; } static struct name_repair_opts validate_bind_name_repair(r_obj* name_repair, bool allow_minimal) { struct name_repair_opts opts = new_name_repair_opts(name_repair, r_lazy_null, false, r_lazy_null); switch (opts.type) { case NAME_REPAIR_custom: case NAME_REPAIR_unique: case NAME_REPAIR_universal: case NAME_REPAIR_check_unique: break; case NAME_REPAIR_minimal: if (allow_minimal) break; // else fallthrough default: if (allow_minimal) { r_abort_call(r_null, "`.name_repair` can't be `\"%s\"`.\n" "It must be one of `\"unique\"`, `\"universal\"`, `\"check_unique\"`, or `\"minimal\"`.", name_repair_arg_as_c_string(opts.type)); } else { r_abort_call(r_null, "`.name_repair` can't be `\"%s\"`.\n" "It must be one of `\"unique\"`, `\"universal\"`, or `\"check_unique\"`.", name_repair_arg_as_c_string(opts.type)); } } return opts; } void vctrs_init_bind(r_obj* ns) { syms_vec_cbind_frame_ptype = r_sym("vec_cbind_frame_ptype"); fns_vec_cbind_frame_ptype = r_env_get(ns, syms_vec_cbind_frame_ptype); } static r_obj* syms_vec_cbind_frame_ptype = NULL; static r_obj* fns_vec_cbind_frame_ptype = NULL; vctrs/src/runs.c0000644000176200001440000003353415156537555013366 0ustar liggesusers#include "vctrs.h" #include "vec-bool.h" enum vctrs_run_bound { VCTRS_RUN_BOUND_start = 0, VCTRS_RUN_BOUND_end = 1 }; #include "decl/runs-decl.h" // ----------------------------------------------------------------------------- r_obj* ffi_vec_detect_run_bounds(r_obj* x, r_obj* ffi_start, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; const enum vctrs_run_bound which = as_run_bound(ffi_start, error_call); return vec_detect_run_bounds(x, which, error_call); } static r_obj* vec_detect_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call) { struct r_vector_bool* p_where = vec_detect_run_bounds_bool(x, which, error_call); KEEP(p_where->shelter); const bool* v_where = r_vector_bool_cbegin(p_where); const r_ssize size = r_vector_bool_length(p_where); r_obj* out = KEEP(r_alloc_logical(size)); int* v_out = r_lgl_begin(out); for (r_ssize i = 0; i < size; ++i) { v_out[i] = v_where[i]; } FREE(2); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_locate_run_bounds(r_obj* x, r_obj* ffi_start, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; const enum vctrs_run_bound which = as_run_bound(ffi_start, error_call); return vec_locate_run_bounds(x, which, error_call); } static r_obj* vec_locate_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call) { struct r_vector_bool* p_where = vec_detect_run_bounds_bool(x, which, error_call); KEEP(p_where->shelter); const bool* v_where = r_vector_bool_cbegin(p_where); const r_ssize size = r_vector_bool_length(p_where); r_ssize n = 0; for (r_ssize i = 0; i < size; ++i) { n += v_where[i]; } r_obj* out = KEEP(r_alloc_integer(n)); int* v_out = r_int_begin(out); r_ssize j = compute_iter_loc(n, which); r_ssize loc = compute_iter_loc(size, which); const r_ssize step = compute_iter_step(which); // First/last value are always the final bound locations // (depending on `which`), so `j` won't ever write to OOB locations for (r_ssize i = 0; i < size; ++i) { v_out[j] = loc + 1; j += step * v_where[loc]; loc += step; } FREE(2); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_identify_runs(r_obj* x, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; return vec_identify_runs(x, error_call); } r_obj* vec_identify_runs(r_obj* x, struct r_lazy error_call) { struct r_vector_bool* p_starts = vec_detect_run_bounds_bool(x, VCTRS_RUN_BOUND_start, error_call); KEEP(p_starts->shelter); const bool* v_starts = r_vector_bool_cbegin(p_starts); const r_ssize size = r_vector_bool_length(p_starts); r_obj* out = KEEP(r_alloc_integer(size)); int* v_out = r_int_begin(out); int n = 0; for (r_ssize i = 0; i < size; ++i) { n += v_starts[i]; v_out[i] = n; } r_obj* ffi_n = r_int(n); r_attrib_poke(out, syms_n, ffi_n); FREE(2); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_run_sizes(r_obj* x, r_obj* frame) { struct r_lazy error_call = { .x = frame, .env = r_null }; return vec_run_sizes(x, error_call); } r_obj* vec_run_sizes(r_obj* x, struct r_lazy error_call) { struct r_vector_bool* p_ends = vec_detect_run_bounds_bool(x, VCTRS_RUN_BOUND_end, error_call); KEEP(p_ends->shelter); const bool* v_ends = r_vector_bool_cbegin(p_ends); const r_ssize size = r_vector_bool_length(p_ends); r_ssize n = 0; for (r_ssize i = 0; i < size; ++i) { n += v_ends[i]; } r_obj* out = KEEP(r_alloc_integer(n)); int* v_out = r_int_begin(out); r_ssize j = 0; int count = 1; for (r_ssize i = 0; i < size; ++i) { const bool end = v_ends[i]; v_out[j] = count; j += end; count = !end * count + 1; } FREE(2); return out; } // ----------------------------------------------------------------------------- /* * Like `vec_detect_run_bounds()`, but returns a less memory intensive * boolean array as an `r_vector_bool`. */ static struct r_vector_bool* vec_detect_run_bounds_bool(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, vec_args.x, error_call); r_obj* proxy = KEEP(vec_proxy_equal(x)); proxy = KEEP(obj_encode_utf8(proxy)); const r_ssize size = vec_size(proxy); struct r_vector_bool* p_out = r_new_vector_bool(size); KEEP(p_out->shelter); bool* v_out = r_vector_bool_begin(p_out); const enum vctrs_type type = vec_proxy_typeof(proxy); switch (type) { case VCTRS_TYPE_logical: lgl_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_integer: int_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_double: dbl_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_complex: cpl_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_character: chr_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_raw: raw_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_list: list_detect_run_bounds_bool(proxy, size, which, v_out); break; case VCTRS_TYPE_dataframe: df_detect_run_bounds_bool(proxy, size, which, v_out); break; default: stop_unimplemented_vctrs_type("vec_detect_run_bounds_bool", type); } FREE(3); return p_out; } // ----------------------------------------------------------------------------- // Algorithm for "ends" is same as "starts", we just iterate in reverse #define VEC_DETECT_RUN_BOUNDS_BOOL(CTYPE, CBEGIN, EQUAL_NA_EQUAL) { \ if (size == 0) { \ /* Algorithm requires at least 1 value */ \ return; \ } \ \ CTYPE const* v_x = CBEGIN(x); \ \ r_ssize loc = compute_iter_loc(size, which); \ const r_ssize step = compute_iter_step(which); \ \ /* Handle first/last value */ \ CTYPE ref = v_x[loc]; \ v_out[loc] = true; \ loc += step; \ \ for (r_ssize i = 1; i < size; ++i) { \ CTYPE const elt = v_x[loc]; \ v_out[loc] = !EQUAL_NA_EQUAL(elt, ref); \ ref = elt; \ loc += step; \ } \ } static inline void lgl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(int, r_lgl_cbegin, lgl_equal_na_equal); } static inline void int_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(int, r_int_cbegin, int_equal_na_equal); } static inline void dbl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(double, r_dbl_cbegin, dbl_equal_na_equal); } static inline void cpl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(Rcomplex, r_cpl_cbegin, cpl_equal_na_equal); } static inline void chr_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_chr_cbegin, chr_equal_na_equal); } static inline void raw_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(Rbyte, r_raw_cbegin, raw_equal_na_equal); } static inline void list_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_list_cbegin, list_equal_na_equal); } #undef VEC_DETECT_RUN_BOUNDS_BOOL // ----------------------------------------------------------------------------- static inline void df_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { if (size == 0) { // Algorithm requires at least 1 value return; } const r_ssize n_col = r_length(x); r_obj* const* v_x = r_list_cbegin(x); r_ssize loc = compute_iter_loc(size, which); const r_ssize step = compute_iter_step(which); // `v_out` will eventually be `true` if we are in a run // continuation, and `false` if we are starting a new run. v_out[loc] = false; loc += step; for (r_ssize i = 1; i < size; ++i) { v_out[loc] = true; loc += step; } for (r_ssize i = 0; i < n_col; ++i) { col_detect_run_bounds_bool(v_x[i], size, which, v_out); } // Now invert to detect the bounds for (r_ssize i = 0; i < size; ++i) { v_out[i] = !v_out[i]; } } static inline void col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_integer: int_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_double: dbl_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_complex: cpl_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_character: chr_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_raw: raw_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_list: list_col_detect_run_bounds_bool(x, size, which, v_out); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should be flattened."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); default: r_abort("Unimplemented type."); } } #define VEC_COL_DETECT_RUN_BOUNDS_BOOL(CTYPE, CBEGIN, EQUAL_NA_EQUAL) { \ CTYPE const* v_x = CBEGIN(x); \ \ r_ssize loc = compute_iter_loc(size, which); \ const r_ssize step = compute_iter_step(which); \ \ CTYPE ref = v_x[loc]; \ loc += step; \ \ for (r_ssize i = 1; i < size; ++i) { \ CTYPE const elt = v_x[loc]; \ v_out[loc] = v_out[loc] && EQUAL_NA_EQUAL(ref, elt); \ ref = elt; \ loc += step; \ } \ } static inline void lgl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(int, r_lgl_cbegin, lgl_equal_na_equal); } static inline void int_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(int, r_int_cbegin, int_equal_na_equal); } static inline void dbl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(double, r_dbl_cbegin, dbl_equal_na_equal); } static inline void cpl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(Rcomplex, r_cpl_cbegin, cpl_equal_na_equal); } static inline void chr_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_chr_cbegin, chr_equal_na_equal); } static inline void raw_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(Rbyte, r_raw_cbegin, raw_equal_na_equal); } static inline void list_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out) { VEC_COL_DETECT_RUN_BOUNDS_BOOL(r_obj*, r_list_cbegin, list_equal_na_equal); } #undef VEC_COL_DETECT_RUN_BOUNDS_BOOL // ----------------------------------------------------------------------------- static inline r_ssize compute_iter_loc(r_ssize size, enum vctrs_run_bound which) { switch (which) { case VCTRS_RUN_BOUND_start: return 0; case VCTRS_RUN_BOUND_end: return size - 1; default: r_stop_internal("Unknown `which` value."); } } static inline r_ssize compute_iter_step(enum vctrs_run_bound which) { switch (which) { case VCTRS_RUN_BOUND_start: return 1; case VCTRS_RUN_BOUND_end: return -1; default: r_stop_internal("Unknown `which` value."); } } static inline enum vctrs_run_bound as_run_bound(r_obj* which, struct r_lazy error_call) { struct r_lazy error_arg = { .x = chrs_which, .env = r_null }; r_obj* values = KEEP(r_alloc_character(2)); r_chr_poke(values, 0, r_str("start")); r_chr_poke(values, 1, r_str("end")); const int match = r_arg_match(which, values, error_arg, error_call); enum vctrs_run_bound out; switch (match) { case 0: out = VCTRS_RUN_BOUND_start; break; case 1: out = VCTRS_RUN_BOUND_end; break; default: r_stop_internal("Unknown `which` value."); } FREE(1); return out; } vctrs/src/altrep.c0000644000176200001440000000015015156001116013625 0ustar liggesusers#include // [[ register() ]] r_obj* vctrs_is_altrep(r_obj* x) { return r_lgl(ALTREP(x)); } vctrs/src/parallel.h0000644000176200001440000000075015113325071014147 0ustar liggesusers#ifndef VCTRS_PARALLEL_H #define VCTRS_PARALLEL_H #include "vctrs-core.h" enum vec_parallel_missing { VEC_PARALLEL_MISSING_na, VEC_PARALLEL_MISSING_false, VEC_PARALLEL_MISSING_true }; r_obj* vec_pany( r_obj* xs, enum vec_parallel_missing missing, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ); r_obj* vec_pall( r_obj* xs, enum vec_parallel_missing missing, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ); #endif vctrs/src/size-common.h0000644000176200001440000000046715113325071014620 0ustar liggesusers#ifndef VCTRS_SIZE_COMMON_H #define VCTRS_SIZE_COMMON_H #include "vctrs-core.h" r_ssize vec_size_common( r_obj* xs, r_ssize absent, struct vctrs_arg* p_xs_arg, struct r_lazy call ); r_obj* vec_recycle_common( r_obj* xs, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy call ); #endif vctrs/src/size.h0000644000176200001440000000173215113325071013326 0ustar liggesusers#ifndef VCTRS_SIZE_H #define VCTRS_SIZE_H #include "vctrs-core.h" #include "globals.h" r_ssize vec_size(r_obj* x); r_ssize vec_size_params( r_obj* x, struct vctrs_arg* p_x_arg, struct r_lazy call ); r_obj* vec_recycle( r_obj* x, r_ssize size, struct vctrs_arg* p_x_arg, struct r_lazy call ); r_obj* vec_recycle_fallback(r_obj* x, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call); r_obj* list_sizes( r_obj* xs, struct vctrs_arg* p_xs_arg, struct r_lazy call ); r_ssize df_size(r_obj* x); r_ssize df_raw_size(r_obj* x); r_ssize df_rownames_size(r_obj* x); r_ssize df_raw_size_from_list(r_obj* x); r_ssize vec_as_short_length(r_obj* size, struct vctrs_arg* p_arg, struct r_lazy call); r_ssize vec_as_ssize(r_obj* n, struct vctrs_arg* arg, struct r_lazy call); #endif vctrs/src/conditions.h0000644000176200001440000000302415072256373014535 0ustar liggesusers#ifndef VCTRS_CONDITIONS_H #define VCTRS_CONDITIONS_H #include "vctrs-core.h" r_no_return void stop_scalar_type(SEXP x, struct vctrs_arg* arg, struct r_lazy call); r_no_return void stop_assert_size(r_ssize actual, r_ssize required, struct vctrs_arg* arg, struct r_lazy call); r_no_return void stop_incompatible_type(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast); r_no_return void stop_incompatible_size(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_no_return void stop_recycle_incompatible_size(r_ssize x_size, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call); r_no_return void stop_incompatible_shape(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, int axis, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg); void stop_corrupt_factor_levels(SEXP x, struct vctrs_arg* arg) r_no_return; void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) r_no_return; #endif vctrs/src/slice-chop.h0000644000176200001440000000045015061070120014370 0ustar liggesusers#ifndef VCTRS_SLICE_CHOP_H #define VCTRS_SLICE_CHOP_H #include "vctrs-core.h" r_obj* vec_chop(r_obj* x, r_obj* indices, r_obj* sizes); r_obj* vec_chop_unsafe(r_obj*, r_obj* indices, r_obj* sizes); r_obj* list_as_locations(r_obj* indices, r_ssize n, r_obj* names, bool allow_compact); #endif vctrs/src/slice.c0000644000176200001440000005044315156537555013474 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/slice-decl.h" #define SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ r_ssize n = r_length(subscript); \ int* subscript_data = r_int_begin(subscript); \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ for (r_ssize i = 0; i < n; ++i, ++subscript_data, ++out_data) { \ int j = *subscript_data; \ *out_data = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ } \ \ FREE(1); \ return out #define SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ \ int* subscript_data = r_int_begin(subscript); \ r_ssize j = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ CTYPE elt = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ \ for (r_ssize i = 0; i < n; ++i, ++out_data) { \ *out_data = elt; \ } \ \ FREE(1); \ return out #define SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF) \ int* subscript_data = r_int_begin(subscript); \ r_ssize start = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ r_ssize step = subscript_data[2]; \ \ const CTYPE* data = CONST_DEREF(x) + start; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ CTYPE* out_data = DEREF(out); \ \ for (int i = 0; i < n; ++i, ++out_data, data += step) { \ *out_data = *data; \ } \ \ FREE(1); \ return out #define SLICE(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (!materialize && ALTREP(x)) { \ return vec_slice_altrep(x, subscript); \ } else if (is_compact_rep(subscript)) { \ SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ } else { \ SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } static r_obj* lgl_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_logical, int, r_lgl_begin, r_lgl_cbegin, r_globals.na_lgl); } static r_obj* int_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_integer, int, r_int_begin, r_int_cbegin, r_globals.na_int); } static r_obj* dbl_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_double, double, r_dbl_begin, r_dbl_cbegin, r_globals.na_dbl); } static r_obj* cpl_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_complex, r_complex, r_cpl_begin, r_cpl_cbegin, r_globals.na_cpl); } static r_obj* raw_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE(R_TYPE_raw, char, (char*) r_raw_begin, (char*) r_raw_cbegin, 0); } #define SLICE_BARRIER_SUBSCRIPT(RTYPE, CONST_DEREF, SET, NA_VALUE) \ r_obj* const * data = CONST_DEREF(x); \ \ r_ssize n = r_length(subscript); \ int* subscript_data = r_int_begin(subscript); \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ \ for (r_ssize i = 0; i < n; ++i, ++subscript_data) { \ int j = *subscript_data; \ r_obj* elt = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ SET(out, i, elt); \ } \ \ FREE(1); \ return out #define SLICE_BARRIER_COMPACT_REP(RTYPE, CONST_DEREF, SET, NA_VALUE) \ r_obj* const * data = CONST_DEREF(x); \ \ int* subscript_data = r_int_begin(subscript); \ r_ssize j = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ \ r_obj* elt = (j == r_globals.na_int) ? NA_VALUE : data[j - 1]; \ \ for (r_ssize i = 0; i < n; ++i) { \ SET(out, i, elt); \ } \ \ FREE(1); \ return out #define SLICE_BARRIER_COMPACT_SEQ(RTYPE, CONST_DEREF, SET) \ r_obj* const * data = CONST_DEREF(x); \ \ int* subscript_data = r_int_begin(subscript); \ r_ssize start = subscript_data[0]; \ r_ssize n = subscript_data[1]; \ r_ssize step = subscript_data[2]; \ \ r_obj* out = KEEP(r_alloc_vector(RTYPE, n)); \ \ for (r_ssize i = 0; i < n; ++i, start += step) { \ SET(out, i, data[start]); \ } \ \ FREE(1); \ return out #define SLICE_BARRIER(RTYPE, CONST_DEREF, SET, NA_VALUE) \ if (!materialize && ALTREP(x)) { \ return vec_slice_altrep(x, subscript); \ } else if (is_compact_rep(subscript)) { \ SLICE_BARRIER_COMPACT_REP(RTYPE, CONST_DEREF, SET, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_BARRIER_COMPACT_SEQ(RTYPE, CONST_DEREF, SET); \ } else { \ SLICE_BARRIER_SUBSCRIPT(RTYPE, CONST_DEREF, SET, NA_VALUE); \ } static r_obj* chr_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke, r_globals.na_str); } static r_obj* chr_names_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke, r_strs.empty); } static r_obj* list_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE_BARRIER(R_TYPE_list, r_list_cbegin, r_list_poke, r_null); } static r_obj* df_slice(r_obj* x, r_obj* subscript) { r_ssize n = r_length(x); r_ssize size = df_size(x); r_obj* out = KEEP(r_alloc_list(n)); // FIXME: Should that be restored? r_obj* nms = r_names(x); r_attrib_poke(out, r_syms.names, nms); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = r_list_get(x, i); if (vec_size(elt) != size) { r_stop_internal("Column `%s` (size %" R_PRI_SSIZE ") must match the data frame (size %" R_PRI_SSIZE ").", r_chr_get_c_string(nms, i), vec_size(elt), size); } r_obj* sliced = vec_slice_unsafe(elt, subscript); r_list_poke(out, i, sliced); } init_data_frame(out, vec_subscript_size(subscript)); r_obj* row_nms = KEEP(df_rownames(x)); if (r_typeof(row_nms) == R_TYPE_character) { row_nms = slice_rownames(row_nms, subscript); r_attrib_poke(out, r_syms.row_names, row_nms); } FREE(2); return out; } r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. if (is_integer64(x)) { return vctrs_dispatch2(syms.vec_slice_fallback_integer64, fns.vec_slice_fallback_integer64, syms_x, x, syms_i, subscript); } return vctrs_dispatch2(syms.vec_slice_fallback, fns.vec_slice_fallback, syms_x, x, syms_i, subscript); } static r_obj* vec_slice_dispatch(r_obj* x, r_obj* subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. if (is_integer64(x)) { return vctrs_dispatch2(syms.vec_slice_dispatch_integer64, fns.vec_slice_dispatch_integer64, syms_x, x, syms_i, subscript); } return vctrs_dispatch2(syms_bracket, fns_bracket, syms_x, x, syms_i, subscript); } r_obj* vec_slice_altrep(r_obj* x, r_obj* subscript) { subscript = KEEP(vec_subscript_materialize(subscript)); r_obj* out = vctrs_dispatch2( syms.vec_slice_altrep, fns.vec_slice_altrep, syms_x, x, syms_i, subscript ); FREE(1); return out; } bool vec_requires_fallback(r_obj* x, struct vctrs_proxy_info info) { return r_is_object(x) && !info.had_proxy_method && info.type != VCTRS_TYPE_dataframe; } r_obj* vec_slice_base(enum vctrs_type type, r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { switch (type) { case VCTRS_TYPE_logical: return lgl_slice(x, subscript, materialize); case VCTRS_TYPE_integer: return int_slice(x, subscript, materialize); case VCTRS_TYPE_double: return dbl_slice(x, subscript, materialize); case VCTRS_TYPE_complex: return cpl_slice(x, subscript, materialize); case VCTRS_TYPE_character: return chr_slice(x, subscript, materialize); case VCTRS_TYPE_raw: return raw_slice(x, subscript, materialize); case VCTRS_TYPE_list: return list_slice(x, subscript, materialize); default: stop_unimplemented_vctrs_type("vec_slice_base", type); } } r_obj* slice_names(r_obj* names, r_obj* subscript) { if (names == r_null) { return names; } else { // Ensures `NA_integer_` subscripts utilize `""` as the name return chr_names_slice(names, subscript, VCTRS_MATERIALIZE_false); } } r_obj* slice_rownames(r_obj* names, r_obj* subscript) { if (names == r_null) { return names; } names = KEEP(chr_slice(names, subscript, VCTRS_MATERIALIZE_false)); // Rownames can't contain `NA` or duplicates names = vec_as_unique_names(names, true); FREE(1); return names; } r_obj* vec_slice_unsafe(r_obj* x, r_obj* subscript) { int nprot = 0; struct vctrs_proxy_info info = vec_proxy_info(x); KEEP_N(info.inner, &nprot); // Fallback to `[` if the class doesn't implement a proxy. This is // to be maximally compatible with existing classes. if (vec_requires_fallback(x, info)) { if (info.type == VCTRS_TYPE_scalar) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, NULL, r_lazy_null); } subscript = KEEP_N(vec_subscript_materialize(subscript), &nprot); r_obj* out; if (has_dim(x)) { out = KEEP_N(vec_slice_fallback(x, subscript), &nprot); } else { out = KEEP_N(vec_slice_dispatch(x, subscript), &nprot); } // Take over attribute restoration only if there is no `[` method if (!vec_is_restored(out, x)) { // Sliced `out` comes from R, so is foreign. Technically not proxied at all, // so "restoring" is a bit of a hack, but we only restore if it looks like the // `[` result is missing attributes. struct vec_restore_opts restore_opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; out = vec_restore_opts(out, x, &restore_opts); } FREE(nprot); return out; } switch (info.type) { case VCTRS_TYPE_null: r_stop_internal("Unexpected `NULL`."); case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: case VCTRS_TYPE_list: { r_obj* out; if (has_dim(x)) { out = KEEP_N(vec_slice_shaped(info.type, info.inner, subscript), &nprot); r_obj* names = KEEP_N(r_attrib_get(x, r_syms.dim_names), &nprot); if (names != r_null) { names = KEEP_N(r_clone(names), &nprot); r_obj* row_names = r_list_get(names, 0); row_names = KEEP_N(slice_names(row_names, subscript), &nprot); r_list_poke(names, 0, row_names); r_attrib_poke(out, r_syms.dim_names, names); } } else { out = KEEP_N(vec_slice_base(info.type, info.inner, subscript, VCTRS_MATERIALIZE_false), &nprot); r_obj* names = KEEP_N(r_names(x), &nprot); names = KEEP_N(slice_names(names, subscript), &nprot); r_attrib_poke_names(out, names); } // Sliced `out` is a fresh object from `vec_slice_base()` or // `vec_slice_shaped()` that we control (and we even modify names directly // above). For atomics, shallow and deep ownership are the same. We mark as // shallow just for consistency with the data frame path. struct vec_restore_opts restore_opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; out = vec_restore_opts(out, x, &restore_opts); FREE(nprot); return out; } case VCTRS_TYPE_dataframe: { r_obj* out = KEEP_N(df_slice(info.inner, subscript), &nprot); // Sliced `out` is a fresh list container from `df_slice()`, but we don't // necessarily own the sliced columns (an individual column could have gone // through the fallback path) so we set shallow ownership. This is fine, we // don't restore recursively here, so only the list container will need to // be modified during restoration. struct vec_restore_opts restore_opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; out = vec_restore_opts(out, x, &restore_opts); FREE(nprot); return out; } default: stop_unimplemented_vctrs_type("vec_slice_impl", info.type); } } static r_obj* vec_is_restored_cb(r_obj* tag, r_obj* _value, void* _data) { if (tag == r_syms.names) { // Keep iterating return NULL; } else { // Restored! return R_NilValue; } } bool vec_is_restored(r_obj* x, r_obj* to) { // Don't restore if there is an actual `[` method that ignored // attributes. Some methods like [.ts intentionally strip the class // and attributes. FIXME: This branch is now probably sufficient. if (s3_find_method("[", to, base_method_table) != r_null) { return true; } if (!r_attrib_has_any(x)) { return false; } // Class is restored if it contains any other attributes than names. // We might want to add support for data frames later on. return r_attrib_map(x, vec_is_restored_cb, NULL) != NULL; } r_obj* ffi_slice(r_obj* x, r_obj* i, r_obj* frame) { struct vec_slice_opts opts = { .x_arg = vec_args.x, .i_arg = vec_args.i, .call = {.x = r_syms.error_call, .env = frame} }; return vec_slice_opts(x, i, &opts); } r_obj* vec_slice_opts(r_obj* x, r_obj* i, const struct vec_slice_opts* opts) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, opts->x_arg, opts->call); r_obj* names = KEEP(vec_names(x)); i = KEEP(vec_as_location_ctxt(i, vec_size(x), names, opts->i_arg, opts->call)); r_obj* out = vec_slice_unsafe(x, i); FREE(2); return out; } // Reverse a vector r_obj* vec_reverse(r_obj* x) { const r_ssize size = vec_size(x); const r_ssize start = (size == 0) ? 0 : size - 1; const bool increasing = false; r_obj* index = KEEP(compact_seq(start, size, increasing)); r_obj* out = vec_slice_unsafe(x, index); FREE(1); return out; } r_obj* vec_init(r_obj* x, r_ssize n) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, vec_args.x, lazy_calls.vec_init); if (n < 0) { r_abort_lazy_call(lazy_calls.vec_init, "%s must be a positive integer.", r_c_str_format_error_arg("n")); } r_obj* i = KEEP(compact_rep(r_globals.na_int, n)); r_obj* out = vec_slice_unsafe(x, i); FREE(1); return out; } // [[ register() ]] r_obj* ffi_init(r_obj* x, r_obj* ffi_n, r_obj* ffi_frame) { struct r_lazy call = { .x = ffi_frame, .env = r_null }; r_ssize n = vec_as_short_length(ffi_n, vec_args.n, call); r_obj* out = vec_init(x, n); return out; } // Exported for testing // [[ register() ]] r_obj* ffi_slice_seq(r_obj* x, r_obj* ffi_start, r_obj* ffi_size, r_obj* ffi_increasing) { r_ssize start = r_int_get(ffi_start, 0); r_ssize size = r_int_get(ffi_size, 0); bool increasing = r_lgl_get(ffi_increasing, 0); r_obj* subscript = KEEP(compact_seq(start, size, increasing)); r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } // Exported for testing // [[ register() ]] r_obj* ffi_slice_rep(r_obj* x, r_obj* ffi_i, r_obj* ffi_n) { r_ssize i = r_int_get(ffi_i, 0); r_ssize n = r_int_get(ffi_n, 0); r_obj* subscript = KEEP(compact_rep(i, n)); r_obj* out = vec_slice_unsafe(x, subscript); FREE(1); return out; } void vctrs_init_slice(r_obj* ns) { syms.vec_slice_altrep = r_sym("vec_slice_altrep"); syms.vec_slice_dispatch_integer64 = r_sym("vec_slice_dispatch_integer64"); syms.vec_slice_fallback = r_sym("vec_slice_fallback"); syms.vec_slice_fallback_integer64 = r_sym("vec_slice_fallback_integer64"); fns.vec_slice_altrep = r_eval(syms.vec_slice_altrep, ns); fns.vec_slice_dispatch_integer64 = r_eval(syms.vec_slice_dispatch_integer64, ns); fns.vec_slice_fallback = r_eval(syms.vec_slice_fallback, ns); fns.vec_slice_fallback_integer64 = r_eval(syms.vec_slice_fallback_integer64, ns); } vctrs/src/type-complex.h0000644000176200001440000000273215156001116015001 0ustar liggesusers#ifndef VCTRS_TYPE_COMPLEX_H #define VCTRS_TYPE_COMPLEX_H #include "vctrs-core.h" #include "utils.h" /* * Normalises a complex value so that if one side is missing, both are. This * ensures that all missing complex values are grouped together, no matter * what type of missingness it is. NA and NaN can still be separated by * `nan_distinct`, resulting in 4 different combinations of missingness. These * 4 groups of missingness will still all be grouped together, either before * or after any non-missing values have appeared. * See issue #1403 for more information. */ static inline r_complex cpl_normalise_missing(r_complex x) { const double na = r_globals.na_dbl; const double nan = R_NaN; const enum vctrs_dbl r_type = dbl_classify(x.r); const enum vctrs_dbl i_type = dbl_classify(x.i); switch (r_type) { case VCTRS_DBL_number: switch (i_type) { case VCTRS_DBL_number: return x; case VCTRS_DBL_missing: return (r_complex) { .r = na, .i = na}; case VCTRS_DBL_nan: return (r_complex) { .r = nan, .i = nan}; } case VCTRS_DBL_missing: switch (i_type) { case VCTRS_DBL_number: return (r_complex) { .r = na, .i = na}; case VCTRS_DBL_missing: return x; case VCTRS_DBL_nan: return x; } case VCTRS_DBL_nan: switch (i_type) { case VCTRS_DBL_number: return (r_complex) { .r = nan, .i = nan}; case VCTRS_DBL_missing: return x; case VCTRS_DBL_nan: return x; } } never_reached("cpl_normalise_missing"); } #endif vctrs/src/match-joint.c0000644000176200001440000003145515156537555014614 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/match-joint-decl.h" #define VEC_JOINT_XTFRM_LOOP(CMP) do { \ while (i < x_n_groups && j < y_n_groups) { \ const int x_group_size = v_x_group_sizes[i]; \ const int y_group_size = v_y_group_sizes[j]; \ \ const int x_loc = v_x_o[x_o_loc] - 1; \ const int y_loc = v_y_o[y_o_loc] - 1; \ \ const int cmp = CMP( \ p_x_vec, x_loc, \ p_y_vec, y_loc, \ nan_distinct \ ); \ \ if (cmp == -1) { \ for (int k = 0; k < x_group_size; ++k) { \ v_x_ranks[v_x_o[x_o_loc] - 1] = rank; \ ++x_o_loc; \ } \ ++i; \ } else if (cmp == 1) { \ for (int k = 0; k < y_group_size; ++k) { \ v_y_ranks[v_y_o[y_o_loc] - 1] = rank; \ ++y_o_loc; \ } \ ++j; \ } else { \ for (int k = 0; k < x_group_size; ++k) { \ v_x_ranks[v_x_o[x_o_loc] - 1] = rank; \ ++x_o_loc; \ } \ for (int k = 0; k < y_group_size; ++k) { \ v_y_ranks[v_y_o[y_o_loc] - 1] = rank; \ ++y_o_loc; \ } \ ++i; \ ++j; \ } \ \ ++rank; \ } \ } while(0) /* * `vec_joint_xtfrm()` takes two vectors of the same type and computes an * xtfrm-like integer proxy for each that takes into account the values between * the two columns. It is approximately equal to the idea of: * `vec_rank(vec_c(x, y), ties = "dense")` * followed by splitting the ranks back up into two vectors matching the sizes * of x and y. The reason we don't do that is because it limits the maximum size * that `vec_locate_matches()` can work on to * `vec_size(x) + vec_size(y) <= INT_MAX`, * since you have to combine the vectors together. * * The sole purpose of this function is to support `vec_locate_matches()`. * * # For example: * x <- c(2, 1.5, 1) * y <- c(3, 1.2, 2) * # vec_joint_xtfrm(x, y) theoretically results in: * x <- c(4L, 3L, 1L) * y <- c(5L, 2L, 4L) * # While the above result is the general idea, we actually start counting * # from `INT_MIN + 1` to maximally utilize the `int` space while still * # avoiding `INT_MIN == NA_INTEGER`. So the result is really: * x <- c(-2147483644L, -2147483645L, -2147483647L) * y <- c(-2147483643L, -2147483646L, -2147483644L) */ // [[ include("match-joint.h") ]] r_obj* vec_joint_xtfrm(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, bool nan_distinct, r_obj* chr_proxy_collate) { int n_prot = 0; r_obj* out = KEEP_N(r_alloc_list(2), &n_prot); // These aren't true ranks, but that name makes the most sense r_obj* x_ranks = r_alloc_integer(x_size); r_list_poke(out, 0, x_ranks); int* v_x_ranks = r_int_begin(x_ranks); r_obj* y_ranks = r_alloc_integer(y_size); r_list_poke(out, 1, y_ranks); int* v_y_ranks = r_int_begin(y_ranks); // Retain the results of applying the proxy, normalizing the encoding, and // doing the collation transform, since we will make comparisons directly // on these objects. // This also uses a special variant of `vec_proxy_order()` to support list // columns, which have proxies that can't be computed independently. r_obj* proxies = KEEP_N(vec_joint_proxy_order(x, y), &n_prot); r_obj* x_proxy = r_list_get(proxies, 0); x_proxy = KEEP_N(obj_encode_utf8(x_proxy), &n_prot); x_proxy = KEEP_N(proxy_apply_chr_proxy_collate(x_proxy, chr_proxy_collate), &n_prot); r_obj* y_proxy = r_list_get(proxies, 1); y_proxy = KEEP_N(obj_encode_utf8(y_proxy), &n_prot); y_proxy = KEEP_N(proxy_apply_chr_proxy_collate(y_proxy, chr_proxy_collate), &n_prot); // Called with `direction = "asc", na_value = "smallest"` to match the // comparison helpers in `match-compare.h` r_obj* x_info = KEEP_N(vec_order_info( x_proxy, chrs_asc, chrs_smallest, nan_distinct, r_null ), &n_prot); r_obj* y_info = KEEP_N(vec_order_info( y_proxy, chrs_asc, chrs_smallest, nan_distinct, r_null ), &n_prot); const int* v_x_o = r_int_cbegin(r_list_get(x_info, 0)); const int* v_x_group_sizes = r_int_cbegin(r_list_get(x_info, 1)); r_ssize x_n_groups = r_length(r_list_get(x_info, 1)); const int* v_y_o = r_int_cbegin(r_list_get(y_info, 0)); const int* v_y_group_sizes = r_int_cbegin(r_list_get(y_info, 1)); r_ssize y_n_groups = r_length(r_list_get(y_info, 1)); const enum vctrs_type type = vec_proxy_typeof(x_proxy); const struct poly_vec* p_x_poly = new_poly_vec(x_proxy, type); KEEP_N(p_x_poly->shelter, &n_prot); const void* p_x_vec = p_x_poly->p_vec; const struct poly_vec* p_y_poly = new_poly_vec(y_proxy, type); KEEP_N(p_y_poly->shelter, &n_prot); const void* p_y_vec = p_y_poly->p_vec; r_ssize i = 0; r_ssize j = 0; r_ssize x_o_loc = 0; r_ssize y_o_loc = 0; // Start rank as small as possible (while still different from NA), // to maximally utilize `int` storage int rank = INT_MIN + 1; // Now that we have the ordering of both vectors, // it is just a matter of merging two sorted arrays switch (type) { case VCTRS_TYPE_logical: VEC_JOINT_XTFRM_LOOP(p_lgl_order_compare_na_equal); break; case VCTRS_TYPE_integer: VEC_JOINT_XTFRM_LOOP(p_int_order_compare_na_equal); break; case VCTRS_TYPE_double: VEC_JOINT_XTFRM_LOOP(p_dbl_order_compare_na_equal); break; case VCTRS_TYPE_complex: VEC_JOINT_XTFRM_LOOP(p_cpl_order_compare_na_equal); break; case VCTRS_TYPE_character: VEC_JOINT_XTFRM_LOOP(p_chr_order_compare_na_equal); break; case VCTRS_TYPE_dataframe: VEC_JOINT_XTFRM_LOOP(p_df_order_compare_na_equal); break; default: stop_unimplemented_vctrs_type("vec_joint_xtfrm", type); } while (i < x_n_groups) { // Finish up remaining x groups const int x_group_size = v_x_group_sizes[i]; for (int k = 0; k < x_group_size; ++k) { v_x_ranks[v_x_o[x_o_loc] - 1] = rank; ++x_o_loc; } ++i; ++rank; } while (j < y_n_groups) { // Finish up remaining y groups const int y_group_size = v_y_group_sizes[j]; for (int k = 0; k < y_group_size; ++k) { v_y_ranks[v_y_o[y_o_loc] - 1] = rank; ++y_o_loc; } ++j; ++rank; } FREE(n_prot); return out; } #undef VEC_JOINT_XTFRM_LOOP // ----------------------------------------------------------------------------- /* * Specialized internal variant of `vec_proxy_order()` used in * `vec_joint_xtfrm()`. * * If we know that the `vec_proxy_order()` method of a type doesn't depend on * the data itself, then we just call `vec_proxy_order()` on `x` and `y` * separately. We know this is true for most base types (except lists) and * for the base R S3 types that we support natively in vctrs, so those get a * fast path. * * Otherwise, it is possible that the `vec_proxy_order()` method is dependent * on the data itself, like it is with lists and the bignum classes, so we need * to compute the order proxy "jointly" by combining `x` and `y` together. * * For example * x <- list(1.5, 2) * y <- list(2, 1.5) * vec_proxy_order(x) * # [1] 1 2 * vec_proxy_order(y) # can't compare proxies when taken individually * # [1] 1 2 * vec_proxy_order(c(x, y)) # jointly comparable * # [1] 1 2 2 1 * * Combining `x` and `y` has the downsides that it: * - Is slower than the independent proxy method * - Limits the maximum data size to `vec_size(x) + vec_size(y) <= INT_MAX` * * Data frames are analyzed one column at a time, so if one of the columns * requires a joint proxy, then we only have to combine those individual columns * together rather than the entire data frames. */ static inline r_obj* vec_joint_proxy_order(r_obj* x, r_obj* y) { if (r_typeof(x) != r_typeof(y)) { r_stop_internal("`x` and `y` should have the same type."); } switch (vec_typeof(x)) { case VCTRS_TYPE_unspecified: case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: { return vec_joint_proxy_order_independent(x, y); } case VCTRS_TYPE_list: { return vec_joint_proxy_order_dependent(x, y); } case VCTRS_TYPE_dataframe: { return df_joint_proxy_order(x, y); } case VCTRS_TYPE_s3: { return vec_joint_proxy_order_s3(x, y); } case VCTRS_TYPE_null: case VCTRS_TYPE_scalar: { stop_unimplemented_vctrs_type("vec_joint_proxy_order", vec_typeof(x)); } } r_stop_unreachable(); } static inline r_obj* vec_joint_proxy_order_independent(r_obj* x, r_obj* y) { r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, vec_proxy_order(x)); r_list_poke(out, 1, vec_proxy_order(y)); FREE(1); return out; } static inline r_obj* vec_joint_proxy_order_dependent(r_obj* x, r_obj* y) { r_ssize x_size = vec_size(x); r_ssize y_size = vec_size(y); r_obj* x_slicer = KEEP(compact_seq(0, x_size, true)); r_obj* y_slicer = KEEP(compact_seq(x_size, y_size, true)); r_obj* ptype = KEEP(vec_ptype(x, vec_args.empty, r_lazy_null)); r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, x); r_list_poke(out, 1, y); // Combine // NOTE: Without long vector support, this limits the maximum allowed // size of `vec_locate_matches()` input to // `vec_size(x) + vec_size(y) <= INT_MAX` // when foreign columns are used. r_obj* combined = KEEP(vec_c( out, ptype, r_null, p_no_repair_opts, vec_args.empty, r_lazy_null )); // Compute joint order-proxy r_obj* proxy = KEEP(vec_proxy_order(combined)); // Separate and store back in `out` r_list_poke(out, 0, vec_slice_unsafe(proxy, x_slicer)); r_list_poke(out, 1, vec_slice_unsafe(proxy, y_slicer)); FREE(6); return out; } static inline r_obj* vec_joint_proxy_order_s3(r_obj* x, r_obj* y) { const enum vctrs_class_type type = class_type(x); if (type != class_type(y)) { r_stop_internal("`x` and `y` should have the same class type."); } switch (type) { case VCTRS_CLASS_bare_factor: case VCTRS_CLASS_bare_ordered: case VCTRS_CLASS_bare_date: case VCTRS_CLASS_bare_posixct: case VCTRS_CLASS_bare_posixlt: { return vec_joint_proxy_order_independent(x, y); } case VCTRS_CLASS_bare_asis: case VCTRS_CLASS_list: case VCTRS_CLASS_unknown: { return vec_joint_proxy_order_dependent(x, y); } case VCTRS_CLASS_bare_tibble: case VCTRS_CLASS_data_frame: { return df_joint_proxy_order(x, y); } case VCTRS_CLASS_bare_data_frame: { r_stop_internal("Bare data frames should have been handled earlier."); } case VCTRS_CLASS_none: { r_stop_internal("Unclassed objects should have been handled earlier."); } } r_stop_unreachable(); } static inline r_obj* df_joint_proxy_order(r_obj* x, r_obj* y) { x = KEEP(r_clone_referenced(x)); y = KEEP(r_clone_referenced(y)); const r_ssize n_cols = r_length(x); if (n_cols != r_length(y)) { r_stop_internal("`x` and `y` must have the same number of columns."); } r_obj* const* v_x = r_list_cbegin(x); r_obj* const* v_y = r_list_cbegin(y); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* proxies = vec_joint_proxy_order(v_x[i], v_y[i]); r_list_poke(x, i, r_list_get(proxies, 0)); r_list_poke(y, i, r_list_get(proxies, 1)); } x = KEEP(df_flatten(x)); x = KEEP(vec_proxy_unwrap(x)); y = KEEP(df_flatten(y)); y = KEEP(vec_proxy_unwrap(y)); r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, x); r_list_poke(out, 1, y); FREE(7); return out; } vctrs/src/expand.c0000644000176200001440000000662315113325071013632 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/expand-decl.h" r_obj* ffi_vec_expand_grid(r_obj* xs, r_obj* ffi_vary, r_obj* ffi_name_repair, r_obj* frame) { struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; enum vctrs_expand_vary vary = parse_vary(ffi_vary); struct name_repair_opts name_repair_opts = new_name_repair_opts( ffi_name_repair, lazy_args.dot_name_repair, false, error_call ); KEEP(name_repair_opts.shelter); r_obj* out = vec_expand_grid(xs, vary, &name_repair_opts, error_call); FREE(1); return out; } r_obj* vec_expand_grid(r_obj* xs, enum vctrs_expand_vary vary, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call) { obj_check_list(xs, vec_args.empty, error_call); if (vec_any_missing(xs)) { // Drop `NULL`s before any other checks r_obj* complete = KEEP(vec_detect_complete(xs)); xs = vec_slice(xs, complete); FREE(1); } KEEP(xs); const r_ssize n = r_length(xs); r_obj* out = KEEP(r_alloc_list(n)); r_obj* names = KEEP(vec_names2(xs)); if (!r_is_minimal_names(names)) { r_abort_lazy_call(error_call, "All inputs must be named."); } names = vec_as_names(names, p_name_repair_opts); r_attrib_poke_names(out, names); r_obj* sizes = KEEP(list_sizes(xs, vec_args.empty, error_call)); const int* v_sizes = r_int_cbegin(sizes); r_obj* cumulative = KEEP(r_alloc_raw(n * sizeof(r_ssize))); r_ssize* v_cumulative = r_raw_begin(cumulative); r_ssize size = 1; for (r_ssize i = 0; i < n; ++i) { size = r_ssize_mult(size, v_sizes[i]); v_cumulative[i] = size; } // TODO: Support long vectors here if (size > R_LEN_T_MAX) { r_abort_lazy_call( error_call, "Long vectors are not yet supported. " "Expansion results in an allocation larger than 2^31-1 elements. " "Attempted allocation size was %.0lf.", (double) size ); } r_obj* const* v_xs = r_list_cbegin(xs); r_obj* ffi_times_each = KEEP(r_alloc_integer(1)); int* p_ffi_times_each = r_int_begin(ffi_times_each); for (r_ssize i = 0; i < n; ++i) { r_obj* x = v_xs[i]; r_ssize times_each = 0; r_ssize times = 0; if (size != 0) { switch (vary) { case VCTRS_EXPAND_VARY_slowest: { times_each = size / v_cumulative[i]; times = v_cumulative[i] / v_sizes[i]; break; }; case VCTRS_EXPAND_VARY_fastest: { times_each = v_cumulative[i] / v_sizes[i]; times = size / v_cumulative[i]; break; } } } *p_ffi_times_each = r_ssize_as_integer(times_each); x = KEEP(vec_rep_each(x, ffi_times_each, error_call, vec_args.x, vec_args.empty)); x = vec_rep(x, r_ssize_as_integer(times), error_call, vec_args.x, vec_args.empty); r_list_poke(out, i, x); FREE(1); } init_data_frame(out, size); FREE(6); return out; } static inline enum vctrs_expand_vary parse_vary(r_obj* vary) { if (!r_is_string(vary)) { r_stop_internal("`vary` must be a string."); } const char* c_vary = r_chr_get_c_string(vary, 0); if (!strcmp(c_vary, "slowest")) return VCTRS_EXPAND_VARY_slowest; if (!strcmp(c_vary, "fastest")) return VCTRS_EXPAND_VARY_fastest; r_stop_internal( "`vary` must be either \"slowest\" or \"fastest\"." ); } vctrs/src/ptype-common.c0000644000176200001440000000667415120513137015010 0ustar liggesusers#include "vctrs.h" struct ptype_common_reduce_opts { struct r_lazy call; enum s3_fallback s3_fallback; }; #include "decl/ptype-common-decl.h" // [[ register(external = TRUE) ]] r_obj* ffi_ptype_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* ffi_finalise = r_node_car(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); const enum ptype_finalise finalise = r_arg_as_bool(ffi_finalise, ".finalise") ? PTYPE_FINALISE_true : PTYPE_FINALISE_false; const enum s3_fallback s3_fallback = S3_FALLBACK_false; r_obj* out = vec_ptype_common( xs, ptype, finalise, s3_fallback, &xs_arg, call ); return out; } // [[ register(external = TRUE) ]] r_obj* ffi_ptype_common_params(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* ffi_finalise = r_node_car(args); args = r_node_cdr(args); r_obj* opts = r_node_car(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); const enum ptype_finalise finalise = r_arg_as_bool(ffi_finalise, ".finalise") ? PTYPE_FINALISE_true : PTYPE_FINALISE_false; const enum s3_fallback s3_fallback = s3_fallback_from_opts(opts); r_obj* out = vec_ptype_common( xs, ptype, finalise, s3_fallback, &xs_arg, call ); return out; } // Invariant of `vec_ptype_common()` is that the output is always a finalised `ptype`, // even if the user provided their own, unless `PTYPE_FINALISE_false` is specified. r_obj* vec_ptype_common( r_obj* dots, r_obj* ptype, enum ptype_finalise finalise, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call ) { int n_prot = 0; r_obj* out; if (ptype != r_null) { out = KEEP_N(vec_ptype(ptype, vec_args.dot_ptype, call), &n_prot); } else { if (r_is_true(r_peek_option("vctrs.no_guessing"))) { r_abort_lazy_call(r_lazy_null, "strict mode is activated; you must supply complete `.ptype`."); } struct ptype_common_reduce_opts reduce_opts = { .call = call, .s3_fallback = s3_fallback }; out = KEEP_N( reduce( r_null, vec_args.empty, p_arg, dots, &ptype2_common, &reduce_opts ), &n_prot ); } if (should_finalise(finalise)) { out = KEEP_N(vec_ptype_finalise(out), &n_prot); } FREE(n_prot); return out; } static r_obj* ptype2_common(r_obj* current, r_obj* next, struct counters* counters, void* p_data) { int left = -1; struct ptype_common_reduce_opts* p_reduce_opts = (struct ptype_common_reduce_opts*) p_data; current = vec_ptype2( current, next, counters->curr_arg, counters->next_arg, p_reduce_opts->call, p_reduce_opts->s3_fallback, &left ); // Update current if RHS is the common type. Otherwise the previous // counter stays in effect. if (!left) { counters_shift(counters); } return current; } vctrs/src/dictionary.h0000644000176200001440000002047415060342773014536 0ustar liggesusers#ifndef VCTRS_DICTIONARY_H #define VCTRS_DICTIONARY_H #include "vctrs-core.h" #include "poly-op.h" #include "equal.h" #include "hash.h" #define DICT_EMPTY -1 // The dictionary structure is a little peculiar since R has no notion of // a scalar, so the `key`s are indexes into vector `x`. This means we can // only store values from a single vector, but we can still lookup using // another vector, provided that they're of the same type (which is ensured // at the R-level). struct dictionary { SEXP protect; struct poly_vec* p_poly_vec; uint32_t* hash; R_len_t* key; uint32_t size; uint32_t used; }; /** * Initialise a dictionary * * - `new_dictionary()` creates a dictionary and precaches the hashes for * each element of `x`. * * - `new_dictionary_partial()` creates a dictionary with precached hashes * as well, but does not allocate an array of keys. This is useful * for finding a key in another dictionary with `*_dict_hash_with()`. */ struct dictionary_opts { bool partial; bool na_equal; }; struct dictionary* new_dictionary(SEXP x); struct dictionary* new_dictionary_partial(SEXP x); #define PROTECT_DICT(d, n) do { \ struct dictionary* d_ = (d); \ KEEP(d_->p_poly_vec->shelter); \ KEEP(d_->protect); \ *(n) += 2; \ } while(0) static inline void dict_put(struct dictionary* d, uint32_t hash, R_len_t i) { d->key[hash] = i; d->used++; } // This is an EXTREMELY hot loop. The `*_dict_hash_with()` functions // themselves are always called within an outer loop over a vector, // so it is almost always a double loop. For performance, this makes // it very important to ensure that the `*_dict_hash_with()` function // is inlined (we have seen good evidence that this helps a lot, up // to a factor of 2x in some cases). // // To encourage inlining, we make typed wrappers over `P_EQUAL_NA_EQUAL` // which are marked with `static inline` and then we use these in // the dictionary functions. // // Quadratic probing: will try every slot if d->size is power of 2 // http://research.cs.vt.edu/AVresearch/hashing/quadratic.php #define DICT_HASH_WITH(P_EQUAL_NA_EQUAL) \ do { \ uint32_t hash = x->hash[i]; \ \ const void* p_d_vec = d->p_poly_vec->p_vec; \ const void* p_x_vec = x->p_poly_vec->p_vec; \ const uint32_t size = d->size; \ \ for (uint32_t k = 0; k < size; ++k) { \ uint32_t probe = (hash + k * (k + 1) / 2) & (d->size - 1); \ /* Rprintf("Probe: %i\n", probe); */ \ \ /* If we circled back to start, dictionary is full */ \ if (k > 1 && probe == hash) { \ break; \ } \ \ /* Check for unused slot */ \ R_len_t idx = d->key[probe]; \ if (idx == DICT_EMPTY) { \ return probe; \ } \ \ /* Check for same value as there might be a collision */ \ if (P_EQUAL_NA_EQUAL(p_d_vec, idx, p_x_vec, i)) { \ return probe; \ } \ \ /* Collision. next iteration will find another spot using */ \ /* quadratic probing. */ \ } \ r_stop_internal("Dictionary is full."); \ } \ while (0) /** * `*_dict_hash_with()` finds the hash for indexing into `d` with * element `i` of `x`. */ static inline uint32_t nil_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_nil_equal_na_equal); } static inline uint32_t lgl_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_lgl_equal_na_equal); } static inline uint32_t int_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_int_equal_na_equal); } static inline uint32_t dbl_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_dbl_equal_na_equal); } static inline uint32_t cpl_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_cpl_equal_na_equal); } static inline uint32_t chr_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_chr_equal_na_equal); } static inline uint32_t raw_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_raw_equal_na_equal); } static inline uint32_t list_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_list_equal_na_equal); } static inline uint32_t df_dict_hash_with(struct dictionary* d, struct dictionary* x, R_len_t i) { DICT_HASH_WITH(p_df_equal_na_equal); } #undef DICT_HASH_WITH /** * `*_dict_hash_scalar()` returns the key hash for element `i`. */ static inline uint32_t nil_dict_hash_scalar(struct dictionary* d, R_len_t i) { return nil_dict_hash_with(d, d, i); } static inline uint32_t lgl_dict_hash_scalar(struct dictionary* d, R_len_t i) { return lgl_dict_hash_with(d, d, i); } static inline uint32_t int_dict_hash_scalar(struct dictionary* d, R_len_t i) { return int_dict_hash_with(d, d, i); } static inline uint32_t dbl_dict_hash_scalar(struct dictionary* d, R_len_t i) { return dbl_dict_hash_with(d, d, i); } static inline uint32_t cpl_dict_hash_scalar(struct dictionary* d, R_len_t i) { return cpl_dict_hash_with(d, d, i); } static inline uint32_t chr_dict_hash_scalar(struct dictionary* d, R_len_t i) { return chr_dict_hash_with(d, d, i); } static inline uint32_t raw_dict_hash_scalar(struct dictionary* d, R_len_t i) { return raw_dict_hash_with(d, d, i); } static inline uint32_t list_dict_hash_scalar(struct dictionary* d, R_len_t i) { return list_dict_hash_with(d, d, i); } static inline uint32_t df_dict_hash_scalar(struct dictionary* d, R_len_t i) { return df_dict_hash_with(d, d, i); } static inline bool dict_hash_is_missing(struct dictionary* d, R_len_t i) { return d->hash[i] == HASH_MISSING; } static inline bool nil_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_nil_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool lgl_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_lgl_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool int_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_int_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool dbl_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_dbl_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool cpl_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_cpl_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool chr_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_chr_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool raw_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_raw_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool list_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_list_is_incomplete(d->p_poly_vec->p_vec, i); } static inline bool df_dict_is_incomplete(struct dictionary* d, R_len_t i) { return dict_hash_is_missing(d, i) && p_df_is_incomplete(d->p_poly_vec->p_vec, i); } #endif vctrs/src/slice-assign.c0000644000176200001440000014270515157322033014741 0ustar liggesusers#include "vctrs.h" #include "decl/slice-assign-decl.h" // [[ include("slice-assign.h") ]] r_obj* vec_assign_opts(r_obj* x, r_obj* index, r_obj* value, const struct vec_assign_opts* p_opts) { if (x == r_null) { return r_null; } struct r_lazy call = r_lazy_is_null(p_opts->call) ? lazy_calls.vec_assign : p_opts->call; struct vctrs_arg* x_arg = r_lazy_is_null(p_opts->call) ? vec_args.x : p_opts->x_arg; struct vctrs_arg* value_arg = r_lazy_is_null(p_opts->call) ? vec_args.value : p_opts->value_arg; obj_check_vector(x, VCTRS_ALLOW_NULL_no, x_arg, call); obj_check_vector(value, VCTRS_ALLOW_NULL_no, value_arg, call); const r_ssize x_size = vec_size(x); // Determine index style. Logical condition indices follow an optimized path. enum vctrs_index_style index_style = (is_condition_index(index) && r_length(index) == x_size) ? VCTRS_INDEX_STYLE_condition : VCTRS_INDEX_STYLE_location; if (index_style == VCTRS_INDEX_STYLE_location) { // Validate and convert to integer locations with `vec_as_location()` r_obj* x_names = KEEP(vec_names(x)); const struct location_opts location_opts = new_location_opts_assign(); index = vec_as_location_opts( index, x_size, x_names, &location_opts ); FREE(1); } KEEP(index); // We won't be proxying recursively const bool recursively_proxied = false; struct vec_proxy_assign_opts assign_opts = { .assign_names = p_opts->assign_names, .ignore_outer_names = p_opts->ignore_outer_names, .call = call, .x_arg = x_arg, .value_arg = value_arg, .index_style = index_style, .slice_value = p_opts->slice_value, .ownership = p_opts->ownership, .recursively_proxied = recursively_proxied }; struct vec_restore_opts restore_opts = { .ownership = p_opts->ownership, .recursively_proxied = recursively_proxied }; // Cast `value` and check that it can recycle value = KEEP(vec_cast(value, x, assign_opts.value_arg, assign_opts.x_arg, assign_opts.call)); check_recyclable_against_index( value, index, x_size, assign_opts.slice_value, assign_opts.index_style, assign_opts.value_arg, assign_opts.call ); r_obj* proxy = KEEP(vec_proxy(x)); proxy = KEEP(vec_proxy_assign_opts(proxy, index, value, &assign_opts)); r_obj* out = vec_restore_opts(proxy, x, &restore_opts); FREE(4); return out; } // [[ register() ]] r_obj* ffi_assign( r_obj* ffi_x, r_obj* ffi_i, r_obj* ffi_value, r_obj* ffi_slice_value, r_obj* ffi_frame ) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy value_arg_lazy = { .x = syms.value_arg, .env = ffi_frame }; struct vctrs_arg value_arg = new_lazy_arg(&value_arg_lazy); struct r_lazy call = { .x = ffi_frame, .env = r_null }; const enum assignment_slice_value slice_value = r_arg_as_bool(ffi_slice_value, "slice_value") ? ASSIGNMENT_SLICE_VALUE_yes : ASSIGNMENT_SLICE_VALUE_no; // We don't expose this in the R API const bool assign_names = false; // Comes from the R side, so no known ownership const enum vctrs_ownership ownership = VCTRS_OWNERSHIP_foreign; const struct vec_assign_opts opts = { .assign_names = assign_names, .slice_value = slice_value, .ownership = ownership, .x_arg = &x_arg, .value_arg = &value_arg, .call = call }; return vec_assign_opts(ffi_x, ffi_i, ffi_value, &opts); } // [[ register() ]] r_obj* ffi_assign_params( r_obj* ffi_x, r_obj* ffi_index, r_obj* ffi_value, r_obj* ffi_assign_names, r_obj* ffi_slice_value, r_obj* ffi_frame ) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy value_arg_lazy = { .x = syms.value_arg, .env = ffi_frame }; struct vctrs_arg value_arg = new_lazy_arg(&value_arg_lazy); struct r_lazy call = { .x = ffi_frame, .env = r_null }; const bool assign_names = r_arg_as_bool(ffi_assign_names, "assign_names"); const enum assignment_slice_value slice_value = r_arg_as_bool(ffi_slice_value, "slice_value") ? ASSIGNMENT_SLICE_VALUE_yes : ASSIGNMENT_SLICE_VALUE_no; // Comes from the R side, so no known ownership enum vctrs_ownership ownership = VCTRS_OWNERSHIP_foreign; const struct vec_assign_opts opts = { .assign_names = assign_names, .ownership = ownership, .slice_value = slice_value, .x_arg = &x_arg, .value_arg = &value_arg, .call = call }; return vec_assign_opts(ffi_x, ffi_index, ffi_value, &opts); } static r_obj* vec_assign_switch( r_obj* proxy, r_obj* index, r_obj* value, const struct vec_proxy_assign_opts* p_opts ) { switch (vec_proxy_typeof(proxy)) { case VCTRS_TYPE_logical: return lgl_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_integer: return int_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_double: return dbl_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_complex: return cpl_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_character: return chr_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_raw: return raw_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_list: return list_assign(proxy, index, value, p_opts->ownership, p_opts->slice_value, p_opts->index_style); case VCTRS_TYPE_dataframe: return df_assign(proxy, index, value, p_opts); case VCTRS_TYPE_scalar: stop_scalar_type(proxy, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_assign_switch", vec_typeof(proxy)); } r_stop_unreachable(); } // `vec_proxy_assign_opts()` conditionally duplicates the `proxy` depending // on a number of factors. // // - If a fallback is required, the `proxy` is duplicated at the R level. // - If `p_opts->ownership` is `VCTRS_OWNERSHIP_deep`, the `proxy` is not duplicated. // - Notably, this is passed on to all columns when assigning to a data frame. // - If the `proxy` happens to be an ALTREP object, materialization will be // forced when we do the actual assignment, but this should really only // happen with cheap-to-materialize ALTREP "wrapper" objects since we've // claimed that we "own" the `proxy`. // - If `p_opts->ownership` is `VCTRS_OWNERSHIP_shallow`, the `proxy` container // is not duplicated, but in the case of the columns of a data frame the // columns are each treated as `VCTRS_OWNERSHIP_foreign`. // - If `p_opts->ownership` is `VCTRS_OWNERSHIP_foreign`, the `proxy` is only // duplicated if it is referenced, i.e. `MAYBE_REFERENCED()` returns `true`. // // We only set `VCTRS_OWNERSHIP_deep` when we've created a fresh data structure // at C level and we are about to fill it. Some examples: // - vec_c() // - list_combine() // - vec_rbind() // // For data frames, this `ownership` parameter is particularly important for R // 4.0.0 where references are tracked more precisely. In R 4.0.0, a freshly // created data frame's columns all have a refcount of 1 because of the // `r_list_poke()` call that set them in the data frame. This makes them // referenced, but not shared. If `VCTRS_OWNERSHIP_shallow` or // `VCTRS_OWNERSHIP_foreign` was set and `df_assign()` was used in a loop (as it // is in `vec_rbind()`), then a copy of each column would be made at each // iteration of the loop (any time a new set of rows is assigned into the output // object). // // Even though it can directly assign, the safe way to call // `vec_proxy_assign_opts()` is to catch and protect their output rather than // relying on them to assign directly. /* * @param proxy The proxy of the output container * @param index The locations to assign `value` to * @param value The value to assign into the proxy. Must already be * cast to the type of the true output container, and have already * been checked for recyclability (either size 1 or size of `index`). * Should not be proxied, in case we have to fallback. * @param p_opts The options to use during the assignment process */ r_obj* vec_proxy_assign_opts(r_obj* proxy, r_obj* index, r_obj* value, const struct vec_proxy_assign_opts* p_opts) { int n_protect = 0; // Ignore vectors marked as fallback because the caller will apply a fallback // method instead. We can't lift this check out of `vec_proxy_assign_opts()` // and just apply at the call sites because we recurse through here with data // frames and individual columns might need the fallback even if the whole // data frame itself doesn't need it. if (vec_is_common_class_fallback(proxy)) { return proxy; } // We only allow `ignore_outer_names` on the "outer" call to // `vec_proxy_assign_opts()`. After it has been used once, it is set to // `false` for any recursive calls back into this function. struct vec_proxy_assign_opts opts_copy = *p_opts; const bool ignore_outer_names = opts_copy.ignore_outer_names; opts_copy.ignore_outer_names = false; struct vctrs_proxy_info value_info = vec_proxy_info(value); KEEP_N(value_info.inner, &n_protect); if (r_typeof(proxy) != r_typeof(value_info.inner)) { r_stop_internal("`proxy` of type `%s` incompatible with `value` proxy of type `%s`.", r_type_as_c_string(r_typeof(proxy)), r_type_as_c_string(r_typeof(value_info.inner))); } // If a fallback is required, the `proxy` is identical to the output container // because no proxy method was called r_obj* out = r_null; if (vec_requires_fallback(value, value_info)) { index = KEEP_N(vec_subscript_materialize(index), &n_protect); out = KEEP_N(vec_assign_fallback(proxy, index, value, opts_copy.slice_value, opts_copy.index_style), &n_protect); } else if (has_dim(proxy)) { out = KEEP_N(vec_assign_shaped(proxy, index, value_info.inner, opts_copy.ownership, opts_copy.slice_value, opts_copy.index_style), &n_protect); } else { out = KEEP_N(vec_assign_switch(proxy, index, value_info.inner, &opts_copy), &n_protect); } if (!ignore_outer_names && p_opts->assign_names) { out = vec_proxy_assign_names(out, index, value_info.inner, opts_copy.ownership, opts_copy.slice_value, opts_copy.index_style); } FREE(n_protect); return out; } #define ASSIGN_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, VALUE_LOC) \ const r_ssize index_size = r_length(index); \ const int* index_data = r_int_cbegin(index); \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, ownership)); \ CTYPE* out_data = DEREF(out); \ \ for (r_ssize index_loc = 0; index_loc < index_size; ++index_loc) { \ const int index_elt = index_data[index_loc]; \ if (index_elt != r_globals.na_int) { \ const r_ssize out_loc = index_elt - 1; \ out_data[out_loc] = value_data[VALUE_LOC]; \ } \ } \ \ FREE(1); \ return out #define ASSIGN_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, VALUE_LOC) \ const int* index_data = r_int_cbegin(index); \ const r_ssize start = index_data[0]; \ const r_ssize index_size = index_data[1]; \ const r_ssize step = index_data[2]; \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, ownership)); \ CTYPE* out_data = DEREF(out); \ r_ssize out_loc = start; \ \ for (r_ssize index_loc = 0; index_loc < index_size; ++index_loc) { \ out_data[out_loc] = value_data[VALUE_LOC]; \ out_loc += step; \ } \ \ FREE(1); \ return out #define ASSIGN_LOCATION(CTYPE, DEREF, CONST_DEREF) \ const r_ssize value_size = r_length(value); \ check_assign_sizes(x, index, value_size, slice_value, index_style); \ \ if (is_compact_seq(index)) { \ if (value_size == 1) { \ ASSIGN_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, out_loc); \ } else { \ ASSIGN_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, index_loc); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, out_loc); \ } else { \ ASSIGN_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, index_loc); \ } \ } /** * The performance of this loop is very sensitive to the way the assignment is done * * For optimal average performance, we use: * * ``` * out_data[index_loc] = (index_elt == 1) ? value_data[value_loc] : out_data[index_loc]; * ``` * * This seems to either: * - Internally compile to something less branchy * - Keep `out_data` "hot" at all times * * Compare that with this, which we used to do: * * ``` * if (index_elt == 1) { * out_data[index_loc] = value_data[value_loc]; * } * ``` * * This might look more efficient, but for a uniform random `index` it is significantly * slower than the ternary style that keeps `out_data` "hot". * * For example, this "random" index of uniform `TRUE` and `FALSE` is MUCH faster with * the ternary style (6.5ms ternary vs 33ms conditional). * * ``` * library(vctrs) * set.seed(123) * x <- sample(1e7) * value <- sample(1e7) * loc <- sample(c(TRUE, FALSE), 1e7, replace = TRUE) * bench::mark(vec_assign(x, loc, value, slice_value = TRUE)) * ``` * * If you change the `index` to this "mostly `FALSE`" index, then the conditional style * is _slightly_ faster (7ms ternary vs 6ms conditional). * * ``` * loc <- sample(c(TRUE, rep(FALSE, 1000)), 1e7, replace = TRUE) * ``` * * Given these benchmarks, we prefer the more predictable and highly competitive * average case performance of the ternary style. This ends up adding up to a big * difference in `list_combine()` (used by `dplyr::if_else()` and `dplyr::case_when()`) * where you end up hitting this loop once per expression. */ #define ASSIGN_CONDITION_IMPL( \ CTYPE, \ DEREF, \ CONST_DEREF, \ VALUE_INCR, \ INDEX_CTYPE, \ INDEX_SIZE, \ INDEX_CONST_DEREF, \ INDEX_ELT_CMP \ ) \ const r_ssize index_size = INDEX_SIZE(index); \ const INDEX_CTYPE* index_data = INDEX_CONST_DEREF(index); \ \ const CTYPE* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, ownership)); \ CTYPE* out_data = DEREF(out); \ \ r_ssize value_loc = 0; \ \ for (r_ssize index_loc = 0; index_loc < index_size; ++index_loc) { \ const INDEX_CTYPE index_elt = index_data[index_loc]; \ out_data[index_loc] = (INDEX_ELT_CMP) ? value_data[value_loc] : out_data[index_loc]; \ value_loc += VALUE_INCR; \ } \ \ FREE(1); \ return out #define ASSIGN_CONDITION_INDEX( \ CTYPE, \ DEREF, \ CONST_DEREF, \ VALUE_INCR \ ) \ ASSIGN_CONDITION_IMPL( \ CTYPE, \ DEREF, \ CONST_DEREF, \ VALUE_INCR, \ int, \ r_length, \ r_lgl_cbegin, \ index_elt == 1 \ ) \ #define ASSIGN_CONDITION_COMPACT( \ CTYPE, \ DEREF, \ CONST_DEREF, \ VALUE_INCR \ ) \ ASSIGN_CONDITION_IMPL( \ CTYPE, \ DEREF, \ CONST_DEREF, \ VALUE_INCR, \ bool, \ compact_condition_size, \ compact_condition_cbegin, \ index_elt \ ) \ #define ASSIGN_CONDITION(CTYPE, DEREF, CONST_DEREF) \ const r_ssize value_size = r_length(value); \ check_assign_sizes(x, index, value_size, slice_value, index_style); \ \ if (is_compact_condition(index)) { \ if (value_size == 1) { \ ASSIGN_CONDITION_COMPACT(CTYPE, DEREF, CONST_DEREF, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_CONDITION_COMPACT(CTYPE, DEREF, CONST_DEREF, 1); \ } else { \ ASSIGN_CONDITION_COMPACT(CTYPE, DEREF, CONST_DEREF, index_elt); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_CONDITION_INDEX(CTYPE, DEREF, CONST_DEREF, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_CONDITION_INDEX(CTYPE, DEREF, CONST_DEREF, 1); \ } else { \ ASSIGN_CONDITION_INDEX(CTYPE, DEREF, CONST_DEREF, index_elt != 0); \ } \ } #define ASSIGN(CTYPE, DEREF, CONST_DEREF) \ switch (index_style) { \ case VCTRS_INDEX_STYLE_location: { \ ASSIGN_LOCATION(CTYPE, DEREF, CONST_DEREF); \ } \ case VCTRS_INDEX_STYLE_condition: { \ ASSIGN_CONDITION(CTYPE, DEREF, CONST_DEREF); \ } \ default: r_stop_unreachable(); \ } static r_obj* lgl_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN(int, LOGICAL, LOGICAL_RO); } static r_obj* int_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN(int, r_int_begin, INTEGER_RO); } static r_obj* dbl_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN(double, REAL, REAL_RO); } static r_obj* cpl_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN(Rcomplex, COMPLEX, COMPLEX_RO); } static r_obj* raw_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN(Rbyte, RAW, RAW_RO); } #define ASSIGN_BARRIER_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, VALUE_LOC) \ const r_ssize index_size = r_length(index); \ const int* index_data = r_int_cbegin(index); \ \ CTYPE const* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, ownership)); \ \ for (r_ssize index_loc = 0; index_loc < index_size; ++index_loc) { \ const int index_elt = index_data[index_loc]; \ if (index_elt != r_globals.na_int) { \ const r_ssize out_loc = index_elt - 1; \ SET(out, out_loc, value_data[VALUE_LOC]); \ } \ } \ \ FREE(1); \ return out #define ASSIGN_BARRIER_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, VALUE_LOC) \ const int* index_data = r_int_cbegin(index); \ const r_ssize start = index_data[0]; \ const r_ssize index_size = index_data[1]; \ const r_ssize step = index_data[2]; \ \ CTYPE const* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, ownership)); \ r_ssize out_loc = start; \ \ for (r_ssize index_loc = 0; index_loc < index_size; ++index_loc) { \ SET(out, out_loc, value_data[VALUE_LOC]); \ out_loc += step; \ } \ \ FREE(1); \ return out #define ASSIGN_BARRIER_LOCATION(CTYPE, CONST_DEREF, SET) \ const r_ssize value_size = r_length(value); \ check_assign_sizes(x, index, value_size, slice_value, index_style); \ \ if (is_compact_seq(index)) { \ if (value_size == 1) { \ ASSIGN_BARRIER_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, out_loc); \ } else { \ ASSIGN_BARRIER_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, index_loc); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_BARRIER_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, out_loc); \ } else { \ ASSIGN_BARRIER_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, index_loc); \ } \ } /** * Unlike with `ASSIGN_CONDITION_IMPL`, here we prefer the conditional style * of: * * ``` * if (index_elt == 1) { * SET(out, index_loc, value_data[value_loc]); * } * ``` * * This ends up working better than the ternary style of: * * ``` * SET(out, index_loc, (index_elt == 1) ? value_data[value_loc]) : out_data[index_loc]); * ``` * * This is likely the case because we no longer gain benefits from keeping `out_data` * "hot" due to the indirection required by the `SET()` function. */ #define ASSIGN_BARRIER_CONDITION_IMPL( \ CTYPE, \ CONST_DEREF, \ SET, \ VALUE_INCR, \ INDEX_CTYPE, \ INDEX_SIZE, \ INDEX_CONST_DEREF, \ INDEX_ELT_CMP \ ) \ const r_ssize index_size = INDEX_SIZE(index); \ const INDEX_CTYPE* index_data = INDEX_CONST_DEREF(index); \ \ CTYPE const* value_data = CONST_DEREF(value); \ \ r_obj* out = KEEP(vec_clone_referenced(x, ownership)); \ \ r_ssize value_loc = 0; \ \ for (r_ssize index_loc = 0; index_loc < index_size; ++index_loc) { \ const INDEX_CTYPE index_elt = index_data[index_loc]; \ if (INDEX_ELT_CMP) { \ SET(out, index_loc, value_data[value_loc]); \ } \ value_loc += VALUE_INCR; \ } \ \ FREE(1); \ return out #define ASSIGN_BARRIER_CONDITION_INDEX( \ CTYPE, \ CONST_DEREF, \ SET, \ VALUE_INCR \ ) \ ASSIGN_BARRIER_CONDITION_IMPL( \ CTYPE, \ CONST_DEREF, \ SET, \ VALUE_INCR, \ int, \ r_length, \ r_lgl_cbegin, \ index_elt == 1 \ ) \ #define ASSIGN_BARRIER_CONDITION_COMPACT( \ CTYPE, \ CONST_DEREF, \ SET, \ VALUE_INCR \ ) \ ASSIGN_BARRIER_CONDITION_IMPL( \ CTYPE, \ CONST_DEREF, \ SET, \ VALUE_INCR, \ bool, \ compact_condition_size, \ compact_condition_cbegin, \ index_elt \ ) \ #define ASSIGN_BARRIER_CONDITION(CTYPE, CONST_DEREF, SET) \ const r_ssize value_size = r_length(value); \ check_assign_sizes(x, index, value_size, slice_value, index_style); \ \ if (is_compact_condition(index)) { \ if (value_size == 1) { \ ASSIGN_BARRIER_CONDITION_COMPACT(CTYPE, CONST_DEREF, SET, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_CONDITION_COMPACT(CTYPE, CONST_DEREF, SET, 1); \ } else { \ ASSIGN_BARRIER_CONDITION_COMPACT(CTYPE, CONST_DEREF, SET, index_elt); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_BARRIER_CONDITION_INDEX(CTYPE, CONST_DEREF, SET, 0); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_CONDITION_INDEX(CTYPE, CONST_DEREF, SET, 1); \ } else { \ ASSIGN_BARRIER_CONDITION_INDEX(CTYPE, CONST_DEREF, SET, index_elt != 0); \ } \ } #define ASSIGN_BARRIER(CTYPE, CONST_DEREF, SET) \ switch (index_style) { \ case VCTRS_INDEX_STYLE_location: { \ ASSIGN_BARRIER_LOCATION(CTYPE, CONST_DEREF, SET); \ } \ case VCTRS_INDEX_STYLE_condition: { \ ASSIGN_BARRIER_CONDITION(CTYPE, CONST_DEREF, SET); \ } \ default: r_stop_unreachable(); \ } r_obj* chr_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_BARRIER(r_obj*, r_chr_cbegin, r_chr_poke); } r_obj* list_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_BARRIER(r_obj*, r_list_cbegin, r_list_poke); } /** * Invariants: * * - `out` and `value` must be rectangular lists. * - `value` must have the same size as `index` or be size 1. * * Performance and safety notes: * * In `vec_c()`, `vec_rbind()`, and `list_combine()` we totally own the * data frame and its columns recursively, so we set `VCTRS_OWNERSHIP_deep`. * This helps us avoid copies of the columns during restoration even if * `NO_REFERENCES()` disagrees (storing the column in a list counts as a * reference) (#1151). * * If we don't own `x` we set `VCTRS_OWNERSHIP_foreign`. This calls * `vec_clone_referenced -> r_clone_referenced -> Rf_shallow_duplicate`. For * lists (data.frames), this loops over the list and marks each element as * referenced. This helps in a particular special case where the data frame * itself could be referenced but the columns were not (mtcars was an example at * the time, which was likely an R bug). If each list element wasn't marked, * then `vec_proxy_assign_opts()` would see an unreferenced column and modify it * directly, resulting in improper mutable semantics (#986). * * [[ include("vctrs.h") ]] */ r_obj* df_assign(r_obj* x, r_obj* index, r_obj* value, const struct vec_proxy_assign_opts* p_opts) { r_obj* out = KEEP(vec_clone_referenced(x, p_opts->ownership)); r_ssize n = r_length(out); if (r_length(value) != n) { r_stop_internal("Can't assign %d columns to df of length %d.", r_length(value), n); } // During assignment, if we have deep ownership over `x` we can // propagate that ownership to the columns, otherwise we have no // known ownership over the columns enum vctrs_ownership col_ownership; switch (p_opts->ownership) { case VCTRS_OWNERSHIP_foreign: col_ownership = VCTRS_OWNERSHIP_foreign; break; case VCTRS_OWNERSHIP_shallow: col_ownership = VCTRS_OWNERSHIP_foreign; break; case VCTRS_OWNERSHIP_deep: col_ownership = VCTRS_OWNERSHIP_deep; break; default: r_stop_unreachable(); } const struct vec_proxy_assign_opts col_proxy_assign_opts = { .assign_names = p_opts->assign_names, .ignore_outer_names = p_opts->ignore_outer_names, .slice_value = p_opts->slice_value, .call = p_opts->call, .x_arg = p_opts->x_arg, .value_arg = p_opts->value_arg, .index_style = p_opts->index_style, .ownership = col_ownership, .recursively_proxied = p_opts->recursively_proxied }; for (r_ssize i = 0; i < n; ++i) { r_obj* out_elt = r_list_get(out, i); r_obj* value_elt = r_list_get(value, i); // No need to cast or recycle because those operations are // recursive and have already been performed. However, proxy and // restore are not necessarily recursive and we might need to // proxy each element we recurse into. // // NOTE: `vec_proxy_assign_opts()` proxies `value_elt`. r_obj* proxy_elt = KEEP(p_opts->recursively_proxied ? out_elt : vec_proxy(out_elt)); r_obj* assigned_elt = KEEP(vec_proxy_assign_opts(proxy_elt, index, value_elt, &col_proxy_assign_opts)); if (!p_opts->recursively_proxied) { const struct vec_restore_opts col_restore_opts = { .ownership = col_ownership, .recursively_proxied = false }; assigned_elt = vec_restore_opts(assigned_elt, out_elt, &col_restore_opts); } r_list_poke(out, i, assigned_elt); FREE(2); } FREE(1); return out; } static r_obj* vec_assign_fallback( r_obj* x, r_obj* index, r_obj* value, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { r_obj* ffi_slice_value; switch (slice_value) { case ASSIGNMENT_SLICE_VALUE_no: ffi_slice_value = r_false; break; case ASSIGNMENT_SLICE_VALUE_yes: ffi_slice_value = r_true; break; default: r_stop_unreachable(); } r_obj* ffi_index_style; switch (index_style) { case VCTRS_INDEX_STYLE_location: ffi_index_style = chrs.location; break; case VCTRS_INDEX_STYLE_condition: ffi_index_style = chrs.condition; break; default: r_stop_unreachable(); } return vctrs_dispatch5(syms_vec_assign_fallback, fns_vec_assign_fallback, syms_x, x, syms_i, index, syms_slice_value, ffi_slice_value, syms_index_style, ffi_index_style, syms_value, value); } static r_obj* vec_proxy_assign_names( r_obj* proxy, r_obj* index, r_obj* value_proxy, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { // Both of these inputs are assumed to be proxied already r_obj* proxy_nms = KEEP(vec_proxy_names(proxy)); r_obj* value_nms = KEEP(vec_proxy_names(value_proxy)); if (proxy_nms == r_null && value_nms == r_null) { // No names at all to worry about FREE(2); return proxy; } if (value_nms == r_null) { // If there are `proxy_nms` but no `value_nms`, we need clear any names at // this `index`. This also clears when we freshly create `proxy_nms` which // is a no-op, but that's okay, it keeps the logic simple. value_nms = r_chrs.empty_string; } if (proxy_nms == r_null) { proxy_nms = KEEP(r_alloc_character(vec_size(proxy))); } else { proxy_nms = KEEP(vec_clone_referenced(proxy_nms, ownership)); } proxy_nms = KEEP(chr_assign( proxy_nms, index, value_nms, ownership, slice_value, index_style )); proxy = KEEP(vec_clone_referenced(proxy, ownership)); proxy = vec_proxy_set_names(proxy, proxy_nms, ownership); FREE(5); return proxy; } // Helper for determining if we have a logical "condition" index we can optimize // via `VCTRS_INDEX_STYLE_condition`. Otherwise we use `vec_as_location()` and // convert to integer locations. // // Optimization avoids a `which(i)` conversion in `vec_as_location()`, // which helps in two ways: // - We don't allocate an integer vector of locations where the vector is `TRUE` // - We don't perform extra passes through `i`, typically a `which()` call requires // 2 passes over `i` // // Restrictions: // - Must be logical // - Can't be an array // - Can't be an object (objects go through `vec_as_location()` casting) // // Notably allowed: // - Can have other attributes, including names // // Caller typically also enforces a size check. bool is_condition_index(r_obj* index) { if (r_typeof(index) != R_TYPE_logical) { return false; } if (has_dim(index)) { return false; } if (r_is_object(index)) { return false; } return true; } void check_condition_index( r_obj* x, struct vctrs_arg* p_x_arg, struct r_lazy call ) { if (!is_condition_index(x)) { r_abort_lazy_call( call, "%s must be a logical vector, not %s.", vec_arg_format(p_x_arg), r_obj_type_friendly(x) ); } } void list_check_all_condition_indices( r_obj* xs, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; const r_ssize xs_size = r_length(xs); r_obj* xs_names = KEEP(r_names(xs)); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(p_xs_arg, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); for (; i < xs_size; ++i) { check_condition_index(v_xs[i], p_x_arg, call); } FREE(2); } // Cheap internal checks done right before assignment to avoid R crashes in corrupt cases void check_assign_sizes( r_obj* x, r_obj* index, r_ssize value_size, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { switch (index_style) { case VCTRS_INDEX_STYLE_location: { switch (slice_value) { case ASSIGNMENT_SLICE_VALUE_no: { if (value_size != 1 && value_size != vec_subscript_size(index)) { r_stop_internal("`value` should have been recycled to match `index`."); } return; } case ASSIGNMENT_SLICE_VALUE_yes: { if (value_size != 1 && value_size != vec_size(x)) { r_stop_internal("`value` should have been recycled to match `x`."); } return; } default: r_stop_unreachable(); } } case VCTRS_INDEX_STYLE_condition: { switch (slice_value) { case ASSIGNMENT_SLICE_VALUE_no: { // In theory, we'd check `value_size` against the number of `TRUE` // values in `index`, but this is too expensive, so we rely on // the caller to check this. return; } case ASSIGNMENT_SLICE_VALUE_yes: { if (value_size != 1 && value_size != vec_size(x)) { r_stop_internal("`value` should have been recycled to match `x`."); } return; } default: r_stop_unreachable(); } } default: r_stop_unreachable(); } } // Checks that `value` has a compatible size with this `index` // // Note that `index` must have already been converted to positive integer indices // with `vec_as_location()`, because that can change its size. // // Note that `index` can be a `compact_seq()`, so we need `vec_subscript_size()`. // // `size` is the total output size. void check_recyclable_against_index( r_obj* value, r_obj* index, r_ssize size, enum assignment_slice_value slice_value, enum vctrs_index_style index_style, struct vctrs_arg* p_value_arg, struct r_lazy call ) { r_ssize check_size; switch (slice_value) { case ASSIGNMENT_SLICE_VALUE_no: { switch (index_style) { case VCTRS_INDEX_STYLE_location: check_size = vec_subscript_size(index); break; case VCTRS_INDEX_STYLE_condition: check_size = vec_condition_subscript_sum(index, true); break; default: r_stop_unreachable(); } break; } case ASSIGNMENT_SLICE_VALUE_yes: check_size = size; break; default: r_stop_unreachable(); } vec_check_recyclable(value, check_size, VCTRS_ALLOW_NULL_no, p_value_arg, call); } // Exported for testing // [[ register() ]] r_obj* ffi_assign_seq( r_obj* x, r_obj* value, r_obj* ffi_start, r_obj* ffi_size, r_obj* ffi_increasing, r_obj* ffi_slice_value ) { r_ssize start = r_int_get(ffi_start, 0); r_ssize size = r_int_get(ffi_size, 0); bool increasing = r_lgl_get(ffi_increasing, 0); struct r_lazy call = lazy_calls.vec_assign_seq; const enum assignment_slice_value slice_value = r_arg_as_bool(ffi_slice_value, "slice_value") ? ASSIGNMENT_SLICE_VALUE_yes : ASSIGNMENT_SLICE_VALUE_no; r_obj* index = KEEP(compact_seq(start, size, increasing)); const enum vctrs_index_style index_style = VCTRS_INDEX_STYLE_location; // Comes from the R side, so not owned, and not proxying recursively const struct vec_proxy_assign_opts assign_opts = { .x_arg = vec_args.x, .value_arg = vec_args.value, .call = call, .slice_value = slice_value, .index_style = index_style, .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; struct vec_restore_opts restore_opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; const r_ssize x_size = vec_size(x); // Cast `value` and check that it can recycle value = KEEP(vec_cast(value, x, vec_args.value, vec_args.x, call)); check_recyclable_against_index( value, index, x_size, assign_opts.slice_value, assign_opts.index_style, assign_opts.value_arg, assign_opts.call ); r_obj* proxy = KEEP(vec_proxy(x)); proxy = KEEP(vec_proxy_assign_opts(proxy, index, value, &assign_opts)); r_obj* out = vec_restore_opts(proxy, x, &restore_opts); FREE(4); return out; } // Exported for testing // [[ register() ]] r_obj* ffi_assign_compact_condition( r_obj* x, r_obj* index, r_obj* value, r_obj* ffi_slice_value ) { struct r_lazy call = r_lazy_null; const enum assignment_slice_value slice_value = r_arg_as_bool(ffi_slice_value, "slice_value") ? ASSIGNMENT_SLICE_VALUE_yes : ASSIGNMENT_SLICE_VALUE_no; if (!is_compact_condition(index)) { r_stop_internal("`index` must be a `compact_condition`."); } const enum vctrs_index_style index_style = VCTRS_INDEX_STYLE_condition; // Comes from the R side, so not owned, and not proxying recursively const struct vec_proxy_assign_opts assign_opts = { .x_arg = vec_args.x, .value_arg = vec_args.value, .call = call, .slice_value = slice_value, .index_style = index_style, .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; struct vec_restore_opts restore_opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; const r_ssize x_size = vec_size(x); // Cast `value` and check that it can recycle value = KEEP(vec_cast(value, x, vec_args.value, vec_args.x, call)); check_recyclable_against_index( value, index, x_size, assign_opts.slice_value, assign_opts.index_style, assign_opts.value_arg, assign_opts.call ); r_obj* proxy = KEEP(vec_proxy(x)); proxy = KEEP(vec_proxy_assign_opts(proxy, index, value, &assign_opts)); r_obj* out = vec_restore_opts(proxy, x, &restore_opts); FREE(3); return out; } void vctrs_init_slice_assign(r_obj* ns) { syms_vec_assign_fallback = r_sym("vec_assign_fallback"); fns_vec_assign_fallback = r_eval(syms_vec_assign_fallback, ns); } static r_obj* syms_vec_assign_fallback = NULL; static r_obj* fns_vec_assign_fallback = NULL; vctrs/src/ptype.h0000644000176200001440000000034615113335375013525 0ustar liggesusers#ifndef VCTRS_PTYPE_H #define VCTRS_PTYPE_H #include "vctrs-core.h" r_obj* vec_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); r_obj* vec_ptype_final(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); #endif vctrs/src/altrep-rle.h0000644000176200001440000000122015113325071014413 0ustar liggesusers#ifndef ALTREP_RLE_H #define ALTREP_RLE_H #include "vctrs-core.h" #include "R_ext/Altrep.h" SEXP altrep_rle_Make(SEXP input); R_xlen_t altrep_rle_Length(SEXP vec); Rboolean altrep_rle_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)); SEXP altrep_rle_string_Elt(SEXP vec, R_xlen_t i); SEXP altrep_rle_Extract_subset(SEXP x, SEXP indx, SEXP call); SEXP altrep_rle_string_Materialize(SEXP vec); void* altrep_rle_Dataptr(SEXP vec, Rboolean writeable); const void* altrep_rle_Dataptr_or_null(SEXP vec); void vctrs_init_altrep_rle(DllInfo* dll); extern R_altrep_class_t altrep_rle_class; #endif vctrs/src/typeof2-s3.h0000644000176200001440000001051614315060310014262 0ustar liggesusers#ifndef VCTRS_TYPEOF2_S3_H #define VCTRS_TYPEOF2_S3_H #include "vctrs-core.h" enum vctrs_type2_s3 { VCTRS_TYPE2_S3_null_bare_factor, VCTRS_TYPE2_S3_null_bare_ordered, VCTRS_TYPE2_S3_null_bare_date, VCTRS_TYPE2_S3_null_bare_posixct, VCTRS_TYPE2_S3_null_bare_posixlt, VCTRS_TYPE2_S3_null_bare_tibble, VCTRS_TYPE2_S3_null_unknown, VCTRS_TYPE2_S3_unspecified_bare_factor, VCTRS_TYPE2_S3_unspecified_bare_ordered, VCTRS_TYPE2_S3_unspecified_bare_date, VCTRS_TYPE2_S3_unspecified_bare_posixct, VCTRS_TYPE2_S3_unspecified_bare_posixlt, VCTRS_TYPE2_S3_unspecified_bare_tibble, VCTRS_TYPE2_S3_unspecified_unknown, VCTRS_TYPE2_S3_logical_bare_factor, VCTRS_TYPE2_S3_logical_bare_ordered, VCTRS_TYPE2_S3_logical_bare_date, VCTRS_TYPE2_S3_logical_bare_posixct, VCTRS_TYPE2_S3_logical_bare_posixlt, VCTRS_TYPE2_S3_logical_bare_tibble, VCTRS_TYPE2_S3_logical_unknown, VCTRS_TYPE2_S3_integer_bare_factor, VCTRS_TYPE2_S3_integer_bare_ordered, VCTRS_TYPE2_S3_integer_bare_date, VCTRS_TYPE2_S3_integer_bare_posixct, VCTRS_TYPE2_S3_integer_bare_posixlt, VCTRS_TYPE2_S3_integer_bare_tibble, VCTRS_TYPE2_S3_integer_unknown, VCTRS_TYPE2_S3_double_bare_factor, VCTRS_TYPE2_S3_double_bare_ordered, VCTRS_TYPE2_S3_double_bare_date, VCTRS_TYPE2_S3_double_bare_posixct, VCTRS_TYPE2_S3_double_bare_posixlt, VCTRS_TYPE2_S3_double_bare_tibble, VCTRS_TYPE2_S3_double_unknown, VCTRS_TYPE2_S3_complex_bare_factor, VCTRS_TYPE2_S3_complex_bare_ordered, VCTRS_TYPE2_S3_complex_bare_date, VCTRS_TYPE2_S3_complex_bare_posixct, VCTRS_TYPE2_S3_complex_bare_posixlt, VCTRS_TYPE2_S3_complex_bare_tibble, VCTRS_TYPE2_S3_complex_unknown, VCTRS_TYPE2_S3_character_bare_factor, VCTRS_TYPE2_S3_character_bare_ordered, VCTRS_TYPE2_S3_character_bare_date, VCTRS_TYPE2_S3_character_bare_posixct, VCTRS_TYPE2_S3_character_bare_posixlt, VCTRS_TYPE2_S3_character_bare_tibble, VCTRS_TYPE2_S3_character_unknown, VCTRS_TYPE2_S3_raw_bare_factor, VCTRS_TYPE2_S3_raw_bare_ordered, VCTRS_TYPE2_S3_raw_bare_date, VCTRS_TYPE2_S3_raw_bare_posixct, VCTRS_TYPE2_S3_raw_bare_posixlt, VCTRS_TYPE2_S3_raw_bare_tibble, VCTRS_TYPE2_S3_raw_unknown, VCTRS_TYPE2_S3_list_bare_factor, VCTRS_TYPE2_S3_list_bare_ordered, VCTRS_TYPE2_S3_list_bare_date, VCTRS_TYPE2_S3_list_bare_posixct, VCTRS_TYPE2_S3_list_bare_posixlt, VCTRS_TYPE2_S3_list_bare_tibble, VCTRS_TYPE2_S3_list_unknown, VCTRS_TYPE2_S3_dataframe_bare_factor, VCTRS_TYPE2_S3_dataframe_bare_ordered, VCTRS_TYPE2_S3_dataframe_bare_date, VCTRS_TYPE2_S3_dataframe_bare_posixct, VCTRS_TYPE2_S3_dataframe_bare_posixlt, VCTRS_TYPE2_S3_dataframe_bare_tibble, VCTRS_TYPE2_S3_dataframe_unknown, VCTRS_TYPE2_S3_scalar_bare_factor, VCTRS_TYPE2_S3_scalar_bare_ordered, VCTRS_TYPE2_S3_scalar_bare_date, VCTRS_TYPE2_S3_scalar_bare_posixct, VCTRS_TYPE2_S3_scalar_bare_posixlt, VCTRS_TYPE2_S3_scalar_bare_tibble, VCTRS_TYPE2_S3_scalar_unknown, VCTRS_TYPE2_S3_bare_factor_bare_factor, VCTRS_TYPE2_S3_bare_factor_bare_ordered, VCTRS_TYPE2_S3_bare_factor_bare_date, VCTRS_TYPE2_S3_bare_factor_bare_posixct, VCTRS_TYPE2_S3_bare_factor_bare_posixlt, VCTRS_TYPE2_S3_bare_factor_bare_tibble, VCTRS_TYPE2_S3_bare_factor_unknown, VCTRS_TYPE2_S3_bare_ordered_bare_ordered, VCTRS_TYPE2_S3_bare_ordered_bare_date, VCTRS_TYPE2_S3_bare_ordered_bare_posixct, VCTRS_TYPE2_S3_bare_ordered_bare_posixlt, VCTRS_TYPE2_S3_bare_ordered_bare_tibble, VCTRS_TYPE2_S3_bare_ordered_unknown, VCTRS_TYPE2_S3_bare_date_bare_date, VCTRS_TYPE2_S3_bare_date_bare_posixct, VCTRS_TYPE2_S3_bare_date_bare_posixlt, VCTRS_TYPE2_S3_bare_date_bare_tibble, VCTRS_TYPE2_S3_bare_date_unknown, VCTRS_TYPE2_S3_bare_posixct_bare_posixct, VCTRS_TYPE2_S3_bare_posixct_bare_posixlt, VCTRS_TYPE2_S3_bare_posixct_bare_tibble, VCTRS_TYPE2_S3_bare_posixct_unknown, VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt, VCTRS_TYPE2_S3_bare_posixlt_bare_tibble, VCTRS_TYPE2_S3_bare_posixlt_unknown, VCTRS_TYPE2_S3_bare_tibble_bare_tibble, VCTRS_TYPE2_S3_bare_tibble_unknown, VCTRS_TYPE2_S3_unknown_unknown }; enum vctrs_type2_s3 vec_typeof2_s3_impl(r_obj* x, r_obj* y, enum vctrs_type type_x, enum vctrs_type type_y, int* left); #endif vctrs/src/typeof2-s3.c0000644000176200001440000007525614315060310014271 0ustar liggesusers#include "vctrs.h" #include "decl/typeof2-s3-decl.h" enum vctrs_type2_s3 vec_typeof2_s3_impl(r_obj* x, r_obj* y, enum vctrs_type type_x, enum vctrs_type type_y, int* left) { switch (type_x) { case VCTRS_TYPE_null: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_null_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_null_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_null_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_null_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_null_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_null_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_null_unknown; } } case VCTRS_TYPE_unspecified: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_unspecified_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_unspecified_unknown; } } case VCTRS_TYPE_logical: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_logical_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_logical_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_logical_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_logical_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_logical_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_logical_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_logical_unknown; } } case VCTRS_TYPE_integer: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_integer_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_integer_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_integer_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_integer_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_integer_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_integer_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_integer_unknown; } } case VCTRS_TYPE_double: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_double_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_double_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_double_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_double_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_double_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_double_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_double_unknown; } } case VCTRS_TYPE_complex: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_complex_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_complex_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_complex_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_complex_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_complex_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_complex_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_complex_unknown; } } case VCTRS_TYPE_character: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_character_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_character_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_character_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_character_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_character_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_character_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_character_unknown; } } case VCTRS_TYPE_raw: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_raw_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_raw_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_raw_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_raw_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_raw_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_raw_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_raw_unknown; } } case VCTRS_TYPE_list: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_list_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_list_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_list_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_list_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_list_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_list_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_list_unknown; } } case VCTRS_TYPE_dataframe: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_dataframe_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_dataframe_unknown; } } case VCTRS_TYPE_scalar: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_scalar_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_scalar_unknown; } } case VCTRS_TYPE_s3: { return vec_typeof2_s3_impl2(x, y, type_y, left); }} r_stop_unreachable(); } static enum vctrs_type2_s3 vec_typeof2_s3_impl2(r_obj* x, r_obj* y, enum vctrs_type type_y, int* left) { switch (class_type(x)) { case VCTRS_CLASS_bare_factor: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_factor; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_factor; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_factor; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_factor; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_factor; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_factor; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_factor; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_factor; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_factor; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_factor; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_factor; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = -1; return VCTRS_TYPE2_S3_bare_factor_bare_factor; case VCTRS_CLASS_bare_ordered: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_factor_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_factor_unknown; } }} } case VCTRS_CLASS_bare_ordered: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_ordered; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_ordered; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_ordered; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_ordered; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_ordered; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_ordered; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_ordered; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_ordered; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_ordered; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_ordered; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_ordered; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_ordered; case VCTRS_CLASS_bare_ordered: *left = -1; return VCTRS_TYPE2_S3_bare_ordered_bare_ordered; case VCTRS_CLASS_bare_date: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_ordered_unknown; } }} } case VCTRS_CLASS_bare_date: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_date; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_date; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_date; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_date; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_date; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_date; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_date; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_date; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_date; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_date; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_date; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_date; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_date; case VCTRS_CLASS_bare_date: *left = -1; return VCTRS_TYPE2_S3_bare_date_bare_date; case VCTRS_CLASS_bare_posixct: *left = 0; return VCTRS_TYPE2_S3_bare_date_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_date_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_date_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_date_unknown; } }} } case VCTRS_CLASS_bare_posixct: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_posixct; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_posixct; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_posixct; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_posixct; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_posixct; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_posixct; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_posixct; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_posixct; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_posixct; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_posixct; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_posixct; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_posixct; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_posixct; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_bare_posixct; case VCTRS_CLASS_bare_posixct: *left = -1; return VCTRS_TYPE2_S3_bare_posixct_bare_posixct; case VCTRS_CLASS_bare_posixlt: *left = 0; return VCTRS_TYPE2_S3_bare_posixct_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_posixct_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_posixct_unknown; } }} } case VCTRS_CLASS_bare_posixlt: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_posixlt; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_posixlt; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_posixlt; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_posixlt; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_posixlt; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_posixlt; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_posixlt; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_posixlt; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_posixlt; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_posixlt; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_posixlt; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_posixlt; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_posixlt; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_bare_posixlt; case VCTRS_CLASS_bare_posixct: *left = 1; return VCTRS_TYPE2_S3_bare_posixct_bare_posixlt; case VCTRS_CLASS_bare_posixlt: *left = -1; return VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt; case VCTRS_CLASS_bare_tibble: *left = 0; return VCTRS_TYPE2_S3_bare_posixlt_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_posixlt_unknown; } }} } case VCTRS_CLASS_bare_tibble: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_bare_tibble; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_bare_tibble; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_bare_tibble; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_bare_tibble; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_bare_tibble; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_bare_tibble; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_bare_tibble; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_bare_tibble; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_bare_tibble; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_bare_tibble; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_bare_tibble; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_bare_tibble; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_bare_tibble; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_bare_tibble; case VCTRS_CLASS_bare_posixct: *left = 1; return VCTRS_TYPE2_S3_bare_posixct_bare_tibble; case VCTRS_CLASS_bare_posixlt: *left = 1; return VCTRS_TYPE2_S3_bare_posixlt_bare_tibble; case VCTRS_CLASS_bare_tibble: *left = -1; return VCTRS_TYPE2_S3_bare_tibble_bare_tibble; default: *left = 0; return VCTRS_TYPE2_S3_bare_tibble_unknown; } }} } default: { switch (type_y) { case VCTRS_TYPE_null: *left = 1; return VCTRS_TYPE2_S3_null_unknown; case VCTRS_TYPE_unspecified: *left = 1; return VCTRS_TYPE2_S3_unspecified_unknown; case VCTRS_TYPE_logical: *left = 1; return VCTRS_TYPE2_S3_logical_unknown; case VCTRS_TYPE_integer: *left = 1; return VCTRS_TYPE2_S3_integer_unknown; case VCTRS_TYPE_double: *left = 1; return VCTRS_TYPE2_S3_double_unknown; case VCTRS_TYPE_complex: *left = 1; return VCTRS_TYPE2_S3_complex_unknown; case VCTRS_TYPE_character: *left = 1; return VCTRS_TYPE2_S3_character_unknown; case VCTRS_TYPE_raw: *left = 1; return VCTRS_TYPE2_S3_raw_unknown; case VCTRS_TYPE_list: *left = 1; return VCTRS_TYPE2_S3_list_unknown; case VCTRS_TYPE_dataframe: *left = 1; return VCTRS_TYPE2_S3_dataframe_unknown; case VCTRS_TYPE_scalar: *left = 1; return VCTRS_TYPE2_S3_scalar_unknown; case VCTRS_TYPE_s3: { switch (class_type(y)) { case VCTRS_CLASS_bare_factor: *left = 1; return VCTRS_TYPE2_S3_bare_factor_unknown; case VCTRS_CLASS_bare_ordered: *left = 1; return VCTRS_TYPE2_S3_bare_ordered_unknown; case VCTRS_CLASS_bare_date: *left = 1; return VCTRS_TYPE2_S3_bare_date_unknown; case VCTRS_CLASS_bare_posixct: *left = 1; return VCTRS_TYPE2_S3_bare_posixct_unknown; case VCTRS_CLASS_bare_posixlt: *left = 1; return VCTRS_TYPE2_S3_bare_posixlt_unknown; case VCTRS_CLASS_bare_tibble: *left = 1; return VCTRS_TYPE2_S3_bare_tibble_unknown; default: *left = -1; return VCTRS_TYPE2_S3_unknown_unknown; } }} }} r_stop_unreachable(); } static enum vctrs_type2_s3 vec_typeof2_s3(r_obj* x, r_obj* y) { int _; return vec_typeof2_s3_impl(x, y, vec_typeof(x), vec_typeof(y), &_); } static const char* vctrs_type2_s3_as_str(enum vctrs_type2_s3 type) { switch (type) { case VCTRS_TYPE2_S3_null_bare_factor: return "VCTRS_TYPE2_S3_null_bare_factor"; case VCTRS_TYPE2_S3_null_bare_ordered: return "VCTRS_TYPE2_S3_null_bare_ordered"; case VCTRS_TYPE2_S3_null_bare_date: return "VCTRS_TYPE2_S3_null_bare_date"; case VCTRS_TYPE2_S3_null_bare_posixct: return "VCTRS_TYPE2_S3_null_bare_posixct"; case VCTRS_TYPE2_S3_null_bare_posixlt: return "VCTRS_TYPE2_S3_null_bare_posixlt"; case VCTRS_TYPE2_S3_null_bare_tibble: return "VCTRS_TYPE2_S3_null_bare_tibble"; case VCTRS_TYPE2_S3_null_unknown: return "VCTRS_TYPE2_S3_null_unknown"; case VCTRS_TYPE2_S3_unspecified_bare_factor: return "VCTRS_TYPE2_S3_unspecified_bare_factor"; case VCTRS_TYPE2_S3_unspecified_bare_ordered: return "VCTRS_TYPE2_S3_unspecified_bare_ordered"; case VCTRS_TYPE2_S3_unspecified_bare_date: return "VCTRS_TYPE2_S3_unspecified_bare_date"; case VCTRS_TYPE2_S3_unspecified_bare_posixct: return "VCTRS_TYPE2_S3_unspecified_bare_posixct"; case VCTRS_TYPE2_S3_unspecified_bare_posixlt: return "VCTRS_TYPE2_S3_unspecified_bare_posixlt"; case VCTRS_TYPE2_S3_unspecified_bare_tibble: return "VCTRS_TYPE2_S3_unspecified_bare_tibble"; case VCTRS_TYPE2_S3_unspecified_unknown: return "VCTRS_TYPE2_S3_unspecified_unknown"; case VCTRS_TYPE2_S3_logical_bare_factor: return "VCTRS_TYPE2_S3_logical_bare_factor"; case VCTRS_TYPE2_S3_logical_bare_ordered: return "VCTRS_TYPE2_S3_logical_bare_ordered"; case VCTRS_TYPE2_S3_logical_bare_date: return "VCTRS_TYPE2_S3_logical_bare_date"; case VCTRS_TYPE2_S3_logical_bare_posixct: return "VCTRS_TYPE2_S3_logical_bare_posixct"; case VCTRS_TYPE2_S3_logical_bare_posixlt: return "VCTRS_TYPE2_S3_logical_bare_posixlt"; case VCTRS_TYPE2_S3_logical_bare_tibble: return "VCTRS_TYPE2_S3_logical_bare_tibble"; case VCTRS_TYPE2_S3_logical_unknown: return "VCTRS_TYPE2_S3_logical_unknown"; case VCTRS_TYPE2_S3_integer_bare_factor: return "VCTRS_TYPE2_S3_integer_bare_factor"; case VCTRS_TYPE2_S3_integer_bare_ordered: return "VCTRS_TYPE2_S3_integer_bare_ordered"; case VCTRS_TYPE2_S3_integer_bare_date: return "VCTRS_TYPE2_S3_integer_bare_date"; case VCTRS_TYPE2_S3_integer_bare_posixct: return "VCTRS_TYPE2_S3_integer_bare_posixct"; case VCTRS_TYPE2_S3_integer_bare_posixlt: return "VCTRS_TYPE2_S3_integer_bare_posixlt"; case VCTRS_TYPE2_S3_integer_bare_tibble: return "VCTRS_TYPE2_S3_integer_bare_tibble"; case VCTRS_TYPE2_S3_integer_unknown: return "VCTRS_TYPE2_S3_integer_unknown"; case VCTRS_TYPE2_S3_double_bare_factor: return "VCTRS_TYPE2_S3_double_bare_factor"; case VCTRS_TYPE2_S3_double_bare_ordered: return "VCTRS_TYPE2_S3_double_bare_ordered"; case VCTRS_TYPE2_S3_double_bare_date: return "VCTRS_TYPE2_S3_double_bare_date"; case VCTRS_TYPE2_S3_double_bare_posixct: return "VCTRS_TYPE2_S3_double_bare_posixct"; case VCTRS_TYPE2_S3_double_bare_posixlt: return "VCTRS_TYPE2_S3_double_bare_posixlt"; case VCTRS_TYPE2_S3_double_bare_tibble: return "VCTRS_TYPE2_S3_double_bare_tibble"; case VCTRS_TYPE2_S3_double_unknown: return "VCTRS_TYPE2_S3_double_unknown"; case VCTRS_TYPE2_S3_complex_bare_factor: return "VCTRS_TYPE2_S3_complex_bare_factor"; case VCTRS_TYPE2_S3_complex_bare_ordered: return "VCTRS_TYPE2_S3_complex_bare_ordered"; case VCTRS_TYPE2_S3_complex_bare_date: return "VCTRS_TYPE2_S3_complex_bare_date"; case VCTRS_TYPE2_S3_complex_bare_posixct: return "VCTRS_TYPE2_S3_complex_bare_posixct"; case VCTRS_TYPE2_S3_complex_bare_posixlt: return "VCTRS_TYPE2_S3_complex_bare_posixlt"; case VCTRS_TYPE2_S3_complex_bare_tibble: return "VCTRS_TYPE2_S3_complex_bare_tibble"; case VCTRS_TYPE2_S3_complex_unknown: return "VCTRS_TYPE2_S3_complex_unknown"; case VCTRS_TYPE2_S3_character_bare_factor: return "VCTRS_TYPE2_S3_character_bare_factor"; case VCTRS_TYPE2_S3_character_bare_ordered: return "VCTRS_TYPE2_S3_character_bare_ordered"; case VCTRS_TYPE2_S3_character_bare_date: return "VCTRS_TYPE2_S3_character_bare_date"; case VCTRS_TYPE2_S3_character_bare_posixct: return "VCTRS_TYPE2_S3_character_bare_posixct"; case VCTRS_TYPE2_S3_character_bare_posixlt: return "VCTRS_TYPE2_S3_character_bare_posixlt"; case VCTRS_TYPE2_S3_character_bare_tibble: return "VCTRS_TYPE2_S3_character_bare_tibble"; case VCTRS_TYPE2_S3_character_unknown: return "VCTRS_TYPE2_S3_character_unknown"; case VCTRS_TYPE2_S3_raw_bare_factor: return "VCTRS_TYPE2_S3_raw_bare_factor"; case VCTRS_TYPE2_S3_raw_bare_ordered: return "VCTRS_TYPE2_S3_raw_bare_ordered"; case VCTRS_TYPE2_S3_raw_bare_date: return "VCTRS_TYPE2_S3_raw_bare_date"; case VCTRS_TYPE2_S3_raw_bare_posixct: return "VCTRS_TYPE2_S3_raw_bare_posixct"; case VCTRS_TYPE2_S3_raw_bare_posixlt: return "VCTRS_TYPE2_S3_raw_bare_posixlt"; case VCTRS_TYPE2_S3_raw_bare_tibble: return "VCTRS_TYPE2_S3_raw_bare_tibble"; case VCTRS_TYPE2_S3_raw_unknown: return "VCTRS_TYPE2_S3_raw_unknown"; case VCTRS_TYPE2_S3_list_bare_factor: return "VCTRS_TYPE2_S3_list_bare_factor"; case VCTRS_TYPE2_S3_list_bare_ordered: return "VCTRS_TYPE2_S3_list_bare_ordered"; case VCTRS_TYPE2_S3_list_bare_date: return "VCTRS_TYPE2_S3_list_bare_date"; case VCTRS_TYPE2_S3_list_bare_posixct: return "VCTRS_TYPE2_S3_list_bare_posixct"; case VCTRS_TYPE2_S3_list_bare_posixlt: return "VCTRS_TYPE2_S3_list_bare_posixlt"; case VCTRS_TYPE2_S3_list_bare_tibble: return "VCTRS_TYPE2_S3_list_bare_tibble"; case VCTRS_TYPE2_S3_list_unknown: return "VCTRS_TYPE2_S3_list_unknown"; case VCTRS_TYPE2_S3_dataframe_bare_factor: return "VCTRS_TYPE2_S3_dataframe_bare_factor"; case VCTRS_TYPE2_S3_dataframe_bare_ordered: return "VCTRS_TYPE2_S3_dataframe_bare_ordered"; case VCTRS_TYPE2_S3_dataframe_bare_date: return "VCTRS_TYPE2_S3_dataframe_bare_date"; case VCTRS_TYPE2_S3_dataframe_bare_posixct: return "VCTRS_TYPE2_S3_dataframe_bare_posixct"; case VCTRS_TYPE2_S3_dataframe_bare_posixlt: return "VCTRS_TYPE2_S3_dataframe_bare_posixlt"; case VCTRS_TYPE2_S3_dataframe_bare_tibble: return "VCTRS_TYPE2_S3_dataframe_bare_tibble"; case VCTRS_TYPE2_S3_dataframe_unknown: return "VCTRS_TYPE2_S3_dataframe_unknown"; case VCTRS_TYPE2_S3_scalar_bare_factor: return "VCTRS_TYPE2_S3_scalar_bare_factor"; case VCTRS_TYPE2_S3_scalar_bare_ordered: return "VCTRS_TYPE2_S3_scalar_bare_ordered"; case VCTRS_TYPE2_S3_scalar_bare_date: return "VCTRS_TYPE2_S3_scalar_bare_date"; case VCTRS_TYPE2_S3_scalar_bare_posixct: return "VCTRS_TYPE2_S3_scalar_bare_posixct"; case VCTRS_TYPE2_S3_scalar_bare_posixlt: return "VCTRS_TYPE2_S3_scalar_bare_posixlt"; case VCTRS_TYPE2_S3_scalar_bare_tibble: return "VCTRS_TYPE2_S3_scalar_bare_tibble"; case VCTRS_TYPE2_S3_scalar_unknown: return "VCTRS_TYPE2_S3_scalar_unknown"; case VCTRS_TYPE2_S3_bare_factor_bare_factor: return "VCTRS_TYPE2_S3_bare_factor_bare_factor"; case VCTRS_TYPE2_S3_bare_factor_bare_ordered: return "VCTRS_TYPE2_S3_bare_factor_bare_ordered"; case VCTRS_TYPE2_S3_bare_factor_bare_date: return "VCTRS_TYPE2_S3_bare_factor_bare_date"; case VCTRS_TYPE2_S3_bare_factor_bare_posixct: return "VCTRS_TYPE2_S3_bare_factor_bare_posixct"; case VCTRS_TYPE2_S3_bare_factor_bare_posixlt: return "VCTRS_TYPE2_S3_bare_factor_bare_posixlt"; case VCTRS_TYPE2_S3_bare_factor_bare_tibble: return "VCTRS_TYPE2_S3_bare_factor_bare_tibble"; case VCTRS_TYPE2_S3_bare_factor_unknown: return "VCTRS_TYPE2_S3_bare_factor_unknown"; case VCTRS_TYPE2_S3_bare_ordered_bare_ordered: return "VCTRS_TYPE2_S3_bare_ordered_bare_ordered"; case VCTRS_TYPE2_S3_bare_ordered_bare_date: return "VCTRS_TYPE2_S3_bare_ordered_bare_date"; case VCTRS_TYPE2_S3_bare_ordered_bare_posixct: return "VCTRS_TYPE2_S3_bare_ordered_bare_posixct"; case VCTRS_TYPE2_S3_bare_ordered_bare_posixlt: return "VCTRS_TYPE2_S3_bare_ordered_bare_posixlt"; case VCTRS_TYPE2_S3_bare_ordered_bare_tibble: return "VCTRS_TYPE2_S3_bare_ordered_bare_tibble"; case VCTRS_TYPE2_S3_bare_ordered_unknown: return "VCTRS_TYPE2_S3_bare_ordered_unknown"; case VCTRS_TYPE2_S3_bare_date_bare_date: return "VCTRS_TYPE2_S3_bare_date_bare_date"; case VCTRS_TYPE2_S3_bare_date_bare_posixct: return "VCTRS_TYPE2_S3_bare_date_bare_posixct"; case VCTRS_TYPE2_S3_bare_date_bare_posixlt: return "VCTRS_TYPE2_S3_bare_date_bare_posixlt"; case VCTRS_TYPE2_S3_bare_date_bare_tibble: return "VCTRS_TYPE2_S3_bare_date_bare_tibble"; case VCTRS_TYPE2_S3_bare_date_unknown: return "VCTRS_TYPE2_S3_bare_date_unknown"; case VCTRS_TYPE2_S3_bare_posixct_bare_posixct: return "VCTRS_TYPE2_S3_bare_posixct_bare_posixct"; case VCTRS_TYPE2_S3_bare_posixct_bare_posixlt: return "VCTRS_TYPE2_S3_bare_posixct_bare_posixlt"; case VCTRS_TYPE2_S3_bare_posixct_bare_tibble: return "VCTRS_TYPE2_S3_bare_posixct_bare_tibble"; case VCTRS_TYPE2_S3_bare_posixct_unknown: return "VCTRS_TYPE2_S3_bare_posixct_unknown"; case VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt: return "VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt"; case VCTRS_TYPE2_S3_bare_posixlt_bare_tibble: return "VCTRS_TYPE2_S3_bare_posixlt_bare_tibble"; case VCTRS_TYPE2_S3_bare_posixlt_unknown: return "VCTRS_TYPE2_S3_bare_posixlt_unknown"; case VCTRS_TYPE2_S3_bare_tibble_bare_tibble: return "VCTRS_TYPE2_S3_bare_tibble_bare_tibble"; case VCTRS_TYPE2_S3_bare_tibble_unknown: return "VCTRS_TYPE2_S3_bare_tibble_unknown"; case VCTRS_TYPE2_S3_unknown_unknown: return "VCTRS_TYPE2_S3_unknown_unknown"; } r_stop_unreachable(); } r_obj* ffi_typeof2_s3(r_obj* x, r_obj* y) { enum vctrs_type2_s3 type = vec_typeof2_s3(x, y); return r_chr(vctrs_type2_s3_as_str(type)); } vctrs/src/slice.h0000644000176200001440000000200315057550670013457 0ustar liggesusers#ifndef VCTRS_SLICE_H #define VCTRS_SLICE_H #include "vctrs-core.h" struct vec_slice_opts { struct vctrs_arg* x_arg; struct vctrs_arg* i_arg; struct r_lazy call; }; enum vctrs_materialize { VCTRS_MATERIALIZE_false = 0, VCTRS_MATERIALIZE_true }; r_obj* vec_slice_opts(r_obj* x, r_obj* i, const struct vec_slice_opts* opts); static inline r_obj* vec_slice(r_obj* x, r_obj* i) { const struct vec_slice_opts opts = { 0 }; return vec_slice_opts(x, i, &opts); } r_obj* vec_reverse(r_obj* x); r_obj* vec_init(r_obj* x, r_ssize n); r_obj* vec_slice_unsafe(r_obj* x, r_obj* i); r_obj* vec_slice_base(enum vctrs_type type, r_obj* x, r_obj* subscript, enum vctrs_materialize materialize); r_obj* slice_names(r_obj* names, r_obj* subscript); r_obj* slice_rownames(r_obj* names, r_obj* subscript); r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript); bool vec_is_restored(r_obj* x, r_obj* to); #endif vctrs/src/rlang-dev.h0000644000176200001440000000051515036170003014226 0ustar liggesusers#ifndef VCTRS_RLANG_DEV_H #define VCTRS_RLANG_DEV_H #include static inline const char* r_c_str_format_error_arg(const char* x) { r_obj* ffi_x = KEEP(r_chr(x)); const char* out = r_format_error_arg(ffi_x); FREE(1); return out; } // vmax-protected result const char* r_obj_type_friendly_length(r_obj* x); #endif vctrs/src/vctrs-core.h0000644000176200001440000001136415156001116014443 0ustar liggesusers#ifndef VCTRS_CORE_H #define VCTRS_CORE_H #include // IWYU pragma: export #include "globals.h" // IWYU pragma: export #include "rlang-dev.h" // IWYU pragma: export #include "type-info.h" // IWYU pragma: export #include #include #include extern bool vctrs_debug_verbose; #define VCTRS_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) // An ERR indicates either a C NULL in case of no error, or a // condition object otherwise #define ERR SEXP // Generic swap macro #define SWAP(T, x, y) do { \ T tmp = x; \ x = y; \ y = tmp; \ } while (0) struct r_ssize_int_pair { r_ssize x; int y; }; /** * Ownership modeling * * Shallow and deep ownership imply that "we" own the object, and is not * dependent on the refcount in any way. * * Foreign ownership implies that R owns the object, and can only be modified in * place if the refcount is 0. */ enum vctrs_ownership { // No known ownership // // The object is "foreign" to us, typically meaning it came through FFI // (_foreign_ function interface) from the R side. // // If there are any references on this object, it will be cloned before being // modified, otherwise it will still be modified in place without cloning. VCTRS_OWNERSHIP_foreign, // Shallow ownership // - For atomics, we own the vector // - For lists (data frames), we own the list, but not the contents (columns) VCTRS_OWNERSHIP_shallow, // Deep ownership // We own the object recursively. Only used when we create it fully at C level. VCTRS_OWNERSHIP_deep }; /** * Index style * * - Location indices can be integer, character, or logical, but are ultimately * converted to positive integer locations by `vec_as_location()` before the * core assignment loop. * * - Condition indices are logical vectors the same size as `x`, where `TRUE` * denotes that you should assign to that spot. They are not converted to * integer locations before assignment. * * `vec_assign()` has separate optimized paths for each index style. * * TODO: `vec_slice()` should also have an optimized path for condition indices! * i.e. `vec_slice(x, )` should not call `vec_as_location()`. * * Condition indices are the inputs to `dplyr::if_else()` and `dplyr::case_when()`, * so having an optimized path for these is very helpful. */ enum vctrs_index_style { VCTRS_INDEX_STYLE_location, VCTRS_INDEX_STYLE_condition }; /** * Structure for argument tags * * Argument tags are used in error messages to provide information * about which elements of nested data structures (such as tibbles) * fail to match a given type. They are generated lazily by the `fill` * method in order to avoid any cost when there is no error. * * @member parent The previously active argument tag. * @member fill Takes a pointer to data, and a buffer to fill. If the * buffer is too small according to the `remaining` argument, * `fill()` must return a negative error value. */ struct vctrs_arg { r_obj* shelter; struct vctrs_arg* parent; r_ssize (*fill)(void* data, char* buf, r_ssize remaining); void* data; }; // Annex F of C99 specifies that `double` should conform to the IEEE 754 // type `binary64`, which is defined as: // * 1 bit : sign // * 11 bits: exponent // * 52 bits: significand // // R stores the value "1954" in the last 32 bits: this payload marks // the value as a NA, not a regular NaN. // // On big endian systems, this corresponds to the second element of an // integer array of size 2. On little endian systems, this is flipped // and the NA marker is in the first element. // // The type assumptions made here are asserted in `vctrs_init_utils()` #ifdef WORDS_BIGENDIAN static const int vctrs_indicator_pos = 1; #else static const int vctrs_indicator_pos = 0; #endif union vctrs_dbl_indicator { double value; // 8 bytes unsigned int key[2]; // 4 * 2 bytes }; enum vctrs_dbl { VCTRS_DBL_number, VCTRS_DBL_missing, VCTRS_DBL_nan }; // Inlining `dbl_classify()` greatly improves `vec_match()` performance // with doubles! static inline enum vctrs_dbl dbl_classify(double x) { if (!isnan(x)) { return VCTRS_DBL_number; } union vctrs_dbl_indicator indicator; indicator.value = x; if (indicator.key[vctrs_indicator_pos] == 1954) { return VCTRS_DBL_missing; } else { return VCTRS_DBL_nan; } } // Compatibility ------------------------------------------------ #if (R_VERSION < R_Version(4, 5, 0)) # define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x)) #endif // https://github.com/wch/r-source/commit/38403c9c347dd5426da6009573b087188ec6be04 #if (R_VERSION < R_Version(4, 4, 0)) # ifdef LONG_VECTOR_SUPPORT # define R_PRIdXLEN_T "td" # else # define R_PRIdXLEN_T "d" # endif #endif #endif vctrs/src/ptype2.h0000644000176200001440000000133315127057357013612 0ustar liggesusers#ifndef VCTRS_PTYPE2_H #define VCTRS_PTYPE2_H #include "vctrs-core.h" // Sync with R constants in ptype2.R enum s3_fallback { S3_FALLBACK_false = 0, S3_FALLBACK_true = 1 }; r_obj* vec_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left ); bool vec_is_coercible( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); enum s3_fallback s3_fallback_from_opts(r_obj* opts); r_obj* vec_ptype_or_s3_fallback( r_obj* x, struct vctrs_arg* p_x_arg, enum vctrs_type x_type, struct r_lazy call, enum s3_fallback s3_fallback ); #endif vctrs/src/cast.c0000644000176200001440000002553415157322033013312 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/cast-decl.h" // [[ register() ]] r_obj* ffi_cast(r_obj* x, r_obj* to, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy to_arg_ = { .x = syms.to_arg, .env = frame }; struct vctrs_arg to_arg = new_lazy_arg(&to_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; return vec_cast(x, to, &x_arg, &to_arg, call); } r_obj* vec_cast_opts(const struct cast_opts* opts) { r_obj* x = opts->x; r_obj* to = opts->to; struct vctrs_arg* p_x_arg = opts->p_x_arg; struct vctrs_arg* p_to_arg = opts->p_to_arg; struct r_lazy call = opts->call; if (x == r_null) { // Allow both `vec_cast(NULL, )` and `vec_cast(NULL, NULL)` obj_check_vector(to, VCTRS_ALLOW_NULL_yes, p_to_arg, call); return x; } if (to == r_null) { // Allow `vec_cast(, NULL)` obj_check_vector(x, VCTRS_ALLOW_NULL_no, p_x_arg, call); return x; } enum vctrs_type x_type = vec_typeof(x); enum vctrs_type to_type = vec_typeof(to); if (x_type == VCTRS_TYPE_unspecified) { return vec_init(to, vec_size(x)); } if (x_type == VCTRS_TYPE_scalar) { stop_scalar_type(x, p_x_arg, call); } if (to_type == VCTRS_TYPE_scalar) { stop_scalar_type(to, p_to_arg, call); } r_obj* out = r_null; bool lossy = false; if (to_type == VCTRS_TYPE_s3 || x_type == VCTRS_TYPE_s3) { out = KEEP(vec_cast_dispatch_native(opts, x_type, to_type, &lossy)); } else { out = KEEP(vec_cast_switch_native(opts, x_type, to_type, &lossy)); } if (lossy || out == r_null) { // This broadcasts dimensions too FREE(1); return vec_cast_dispatch_s3(opts); } if (has_dim(x) || has_dim(to)) { r_obj* x_dim = KEEP(r_dim(x)); r_obj* x_dim_names = KEEP(r_dim_names(x)); r_obj* out_dim = KEEP(r_dim(out)); r_obj* out_dim_names = KEEP(r_dim_names(out)); // Ensure `out` has the shape of `x`. // Native casting doesn't propagate shape. if (!obj_equal(out_dim, x_dim) || !obj_equal(out_dim_names, x_dim_names)) { out = KEEP(r_clone_referenced(out)); r_attrib_poke_dim(out, x_dim); r_attrib_poke_dim_names(out, x_dim_names); FREE(1); } KEEP(out); // Broadcast `out` to the shape of `to` out = vec_shape_broadcast(out, to, p_x_arg, p_to_arg, call); FREE(5); } FREE(1); return out; } static r_obj* vec_cast_switch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy) { r_obj* x = opts->x; int dir = 0; enum vctrs_type2 type2 = vec_typeof2_impl(x_type, to_type, &dir); switch (type2) { case VCTRS_TYPE2_logical_logical: case VCTRS_TYPE2_integer_integer: case VCTRS_TYPE2_double_double: case VCTRS_TYPE2_complex_complex: case VCTRS_TYPE2_raw_raw: case VCTRS_TYPE2_character_character: case VCTRS_TYPE2_list_list: return x; case VCTRS_TYPE2_logical_integer: if (dir == 0) { return lgl_as_integer(x, lossy); } else { return int_as_logical(x, lossy); } case VCTRS_TYPE2_logical_double: if (dir == 0) { return lgl_as_double(x, lossy); } else { return dbl_as_logical(x, lossy); } case VCTRS_TYPE2_integer_double: if (dir == 0) { return int_as_double(x, lossy); } else { return dbl_as_integer(x, lossy); } case VCTRS_TYPE2_dataframe_dataframe: return df_cast_opts(opts); default: break; } return r_null; } static inline r_obj* vec_cast_default_full(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, enum s3_fallback s3_fallback, bool from_dispatch) { r_obj* ffi_s3_fallback = KEEP(r_int(s3_fallback)); r_obj* ffi_x_arg = KEEP(vctrs_arg(p_x_arg)); r_obj* ffi_to_arg = KEEP(vctrs_arg(p_to_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(call)); r_obj* out = vctrs_eval_mask7(syms.vec_default_cast, syms_x, x, syms_to, to, syms_x_arg, ffi_x_arg, syms_to_arg, ffi_to_arg, syms_call, ffi_call, syms_from_dispatch, r_lgl(from_dispatch), syms_s3_fallback, ffi_s3_fallback); FREE(4); return out; } r_obj* vec_cast_default(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, enum s3_fallback s3_fallback) { return vec_cast_default_full(x, to, p_x_arg, p_to_arg, call, s3_fallback, false); } static r_obj* vec_cast_dispatch_s3(const struct cast_opts* opts) { r_obj* x = opts->x; r_obj* to = opts->to; r_obj* method_sym = r_null; r_obj* method = s3_find_method_xy("vec_cast", to, x, vctrs_method_table, &method_sym); // Compatibility with legacy double dispatch mechanism if (method == r_null) { r_obj* to_method_sym = r_null; r_obj* to_method = KEEP(s3_find_method2("vec_cast", to, vctrs_method_table, &to_method_sym)); if (to_method != r_null) { // Only `to_method`s contained within a package will // have an S3 methods table to look in r_obj* to_table = s3_get_table(r_fn_env(to_method)); if (to_table != r_null) { const char* to_method_str = r_sym_c_string(to_method_sym); method = s3_find_method2( to_method_str, x, to_table, &method_sym ); } } FREE(1); } KEEP(method); if (method == r_null) { r_obj* out = vec_cast_default_full(x, to, opts->p_x_arg, opts->p_to_arg, opts->call, opts->s3_fallback, true); FREE(1); return out; } r_obj* r_x_arg = KEEP(vctrs_arg(opts->p_x_arg)); r_obj* r_to_arg = KEEP(vctrs_arg(opts->p_to_arg)); r_obj* out = vec_invoke_coerce_method(method_sym, method, syms_x, x, syms_to, to, syms_x_arg, r_x_arg, syms_to_arg, r_to_arg, opts->call, opts->s3_fallback); FREE(3); return out; } struct cast_err_data { const struct cast_opts* opts; r_obj* out; }; static void vec_cast_e_cb(void* data_) { struct cast_err_data* data = (struct cast_err_data*) data_; data->out = vec_cast_opts(data->opts); } r_obj* vec_cast_e(const struct cast_opts* opts, ERR* err) { struct cast_err_data data = { .opts = opts, .out = r_null }; *err = r_try_catch(&vec_cast_e_cb, &data, syms_vctrs_error_incompatible_type, NULL, NULL); return data.out; } r_obj* vec_cast_common_opts(r_obj* xs, r_obj* to, const struct cast_common_opts* opts) { r_obj* type = KEEP(vec_ptype_common( xs, to, PTYPE_FINALISE_DEFAULT, opts->s3_fallback, opts->p_arg, opts->call )); const r_ssize xs_size = r_length(xs); r_obj* xs_names = KEEP(r_names(xs)); r_obj* const* v_xs = r_list_cbegin(xs); r_obj* out = KEEP(r_alloc_list(xs_size)); r_attrib_poke_names(out, xs_names); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( opts->p_arg, xs_names, xs_size, &i ); KEEP(p_x_arg->shelter); for (; i < xs_size; ++i) { r_obj* elt = v_xs[i]; struct cast_opts cast_opts = { .x = elt, .to = type, .p_x_arg = p_x_arg, .call = opts->call, .s3_fallback = opts->s3_fallback }; r_list_poke(out, i, vec_cast_opts(&cast_opts)); } FREE(4); return out; } r_obj* vec_cast_common_params(r_obj* xs, r_obj* to, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call) { struct cast_common_opts opts = { .p_arg = p_arg, .call = call, .s3_fallback = s3_fallback }; return vec_cast_common_opts(xs, to, &opts); } r_obj* vec_cast_common(r_obj* xs, r_obj* to, struct vctrs_arg* p_arg, struct r_lazy call) { return vec_cast_common_params(xs, to, S3_FALLBACK_false, p_arg, call); } // [[ register(external = TRUE) ]] r_obj* ffi_cast_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* to = r_node_car(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); r_obj* out = vec_cast_common(xs, to, &xs_arg, call); return out; } // [[ register(external = TRUE) ]] r_obj* ffi_cast_common_opts(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* to = r_node_car(args); args = r_node_cdr(args); r_obj* ffi_opts = r_node_car(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); struct cast_common_opts opts = { .p_arg = &xs_arg, .call = call, .s3_fallback = s3_fallback_from_opts(ffi_opts) }; r_obj* out = vec_cast_common_opts(xs, to, &opts); return out; } struct cast_opts new_cast_opts(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call, r_obj* opts) { return (struct cast_opts) { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg, .call = call, .s3_fallback = s3_fallback_from_opts(opts) }; } void vctrs_init_cast(r_obj* ns) { syms.vec_default_cast = r_sym("vec_default_cast"); } vctrs/src/utils.h0000644000176200001440000004063215156001116013514 0ustar liggesusers#ifndef VCTRS_UTILS_H #define VCTRS_UTILS_H #include "vctrs-core.h" #include "arg.h" #define PROTECT_N(x, n) (++*n, PROTECT(x)) #define PROTECT2(x, y) (PROTECT(x), PROTECT(y)) int r_bool_as_int(SEXP x); SEXP vctrs_eval_mask_n(SEXP fn, SEXP* syms, SEXP* args); SEXP vctrs_eval_mask1(SEXP fn, SEXP x_sym, SEXP x); SEXP vctrs_eval_mask2(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y); SEXP vctrs_eval_mask3(SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_eval_mask4(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4); SEXP vctrs_eval_mask5(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5); SEXP vctrs_eval_mask6(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6); SEXP vctrs_eval_mask7(SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6, SEXP x7_sym, SEXP x7); r_obj* vctrs_eval_mask8(r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7, r_obj* x8_sym, r_obj* x8); SEXP vctrs_dispatch_n(SEXP fn_sym, SEXP fn, SEXP* syms, SEXP* args); SEXP vctrs_dispatch1(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x); SEXP vctrs_dispatch2(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y); SEXP vctrs_dispatch3(SEXP fn_sym, SEXP fn, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); SEXP vctrs_dispatch4(SEXP fn_sym, SEXP fn, SEXP w_sym, SEXP w, SEXP x_sym, SEXP x, SEXP y_sym, SEXP y, SEXP z_sym, SEXP z); static inline r_obj* vctrs_dispatch5(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5) { r_obj* syms[6] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, NULL }; r_obj* args[6] = { x1, x2, x3, x4, x5, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } SEXP vctrs_dispatch6(SEXP fn_sym, SEXP fn, SEXP x1_sym, SEXP x1, SEXP x2_sym, SEXP x2, SEXP x3_sym, SEXP x3, SEXP x4_sym, SEXP x4, SEXP x5_sym, SEXP x5, SEXP x6_sym, SEXP x6); static inline r_obj* vctrs_dispatch7(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7) { r_obj* syms[8] = { x1_sym, x2_sym, x3_sym, x4_sym, x5_sym, x6_sym, x7_sym, NULL }; r_obj* args[8] = { x1, x2, x3, x4, x5, x6, x7, NULL }; return vctrs_dispatch_n(fn_sym, fn, syms, args); } static inline __attribute__((noreturn)) void stop_unimplemented_type(const char* fn, SEXPTYPE type) { r_stop_internal("Unimplemented type `%s`.", Rf_type2char(type)); } SEXP map(SEXP x, SEXP (*fn)(SEXP)); SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data); SEXP df_map(SEXP df, SEXP (*fn)(SEXP)); SEXP bare_df_map(SEXP df, SEXP (*fn)(SEXP)); bool is_data_frame(SEXP x); bool is_bare_data_frame(SEXP x); bool is_bare_tibble(SEXP x); SEXP int_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP raw_resize(SEXP x, r_ssize x_size, r_ssize size); SEXP chr_resize(SEXP x, r_ssize x_size, r_ssize size); // Returns S3 / S4 method for `generic` suitable for the class of `x`. The // inheritance hierarchy is explored except for the default method. SEXP s3_get_method(const char* generic, const char* cls, SEXP table); SEXP s3_sym_get_method(SEXP sym, SEXP table); SEXP s3_find_method(const char* generic, SEXP x, SEXP table); SEXP s3_class_find_method(const char* generic, SEXP cls, SEXP table); SEXP s3_get_class(SEXP x); SEXP s3_find_method_xy(const char* generic, SEXP x, SEXP y, SEXP table, SEXP* method_sym_out); SEXP s3_find_method2(const char* generic, SEXP x, SEXP table, SEXP* method_sym_out); SEXP s3_paste_method_sym(const char* generic, const char* cls); SEXP s3_bare_class(SEXP x); SEXP s4_find_method(SEXP x, SEXP table); SEXP s4_class_find_method(SEXP cls, SEXP table); bool vec_implements_ptype2(SEXP x); extern SEXP syms_s3_methods_table; // Only namespace environments have `.__S3MethodsTable__.` static inline SEXP s3_get_table(SEXP env) { if (r_env_has(env, syms_s3_methods_table)) { return r_env_get(env, syms_s3_methods_table); } else { return r_null; } } SEXP list_first_non_null(SEXP xs, R_len_t* non_null_i); // Destructive compacting SEXP node_compact_d(SEXP node); void never_reached(const char* fn) __attribute__((noreturn)); SEXP new_empty_factor(SEXP levels); SEXP new_empty_ordered(SEXP levels); bool list_has_inner_vec_names(SEXP x, R_len_t size); r_obj* list_pluck(r_obj* xs, r_ssize i); void init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing); SEXP compact_seq(R_len_t start, R_len_t size, bool increasing); bool is_compact_seq(SEXP x); void init_compact_rep(int* p, R_len_t i, R_len_t n); SEXP compact_rep(R_len_t i, R_len_t n); bool is_compact_rep(SEXP x); r_obj* new_compact_condition(R_xlen_t size); bool is_compact_condition(r_obj* x); r_ssize compact_condition_size(r_obj* x); bool* compact_condition_begin(r_obj* x); const bool* compact_condition_cbegin(r_obj* x); r_obj* compact_condition_materialize_location(r_obj* x); SEXP vec_subscript_materialize(SEXP x); R_len_t vec_subscript_size(SEXP x); r_ssize vec_condition_subscript_sum(r_obj* x, bool na_true); bool is_integer64(SEXP x); bool lgl_any_na(SEXP x); SEXP colnames(SEXP x); r_obj* colnames2(r_obj* x); void* r_vec_deref_barrier(SEXP x); const void* r_vec_deref_barrier_const(SEXP x); void r_vec_fill(SEXPTYPE type, void* p_dest, r_ssize dest_i, const void* p_src, r_ssize src_i, r_ssize n); void r_lgl_fill(SEXP x, int value, R_len_t n); void r_int_fill(SEXP x, int value, R_len_t n); void r_p_lgl_fill(int* p_x, int value, R_len_t n); void r_p_int_fill(int* p_x, int value, R_len_t n); void r_p_chr_fill(SEXP* p_x, SEXP value, R_len_t n); void r_int_fill_seq(SEXP x, int start, R_len_t n); SEXP r_seq(R_len_t from, R_len_t to); bool r_int_any_na(SEXP x); bool r_lgl_any(r_obj* x); r_obj* r_lgl_invert(r_obj* x); R_len_t r_chr_find(SEXP x, SEXP value); #define r_resize Rf_xlengthgets int r_chr_max_len(SEXP x); SEXP r_chr_iota(R_len_t n, char* buf, int len, const char* prefix); #define R_LAZY_ALLOC(SYM, PI, R_TYPE, SIZE) do { \ if (SYM == R_NilValue) { \ SYM = Rf_allocVector(R_TYPE, SIZE); \ REPROTECT(SYM, PI); \ } \ } while (0); static inline SEXP r_new_logical(R_len_t n) { return Rf_allocVector(LGLSXP, n); } static inline SEXP r_new_integer(R_len_t n) { return Rf_allocVector(INTSXP, n); } static inline SEXP r_new_character(R_len_t n) { return Rf_allocVector(STRSXP, n); } static inline SEXP r_new_raw(R_len_t n) { return Rf_allocVector(RAWSXP, n); } static inline SEXP r_new_list(R_len_t n) { return Rf_allocVector(VECSXP, n); } SEXP r_protect(SEXP x); bool r_is_number(SEXP x); bool r_is_positive_number(SEXP x); SEXP r_clone_referenced(SEXP x); SEXP r_call_n(SEXP fn, SEXP* tags, SEXP* cars); static inline bool r_is_s4(SEXP x) { return Rf_isS4(x); } static inline SEXP r_as_s4(SEXP x) { // - Return value must be used, unlike `SET_S4_OBJECT()` // - `Rf_asS4()` calls `shallow_duplicate(x)` if `MAYBE_SHARED(x)` // - `flag = 1` goes through `SET_S4_OBJECT()` // - `complete` is never utilized when `flag = 1` const Rboolean flag = 1; const int complete = 0; return Rf_asS4(x, flag, complete); } static inline SEXP r_as_not_s4(SEXP x) { // - Return value must be used, unlike `UNSET_S4_OBJECT()` // - `Rf_asS4()` calls `shallow_duplicate(x)` if `MAYBE_SHARED(x)` // - `flag = 0` goes through `UNSET_S4_OBJECT()` // - `complete` is for S4 objects that wrap a "complete" S3 object by placing // it in the `.Data` slot. If you set `complete = 1`, it will unwrap and // return that, which we don't want. If `complete = 0`, no additional // behavior will happen beyond the `UNSET_S4_OBJECT()` call. const Rboolean flag = 0; const int complete = 0; return Rf_asS4(x, flag, complete); } bool r_has_name_at(SEXP names, R_len_t i); bool r_is_names(SEXP names); bool r_is_minimal_names(SEXP x); bool r_is_empty_names(SEXP x); bool r_chr_has_string(SEXP x, SEXP str); static inline void* r_vec_unwrap(SEXPTYPE type, SEXP x) { switch (type) { case INTSXP: return (void*) INTEGER(x); default: stop_unimplemented_type("r_vec_unwrap", type); } } #define r_lgl Rf_ScalarLogical #define r_int Rf_ScalarInteger #define r_str Rf_mkChar #define r_chr Rf_mkString #define r_sym Rf_install // This unserialises ASCII Unicode tags of the form `` extern SEXP (*rlang_sym_as_character)(SEXP x); SEXP r_as_data_frame(SEXP x); static inline void r_dbg_save(SEXP x, const char* name) { Rf_defineVar(Rf_install(name), x, R_GlobalEnv); } ERR r_try_catch(void (*fn)(void*), void* fn_data, SEXP cnd_sym, void (*hnd)(void*), void* hnd_data); extern SEXP rlang_result_names; extern SEXP rlang_result_class; static inline SEXP r_result(SEXP x, ERR err) { if (!err) { err = R_NilValue; } SEXP result = PROTECT(Rf_allocVector(VECSXP, 2)); SET_VECTOR_ELT(result, 0, x); SET_VECTOR_ELT(result, 1, err); r_attrib_poke_names(result, rlang_result_names); r_attrib_poke_class(result, rlang_result_class); UNPROTECT(1); return result; } static inline SEXP r_result_get(SEXP x, ERR err) { if (err) { r_cnd_signal(err); } return x; } static inline struct vctrs_arg vec_as_arg(SEXP x) { if (x == R_NilValue) { return *vec_args.empty; } if (!r_is_string(x)) { Rf_errorcall(R_NilValue, "Argument tag must be a string."); } return new_wrapper_arg(NULL, r_chr_get_c_string(x, 0)); } extern SEXP fns_quote; static inline SEXP expr_protect(SEXP x) { switch (TYPEOF(x)) { case SYMSXP: case LANGSXP: return Rf_lang2(fns_quote, x); default: return x; } } static inline const void* vec_type_missing_value(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return &NA_LOGICAL; case VCTRS_TYPE_integer: return &NA_INTEGER; case VCTRS_TYPE_double: return &NA_REAL; case VCTRS_TYPE_complex: return &vctrs_shared_na_cpl; case VCTRS_TYPE_character: return &NA_STRING; case VCTRS_TYPE_list: return &R_NilValue; default: stop_unimplemented_vctrs_type("vec_type_missing_value", type); } } void c_print_backtrace(void); SEXP chr_c(SEXP x, SEXP y); extern SEXP vctrs_ns_env; extern SEXP vctrs_shared_empty_str; extern SEXP vctrs_shared_zero_int; extern SEXP classes_data_frame; extern SEXP classes_factor; extern SEXP classes_ordered; extern SEXP classes_date; extern SEXP classes_posixct; extern SEXP classes_tibble; extern SEXP classes_vctrs_group_rle; extern SEXP strings_dots; extern SEXP strings_empty; extern SEXP strings_tbl; extern SEXP strings_tbl_df; extern SEXP strings_data_frame; extern SEXP strings_date; extern SEXP strings_posixct; extern SEXP strings_posixlt; extern SEXP strings_posixt; extern SEXP strings_factor; extern SEXP strings_ordered; extern SEXP strings_list; extern SEXP strings_none; extern SEXP strings_minimal; extern SEXP strings_unique; extern SEXP strings_universal; extern SEXP strings_check_unique; extern SEXP strings_unique_quiet; extern SEXP strings_universal_quiet; extern SEXP strings_key; extern SEXP strings_loc; extern SEXP strings_val; extern SEXP strings_group; extern SEXP strings_length; extern SEXP strings_vctrs_vctr; extern SEXP strings_times; extern SEXP strings_needles; extern SEXP strings_haystack; extern SEXP chrs_subset; extern SEXP chrs_extract; extern SEXP chrs_assign; extern SEXP chrs_rename; extern SEXP chrs_remove; extern SEXP chrs_negate; extern SEXP chrs_null; extern SEXP chrs_logical; extern SEXP chrs_integer; extern SEXP chrs_double; extern SEXP chrs_complex; extern SEXP chrs_character; extern SEXP chrs_raw; extern SEXP chrs_list; extern SEXP chrs_expression; extern SEXP chrs_numeric; extern SEXP chrs_function; extern SEXP chrs_empty; extern SEXP chrs_cast; extern SEXP chrs_error; extern SEXP chrs_combine; extern SEXP chrs_convert; extern SEXP chrs_asc; extern SEXP chrs_desc; extern SEXP chrs_largest; extern SEXP chrs_smallest; extern SEXP chrs_which; extern SEXP syms_i; extern SEXP syms_j; extern SEXP syms_n; extern SEXP syms_x; extern SEXP syms_y; extern SEXP syms_x_size; extern SEXP syms_y_size; extern SEXP syms_to; extern SEXP syms_dots; extern SEXP syms_bracket; extern SEXP syms_x_arg; extern SEXP syms_y_arg; extern SEXP syms_to_arg; extern SEXP syms_times_arg; extern SEXP syms_subscript_arg; extern SEXP syms_needles_arg; extern SEXP syms_haystack_arg; extern SEXP syms_out; extern SEXP syms_value; extern SEXP syms_quiet; extern SEXP syms_dot_name_spec; extern SEXP syms_outer; extern SEXP syms_inner; extern SEXP syms_tilde; extern SEXP syms_dot_environment; extern SEXP syms_ptype; extern SEXP syms_missing; extern SEXP syms_size; extern SEXP syms_subscript_action; extern SEXP syms_subscript_type; extern SEXP syms_repair; extern SEXP syms_tzone; extern SEXP syms_data; extern SEXP syms_vctrs_error_incompatible_type; extern SEXP syms_vctrs_error_cast_lossy; extern SEXP syms_cnd_signal; extern SEXP syms_logical; extern SEXP syms_numeric; extern SEXP syms_character; extern SEXP syms_body; extern SEXP syms_parent; extern SEXP syms_from_dispatch; extern SEXP syms_s3_fallback; extern SEXP syms_stop_incompatible_type; extern SEXP syms_stop_incompatible_size; extern SEXP syms_stop_assert_size; extern SEXP syms_stop_matches_overflow; extern SEXP syms_stop_matches_nothing; extern SEXP syms_stop_matches_remaining; extern SEXP syms_stop_matches_incomplete; extern SEXP syms_stop_matches_multiple; extern SEXP syms_warn_matches_multiple; extern SEXP syms_stop_matches_relationship_one_to_one; extern SEXP syms_stop_matches_relationship_one_to_many; extern SEXP syms_stop_matches_relationship_many_to_one; extern SEXP syms_warn_matches_relationship_many_to_many; extern SEXP syms_stop_combine_unmatched; extern SEXP syms_action; extern SEXP syms_vctrs_common_class_fallback; extern SEXP syms_fallback_class; extern SEXP syms_abort; extern SEXP syms_message; extern SEXP syms_chr_proxy_collate; extern SEXP syms_actual; extern SEXP syms_required; extern SEXP syms_call; extern SEXP syms_dot_call; extern SEXP syms_which; extern SEXP syms_slice_value; extern SEXP syms_index_style; extern SEXP syms_loc; static const char * const c_strs_vctrs_common_class_fallback = "vctrs:::common_class_fallback"; #define syms_names R_NamesSymbol extern SEXP fns_bracket; extern SEXP fns_quote; extern SEXP fns_names; extern SEXP vctrs_method_table; extern SEXP base_method_table; extern SEXP s4_c_method_table; #if defined(RLIB_DEBUG) SEXP R_inspect(SEXP x); SEXP R_inspect3(SEXP x, int deep, int pvec); #endif #endif vctrs/src/arg-counter.h0000644000176200001440000000272115072256373014615 0ustar liggesusers#ifndef VCTRS_ARG_COUNTER_H #define VCTRS_ARG_COUNTER_H #include "vctrs-core.h" #include "arg.h" struct counters { /* public: */ r_obj* shelter; // Argument tags for the current value of the reduction (the result // so far) and the next value. These handles typically point to the // local counter args, but might also point to external arg objects // like the initial current arg. struct vctrs_arg* curr_arg; struct vctrs_arg* next_arg; /* private: */ // Global counters r_ssize curr; r_ssize next; r_obj* names; struct vctrs_arg curr_counter; struct vctrs_arg next_counter; struct arg_data_counter curr_counter_arg_data; struct arg_data_counter next_counter_arg_data; void* p_data; }; enum counters_shelter { COUNTERS_SHELTER_data = 0, COUNTERS_SHELTER_names, COUNTERS_SHELTER_N }; /** * Swap the argument tags of the reduction * * There are two counters used for generating argument tags when an * error occur during a reduction. The first represent the result so * far, and the second the next input. Call `counters_shift()` to set * the counter of the next input as current counter, and start * iterating with a new counter for the next input. */ void counters_shift(struct counters* counters); r_obj* reduce( r_obj* current, struct vctrs_arg* p_current_arg, struct vctrs_arg* p_parent_arg, r_obj* rest, r_obj* (*impl)(r_obj* current, r_obj* next, struct counters* counters, void* data), void* data ); #endif vctrs/src/complete.h0000644000176200001440000000017214315060310014154 0ustar liggesusers#ifndef VCTRS_COMPLETE_H #define VCTRS_COMPLETE_H #include "vctrs-core.h" r_obj* vec_detect_complete(r_obj* x); #endif vctrs/src/conditions.c0000644000176200001440000001145314362266120014525 0ustar liggesusers#include "vctrs.h" #include "utils.h" // [[ include("vctrs.h") ]] void stop_scalar_type(r_obj* x, struct vctrs_arg* arg, struct r_lazy call) { r_obj* ffi_call = KEEP(r_lazy_eval(call)); ffi_call = KEEP(r_expr_protect(ffi_call)); r_obj* stop_call = KEEP(r_call4(r_sym("stop_scalar_type"), KEEP(r_protect(x)), KEEP(vctrs_arg(arg)), ffi_call)); r_eval(stop_call, vctrs_ns_env); r_stop_unreachable(); } // [[ include("vctrs.h") ]] void stop_assert_size(r_ssize actual, r_ssize required, struct vctrs_arg* arg, struct r_lazy call) { r_obj* ffi_call = KEEP(r_lazy_eval(call)); ffi_call = KEEP(r_expr_protect(ffi_call)); r_obj* syms[5] = { syms_actual, syms_required, r_syms.arg, r_syms.call, NULL }; r_obj* args[5] = { KEEP(r_int(actual)), KEEP(r_int(required)), KEEP(vctrs_arg(arg)), ffi_call, NULL }; r_obj* stop_call = KEEP(r_call_n(syms_stop_assert_size, syms, args)); r_eval(stop_call, vctrs_ns_env); never_reached("stop_assert_size"); } // [[ include("vctrs.h") ]] void stop_incompatible_type(SEXP x, SEXP y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, bool cast) { SEXP syms[6] = { syms_x, syms_y, syms_x_arg, syms_y_arg, syms_action, NULL }; SEXP args[6] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(vctrs_arg(x_arg)), PROTECT(vctrs_arg(y_arg)), cast ? chrs_convert : chrs_combine, NULL }; SEXP call = PROTECT(r_call_n(syms_stop_incompatible_type, syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_type"); } r_no_return void stop_incompatible_size(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { r_obj* syms[8] = { syms_x, syms_y, syms_x_size, syms_y_size, syms_x_arg, syms_y_arg, r_syms.call, NULL }; r_obj* args[8] = { KEEP(r_protect(x)), KEEP(r_protect(y)), KEEP(r_int(x_size)), KEEP(r_int(y_size)), KEEP(vctrs_arg(x_arg)), KEEP(vctrs_arg(y_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_incompatible_size, syms, args)); r_eval(ffi_call, vctrs_ns_env); r_stop_unreachable(); } void stop_recycle_incompatible_size(r_ssize x_size, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call) { r_obj* syms[5] = { r_sym("x_size"), r_sym("size"), r_sym("x_arg"), syms_call, NULL }; r_obj* args[5] = { KEEP(r_int(x_size)), KEEP(r_int(size)), KEEP(vctrs_arg(x_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* stop_call = KEEP(r_call_n(r_sym("stop_recycle_incompatible_size"), syms, args)); r_eval(stop_call, vctrs_ns_env); never_reached("stop_recycle_incompatible_size"); } void stop_incompatible_shape(SEXP x, SEXP y, R_len_t x_size, R_len_t y_size, int axis, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg) { SEXP syms[8] = { r_sym("x"), r_sym("y"), r_sym("x_size"), r_sym("y_size"), r_sym("axis"), r_sym("x_arg"), r_sym("y_arg"), NULL }; SEXP args[8] = { PROTECT(r_protect(x)), PROTECT(r_protect(y)), PROTECT(r_int(x_size)), PROTECT(r_int(y_size)), PROTECT(r_int(axis)), PROTECT(vctrs_arg(p_x_arg)), PROTECT(vctrs_arg(p_y_arg)), NULL }; SEXP call = PROTECT(r_call_n(r_sym("stop_incompatible_shape"), syms, args)); Rf_eval(call, vctrs_ns_env); never_reached("stop_incompatible_shape"); } void stop_corrupt_factor_levels(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_corrupt_factor_levels"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_corrupt_factor_levels"); } void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) { SEXP call = PROTECT(Rf_lang3(Rf_install("stop_corrupt_ordered_levels"), PROTECT(r_protect(x)), PROTECT(vctrs_arg(arg)))); Rf_eval(call, vctrs_ns_env); never_reached("stop_corrupt_ordered_levels"); } vctrs/src/rlang-dev.c0000644000176200001440000000023414315060310014215 0ustar liggesusers#include "vctrs.h" #include "decl/rlang-dev-decl.h" const char* r_obj_type_friendly_length(r_obj* x) { return r_obj_type_friendly_full(x, true, true); } vctrs/src/case-when.c0000644000176200001440000001132315072256373014232 0ustar liggesusers#include "vctrs.h" #include "decl/case-when-decl.h" r_obj* vec_case_when( r_obj* conditions, r_obj* values, r_obj* default_, enum list_combine_unmatched unmatched, r_obj* ptype, r_ssize size, struct vctrs_arg* p_conditions_arg, struct vctrs_arg* p_values_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ) { obj_check_list(conditions, p_conditions_arg, error_call); list_check_all_condition_indices(conditions, p_conditions_arg, error_call); obj_check_list(values, p_values_arg, error_call); list_check_all_vectors(values, VCTRS_ALLOW_NULL_no, p_values_arg, error_call); // Infer `size` from first element of `conditions` unless specified. // We do this in `vec_case_when()` but not in `list_combine()` // because `vec_case_when()` only takes logical indices, which // has less ambiguity about the output size. size = compute_size(size, conditions); list_check_all_size(conditions, size, VCTRS_ALLOW_NULL_no, p_conditions_arg, error_call); const enum list_combine_multiple multiple = LIST_COMBINE_MULTIPLE_first; const enum assignment_slice_value slice_values = ASSIGNMENT_SLICE_VALUE_yes; return list_combine( values, conditions, size, default_, unmatched, multiple, slice_values, ptype, name_spec_inner, p_no_repair_opts, p_values_arg, p_conditions_arg, p_default_arg, error_call ); } r_obj* vec_replace_when( r_obj* x, r_obj* conditions, r_obj* values, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_conditions_arg, struct vctrs_arg* p_values_arg, struct r_lazy error_call ) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, p_x_arg, error_call); r_obj* default_ = x; struct vctrs_arg* p_default_arg = p_x_arg; const enum list_combine_unmatched unmatched = LIST_COMBINE_UNMATCHED_default; const r_ssize size = vec_size(x); r_obj* ptype = KEEP(vec_ptype_final(x, p_x_arg, error_call)); r_obj* out = KEEP(vec_case_when( conditions, values, default_, unmatched, ptype, size, p_conditions_arg, p_values_arg, p_default_arg, error_call )); // `vec_case_when()` creates a new vector and names come from any of `values` // or `default`, but `vec_replace_when()` modifies an existing vector and // should act like `[<-` and `base::replace()`, retaining existing names. // `out` is totally fresh, so we can claim deep ownership over it (though we // only require shallow ownership to set names). r_obj* names = KEEP(vec_names(x)); out = vec_set_names(out, names, VCTRS_OWNERSHIP_deep); FREE(3); return out; } r_obj* ffi_vec_case_when( r_obj* ffi_conditions, r_obj* ffi_values, r_obj* ffi_default, r_obj* ffi_unmatched, r_obj* ffi_ptype, r_obj* ffi_size, r_obj* ffi_frame ) { struct r_lazy conditions_arg_lazy = { .x = syms.conditions_arg, .env = ffi_frame }; struct vctrs_arg conditions_arg = new_lazy_arg(&conditions_arg_lazy); struct r_lazy values_arg_lazy = { .x = syms.values_arg, .env = ffi_frame }; struct vctrs_arg values_arg = new_lazy_arg(&values_arg_lazy); struct r_lazy default_arg_lazy = { .x = syms.default_arg, .env = ffi_frame }; struct vctrs_arg default_arg = new_lazy_arg(&default_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; const r_ssize size = (ffi_size == r_null) ? -1 : r_arg_as_ssize(ffi_size, "size"); const enum list_combine_unmatched unmatched = parse_list_combine_unmatched(ffi_unmatched, error_call); return vec_case_when( ffi_conditions, ffi_values, ffi_default, unmatched, ffi_ptype, size, &conditions_arg, &values_arg, &default_arg, error_call ); } r_obj* ffi_vec_replace_when( r_obj* ffi_x, r_obj* ffi_conditions, r_obj* ffi_values, r_obj* ffi_frame ) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy conditions_arg_lazy = { .x = syms.conditions_arg, .env = ffi_frame }; struct vctrs_arg conditions_arg = new_lazy_arg(&conditions_arg_lazy); struct r_lazy values_arg_lazy = { .x = syms.values_arg, .env = ffi_frame }; struct vctrs_arg values_arg = new_lazy_arg(&values_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; return vec_replace_when( ffi_x, ffi_conditions, ffi_values, &x_arg, &conditions_arg, &values_arg, error_call ); } // Figure out the output size // - `size` if supplied // - Size of 1st `conditions` element if one exists // - Size 0 if `conditions` is empty static r_ssize compute_size(r_ssize size, r_obj* conditions) { if (size != -1) { return size; } if (r_length(conditions) == 0) { return 0; } return r_length(r_list_get(conditions, 0)); } vctrs/src/init.c0000644000176200001440000007057415156543731013340 0ustar liggesusers#include "vctrs.h" #include "altrep-rle.h" #include #include // for NULL #include // Compile with `-fvisibility=hidden -DHAVE_VISIBILITY_ATTRIBUTE` if you link to this library #include #define export attribute_visible extern extern SEXP vctrs_list_get(SEXP, SEXP); extern SEXP vctrs_list_set(SEXP, SEXP, SEXP); extern SEXP vctrs_field_get(SEXP, SEXP); extern SEXP vctrs_field_set(SEXP, SEXP, SEXP); extern SEXP vctrs_fields(SEXP); extern SEXP vctrs_n_fields(SEXP); extern r_obj* ffi_vec_hash(r_obj*); extern r_obj* ffi_obj_hash(r_obj*); extern r_obj* ffi_obj_equal(r_obj* x, r_obj* y); extern SEXP vctrs_duplicated(SEXP); extern SEXP vctrs_unique_loc(SEXP); extern SEXP vctrs_count(SEXP); extern SEXP vctrs_id(SEXP); extern SEXP vctrs_n_distinct(SEXP); extern SEXP vec_split(SEXP, SEXP); extern SEXP vctrs_group_id(SEXP); extern SEXP vctrs_group_rle(SEXP); extern SEXP vec_group_loc(SEXP); extern SEXP ffi_vec_equal(SEXP, SEXP, SEXP, SEXP, SEXP); extern r_obj* ffi_vec_detect_missing(r_obj*); extern r_obj* ffi_vec_any_missing(r_obj* x); extern r_obj* ffi_vec_compare(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_match(SEXP, SEXP, SEXP, SEXP); extern r_obj* vctrs_in(r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_duplicated_any(SEXP); extern r_obj* ffi_size(r_obj*, r_obj*); extern r_obj* ffi_list_sizes(r_obj*, r_obj*); extern SEXP vctrs_dim(SEXP); extern SEXP vctrs_dim_n(SEXP); extern SEXP vctrs_is_unspecified(SEXP); extern SEXP vctrs_typeof(SEXP, SEXP); extern r_obj* ffi_obj_is_vector(r_obj*); extern r_obj* ffi_obj_check_vector(r_obj*, r_obj*); extern r_obj* ffi_vec_check_size(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_check_recyclable(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_ptype2(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_typeof2(r_obj*, r_obj*); extern r_obj* ffi_typeof2_s3(r_obj*, r_obj*); extern r_obj* ffi_cast(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_location(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_slice(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_init(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_chop(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_unchop(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_chop_seq(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_slice_seq(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_slice_rep(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_restore(r_obj*, r_obj*); extern r_obj* ffi_vec_restore_recurse(r_obj*, r_obj*); extern r_obj* ffi_vec_restore_default(r_obj*, r_obj*); extern SEXP vec_proxy_equal(SEXP); extern SEXP vec_proxy_compare(SEXP); extern SEXP vec_proxy_order(SEXP); extern r_obj* ffi_df_proxy(r_obj*, r_obj*); extern SEXP vctrs_unspecified(SEXP); extern r_obj* ffi_ptype(r_obj*, r_obj*, r_obj*); extern r_obj* vec_ptype_finalise(r_obj*); extern r_obj* ffi_minimal_names(r_obj*); extern r_obj* ffi_unique_names(r_obj*, r_obj*); extern SEXP ffi_as_minimal_names(SEXP); extern SEXP vec_names(SEXP); extern SEXP vctrs_is_unique_names(SEXP); extern SEXP vctrs_as_unique_names(SEXP, SEXP); extern SEXP ffi_vec_set_names(SEXP, SEXP); extern r_obj* ffi_df_cast_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_df_ptype2_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_type_info(r_obj*); extern SEXP ffi_proxy_info(SEXP); extern r_obj* ffi_class_type(r_obj*); extern r_obj* ffi_vec_bare_df_restore(r_obj*, r_obj*); extern r_obj* ffi_recycle(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign_seq(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign_compact_condition(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_df_row(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_outer_names(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_df_size(SEXP); extern r_obj* ffi_as_df_col(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_apply_name_spec(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_not_s4(r_obj*); extern SEXP vctrs_validate_name_repair_arg(SEXP); extern SEXP vctrs_validate_minimal_names(SEXP, SEXP); extern r_obj* ffi_vec_as_names(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_obj_is_list(r_obj*); extern SEXP vctrs_try_catch_callback(SEXP, SEXP); extern r_obj* ffi_is_coercible(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_subscript(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_subscript_result(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_df_flatten_info(r_obj*); extern r_obj* df_flatten(r_obj*); extern SEXP vctrs_linked_version(void); extern r_obj* ffi_tib_ptype2(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_tib_cast(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_assign_params(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_has_dim(SEXP); extern r_obj* ffi_vec_rep(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_rep_each(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_maybe_shared_col(SEXP, SEXP); extern SEXP vctrs_new_df_unshared_col(void); extern r_obj* ffi_vec_shaped_ptype(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_shape2(r_obj*, r_obj*, r_obj*); extern SEXP vctrs_new_date(SEXP); extern SEXP vctrs_date_validate(SEXP); extern SEXP vctrs_new_datetime(SEXP, SEXP); extern SEXP vctrs_datetime_validate(SEXP); extern r_obj* ffi_ptype2_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_s3_find_method(SEXP, SEXP, SEXP); extern SEXP vctrs_implements_ptype2(SEXP); extern r_obj* ffi_ptype2_dispatch_native(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cast_dispatch_native(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern SEXP vctrs_fast_c(SEXP, SEXP); extern r_obj* ffi_data_frame(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_df_list(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_detect_run_bounds(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_locate_run_bounds(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_identify_runs(r_obj*, r_obj*); extern r_obj* ffi_vec_run_sizes(r_obj*, r_obj*); extern SEXP vctrs_slice_complete(SEXP); extern SEXP vctrs_locate_complete(SEXP); extern SEXP vctrs_detect_complete(SEXP); extern r_obj* ffi_obj_encode_utf8(r_obj*); extern r_obj* ffi_chr_is_ascii_or_utf8(r_obj*); extern SEXP vctrs_order(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_locate_sorted_groups(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_order_info(SEXP, SEXP, SEXP, SEXP, SEXP); extern r_obj* ffi_vec_unrep(r_obj*, r_obj*); extern SEXP vctrs_fill_missing(SEXP, SEXP, SEXP); extern r_obj* ffi_chr_paste_prefix(r_obj*, r_obj*, r_obj*); extern r_obj* vctrs_rank(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* vctrs_integer64_proxy(r_obj*); extern r_obj* vctrs_integer64_restore(r_obj*); extern r_obj* vctrs_list_drop_empty(r_obj*); extern r_obj* vctrs_is_altrep(r_obj* x); extern r_obj* ffi_list_interleave(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_compute_nesting_container_info(r_obj*, r_obj*); extern r_obj* ffi_locate_matches(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_groups(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_locate_groups(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_complement(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_locate_containers(r_obj*, r_obj*); extern r_obj* ffi_check_list(r_obj*, r_obj*); extern r_obj* ffi_list_all_vectors(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_check_all_vectors(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_short_length(r_obj*, r_obj*); extern r_obj* ffi_s3_get_method(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_all_size(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_check_all_size(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_all_recyclable(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_check_all_recyclable(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_intersect(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_difference(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_union(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_set_symmetric_difference(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_expand_grid(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_list_combine(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_compact_seq(r_obj*, r_obj*, r_obj*); extern r_obj* ffi_as_compact_condition(r_obj*); extern r_obj* ffi_vec_case_when(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_replace_when(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_recode_values(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_replace_values(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_if_else(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_pany(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_pall(r_obj*, r_obj*, r_obj*, r_obj*); // Maturing // In the public header extern bool maturing_obj_is_vector(SEXP); extern R_len_t maturing_short_vec_size(SEXP); extern SEXP maturing_short_vec_recycle(SEXP, R_len_t); // Defunct // Previously were in the public header, but have since been removed // Must be removed from the public header but still exist as a C callable for 1 CRAN release of vctrs, // so that dependent CRAN binaries (slider or tibblify) can be rebuilt with the new public header that // no longer looks for these callables. extern bool defunct_vec_is_vector(SEXP); // Experimental // Exported but not available in the public header extern SEXP exp_vec_cast(SEXP, SEXP); extern SEXP exp_vec_chop(SEXP, SEXP); extern SEXP exp_vec_slice_impl(SEXP, SEXP); extern SEXP exp_vec_names(SEXP); extern SEXP exp_vec_set_names(SEXP, SEXP); extern SEXP exp_short_compact_seq(R_len_t, R_len_t, bool); extern void exp_short_init_compact_seq(int*, R_len_t, R_len_t, bool); // Defined below SEXP vctrs_init_library(SEXP); // Defined in altrep-rle.h extern SEXP altrep_rle_Make(SEXP); extern SEXP altrep_rle_is_materialized(SEXP); void vctrs_init_altrep_rle(DllInfo*); // Defined in altrep-lazy-character.c extern r_obj* ffi_altrep_new_lazy_character(r_obj*); extern r_obj* ffi_altrep_lazy_character_is_materialized(r_obj*); extern void vctrs_init_altrep_lazy_character(DllInfo*); static const R_CallMethodDef CallEntries[] = { {"vctrs_list_get", (DL_FUNC) &vctrs_list_get, 2}, {"vctrs_list_set", (DL_FUNC) &vctrs_list_set, 3}, {"vctrs_field_get", (DL_FUNC) &vctrs_field_get, 2}, {"vctrs_field_set", (DL_FUNC) &vctrs_field_set, 3}, {"vctrs_fields", (DL_FUNC) &vctrs_fields, 1}, {"vctrs_n_fields", (DL_FUNC) &vctrs_n_fields, 1}, {"ffi_vec_hash", (DL_FUNC) &ffi_vec_hash, 1}, {"ffi_obj_hash", (DL_FUNC) &ffi_obj_hash, 1}, {"ffi_obj_equal", (DL_FUNC) &ffi_obj_equal, 2}, {"vctrs_unique_loc", (DL_FUNC) &vctrs_unique_loc, 1}, {"vctrs_duplicated", (DL_FUNC) &vctrs_duplicated, 1}, {"vctrs_duplicated_any", (DL_FUNC) &vctrs_duplicated_any, 1}, {"vctrs_count", (DL_FUNC) &vctrs_count, 1}, {"vctrs_id", (DL_FUNC) &vctrs_id, 1}, {"vctrs_n_distinct", (DL_FUNC) &vctrs_n_distinct, 1}, {"vctrs_split", (DL_FUNC) &vec_split, 2}, {"vctrs_group_id", (DL_FUNC) &vctrs_group_id, 1}, {"vctrs_group_rle", (DL_FUNC) &vctrs_group_rle, 1}, {"vctrs_group_loc", (DL_FUNC) &vec_group_loc, 1}, {"ffi_size", (DL_FUNC) &ffi_size, 2}, {"ffi_list_sizes", (DL_FUNC) &ffi_list_sizes, 2}, {"vctrs_dim", (DL_FUNC) &vctrs_dim, 1}, {"vctrs_dim_n", (DL_FUNC) &vctrs_dim_n, 1}, {"vctrs_is_unspecified", (DL_FUNC) &vctrs_is_unspecified, 1}, {"ffi_vec_equal", (DL_FUNC) &ffi_vec_equal, 5}, {"ffi_vec_detect_missing", (DL_FUNC) &ffi_vec_detect_missing, 1}, {"ffi_vec_any_missing", (DL_FUNC) &ffi_vec_any_missing, 1}, {"ffi_vec_compare", (DL_FUNC) &ffi_vec_compare, 3}, {"vctrs_match", (DL_FUNC) &vctrs_match, 4}, {"vctrs_in", (DL_FUNC) &vctrs_in, 4}, {"vctrs_typeof", (DL_FUNC) &vctrs_typeof, 2}, {"vctrs_init_library", (DL_FUNC) &vctrs_init_library, 1}, {"ffi_obj_is_vector", (DL_FUNC) &ffi_obj_is_vector, 1}, {"ffi_obj_check_vector", (DL_FUNC) &ffi_obj_check_vector, 2}, {"ffi_vec_check_size", (DL_FUNC) &ffi_vec_check_size, 3}, {"ffi_vec_check_recyclable", (DL_FUNC) &ffi_vec_check_recyclable, 3}, {"ffi_ptype2", (DL_FUNC) &ffi_ptype2, 3}, {"ffi_typeof2", (DL_FUNC) &ffi_typeof2, 2}, {"ffi_typeof2_s3", (DL_FUNC) &ffi_typeof2_s3, 2}, {"ffi_cast", (DL_FUNC) &ffi_cast, 3}, {"ffi_as_location", (DL_FUNC) &ffi_as_location, 8}, {"ffi_slice", (DL_FUNC) &ffi_slice, 3}, {"ffi_init", (DL_FUNC) &ffi_init, 3}, {"ffi_vec_chop", (DL_FUNC) &ffi_vec_chop, 3}, {"ffi_list_unchop", (DL_FUNC) &ffi_list_unchop, 6}, {"ffi_vec_chop_seq", (DL_FUNC) &ffi_vec_chop_seq, 4}, {"ffi_slice_seq", (DL_FUNC) &ffi_slice_seq, 4}, {"ffi_slice_rep", (DL_FUNC) &ffi_slice_rep, 3}, {"ffi_vec_restore", (DL_FUNC) &ffi_vec_restore, 2}, {"ffi_vec_restore_recurse", (DL_FUNC) &ffi_vec_restore_recurse, 2}, {"ffi_vec_restore_default", (DL_FUNC) &ffi_vec_restore_default, 2}, {"ffi_vec_proxy", (DL_FUNC) &vec_proxy, 1}, {"ffi_vec_proxy_recurse", (DL_FUNC) &vec_proxy_recurse, 1}, {"vctrs_proxy_equal", (DL_FUNC) &vec_proxy_equal, 1}, {"vctrs_proxy_compare", (DL_FUNC) &vec_proxy_compare, 1}, {"vctrs_proxy_order", (DL_FUNC) &vec_proxy_order, 1}, {"ffi_df_proxy", (DL_FUNC) &ffi_df_proxy, 2}, {"vctrs_unspecified", (DL_FUNC) &vctrs_unspecified, 1}, {"ffi_ptype", (DL_FUNC) &ffi_ptype, 3}, {"vctrs_ptype_finalise", (DL_FUNC) &vec_ptype_finalise, 1}, {"ffi_minimal_names", (DL_FUNC) &ffi_minimal_names, 1}, {"ffi_unique_names", (DL_FUNC) &ffi_unique_names, 2}, {"ffi_as_minimal_names", (DL_FUNC) &ffi_as_minimal_names, 1}, {"vctrs_names", (DL_FUNC) &vec_names, 1}, {"vctrs_is_unique_names", (DL_FUNC) &vctrs_is_unique_names, 1}, {"vctrs_as_unique_names", (DL_FUNC) &vctrs_as_unique_names, 2}, {"ffi_vec_set_names", (DL_FUNC) &ffi_vec_set_names, 2}, {"ffi_df_cast_opts", (DL_FUNC) &ffi_df_cast_opts, 4}, {"ffi_df_ptype2_opts", (DL_FUNC) &ffi_df_ptype2_opts, 4}, {"ffi_type_info", (DL_FUNC) &ffi_type_info, 1}, {"ffi_proxy_info", (DL_FUNC) &ffi_proxy_info, 1}, {"ffi_class_type", (DL_FUNC) &ffi_class_type, 1}, {"ffi_vec_bare_df_restore", (DL_FUNC) &ffi_vec_bare_df_restore, 2}, {"ffi_recycle", (DL_FUNC) &ffi_recycle, 3}, {"ffi_assign", (DL_FUNC) &ffi_assign, 5}, {"ffi_assign_seq", (DL_FUNC) &ffi_assign_seq, 6}, {"ffi_assign_compact_condition", (DL_FUNC) &ffi_assign_compact_condition, 4}, {"ffi_as_df_row", (DL_FUNC) &ffi_as_df_row, 3}, {"ffi_outer_names", (DL_FUNC) &ffi_outer_names, 3}, {"vctrs_df_size", (DL_FUNC) &vctrs_df_size, 1}, {"ffi_as_df_col", (DL_FUNC) &ffi_as_df_col, 3}, {"ffi_apply_name_spec", (DL_FUNC) &ffi_apply_name_spec, 4}, {"ffi_as_not_s4", (DL_FUNC) &ffi_as_not_s4, 1}, {"vctrs_altrep_rle_Make", (DL_FUNC) &altrep_rle_Make, 1}, {"vctrs_altrep_rle_is_materialized", (DL_FUNC) &altrep_rle_is_materialized, 1}, {"ffi_altrep_new_lazy_character", (DL_FUNC) &ffi_altrep_new_lazy_character, 1}, {"ffi_altrep_lazy_character_is_materialized", (DL_FUNC) &ffi_altrep_lazy_character_is_materialized, 1}, {"vctrs_validate_name_repair_arg", (DL_FUNC) &vctrs_validate_name_repair_arg, 1}, {"vctrs_validate_minimal_names", (DL_FUNC) &vctrs_validate_minimal_names, 2}, {"ffi_vec_as_names", (DL_FUNC) &ffi_vec_as_names, 4}, {"ffi_obj_is_list", (DL_FUNC) &ffi_obj_is_list, 1}, {"vctrs_try_catch_callback", (DL_FUNC) &vctrs_try_catch_callback, 2}, {"ffi_is_coercible", (DL_FUNC) &ffi_is_coercible, 4}, {"ffi_as_subscript", (DL_FUNC) &ffi_as_subscript, 5}, {"ffi_as_subscript_result", (DL_FUNC) &ffi_as_subscript_result, 5}, {"ffi_df_flatten_info", (DL_FUNC) &ffi_df_flatten_info, 1}, {"ffi_df_flatten", (DL_FUNC) &df_flatten, 1}, {"vctrs_linked_version", (DL_FUNC) &vctrs_linked_version, 0}, {"ffi_tib_ptype2", (DL_FUNC) &ffi_tib_ptype2, 5}, {"ffi_tib_cast", (DL_FUNC) &ffi_tib_cast, 5}, {"ffi_assign_params", (DL_FUNC) &ffi_assign_params, 6}, {"vctrs_has_dim", (DL_FUNC) &vctrs_has_dim, 1}, {"ffi_vec_rep", (DL_FUNC) &ffi_vec_rep, 3}, {"ffi_vec_rep_each", (DL_FUNC) &ffi_vec_rep_each, 3}, {"vctrs_maybe_shared_col", (DL_FUNC) &vctrs_maybe_shared_col, 2}, {"vctrs_new_df_unshared_col", (DL_FUNC) &vctrs_new_df_unshared_col, 0}, {"ffi_vec_shaped_ptype", (DL_FUNC) &ffi_vec_shaped_ptype, 4}, {"ffi_vec_shape2", (DL_FUNC) &ffi_vec_shape2, 3}, {"vctrs_new_date", (DL_FUNC) &vctrs_new_date, 1}, {"vctrs_date_validate", (DL_FUNC) &vctrs_date_validate, 1}, {"vctrs_new_datetime", (DL_FUNC) &vctrs_new_datetime, 2}, {"vctrs_datetime_validate", (DL_FUNC) &vctrs_datetime_validate, 1}, {"ffi_ptype2_opts", (DL_FUNC) &ffi_ptype2_opts, 4}, {"vctrs_s3_find_method", (DL_FUNC) &vctrs_s3_find_method, 3}, {"vctrs_implements_ptype2", (DL_FUNC) &vctrs_implements_ptype2, 1}, {"ffi_ptype2_dispatch_native", (DL_FUNC) &ffi_ptype2_dispatch_native, 4}, {"ffi_cast_dispatch_native", (DL_FUNC) &ffi_cast_dispatch_native, 6}, {"vctrs_fast_c", (DL_FUNC) &vctrs_fast_c, 2}, {"ffi_data_frame", (DL_FUNC) &ffi_data_frame, 4}, {"ffi_df_list", (DL_FUNC) &ffi_df_list, 5}, {"ffi_vec_detect_run_bounds", (DL_FUNC) &ffi_vec_detect_run_bounds, 3}, {"ffi_vec_locate_run_bounds", (DL_FUNC) &ffi_vec_locate_run_bounds, 3}, {"ffi_vec_identify_runs", (DL_FUNC) &ffi_vec_identify_runs, 2}, {"ffi_vec_run_sizes", (DL_FUNC) &ffi_vec_run_sizes, 2}, {"vctrs_slice_complete", (DL_FUNC) &vctrs_slice_complete, 1}, {"vctrs_locate_complete", (DL_FUNC) &vctrs_locate_complete, 1}, {"vctrs_detect_complete", (DL_FUNC) &vctrs_detect_complete, 1}, {"ffi_obj_encode_utf8", (DL_FUNC) &ffi_obj_encode_utf8, 1}, {"ffi_chr_is_ascii_or_utf8", (DL_FUNC) &ffi_chr_is_ascii_or_utf8, 1}, {"vctrs_order", (DL_FUNC) &vctrs_order, 5}, {"vctrs_locate_sorted_groups", (DL_FUNC) &vctrs_locate_sorted_groups, 5}, {"vctrs_order_info", (DL_FUNC) &vctrs_order_info, 5}, {"ffi_vec_unrep", (DL_FUNC) &ffi_vec_unrep, 2}, {"vctrs_fill_missing", (DL_FUNC) &vctrs_fill_missing, 3}, {"ffi_chr_paste_prefix", (DL_FUNC) &ffi_chr_paste_prefix, 3}, {"vctrs_rank", (DL_FUNC) &vctrs_rank, 7}, {"vctrs_integer64_proxy", (DL_FUNC) &vctrs_integer64_proxy, 1}, {"vctrs_integer64_restore", (DL_FUNC) &vctrs_integer64_restore, 1}, {"vctrs_list_drop_empty", (DL_FUNC) &vctrs_list_drop_empty, 1}, {"vctrs_is_altrep", (DL_FUNC) &vctrs_is_altrep, 1}, {"ffi_list_interleave", (DL_FUNC) &ffi_list_interleave, 6}, {"ffi_compute_nesting_container_info", (DL_FUNC) &ffi_compute_nesting_container_info, 2}, {"ffi_locate_matches", (DL_FUNC) &ffi_locate_matches, 14}, {"ffi_interval_groups", (DL_FUNC) &ffi_interval_groups, 4}, {"ffi_interval_locate_groups", (DL_FUNC) &ffi_interval_locate_groups, 4}, {"ffi_interval_complement", (DL_FUNC) &ffi_interval_complement, 4}, {"ffi_interval_locate_containers", (DL_FUNC) &ffi_interval_locate_containers, 2}, {"ffi_check_list", (DL_FUNC) &ffi_check_list, 2}, {"ffi_list_all_vectors", (DL_FUNC) &ffi_list_all_vectors, 3}, {"ffi_list_check_all_vectors", (DL_FUNC) &ffi_list_check_all_vectors, 3}, {"ffi_as_short_length", (DL_FUNC) &ffi_as_short_length, 2}, {"ffi_s3_get_method", (DL_FUNC) &ffi_s3_get_method, 3}, {"ffi_list_all_size", (DL_FUNC) &ffi_list_all_size, 4}, {"ffi_list_check_all_size", (DL_FUNC) &ffi_list_check_all_size, 4}, {"ffi_list_all_recyclable", (DL_FUNC) &ffi_list_all_recyclable, 4}, {"ffi_list_check_all_recyclable", (DL_FUNC) &ffi_list_check_all_recyclable, 4}, {"ffi_vec_set_intersect", (DL_FUNC) &ffi_vec_set_intersect, 4}, {"ffi_vec_set_difference", (DL_FUNC) &ffi_vec_set_difference, 4}, {"ffi_vec_set_union", (DL_FUNC) &ffi_vec_set_union, 4}, {"ffi_vec_set_symmetric_difference", (DL_FUNC) &ffi_vec_set_symmetric_difference, 4}, {"ffi_vec_expand_grid", (DL_FUNC) &ffi_vec_expand_grid, 4}, {"ffi_list_combine", (DL_FUNC) &ffi_list_combine, 11}, {"ffi_compact_seq", (DL_FUNC) &ffi_compact_seq, 3}, {"ffi_as_compact_condition", (DL_FUNC) &ffi_as_compact_condition, 1}, {"ffi_vec_case_when", (DL_FUNC) &ffi_vec_case_when, 7}, {"ffi_vec_replace_when", (DL_FUNC) &ffi_vec_replace_when, 4}, {"ffi_vec_recode_values", (DL_FUNC) &ffi_vec_recode_values, 9}, {"ffi_vec_replace_values", (DL_FUNC) &ffi_vec_replace_values, 6}, {"ffi_vec_if_else", (DL_FUNC) &ffi_vec_if_else, 6}, {"ffi_vec_pany", (DL_FUNC) &ffi_vec_pany, 4}, {"ffi_vec_pall", (DL_FUNC) &ffi_vec_pall, 4}, {"ffi_exp_vec_cast", (DL_FUNC) &exp_vec_cast, 2}, {NULL, NULL, 0} }; extern r_obj* ffi_ptype_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_ptype_common_params(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_size_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_recycle_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cast_common(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cast_common_opts(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_rbind(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_cbind(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_vec_c(r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_new_data_frame(r_obj*); static const R_ExternalMethodDef ExtEntries[] = { {"ffi_ptype_common", (DL_FUNC) &ffi_ptype_common, 3}, {"ffi_ptype_common_params", (DL_FUNC) &ffi_ptype_common_params, 4}, {"ffi_size_common", (DL_FUNC) &ffi_size_common, 3}, {"ffi_recycle_common", (DL_FUNC) &ffi_recycle_common, 2}, {"ffi_cast_common", (DL_FUNC) &ffi_cast_common, 2}, {"ffi_cast_common_opts", (DL_FUNC) &ffi_cast_common_opts, 3}, {"ffi_rbind", (DL_FUNC) &ffi_rbind, 5}, {"ffi_cbind", (DL_FUNC) &ffi_cbind, 4}, {"ffi_vec_c", (DL_FUNC) &ffi_vec_c, 4}, {"ffi_new_data_frame", (DL_FUNC) &ffi_new_data_frame, -1}, {NULL, NULL, 0} }; export void R_init_vctrs(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, ExtEntries); R_useDynamicSymbols(dll, FALSE); // Maturing // In the public header R_RegisterCCallable("vctrs", "obj_is_vector", (DL_FUNC) &maturing_obj_is_vector); R_RegisterCCallable("vctrs", "short_vec_size", (DL_FUNC) &maturing_short_vec_size); R_RegisterCCallable("vctrs", "short_vec_recycle", (DL_FUNC) &maturing_short_vec_recycle); // Defunct // Previously were in the public header, but have since been removed R_RegisterCCallable("vctrs", "vec_is_vector", (DL_FUNC) &defunct_vec_is_vector); // Experimental // Exported but not available in the public header R_RegisterCCallable("vctrs", "exp_vec_cast", (DL_FUNC) &exp_vec_cast); R_RegisterCCallable("vctrs", "exp_vec_chop", (DL_FUNC) &exp_vec_chop); R_RegisterCCallable("vctrs", "exp_vec_slice_impl", (DL_FUNC) &exp_vec_slice_impl); R_RegisterCCallable("vctrs", "exp_vec_names", (DL_FUNC) &exp_vec_names); R_RegisterCCallable("vctrs", "exp_vec_set_names", (DL_FUNC) &exp_vec_set_names); R_RegisterCCallable("vctrs", "exp_short_compact_seq", (DL_FUNC) &exp_short_compact_seq); R_RegisterCCallable("vctrs", "exp_short_init_compact_seq", (DL_FUNC) &exp_short_init_compact_seq); // Altrep classes vctrs_init_altrep_rle(dll); vctrs_init_altrep_lazy_character(dll); } void vctrs_init_bind(SEXP ns); void vctrs_init_cast(SEXP ns); void vctrs_init_data(SEXP ns); void vctrs_init_dictionary(SEXP ns); void vctrs_init_interval(r_obj* ns); void vctrs_init_match(r_obj* ns); void vctrs_init_names(SEXP ns); void vctrs_init_proxy_restore(SEXP ns); void vctrs_init_slice(SEXP ns); void vctrs_init_slice_assign(SEXP ns); void vctrs_init_subscript(SEXP ns); void vctrs_init_subscript_loc(SEXP ns); void vctrs_init_ptype(r_obj* ns); void vctrs_init_ptype2(SEXP ns); void vctrs_init_ptype2_dispatch(SEXP ns); void vctrs_init_rep(SEXP ns); void vctrs_init_type_data_frame(SEXP ns); void vctrs_init_type_date_time(SEXP ns); void vctrs_init_type_info(SEXP ns); void vctrs_init_unspecified(SEXP ns); void vctrs_init_utils(SEXP ns); void vctrs_init_globals(r_obj* ns); r_obj* vctrs_init_library(r_obj* ns) { r_init_library(ns); vctrs_init_bind(ns); vctrs_init_cast(ns); vctrs_init_data(ns); vctrs_init_dictionary(ns); vctrs_init_interval(ns); vctrs_init_match(ns); vctrs_init_names(ns); vctrs_init_proxy_restore(ns); vctrs_init_slice(ns); vctrs_init_slice_assign(ns); vctrs_init_subscript(ns); vctrs_init_subscript_loc(ns); vctrs_init_ptype(ns); vctrs_init_ptype2(ns); vctrs_init_ptype2_dispatch(ns); vctrs_init_rep(ns); vctrs_init_type_data_frame(ns); vctrs_init_type_date_time(ns); vctrs_init_type_info(ns); vctrs_init_unspecified(ns); vctrs_init_utils(ns); vctrs_init_globals(ns); return r_null; } vctrs/src/subscript.c0000644000176200001440000002273615120513137014374 0ustar liggesusers#include "vctrs.h" #include "decl/subscript-decl.h" r_obj* vec_as_subscript_opts(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { if (vec_dim_n(subscript) != 1) { *err = new_error_subscript_type(subscript, opts, fns_cnd_body_subscript_dim); return r_null; } r_keep_loc subscript_pi; KEEP_HERE(subscript, &subscript_pi); r_obj* orig_names = KEEP(r_names(subscript)); switch (r_typeof(subscript)) { case R_TYPE_null: if (opts->numeric == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = r_globals.empty_int; } break; case R_TYPE_symbol: if (opts->character == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = rlang_sym_as_character(subscript); } break; default: break; } KEEP_AT(subscript, subscript_pi); if (!obj_is_vector(subscript, VCTRS_ALLOW_NULL_no)) { *err = new_error_subscript_type(subscript, opts, r_null); FREE(2); return r_null; } if (r_is_object(subscript)) { subscript = obj_cast_subscript(subscript, opts, err); } else if (r_typeof(subscript) == R_TYPE_double) { subscript = dbl_cast_subscript(subscript, opts, err); } KEEP_AT(subscript, subscript_pi); if (*err) { FREE(2); return r_null; } // Coerce unspecified vectors to integer only if logical indices are // not allowed if (opts->logical == SUBSCRIPT_TYPE_ACTION_ERROR && vec_is_unspecified(subscript)) { struct vctrs_arg* arg = opts->subscript_arg; if (opts->numeric == SUBSCRIPT_TYPE_ACTION_CAST) { subscript = vec_cast(subscript, r_globals.empty_int, arg, NULL, r_lazy_null); } else { subscript = vec_cast(subscript, r_globals.empty_chr, arg, NULL, r_lazy_null); } } KEEP_AT(subscript, subscript_pi); enum subscript_type_action action = SUBSCRIPT_TYPE_ACTION_ERROR; switch (r_typeof(subscript)) { case R_TYPE_logical: action = opts->logical; break; case R_TYPE_integer: action = opts->numeric; break; case R_TYPE_character: action = opts->character; break; default: break; } if (action == SUBSCRIPT_TYPE_ACTION_ERROR) { *err = new_error_subscript_type(subscript, opts, r_null); FREE(2); return r_null; } if (orig_names != r_null) { // FIXME: Handle names in cast methods subscript = r_clone_referenced(subscript); KEEP_AT(subscript, subscript_pi); r_attrib_poke_names(subscript, orig_names); } FREE(2); return subscript; } static r_obj* obj_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { struct vctrs_arg* p_subscript_arg = opts->subscript_arg; struct r_lazy call = opts->call; const enum s3_fallback s3_fallback = S3_FALLBACK_false; struct cast_opts cast_opts = { .x = subscript, .to = r_null, .p_x_arg = p_subscript_arg }; cast_opts.to = r_globals.empty_lgl; if (vec_is_coercible( subscript, cast_opts.to, p_subscript_arg, vec_args.empty, call, s3_fallback )) { return vec_cast_opts(&cast_opts); } cast_opts.to = r_globals.empty_int; if (vec_is_coercible( subscript, cast_opts.to, p_subscript_arg, vec_args.empty, call, s3_fallback )) { return vec_cast_opts(&cast_opts); } cast_opts.to = r_globals.empty_chr; if (vec_is_coercible( subscript, cast_opts.to, p_subscript_arg, vec_args.empty, call, s3_fallback )) { return vec_cast_opts(&cast_opts); } *err = new_error_subscript_type(subscript, opts, r_null); return r_null; } static r_obj* dbl_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { double* p = r_dbl_begin(subscript); r_ssize n = r_length(subscript); r_obj* out = KEEP(r_alloc_integer(n)); int* out_p = r_int_begin(out); for (r_ssize i = 0; i < n; ++i) { double elt = p[i]; // Generally `(int) nan` results in the correct `na_int` value, // but this is not guaranteed, so we have to explicitly check for it. // https://stackoverflow.com/questions/10366485/problems-casting-nan-floats-to-int if (isnan(elt)) { out_p[i] = r_globals.na_int; continue; } if (!isfinite(elt) || elt <= INT_MIN || elt > INT_MAX) { // Once we throw lazy errors from the cast method, we should // throw the error here as well FREE(1); return dbl_cast_subscript_fallback(subscript, opts, err); } int elt_int = (int) elt; if (elt != elt_int) { FREE(1); return dbl_cast_subscript_fallback(subscript, opts, err); } out_p[i] = elt_int; } FREE(1); return out; } static r_obj* dbl_cast_subscript_fallback(r_obj* subscript, const struct subscript_opts* opts, ERR* err) { struct cast_opts cast_opts = { .x = subscript, .to = r_globals.empty_int, opts->subscript_arg }; r_obj* out = KEEP(vec_cast_e(&cast_opts, err)); if (*err) { r_obj* err_obj = KEEP(*err); r_obj* body = KEEP(vctrs_eval_mask1(syms_new_dbl_cast_subscript_body, syms_lossy_err, err_obj)); *err = new_error_subscript_type(subscript, opts, body); FREE(3); return r_null; } FREE(1); return out; } // FFI ----------------------------------------------------------------- // [[ register() ]] r_obj* ffi_as_subscript(r_obj* subscript, r_obj* logical, r_obj* numeric, r_obj* character, r_obj* frame) { struct r_lazy arg_ = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct subscript_opts opts = { .logical = parse_subscript_arg_type(logical, "logical"), .numeric = parse_subscript_arg_type(numeric, "numeric"), .character = parse_subscript_arg_type(character, "character"), .subscript_arg = &arg, .call = call }; ERR err = NULL; r_obj* out = vec_as_subscript_opts(subscript, &opts, &err); KEEP2(out, err); out = r_result_get(out, err); FREE(2); return out; } // [[ register() ]] r_obj* ffi_as_subscript_result(r_obj* subscript, r_obj* logical, r_obj* numeric, r_obj* character, r_obj* frame) { struct r_lazy arg_ = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; struct subscript_opts opts = { .logical = parse_subscript_arg_type(logical, "logical"), .numeric = parse_subscript_arg_type(numeric, "numeric"), .character = parse_subscript_arg_type(character, "character"), .subscript_arg = &arg, .call = call }; ERR err = NULL; r_obj* out = vec_as_subscript_opts(subscript, &opts, &err); KEEP2(out, err); out = r_result(out, err); FREE(2); return out; } // Arguments ------------------------------------------------------------------- static void stop_subscript_arg_type(const char* kind) { r_abort("`%s` must be one of \"cast\" or \"error\".", kind); } static enum subscript_type_action parse_subscript_arg_type(r_obj* x, const char* kind) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_subscript_arg_type(kind); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "cast")) return SUBSCRIPT_TYPE_ACTION_CAST; if (!strcmp(str, "error")) return SUBSCRIPT_TYPE_ACTION_ERROR; stop_subscript_arg_type(kind); r_stop_unreachable(); } // Conditions ------------------------------------------------------------------ static r_obj* new_error_subscript_type(r_obj* subscript, const struct subscript_opts* opts, r_obj* body) { r_obj* logical = subscript_type_action_chr(opts->logical); r_obj* numeric = subscript_type_action_chr(opts->numeric); r_obj* character = subscript_type_action_chr(opts->character); subscript = KEEP(expr_protect(subscript)); r_obj* subscript_arg = KEEP(vctrs_arg(opts->subscript_arg)); r_obj* ffi_call = r_lazy_eval_protect(opts->call); r_obj* syms[] = { syms_i, syms_subscript_arg, syms_subscript_action, syms_call, syms_logical, syms_numeric, syms_character, syms_body, NULL }; r_obj* args[] = { subscript, subscript_arg, get_opts_action(opts), ffi_call, logical, numeric, character, body, NULL }; r_obj* call = KEEP(r_call_n(syms_new_error_subscript_type, syms, args)); r_obj* out = r_eval(call, vctrs_ns_env); FREE(3); return out; } // Init ---------------------------------------------------------------- void vctrs_init_subscript(r_obj* ns) { syms_new_error_subscript_type = r_sym("new_error_subscript_type"); syms_new_dbl_cast_subscript_body = r_sym("new_cnd_bullets_subscript_lossy_cast"); syms_lossy_err = r_sym("lossy_err"); fns_cnd_body_subscript_dim = r_eval(r_sym("cnd_body_subscript_dim"), ns); } static r_obj* fns_cnd_body_subscript_dim = NULL; static r_obj* syms_new_dbl_cast_subscript_body = NULL; static r_obj* syms_lossy_err = NULL; static r_obj* syms_new_error_subscript_type = NULL; vctrs/src/if-else.c0000644000176200001440000005563615157322033013712 0ustar liggesusers#include "if-else.h" #include "vctrs.h" #include "decl/if-else-decl.h" r_obj* ffi_vec_if_else( r_obj* ffi_condition, r_obj* ffi_true, r_obj* ffi_false, r_obj* ffi_missing, r_obj* ffi_ptype, r_obj* ffi_frame ) { struct r_lazy condition_arg_lazy = { .x = syms.condition_arg, .env = ffi_frame }; struct vctrs_arg condition_arg = new_lazy_arg(&condition_arg_lazy); struct r_lazy true_arg_lazy = { .x = syms.true_arg, .env = ffi_frame }; struct vctrs_arg true_arg = new_lazy_arg(&true_arg_lazy); struct r_lazy false_arg_lazy = { .x = syms.false_arg, .env = ffi_frame }; struct vctrs_arg false_arg = new_lazy_arg(&false_arg_lazy); struct r_lazy missing_arg_lazy = { .x = syms.missing_arg, .env = ffi_frame }; struct vctrs_arg missing_arg = new_lazy_arg(&missing_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; return vec_if_else( ffi_condition, ffi_true, ffi_false, ffi_missing, ffi_ptype, &condition_arg, &true_arg, &false_arg, &missing_arg, error_call ); } r_obj* vec_if_else( r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_obj* ptype, struct vctrs_arg* p_condition_arg, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call ) { obj_check_vector(condition, VCTRS_ALLOW_NULL_no, p_condition_arg, error_call); check_condition_index(condition, p_condition_arg, error_call); const r_ssize size = r_length(condition); const bool has_missing = missing != r_null; obj_check_vector(true_, VCTRS_ALLOW_NULL_no, p_true_arg, error_call); obj_check_vector(false_, VCTRS_ALLOW_NULL_no, p_false_arg, error_call); if (has_missing) { obj_check_vector(missing, VCTRS_ALLOW_NULL_no, p_missing_arg, error_call); } ptype = KEEP(ptype_finalize( ptype, true_, false_, missing, has_missing, p_true_arg, p_false_arg, p_missing_arg, error_call )); r_obj* out; if (ptype_is_atomic(ptype)) { out = atomic_if_else( condition, true_, false_, missing, ptype, size, p_true_arg, p_false_arg, p_missing_arg, error_call, has_missing ); } else { out = generic_if_else( condition, true_, false_, missing, ptype, size, p_true_arg, p_false_arg, p_missing_arg, error_call ); } FREE(1); return out; } static r_obj* generic_if_else( r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_obj* ptype, r_ssize size, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call ) { r_obj* conditions = KEEP(r_alloc_list(2)); r_list_poke(conditions, 0, condition); // TODO: This is another place where we could use a compact-condition // if `list_combine()` could take them as `indices`. Would be 3x less // memory, which probably does matter quite a bit for simple cases. r_list_poke(conditions, 1, r_lgl_invert(condition)); r_obj* values = KEEP(r_alloc_list(2)); r_list_poke(values, 0, true_); r_list_poke(values, 1, false_); // Must materialize tags to set names on the list, for use in error messages r_obj* true_arg = KEEP(vctrs_arg(p_true_arg)); r_obj* false_arg = KEEP(vctrs_arg(p_false_arg)); r_obj* names = r_alloc_character(2); r_attrib_poke_names(values, names); r_chr_poke(names, 0, r_chr_get(true_arg, 0)); r_chr_poke(names, 1, r_chr_get(false_arg, 0)); // We want `missing` to be used const enum list_combine_unmatched unmatched = LIST_COMBINE_UNMATCHED_default; // This won't matter, there are no overlaps, so we choose the default "cheaper" option const enum list_combine_multiple multiple = LIST_COMBINE_MULTIPLE_first; // All of `true`, `false`, and `missing` are size 1 or size `size` const enum assignment_slice_value slice_values = ASSIGNMENT_SLICE_VALUE_yes; // We want error messages from outer names, but don't want them on the output r_obj* name_spec = name_spec_inner; // No name repair const struct name_repair_opts* p_name_repair_opts = p_no_repair_opts; // Don't use outer names if any errors occur struct vctrs_arg* p_values_arg = vec_args.empty; struct vctrs_arg* p_conditions_arg = vec_args.empty; r_obj* out = list_combine( values, conditions, size, missing, unmatched, multiple, slice_values, ptype, name_spec, p_name_repair_opts, p_values_arg, p_conditions_arg, p_missing_arg, error_call ); FREE(4); return out; } static r_obj* atomic_if_else( r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_obj* ptype, r_ssize size, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call, bool has_missing ) { int n_prot = 0; // `true`, `false`, and `missing` must all recycle to the size of `condition` const r_ssize true_size = vec_check_recyclable(true_, size, VCTRS_ALLOW_NULL_no, p_true_arg, error_call); const r_ssize false_size = vec_check_recyclable(false_, size, VCTRS_ALLOW_NULL_no, p_false_arg, error_call); const r_ssize missing_size = has_missing ? vec_check_recyclable(missing, size, VCTRS_ALLOW_NULL_no, p_missing_arg, error_call) : 0; // Grab names before casting as casting may drop them // https://github.com/r-lib/vctrs/issues/623 r_obj* true_names = KEEP_N(r_names(true_), &n_prot); r_obj* false_names = KEEP_N(r_names(false_), &n_prot); r_obj* missing_names = KEEP_N(has_missing ? r_names(missing) : r_null, &n_prot); const bool has_true_names = true_names != r_null; const bool has_false_names = false_names != r_null; const bool has_missing_names = missing_names != r_null; true_ = KEEP_N( vec_cast( true_, ptype, p_true_arg, vec_args.empty, error_call ), &n_prot ); false_ = KEEP_N( vec_cast( false_, ptype, p_false_arg, vec_args.empty, error_call ), &n_prot ); if (has_missing) { missing = KEEP_N( vec_cast( missing, ptype, p_missing_arg, vec_args.empty, error_call ), &n_prot ); } r_obj* out = atomic_if_else_switch( r_typeof(ptype), condition, true_, false_, missing, size, true_size, false_size, missing_size, true_names, false_names, missing_names, has_missing, has_true_names, has_false_names, has_missing_names ); FREE(n_prot); return out; } // Extremely sensitive loop! // // In our testing, using a ternary style works best, and somehow // works even better than using explicit if/else #define ATOMIC_IF_ELSE_LOOP( \ ELT_TRUE, \ ELT_FALSE, \ ELT_MISSING, \ CTYPE, \ DEREF, \ SET \ ) do { \ DEREF; \ for (r_ssize i = 0; i < size; ++i) { \ const int cnd = v_condition[i]; \ CTYPE elt = (cnd == 1) ? ELT_TRUE : (cnd == 0) ? ELT_FALSE : ELT_MISSING; \ SET; \ } \ } while (0) // `r_chr_poke()` takes the majority of the time in the named case. We don't // want to slow down the main data loop with extra branching, so we only do this // if have to and just loop through a second time. #define ATOMIC_IF_ELSE_NAMES_LOOP( \ ELT_TRUE, \ ELT_FALSE, \ ELT_MISSING \ ) do { \ for (r_ssize i = 0; i < size; ++i) { \ const int cnd = v_condition[i]; \ r_obj* elt = (cnd == 1) ? ELT_TRUE : (cnd == 0) ? ELT_FALSE : ELT_MISSING; \ r_chr_poke(names, i, elt); \ } \ } while (0) // Core routing logic for atomic if-else // // Yes, this is insane. Yes, it is worth it to maximally avoid branching. #define ATOMIC_IF_ELSE( \ CTYPE, \ DEREF, \ SET, \ CONST_DEREF, \ ALLOC, \ MISSING \ ) do { \ const int* v_condition = r_lgl_cbegin(condition); \ \ if (!has_missing) { \ missing = MISSING; \ missing_size = 1; \ has_missing_names = false; \ } \ \ const bool true_recycles = true_size == 1; \ const bool false_recycles = false_size == 1; \ const bool missing_recycles = missing_size == 1; \ \ CTYPE const* v_true = CONST_DEREF(true_); \ CTYPE const* v_false = CONST_DEREF(false_); \ CTYPE const* v_missing = CONST_DEREF(missing); \ \ const bool has_names = has_true_names || has_false_names || has_missing_names; \ \ true_names = has_true_names ? true_names : r_chrs.empty_string; \ false_names = has_false_names ? false_names : r_chrs.empty_string; \ missing_names = has_missing_names ? missing_names : r_chrs.empty_string; \ \ const bool true_names_recycles = has_true_names ? true_recycles : true; \ const bool false_names_recycles = has_false_names ? false_recycles : true; \ const bool missing_names_recycles = has_missing_names ? missing_recycles : true; \ \ r_obj* const* v_true_names = r_chr_cbegin(true_names); \ r_obj* const* v_false_names = r_chr_cbegin(false_names); \ r_obj* const* v_missing_names = r_chr_cbegin(missing_names); \ \ r_obj* out = KEEP(ALLOC(size)); \ \ if (true_recycles) { \ if (false_recycles) { \ if (missing_recycles) { \ ATOMIC_IF_ELSE_LOOP(v_true[0], v_false[0], v_missing[0], CTYPE, DEREF, SET); \ } else { \ ATOMIC_IF_ELSE_LOOP(v_true[0], v_false[0], v_missing[i], CTYPE, DEREF, SET); \ } \ } else { \ if (missing_recycles) { \ ATOMIC_IF_ELSE_LOOP(v_true[0], v_false[i], v_missing[0], CTYPE, DEREF, SET); \ } else { \ ATOMIC_IF_ELSE_LOOP(v_true[0], v_false[i], v_missing[i], CTYPE, DEREF, SET); \ } \ } \ } else { \ if (false_recycles) { \ if (missing_recycles) { \ ATOMIC_IF_ELSE_LOOP(v_true[i], v_false[0], v_missing[0], CTYPE, DEREF, SET); \ } else { \ ATOMIC_IF_ELSE_LOOP(v_true[i], v_false[0], v_missing[i], CTYPE, DEREF, SET); \ } \ } else { \ if (missing_recycles) { \ ATOMIC_IF_ELSE_LOOP(v_true[i], v_false[i], v_missing[0], CTYPE, DEREF, SET); \ } else { \ ATOMIC_IF_ELSE_LOOP(v_true[i], v_false[i], v_missing[i], CTYPE, DEREF, SET); \ } \ } \ } \ \ if (has_names) { \ r_obj* names = r_alloc_character(size); \ r_attrib_poke_names(out, names); \ \ if (true_names_recycles) { \ if (false_names_recycles) { \ if (missing_names_recycles) { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[0], v_false_names[0], v_missing_names[0]); \ } else { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[0], v_false_names[0], v_missing_names[i]); \ } \ } else { \ if (missing_names_recycles) { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[0], v_false_names[i], v_missing_names[0]); \ } else { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[0], v_false_names[i], v_missing_names[i]); \ } \ } \ } else { \ if (false_names_recycles) { \ if (missing_names_recycles) { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[i], v_false_names[0], v_missing_names[0]); \ } else { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[i], v_false_names[0], v_missing_names[i]); \ } \ } else { \ if (missing_names_recycles) { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[i], v_false_names[i], v_missing_names[0]); \ } else { \ ATOMIC_IF_ELSE_NAMES_LOOP(v_true_names[i], v_false_names[i], v_missing_names[i]); \ } \ } \ } \ } \ \ FREE(1); \ return out; \ } while (0) static r_obj* atomic_if_else_switch( enum r_type type, r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_ssize size, r_ssize true_size, r_ssize false_size, r_ssize missing_size, r_obj* true_names, r_obj* false_names, r_obj* missing_names, bool has_missing, bool has_true_names, bool has_false_names, bool has_missing_names ) { switch (type) { case R_TYPE_logical: ATOMIC_IF_ELSE( int, int* v_out = r_lgl_begin(out), v_out[i] = elt, r_lgl_cbegin, r_alloc_logical, vctrs_shared_missing_lgl ); case R_TYPE_integer: ATOMIC_IF_ELSE( int, int* v_out = r_int_begin(out), v_out[i] = elt, r_int_cbegin, r_alloc_integer, vctrs_shared_missing_int ); case R_TYPE_double: ATOMIC_IF_ELSE( double, double* v_out = r_dbl_begin(out), v_out[i] = elt, r_dbl_cbegin, r_alloc_double, vctrs_shared_missing_dbl ); case R_TYPE_complex: ATOMIC_IF_ELSE( r_complex, r_complex* v_out = r_cpl_begin(out), v_out[i] = elt, r_cpl_cbegin, r_alloc_complex, vctrs_shared_missing_cpl ); case R_TYPE_raw: ATOMIC_IF_ELSE( Rbyte, Rbyte* v_out = r_raw_begin(out), v_out[i] = elt, r_raw_cbegin, r_alloc_raw, vctrs_shared_missing_raw ); case R_TYPE_character: ATOMIC_IF_ELSE( r_obj*, NULL, r_chr_poke(out, i, elt), r_chr_cbegin, r_alloc_character, vctrs_shared_missing_chr ); case R_TYPE_list: ATOMIC_IF_ELSE( r_obj*, NULL, r_list_poke(out, i, elt), r_list_cbegin, r_alloc_list, vctrs_shared_missing_list ); default: r_stop_unreachable(); } } #undef ATOMIC_IF_ELSE_LOOP #undef ATOMIC_IF_ELSE_NAMES_LOOP #undef ATOMIC_IF_ELSE // Determine if `ptype` can use the fast path or not // // We are extremely strict here, only allowing the fast // path for the absolute simplest cases of: // // - Atomic vectors, i.e. no classed objects // - No dim, i.e. no matrix or array // // Notably having extraneous attributes on an atomic vector is allowed, but they // are dropped from the output, which is in line with our general belief that // extraneous attributes are not part of the ptype. // // Names are always retained. static bool ptype_is_atomic(r_obj* ptype) { if (r_is_object(ptype)) { return false; } if (has_dim(ptype)) { return false; } return true; } static r_obj* ptype_finalize( r_obj* ptype, r_obj* true_, r_obj* false_, r_obj* missing, bool has_missing, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call ) { if (ptype != r_null) { // Performs scalar checks and whatnot return vec_ptype_final(ptype, vec_args.ptype, error_call); } int n_prot = 0; // Initialize `left` to point to `true`. // If we don't initialize it, there may be rare corner cases where it never // gets set by `vec_ptype2()`, in which case we don't want to hit the // unreachable error (bad user experience). int left = 1; // Common type of `true` and `false` ptype = KEEP_N( vec_ptype2( true_, false_, p_true_arg, p_false_arg, error_call, S3_FALLBACK_false, &left ), &n_prot ); // Mix in `missing` if needed if (has_missing) { // Same logic as `vec_ptype_common()` // 1 = `x` won // 0 = `y` won // -1 = same type, stick with `x` struct vctrs_arg* p_ptype_arg; switch (left) { case 1: p_ptype_arg = p_true_arg; break; case 0: p_ptype_arg = p_false_arg; break; case -1: p_ptype_arg = p_true_arg; break; default: r_stop_unreachable(); } ptype = KEEP_N( vec_ptype2( ptype, missing, p_ptype_arg, p_missing_arg, error_call, S3_FALLBACK_false, &left ), &n_prot ); } // Finalize on the way out ptype = vec_ptype_finalise(ptype); FREE(n_prot); return ptype; } vctrs/src/strides.h0000644000176200001440000001405715060045711014036 0ustar liggesusers#ifndef VCTRS_STRIDES_H #define VCTRS_STRIDES_H #include "vctrs-core.h" #include "utils.h" #include "dim.h" /* * Array slicing works by treating the array as a 1D structure, and transforming * the `index` passed from R into a series of 1D indices that are used to * extract elements from `x` into the new result. * * Strides represent the offset between elements in the same dimension. For * a (2, 2, 2) array, the strides would be [1, 2, 4]. This means that if you * flattened this 3D array to 1D in a column major order, there is 1 space * between row elements, 2 spaces between column elements and 4 spaces between * elements in the third dimension. In practice, we only need the shape strides * since the first stride is always 1, so `vec_strides()` only returns the shape * strides. Strides are computed as a cumulative product of the `dim`, with an * initial value of `1`, this is what `vec_strides()` does. * * Using the strides, any array index can be converted to a 1D index. * This is what `vec_strided_loc()` does. In a (2, 2, 2) array, to find * the location at the index [1, 0, 1] (C-based index, 2nd row, 1st col, * 2nd elem in 3rd dim) you compute a sum product between the array index * and the strides. So it looks like: * loc = 1 * (1) + 0 * (2) + 1 * (4) = 5 * (loc is a C-based index into `x`) * Since the first stride is always one, we leave it off and just do: * loc = 1 + 0 * (2) + 1 * (4) = 5 * * Example: * x = (3, 3, 2) array * vec_slice(x, 2:3) * * strides = [3, 9] // (shape strides) * * Indices are C-based * * | array index | x index | how? * ------------------------------------------------------- * out[0] | [1, 0, 0] | 1 | 1 + 0 * (3) + 0 * (9) * out[1] | [2, 0, 0] | 2 | * out[2] | [1, 1, 0] | 4 | * ... | ... | ... | * out[9] | [2, 1, 1] | 14 | 2 + 1 * (3) + 1 * (9) * out[10] | [1, 2, 1] | 16 | * out[11] | [2, 2, 1] | 17 | * ^ ^ ^ * | \/ * | |- shape_index * |- size_index */ struct strides_info { SEXP dim; const int* p_dim; SEXP strides; const int* p_strides; SEXP index; const int* p_index; SEXP steps; const int* p_steps; SEXP shape_index; int* p_shape_index; R_len_t dim_n; R_len_t shape_n; R_len_t index_n; R_len_t shape_elem_n; }; #define PROTECT_STRIDES_INFO(info, n) do { \ PROTECT((info)->dim); \ PROTECT((info)->strides); \ PROTECT((info)->index); \ PROTECT((info)->steps); \ PROTECT((info)->shape_index); \ *(n) += 5; \ } while(0) static inline SEXP vec_strides(const int* p_dim, const R_len_t shape_n) { SEXP strides = PROTECT(Rf_allocVector(INTSXP, shape_n)); int* p_strides = INTEGER(strides); int stride = 1; for (int i = 0; i < shape_n; ++i) { stride *= p_dim[i]; p_strides[i] = stride; } UNPROTECT(1); return strides; } static inline SEXP vec_steps(const int* p_index, const R_len_t index_n) { SEXP steps = PROTECT(Rf_allocVector(INTSXP, index_n)); int* p_steps = INTEGER(steps); // Indices come in 1-based int index_previous = 1; for (R_len_t i = 0; i < index_n; ++i) { const int index_current = p_index[i]; if (index_current == NA_INTEGER) { p_steps[i] = NA_INTEGER; continue; } p_steps[i] = index_current - index_previous; index_previous = index_current; } UNPROTECT(1); return steps; } static inline R_len_t vec_strided_loc(const int* p_shape_index, const int* p_strides, const R_len_t shape_n) { R_len_t loc = 0; for (R_len_t i = 0; i < shape_n; ++i) { loc += p_strides[i] * p_shape_index[i]; } return loc; } // Increment the `shape_index` value. This iterates through the array like: // [size, 0, 0] // [size, 1, 0] // [size, 0, 1] // [size, 1, 1] // ... static inline void vec_shape_index_increment(struct strides_info* p_info) { for (int j = 0; j < p_info->shape_n; ++j) { p_info->p_shape_index[j]++; if (p_info->p_shape_index[j] < p_info->p_dim[j + 1]) { break; } p_info->p_shape_index[j] = 0; } } // Given array dimensions like: // [i, j, k, l] // this gives you a count of: // j * k * l // which tells you the number of times an outer loop would // need to iterate to walk over the array if some inner loop // handled iteration over `i`, i.e. size. static inline R_len_t vec_shape_elem_n(const int* p_dim, const R_len_t dim_n) { R_len_t shape_elem_n = 1; for (int i = 1; i < dim_n; ++i) { shape_elem_n *= p_dim[i]; } return shape_elem_n; } static inline struct strides_info new_strides_info(SEXP x, SEXP index) { SEXP dim = PROTECT(vec_dim(x)); const int* p_dim = INTEGER_RO(dim); R_len_t dim_n = Rf_length(dim); R_len_t shape_n = dim_n - 1; R_len_t index_n = vec_subscript_size(index); SEXP strides = PROTECT(vec_strides(p_dim, shape_n)); const int* p_strides = INTEGER_RO(strides); const int* p_index = INTEGER_RO(index); // If using a compact rep/seq, the `steps` won't be used, but we still // need to put something in the struct SEXP steps; if (is_compact_rep(index) || is_compact_seq(index)) { steps = r_globals.empty_int; } else { steps = vec_steps(p_index, index_n); } PROTECT(steps); const int* p_steps = INTEGER_RO(steps); // Initialize `shape_index` to the first element SEXP shape_index = PROTECT(Rf_allocVector(INTSXP, shape_n)); int* p_shape_index = INTEGER(shape_index); for (int i = 0; i < shape_n; ++i) { p_shape_index[i] = 0; } R_len_t shape_elem_n = vec_shape_elem_n(p_dim, dim_n); struct strides_info info = { .dim = dim, .p_dim = p_dim, .strides = strides, .p_strides = p_strides, .index = index, .p_index = p_index, .steps = steps, .p_steps = p_steps, .shape_index = shape_index, .p_shape_index = p_shape_index, .dim_n = dim_n, .shape_n = shape_n, .index_n = index_n, .shape_elem_n = shape_elem_n }; UNPROTECT(4); return info; } #endif vctrs/src/globals.h0000644000176200001440000000457515156001116014005 0ustar liggesusers#ifndef VCTRS_GLOBALS_H #define VCTRS_GLOBALS_H #include struct syms { r_obj* arg; r_obj* condition_arg; r_obj* conditions_arg; r_obj* default_arg; r_obj* dot_arg; r_obj* dot_call; r_obj* dot_error_arg; r_obj* dot_error_call; r_obj* false_arg; r_obj* from_arg; r_obj* haystack_arg; r_obj* missing_arg; r_obj* indices_arg; r_obj* needles_arg; r_obj* recurse; r_obj* repair_arg; r_obj* times_arg; r_obj* to_arg; r_obj* true_arg; r_obj* value_arg; r_obj* values_arg; r_obj* vec_default_cast; r_obj* vec_slice_altrep; r_obj* vec_slice_dispatch_integer64; r_obj* vec_slice_fallback; r_obj* vec_slice_fallback_integer64; r_obj* x_arg; r_obj* y_arg; }; // These structs must be in sync as their elements are defined // together by the `INIT_STRING()` macro struct strings { r_obj* AsIs; r_obj* repair; r_obj* location; r_obj* condition; }; struct chrs { r_obj* AsIs; r_obj* repair; r_obj* location; r_obj* condition; }; struct fns { r_obj* vec_slice_altrep; r_obj* vec_slice_dispatch_integer64; r_obj* vec_slice_fallback; r_obj* vec_slice_fallback_integer64; }; struct vec_args { struct vctrs_arg* dot_name_repair; struct vctrs_arg* dot_ptype; struct vctrs_arg* dot_size; struct vctrs_arg* empty; struct vctrs_arg* i; struct vctrs_arg* max_fill; struct vctrs_arg* n; struct vctrs_arg* value; struct vctrs_arg* x; struct vctrs_arg* y; struct vctrs_arg* indices; struct vctrs_arg* sizes; struct vctrs_arg* ptype; struct vctrs_arg* size; }; struct lazy_args { struct r_lazy dot_name_repair; }; struct lazy_calls { struct r_lazy vec_assign; struct r_lazy vec_assign_seq; struct r_lazy vec_init; struct r_lazy vec_ptype_finalise; struct r_lazy vec_recycle; struct r_lazy vec_size; }; extern struct syms syms; extern struct strings strings; extern struct chrs chrs; extern struct fns fns; extern struct vec_args vec_args; extern struct lazy_args lazy_args; extern struct lazy_calls lazy_calls; extern r_obj* vctrs_shared_empty_date; extern r_obj* vctrs_shared_empty_uns; extern Rcomplex vctrs_shared_na_cpl; extern r_obj* vctrs_shared_missing_lgl; extern r_obj* vctrs_shared_missing_int; extern r_obj* vctrs_shared_missing_dbl; extern r_obj* vctrs_shared_missing_cpl; extern r_obj* vctrs_shared_missing_raw; extern r_obj* vctrs_shared_missing_chr; extern r_obj* vctrs_shared_missing_list; #endif vctrs/src/slice-chop.c0000644000176200001440000004234215157242340014404 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" /* * Index manager/generator for chopping purposes * * There are 3 types of possible indices: * - If `indices = NULL, sizes = NULL`, then we use a sequential size 1 index * that just increments by 1 from `0` to `vec_size(x) - 1`. * - If `indices` is supplied, then each element of `indices` is an integer * vector of locations to chop with. * - If `sizes` is supplied, then each element of `sizes` is the size of the * current slice to chop. The sizes are accumulated in order to get the * start location of the next slice. * * - Generate the next index with `indices_next()`. * - Generate the output size with `indices_out_size()`. * * @member shelter The shelter to protect the entire chop indices manager. * @member indices, v_indices * - If `NULL`, then `indices` aren't being used. `v_indices` is set to * `NULL`. * - Otherwise, a list of integer vector indices to chop with. `v_indices` is * set to `r_list_cbegin(indices)`. * @member sizes, v_sizes * - If `NULL`, then `sizes` aren't being used. `v_sizes` is set to `NULL`. * - Otherwise, an integer vector of sequential sizes to chop with. `v_sizes` * is set to `r_int_cbegin(sizes)`. * @member index, p_index * - If neither `indices` nor `sizes` are provided, `index` is a scalar * integer vector that starts at 0 and is incremented by 1 at every * iteration. `p_index` points to `r_int_begin(index)` and is used to * perform the increment. * - If `indices` is provided, this is set to the i-th element of `indices` * at each iteration, and `p_index` is set to `NULL`. * - If `sizes` is provided, this is a compact-seq representing the i-th * slice. `p_index` points to `r_int_begin(index)` and is used to updated * the compact-seq at each iteration. * @member has_indices Whether or not `indices` was provided. * @member has_sizes Whether or not `sizes` was provided. * @member loc The current iteration value. */ struct vctrs_chop_indices { r_obj* shelter; r_obj* indices; r_obj* const* v_indices; r_obj* sizes; const int* v_sizes; r_obj* index; int* p_index; bool has_indices; bool has_sizes; r_ssize loc; }; #include "decl/slice-chop-decl.h" // ----------------------------------------------------------------------------- static struct vctrs_chop_indices* new_chop_indices(r_obj* x, r_obj* indices, r_obj* sizes) { r_obj* shelter = KEEP(r_alloc_list(4)); r_obj* self = r_alloc_raw(sizeof(struct vctrs_chop_indices)); r_list_poke(shelter, 0, self); struct vctrs_chop_indices* p_indices = r_raw_begin(self); p_indices->shelter = shelter; p_indices->indices = indices; r_list_poke(p_indices->shelter, 1, p_indices->indices); p_indices->has_indices = p_indices->indices != r_null; p_indices->sizes = sizes; r_list_poke(p_indices->shelter, 2, p_indices->sizes); p_indices->has_sizes = p_indices->sizes != r_null; if (p_indices->has_indices) { p_indices->v_indices = r_list_cbegin(p_indices->indices); p_indices->v_sizes = NULL; p_indices->index = r_null; r_list_poke(p_indices->shelter, 3, p_indices->index); p_indices->p_index = NULL; } else if (p_indices->has_sizes) { p_indices->v_indices = NULL; p_indices->v_sizes = r_int_cbegin(p_indices->sizes); p_indices->index = compact_seq(0, 0, true); r_list_poke(p_indices->shelter, 3, p_indices->index); p_indices->p_index = r_int_begin(p_indices->index); } else { p_indices->v_indices = NULL; p_indices->v_sizes = NULL; p_indices->index = r_int(0); r_list_poke(p_indices->shelter, 3, p_indices->index); p_indices->p_index = r_int_begin(p_indices->index); } p_indices->loc = 0; FREE(1); return p_indices; } /* * Generate the next `index` * * You can assume that the returned `index` is always protected by `p_indices`, * so the caller doesn't need to protect it. */ static inline r_obj* indices_next(struct vctrs_chop_indices* p_indices) { const r_ssize loc = p_indices->loc; ++(p_indices->loc); if (p_indices->has_indices) { return p_indices->v_indices[loc]; } else if (p_indices->has_sizes) { const r_ssize start = p_indices->p_index[0] + p_indices->p_index[1]; const r_ssize size = p_indices->v_sizes[loc]; const bool increasing = true; init_compact_seq(p_indices->p_index, start, size, increasing); return p_indices->index; } else { *p_indices->p_index = loc + 1; return p_indices->index; } } static inline r_ssize indices_out_size(struct vctrs_chop_indices* p_indices, r_obj* x) { if (p_indices->has_indices) { return r_length(p_indices->indices); } else if (p_indices->has_sizes) { return r_length(p_indices->sizes); } else { return vec_size(x); } } // ----------------------------------------------------------------------------- r_obj* ffi_vec_chop_seq(r_obj* x, r_obj* starts, r_obj* sizes, r_obj* increasings) { int* v_starts = r_int_begin(starts); int* v_sizes = r_int_begin(sizes); int* v_increasings = r_lgl_begin(increasings); const r_ssize n = r_length(starts); r_obj* indices = KEEP(r_alloc_list(n)); for (r_ssize i = 0; i < n; ++i) { r_obj* index = compact_seq(v_starts[i], v_sizes[i], v_increasings[i]); r_list_poke(indices, i, index); } r_obj* out = KEEP(vec_chop_unsafe(x, indices, r_null)); FREE(2); return out; } r_obj* ffi_vec_chop(r_obj* x, r_obj* indices, r_obj* sizes) { return vec_chop(x, indices, sizes); } r_obj* vec_chop(r_obj* x, r_obj* indices, r_obj* sizes) { const r_ssize n = vec_size(x); r_obj* names = KEEP(vec_names(x)); if (indices != r_null && sizes != r_null) { r_abort_lazy_call(r_lazy_null, "Can't supply both `indices` and `sizes`."); } if (indices != r_null) { const bool allow_compact = false; indices = list_as_locations(indices, n, names, allow_compact); } KEEP(indices); if (sizes != r_null) { sizes = vec_as_chop_sizes(sizes, n); } KEEP(sizes); r_obj* out = vec_chop_unsafe(x, indices, sizes); FREE(3); return out; } // Performance variant that doesn't check the types or values of `indices` / `sizes` r_obj* vec_chop_unsafe(r_obj* x, r_obj* indices, r_obj* sizes) { struct vctrs_proxy_info info = vec_proxy_info(x); KEEP(info.inner); struct vctrs_chop_indices* p_indices = new_chop_indices(x, indices, sizes); KEEP(p_indices->shelter); r_obj* out = vec_chop_base(x, info, p_indices); FREE(2); return out; } static r_obj* vec_chop_base(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { if (vec_requires_fallback(x, info)) { // Fallback to `[` if the class doesn't implement a proxy. This is // to be maximally compatible with existing classes. if (info.type == VCTRS_TYPE_scalar) { r_abort_lazy_call(r_lazy_null, "Can't slice a scalar"); } if (has_dim(x)) { return chop_fallback_shaped(x, p_indices); } else { return chop_fallback(x, p_indices); } } switch (info.type) { case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: case VCTRS_TYPE_list: { if (has_dim(x)) { return chop_shaped(x, info, p_indices); } else { return chop(x, info, p_indices); } } case VCTRS_TYPE_dataframe: { return chop_df(x, info, p_indices); } default: obj_check_vector(x, VCTRS_ALLOW_NULL_no, vec_args.empty, r_lazy_null); stop_unimplemented_vctrs_type("vec_chop_base", info.type); } } static r_obj* chop(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { r_obj* proxy = info.inner; r_obj* names = KEEP(r_names(proxy)); const enum vctrs_type type = info.type; const r_ssize out_size = indices_out_size(p_indices, proxy); r_obj* out = KEEP(r_alloc_list(out_size)); // Treat `elt` as owned after slicing (we also poke its names directly). // `vec_proxy_info()` doesn't recursively proxy. const struct vec_restore_opts elt_restore_opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); // Always materialize ALTREP vectors when chopping to avoid inefficiently // creating a large amount of small ALTREP objects that are used downstream. // This is a heuristic and we should also be on the lookout for cases where // we chop to create a small amount of large ALTREP objects that are // quickly discarded (#1450). r_obj* elt = KEEP(vec_slice_base( type, proxy, index, VCTRS_MATERIALIZE_true )); if (names != r_null) { r_obj* elt_names = slice_names(names, index); r_attrib_poke_names(elt, elt_names); } elt = vec_restore_opts(elt, x, &elt_restore_opts); r_list_poke(out, i, elt); FREE(1); } FREE(2); return out; } static r_obj* chop_df(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { r_obj* proxy = info.inner; r_obj* const* v_proxy = r_list_cbegin(proxy); const r_ssize n_cols = r_length(proxy); r_obj* col_names = KEEP(r_names(proxy)); r_obj* row_names = KEEP(df_rownames(proxy)); const bool has_row_names = r_typeof(row_names) == R_TYPE_character; const r_ssize out_size = indices_out_size(p_indices, proxy); r_obj* out = KEEP(r_alloc_list(out_size)); r_obj* const* v_out = r_list_cbegin(out); // Pre-load the `out` container with empty bare data frames for (r_ssize i = 0; i < out_size; ++i) { r_obj* elt = r_alloc_list(n_cols); r_list_poke(out, i, elt); r_attrib_poke_names(elt, col_names); r_obj* index = indices_next(p_indices); const r_ssize size = vec_subscript_size(index); init_data_frame(elt, size); if (has_row_names) { r_obj* elt_row_names = slice_rownames(row_names, index); r_attrib_poke(elt, r_syms.row_names, elt_row_names); } } r_obj* indices = p_indices->indices; r_obj* sizes = p_indices->sizes; // Chop each column according to the indices, and then assign the results // into the appropriate data frame column in the `out` list for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = v_proxy[i]; r_obj* col_chopped = KEEP(vec_chop_unsafe(col, indices, sizes)); r_obj* const* v_col_chopped = r_list_cbegin(col_chopped); for (r_ssize j = 0; j < out_size; ++j) { r_obj* elt = v_out[j]; r_list_poke(elt, i, v_col_chopped[j]); } FREE(1); } // Each data frame container is owned by us. // Columns aren't necessarily owned by us, but that // doesn't matter because we don't recursively restore. // `vec_proxy_info()` doesn't recursively proxy. const struct vec_restore_opts elt_restore_opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; // Restore each data frame for (r_ssize i = 0; i < out_size; ++i) { r_obj* elt = v_out[i]; elt = vec_restore_opts(elt, x, &elt_restore_opts); r_list_poke(out, i, elt); } FREE(3); return out; } static r_obj* chop_shaped(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices) { r_obj* proxy = info.inner; const enum vctrs_type type = info.type; r_obj* dim_names = KEEP(r_dim_names(proxy)); r_obj* row_names = r_null; if (dim_names != r_null) { row_names = r_list_get(dim_names, 0); } const r_ssize out_size = indices_out_size(p_indices, proxy); r_obj* out = KEEP(r_alloc_list(out_size)); // Treat each `elt` as owned (we also poke its dim names) // `vec_proxy_info()` doesn't recursively proxy. const struct vec_restore_opts elt_restore_opts = { .ownership = VCTRS_OWNERSHIP_shallow, .recursively_proxied = false }; for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); r_obj* elt = KEEP(vec_slice_shaped(type, proxy, index)); if (dim_names != r_null) { if (row_names != r_null) { // Required to slice row names to the right size before poking to avoid // erroring on the dimnames length check in `Rf_setAttrib()` r_obj* new_dim_names = KEEP(r_clone(dim_names)); r_obj* new_row_names = slice_names(row_names, index); r_list_poke(new_dim_names, 0, new_row_names); r_attrib_poke_dim_names(elt, new_dim_names); FREE(1); } else { r_attrib_poke_dim_names(elt, dim_names); } } elt = vec_restore_opts(elt, x, &elt_restore_opts); r_list_poke(out, i, elt); FREE(1); } FREE(2); return out; } static r_obj* chop_fallback(r_obj* x, struct vctrs_chop_indices* p_indices) { // Evaluate in a child of the global environment to allow dispatch // to custom functions. We define `[` to point to its base // definition to ensure consistent look-up. This is the same logic // as in `vctrs_dispatch_n()`, reimplemented here to allow repeated // evaluations in a loop. r_obj* env = KEEP(r_alloc_empty_environment(r_envs.global)); r_env_bind(env, syms_x, x); // Construct call with symbols, not values, for performance. // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. r_obj* call; if (is_integer64(x)) { call = KEEP(r_call3(syms.vec_slice_dispatch_integer64, syms_x, syms_i)); r_env_bind(env, syms.vec_slice_dispatch_integer64, fns.vec_slice_dispatch_integer64); } else { call = KEEP(r_call3(syms_bracket, syms_x, syms_i)); r_env_bind(env, syms_bracket, fns_bracket); } // Sliced `elt` comes from R, so is foreign. Technically not proxied at all, // so "restoring" is a bit of a hack, but we only restore if it looks like the // `[` result is missing attributes. struct vec_restore_opts elt_restore_opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; const r_ssize out_size = indices_out_size(p_indices, x); r_obj* out = KEEP(r_alloc_list(out_size)); for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); index = KEEP(vec_subscript_materialize(index)); // Update `i` binding with the new index value r_env_bind(env, syms_i, index); r_obj* elt = KEEP(r_eval(call, env)); if (!vec_is_restored(elt, x)) { // No guarantee that we own `elt` here elt = vec_restore_opts(elt, x, &elt_restore_opts); } r_list_poke(out, i, elt); FREE(2); } FREE(3); return out; } static r_obj* chop_fallback_shaped(r_obj* x, struct vctrs_chop_indices* p_indices) { const r_ssize out_size = indices_out_size(p_indices, x); r_obj* out = KEEP(r_alloc_list(out_size)); for (r_ssize i = 0; i < out_size; ++i) { r_obj* index = indices_next(p_indices); index = KEEP(vec_subscript_materialize(index)); // `vec_slice_fallback()` will also `vec_restore()` for us r_obj* elt = vec_slice_fallback(x, index); r_list_poke(out, i, elt); FREE(1); } FREE(1); return out; } // ----------------------------------------------------------------------------- r_obj* list_as_locations(r_obj* indices, r_ssize n, r_obj* names, bool allow_compact) { if (r_typeof(indices) != R_TYPE_list) { r_abort_lazy_call(r_lazy_null, "`indices` must be a list of index values, or `NULL`."); } indices = KEEP(r_clone_referenced(indices)); const r_ssize size = r_length(indices); r_obj* const* v_indices = r_list_cbegin(indices); // Restrict index values to positive integer locations // Also, notably, the `index` vector can't change size, i.e. `0` and `NA` aren't dropped. const struct location_opts opts = { .subscript_opts = { .logical = SUBSCRIPT_TYPE_ACTION_ERROR, .numeric = SUBSCRIPT_TYPE_ACTION_CAST, .character = SUBSCRIPT_TYPE_ACTION_ERROR }, .missing = SUBSCRIPT_MISSING_PROPAGATE, .loc_negative = LOC_NEGATIVE_ERROR, .loc_oob = LOC_OOB_ERROR, .loc_zero = LOC_ZERO_ERROR }; for (r_ssize i = 0; i < size; ++i) { r_obj* index = v_indices[i]; if (is_compact_seq(index)) { if (allow_compact) { // Allow `compact_seq` to pass through untouched, // assume caller can handle them natively continue; } else { // We don't want them to slip through when not handled natively r_stop_internal("`compact_seq` are not allowed."); } } index = vec_as_location_opts(index, n, names, &opts); r_list_poke(indices, i, index); } FREE(1); return indices; } static r_obj* vec_as_chop_sizes(r_obj* sizes, r_ssize size) { sizes = KEEP(vec_cast( sizes, r_globals.empty_int, vec_args.sizes, vec_args.empty, r_lazy_null )); const r_ssize n_sizes = r_length(sizes); const int* v_sizes = r_int_cbegin(sizes); r_ssize total = 0; for (r_ssize i = 0; i < n_sizes; ++i) { const int elt = v_sizes[i]; if (elt == r_globals.na_int) { r_abort_lazy_call(r_lazy_null, "`sizes` can't contain missing values."); } else if (elt < 0) { r_abort_lazy_call(r_lazy_null, "`sizes` can't contain negative sizes."); } else if (elt > size) { r_abort_lazy_call(r_lazy_null, "`sizes` can't contain sizes larger than %i.", size); } total += elt; } if (total != size) { r_abort_lazy_call(r_lazy_null, "`sizes` must sum to size %i, not size %i.", size, total); } FREE(1); return sizes; } vctrs/src/arg-counter.c0000644000176200001440000000627515157322033014607 0ustar liggesusers#include "vctrs.h" #include "arg-counter.h" static struct counters* new_counters(r_obj* names, struct vctrs_arg* p_curr_arg, struct vctrs_arg* p_parent_arg) { // This protects `shelter` and `names` r_obj* shelter = KEEP(r_alloc_list(COUNTERS_SHELTER_N)); r_obj* data_shelter = r_alloc_raw(sizeof(struct counters)); r_list_poke(shelter, COUNTERS_SHELTER_data, data_shelter); // `names` probably don't need to be protected, but we do so to be safe // (We used to use splice boxes, and `names` needed to be protected then, // but we no longer do as of #1578) r_list_poke(shelter, COUNTERS_SHELTER_names, names); struct counters* p_counters = r_raw_begin(data_shelter); p_counters->shelter = shelter; p_counters->curr = 0; p_counters->next = 0; p_counters->names = names; p_counters->curr_counter_arg_data = new_counter_arg_data(p_parent_arg, &p_counters->curr, &p_counters->names); p_counters->next_counter_arg_data = new_counter_arg_data(p_parent_arg, &p_counters->next, &p_counters->names); p_counters->curr_counter = new_counter_arg(p_parent_arg, (void*) &p_counters->curr_counter_arg_data); p_counters->next_counter = new_counter_arg(p_parent_arg, (void*) &p_counters->next_counter_arg_data); p_counters->curr_arg = p_curr_arg; p_counters->next_arg = (struct vctrs_arg*) &p_counters->next_counter; FREE(1); return p_counters; } static inline void counters_increment(struct counters* counters) { ++(counters->next); } /** * Swap counters so that the `next` counter (the one being increased * on iteration and representing the new input in the reduction) * becomes the current counter (the one representing the result so * far of the reduction). */ void counters_shift(struct counters* p_counters) { // Swap the counters data SWAP(struct vctrs_arg, p_counters->curr_counter, p_counters->next_counter); SWAP(r_ssize*, p_counters->curr_counter_arg_data.i, p_counters->next_counter_arg_data.i); // Update the handles to `vctrs_arg` p_counters->curr_arg = (struct vctrs_arg*) &p_counters->curr_counter; p_counters->next_arg = (struct vctrs_arg*) &p_counters->next_counter; // Update the current index p_counters->curr = p_counters->next; } r_obj* reduce( r_obj* current, struct vctrs_arg* p_current_arg, struct vctrs_arg* p_parent_arg, r_obj* rest, r_obj* (*impl)(r_obj* current, r_obj* next, struct counters* counters, void* data), void* data ) { const r_ssize n = r_length(rest); r_obj* names = KEEP(r_names(rest)); r_obj* const* v_rest = r_list_cbegin(rest); struct counters* counters = new_counters( names, p_current_arg, p_parent_arg ); KEEP(counters->shelter); r_keep_loc current_pi; KEEP_HERE(current, ¤t_pi); for (r_ssize i = 0; i < n; ++i) { r_obj* next = v_rest[i]; current = impl(current, next, counters, data); KEEP_AT(current, current_pi); counters_increment(counters); } FREE(3); return current; } vctrs/src/order.h0000644000176200001440000000504615156001116013467 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_H #define VCTRS_ORDER_H #include "vctrs-core.h" #include "utils.h" // ----------------------------------------------------------------------------- SEXP vec_order(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate); SEXP vec_order_info(SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate); // ----------------------------------------------------------------------------- /* * `order` is an integer vector intended to hold the ordering vector * in `vec_order()`. It is allocated eagerly, but the initialization of its * values is done lazily. Typically, it is initialized to a 1-based sequential * ordering which is rearranged by the internal algorithm. However, for the * counting order, the initialization is not required for the first integer * column, which can result in a nice performance improvement. */ struct order { SEXP self; SEXP data; int* p_data; r_ssize size; bool initialized; }; #define PROTECT_ORDER(p_order, p_n) do { \ PROTECT((p_order)->self); \ PROTECT((p_order)->data); \ *(p_n) += 2; \ } while (0) static inline struct order* new_order(r_ssize size) { SEXP self = PROTECT(r_new_raw(sizeof(struct order))); struct order* p_order = (struct order*) RAW(self); SEXP data = PROTECT(Rf_allocVector(INTSXP, size)); int* p_data = INTEGER(data); p_order->self = self; p_order->data = data; p_order->p_data = p_data; p_order->size = size; p_order->initialized = false; UNPROTECT(2); return p_order; } static inline int* init_order(struct order* p_order) { if (p_order->initialized) { return p_order->p_data; } // Initialize `x` with sequential 1-based ordering for (r_ssize i = 0; i < p_order->size; ++i) { p_order->p_data[i] = i + 1; } p_order->initialized = true; return p_order->p_data; } // ----------------------------------------------------------------------------- #endif vctrs/src/rep.h0000644000176200001440000000072115156001116013135 0ustar liggesusers#ifndef VCTRS_REP_H #define VCTRS_REP_H #include "vctrs-core.h" r_obj* vec_rep(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg); r_obj* vec_rep_each(r_obj* x, r_obj* times, struct r_lazy error_call, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_times_arg); #endif vctrs/src/version.c0000644000176200001440000000166515157322033014044 0ustar liggesusers#define R_NO_REMAP #include const char* vctrs_version = "0.7.2"; /** * This file records the expected package version in the shared * library (or DLL) of the package. This is useful to check that users * have properly installed your package. Installation issues where the * package is updated but the DLL isn't are common on Windows in * particular. To automatically check that the native library of the * package was properly installed: * * - Register the function below as a C callable under the name * "vctrs_linked_version". * * - Call `rlang::check_linked_version(pkg_name)` from your * `.onLoad()` hook. If you don't depend on rlang copy the * compat-linked-version.R file from the rlang repository to your R * folder. Find it at * */ // [[ register() ]] SEXP vctrs_linked_version(void) { return Rf_mkString(vctrs_version); } vctrs/src/compare.h0000644000176200001440000002345015156537555014026 0ustar liggesusers#ifndef VCTRS_COMPARE_H #define VCTRS_COMPARE_H #include "vctrs-core.h" #include "equal.h" #include "missing.h" #include // ----------------------------------------------------------------------------- r_obj* vec_compare(r_obj* x, r_obj* y, bool na_equal); // ----------------------------------------------------------------------------- // https://stackoverflow.com/questions/10996418 static inline int int_compare_scalar(int x, int y) { return (x > y) - (x < y); } static inline int dbl_compare_scalar(double x, double y) { return (x > y) - (x < y); } static inline int chr_compare_scalar(r_obj* x, r_obj* y) { // Assume UTF-8 encoding is handled by `obj_encode_utf8()` int cmp = strcmp(r_str_c_string(x), r_str_c_string(y)); return cmp / abs(cmp); } // ----------------------------------------------------------------------------- static inline int qsort_int_compare_scalar(const void* x, const void* y) { return int_compare_scalar(*((int*) x), *((int*) y)); } // ----------------------------------------------------------------------------- static inline r_no_return int nil_compare_na_equal(r_obj* x, r_obj* y) { r_stop_internal("Can't compare NULL values."); } static inline int lgl_compare_na_equal(int x, int y) { return int_compare_scalar(x, y); } static inline int int_compare_na_equal(int x, int y) { return int_compare_scalar(x, y); } static inline int dbl_compare_na_equal(double x, double y) { enum vctrs_dbl x_class = dbl_classify(x); enum vctrs_dbl y_class = dbl_classify(y); switch (x_class) { case VCTRS_DBL_number: { switch (y_class) { case VCTRS_DBL_number: return dbl_compare_scalar(x, y); case VCTRS_DBL_missing: return 1; case VCTRS_DBL_nan: return 1; } } case VCTRS_DBL_missing: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return 0; case VCTRS_DBL_nan: return 1; } } case VCTRS_DBL_nan: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return -1; case VCTRS_DBL_nan: return 0; } } } r_stop_unreachable(); } static inline r_no_return int cpl_compare_na_equal(Rcomplex x, Rcomplex y) { r_stop_internal("Can't compare complex types."); } static inline int chr_compare_na_equal(r_obj* x, r_obj* y) { if (chr_equal_na_equal(x, y)) { return 0; } else if (chr_is_missing(x)) { return -1; } else if (chr_is_missing(y)) { return 1; } else { return chr_compare_scalar(x, y); } } static inline r_no_return int raw_compare_na_equal(Rbyte x, Rbyte y) { r_stop_internal("Can't compare raw types."); } static inline r_no_return int list_compare_na_equal(r_obj* x, r_obj* y) { r_stop_internal("Can't compare list types."); } // ----------------------------------------------------------------------------- #define P_COMPARE_NA_EQUAL(CTYPE, COMPARE_NA_EQUAL) do { \ return COMPARE_NA_EQUAL(((CTYPE const*) p_x)[i], ((CTYPE const*) p_y)[j]); \ } while (0) static inline int p_nil_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(r_obj*, nil_compare_na_equal); } static inline int p_lgl_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(int, lgl_compare_na_equal); } static inline int p_int_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(int, int_compare_na_equal); } static inline int p_dbl_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(double, dbl_compare_na_equal); } static inline int p_cpl_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(Rcomplex, cpl_compare_na_equal); } static inline int p_chr_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(r_obj*, chr_compare_na_equal); } static inline int p_raw_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(Rbyte, raw_compare_na_equal); } static inline int p_list_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_EQUAL(r_obj*, list_compare_na_equal); } #undef P_COMPARE_NA_EQUAL // No support for df-cols, as they should be flattened static inline int p_col_compare_na_equal( const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type ) { switch (type) { case VCTRS_TYPE_null: return p_nil_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_logical: return p_lgl_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_compare_na_equal(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_compare_na_equal(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_col_compare_na_equal", type); } } static inline int p_df_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; struct poly_df_data* p_y_data = (struct poly_df_data*) p_y; r_ssize n_col = p_x_data->n_col; if (n_col != p_y_data->n_col) { r_stop_internal("`x` and `y` must have the same number of columns."); } enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_x_col_ptr = p_x_data->v_col_ptr; const void** v_y_col_ptr = p_y_data->v_col_ptr; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { const int cmp = p_col_compare_na_equal( v_x_col_ptr[col], i, v_y_col_ptr[col], j, v_col_type[col] ); if (cmp != 0) { return cmp; } } return 0; } // ----------------------------------------------------------------------------- static inline r_no_return int nil_compare_na_propagate(r_obj* x, r_obj* y) { r_stop_internal("Can't compare NULL values."); } static inline int lgl_compare_na_propagate(int x, int y) { if (lgl_is_missing(x) || lgl_is_missing(y)) { return r_globals.na_int; } else { return int_compare_scalar(x, y); } } static inline int int_compare_na_propagate(int x, int y) { if (int_is_missing(x) || int_is_missing(y)) { return r_globals.na_int; } else { return int_compare_scalar(x, y); } } static inline int dbl_compare_na_propagate(double x, double y) { if (dbl_is_missing(x) || dbl_is_missing(y)) { return r_globals.na_int; } else { return dbl_compare_scalar(x, y); } } static inline r_no_return int cpl_compare_na_propagate(Rcomplex x, Rcomplex y) { r_stop_internal("Can't compare complex types."); } static inline int chr_compare_na_propagate(r_obj* x, r_obj* y) { if (chr_is_missing(x) || chr_is_missing(y)) { return r_globals.na_int; } else if (chr_equal_na_equal(x, y)) { return 0; } else { return chr_compare_scalar(x, y); } } static inline r_no_return int raw_compare_na_propagate(Rbyte x, Rbyte y) { r_stop_internal("Can't compare raw types."); } static inline r_no_return int list_compare_na_propagate(r_obj* x, r_obj* y) { r_stop_internal("Can't compare list types."); } // ----------------------------------------------------------------------------- #define P_COMPARE_NA_PROPAGATE(CTYPE, COMPARE_NA_PROPAGATE) do { \ return COMPARE_NA_PROPAGATE(((CTYPE const*) p_x)[i], ((CTYPE const*) p_y)[j]); \ } while (0) static inline int p_nil_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(r_obj*, nil_compare_na_propagate); } static inline int p_lgl_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(int, lgl_compare_na_propagate); } static inline int p_int_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(int, int_compare_na_propagate); } static inline int p_dbl_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(double, dbl_compare_na_propagate); } static inline int p_cpl_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(Rcomplex, cpl_compare_na_propagate); } static inline int p_chr_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(r_obj*, chr_compare_na_propagate); } static inline int p_raw_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(Rbyte, raw_compare_na_propagate); } static inline int p_list_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j) { P_COMPARE_NA_PROPAGATE(r_obj*, list_compare_na_propagate); } #undef P_COMPARE_NA_PROPAGATE static inline int p_compare_na_propagate(const void* p_x, r_ssize i, const void* p_y, r_ssize j, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_logical: return p_lgl_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_integer: return p_int_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_double: return p_dbl_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_complex: return p_cpl_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_character: return p_chr_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_raw: return p_raw_compare_na_propagate(p_x, i, p_y, j); case VCTRS_TYPE_list: return p_list_compare_na_propagate(p_x, i, p_y, j); default: stop_unimplemented_vctrs_type("p_compare_na_propagate", type); } } // ----------------------------------------------------------------------------- #endif // VCTRS_COMPARE_H vctrs/src/slice-assign-array.c0000644000176200001440000011771315060045711016054 0ustar liggesusers#include "vctrs.h" #define ASSIGN_SHAPED_LOCATION_INDEX( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ int n_protect = 0; \ \ struct strides_info info = new_strides_info(proxy, index); \ struct strides_info* p_info = &info; \ PROTECT_STRIDES_INFO(p_info, &n_protect); \ \ SEXP out = PROTECT_N(vec_clone_referenced(proxy, ownership), &n_protect); \ CTYPE* p_out = DEREF(out); \ \ const CTYPE* p_value = CONST_DEREF(value); \ \ /* The `value` location used in the `slice_value = FALSE` */ \ /* and `vec_size(value) == 1` cases. When `slice_value = TRUE` */ \ /* and we aren't recycling, `value` tracks `x` instead. */ \ R_len_t value_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t out_loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t index_loc = 0; index_loc < p_info->index_n; ++index_loc) { \ const int step = p_info->p_steps[index_loc]; \ \ if (step != NA_INTEGER) { \ out_loc += step; \ p_out[out_loc] = p_value[SLICE_VALUE ? out_loc : value_loc]; \ } \ \ value_loc += VALUE_LOC_POST_INDEX_INCREMENT; \ } \ \ vec_shape_index_increment(p_info); \ value_loc += VALUE_LOC_POST_SHAPE_INCREMENT; \ } \ \ UNPROTECT(n_protect); \ return out #define ASSIGN_SHAPED_LOCATION_COMPACT( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ int n_protect = 0; \ \ struct strides_info info = new_strides_info(proxy, index); \ struct strides_info* p_info = &info; \ PROTECT_STRIDES_INFO(p_info, &n_protect); \ \ SEXP out = PROTECT_N(vec_clone_referenced(proxy, ownership), &n_protect); \ CTYPE* p_out = DEREF(out); \ \ const R_len_t start = p_info->p_index[0]; \ const R_len_t step = p_info->p_index[2]; \ \ const CTYPE* p_value = CONST_DEREF(value); \ \ R_len_t value_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t out_loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ out_loc += start; \ \ for (R_len_t index_loc = 0; index_loc < p_info->index_n; ++index_loc) { \ p_out[out_loc] = p_value[SLICE_VALUE ? out_loc : value_loc]; \ out_loc += step; \ value_loc += VALUE_LOC_POST_INDEX_INCREMENT; \ } \ \ vec_shape_index_increment(p_info); \ value_loc += VALUE_LOC_POST_SHAPE_INCREMENT; \ } \ \ UNPROTECT(n_protect); \ return out // ----------------------------------------------------------------------------- // Strides information is not required here! // // See `ASSIGN_CONDITION_IMPL` for rationale on using a ternary inside // the assignment loop to keep `p_out` "hot". #define ASSIGN_SHAPED_CONDITION_IMPL( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT, \ INDEX_CTYPE, \ INDEX_SIZE, \ INDEX_CONST_DEREF, \ INDEX_ELT_CMP \ ) \ r_obj* dim = PROTECT(vec_dim(proxy)); \ const int* p_dim = INTEGER_RO(dim); \ R_len_t dim_n = Rf_length(dim); \ R_len_t shape_elem_n = vec_shape_elem_n(p_dim, dim_n); \ \ R_len_t index_size = INDEX_SIZE(index); \ const INDEX_CTYPE* p_index = INDEX_CONST_DEREF(index); \ \ SEXP out = PROTECT(vec_clone_referenced(proxy, ownership)); \ CTYPE* p_out = DEREF(out); \ R_len_t out_loc = 0; \ \ const CTYPE* p_value = CONST_DEREF(value); \ R_len_t value_loc = 0; \ \ for (R_len_t i = 0; i < shape_elem_n; ++i) { \ for (R_len_t index_loc = 0; index_loc < index_size; ++index_loc) { \ const INDEX_CTYPE index_elt = p_index[index_loc]; \ p_out[out_loc] = (INDEX_ELT_CMP) ? p_value[SLICE_VALUE ? out_loc : value_loc] : p_out[out_loc]; \ ++out_loc; \ value_loc += VALUE_LOC_POST_INDEX_INCREMENT; \ } \ value_loc += VALUE_LOC_POST_SHAPE_INCREMENT; \ } \ \ UNPROTECT(2); \ return out #define ASSIGN_SHAPED_CONDITION_INDEX( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ ASSIGN_SHAPED_CONDITION_IMPL( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT, \ int, \ r_length, \ r_lgl_cbegin, \ index_elt == 1 \ ) \ #define ASSIGN_SHAPED_CONDITION_COMPACT( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ ASSIGN_SHAPED_CONDITION_IMPL( \ CTYPE, \ DEREF, \ CONST_DEREF, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT, \ bool, \ compact_condition_size, \ compact_condition_cbegin, \ index_elt \ ) \ // ----------------------------------------------------------------------------- #define ASSIGN_SHAPED_LOCATION(CTYPE, DEREF, CONST_DEREF) \ const r_ssize value_size = vec_size(value); \ check_assign_sizes(proxy, index, value_size, slice_value, index_style); \ \ if (is_compact_seq(index)) { \ if (value_size == 1) { \ ASSIGN_SHAPED_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_SHAPED_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, true, 0, 0); \ } else { \ ASSIGN_SHAPED_LOCATION_COMPACT(CTYPE, DEREF, CONST_DEREF, false, 1, 0); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_SHAPED_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_SHAPED_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, true, 0, 0); \ } else { \ ASSIGN_SHAPED_LOCATION_INDEX(CTYPE, DEREF, CONST_DEREF, false, 1, 0); \ } \ } #define ASSIGN_SHAPED_CONDITION(CTYPE, DEREF, CONST_DEREF) \ const r_ssize value_size = vec_size(value); \ check_assign_sizes(proxy, index, value_size, slice_value, index_style); \ \ if (is_compact_condition(index)) { \ if (value_size == 1) { \ ASSIGN_SHAPED_CONDITION_COMPACT(CTYPE, DEREF, CONST_DEREF, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_SHAPED_CONDITION_COMPACT(CTYPE, DEREF, CONST_DEREF, true, 0, 0); \ } else { \ ASSIGN_SHAPED_CONDITION_COMPACT(CTYPE, DEREF, CONST_DEREF, false, index_elt, 0); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_SHAPED_CONDITION_INDEX(CTYPE, DEREF, CONST_DEREF, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_SHAPED_CONDITION_INDEX(CTYPE, DEREF, CONST_DEREF, true, 0, 0); \ } else { \ ASSIGN_SHAPED_CONDITION_INDEX(CTYPE, DEREF, CONST_DEREF, false, index_elt != 0, 0); \ } \ } #define ASSIGN_SHAPED(CTYPE, DEREF, CONST_DEREF) \ switch (index_style) { \ case VCTRS_INDEX_STYLE_location: { \ ASSIGN_SHAPED_LOCATION(CTYPE, DEREF, CONST_DEREF); \ } \ case VCTRS_INDEX_STYLE_condition: { \ ASSIGN_SHAPED_CONDITION(CTYPE, DEREF, CONST_DEREF); \ } \ default: r_stop_unreachable(); \ } static inline SEXP lgl_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_SHAPED(int, LOGICAL, LOGICAL_RO); } static inline SEXP int_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_SHAPED(int, INTEGER, INTEGER_RO); } static inline SEXP dbl_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_SHAPED(double, REAL, REAL_RO); } static inline SEXP cpl_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_SHAPED(Rcomplex, COMPLEX, COMPLEX_RO); } static inline SEXP raw_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_SHAPED(Rbyte, RAW, RAW_RO); } #undef ASSIGN_SHAPED #undef ASSIGN_SHAPED_LOCATION #undef ASSIGN_SHAPED_LOCATION_COMPACT #undef ASSIGN_SHAPED_LOCATION_INDEX #undef ASSIGN_SHAPED_CONDITION #undef ASSIGN_SHAPED_CONDITION_INDEX #undef ASSIGN_SHAPED_CONDITION_COMPACT #undef ASSIGN_SHAPED_CONDITION_IMPL // ----------------------------------------------------------------------------- #define ASSIGN_BARRIER_SHAPED_LOCATION_INDEX( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ int n_protect = 0; \ \ struct strides_info info = new_strides_info(proxy, index); \ struct strides_info* p_info = &info; \ PROTECT_STRIDES_INFO(p_info, &n_protect); \ \ SEXP out = PROTECT_N(vec_clone_referenced(proxy, ownership), &n_protect); \ \ CTYPE const* p_value = CONST_DEREF(value); \ \ R_len_t value_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t out_loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t index_loc = 0; index_loc < p_info->index_n; ++index_loc) { \ const int step = p_info->p_steps[index_loc]; \ \ if (step != NA_INTEGER) { \ out_loc += step; \ SET(out, out_loc, p_value[SLICE_VALUE ? out_loc : value_loc]); \ } \ \ value_loc += VALUE_LOC_POST_INDEX_INCREMENT; \ } \ \ vec_shape_index_increment(p_info); \ value_loc += VALUE_LOC_POST_SHAPE_INCREMENT; \ } \ \ UNPROTECT(n_protect); \ return out #define ASSIGN_BARRIER_SHAPED_LOCATION_COMPACT( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ int n_protect = 0; \ \ struct strides_info info = new_strides_info(proxy, index); \ struct strides_info* p_info = &info; \ PROTECT_STRIDES_INFO(p_info, &n_protect); \ \ SEXP out = PROTECT_N(vec_clone_referenced(proxy, ownership), &n_protect); \ \ const R_len_t start = p_info->p_index[0]; \ const R_len_t step = p_info->p_index[2]; \ \ CTYPE const* p_value = CONST_DEREF(value); \ \ R_len_t value_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t out_loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ out_loc += start; \ \ for (R_len_t index_loc = 0; index_loc < p_info->index_n; ++index_loc) { \ SET(out, out_loc, p_value[SLICE_VALUE ? out_loc : value_loc]); \ out_loc += step; \ value_loc += VALUE_LOC_POST_INDEX_INCREMENT; \ } \ \ vec_shape_index_increment(p_info); \ value_loc += VALUE_LOC_POST_SHAPE_INCREMENT; \ } \ \ UNPROTECT(n_protect); \ return out // ----------------------------------------------------------------------------- // Strides information is not required here! // // See `ASSIGN_BARRIER_CONDITION_IMPL` for rationale on NOT using a ternary // inside the assignment loop. The indirection of `SET()` makes it not worth it. #define ASSIGN_BARRIER_SHAPED_CONDITION_IMPL( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT, \ INDEX_CTYPE, \ INDEX_SIZE, \ INDEX_CONST_DEREF, \ INDEX_ELT_CMP \ ) \ r_obj* dim = PROTECT(vec_dim(proxy)); \ const int* p_dim = INTEGER_RO(dim); \ R_len_t dim_n = Rf_length(dim); \ R_len_t shape_elem_n = vec_shape_elem_n(p_dim, dim_n); \ \ R_len_t index_size = INDEX_SIZE(index); \ const INDEX_CTYPE* p_index = INDEX_CONST_DEREF(index); \ \ SEXP out = PROTECT(vec_clone_referenced(proxy, ownership)); \ R_len_t out_loc = 0; \ \ CTYPE const* p_value = CONST_DEREF(value); \ R_len_t value_loc = 0; \ \ for (R_len_t i = 0; i < shape_elem_n; ++i) { \ for (R_len_t index_loc = 0; index_loc < index_size; ++index_loc) { \ const INDEX_CTYPE index_elt = p_index[index_loc]; \ if (INDEX_ELT_CMP) { \ SET(out, out_loc, p_value[SLICE_VALUE ? out_loc : value_loc]); \ } \ ++out_loc; \ value_loc += VALUE_LOC_POST_INDEX_INCREMENT; \ } \ value_loc += VALUE_LOC_POST_SHAPE_INCREMENT; \ } \ \ UNPROTECT(2); \ return out #define ASSIGN_BARRIER_SHAPED_CONDITION_INDEX( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ ASSIGN_BARRIER_SHAPED_CONDITION_IMPL( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT, \ int, \ r_length, \ r_lgl_cbegin, \ index_elt == 1 \ ) \ #define ASSIGN_BARRIER_SHAPED_CONDITION_COMPACT( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT \ ) \ ASSIGN_BARRIER_SHAPED_CONDITION_IMPL( \ CTYPE, \ CONST_DEREF, \ SET, \ SLICE_VALUE, \ VALUE_LOC_POST_INDEX_INCREMENT, \ VALUE_LOC_POST_SHAPE_INCREMENT, \ bool, \ compact_condition_size, \ compact_condition_cbegin, \ index_elt \ ) \ // ----------------------------------------------------------------------------- #define ASSIGN_BARRIER_SHAPED_LOCATION(CTYPE, CONST_DEREF, SET) \ const r_ssize value_size = vec_size(value); \ check_assign_sizes(proxy, index, value_size, slice_value, index_style); \ \ if (is_compact_seq(index)) { \ if (value_size == 1) { \ ASSIGN_BARRIER_SHAPED_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_SHAPED_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, true, 0, 0); \ } else { \ ASSIGN_BARRIER_SHAPED_LOCATION_COMPACT(CTYPE, CONST_DEREF, SET, false, 1, 0); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_BARRIER_SHAPED_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_SHAPED_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, true, 0, 0); \ } else { \ ASSIGN_BARRIER_SHAPED_LOCATION_INDEX(CTYPE, CONST_DEREF, SET, false, 1, 0); \ } \ } #define ASSIGN_BARRIER_SHAPED_CONDITION(CTYPE, CONST_DEREF, SET) \ const r_ssize value_size = vec_size(value); \ check_assign_sizes(proxy, index, value_size, slice_value, index_style); \ \ if (is_compact_condition(index)) { \ if (value_size == 1) { \ ASSIGN_BARRIER_SHAPED_CONDITION_COMPACT(CTYPE, CONST_DEREF, SET, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_SHAPED_CONDITION_COMPACT(CTYPE, CONST_DEREF, SET, true, 0, 0); \ } else { \ ASSIGN_BARRIER_SHAPED_CONDITION_COMPACT(CTYPE, CONST_DEREF, SET, false, index_elt, 0); \ } \ } else { \ if (value_size == 1) { \ ASSIGN_BARRIER_SHAPED_CONDITION_INDEX(CTYPE, CONST_DEREF, SET, false, 0, 1); \ } else if (should_slice_value(slice_value)) { \ ASSIGN_BARRIER_SHAPED_CONDITION_INDEX(CTYPE, CONST_DEREF, SET, true, 0, 0); \ } else { \ ASSIGN_BARRIER_SHAPED_CONDITION_INDEX(CTYPE, CONST_DEREF, SET, false, index_elt != 0, 0); \ } \ } #define ASSIGN_BARRIER_SHAPED(CTYPE, CONST_DEREF, SET) \ switch (index_style) { \ case VCTRS_INDEX_STYLE_location: { \ ASSIGN_BARRIER_SHAPED_LOCATION(CTYPE, CONST_DEREF, SET); \ } \ case VCTRS_INDEX_STYLE_condition: { \ ASSIGN_BARRIER_SHAPED_CONDITION(CTYPE, CONST_DEREF, SET); \ } \ default: r_stop_unreachable(); \ } static inline SEXP chr_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_BARRIER_SHAPED(SEXP, STRING_PTR_RO, SET_STRING_ELT); } static SEXP list_assign_shaped( SEXP proxy, SEXP index, SEXP value, const enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { ASSIGN_BARRIER_SHAPED(SEXP, VECTOR_PTR_RO, SET_VECTOR_ELT); } #undef ASSIGN_BARRIER_SHAPED #undef ASSIGN_BARRIER_SHAPED_LOCATION #undef ASSIGN_BARRIER_SHAPED_LOCATION_COMPACT #undef ASSIGN_BARRIER_SHAPED_LOCATION_INDEX #undef ASSIGN_BARRIER_SHAPED_CONDITION #undef ASSIGN_BARRIER_SHAPED_CONDITION_INDEX #undef ASSIGN_BARRIER_SHAPED_CONDITION_COMPACT #undef ASSIGN_BARRIER_SHAPED_CONDITION_IMPL // ----------------------------------------------------------------------------- // [[ include("vctrs.h") ]] SEXP vec_assign_shaped( SEXP proxy, SEXP index, SEXP value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ) { switch (vec_proxy_typeof(proxy)) { case VCTRS_TYPE_logical: return lgl_assign_shaped(proxy, index, value, ownership, slice_value, index_style); case VCTRS_TYPE_integer: return int_assign_shaped(proxy, index, value, ownership, slice_value, index_style); case VCTRS_TYPE_double: return dbl_assign_shaped(proxy, index, value, ownership, slice_value, index_style); case VCTRS_TYPE_complex: return cpl_assign_shaped(proxy, index, value, ownership, slice_value, index_style); case VCTRS_TYPE_character: return chr_assign_shaped(proxy, index, value, ownership, slice_value, index_style); case VCTRS_TYPE_raw: return raw_assign_shaped(proxy, index, value, ownership, slice_value, index_style); case VCTRS_TYPE_list: return list_assign_shaped(proxy, index, value, ownership, slice_value, index_style); default: stop_unimplemented_vctrs_type("vec_assign_shaped", vec_proxy_typeof(proxy)); } } vctrs/src/interval.c0000644000176200001440000006772415156537555014233 0ustar liggesusers#include "vctrs.h" enum vctrs_interval_missing { VCTRS_INTERVAL_MISSING_group = 0, VCTRS_INTERVAL_MISSING_drop = 1 }; #include "decl/interval-decl.h" // ----------------------------------------------------------------------------- r_obj* ffi_interval_groups(r_obj* start, r_obj* end, r_obj* ffi_abutting, r_obj* ffi_missing) { const bool abutting = r_arg_as_bool(ffi_abutting, "abutting"); const enum vctrs_interval_missing missing = parse_missing(ffi_missing); const bool locations = false; r_obj* out = KEEP(vec_interval_group_info(start, end, abutting, missing, locations)); r_obj* loc_start = r_list_get(out, 0); r_obj* loc_end = r_list_get(out, 1); r_list_poke(out, 0, vec_slice_unsafe(start, loc_start)); r_list_poke(out, 1, vec_slice_unsafe(end, loc_end)); FREE(1); return out; } r_obj* ffi_interval_locate_groups(r_obj* start, r_obj* end, r_obj* ffi_abutting, r_obj* ffi_missing) { const bool abutting = r_arg_as_bool(ffi_abutting, "abutting"); const enum vctrs_interval_missing missing = parse_missing(ffi_missing); const bool locations = true; r_obj* out = KEEP(vec_interval_group_info(start, end, abutting, missing, locations)); r_obj* key = r_list_get(out, 0); r_obj* loc_start = r_list_get(key, 0); r_obj* loc_end = r_list_get(key, 1); r_list_poke(key, 0, vec_slice_unsafe(start, loc_start)); r_list_poke(key, 1, vec_slice_unsafe(end, loc_end)); FREE(1); return out; } /* * If `locations = false`, returns a two column data frame containing a * `$start` column with locations to slice `start` with and an `$end` column * containing locations to slice `end` with. After slicing, the newly * generated intervals represent the "groups". * * If `locations = true`, returns a two column data frame containing a * `$key` column that holds the data frame generated by `locations = false` * and a `$loc` column that is a list-column of integer vectors that map each * interval defined by `[start, end)` to its corresponding group. * * We don't slice `start` and `end` here because it is often useful to just * know the locations, for example in `vec_interval_complement()`. */ static r_obj* vec_interval_group_info(r_obj* start, r_obj* end, bool abutting, enum vctrs_interval_missing missing, bool locations) { int n_prot = 0; int _; r_obj* ptype = vec_ptype2( start, end, args_start, args_end, r_lazy_null, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = KEEP_N(vec_ptype_finalise(ptype), &n_prot); start = vec_cast_params( start, ptype, args_start, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(start, &n_prot); end = vec_cast_params( end, ptype, args_end, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(end, &n_prot); r_obj* start_proxy = KEEP_N(vec_proxy_compare(start), &n_prot); start_proxy = KEEP_N(obj_encode_utf8(start_proxy), &n_prot); r_obj* end_proxy = KEEP_N(vec_proxy_compare(end), &n_prot); end_proxy = KEEP_N(obj_encode_utf8(end_proxy), &n_prot); const enum vctrs_type type_proxy = vec_proxy_typeof(start_proxy); struct poly_vec* p_poly_start = new_poly_vec(start_proxy, type_proxy); KEEP_N(p_poly_start->shelter, &n_prot); const void* p_start = p_poly_start->p_vec; struct poly_vec* p_poly_end = new_poly_vec(end_proxy, type_proxy); KEEP_N(p_poly_end->shelter, &n_prot); const void* p_end = p_poly_end->p_vec; poly_binary_int_fn* const fn_compare = poly_p_compare_na_equal(type_proxy); poly_unary_bool_fn* const fn_is_missing = poly_p_is_missing(type_proxy); const r_ssize size = vec_size(start_proxy); if (size != vec_size(end_proxy)) { r_abort("`start` and `end` must have the same size."); } // Order is computed as ascending order, placing missing intervals up front // as the "smallest" values. We document that we assume that if `start` is // missing, then `end` is missing too. r_obj* order = interval_order( start_proxy, end_proxy, chrs_asc, chrs_smallest, size ); KEEP_N(order, &n_prot); const int* v_order = r_int_cbegin(order); // Assume the intervals can be merged into half their original size. // Apply a minimum size to avoid a size of zero. const r_ssize initial_size = r_ssize_max(size / 2, 1); struct r_dyn_array* p_loc_start = r_new_dyn_vector(R_TYPE_integer, initial_size); KEEP_N(p_loc_start->shelter, &n_prot); struct r_dyn_array* p_loc_end = r_new_dyn_vector(R_TYPE_integer, initial_size); KEEP_N(p_loc_end->shelter, &n_prot); struct r_dyn_array* p_loc = NULL; if (locations) { p_loc = r_new_dyn_vector(R_TYPE_list, initial_size); KEEP_N(p_loc->shelter, &n_prot); } r_ssize i = 0; r_ssize loc_order_missing_start = 0; r_ssize loc_order_missing_end = -1; // Move `i` past any missing intervals (they are at the front), // recording last missing interval location for later. Only need to check // missingness of `start`, because we document that we assume that `end` // is missing if `start` is missing. for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; if (!fn_is_missing(p_start, loc)) { break; } loc_order_missing_end = i; } r_ssize loc_order_start = 0; r_ssize loc_order_end = -1; r_ssize loc_group_start = 0; r_ssize loc_group_end = -1; if (i < size) { // Set information about first usable interval const r_ssize loc = v_order[i] - 1; loc_order_start = i; loc_order_end = i; loc_group_start = loc; loc_group_end = loc; ++i; } const int merge_limit = abutting ? -1 : 0; for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; // If `abutting`, this says: if group end < new start, finish out the group // If `!abutting`, this says: if group end <= new start, finish out the group if (fn_compare(p_end, loc_group_end, p_start, loc) <= merge_limit) { r_dyn_int_push_back(p_loc_start, loc_group_start + 1); r_dyn_int_push_back(p_loc_end, loc_group_end + 1); if (locations) { const r_ssize loc_size = loc_order_end - loc_order_start + 1; r_obj* loc = r_new_integer(loc_size); r_dyn_list_push_back(p_loc, loc); int* v_loc = r_int_begin(loc); const int* v_order_start = v_order + loc_order_start; r_memcpy(v_loc, v_order_start, loc_size * sizeof(*v_loc)); } loc_order_start = loc_order_end + 1; loc_group_start = loc; loc_group_end = loc; } else if (fn_compare(p_end, loc_group_end, p_end, loc) == -1) { loc_group_end = loc; } loc_order_end = i; } if (loc_order_end >= loc_order_start) { // Log last interval r_dyn_int_push_back(p_loc_start, loc_group_start + 1); r_dyn_int_push_back(p_loc_end, loc_group_end + 1); if (locations) { const r_ssize loc_size = loc_order_end - loc_order_start + 1; r_obj* loc = r_new_integer(loc_size); r_dyn_list_push_back(p_loc, loc); int* v_loc = r_int_begin(loc); const int* v_order_start = v_order + loc_order_start; r_memcpy(v_loc, v_order_start, loc_size * sizeof(*v_loc)); } } if (missing == VCTRS_INTERVAL_MISSING_group && loc_order_missing_end >= loc_order_missing_start) { // Log missing interval at the end const r_ssize loc_group_missing_start = v_order[loc_order_missing_start] - 1; const r_ssize loc_group_missing_end = v_order[loc_order_missing_end] - 1; r_dyn_int_push_back(p_loc_start, loc_group_missing_start + 1); r_dyn_int_push_back(p_loc_end, loc_group_missing_end + 1); if (locations) { const r_ssize loc_size = loc_order_missing_end - loc_order_missing_start + 1; r_obj* loc = r_new_integer(loc_size); r_dyn_list_push_back(p_loc, loc); int* v_loc = r_int_begin(loc); const int* v_order_start = v_order + loc_order_missing_start; r_memcpy(v_loc, v_order_start, loc_size * sizeof(*v_loc)); } } r_obj* key = KEEP_N(r_new_list(2), &n_prot); r_list_poke(key, 0, r_dyn_unwrap(p_loc_start)); r_list_poke(key, 1, r_dyn_unwrap(p_loc_end)); r_obj* key_names = r_new_character(2); r_attrib_poke_names(key, key_names); r_chr_poke(key_names, 0, r_str("start")); r_chr_poke(key_names, 1, r_str("end")); r_init_data_frame(key, p_loc_start->count); r_obj* out = r_null; r_keep_loc out_shelter; KEEP_HERE(out, &out_shelter); ++n_prot; if (locations) { out = r_new_list(2); KEEP_AT(out, out_shelter); r_list_poke(out, 0, key); r_list_poke(out, 1, r_dyn_unwrap(p_loc)); r_obj* out_names = r_new_character(2); r_attrib_poke_names(out, out_names); r_chr_poke(out_names, 0, r_str("key")); r_chr_poke(out_names, 1, r_str("loc")); r_init_data_frame(out, p_loc_start->count); } else { out = key; } FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_interval_complement(r_obj* start, r_obj* end, r_obj* lower, r_obj* upper) { return vec_interval_complement(start, end, lower, upper); } static r_obj* vec_interval_complement(r_obj* start, r_obj* end, r_obj* lower, r_obj* upper) { int n_prot = 0; int _; r_obj* ptype = vec_ptype2( start, end, args_start, args_end, r_lazy_null, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = KEEP_N(vec_ptype_finalise(ptype), &n_prot); start = vec_cast_params( start, ptype, args_start, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(start, &n_prot); end = vec_cast_params( end, ptype, args_end, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(end, &n_prot); r_obj* start_proxy = KEEP_N(vec_proxy_compare(start), &n_prot); start_proxy = KEEP_N(obj_encode_utf8(start_proxy), &n_prot); r_obj* end_proxy = KEEP_N(vec_proxy_compare(end), &n_prot); end_proxy = KEEP_N(obj_encode_utf8(end_proxy), &n_prot); const enum vctrs_type type_proxy = vec_proxy_typeof(start_proxy); struct poly_vec* p_poly_start = new_poly_vec(start_proxy, type_proxy); KEEP_N(p_poly_start->shelter, &n_prot); const void* p_start = p_poly_start->p_vec; struct poly_vec* p_poly_end = new_poly_vec(end_proxy, type_proxy); KEEP_N(p_poly_end->shelter, &n_prot); const void* p_end = p_poly_end->p_vec; poly_binary_int_fn* const fn_compare = poly_p_compare_na_equal(type_proxy); bool use_lower = (lower != r_null); bool use_upper = (upper != r_null); bool append_lower = false; bool append_upper = false; const void* p_lower = NULL; if (use_lower) { if (vec_size(lower) != 1) { r_abort("`lower` must be size 1."); } lower = vec_cast_params( lower, ptype, args_lower, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(lower, &n_prot); r_obj* lower_proxy = KEEP_N(vec_proxy_compare(lower), &n_prot); lower_proxy = KEEP_N(obj_encode_utf8(lower_proxy), &n_prot); r_obj* lower_complete = KEEP_N(vec_detect_complete(lower_proxy), &n_prot); if (!r_lgl_get(lower_complete, 0)) { r_abort("`lower` can't contain missing values."); } struct poly_vec* p_poly_lower = new_poly_vec(lower_proxy, type_proxy); KEEP_N(p_poly_lower->shelter, &n_prot); p_lower = p_poly_lower->p_vec; } const void* p_upper = NULL; if (use_upper) { if (vec_size(upper) != 1) { r_abort("`upper` must be size 1."); } upper = vec_cast_params( upper, ptype, args_upper, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(upper, &n_prot); r_obj* upper_proxy = KEEP_N(vec_proxy_compare(upper), &n_prot); upper_proxy = KEEP_N(obj_encode_utf8(upper_proxy), &n_prot); r_obj* upper_complete = KEEP_N(vec_detect_complete(upper_proxy), &n_prot); if (!r_lgl_get(upper_complete, 0)) { r_abort("`upper` can't contain missing values."); } struct poly_vec* p_poly_upper = new_poly_vec(upper_proxy, type_proxy); KEEP_N(p_poly_upper->shelter, &n_prot); p_upper = p_poly_upper->p_vec; } if (use_lower && use_upper && fn_compare(p_lower, 0, p_upper, 0) >= 0) { // Handle the special case of `lower >= upper` up front. // This could also be an error, but we try to be a little flexible. // These can't follow the standard code path because it assumes // `lower < upper`, like the rest of the intervals. // - `lower > upper` is an invalid interval. // - `lower = upper` will always result in an empty complement. r_obj* out = KEEP_N(r_new_list(2), &n_prot); r_list_poke(out, 0, vec_slice_unsafe(start, r_globals.empty_int)); r_list_poke(out, 1, vec_slice_unsafe(end, r_globals.empty_int)); r_obj* out_names = r_new_character(2); r_attrib_poke_names(out, out_names); r_chr_poke(out_names, 0, r_str("start")); r_chr_poke(out_names, 1, r_str("end")); r_init_data_frame(out, 0); FREE(n_prot); return out; } // Merge to sort, remove all missings, and merge all abutting intervals const bool abutting = true; const bool locations = false; r_obj* minimal = KEEP_N(vec_interval_group_info( start, end, abutting, VCTRS_INTERVAL_MISSING_drop, locations ), &n_prot); const int* v_loc_minimal_start = r_int_cbegin(r_list_get(minimal, 0)); const int* v_loc_minimal_end = r_int_cbegin(r_list_get(minimal, 1)); r_ssize size = vec_size(minimal); // Because we have the minimal interval information (i.e. no intervals overlap // or abut!), we know that the complement takes exactly `size - 1` space if // `lower` and `upper` aren't used. // // If `lower` is used, it can at most add one more interval, and // requires one more `loc_end` location. No `loc_start` location is needed // because we just append `lower` to the front if needed. // // If `upper` is used, it can at most add one more interval, and // requires one more `loc_start` location. No `loc_end` location is needed // because we just append `upper` to the end if needed. const r_ssize max_size_start = r_ssize_max(size - 1 + use_upper, 0); const r_ssize max_size_end = r_ssize_max(size - 1 + use_lower, 0); r_obj* loc_start = KEEP_N(r_alloc_integer(max_size_start), &n_prot); int* v_loc_start = r_int_begin(loc_start); r_ssize i_start = 0; r_obj* loc_end = KEEP_N(r_alloc_integer(max_size_end), &n_prot); int* v_loc_end = r_int_begin(loc_end); r_ssize i_end = 0; r_ssize i = 0; r_ssize loc_lower_is_after_start_of = -1; r_ssize loc_lower_is_before_end_of = 0; if (use_lower) { // Shift `i` forward to the first interval completely past `lower`. // Track information about where `lower` is in relation to the intervals. for (; i < size; ++i) { const r_ssize loc_start = v_loc_minimal_start[i] - 1; const r_ssize loc_end = v_loc_minimal_end[i] - 1; if (fn_compare(p_lower, 0, p_end, loc_end) == 1) { ++loc_lower_is_before_end_of; ++loc_lower_is_after_start_of; } else if (fn_compare(p_lower, 0, p_start, loc_start) >= 0) { ++loc_lower_is_after_start_of; } else { break; } } } r_ssize loc_upper_is_after_start_of = size - 1; r_ssize loc_upper_is_before_end_of = size; if (use_upper) { // Shift `size` backwards to the first interval that is completely before `upper`. // Track information about where `upper` is in relation to the intervals. for (; size - 1 >= 0; --size) { const r_ssize loc_start = v_loc_minimal_start[size - 1] - 1; const r_ssize loc_end = v_loc_minimal_end[size - 1] - 1; if (fn_compare(p_upper, 0, p_start, loc_start) == -1) { --loc_upper_is_before_end_of; --loc_upper_is_after_start_of; } else if (fn_compare(p_upper, 0, p_end, loc_end) <= 0) { --loc_upper_is_before_end_of; } else { break; } } } const bool has_intervals_between = i < size; if (use_lower && has_intervals_between) { r_ssize loc_gap_start = -1; if (loc_lower_is_before_end_of == loc_lower_is_after_start_of) { // `lower` is in the middle of an interval, use the end of that interval loc_gap_start = v_loc_minimal_end[loc_lower_is_before_end_of] - 1; } else { // `lower` is not within an interval, use `lower` append_lower = true; } // The next start location is the end of the interval that `loc_gap_start` // lines up with. We know this start location exists because of // `has_intervals_between`. const r_ssize loc_gap_end = v_loc_minimal_start[loc_lower_is_after_start_of + 1] - 1; if (!append_lower) { v_loc_start[i_start] = loc_gap_start + 1; ++i_start; } v_loc_end[i_end] = loc_gap_end + 1; ++i_end; } r_ssize loc_previous_end = -1; if (i < size) { // Set information about first usable interval loc_previous_end = v_loc_minimal_end[i] - 1; ++i; } for (; i < size; ++i) { const r_ssize loc_elt_start = v_loc_minimal_start[i] - 1; const r_ssize loc_elt_end = v_loc_minimal_end[i] - 1; const r_ssize loc_gap_start = loc_previous_end; const r_ssize loc_gap_end = loc_elt_start; v_loc_start[i_start] = loc_gap_start + 1; ++i_start; v_loc_end[i_end] = loc_gap_end + 1; ++i_end; loc_previous_end = loc_elt_end; } if (use_upper && has_intervals_between) { // The previous end location is the start of the interval that `loc_gap_end` // lines up with. We know this end location exists because of // `has_intervals_between`. const r_ssize loc_gap_start = v_loc_minimal_end[loc_upper_is_before_end_of - 1] - 1; r_ssize loc_gap_end = -1; if (loc_upper_is_before_end_of == loc_upper_is_after_start_of) { // `upper` is in the middle of an interval, use the start of that interval loc_gap_end = v_loc_minimal_start[loc_upper_is_before_end_of] - 1; } else { // `upper` is not within an interval, use `upper` append_upper = true; } v_loc_start[i_start] = loc_gap_start + 1; ++i_start; if (!append_upper) { v_loc_end[i_end] = loc_gap_end + 1; ++i_end; } } if (use_lower && use_upper && !has_intervals_between) { /* * This branch handles the case when `lower` and `upper` have no full * intervals between them. They can be in any of these states. In * particular, if they are in the same interval together, then there is * no complement. * * | [ ) [ ) | append_lower = append_upper = true. complement: -> * | [ ) [ ) | append_upper = true. complement: ) -> * | [ ) [ ) | append_lower = true. complement: -> [ * | [ ) [ ) | both in separate intervals. complement: ) -> [ * | [ ) [ ) | both in same interval! complement: none * | [ ) [ ) | both in same interval! complement: none */ bool lower_in_interval = false; bool upper_in_interval = false; r_ssize loc_gap_start = -1; if (loc_lower_is_before_end_of == loc_lower_is_after_start_of) { lower_in_interval = true; loc_gap_start = v_loc_minimal_end[loc_lower_is_before_end_of] - 1; } else { append_lower = true; } r_ssize loc_gap_end = -1; if (loc_upper_is_before_end_of == loc_upper_is_after_start_of) { upper_in_interval = true; loc_gap_end = v_loc_minimal_start[loc_upper_is_before_end_of] - 1; } else { append_upper = true; } const bool lower_and_upper_in_same_interval = lower_in_interval && upper_in_interval && (loc_lower_is_before_end_of == loc_upper_is_before_end_of); if (!append_lower && !lower_and_upper_in_same_interval) { v_loc_start[i_start] = loc_gap_start + 1; ++i_start; } if (!append_upper && !lower_and_upper_in_same_interval) { v_loc_end[i_end] = loc_gap_end + 1; ++i_end; } } // This should essentially be free. // It will only ever shrink `loc_start` and `loc_end`. loc_start = KEEP_N(r_int_resize(loc_start, i_start), &n_prot); loc_end = KEEP_N(r_int_resize(loc_end, i_end), &n_prot); // Slice `end` to get new starts and `start` to get new ends! r_obj* out_start = KEEP_N(vec_slice_unsafe(end, loc_start), &n_prot); r_obj* out_end = KEEP_N(vec_slice_unsafe(start, loc_end), &n_prot); if (append_lower || append_upper) { r_obj* args = KEEP_N(r_new_list(2), &n_prot); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = R_NilValue }; if (append_lower) { // Push `lower` to the start of the new starts r_list_poke(args, 0, lower); r_list_poke(args, 1, out_start); out_start = KEEP_N(vec_c( args, ptype, R_NilValue, &name_repair_opts, vec_args.empty, r_lazy_null ), &n_prot); } if (append_upper) { // Push `upper` to the end of the new ends r_list_poke(args, 0, out_end); r_list_poke(args, 1, upper); out_end = KEEP_N(vec_c( args, ptype, R_NilValue, &name_repair_opts, vec_args.empty, r_lazy_null ), &n_prot); } } r_obj* out = KEEP_N(r_new_list(2), &n_prot); r_list_poke(out, 0, out_start); r_list_poke(out, 1, out_end); r_obj* out_names = r_new_character(2); r_attrib_poke_names(out, out_names); r_chr_poke(out_names, 0, r_str("start")); r_chr_poke(out_names, 1, r_str("end")); r_init_data_frame(out, vec_size(out_start)); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- r_obj* ffi_interval_locate_containers(r_obj* start, r_obj* end) { return vec_interval_locate_containers(start, end); } static r_obj* vec_interval_locate_containers(r_obj* start, r_obj* end) { int n_prot = 0; int _; r_obj* ptype = vec_ptype2( start, end, args_start, args_end, r_lazy_null, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = KEEP_N(vec_ptype_finalise(ptype), &n_prot); start = vec_cast_params( start, ptype, args_start, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(start, &n_prot); end = vec_cast_params( end, ptype, args_end, vec_args.empty, r_lazy_null, S3_FALLBACK_false ); KEEP_N(end, &n_prot); r_obj* start_proxy = KEEP_N(vec_proxy_compare(start), &n_prot); start_proxy = KEEP_N(obj_encode_utf8(start_proxy), &n_prot); r_obj* end_proxy = KEEP_N(vec_proxy_compare(end), &n_prot); end_proxy = KEEP_N(obj_encode_utf8(end_proxy), &n_prot); const enum vctrs_type type_proxy = vec_proxy_typeof(start_proxy); struct poly_vec* p_poly_start = new_poly_vec(start_proxy, type_proxy); KEEP_N(p_poly_start->shelter, &n_prot); const void* p_start = p_poly_start->p_vec; struct poly_vec* p_poly_end = new_poly_vec(end_proxy, type_proxy); KEEP_N(p_poly_end->shelter, &n_prot); const void* p_end = p_poly_end->p_vec; poly_binary_int_fn* const fn_compare = poly_p_compare_na_equal(type_proxy); poly_unary_bool_fn* const fn_is_missing = poly_p_is_missing(type_proxy); const r_ssize size = vec_size(start_proxy); if (size != vec_size(end_proxy)) { r_abort("`start` and `end` must have the same size."); } // Order is computed with the first column in ascending order, and the // second column in descending order. This makes it easy to find the // containers, as any time we detect something that isn't contained in the // current container, it must be a new container. Missing intervals are up // front for easy detection. We document that we assume that if `start` is // missing, then `end` is missing too. r_obj* direction = KEEP_N(r_new_character(2), &n_prot); r_chr_poke(direction, 0, r_str("asc")); r_chr_poke(direction, 1, r_str("desc")); r_obj* na_value = KEEP_N(r_new_character(2), &n_prot); r_chr_poke(na_value, 0, r_str("smallest")); r_chr_poke(na_value, 1, r_str("largest")); r_obj* order = interval_order( start_proxy, end_proxy, direction, na_value, size ); KEEP_N(order, &n_prot); const int* v_order = r_int_cbegin(order); // Assume that half the intervals are containers. // This is probably a little high. // Apply a minimum size to avoid a size of zero. const r_ssize initial_size = r_ssize_max(size / 2, 1); struct r_dyn_array* p_loc = r_new_dyn_vector(R_TYPE_integer, initial_size); KEEP_N(p_loc->shelter, &n_prot); r_ssize i = 0; bool any_missing = false; // Move `i` past any missing intervals (they are at the front), // recording if there are any missing intervals for later. Only need to check // missingness of `start`, because we document that we assume that `end` // is missing if `start` is missing. for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; if (!fn_is_missing(p_start, loc)) { break; } any_missing = true; } r_ssize loc_container = -1; if (i < size) { // Set information about first usable container const r_ssize loc = v_order[i] - 1; loc_container = loc; r_dyn_int_push_back(p_loc, loc_container + 1); ++i; } for (; i < size; ++i) { const r_ssize loc = v_order[i] - 1; if ((fn_compare(p_start, loc_container, p_start, loc) != 1) && (fn_compare(p_end, loc_container, p_end, loc) != -1)) { // Still in current container continue; } // New container loc_container = loc; r_dyn_int_push_back(p_loc, loc_container + 1); } if (any_missing) { // Push missing container as the last container. // We know missings are at the front, so just use the first order value // as the location. This matches ascending ordering with missing values // at the end, and breaking ties with the first missing location we saw. r_dyn_int_push_back(p_loc, v_order[0]); } r_obj* out = r_dyn_unwrap(p_loc); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- /* * `interval_order()` orders the `start` and `end` values of a vector of * intervals. We document that we make the assumption that if `start` is * missing, then `end` is also missing. We also document the assumption that * partially missing (i.e. incomplete but not missing) observations are not * allowed in either bound. */ static inline r_obj* interval_order(r_obj* start, r_obj* end, r_obj* direction, r_obj* na_value, r_ssize size) { // Put them in a data frame to compute joint ordering r_obj* df = KEEP(r_new_list(2)); r_list_poke(df, 0, start); r_list_poke(df, 1, end); r_obj* df_names = r_new_character(2); r_attrib_poke_names(df, df_names); r_chr_poke(df_names, 0, r_str("start")); r_chr_poke(df_names, 1, r_str("end")); r_init_data_frame(df, size); const bool nan_distinct = false; r_obj* chr_proxy_collate = r_null; r_obj* out = vec_order( df, direction, na_value, nan_distinct, chr_proxy_collate ); FREE(1); return out; } // ----------------------------------------------------------------------------- static inline enum vctrs_interval_missing parse_missing(r_obj* missing) { if (!r_is_string(missing)) { r_abort("`missing` must be a string."); } const char* c_missing = r_chr_get_c_string(missing, 0); if (!strcmp(c_missing, "group")) return VCTRS_INTERVAL_MISSING_group; if (!strcmp(c_missing, "drop")) return VCTRS_INTERVAL_MISSING_drop; r_abort("`missing` must be either \"group\" or \"drop\"."); } // ----------------------------------------------------------------------------- void vctrs_init_interval(r_obj* ns) { args_start_ = new_wrapper_arg(NULL, "start"); args_end_ = new_wrapper_arg(NULL, "end"); args_lower_ = new_wrapper_arg(NULL, "lower"); args_upper_ = new_wrapper_arg(NULL, "upper"); } vctrs/src/assert.c0000644000176200001440000002131415157322033013651 0ustar liggesusers#include "vctrs.h" r_obj* ffi_obj_is_vector(r_obj* x) { // Not exposed at the R level for single vector checks const enum vctrs_allow_null allow_null = VCTRS_ALLOW_NULL_no; return r_lgl(obj_is_vector(x, allow_null)); } r_obj* ffi_obj_check_vector(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_lazy = { .x = r_syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); // Not exposed at the R level for single vector checks const enum vctrs_allow_null allow_null = VCTRS_ALLOW_NULL_no; obj_check_vector(x, allow_null, &arg, call); return r_null; } r_obj* ffi_list_all_vectors(r_obj* ffi_xs, r_obj* ffi_allow_null, r_obj* ffi_frame) { obj_check_list(ffi_xs, vec_args.x, (struct r_lazy) { ffi_frame, r_null }); const enum vctrs_allow_null allow_null = arg_as_allow_null(ffi_allow_null, "allow_null"); return r_lgl(list_all_vectors(ffi_xs, allow_null)); } bool list_all_vectors(r_obj* xs, enum vctrs_allow_null allow_null) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } const r_ssize size = r_length(xs); r_obj* const* v_xs = r_list_cbegin(xs); for (r_ssize i = 0; i < size; ++i) { r_obj* x = v_xs[i]; if (!obj_is_vector(x, allow_null)) { return false; } } return true; } r_obj* ffi_list_check_all_vectors(r_obj* ffi_xs, r_obj* ffi_allow_null, r_obj* ffi_frame) { // This is an internal error obj_check_list(ffi_xs, vec_args.x, (struct r_lazy) {.x = ffi_frame, .env = r_null }); struct r_lazy call = { .x = r_syms.call, .env = ffi_frame }; struct r_lazy xs_arg_lazy = { .x = syms.arg, .env = ffi_frame }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); const enum vctrs_allow_null allow_null = arg_as_allow_null(ffi_allow_null, "allow_null"); list_check_all_vectors(ffi_xs, allow_null, &xs_arg, call); return r_null; } void list_check_all_vectors( r_obj* xs, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { r_ssize i = 0; struct vctrs_arg* x_arg = new_subscript_arg_vec(p_xs_arg, xs, &i); KEEP(x_arg->shelter); const r_ssize xs_size = r_length(xs); r_obj* const* v_xs = r_list_cbegin(xs); for (; i < xs_size; ++i) { r_obj* x = v_xs[i]; obj_check_vector(x, allow_null, x_arg, call); } FREE(1); } r_obj* ffi_vec_check_size(r_obj* x, r_obj* ffi_size, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_lazy = { .x = r_syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); const r_ssize size = r_arg_as_ssize(ffi_size, "size"); // Not exposed at the R level for single vector checks const enum vctrs_allow_null allow_null = VCTRS_ALLOW_NULL_no; vec_check_size(x, size, allow_null, &arg, call); return r_null; } r_obj* ffi_list_all_size( r_obj* ffi_xs, r_obj* ffi_size, r_obj* ffi_allow_null, r_obj* ffi_frame ) { struct r_lazy error_call = {.x = ffi_frame, .env = r_null }; // This is an internal error obj_check_list(ffi_xs, vec_args.x, error_call); const r_ssize size = r_arg_as_ssize(ffi_size, "size"); const enum vctrs_allow_null allow_null = arg_as_allow_null(ffi_allow_null, "allow_null"); return r_lgl(list_all_size(ffi_xs, size, allow_null, vec_args.x, error_call)); } bool list_all_size( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; const r_ssize xs_size = r_length(xs); r_obj* xs_names = KEEP(r_names(xs)); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(p_xs_arg, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); bool out = true; for (; i < xs_size; ++i) { r_obj* x = v_xs[i]; if (!vec_is_size(x, size, allow_null, p_x_arg, call)) { out = false; break; } } FREE(2); return out; } r_obj* ffi_list_check_all_size( r_obj* ffi_xs, r_obj* ffi_size, r_obj* ffi_allow_null, r_obj* ffi_frame ) { // This is an internal error obj_check_list(ffi_xs, vec_args.x, (struct r_lazy) {.x = ffi_frame, .env = r_null }); struct r_lazy xs_arg_lazy = { .x = syms.arg, .env = ffi_frame }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); struct r_lazy call = { .x = r_syms.call, .env = ffi_frame }; const r_ssize size = r_arg_as_ssize(ffi_size, "size"); const enum vctrs_allow_null allow_null = arg_as_allow_null(ffi_allow_null, "allow_null"); list_check_all_size(ffi_xs, size, allow_null, &xs_arg, call); return r_null; } void list_check_all_size( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; const r_ssize xs_size = r_length(xs); r_obj* xs_names = KEEP(r_names(xs)); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(p_xs_arg, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); for (; i < xs_size; ++i) { vec_check_size(v_xs[i], size, allow_null, p_x_arg, call); } FREE(2); } r_obj* ffi_obj_is_list(r_obj* x) { return r_lgl(obj_is_list(x)); } r_no_return void stop_non_list_type( r_obj* x, struct vctrs_arg* arg, struct r_lazy call ) { r_eval_with_xyz(KEEP(r_parse("stop_non_list_type(x, y, z)")), x, KEEP(vctrs_arg(arg)), KEEP(r_lazy_eval(call)), vctrs_ns_env); r_stop_unreachable(); } r_obj* ffi_check_list(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_data = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_data); obj_check_list(x, &arg, call); return r_null; } r_obj* ffi_vec_check_recyclable(r_obj* x, r_obj* ffi_size, r_obj* frame) { struct r_lazy call = { .x = r_syms.call, .env = frame }; struct r_lazy arg_lazy = { .x = r_syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); const r_ssize size = r_arg_as_ssize(ffi_size, "size"); // Not exposed at the R level for single vector checks const enum vctrs_allow_null allow_null = VCTRS_ALLOW_NULL_no; vec_check_recyclable(x, size, allow_null, &arg, call); return r_null; } r_obj* ffi_list_all_recyclable( r_obj* ffi_xs, r_obj* ffi_size, r_obj* ffi_allow_null, r_obj* ffi_frame ) { struct r_lazy error_call = {.x = ffi_frame, .env = r_null }; // This is an internal error obj_check_list(ffi_xs, vec_args.x, error_call); const r_ssize size = r_arg_as_ssize(ffi_size, "size"); const enum vctrs_allow_null allow_null = arg_as_allow_null(ffi_allow_null, "allow_null"); return r_lgl(list_all_recyclable(ffi_xs, size, allow_null, vec_args.x, error_call)); } bool list_all_recyclable( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; const r_ssize xs_size = r_length(xs); r_obj* xs_names = KEEP(r_names(xs)); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(p_xs_arg, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); bool out = true; for (; i < xs_size; ++i) { r_obj* x = v_xs[i]; if (!vec_is_recyclable(x, size, allow_null, p_x_arg, call)) { out = false; break; } } FREE(2); return out; } r_obj* ffi_list_check_all_recyclable( r_obj* ffi_xs, r_obj* ffi_size, r_obj* ffi_allow_null, r_obj* ffi_frame ) { // This is an internal error obj_check_list(ffi_xs, vec_args.x, (struct r_lazy) {.x = ffi_frame, .env = r_null }); struct r_lazy xs_arg_lazy = { .x = syms.arg, .env = ffi_frame }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); struct r_lazy call = { .x = r_syms.call, .env = ffi_frame }; const r_ssize size = r_arg_as_ssize(ffi_size, "size"); const enum vctrs_allow_null allow_null = arg_as_allow_null(ffi_allow_null, "allow_null"); list_check_all_recyclable(ffi_xs, size, allow_null, &xs_arg, call); return r_null; } void list_check_all_recyclable( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { if (r_typeof(xs) != R_TYPE_list) { r_stop_unexpected_type(r_typeof(xs)); } r_ssize i = 0; const r_ssize xs_size = r_length(xs); r_obj* xs_names = KEEP(r_names(xs)); r_obj* const* v_xs = r_list_cbegin(xs); struct vctrs_arg* p_x_arg = new_subscript_arg(p_xs_arg, xs_names, xs_size, &i); KEEP(p_x_arg->shelter); for (; i < xs_size; ++i) { vec_check_recyclable(v_xs[i], size, allow_null, p_x_arg, call); } FREE(2); } vctrs/src/runs.h0000644000176200001440000000030414363556517013356 0ustar liggesusers#ifndef VCTRS_RUNS_H #define VCTRS_RUNS_H #include "vctrs-core.h" r_obj* vec_identify_runs(r_obj* x, struct r_lazy error_call); r_obj* vec_run_sizes(r_obj* x, struct r_lazy error_call); #endif vctrs/src/match.c0000644000176200001440000027506615120513137013460 0ustar liggesusers#include "vctrs.h" // ----------------------------------------------------------------------------- enum vctrs_multiple { VCTRS_MULTIPLE_all = 0, VCTRS_MULTIPLE_any = 1, VCTRS_MULTIPLE_first = 2, VCTRS_MULTIPLE_last = 3, // Deprecated in favor of `relationship` VCTRS_MULTIPLE_warning = 4, VCTRS_MULTIPLE_error = 5 }; enum vctrs_relationship { VCTRS_RELATIONSHIP_none = 0, VCTRS_RELATIONSHIP_one_to_one = 1, VCTRS_RELATIONSHIP_one_to_many = 2, VCTRS_RELATIONSHIP_many_to_one = 3, VCTRS_RELATIONSHIP_many_to_many = 4, VCTRS_RELATIONSHIP_warn_many_to_many = 5 }; enum vctrs_filter { VCTRS_FILTER_none = 0, VCTRS_FILTER_min = 1, VCTRS_FILTER_max = 2 }; enum vctrs_ops { VCTRS_OPS_eq = 0, VCTRS_OPS_gt = 1, VCTRS_OPS_gte = 2, VCTRS_OPS_lt = 3, VCTRS_OPS_lte = 4 }; enum vctrs_incomplete_action { VCTRS_INCOMPLETE_ACTION_compare = 0, VCTRS_INCOMPLETE_ACTION_match = 1, VCTRS_INCOMPLETE_ACTION_value = 2, VCTRS_INCOMPLETE_ACTION_drop = 3, VCTRS_INCOMPLETE_ACTION_error = 4 }; struct vctrs_incomplete { enum vctrs_incomplete_action action; int value; }; enum vctrs_no_match_action { VCTRS_NO_MATCH_ACTION_drop = 0, VCTRS_NO_MATCH_ACTION_error = 1, VCTRS_NO_MATCH_ACTION_value = 2 }; struct vctrs_no_match { enum vctrs_no_match_action action; int value; }; enum vctrs_remaining_action { VCTRS_REMAINING_ACTION_drop = 0, VCTRS_REMAINING_ACTION_error = 1, VCTRS_REMAINING_ACTION_value = 2 }; struct vctrs_remaining { enum vctrs_remaining_action action; int value; }; struct vctrs_match_bounds { r_ssize lower; r_ssize upper; }; #define SIGNAL_NO_MATCH r_globals.na_int #define SIGNAL_INCOMPLETE -1 // ----------------------------------------------------------------------------- #include "decl/match-decl.h" // ----------------------------------------------------------------------------- // [[ register() ]] r_obj* ffi_locate_matches(r_obj* needles, r_obj* haystack, r_obj* condition, r_obj* filter, r_obj* incomplete, r_obj* no_match, r_obj* remaining, r_obj* multiple, r_obj* relationship, r_obj* nan_distinct, r_obj* chr_proxy_collate, r_obj* needles_arg, r_obj* haystack_arg, r_obj* frame) { struct r_lazy error_call = { .x = r_syms.error_call, .env = frame }; struct r_lazy internal_call = { .x = frame, .env = r_null }; const struct vctrs_incomplete c_incomplete = parse_incomplete(incomplete, internal_call); const struct vctrs_no_match c_no_match = parse_no_match(no_match, internal_call); const struct vctrs_remaining c_remaining = parse_remaining(remaining, internal_call); const enum vctrs_multiple c_multiple = parse_multiple(multiple, internal_call); const enum vctrs_relationship c_relationship = parse_relationship(relationship, internal_call); const bool c_nan_distinct = r_arg_as_bool(nan_distinct, "nan_distinct"); struct vctrs_arg c_needles_arg = vec_as_arg(needles_arg); struct vctrs_arg c_haystack_arg = vec_as_arg(haystack_arg); return vec_locate_matches( needles, haystack, condition, filter, &c_incomplete, &c_no_match, &c_remaining, c_multiple, c_relationship, c_nan_distinct, chr_proxy_collate, &c_needles_arg, &c_haystack_arg, error_call ); } static r_obj* vec_locate_matches(r_obj* needles, r_obj* haystack, r_obj* condition, r_obj* filter, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool nan_distinct, r_obj* chr_proxy_collate, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call) { int n_prot = 0; int _; r_obj* ptype = KEEP_N(vec_ptype2( needles, haystack, needles_arg, haystack_arg, error_call, S3_FALLBACK_false, &_ ), &n_prot); ptype = KEEP_N(vec_ptype_finalise(ptype), &n_prot); needles = KEEP_N(vec_cast_params( needles, ptype, needles_arg, vec_args.empty, error_call, S3_FALLBACK_false ), &n_prot); haystack = KEEP_N(vec_cast_params( haystack, ptype, haystack_arg, vec_args.empty, error_call, S3_FALLBACK_false ), &n_prot); r_ssize size_needles = vec_size(needles); r_ssize size_haystack = vec_size(haystack); // Support non-data frame types by wrapping them in a 1-col data frame if (!is_data_frame(needles)) { needles = KEEP_N(r_list(needles), &n_prot); haystack = KEEP_N(r_list(haystack), &n_prot); r_obj* names = KEEP_N(r_chr("x"), &n_prot); r_attrib_poke_names(needles, names); r_attrib_poke_names(haystack, names); r_init_data_frame(needles, size_needles); r_init_data_frame(haystack, size_haystack); } r_ssize n_cols = r_length(needles); enum vctrs_ops* v_ops = (enum vctrs_ops*) R_alloc(n_cols, sizeof(enum vctrs_ops)); parse_condition(condition, n_cols, v_ops); enum vctrs_filter* v_filters = (enum vctrs_filter*) R_alloc(n_cols, sizeof(enum vctrs_filter)); parse_filter(filter, n_cols, v_filters); bool any_filters = false; for (r_ssize i = 0; i < n_cols; ++i) { if (v_filters[i] != VCTRS_FILTER_none) { any_filters = true; break; } } if (n_cols == 0) { // If there are no columns, this operation isn't well defined. r_abort_lazy_call(error_call, "Must have at least 1 column to match on."); } // Compute the locations of incomplete values per column since computing // joint ranks per column is going to replace the incomplete values with // integer ranks r_obj* needles_complete = df_detect_complete_by_col(needles, size_needles, n_cols); KEEP_N(needles_complete, &n_prot); r_obj* haystack_complete = df_detect_complete_by_col(haystack, size_haystack, n_cols); KEEP_N(haystack_complete, &n_prot); // Compute joint xtfrm to simplify each column down to an integer vector r_obj* args = KEEP_N(df_joint_xtfrm_by_col( needles, haystack, size_needles, size_haystack, n_cols, nan_distinct, chr_proxy_collate ), &n_prot); needles = r_list_get(args, 0); haystack = r_list_get(args, 1); r_obj* out = df_locate_matches( needles, haystack, needles_complete, haystack_complete, size_needles, size_haystack, incomplete, no_match, remaining, multiple, relationship, any_filters, v_filters, v_ops, needles_arg, haystack_arg, error_call ); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static r_obj* df_locate_matches(r_obj* needles, r_obj* haystack, r_obj* needles_complete, r_obj* haystack_complete, r_ssize size_needles, r_ssize size_haystack, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call) { int n_prot = 0; r_obj* o_needles = KEEP_N(vec_order( needles, chrs_asc, chrs_smallest, true, r_null ), &n_prot); const int* v_o_needles = r_int_cbegin(o_needles); r_obj* container_info = KEEP_N(compute_nesting_container_info( haystack, size_haystack, v_ops ), &n_prot); r_obj* o_haystack = r_list_get(container_info, 0); const int* v_o_haystack = r_int_cbegin(o_haystack); // Will be `integer()` if no container ids are required. // In that case, `n_containers == 1`. r_obj* container_ids = r_list_get(container_info, 1); const int* v_container_ids = r_int_cbegin(container_ids); const int n_containers = r_as_int(r_list_get(container_info, 2)); const bool any_non_equi = r_as_bool(r_list_get(container_info, 3)); // In the case of possible multiple matches that fall in separate // nesting containers, allocate ~20% extra room const r_ssize initial_capacity = (n_containers == 1) ? size_needles : r_double_as_ssize(r_ssize_as_double(size_needles) * 1.2); struct r_dyn_array* p_loc_first_match_o_haystack = r_new_dyn_vector(R_TYPE_integer, initial_capacity); KEEP_N(p_loc_first_match_o_haystack->shelter, &n_prot); { // Temporary unstable pointer int* v_loc_first_match_o_haystack = (int*) r_dyn_begin(p_loc_first_match_o_haystack); for (r_ssize i = 0; i < size_needles; ++i) { // Initialize to no match everywhere, no need to initialize extra buffer v_loc_first_match_o_haystack[i] = SIGNAL_NO_MATCH; } p_loc_first_match_o_haystack->count = size_needles; } // If we can skip, `size_match` will always be `1` const bool skip_size_match = (multiple == VCTRS_MULTIPLE_any); struct r_dyn_array* p_size_match = NULL; if (!skip_size_match) { p_size_match = r_new_dyn_vector(R_TYPE_integer, initial_capacity); KEEP_N(p_size_match->shelter, &n_prot); int* v_size_match = (int*) r_dyn_begin(p_size_match); for (r_ssize i = 0; i < size_needles; ++i) { // No need to initialize extra buffer v_size_match[i] = 1; } p_size_match->count = size_needles; } // If we can skip, `loc_needles` will always be an increasing sequence of values const bool skip_loc_needles = (multiple == VCTRS_MULTIPLE_any); struct r_dyn_array* p_loc_needles = NULL; if (!skip_loc_needles) { p_loc_needles = r_new_dyn_vector(R_TYPE_integer, initial_capacity); KEEP_N(p_loc_needles->shelter, &n_prot); int* v_loc_needles = (int*) r_dyn_begin(p_loc_needles); for (r_ssize i = 0; i < size_needles; ++i) { // No need to initialize extra buffer v_loc_needles[i] = i; } p_loc_needles->count = size_needles; } // When filtering, we find the filtered match for a particular needle in each // nesting container of the haystack. `v_loc_filter_match_o_haystack` // keeps track of the overall filtered match loc for a needle across all // nesting containers in the haystack. const bool has_loc_filter_match_o_haystack = any_filters && (multiple == VCTRS_MULTIPLE_all || multiple == VCTRS_MULTIPLE_warning || multiple == VCTRS_MULTIPLE_error || multiple == VCTRS_MULTIPLE_first || multiple == VCTRS_MULTIPLE_last); int* v_loc_filter_match_o_haystack = NULL; if (has_loc_filter_match_o_haystack) { r_obj* loc_filter_match_o_haystack = KEEP_N(r_alloc_integer(size_needles), &n_prot); v_loc_filter_match_o_haystack = r_int_begin(loc_filter_match_o_haystack); } struct poly_vec* p_poly_needles = new_poly_vec(needles, VCTRS_TYPE_dataframe); KEEP_N(p_poly_needles->shelter, &n_prot); const struct poly_df_data* p_needles = (const struct poly_df_data*) p_poly_needles->p_vec; struct poly_vec* p_poly_haystack = new_poly_vec(haystack, VCTRS_TYPE_dataframe); KEEP_N(p_poly_haystack->shelter, &n_prot); const struct poly_df_data* p_haystack = (const struct poly_df_data*) p_poly_haystack->p_vec; const struct poly_vec* p_poly_needles_complete = new_poly_vec(needles_complete, VCTRS_TYPE_dataframe); KEEP_N(p_poly_needles_complete->shelter, &n_prot); const struct poly_df_data* p_needles_complete = (const struct poly_df_data*) p_poly_needles_complete->p_vec; struct poly_vec* p_poly_haystack_complete = new_poly_vec(haystack_complete, VCTRS_TYPE_dataframe); KEEP_N(p_poly_haystack_complete->shelter, &n_prot); const struct poly_df_data* p_haystack_complete = (const struct poly_df_data*) p_poly_haystack_complete->p_vec; if (size_needles > 0) { // Recursion requires at least 1 row in needles. // In the case of size 0 needles, there is nothing to do, but this avoids // a segfault. const r_ssize col = 0; const r_ssize loc_lower_bound_o_needles = 0; const r_ssize loc_upper_bound_o_needles = size_needles - 1; const r_ssize loc_lower_bound_o_haystack = 0; const r_ssize loc_upper_bound_o_haystack = size_haystack - 1; if (n_containers == 1) { df_locate_matches_recurse( col, loc_lower_bound_o_needles, loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } else { df_locate_matches_with_containers( n_containers, v_container_ids, col, loc_lower_bound_o_needles, loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } } r_obj* out = KEEP_N(expand_compact_indices( v_o_haystack, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, skip_size_match, skip_loc_needles, incomplete, no_match, remaining, multiple, relationship, size_needles, size_haystack, any_non_equi, has_loc_filter_match_o_haystack, v_filters, v_loc_filter_match_o_haystack, p_haystack, needles_arg, haystack_arg, error_call ), &n_prot); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static void df_locate_matches_recurse(r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack) { const enum vctrs_ops op = v_ops[col]; const enum vctrs_filter filter = v_filters[col]; const r_ssize n_col = p_needles->n_col; const int* v_needles = (const int*) p_needles->v_col_ptr[col]; const int* v_needles_complete = (const int*) p_needles_complete->v_col_ptr[col]; const int* v_haystack = (const int*) p_haystack->v_col_ptr[col]; const int* v_haystack_complete = (const int*) p_haystack_complete->v_col_ptr[col]; const r_ssize loc_mid_bound_o_needles = midpoint(loc_lower_bound_o_needles, loc_upper_bound_o_needles); const r_ssize loc_mid_bound_needles = v_o_needles[loc_mid_bound_o_needles] - 1; const int val_needle = v_needles[loc_mid_bound_needles]; const bool needle_is_complete = v_needles_complete[loc_mid_bound_needles]; // Find lower and upper duplicate location for this needle const r_ssize loc_lower_duplicate_o_needles = int_locate_lower_duplicate( val_needle, v_needles, v_o_needles, loc_lower_bound_o_needles, loc_mid_bound_o_needles ); const r_ssize loc_upper_duplicate_o_needles = int_locate_upper_duplicate( val_needle, v_needles, v_o_needles, loc_mid_bound_o_needles, loc_upper_bound_o_needles ); if (!needle_is_complete && (incomplete->action == VCTRS_INCOMPLETE_ACTION_value || incomplete->action == VCTRS_INCOMPLETE_ACTION_drop || incomplete->action == VCTRS_INCOMPLETE_ACTION_error)) { // Signal incomplete needle, don't recursive into further columns. // Early return at the end of this branch. for (r_ssize i = loc_lower_duplicate_o_needles; i <= loc_upper_duplicate_o_needles; ++i) { // Will always be the first and only time the output is touched for this // needle, so we can poke directly into it const int loc_needles = v_o_needles[i] - 1; r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, SIGNAL_INCOMPLETE); } // Learned nothing about haystack, so just update lhs/rhs bounds for // `o_needles` as needed and continue on bool do_lhs = loc_lower_duplicate_o_needles > loc_lower_bound_o_needles; bool do_rhs = loc_upper_duplicate_o_needles < loc_upper_bound_o_needles; if (do_lhs) { const r_ssize lhs_loc_lower_bound_o_needles = loc_lower_bound_o_needles; const r_ssize lhs_loc_upper_bound_o_needles = loc_lower_duplicate_o_needles - 1; df_locate_matches_recurse( col, lhs_loc_lower_bound_o_needles, lhs_loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } if (do_rhs) { const r_ssize rhs_loc_lower_bound_o_needles = loc_upper_duplicate_o_needles + 1; const r_ssize rhs_loc_upper_bound_o_needles = loc_upper_bound_o_needles; df_locate_matches_recurse( col, rhs_loc_lower_bound_o_needles, rhs_loc_upper_bound_o_needles, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } return; } const struct vctrs_match_bounds bounds = int_locate_match( val_needle, v_haystack, v_o_haystack, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack ); r_ssize loc_lower_match_o_haystack = bounds.lower; r_ssize loc_upper_match_o_haystack = bounds.upper; // Adjust bounds based on non-equi condition. // If needle is NA and we are doing an exact match, then we treat it like an // equi condition here. Otherwise if needle is NA, then we are careful to // never extend the bounds to capture values past it. const enum vctrs_ops bounds_op = (!needle_is_complete && incomplete->action == VCTRS_INCOMPLETE_ACTION_match) ? VCTRS_OPS_eq : op; switch (bounds_op) { case VCTRS_OPS_lt: { // Exclude found needle loc_lower_match_o_haystack = loc_upper_match_o_haystack + 1; if (needle_is_complete) { loc_upper_match_o_haystack = loc_upper_bound_o_haystack; } break; } case VCTRS_OPS_lte: { if (needle_is_complete) { loc_upper_match_o_haystack = loc_upper_bound_o_haystack; } break; } case VCTRS_OPS_gt: { // Exclude found needle loc_upper_match_o_haystack = loc_lower_match_o_haystack - 1; if (needle_is_complete) { loc_lower_match_o_haystack = loc_lower_bound_o_haystack; } break; } case VCTRS_OPS_gte: { if (needle_is_complete) { loc_lower_match_o_haystack = loc_lower_bound_o_haystack; } break; } case VCTRS_OPS_eq: { break; } } if (needle_is_complete && (op == VCTRS_OPS_gt || op == VCTRS_OPS_gte) && (loc_lower_match_o_haystack <= loc_upper_match_o_haystack)) { // In this specific case, a non-NA needle may match an NA in the haystack // after applying the non-equi adjustments above because NA values are // always ordered as the "smallest" values, and we set // `loc_lower_match_o_haystack` to be `loc_lower_bound_o_haystack`, which // may capture NAs at the lower bound. If there is an NA on the lower bound, // we avoid it by finding the last NA and then going 1 location beyond it. const r_ssize loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const bool lower_match_haystack_is_complete = v_haystack_complete[loc_lower_match_haystack]; if (!lower_match_haystack_is_complete) { // Find the last incomplete value loc_lower_match_o_haystack = int_locate_upper_incomplete( v_haystack_complete, v_o_haystack, loc_lower_match_o_haystack, loc_upper_match_o_haystack ); // Exclude it and all before it ++loc_lower_match_o_haystack; } } if (loc_lower_match_o_haystack <= loc_upper_match_o_haystack) { // Hit! switch (filter) { case VCTRS_FILTER_max: { if (!needle_is_complete || op == VCTRS_OPS_eq) { // Lower match value will already equal upper match value break; } // We want the max values of this group. That's the upper match of the // haystack and its corresponding lower duplicate. const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int loc_upper_match_haystack = v_o_haystack[loc_upper_match_o_haystack] - 1; const int val_lower_match_haystack = v_haystack[loc_lower_match_haystack]; const int val_upper_match_haystack = v_haystack[loc_upper_match_haystack]; if (val_lower_match_haystack != val_upper_match_haystack) { loc_lower_match_o_haystack = int_locate_lower_duplicate( val_upper_match_haystack, v_haystack, v_o_haystack, loc_lower_match_o_haystack, loc_upper_match_o_haystack ); } break; } case VCTRS_FILTER_min: { if (!needle_is_complete || op == VCTRS_OPS_eq) { // Lower match value will already equal upper match value break; } // We want the min values of this group. That's the lower match of the // haystack and its corresponding upper duplicate. const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int loc_upper_match_haystack = v_o_haystack[loc_upper_match_o_haystack] - 1; const int val_lower_match_haystack = v_haystack[loc_lower_match_haystack]; const int val_upper_match_haystack = v_haystack[loc_upper_match_haystack]; if (val_lower_match_haystack != val_upper_match_haystack) { loc_upper_match_o_haystack = int_locate_upper_duplicate( val_lower_match_haystack, v_haystack, v_o_haystack, loc_lower_match_o_haystack, loc_upper_match_o_haystack ); } break; } case VCTRS_FILTER_none: { break; } } if (col < n_col - 1) { // For this column, we've bounded the needles locations to the upper/lower // duplicates of the current needle, and the haystack locations to the // upper/lower matches of that needle. Now recurse into the next column // to further refine the boundaries. df_locate_matches_recurse( col + 1, loc_lower_duplicate_o_needles, loc_upper_duplicate_o_needles, loc_lower_match_o_haystack, loc_upper_match_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } else { // We just finished locating matches for the last column, // and we still have at least 1 hit, so record it for (r_ssize i = loc_lower_duplicate_o_needles; i <= loc_upper_duplicate_o_needles; ++i) { const int loc_needles = v_o_needles[i] - 1; const int loc_first_match_o_haystack = r_dyn_int_get(p_loc_first_match_o_haystack, loc_needles); const bool first_touch = loc_first_match_o_haystack == r_globals.na_int; switch (multiple) { case VCTRS_MULTIPLE_any: { if (first_touch) { // Arbitrarily record the lower match r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, loc_lower_match_o_haystack); break; } if (any_filters) { const int loc_first_match_haystack = v_o_haystack[loc_first_match_o_haystack] - 1; const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int cmp = p_matches_df_compare_na_equal( p_haystack, loc_lower_match_haystack, p_haystack, loc_first_match_haystack, v_filters ); // -1 = New haystack value "loses", nothing to update // 1 = New haystack value "wins", it becomes new match // 0 = Equal values, nothing to update if (cmp == 1) { r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, loc_lower_match_o_haystack); } } break; } case VCTRS_MULTIPLE_all: case VCTRS_MULTIPLE_error: case VCTRS_MULTIPLE_warning: case VCTRS_MULTIPLE_first: case VCTRS_MULTIPLE_last: { const int size_match = loc_upper_match_o_haystack - loc_lower_match_o_haystack + 1; if (first_touch) { r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, loc_lower_match_o_haystack); r_dyn_int_poke(p_size_match, loc_needles, size_match); if (any_filters) { v_loc_filter_match_o_haystack[loc_needles] = loc_lower_match_o_haystack; } break; } if (any_filters) { const int loc_filter_match_o_haystack = v_loc_filter_match_o_haystack[loc_needles]; const int loc_filter_match_haystack = v_o_haystack[loc_filter_match_o_haystack] - 1; const int loc_lower_match_haystack = v_o_haystack[loc_lower_match_o_haystack] - 1; const int cmp = p_matches_df_compare_na_equal( p_haystack, loc_lower_match_haystack, p_haystack, loc_filter_match_haystack, v_filters ); // -1 = New haystack value "loses", nothing to update // 1 = New haystack value "wins", it becomes new filter match // 0 = Equal values, fall through and append this set of matches // Note that in the 1 case, we have no way to invalidate the old // match at this point in time. Instead, we record all matches and // in `expand_compact_indices()` we skip the ones that aren't // equivalent to the filter match. if (cmp == -1) { break; } else if (cmp == 1) { v_loc_filter_match_o_haystack[loc_needles] = loc_lower_match_o_haystack; } } r_dyn_push_back(p_loc_first_match_o_haystack, &loc_lower_match_o_haystack); r_dyn_push_back(p_size_match, &size_match); r_dyn_push_back(p_loc_needles, &loc_needles); break; } } } } } else if (col < n_col - 1 && (incomplete->action == VCTRS_INCOMPLETE_ACTION_value || incomplete->action == VCTRS_INCOMPLETE_ACTION_drop || incomplete->action == VCTRS_INCOMPLETE_ACTION_error)) { // This branch occurs if there is no match in `haystack` for this needle, // but we aren't on the last column and we are tracking incomplete needles. // Before we move on from this needle, we check its future columns for // incomplete values. If the current `val_needles` was incomplete, it would // have already been caught above, so we only look at future columns. for (r_ssize i = loc_lower_duplicate_o_needles; i <= loc_upper_duplicate_o_needles; ++i) { const r_ssize loc_needles = v_o_needles[i] - 1; for (r_ssize j = col + 1; j < n_col; ++j) { const int* v_future_needles_complete = (const int*) p_needles_complete->v_col_ptr[j]; const bool future_needle_is_complete = v_future_needles_complete[loc_needles]; if (!future_needle_is_complete) { r_dyn_int_poke(p_loc_first_match_o_haystack, loc_needles, SIGNAL_INCOMPLETE); break; } } } } // At this point we have finished recording matches for the current needle in // this column, and we need to move on to other needles on the LHS and RHS // of the current needle (remember the current needle is the midpoint). For // the `==` op case we can also limit the haystack bounds we search in for // needles on the LHS/RHS, since those needles won't ever match the current // haystack values. bool do_lhs = false; bool do_rhs = false; // Default to current bounds r_ssize lhs_loc_lower_bound_o_needles = loc_lower_bound_o_needles; r_ssize lhs_loc_upper_bound_o_needles = loc_upper_bound_o_needles; r_ssize lhs_loc_lower_bound_o_haystack = loc_lower_bound_o_haystack; r_ssize lhs_loc_upper_bound_o_haystack = loc_upper_bound_o_haystack; r_ssize rhs_loc_lower_bound_o_needles = loc_lower_bound_o_needles; r_ssize rhs_loc_upper_bound_o_needles = loc_upper_bound_o_needles; r_ssize rhs_loc_lower_bound_o_haystack = loc_lower_bound_o_haystack; r_ssize rhs_loc_upper_bound_o_haystack = loc_upper_bound_o_haystack; switch (op) { case VCTRS_OPS_eq: { do_lhs = (loc_lower_duplicate_o_needles > loc_lower_bound_o_needles) && (loc_lower_match_o_haystack > loc_lower_bound_o_haystack); do_rhs = (loc_upper_duplicate_o_needles < loc_upper_bound_o_needles) && (loc_upper_match_o_haystack < loc_upper_bound_o_haystack); // Limit bounds of both needle and haystack using existing info if (do_lhs) { lhs_loc_upper_bound_o_needles = loc_lower_duplicate_o_needles - 1; lhs_loc_upper_bound_o_haystack = loc_lower_match_o_haystack - 1; } if (do_rhs) { rhs_loc_lower_bound_o_needles = loc_upper_duplicate_o_needles + 1; rhs_loc_lower_bound_o_haystack = loc_upper_match_o_haystack + 1; } break; } case VCTRS_OPS_lt: case VCTRS_OPS_lte: case VCTRS_OPS_gt: case VCTRS_OPS_gte: { // Can't update haystack here, as nesting containers make this impossible do_lhs = loc_lower_duplicate_o_needles > loc_lower_bound_o_needles; do_rhs = loc_upper_duplicate_o_needles < loc_upper_bound_o_needles; if (do_lhs) { lhs_loc_upper_bound_o_needles = loc_lower_duplicate_o_needles - 1; } if (do_rhs) { rhs_loc_lower_bound_o_needles = loc_upper_duplicate_o_needles + 1; } break; } } if (do_lhs) { df_locate_matches_recurse( col, lhs_loc_lower_bound_o_needles, lhs_loc_upper_bound_o_needles, lhs_loc_lower_bound_o_haystack, lhs_loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } if (do_rhs) { df_locate_matches_recurse( col, rhs_loc_lower_bound_o_needles, rhs_loc_upper_bound_o_needles, rhs_loc_lower_bound_o_haystack, rhs_loc_upper_bound_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); } } // ----------------------------------------------------------------------------- static void df_locate_matches_with_containers(int n_containers, const int* v_container_ids, r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack) { const int* v_haystack = v_container_ids; for (int i = 0; i < n_containers; ++i) { const int val_needle = i; const struct vctrs_match_bounds bounds = int_locate_match( val_needle, v_haystack, v_o_haystack, loc_lower_bound_o_haystack, loc_upper_bound_o_haystack ); const r_ssize loc_lower_match_o_haystack = bounds.lower; const r_ssize loc_upper_match_o_haystack = bounds.upper; df_locate_matches_recurse( col, loc_lower_bound_o_needles, loc_upper_bound_o_needles, loc_lower_match_o_haystack, loc_upper_match_o_haystack, p_needles, p_haystack, p_needles_complete, p_haystack_complete, v_o_needles, v_o_haystack, incomplete, multiple, any_filters, v_filters, v_ops, p_loc_first_match_o_haystack, p_size_match, p_loc_needles, v_loc_filter_match_o_haystack ); // Advance lower bound for next container loc_lower_bound_o_haystack = loc_upper_match_o_haystack + 1; } } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the last incomplete value static inline r_ssize int_locate_upper_incomplete(const int* v_haystack_complete, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int haystack_is_complete = v_haystack_complete[loc_mid_bound_haystack]; if (haystack_is_complete) { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } else { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } } return loc_upper_bound_o_haystack; } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the first occurrence of `val_needle` static inline r_ssize int_locate_lower_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int val_haystack = v_haystack[loc_mid_bound_haystack]; if (int_equal_na_equal(val_needle, val_haystack)) { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } else { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } } return loc_lower_bound_o_haystack; } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the last occurrence of `val_needle` static inline r_ssize int_locate_upper_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int val_haystack = v_haystack[loc_mid_bound_haystack]; if (int_equal_na_equal(val_needle, val_haystack)) { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } else { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } } return loc_upper_bound_o_haystack; } // ----------------------------------------------------------------------------- // In a sorted array, binary search between // [loc_lower_bound_o_haystack, loc_upper_bound_o_haystack] // to find the first and last occurrence of `val_needle` static inline struct vctrs_match_bounds int_locate_match(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack) { while (loc_lower_bound_o_haystack <= loc_upper_bound_o_haystack) { const r_ssize loc_mid_bound_o_haystack = midpoint(loc_lower_bound_o_haystack, loc_upper_bound_o_haystack); const r_ssize loc_mid_bound_haystack = v_o_haystack[loc_mid_bound_o_haystack] - 1; const int val_haystack = v_haystack[loc_mid_bound_haystack]; const int cmp = int_compare_na_equal(val_needle, val_haystack); if (cmp == 1) { loc_lower_bound_o_haystack = loc_mid_bound_o_haystack + 1; } else if (cmp == -1) { loc_upper_bound_o_haystack = loc_mid_bound_o_haystack - 1; } else { // Hit! // Find lower and upper duplicate bounds for the haystack value loc_lower_bound_o_haystack = int_locate_lower_duplicate( val_haystack, v_haystack, v_o_haystack, loc_lower_bound_o_haystack, loc_mid_bound_o_haystack ); loc_upper_bound_o_haystack = int_locate_upper_duplicate( val_haystack, v_haystack, v_o_haystack, loc_mid_bound_o_haystack, loc_upper_bound_o_haystack ); break; } } return (struct vctrs_match_bounds) { loc_lower_bound_o_haystack, loc_upper_bound_o_haystack }; } // ----------------------------------------------------------------------------- static r_obj* df_joint_xtfrm_by_col(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, r_ssize n_cols, bool nan_distinct, r_obj* chr_proxy_collate) { r_obj* out = KEEP(r_alloc_list(2)); x = r_clone(x); r_list_poke(out, 0, x); y = r_clone(y); r_list_poke(out, 1, y); r_obj* const* v_x = r_list_cbegin(x); r_obj* const* v_y = r_list_cbegin(y); for (r_ssize col = 0; col < n_cols; ++col) { r_obj* x_col = v_x[col]; r_obj* y_col = v_y[col]; r_obj* xtfrms = vec_joint_xtfrm(x_col, y_col, x_size, y_size, nan_distinct, chr_proxy_collate); r_list_poke(x, col, r_list_get(xtfrms, 0)); r_list_poke(y, col, r_list_get(xtfrms, 1)); } FREE(1); return out; } // ----------------------------------------------------------------------------- static r_obj* df_detect_complete_by_col(r_obj* x, r_ssize x_size, r_ssize n_cols) { r_obj* out = KEEP(r_alloc_list(n_cols)); r_attrib_poke_names(out, r_names(x)); r_init_data_frame(out, x_size); r_obj* const* v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = v_x[i]; // Use completeness to match `vec_rank()` and `vec_match()` r_obj* complete = vec_detect_complete(col); r_list_poke(out, i, complete); } FREE(1); return out; } // ----------------------------------------------------------------------------- static inline enum vctrs_ops parse_condition_one(const char* condition) { if (!strcmp(condition, "==")) { return VCTRS_OPS_eq; } if (!strcmp(condition, ">")) { return VCTRS_OPS_gt; } if (!strcmp(condition, ">=")) { return VCTRS_OPS_gte; } if (!strcmp(condition, "<")) { return VCTRS_OPS_lt; } if (!strcmp(condition, "<=")) { return VCTRS_OPS_lte; } r_abort("`condition` must only contain \"==\", \">\", \">=\", \"<\", or \"<=\"."); } static inline void parse_condition(r_obj* condition, r_ssize n_cols, enum vctrs_ops* v_ops) { if (r_typeof(condition) != R_TYPE_character) { r_abort("`condition` must be a character vector."); } r_obj* const* v_condition = r_chr_cbegin(condition); r_ssize size_condition = vec_size(condition); if (size_condition == 1) { const char* elt = r_str_c_string(v_condition[0]); enum vctrs_ops op = parse_condition_one(elt); for (r_ssize i = 0; i < n_cols; ++i) { v_ops[i] = op; } return; } if (size_condition == n_cols) { for (r_ssize i = 0; i < n_cols; ++i) { const char* elt = r_str_c_string(v_condition[i]); v_ops[i] = parse_condition_one(elt); } return; } r_abort( "`condition` must be length 1, or the same " "length as the number of columns of the input." ); } // ----------------------------------------------------------------------------- static inline struct vctrs_incomplete parse_incomplete(r_obj* incomplete, struct r_lazy call) { if (r_length(incomplete) != 1) { r_abort_lazy_call( call, "`incomplete` must be length 1, not length %i.", r_length(incomplete) ); } if (r_is_string(incomplete)) { const char* c_incomplete = r_chr_get_c_string(incomplete, 0); if (!strcmp(c_incomplete, "compare")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_compare, .value = -1 }; } if (!strcmp(c_incomplete, "match")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_match, .value = -1 }; } if (!strcmp(c_incomplete, "drop")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_drop, .value = -1 }; } if (!strcmp(c_incomplete, "error")) { return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_error, .value = -1 }; } r_abort_lazy_call( call, "`incomplete` must be one of: \"compare\", \"match\", \"drop\", or \"error\"." ); } incomplete = vec_cast( incomplete, r_globals.empty_int, args_incomplete, vec_args.empty, call ); int c_incomplete = r_int_get(incomplete, 0); return (struct vctrs_incomplete) { .action = VCTRS_INCOMPLETE_ACTION_value, .value = c_incomplete }; } // ----------------------------------------------------------------------------- static inline enum vctrs_multiple parse_multiple(r_obj* multiple, struct r_lazy call) { if (!r_is_string(multiple)) { r_abort_lazy_call(call, "`multiple` must be a string."); } const char* c_multiple = r_chr_get_c_string(multiple, 0); if (!strcmp(c_multiple, "all")) return VCTRS_MULTIPLE_all; if (!strcmp(c_multiple, "any")) return VCTRS_MULTIPLE_any; if (!strcmp(c_multiple, "first")) return VCTRS_MULTIPLE_first; if (!strcmp(c_multiple, "last")) return VCTRS_MULTIPLE_last; // TODO: Remove deprecated support for `multiple = "error"/"warning"` if (!strcmp(c_multiple, "warning")) return VCTRS_MULTIPLE_warning; if (!strcmp(c_multiple, "error")) return VCTRS_MULTIPLE_error; r_abort_lazy_call( call, "`multiple` must be one of \"all\", \"any\", \"first\", or \"last\"." ); } // ----------------------------------------------------------------------------- static inline enum vctrs_relationship parse_relationship(r_obj* relationship, struct r_lazy call) { if (!r_is_string(relationship)) { r_abort_lazy_call(call, "`relationship` must be a string."); } const char* c_relationship = r_chr_get_c_string(relationship, 0); if (!strcmp(c_relationship, "none")) return VCTRS_RELATIONSHIP_none; if (!strcmp(c_relationship, "one-to-one")) return VCTRS_RELATIONSHIP_one_to_one; if (!strcmp(c_relationship, "one-to-many")) return VCTRS_RELATIONSHIP_one_to_many; if (!strcmp(c_relationship, "many-to-one")) return VCTRS_RELATIONSHIP_many_to_one; if (!strcmp(c_relationship, "many-to-many")) return VCTRS_RELATIONSHIP_many_to_many; if (!strcmp(c_relationship, "warn-many-to-many")) return VCTRS_RELATIONSHIP_warn_many_to_many; r_abort_lazy_call( call, "`relationship` must be one of \"none\", \"one-to-one\", \"one-to-many\", \"many-to-one\", \"many-to-many\", or \"warn-many-to-many\"." ); } // ----------------------------------------------------------------------------- static inline enum vctrs_filter parse_filter_one(const char* filter) { if (!strcmp(filter, "none")) return VCTRS_FILTER_none; if (!strcmp(filter, "min")) return VCTRS_FILTER_min; if (!strcmp(filter, "max")) return VCTRS_FILTER_max; r_abort("`filter` must be one of \"none\", \"min\", or \"max\"."); } static inline void parse_filter(r_obj* filter, r_ssize n_cols, enum vctrs_filter* v_filters) { if (r_typeof(filter) != R_TYPE_character) { r_abort("`filter` must be a character vector."); } r_obj* const* v_filter = r_chr_cbegin(filter); r_ssize size_filter = vec_size(filter); if (size_filter == 1) { const char* elt = r_str_c_string(v_filter[0]); enum vctrs_filter elt_filter = parse_filter_one(elt); for (r_ssize i = 0; i < n_cols; ++i) { v_filters[i] = elt_filter; } return; } if (size_filter == n_cols) { for (r_ssize i = 0; i < n_cols; ++i) { const char* elt = r_str_c_string(v_filter[i]); v_filters[i] = parse_filter_one(elt); } return; } r_abort( "`filter` must be length 1, or the same " "length as the number of columns of the input." ); } // ----------------------------------------------------------------------------- static inline struct vctrs_no_match parse_no_match(r_obj* no_match, struct r_lazy call) { if (r_length(no_match) != 1) { r_abort_lazy_call( call, "`no_match` must be length 1, not length %i.", r_length(no_match) ); } if (r_is_string(no_match)) { const char* c_no_match = r_chr_get_c_string(no_match, 0); if (!strcmp(c_no_match, "error")) { return (struct vctrs_no_match) { .action = VCTRS_NO_MATCH_ACTION_error, .value = -1 }; } if (!strcmp(c_no_match, "drop")) { return (struct vctrs_no_match) { .action = VCTRS_NO_MATCH_ACTION_drop, .value = -1 }; } r_abort_lazy_call( call, "`no_match` must be either \"drop\" or \"error\"." ); } no_match = vec_cast( no_match, r_globals.empty_int, args_no_match, vec_args.empty, call ); int c_no_match = r_int_get(no_match, 0); return (struct vctrs_no_match) { .action = VCTRS_NO_MATCH_ACTION_value, .value = c_no_match }; } // ----------------------------------------------------------------------------- static inline struct vctrs_remaining parse_remaining(r_obj* remaining, struct r_lazy call) { if (r_length(remaining) != 1) { r_abort_lazy_call( call, "`remaining` must be length 1, not length %i.", r_length(remaining) ); } if (r_is_string(remaining)) { const char* c_remaining = r_chr_get_c_string(remaining, 0); if (!strcmp(c_remaining, "error")) { return (struct vctrs_remaining) { .action = VCTRS_REMAINING_ACTION_error, .value = -1 }; } if (!strcmp(c_remaining, "drop")) { return (struct vctrs_remaining) { .action = VCTRS_REMAINING_ACTION_drop, .value = -1 }; } r_abort_lazy_call( call, "`remaining` must be either \"drop\" or \"error\"." ); } remaining = vec_cast( remaining, r_globals.empty_int, args_remaining, vec_args.empty, call ); int c_remaining = r_int_get(remaining, 0); return (struct vctrs_remaining) { .action = VCTRS_REMAINING_ACTION_value, .value = c_remaining }; } // ----------------------------------------------------------------------------- static inline r_obj* new_matches_result(r_obj* needles, r_obj* haystack) { r_obj* out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, needles); r_list_poke(out, 1, haystack); r_obj* names = r_alloc_character(2); r_attrib_poke_names(out, names); r_chr_poke(names, 0, strings_needles); r_chr_poke(names, 1, strings_haystack); r_init_data_frame(out, r_length(needles)); FREE(1); return out; } // ----------------------------------------------------------------------------- static r_obj* expand_compact_indices(const int* v_o_haystack, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, bool skip_size_match, bool skip_loc_needles, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, r_ssize size_needles, r_ssize size_haystack, bool any_non_equi, bool has_loc_filter_match_o_haystack, const enum vctrs_filter* v_filters, const int* v_loc_filter_match_o_haystack, const struct poly_df_data* p_haystack, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call) { int n_prot = 0; const r_ssize n_used = p_loc_first_match_o_haystack->count; const int* v_loc_first_match_o_haystack = (const int*) r_dyn_cbegin(p_loc_first_match_o_haystack); const int* v_size_match = skip_size_match ? NULL : (const int*) r_dyn_cbegin(p_size_match); const int* v_loc_needles = skip_loc_needles ? NULL : (const int*) r_dyn_cbegin(p_loc_needles); const bool one_match_per_needle = multiple == VCTRS_MULTIPLE_any || multiple == VCTRS_MULTIPLE_first || multiple == VCTRS_MULTIPLE_last; r_ssize size_out = 0; if (one_match_per_needle) { size_out = size_needles; } else { double dbl_size_out = 0; for (r_ssize i = 0; i < n_used; ++i) { // This could get extremely large with improperly specified non-equi joins. // May over-allocate in the case of `filters` with `multiple = "all"`, // or when `no_match = "drop"` or `incomplete = "drop"`. dbl_size_out += (double) v_size_match[i]; } if (dbl_size_out > R_LEN_T_MAX) { // TODO: Update this after a switch to long vector support stop_matches_overflow(dbl_size_out, error_call); } size_out = r_double_as_ssize(dbl_size_out); } r_keep_loc out_needles_pi; r_obj* out_needles = r_alloc_integer(size_out); KEEP_HERE(out_needles, &out_needles_pi); ++n_prot; r_keep_loc out_haystack_pi; r_obj* out_haystack = r_alloc_integer(size_out); KEEP_HERE(out_haystack, &out_haystack_pi); ++n_prot; int* v_out_needles = r_int_begin(out_needles); int* v_out_haystack = r_int_begin(out_haystack); const int* v_o_loc_needles = NULL; if (!skip_loc_needles) { // `loc_needles` is used to record the location of the needle that the // matches correspond to. The first `size_needles` elements will be in // sequential order, but locations after that correspond to the "extra" // matches gathered from different nesting containers. We need the order of // this `loc_needles` vector so we can process all the matches for needle 1, // then 2, then 3, etc, in that order, across all nesting containers. r_obj* loc_needles = KEEP_N(r_dyn_unwrap(p_loc_needles), &n_prot); r_obj* o_loc_needles = KEEP_N(vec_order(loc_needles, chrs_asc, chrs_smallest, true, r_null), &n_prot); v_o_loc_needles = r_int_cbegin(o_loc_needles); } bool any_multiple_needles = false; bool any_multiple_haystack = false; r_ssize loc_first_multiple_needles = -1; r_ssize loc_first_multiple_haystack = -1; // Check is always needed for `multiple = "all"`. // This also handles `relationship` options too, since if `multiple` is // `"any"`, `"first"`, or `"last"`, we can't invalidate a `relationship`. bool check_multiple_needles = multiple == VCTRS_MULTIPLE_all || // TODO: Remove deprecated support for `multiple = "error"/"warning"` multiple == VCTRS_MULTIPLE_error || multiple == VCTRS_MULTIPLE_warning; // Used to enforce `check_multiple_needles` r_ssize loc_needles_previous = r_globals.na_int; bool check_multiple_haystack = false; switch (relationship) { // Expecting `haystack` can match any number of `needles` case VCTRS_RELATIONSHIP_none: case VCTRS_RELATIONSHIP_many_to_one: case VCTRS_RELATIONSHIP_many_to_many: { check_multiple_haystack = false; break; } // Expecting `haystack` to match at most 1 `needles` case VCTRS_RELATIONSHIP_one_to_one: case VCTRS_RELATIONSHIP_one_to_many: { check_multiple_haystack = true; break; } // Only check for multiple matches in `haystack` if we are also checking // for them in `needles`. Otherwise we can't possibly have a many-to-many // issue so there is no need to check for one. case VCTRS_RELATIONSHIP_warn_many_to_many: { check_multiple_haystack = check_multiple_needles; break; } } const bool retain_remaining_haystack = remaining->action == VCTRS_REMAINING_ACTION_value || remaining->action == VCTRS_REMAINING_ACTION_error; bool track_matches_haystack = check_multiple_haystack || retain_remaining_haystack; bool* v_detect_matches_haystack = NULL; if (track_matches_haystack) { r_obj* detect_matches_haystack = KEEP_N(r_alloc_raw(size_haystack * sizeof(bool)), &n_prot); v_detect_matches_haystack = r_raw_begin(detect_matches_haystack); r_memset(v_detect_matches_haystack, 0, size_haystack * sizeof(bool)); } // For `multiple = "first" / "last"` r_ssize loc_haystack_overall = r_globals.na_int; r_ssize loc_out = 0; for (r_ssize i = 0; i < n_used; ++i) { const int loc = skip_loc_needles ? i : v_o_loc_needles[i] - 1; const int loc_first_match_o_haystack = v_loc_first_match_o_haystack[loc]; const int size_match = skip_size_match ? 1 : v_size_match[loc]; const int loc_needles = skip_loc_needles ? loc : v_loc_needles[loc]; if (loc_first_match_o_haystack == SIGNAL_INCOMPLETE) { if (size_match != 1) { r_stop_internal( "`size_match` should always be 1 in the case of incomplete values." ); } switch (incomplete->action) { case VCTRS_INCOMPLETE_ACTION_value: { v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = incomplete->value; ++loc_out; continue; } case VCTRS_INCOMPLETE_ACTION_drop: { // Do not increment `loc_out`, do not store locations continue; } case VCTRS_INCOMPLETE_ACTION_error: { stop_matches_incomplete(loc_needles, needles_arg, error_call); } case VCTRS_INCOMPLETE_ACTION_compare: case VCTRS_INCOMPLETE_ACTION_match: { r_stop_internal( "Needles should never be marked as `SIGNAL_INCOMPLETE`", "when `incomplete = 'compare'` or `incomplete = 'match'`." ); } default: { r_stop_internal("Unknown `incomplete->action`."); } } } if (loc_first_match_o_haystack == SIGNAL_NO_MATCH) { if (size_match != 1) { r_stop_internal( "`size_match` should always be 1 in the case of no matches." ); } switch (no_match->action) { case VCTRS_NO_MATCH_ACTION_value: { v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = no_match->value; ++loc_out; continue; } case VCTRS_NO_MATCH_ACTION_drop: { continue; } case VCTRS_NO_MATCH_ACTION_error: { stop_matches_nothing(loc_needles, needles_arg, haystack_arg, error_call); } default: { r_stop_internal("Unknown `no_match->action`."); } } } if (has_loc_filter_match_o_haystack) { // When recording matches, if we updated the filter match value for a // particular needle, then we weren't able to remove the old match from // `p_loc_first_match_o_haystack`. So we need to check that the current // match value in the haystack is the same as the recorded filter match // value for this needle. If it is the same, we continue, otherwise we // move on to the next value. const int loc_filter_match_o_haystack = v_loc_filter_match_o_haystack[loc_needles]; bool equal = false; if (loc_filter_match_o_haystack == loc_first_match_o_haystack) { equal = true; } else { const int loc_filter_match_haystack = v_o_haystack[loc_filter_match_o_haystack] - 1; const int loc_first_match_haystack = v_o_haystack[loc_first_match_o_haystack] - 1; equal = p_matches_df_equal_na_equal( p_haystack, loc_first_match_haystack, p_haystack, loc_filter_match_haystack, v_filters ); } if (!equal) { continue; } } if (check_multiple_needles) { if (size_match > 1) { // Easy, obvious, case. // This containment group had >1 matches for this `needle` so we // immediately handle multiple `needles` matches. any_multiple_needles = true; } else if (loc_needles == loc_needles_previous) { // We've recorded a match for this `needle` before. Remember that // `needles` are processed in increasing order across all containment // groups due to `v_o_loc_needles` so this simple tracking of the // previous `needle` works. any_multiple_needles = true; } else { // There was exactly 1 match for the `needle` in this containment group, // and we've never recorded a match for this `needle` before. // In that case we record that we've seen it for the next iteration. loc_needles_previous = loc_needles; } if (any_multiple_needles) { loc_first_multiple_needles = loc_needles; // TODO: Remove deprecated support for `multiple = "error"/"warning"` switch (multiple) { case VCTRS_MULTIPLE_all: break; case VCTRS_MULTIPLE_error: stop_matches_multiple( loc_first_multiple_needles, needles_arg, haystack_arg, error_call ); case VCTRS_MULTIPLE_warning: { warn_matches_multiple( loc_first_multiple_needles, needles_arg, haystack_arg, error_call ); break; } default: r_stop_internal("`check_multiple_needles` should have been false."); } switch (relationship) { case VCTRS_RELATIONSHIP_one_to_one: stop_matches_relationship_one_to_one( loc_first_multiple_needles, "needles", needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_many_to_one: stop_matches_relationship_many_to_one( loc_first_multiple_needles, needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_warn_many_to_many: { if (any_multiple_haystack) { warn_matches_relationship_many_to_many( loc_first_multiple_needles, loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); } break; } default: { switch (multiple) { case VCTRS_MULTIPLE_all: // We are tracking if there are multiple matches, but don't throw // any errors or warnings on them break; // TODO: Remove deprecated support for `multiple = "error"/"warning"` case VCTRS_MULTIPLE_error: r_stop_internal("`multiple = 'error'` should have thrown by now."); case VCTRS_MULTIPLE_warning: break; default: r_stop_internal("`check_multiple_needles` should have been false."); } } } // We know there are multiple and don't need to continue checking check_multiple_needles = false; } } int loc_o_haystack = loc_first_match_o_haystack; switch (multiple) { case VCTRS_MULTIPLE_first: case VCTRS_MULTIPLE_last: { if (skip_loc_needles) { // We use `v_loc_needles` unconditionally below because it should always // be available when finding the first/last match r_stop_internal( "`skip_loc_needles` should never be `true` with `multiple = 'first'/'last'`." ); } if (loc_haystack_overall == r_globals.na_int) { // Start of a new needle loc_haystack_overall = v_o_haystack[loc_o_haystack] - 1; } // Branching here seems to help a good bit when there are many matches if (multiple == VCTRS_MULTIPLE_first) { for (r_ssize j = 0; j < size_match; ++j) { const int loc_haystack = v_o_haystack[loc_o_haystack] - 1; if (loc_haystack_overall > loc_haystack) { loc_haystack_overall = loc_haystack; } ++loc_o_haystack; } } else if (multiple == VCTRS_MULTIPLE_last) { for (r_ssize j = 0; j < size_match; ++j) { const int loc_haystack = v_o_haystack[loc_o_haystack] - 1; if (loc_haystack_overall < loc_haystack) { loc_haystack_overall = loc_haystack; } ++loc_o_haystack; } } else { r_stop_internal( "`multiple` should only be 'first' or 'last' here." ); } const bool at_end_of_all_matches = (i == n_used - 1); // Check if we are at the end of the vector or if the next needle location // is different from this one, at which point we can record the match // corresponding to the first/last result bool at_end_of_needle_matches = true; if (!at_end_of_all_matches) { const int loc_next = v_o_loc_needles[i + 1] - 1; const int loc_needles_next = v_loc_needles[loc_next]; at_end_of_needle_matches = (loc_needles != loc_needles_next); } if (at_end_of_needle_matches) { v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = loc_haystack_overall + 1; if (track_matches_haystack) { if (check_multiple_haystack) { // `true` if a match already existed any_multiple_haystack = v_detect_matches_haystack[loc_haystack_overall]; if (any_multiple_haystack) { loc_first_multiple_haystack = loc_haystack_overall; switch (relationship) { case VCTRS_RELATIONSHIP_one_to_one: stop_matches_relationship_one_to_one( loc_first_multiple_haystack, "haystack", needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_one_to_many: stop_matches_relationship_one_to_many( loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_warn_many_to_many: r_stop_internal( "`relationship = 'warn-many-to-many'` with " "`multiple = 'first'/'last' should have resulted in " "`check_multiple_haystack = false`." ); default: r_stop_internal("`check_multiple_haystack` should have been false."); } } } // This haystack value was a match, so it isn't "remaining". v_detect_matches_haystack[loc_haystack_overall] = true; } ++loc_out; loc_haystack_overall = r_globals.na_int; } break; } case VCTRS_MULTIPLE_all: case VCTRS_MULTIPLE_error: case VCTRS_MULTIPLE_warning: case VCTRS_MULTIPLE_any: { for (r_ssize j = 0; j < size_match; ++j) { const int loc_haystack = v_o_haystack[loc_o_haystack] - 1; v_out_needles[loc_out] = loc_needles + 1; v_out_haystack[loc_out] = loc_haystack + 1; if (track_matches_haystack) { if (check_multiple_haystack) { // `true` if a match already existed any_multiple_haystack = v_detect_matches_haystack[loc_haystack]; if (any_multiple_haystack) { loc_first_multiple_haystack = loc_haystack; switch (relationship) { case VCTRS_RELATIONSHIP_one_to_one: stop_matches_relationship_one_to_one( loc_first_multiple_haystack, "haystack", needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_one_to_many: stop_matches_relationship_one_to_many( loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); case VCTRS_RELATIONSHIP_warn_many_to_many: { if (any_multiple_needles) { warn_matches_relationship_many_to_many( loc_first_multiple_needles, loc_first_multiple_haystack, needles_arg, haystack_arg, error_call ); } // We know there are multiple and don't need to continue checking check_multiple_haystack = false; // Only continue tracking if needed for `remaining` track_matches_haystack = retain_remaining_haystack; break; } default: r_stop_internal("`check_multiple_haystack` should have been false."); } } } // This haystack value was a match, so it isn't "remaining". v_detect_matches_haystack[loc_haystack] = true; } ++loc_out; ++loc_o_haystack; } break; } } } if (loc_out < size_out) { // Can happen with a `filter` and `multiple = "all"`, where it is possible // for potential matches coming from a different nesting container // to be skipped over in the `has_loc_filter_match_o_haystack` section. // Can also happen with `no_match = "drop"` or `incomplete = "drop"`. // This resize should be essentially free by setting truelength/growable. size_out = loc_out; out_needles = r_int_resize(out_needles, size_out); KEEP_AT(out_needles, out_needles_pi); v_out_needles = r_int_begin(out_needles); out_haystack = r_int_resize(out_haystack, size_out); KEEP_AT(out_haystack, out_haystack_pi); v_out_haystack = r_int_begin(out_haystack); } if (any_multiple_needles && any_non_equi) { // If we had multiple matches and we were doing a non-equi join, then // the needles column will be correct, but any group of multiple matches in // the haystack column will be ordered incorrectly within the needle group. // They will be ordered using the order of the original haystack values, // rather than by first appearance. Reordering the entire output data frame // orders them correctly, as within each needle group it will put the // haystack locations in ascending order (i.e. by first appearance). // This is expensive! `out` could have a huge number of matches. r_obj* both = KEEP(new_matches_result(out_needles, out_haystack)); r_obj* o_haystack_appearance = KEEP(vec_order(both, chrs_asc, chrs_smallest, true, r_null)); int* v_o_haystack_appearance = r_int_begin(o_haystack_appearance); // Avoid a second allocation by reusing the appearance order vector, // which has the same size and type as the output and we won't overwrite it r_obj* out_haystack_reordered = o_haystack_appearance; int* v_out_haystack_reordered = v_o_haystack_appearance; for (r_ssize i = 0; i < size_out; ++i) { v_out_haystack_reordered[i] = v_out_haystack[v_o_haystack_appearance[i] - 1]; } out_haystack = out_haystack_reordered; v_out_haystack = v_out_haystack_reordered; FREE(2); KEEP_AT(out_haystack, out_haystack_pi); } if (retain_remaining_haystack) { r_ssize n_remaining_haystack = 0; switch (remaining->action) { case VCTRS_REMAINING_ACTION_error: { for (r_ssize i = 0; i < size_haystack; ++i) { if (!v_detect_matches_haystack[i]) { stop_matches_remaining(i, needles_arg, haystack_arg, error_call); } } break; } case VCTRS_REMAINING_ACTION_value: { for (r_ssize i = 0; i < size_haystack; ++i) { n_remaining_haystack += !v_detect_matches_haystack[i]; } break; } case VCTRS_REMAINING_ACTION_drop: { r_stop_internal("`remaining` should never be 'drop' here."); } } if (n_remaining_haystack > 0) { // Resize to have enough room for "remaining" haystack values at the end r_ssize new_size_out = r_ssize_add(size_out, n_remaining_haystack); out_needles = r_int_resize(out_needles, new_size_out); KEEP_AT(out_needles, out_needles_pi); v_out_needles = r_int_begin(out_needles); out_haystack = r_int_resize(out_haystack, new_size_out); KEEP_AT(out_haystack, out_haystack_pi); v_out_haystack = r_int_begin(out_haystack); // Add in "remaining" values at the end of the output for (r_ssize i = size_out; i < new_size_out; ++i) { v_out_needles[i] = remaining->value; } r_ssize j = size_out; for (r_ssize i = 0; i < size_haystack; ++i) { if (!v_detect_matches_haystack[i]) { v_out_haystack[j] = i + 1; ++j; } } size_out = new_size_out; } } r_obj* out = new_matches_result(out_needles, out_haystack); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- // Registered for testing purposes // [[ register() ]] r_obj* ffi_compute_nesting_container_info(r_obj* haystack, r_obj* condition) { r_ssize n_cols = r_length(haystack); enum vctrs_ops* v_ops = (enum vctrs_ops*) R_alloc(n_cols, sizeof(enum vctrs_ops)); parse_condition(condition, n_cols, v_ops); const r_ssize size_haystack = vec_size(haystack); return compute_nesting_container_info(haystack, size_haystack, v_ops); } static r_obj* compute_nesting_container_info(r_obj* haystack, r_ssize size_haystack, const enum vctrs_ops* v_ops) { int n_prot = 0; const r_ssize n_cols = r_length(haystack); // Outputs: // - `haystack` order // - Container id vector // - Number of containers as a scalar // - Boolean for if there are any-non-equi conditions r_obj* out = KEEP_N(r_alloc_list(4), &n_prot); bool any_non_equi = false; int first_non_equi = 0; for (r_ssize i = 0; i < n_cols; ++i) { const enum vctrs_ops op = v_ops[i]; if (op != VCTRS_OPS_eq) { any_non_equi = true; first_non_equi = i; break; } } if (!any_non_equi) { // Container info isn't required for only `==` r_list_poke(out, 0, vec_order(haystack, chrs_asc, chrs_smallest, true, r_null)); r_list_poke(out, 1, r_globals.empty_int); r_list_poke(out, 2, r_int(1)); r_list_poke(out, 3, r_lgl(any_non_equi)); FREE(n_prot); return out; } r_obj* info = KEEP_N(vec_order_info( haystack, chrs_asc, chrs_smallest, true, r_null ), &n_prot); r_obj* o_haystack = r_list_get(info, 0); const int* v_o_haystack = r_int_cbegin(o_haystack); r_obj* group_sizes = r_list_get(info, 1); const int* v_group_sizes = r_int_cbegin(group_sizes); const r_ssize n_groups = r_length(group_sizes); // This is the haystack we compute container ids with. // This is initially the whole `haystack`, but will be adjusted to contain // fewer columns if there are `==` conditions before the first non-equi // condition. r_keep_loc haystack_container_pi; r_obj* haystack_container = haystack; KEEP_HERE(haystack_container, &haystack_container_pi); ++n_prot; // If there are `==` conditions before the first non-equi condition, // we separate those columns from the haystack and compute their group sizes, // which are used for computing the container ids. bool has_outer_group_sizes = false; const int* v_outer_group_sizes = NULL; if (first_non_equi != 0) { // We have equality comparisons before the first non-equi comparison. // In this case, we can skip nested containment ordering for the equality // comparisons before the first non-equi comparison if we pass on the // group sizes of the ordered equality columns as `v_outer_group_sizes`. r_obj* const* v_haystack = r_list_cbegin(haystack); r_obj* const* v_haystack_names = r_chr_cbegin(r_names(haystack)); // "Outer" data frame columns before the first non-equi condition r_obj* haystack_outer = KEEP_N(r_alloc_list(first_non_equi), &n_prot); r_obj* haystack_outer_names = r_alloc_character(first_non_equi); r_attrib_poke_names(haystack_outer, haystack_outer_names); r_init_data_frame(haystack_outer, size_haystack); for (r_ssize i = 0; i < first_non_equi; ++i) { r_list_poke(haystack_outer, i, v_haystack[i]); r_chr_poke(haystack_outer_names, i, v_haystack_names[i]); } // "Inner" data frame columns at and after the first non-equi condition r_obj* haystack_inner = KEEP_N(r_alloc_list(n_cols - first_non_equi), &n_prot); r_obj* haystack_inner_names = r_alloc_character(n_cols - first_non_equi); r_attrib_poke_names(haystack_inner, haystack_inner_names); r_init_data_frame(haystack_inner, size_haystack); for (r_ssize i = first_non_equi, j = 0; i < n_cols; ++i, ++j) { r_list_poke(haystack_inner, j, v_haystack[i]); r_chr_poke(haystack_inner_names, j, v_haystack_names[i]); } // Compute the order info of the outer columns, just to pluck off the // group sizes. These automatically create a set of groups that // "surround" the non-equi columns. r_obj* info = vec_order_info( haystack_outer, chrs_asc, chrs_smallest, true, r_null ); r_obj* outer_group_sizes = KEEP_N(r_list_get(info, 1), &n_prot); v_outer_group_sizes = r_int_cbegin(outer_group_sizes); has_outer_group_sizes = true; // Inner columns become the new container haystack haystack_container = haystack_inner; KEEP_AT(haystack_container, haystack_container_pi); } r_obj* container_ids_info = KEEP_N(compute_nesting_container_ids( haystack_container, v_o_haystack, v_group_sizes, v_outer_group_sizes, size_haystack, n_groups, has_outer_group_sizes ), &n_prot); const int n_containers = r_as_int(r_list_get(container_ids_info, 1)); if (n_containers == 1) { // If only a single container exists at this point, either there was // only 1 non-equi column which must already be in order, or we hit the // somewhat rare case of having a >1 col `haystack_container` data frame // that is already in nested containment order. In that case, original // haystack ordering is sufficient and we don't need the ids. r_list_poke(out, 0, o_haystack); r_list_poke(out, 1, r_globals.empty_int); r_list_poke(out, 2, r_int(1)); r_list_poke(out, 3, r_lgl(any_non_equi)); FREE(n_prot); return out; } // Otherwise, we need to recompute the haystack ordering accounting for // `container_ids`. One way to do this is to append `container_ids` to the // front of the `haystack` data frame and recompute the order, but since // we already have `o_haystack` and `group_sizes`, we can build a simpler // proxy for `haystack` that orders the exact same, but faster. So we end // up with a two column data frame of `container_ids` and `haystack_proxy` // to compute the new order for. r_obj* container_ids = r_list_get(container_ids_info, 0); r_obj* haystack_proxy = KEEP_N(r_alloc_integer(size_haystack), &n_prot); int* v_haystack_proxy = r_int_begin(haystack_proxy); r_ssize loc_o_haystack = 0; // Insert group number as the proxy value for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { v_haystack_proxy[v_o_haystack[loc_o_haystack] - 1] = i; ++loc_o_haystack; } } r_obj* df = KEEP_N(r_alloc_list(2), &n_prot); r_list_poke(df, 0, container_ids); r_list_poke(df, 1, haystack_proxy); r_obj* df_names = r_alloc_character(2); r_attrib_poke_names(df, df_names); r_chr_poke(df_names, 0, r_str("container_ids")); r_chr_poke(df_names, 1, r_str("haystack_proxy")); r_init_data_frame(df, size_haystack); o_haystack = KEEP_N(vec_order( df, chrs_asc, chrs_smallest, true, r_null ), &n_prot); r_list_poke(out, 0, o_haystack); r_list_poke(out, 1, container_ids); r_list_poke(out, 2, r_int(n_containers)); r_list_poke(out, 3, r_lgl(any_non_equi)); FREE(n_prot); return out; } // ----------------------------------------------------------------------------- static r_obj* compute_nesting_container_ids(r_obj* x, const int* v_order, const int* v_group_sizes, const int* v_outer_group_sizes, r_ssize size, r_ssize n_groups, bool has_outer_group_sizes) { if (!is_data_frame(x)) { r_stop_internal("`x` must be a data frame."); } int n_prot = 0; const r_ssize n_cols = r_length(x); r_obj* out = KEEP_N(r_alloc_list(2), &n_prot); r_obj* container_ids = r_alloc_integer(size); r_list_poke(out, 0, container_ids); int* v_container_ids = r_int_begin(container_ids); r_obj* n_container_ids = r_alloc_integer(1); r_list_poke(out, 1, n_container_ids); int* p_n_container_ids = r_int_begin(n_container_ids); // Initialize ids to 0, which is always our first container id value. // This means we start with 1 container. r_memset(v_container_ids, 0, size * sizeof(int)); *p_n_container_ids = 1; if (size == 0) { // Algorithm requires at least 1 row FREE(n_prot); return out; } if (n_cols == 1) { // If there is only 1 column, `x` is in increasing order already when // ordered by `v_order`. // If `v_outer_group_sizes` were supplied, within each group `x` will // be in increasing order (since the single `x` column is the one that // broke any ties), and that is all that is required. FREE(n_prot); return out; } struct r_dyn_array* p_prev_rows = r_new_dyn_vector(R_TYPE_integer, 10000); KEEP_N(p_prev_rows->shelter, &n_prot); struct poly_vec* p_poly_x = new_poly_vec(x, VCTRS_TYPE_dataframe); KEEP_N(p_poly_x->shelter, &n_prot); const void* v_x = p_poly_x->p_vec; // Will be used if `has_outer_group_sizes` is `true` r_ssize loc_outer_group_sizes = 0; r_ssize loc_next_outer_group_start = 0; r_ssize loc_group_start = 0; for (r_ssize i = 0; i < n_groups; ++i) { if (has_outer_group_sizes && loc_next_outer_group_start == loc_group_start) { // Start of a new outer group. Clear all stored previous rows. p_prev_rows->count = 0; loc_next_outer_group_start += v_outer_group_sizes[loc_outer_group_sizes]; ++loc_outer_group_sizes; } const r_ssize group_size = v_group_sizes[i]; const int cur_row = v_order[loc_group_start] - 1; int container_id = 0; int n_container_ids_group = p_prev_rows->count; for (; container_id < n_container_ids_group; ++container_id) { const int prev_row = r_dyn_int_get(p_prev_rows, container_id); if (p_nesting_container_df_compare_fully_ge_na_equal(v_x, cur_row, v_x, prev_row)) { // Current row is fully greater than or equal to previous row. // Meaning it is not a new `container_id`, and it falls in the current container. break; } } if (container_id == n_container_ids_group) { // New `container_id` for this outer group, which we add to the end r_dyn_push_back(p_prev_rows, &cur_row); ++n_container_ids_group; if (n_container_ids_group > *p_n_container_ids) { // `p_prev_rows` is reset for each outer group, // so we have to keep a running overall count *p_n_container_ids = n_container_ids_group; } } else { // Update stored row location to the current row, // since the current row is greater than or equal to it r_dyn_int_poke(p_prev_rows, container_id, cur_row); } for (r_ssize j = 0; j < group_size; ++j) { v_container_ids[v_order[loc_group_start] - 1] = container_id; ++loc_group_start; } } FREE(n_prot); return out; } static inline bool p_nesting_container_df_compare_fully_ge_na_equal(const void* x, r_ssize i, const void* y, r_ssize j) { // Checks if EVERY column of `x` is `>=` `y`. // Assumes original input that `x` and `y` came from is ordered, and that // `x` comes after `y` in terms of row location in that original input. This // means that the first column of `x` is always `>=` the first column of `y`, // so we can ignore it in the comparison. // Iterates backwards to (ideally) maximize chance of hitting the fastest // varying column. // All columns are integer vectors (ranks). const struct poly_df_data* x_data = (const struct poly_df_data*) x; const struct poly_df_data* y_data = (const struct poly_df_data*) y; const r_ssize n_col = x_data->n_col; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; for (r_ssize col = n_col - 1; col > 0; --col) { if (p_int_compare_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j) < 0) { return false; } } return true; } // ----------------------------------------------------------------------------- static inline int p_matches_df_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters) { // First broken tie wins. // All columns are integer vectors (approximate ranks). const struct poly_df_data* x_data = (const struct poly_df_data*) x; const struct poly_df_data* y_data = (const struct poly_df_data*) y; const r_ssize n_col = x_data->n_col; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; for (r_ssize col = 0; col < n_col; ++col) { const enum vctrs_filter filter = v_filters[col]; switch (filter) { case VCTRS_FILTER_none: { break; } case VCTRS_FILTER_max: { const int cmp = p_int_compare_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j); if (cmp != 0) { // Want max, new value is greater (1), signal replace (1) // Want max, new value is smaller (-1), signal keep (-1) return cmp; } break; } case VCTRS_FILTER_min: { const int cmp = p_int_compare_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j); if (cmp != 0) { // Want min, new value is smaller (-1), signal replace (1) // Want min, new value is larger (1), signal keep (-1) return -cmp; } break; } default: { r_stop_internal("Unknown `filter`."); } } } // All columns are equal, or no columns. No need to update anything. return 0; } static inline bool p_matches_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters) { // All columns are integer vectors (approximate ranks). const struct poly_df_data* x_data = (const struct poly_df_data*) x; const struct poly_df_data* y_data = (const struct poly_df_data*) y; const r_ssize n_col = x_data->n_col; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; for (r_ssize col = 0; col < n_col; ++col) { const enum vctrs_filter filter = v_filters[col]; if (filter == VCTRS_FILTER_none) { continue; } if (!p_int_equal_na_equal(v_x_col_ptr[col], i, v_y_col_ptr[col], j)) { return false; } } // All columns are equal, or no columns. return true; } // ----------------------------------------------------------------------------- static inline r_ssize midpoint(r_ssize lhs, r_ssize rhs) { return lhs + (rhs - lhs) / 2; } // ----------------------------------------------------------------------------- static inline void stop_matches_overflow(double size, struct r_lazy call) { r_obj* syms[3] = { syms_size, syms_call, NULL }; r_obj* args[3] = { KEEP(r_dbl(size)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_overflow, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_overflow"); } static inline void stop_matches_nothing(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_nothing, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_nothing"); } static inline void stop_matches_remaining(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_remaining, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_remaining"); } static inline void stop_matches_incomplete(r_ssize i, struct vctrs_arg* needles_arg, struct r_lazy call) { r_obj* syms[4] = { syms_i, syms_needles_arg, syms_call, NULL }; r_obj* args[4] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_incomplete, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_incomplete"); } static inline void stop_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_multiple, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_multiple"); } static inline void warn_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_warn_matches_multiple, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); FREE(5); } static inline void stop_matches_relationship_one_to_one(r_ssize i, const char* which, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[6] = { syms_i, syms_which, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[6] = { KEEP(r_int((int)i + 1)), KEEP(r_chr(which)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_relationship_one_to_one, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_relationship_one_to_one"); } static inline void stop_matches_relationship_one_to_many(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_relationship_one_to_many, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_relationship_one_to_many"); } static inline void stop_matches_relationship_many_to_one(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[5] = { syms_i, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[5] = { KEEP(r_int((int)i + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_matches_relationship_many_to_one, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_matches_relationship_many_to_one"); } static inline void warn_matches_relationship_many_to_many(r_ssize i, r_ssize j, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { r_obj* syms[6] = { syms_i, syms_j, syms_needles_arg, syms_haystack_arg, syms_call, NULL }; r_obj* args[6] = { KEEP(r_int((int)i + 1)), KEEP(r_int((int)j + 1)), KEEP(vctrs_arg(needles_arg)), KEEP(vctrs_arg(haystack_arg)), KEEP(r_lazy_eval_protect(call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_warn_matches_relationship_many_to_many, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); FREE(6); } // ----------------------------------------------------------------------------- void vctrs_init_match(r_obj* ns) { args_incomplete_ = new_wrapper_arg(NULL, "incomplete"); args_no_match_ = new_wrapper_arg(NULL, "no_match"); args_remaining_ = new_wrapper_arg(NULL, "remaining"); } // ----------------------------------------------------------------------------- #undef SIGNAL_NO_MATCH #undef SIGNAL_INCOMPLETE vctrs/src/order-collate.c0000644000176200001440000000543514404336165015117 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" // ----------------------------------------------------------------------------- static SEXP chr_apply(SEXP x, SEXP chr_proxy_collate); static SEXP df_apply(SEXP x, SEXP chr_proxy_collate); // [[ include("order-collate.h") ]] SEXP proxy_apply_chr_proxy_collate(SEXP proxy, SEXP chr_proxy_collate) { if (chr_proxy_collate == r_null) { return proxy; } chr_proxy_collate = PROTECT(r_as_function(chr_proxy_collate, "chr_proxy_collate")); SEXP out; switch (vec_proxy_typeof(proxy)) { case VCTRS_TYPE_character: out = chr_apply(proxy, chr_proxy_collate); break; case VCTRS_TYPE_dataframe: out = df_apply(proxy, chr_proxy_collate); break; default: out = proxy; } UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- static SEXP chr_apply(SEXP x, SEXP chr_proxy_collate) { // Don't use vctrs dispatch utils because we match argument positionally SEXP call = PROTECT(Rf_lang2(syms_chr_proxy_collate, syms_x)); SEXP mask = PROTECT(r_alloc_empty_environment(R_GlobalEnv)); Rf_defineVar(syms_chr_proxy_collate, chr_proxy_collate, mask); Rf_defineVar(syms_x, x, mask); SEXP out = PROTECT(Rf_eval(call, mask)); if (vec_typeof(out) != VCTRS_TYPE_character) { Rf_errorcall( R_NilValue, "`chr_proxy_collate` must return a character vector." ); } R_len_t x_size = vec_size(x); R_len_t out_size = vec_size(out); if (x_size != out_size) { Rf_errorcall( R_NilValue, "`chr_proxy_collate` must return a vector of the same length (%i, not %i).", x_size, out_size ); } UNPROTECT(3); return out; } // ----------------------------------------------------------------------------- static SEXP df_apply(SEXP x, SEXP chr_proxy_collate) { const r_ssize n_cols = r_length(x); const SEXP* v_x = VECTOR_PTR_RO(x); r_ssize i = 0; for (; i < n_cols; ++i) { SEXP col = v_x[i]; if (vec_proxy_typeof(col) == VCTRS_TYPE_character) { break; } } if (i == n_cols) { // No character columns return x; } SEXP out = PROTECT(r_clone_referenced(x)); for (; i < n_cols; ++i) { SEXP col = v_x[i]; if (vec_proxy_typeof(col) != VCTRS_TYPE_character) { continue; } col = chr_apply(col, chr_proxy_collate); SET_VECTOR_ELT(out, i, col); } UNPROTECT(1); return out; } vctrs/src/order-groups.c0000644000176200001440000001335715107402670015011 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" // ----------------------------------------------------------------------------- // Pair with `PROTECT_GROUP_INFO()` in the caller struct group_info* new_group_info(void) { SEXP self = PROTECT(r_new_raw(sizeof(struct group_info))); struct group_info* p_group_info = (struct group_info*) RAW(self); p_group_info->self = self; p_group_info->data_size = 0; p_group_info->data = r_globals.empty_int; p_group_info->n_groups = 0; p_group_info->max_group_size = 0; UNPROTECT(1); return p_group_info; } // ----------------------------------------------------------------------------- struct group_infos* new_group_infos(struct group_info* p_group_info0, struct group_info* p_group_info1, r_ssize max_data_size, bool force_groups, bool ignore_groups) { SEXP self = PROTECT(r_new_raw(sizeof(struct group_infos))); struct group_infos* p_group_infos = (struct group_infos*) RAW(self); SEXP p_p_group_info_data = PROTECT(r_new_raw(2 * sizeof(struct group_info*))); struct group_info** p_p_group_info = (struct group_info**) RAW(p_p_group_info_data); p_p_group_info[0] = p_group_info0; p_p_group_info[1] = p_group_info1; p_group_infos->self = self; p_group_infos->p_p_group_info_data = p_p_group_info_data; p_group_infos->p_p_group_info = p_p_group_info; p_group_infos->max_data_size = max_data_size; p_group_infos->current = 0; p_group_infos->force_groups = force_groups; p_group_infos->ignore_groups = ignore_groups; UNPROTECT(2); return p_group_infos; } // ----------------------------------------------------------------------------- static void group_realloc(r_ssize size, struct group_info* p_group_info); static r_ssize groups_realloc_size(r_ssize data_size, r_ssize max_data_size); /* * Push a group size onto the current `group_info*` * - Reallocates as needed * - Updates number of groups / max group size as well * * Should only be called through `groups_size_maybe_push()` to ensure * that we only push groups if we are tracking them. */ void groups_size_push(r_ssize size, struct group_infos* p_group_infos) { if (size == 0) { Rf_errorcall(R_NilValue, "Internal error: Group `size` to push should never be zero."); } struct group_info* p_group_info = groups_current(p_group_infos); // Extend `data` as required - reprotects itself if (p_group_info->data_size == p_group_info->n_groups) { r_ssize new_data_size = groups_realloc_size( p_group_info->data_size, p_group_infos->max_data_size ); group_realloc(new_data_size, p_group_info); } // Push group size p_group_info->p_data[p_group_info->n_groups] = size; // Bump number of groups ++p_group_info->n_groups; // Update max group size if (p_group_info->max_group_size < size) { p_group_info->max_group_size = size; } } // ----------------------------------------------------------------------------- /* * Reallocate `data` to be as long as `size`. */ static void group_realloc(r_ssize size, struct group_info* p_group_info) { // Reallocate p_group_info->data = int_resize( p_group_info->data, p_group_info->data_size, size ); // Reprotect REPROTECT(p_group_info->data, p_group_info->data_pi); // Update pointer p_group_info->p_data = INTEGER(p_group_info->data); // Update size p_group_info->data_size = size; } // ----------------------------------------------------------------------------- static r_ssize groups_realloc_size(r_ssize data_size, r_ssize max_data_size) { uint64_t new_data_size; if (data_size == 0) { // First allocation new_data_size = GROUP_DATA_SIZE_DEFAULT; } else { // Avoid potential overflow when doubling size new_data_size = ((uint64_t) data_size) * 2; } // Clamp maximum allocation size to the size of the input if (new_data_size > max_data_size) { return max_data_size; } // Can now safely cast back to `r_ssize` return (r_ssize) new_data_size; } // ----------------------------------------------------------------------------- /* * `groups_swap()` is called after each data frame column is processed. * It handles switching the `current` group info that we are working on, * and ensures that the information that might have been there before has * been zeroed out. It also ensures that the new current group info has at * least as much space as the previous one, which is especially important for * the first column swap where the 2nd group info array starts as a size 0 * integer vector (because we don't know if it will get used or not). */ void groups_swap(struct group_infos* p_group_infos) { if (p_group_infos->ignore_groups) { return; } struct group_info* p_group_info_pre = groups_current(p_group_infos); // Make the swap p_group_infos->current = 1 - p_group_infos->current; struct group_info* p_group_info_post = groups_current(p_group_infos); // Clear the info from last time the swap was made p_group_info_post->max_group_size = 0; p_group_info_post->n_groups = 0; // Ensure the new group info is at least as big as the old group info if (p_group_info_post->data_size < p_group_info_pre->data_size) { r_ssize new_data_size = p_group_info_pre->data_size; group_realloc(new_data_size, p_group_info_post); } } vctrs/src/ptype2-dispatch.h0000644000176200001440000000231315120513137015370 0ustar liggesusers#ifndef VCTRS_PTYPE2_DISPATCH_H #define VCTRS_PTYPE2_DISPATCH_H #include "vctrs-core.h" #include "ptype2.h" r_obj* vec_ptype2_dispatch_native( r_obj* x, r_obj* y, enum vctrs_type x_type, enum vctrs_type y_type, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left ); r_obj* vec_ptype2_dispatch_s3( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); r_obj* vec_invoke_coerce_method(r_obj* method_sym, r_obj* method, r_obj* x_sym, r_obj* x, r_obj* y_sym, r_obj* y, r_obj* x_arg_sym, r_obj* x_arg, r_obj* y_arg_sym, r_obj* y_arg, struct r_lazy call, enum s3_fallback s3_fallback); r_obj* vec_ptype2_default(r_obj* x, r_obj* y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call, enum s3_fallback s3_fallback); #endif vctrs/src/poly-op.h0000644000176200001440000000116115047425317013761 0ustar liggesusers#ifndef VCTRS_POLY_OP #define VCTRS_POLY_OP #include "vctrs-core.h" struct poly_vec { r_obj* shelter; r_obj* vec; const void* p_vec; enum vctrs_type type; }; struct poly_vec* new_poly_vec(r_obj* proxy, enum vctrs_type type); struct poly_df_data { enum vctrs_type* v_col_type; const void** v_col_ptr; r_ssize n_col; }; typedef int (poly_binary_int_fn)(const void* x, r_ssize i, const void* y, r_ssize j); poly_binary_int_fn* poly_p_compare_na_equal(enum vctrs_type type); typedef bool (poly_unary_bool_fn)(const void* x, r_ssize i); poly_unary_bool_fn* poly_p_is_missing(enum vctrs_type type); #endif vctrs/src/Makevars0000644000176200001440000000006614305640202013674 0ustar liggesusersPKG_CPPFLAGS = -I./rlang PKG_CFLAGS = $(C_VISIBILITY) vctrs/src/fill.c0000644000176200001440000001333514362266120013303 0ustar liggesusers#include "vctrs.h" #define INFINITE_FILL -1 static void parse_direction(SEXP x, bool* p_down, bool* p_leading); static int parse_max_fill(SEXP x); static SEXP vec_fill_missing(SEXP x, bool down, bool leading, int max_fill); // [[ register() ]] SEXP vctrs_fill_missing(SEXP x, SEXP direction, SEXP max_fill) { bool down; bool leading; parse_direction(direction, &down, &leading); int c_max_fill = parse_max_fill(max_fill); return vec_fill_missing(x, down, leading, c_max_fill); } static void vec_fill_missing_down(const int* p_na, r_ssize size, bool leading, int* p_loc); static void vec_fill_missing_down_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc); static void vec_fill_missing_up(const int* p_na, r_ssize size, bool leading, int* p_loc); static void vec_fill_missing_up_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc); static SEXP vec_fill_missing(SEXP x, bool down, bool leading, int max_fill) { r_ssize size = vec_size(x); SEXP na = PROTECT(vec_detect_missing(x)); const int* p_na = LOGICAL_RO(na); SEXP loc = PROTECT(r_new_integer(size)); int* p_loc = INTEGER(loc); const bool has_max_fill = max_fill != INFINITE_FILL; if (down) { if (has_max_fill) { vec_fill_missing_down_with_max_fill(p_na, size, leading, max_fill, p_loc); } else { vec_fill_missing_down(p_na, size, leading, p_loc); } } else { if (has_max_fill) { vec_fill_missing_up_with_max_fill(p_na, size, leading, max_fill, p_loc); } else { vec_fill_missing_up(p_na, size, leading, p_loc); } } SEXP out = vec_slice_unsafe(x, loc); UNPROTECT(2); return out; } static void vec_fill_missing_down(const int* p_na, r_ssize size, bool leading, int* p_loc) { r_ssize loc = 0; if (leading) { // Increment `loc` to the first non-missing value for (r_ssize i = 0; i < size; ++i) { if (!p_na[i]) { loc = i; break; } } // Back-fill with first non-missing value for (r_ssize i = loc - 1; i >= 0; --i) { p_loc[i] = loc + 1; } } for (r_ssize i = loc; i < size; ++i) { if (!p_na[i]) { loc = i; } p_loc[i] = loc + 1; } } static void vec_fill_missing_down_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc) { r_ssize loc = 0; if (leading) { // Increment `loc` to the first non-missing value for (r_ssize i = 0; i < size; ++i) { if (!p_na[i]) { loc = i; break; } } // Back-fill with first non-missing value with a max_fill r_ssize n_fill = 0; for (r_ssize i = loc - 1; i >= 0; --i) { if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } r_ssize n_fill = 0; for (r_ssize i = loc; i < size; ++i) { if (!p_na[i]) { loc = i; n_fill = 0; p_loc[i] = i + 1; continue; } if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } static void vec_fill_missing_up(const int* p_na, r_ssize size, bool leading, int* p_loc) { r_ssize loc = size - 1; if (leading) { // Decrement `loc` to the last non-missing value for (r_ssize i = size - 1; i >= 0; --i) { if (!p_na[i]) { loc = i; break; } } // Forward-fill with last non-missing value for (r_ssize i = loc + 1; i < size; ++i) { p_loc[i] = loc + 1; } } for (r_ssize i = loc; i >= 0; --i) { if (!p_na[i]) { loc = i; } p_loc[i] = loc + 1; } } static void vec_fill_missing_up_with_max_fill(const int* p_na, r_ssize size, bool leading, int max_fill, int* p_loc) { r_ssize loc = size - 1; if (leading) { // Decrement `loc` to the last non-missing value for (r_ssize i = size - 1; i >= 0; --i) { if (!p_na[i]) { loc = i; break; } } // Forward-fill with last non-missing value with a max_fill r_ssize n_fill = 0; for (r_ssize i = loc + 1; i < size; ++i) { if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } r_ssize n_fill = 0; for (r_ssize i = loc; i >= 0; --i) { if (!p_na[i]) { loc = i; n_fill = 0; p_loc[i] = i + 1; continue; } if (n_fill == max_fill) { p_loc[i] = i + 1; } else { p_loc[i] = loc + 1; ++n_fill; } } } // ----------------------------------------------------------------------------- static void stop_bad_direction(void); static void parse_direction(SEXP x, bool* p_down, bool* p_leading) { if (TYPEOF(x) != STRSXP || Rf_length(x) == 0) { stop_bad_direction(); } const char* str = CHAR(STRING_ELT(x, 0)); if (!strcmp(str, "down")) { *p_down = true; *p_leading = false; return; } if (!strcmp(str, "up")) { *p_down = false; *p_leading = false; return; } if (!strcmp(str, "downup")) { *p_down = true; *p_leading = true; return; } if (!strcmp(str, "updown")) { *p_down = false; *p_leading = true; return; } stop_bad_direction(); never_reached("parse_direction"); } static void stop_bad_direction(void) { r_abort("`direction` must be one of \"down\", \"up\", \"downup\", or \"updown\"."); } static int parse_max_fill(r_obj* x) { if (x == R_NilValue) { return INFINITE_FILL; } x = KEEP(vec_cast(x, r_globals.empty_int, vec_args.max_fill, vec_args.empty, r_lazy_null)); if (!r_is_positive_number(x)) { r_abort("`max_fill` must be `NULL` or a single positive integer."); } int out = r_int_get(x, 0); FREE(1); return out; } vctrs/src/callables.c0000644000176200001440000000260115120272011014256 0ustar liggesusers#include "vctrs.h" // ----------------------------------------------------------------------------- // Maturing bool maturing_obj_is_vector(SEXP x) { return obj_is_vector(x, VCTRS_ALLOW_NULL_no); } R_len_t maturing_short_vec_size(SEXP x) { return vec_size(x); } SEXP maturing_short_vec_recycle(SEXP x, R_len_t size) { return vec_recycle(x, size, vec_args.x, lazy_calls.vec_recycle); } // ----------------------------------------------------------------------------- // Defunct bool defunct_vec_is_vector(SEXP x) { Rf_errorcall(R_NilValue, "`vec_is_vector()` is defunct."); } // ----------------------------------------------------------------------------- // Experimental SEXP exp_vec_cast(SEXP x, SEXP to) { return vec_cast(x, to, vec_args.empty, vec_args.empty, r_lazy_null); } SEXP exp_vec_chop(SEXP x, SEXP indices) { return vec_chop_unsafe(x, indices, r_null); } SEXP exp_vec_slice_impl(SEXP x, SEXP subscript) { return vec_slice_unsafe(x, subscript); } SEXP exp_vec_names(SEXP x) { return vec_names(x); } SEXP exp_vec_set_names(SEXP x, SEXP names) { return vec_set_names(x, names, VCTRS_OWNERSHIP_foreign); } SEXP exp_short_compact_seq(R_len_t start, R_len_t size, bool increasing) { return compact_seq(start, size, increasing); } void exp_short_init_compact_seq(int* p, R_len_t start, R_len_t size, bool increasing) { init_compact_seq(p, start, size, increasing); } vctrs/src/cast-dispatch.c0000644000176200001440000000603315113325071015075 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "type-factor.h" #include "type-tibble.h" r_obj* vec_cast_dispatch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy) { r_obj* x = opts->x; r_obj* to = opts->to; struct vctrs_arg* x_arg = opts->p_x_arg; struct vctrs_arg* to_arg = opts->p_to_arg; int dir = 0; enum vctrs_type2_s3 type2_s3 = vec_typeof2_s3_impl(x, to, x_type, to_type, &dir); switch (type2_s3) { case VCTRS_TYPE2_S3_character_bare_factor: if (dir == 0) { return chr_as_factor(x, to, lossy, to_arg); } else { return fct_as_character(x, x_arg); } case VCTRS_TYPE2_S3_character_bare_ordered: if (dir == 0) { return chr_as_ordered(x, to, lossy, to_arg); } else { return ord_as_character(x, x_arg); } case VCTRS_TYPE2_S3_bare_factor_bare_factor: return fct_as_factor(x, to, lossy, x_arg, to_arg); case VCTRS_TYPE2_S3_bare_ordered_bare_ordered: return ord_as_ordered(opts); case VCTRS_TYPE2_S3_bare_date_bare_posixct: if (dir == 0) { return date_as_posixct(x, to); } else { return posixct_as_date(x, lossy); } case VCTRS_TYPE2_S3_bare_date_bare_posixlt: if (dir == 0) { return date_as_posixlt(x, to); } else { return posixlt_as_date(x, lossy); } case VCTRS_TYPE2_S3_bare_posixct_bare_posixlt: if (dir == 0) { return posixct_as_posixlt(x, to); } else { return posixlt_as_posixct(x, to); } case VCTRS_TYPE2_S3_bare_date_bare_date: return date_as_date(x); case VCTRS_TYPE2_S3_bare_posixct_bare_posixct: return posixct_as_posixct(x, to); case VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt: return posixlt_as_posixlt(x, to); case VCTRS_TYPE2_S3_dataframe_bare_tibble: if (dir == 0) { return tib_cast(opts); } else { return df_cast_opts(opts); } case VCTRS_TYPE2_S3_bare_tibble_bare_tibble: return tib_cast(opts); default: return r_null; } } // [[ register() ]] r_obj* ffi_cast_dispatch_native(r_obj* x, r_obj* to, r_obj* opts, r_obj* x_arg, r_obj* to_arg, r_obj* frame) { struct vctrs_arg c_x_arg = vec_as_arg(x_arg); struct vctrs_arg c_to_arg = vec_as_arg(to_arg); struct r_lazy call = { .x = syms_call, .env = frame }; struct cast_opts cast_opts = new_cast_opts( x, to, &c_x_arg, &c_to_arg, call, opts ); bool lossy = false; r_obj* out = vec_cast_dispatch_native(&cast_opts, vec_typeof(x), vec_typeof(to), &lossy); if (lossy || out == r_null) { return vec_cast_default(x, to, &c_x_arg, &c_to_arg, cast_opts.call, cast_opts.s3_fallback); } else { return out; } } vctrs/src/subscript-loc.h0000644000176200001440000000255215156001116015144 0ustar liggesusers#ifndef VCTRS_SUBSCRIPT_LOC_H #define VCTRS_SUBSCRIPT_LOC_H #include "vctrs-core.h" #include "subscript.h" enum subscript_missing { SUBSCRIPT_MISSING_PROPAGATE = 0, SUBSCRIPT_MISSING_REMOVE, SUBSCRIPT_MISSING_ERROR }; enum num_loc_negative { LOC_NEGATIVE_INVERT = 0, LOC_NEGATIVE_ERROR, LOC_NEGATIVE_IGNORE }; enum num_loc_oob { LOC_OOB_ERROR = 0, LOC_OOB_REMOVE, LOC_OOB_EXTEND }; enum num_loc_zero { LOC_ZERO_REMOVE = 0, LOC_ZERO_ERROR, LOC_ZERO_IGNORE }; struct location_opts { struct subscript_opts subscript_opts; enum num_loc_negative loc_negative; enum num_loc_oob loc_oob; enum num_loc_zero loc_zero; enum subscript_missing missing; }; static inline struct location_opts new_location_opts_assign(void) { return (struct location_opts) { .subscript_opts = new_subscript_opts_assign() }; } r_obj* vec_as_location(r_obj* i, r_ssize n, r_obj* names); r_obj* vec_as_location_ctxt(r_obj* subscript, r_ssize n, r_obj* names, struct vctrs_arg* arg, struct r_lazy call); r_obj* vec_as_location_opts(r_obj* subscript, r_ssize n, r_obj* names, const struct location_opts* location_opts); #endif vctrs/src/equal.c0000644000176200001440000005020015157322033013453 0ustar liggesusers#include "vctrs.h" #include "decl/equal-decl.h" // ----------------------------------------------------------------------------- // [[ register() ]] SEXP ffi_vec_equal( SEXP ffi_x, SEXP ffi_y, SEXP ffi_na_equal, SEXP ffi_ptype, SEXP ffi_frame ) { struct r_lazy error_call = { .x = ffi_frame, .env = r_null }; const bool na_equal = r_arg_as_bool(ffi_na_equal, "na_equal"); return vec_equal( ffi_x, ffi_y, na_equal, ffi_ptype, vec_args.x, vec_args.y, error_call ); } static inline SEXP lgl_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP int_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP dbl_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP cpl_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP chr_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP raw_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP list_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); static inline SEXP df_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal); SEXP vec_equal( SEXP x, SEXP y, bool na_equal, SEXP ptype, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy error_call ) { SEXP args = PROTECT(r_alloc_list(2)); r_list_poke(args, 0, x); r_list_poke(args, 1, y); SEXP names = r_alloc_character(2); r_attrib_poke_names(args, names); SEXP x_name = PROTECT(vctrs_arg(p_x_arg)); SEXP y_name = PROTECT(vctrs_arg(p_y_arg)); r_chr_poke(names, 0, r_chr_get(x_name, 0)); r_chr_poke(names, 1, r_chr_get(y_name, 0)); args = PROTECT(vec_cast_common(args, ptype, vec_args.empty, error_call)); const R_len_t size = vec_size_common(args, -1, vec_args.empty, error_call); x = r_list_get(args, 0); y = r_list_get(args, 1); const bool x_recycles = vec_size(x) == 1; const bool y_recycles = vec_size(y) == 1; SEXP x_proxy = PROTECT(vec_proxy_equal(x)); SEXP y_proxy = PROTECT(vec_proxy_equal(y)); x_proxy = PROTECT(obj_encode_utf8(x_proxy)); y_proxy = PROTECT(obj_encode_utf8(y_proxy)); enum vctrs_type type = vec_proxy_typeof(x_proxy); SEXP out; switch (type) { case VCTRS_TYPE_logical: out = lgl_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_integer: out = int_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_double: out = dbl_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_complex: out = cpl_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_character: out = chr_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_raw: out = raw_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_list: out = list_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_dataframe: out = df_equal(x_proxy, y_proxy, size, x_recycles, y_recycles, na_equal); break; case VCTRS_TYPE_scalar: r_abort_lazy_call(error_call, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", type); } UNPROTECT(8); return out; } // ----------------------------------------------------------------------------- #define EQUAL_IMPL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE, X_I, Y_I) \ SEXP out = PROTECT(r_new_logical(size)); \ int* p_out = LOGICAL(out); \ \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ if (na_equal) { \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = EQUAL_NA_EQUAL(p_x[X_I], p_y[Y_I]); \ } \ } else { \ for (R_len_t i = 0; i < size; ++i) { \ p_out[i] = EQUAL_NA_PROPAGATE(p_x[X_I], p_y[Y_I]); \ } \ } \ \ UNPROTECT(1); \ return out #define EQUAL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE) \ if (x_recycles) { \ if (y_recycles) { \ EQUAL_IMPL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE, 0, 0); \ } else { \ EQUAL_IMPL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE, 0, i); \ } \ } else { \ if (y_recycles) { \ EQUAL_IMPL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE, i, 0); \ } else { \ EQUAL_IMPL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL, EQUAL_NA_PROPAGATE, i, i); \ } \ } static inline SEXP lgl_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(int, LOGICAL_RO, lgl_equal_na_equal, lgl_equal_na_propagate); } static inline SEXP int_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(int, INTEGER_RO, int_equal_na_equal, int_equal_na_propagate); } static inline SEXP dbl_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(double, REAL_RO, dbl_equal_na_equal, dbl_equal_na_propagate); } static inline SEXP cpl_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal, cpl_equal_na_propagate); } static inline SEXP chr_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(SEXP, STRING_PTR_RO, chr_equal_na_equal, chr_equal_na_propagate); } static inline SEXP raw_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(Rbyte, RAW_RO, raw_equal_na_equal, raw_equal_na_propagate); } static inline SEXP list_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { EQUAL(SEXP, VECTOR_PTR_RO, list_equal_na_equal, list_equal_na_propagate); } #undef EQUAL #undef EQUAL_IMPL // ----------------------------------------------------------------------------- static void vec_equal_col_na_equal( SEXP x, SEXP y, bool x_recycles, bool y_recycles, int* p_out, struct df_short_circuit_info* p_info ); static void vec_equal_col_na_propagate( SEXP x, SEXP y, bool x_recycles, bool y_recycles, int* p_out, struct df_short_circuit_info* p_info ); static SEXP df_equal(SEXP x, SEXP y, R_len_t size, bool x_recycles, bool y_recycles, bool na_equal) { int nprot = 0; SEXP out = PROTECT_N(r_new_logical(size), &nprot); int* p_out = LOGICAL(out); // Initialize to "equality" value // and only change if we learn that it differs for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); R_len_t n_col = Rf_length(x); if (n_col != Rf_length(y)) { Rf_errorcall(R_NilValue, "`x` and `y` must have the same number of columns"); } void (*vec_equal_col)(SEXP, SEXP, bool, bool, int*, struct df_short_circuit_info*); if (na_equal) { vec_equal_col = vec_equal_col_na_equal; } else { vec_equal_col = vec_equal_col_na_propagate; } const SEXP* p_x = VECTOR_PTR_RO(x); const SEXP* p_y = VECTOR_PTR_RO(y); for (R_len_t i = 0; i < n_col; ++i) { vec_equal_col(p_x[i], p_y[i], x_recycles, y_recycles, p_out, p_info); if (p_info->remaining == 0) { break; } } UNPROTECT(nprot); return out; } // ----------------------------------------------------------------------------- #define EQUAL_COL_IMPL(CTYPE, CONST_DEREF, EQUAL, X_I, Y_I) do { \ const CTYPE* p_x = CONST_DEREF(x); \ const CTYPE* p_y = CONST_DEREF(y); \ \ for (R_len_t i = 0; i < p_info->size; ++i) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ int eq = EQUAL(p_x[X_I], p_y[Y_I]); \ \ if (eq <= 0) { \ p_out[i] = eq; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } while (0) #define EQUAL_COL(CTYPE, CONST_DEREF, EQUAL) \ if (x_recycles) { \ if (y_recycles) { \ EQUAL_COL_IMPL(CTYPE, CONST_DEREF, EQUAL, 0, 0); \ } else { \ EQUAL_COL_IMPL(CTYPE, CONST_DEREF, EQUAL, 0, i); \ } \ } else { \ if (y_recycles) { \ EQUAL_COL_IMPL(CTYPE, CONST_DEREF, EQUAL, i, 0); \ } else { \ EQUAL_COL_IMPL(CTYPE, CONST_DEREF, EQUAL, i, i); \ } \ } static void vec_equal_col_na_equal( SEXP x, SEXP y, bool x_recycles, bool y_recycles, int* p_out, struct df_short_circuit_info* p_info ) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: EQUAL_COL(int, LOGICAL_RO, lgl_equal_na_equal); break; case VCTRS_TYPE_integer: EQUAL_COL(int, INTEGER_RO, int_equal_na_equal); break; case VCTRS_TYPE_double: EQUAL_COL(double, REAL_RO, dbl_equal_na_equal); break; case VCTRS_TYPE_complex: EQUAL_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_equal); break; case VCTRS_TYPE_character: EQUAL_COL(SEXP, STRING_PTR_RO, chr_equal_na_equal); break; case VCTRS_TYPE_raw: EQUAL_COL(Rbyte, RAW_RO, raw_equal_na_equal); break; case VCTRS_TYPE_list: EQUAL_COL(SEXP, VECTOR_PTR_RO, list_equal_na_equal); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should be flattened already."); case VCTRS_TYPE_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", vec_proxy_typeof(x)); } } static void vec_equal_col_na_propagate( SEXP x, SEXP y, bool x_recycles, bool y_recycles, int* p_out, struct df_short_circuit_info* p_info ) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: EQUAL_COL(int, LOGICAL_RO, lgl_equal_na_propagate); break; case VCTRS_TYPE_integer: EQUAL_COL(int, INTEGER_RO, int_equal_na_propagate); break; case VCTRS_TYPE_double: EQUAL_COL(double, REAL_RO, dbl_equal_na_propagate); break; case VCTRS_TYPE_complex: EQUAL_COL(Rcomplex, COMPLEX_RO, cpl_equal_na_propagate); break; case VCTRS_TYPE_character: EQUAL_COL(SEXP, STRING_PTR_RO, chr_equal_na_propagate); break; case VCTRS_TYPE_raw: EQUAL_COL(Rbyte, RAW_RO, raw_equal_na_propagate); break; case VCTRS_TYPE_list: EQUAL_COL(SEXP, VECTOR_PTR_RO, list_equal_na_propagate); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should be flattened already."); case VCTRS_TYPE_scalar: Rf_errorcall(R_NilValue, "Can't compare scalars with `vec_equal()`."); default: stop_unimplemented_vctrs_type("vec_equal", vec_proxy_typeof(x)); } } #undef EQUAL_COL #undef EQUAL_COL_IMPL // ----------------------------------------------------------------------------- // [[ register() ]] r_obj* ffi_obj_equal(r_obj* x, r_obj* y) { return r_lgl(obj_equal(x, y)); } // [[ include("vctrs.h") ]] bool obj_equal(r_obj* x, r_obj* y) { x = KEEP(obj_encode_utf8(x)); y = KEEP(obj_encode_utf8(y)); const bool out = obj_equal_utf8(x, y); FREE(2); return out; } // Assumes `obj_encode_utf8()` has already been called // [[ include("vctrs.h") ]] bool obj_equal_utf8(r_obj* x, r_obj* y) { const enum r_type type = r_typeof(x); // Types must be the same if (type != r_typeof(y)) { return false; } // Every type has a chance for an "equal pointer" optimization if (x == y) { return true; } switch (type) { // Pure pointer comparison. If it didn't early exit in the pointer // comparison above, then they must not be equal. case R_TYPE_null: case R_TYPE_symbol: case R_TYPE_special: case R_TYPE_builtin: case R_TYPE_string: case R_TYPE_environment: case R_TYPE_pointer: return false; // Vectors case R_TYPE_logical: case R_TYPE_integer: case R_TYPE_double: case R_TYPE_character: case R_TYPE_raw: case R_TYPE_complex: case R_TYPE_list: return obj_vec_equal(x, y, type); // Expression vectors case R_TYPE_expression: return obj_expr_equal(x, y); // Node like case R_TYPE_dots: case R_TYPE_call: case R_TYPE_pairlist: case R_TYPE_bytecode: return obj_node_equal(x, y); // Functions case R_TYPE_closure: return obj_fn_equal(x, y); default: stop_unimplemented_type("obj_equal_utf8", type); } } // Missingness is never propagated through objects, // so `na_equal` is always `true` in these macros #define OBJ_VEC_EQUAL(CTYPE, CONST_DEREF, EQUAL_NA_EQUAL) \ do { \ CTYPE const* v_x = CONST_DEREF(x); \ CTYPE const* v_y = CONST_DEREF(y); \ \ for (r_ssize i = 0; i < n; ++i) { \ if (!EQUAL_NA_EQUAL(v_x[i], v_y[i])) { \ return false; \ } \ } \ return true; \ } \ while (0) static inline bool obj_vec_equal(r_obj* x, r_obj* y, enum r_type type) { const r_ssize n = r_length(x); // Length check if (n != r_length(y)) { return false; } // Attribute check if (!obj_attrib_equal(x, y)) { return false; } // Data check switch (type) { case R_TYPE_logical: OBJ_VEC_EQUAL(int, r_lgl_cbegin, lgl_equal_na_equal); case R_TYPE_integer: OBJ_VEC_EQUAL(int, r_int_cbegin, int_equal_na_equal); case R_TYPE_double: OBJ_VEC_EQUAL(double, r_dbl_cbegin, dbl_equal_na_equal); case R_TYPE_character: OBJ_VEC_EQUAL(r_obj*, r_chr_cbegin, chr_equal_na_equal); case R_TYPE_raw: OBJ_VEC_EQUAL(Rbyte, r_raw_cbegin, raw_equal_na_equal); case R_TYPE_complex: OBJ_VEC_EQUAL(r_complex, r_cpl_cbegin, cpl_equal_na_equal); case R_TYPE_list: OBJ_VEC_EQUAL(r_obj*, r_list_cbegin, list_equal_na_equal); default: r_stop_unreachable(); } } #undef OBJ_VEC_EQUAL // Same as implementation for lists where we check length and attributes as // well, but `VECTOR_PTR_RO()` doesn't support EXPRSXP, so we must use a // separate loop that uses `VECTOR_ELT()` instead. static inline bool obj_expr_equal(r_obj* x, r_obj* y) { const r_ssize n = r_length(x); // Length check if (n != r_length(y)) { return false; } // Attribute check if (!obj_attrib_equal(x, y)) { return false; } // Data check for (r_ssize i = 0; i < n; ++i) { r_obj* x_elt = r_list_get(x, i); r_obj* y_elt = r_list_get(y, i); if (!obj_equal_utf8(x_elt, y_elt)) { return false; } } return true; } static inline bool obj_node_equal(r_obj* x, r_obj* y) { // Attribute check if (!obj_attrib_equal(x, y)) { return false; } // Tag check if (!obj_equal_utf8(r_node_tag(x), r_node_tag(y))) { return false; } // Value check if (!obj_equal_utf8(r_node_car(x), r_node_car(y))) { return false; } // Check rest if (!obj_equal_utf8(r_node_cdr(x), r_node_cdr(y))) { return false; } return true; } static inline bool obj_fn_equal(r_obj* x, r_obj* y) { // Attribute check if (!obj_attrib_equal(x, y)) { return false; } // Function body check if (!obj_equal_utf8(r_fn_body(x), r_fn_body(y))) { return false; } // Function environment check if (!obj_equal_utf8(r_fn_env(x), r_fn_env(y))) { return false; } // Function formals check if (!obj_equal_utf8(r_fn_formals(x), r_fn_formals(y))) { return false; } return true; } struct attrib_equal_data { r_obj* y; r_ssize x_size; }; struct attrib_count_data { r_ssize y_size; }; // Compares attributes of `x` and `y` in an order-independent manner by looking // up each of `x`'s attributes in `y` by tag, then verifying `y` has no extra // attributes. // // Note that this is not very efficient, but we can't do much better if we want // order-independent comparisons since we no longer have direct access to the // pairlists via `ATTRIB()`. It only really affects equality comparisons with // lists that have elements with many attributes, which is fairly rare. static inline bool obj_attrib_equal(r_obj* x, r_obj* y) { const bool x_has_attrib = r_attrib_has_any(x); const bool y_has_attrib = r_attrib_has_any(y); if (!x_has_attrib && !y_has_attrib) { // Neither have attributes return true; } if (x_has_attrib != y_has_attrib) { // One or the other has attributes, but not both return false; } // Ok, now we know both have attributes, compare them struct attrib_equal_data equal_data = { .y = y, .x_size = 0 }; r_obj* result = r_attrib_map(x, obj_attrib_equal_cb, &equal_data); // We got the signal that an attribute was different if (result == r_null) { return false; } // All attributes in `x` equal attributes in `y`. // Lastly, ensure `y` doesn't have more attributes than `x`. struct attrib_count_data count_data = { .y_size = 0 }; r_attrib_map(y, obj_attrib_count_cb, &count_data); return equal_data.x_size == count_data.y_size; } static r_obj* obj_attrib_equal_cb(r_obj* tag, r_obj* value, void* data) { struct attrib_equal_data* p_data = (struct attrib_equal_data*) data; p_data->x_size++; r_obj* y_value = KEEP(r_attrib_get(p_data->y, tag)); // Return `r_null` when different to signal that we are done SEXP out = obj_equal_utf8(value, y_value) ? NULL : r_null; FREE(1); return out; } static r_obj* obj_attrib_count_cb(r_obj* _tag, r_obj* _value, void* data) { struct attrib_count_data* p_data = (struct attrib_count_data*) data; p_data->y_size++; // Continue return NULL; } vctrs/src/encoding.h0000644000176200001440000000404015156537555014160 0ustar liggesusers#ifndef VCTRS_ENCODING_H #define VCTRS_ENCODING_H #include "vctrs-core.h" SEXP obj_encode_utf8(SEXP x); // String encoding normalization // // In R 4.5.0 we got `Rf_charIsUTF8()`, but we cannot use it. // // It returns: // - `true` if `IS_ASCII()`, i.e. has `SET_ASCII()` bit (also always marked `CE_NATIVE`) // - `true` if `IS_UTF8()`, i.e. has `SET_UTF8()` bit (also always marked `CE_UTF8`) // - `true` if `CE_NATIVE` (we call this "unmarked") but `utf8locale = true` // // The 3rd condition is problematic for us. For CHARSXP hashing purposes, // the following are different CHARSXPs: // // - `°C` that is marked `CE_UTF8`, and has `SET_UTF8()` bit set // - `°C` that is marked `CE_NATIVE`, but `utf8locale = true` // // Meaning `vec_match("°C", "°C")` would return `NA` with these. // // The 2nd is possible to create with `iconv(mark = FALSE)`, i.e. // `iconv("\u00B0C", from = Encoding("\u00B0C"), to = "", mark = FALSE)` // // We need the 2nd to be reencoded and marked as `CE_UTF8`, but // `Rf_charIsUTF8()` can't help us with that because it returns `true`. // // Instead, we do a more granular check of: // - `true` if `Rf_charIsASCII()`, i.e. if `IS_ASCII()` // - `true` if `Rf_getCharCE() == CE_UTF8`, i.e. if `IS_UTF8()` since if a string // has `CE_UTF8` it also has the `SET_UTF8()` bit set // // This forces the `°C` marked as `CE_NATIVE` with `utf8locale = true` to still // be forced through `Rf_translateCharUTF8()` (which does nothing due to // `utf8locale = true`) and into `Rf_mkCharCE(, CE_UTF8)`, which marks it with // `CE_UTF8` so now we can `vec_match()` against it. static inline bool str_is_ascii_or_utf8(r_obj* x) { #if (R_VERSION >= R_Version(4, 5, 0)) return Rf_charIsASCII(x) || (Rf_getCharCE(x) == CE_UTF8) || (x == r_globals.na_str); #else const int mask_ascii = 8; const int mask_utf8 = 64; const int levels = LEVELS(x); return (levels & mask_ascii) || (levels & mask_utf8) || (x == r_globals.na_str); #endif } static inline r_obj* str_as_utf8(r_obj* x) { return Rf_mkCharCE(Rf_translateCharUTF8(x), CE_UTF8); } #endif vctrs/src/case-when.h0000644000176200001440000000114015072256373014233 0ustar liggesusers#ifndef VCTRS_CASE_WHEN_H #define VCTRS_CASE_WHEN_H #include "vctrs-core.h" #include "list-combine.h" r_obj* vec_case_when( r_obj* conditions, r_obj* values, r_obj* default_, enum list_combine_unmatched unmatched, r_obj* ptype, r_ssize size, struct vctrs_arg* p_conditions_arg, struct vctrs_arg* p_values_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ); r_obj* vec_replace_when( r_obj* x, r_obj* conditions, r_obj* values, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_conditions_arg, struct vctrs_arg* p_values_arg, struct r_lazy error_call ); #endif vctrs/src/unspecified.h0000644000176200001440000000065615120272011014647 0ustar liggesusers#ifndef VCTRS_UNSPECIFIED_H #define VCTRS_UNSPECIFIED_H #include "vctrs-core.h" SEXP vec_unspecified(R_len_t n); bool vec_is_unspecified(SEXP x); enum ptype_finalise { PTYPE_FINALISE_false, PTYPE_FINALISE_true }; #define PTYPE_FINALISE_DEFAULT PTYPE_FINALISE_true static inline bool should_finalise(enum ptype_finalise finalise) { return finalise == PTYPE_FINALISE_true; } r_obj* vec_ptype_finalise(r_obj* x); #endif vctrs/src/unspecified.c0000644000176200001440000000726415132161317014655 0ustar liggesusers#include "unspecified.h" #include "vctrs.h" #include "decl/unspecified-decl.h" // Initialised at load time static SEXP vctrs_unspecified_class = NULL; SEXP vctrs_shared_empty_uns = NULL; static r_obj* syms_vec_ptype_finalise_dispatch = NULL; static r_obj* fns_vec_ptype_finalise_dispatch = NULL; // [[ include("vctrs.h") ]] SEXP vec_unspecified(R_len_t n) { SEXP out = PROTECT(Rf_allocVector(LGLSXP, n)); r_lgl_fill(out, NA_LOGICAL, n); r_attrib_poke_class(out, vctrs_unspecified_class); UNPROTECT(1); return out; } // [[ register ]] SEXP vctrs_unspecified(SEXP n) { if (Rf_length(n) != 1) { Rf_errorcall(R_NilValue, "`n` must be a single number"); } if (TYPEOF(n) != INTSXP) { n = vec_cast(n, r_globals.empty_int, vec_args.empty, vec_args.empty, r_lazy_null); } int len = INTEGER(n)[0]; return vec_unspecified(len); } // [[ include("vctrs.h") ]] bool vec_is_unspecified(SEXP x) { if (r_typeof(x) != R_TYPE_logical) { // Both `` and `` must be logical return false; } if (has_dim(x)) { // Disallow arrays from being unspecified return false; } // If it is classed, that class must be ``, otherwise check // for all `NA`s. Notably, core attributes like `names` and extraneous // attributes are allowed on `x`, and it is still considered unspecified. if (r_is_object(x)) { return r_inherits(x, "vctrs_unspecified"); } else { return lgl_is_unspecified(x); } } static inline bool lgl_is_unspecified(SEXP x) { const r_ssize size = r_length(x); if (size == 0) { // We declare `logical()` to be , not return false; } const int* p_x = r_lgl_cbegin(x); for (r_ssize i = 0; i < size; ++i) { if (p_x[i] != r_globals.na_lgl) { return false; } } return true; } // [[ register ]] SEXP vctrs_is_unspecified(SEXP x) { return Rf_ScalarLogical(vec_is_unspecified(x)); } // [[ register() ]] r_obj* vec_ptype_finalise(r_obj* x) { if (x == r_null) { return x; } struct r_lazy call = lazy_calls.vec_ptype_finalise; if (!r_is_object(x)) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, vec_args.x, call); return x; } if (vec_is_unspecified(x)) { return vec_ptype_finalise_unspecified(x); } obj_check_vector(x, VCTRS_ALLOW_NULL_no, vec_args.x, call); switch (class_type(x)) { case VCTRS_CLASS_bare_tibble: case VCTRS_CLASS_bare_data_frame: return bare_df_map(x, &vec_ptype_finalise); case VCTRS_CLASS_data_frame: return df_map(x, &vec_ptype_finalise); case VCTRS_CLASS_none: r_stop_internal("Non-S3 classes should have returned by now."); default: return vec_ptype_finalise_dispatch(x); } } static r_obj* vec_ptype_finalise_unspecified(r_obj* x) { r_ssize size = r_length(x); if (size == 0) { return r_globals.empty_lgl; } r_obj* out = KEEP(r_alloc_logical(size)); r_lgl_fill(out, r_globals.na_lgl, size); FREE(1); return out; } static r_obj* vec_ptype_finalise_dispatch(r_obj* x) { return vctrs_dispatch1( syms_vec_ptype_finalise_dispatch, fns_vec_ptype_finalise_dispatch, syms_x, x ); } void vctrs_init_unspecified(SEXP ns) { { vctrs_unspecified_class = Rf_allocVector(STRSXP, 1); R_PreserveObject(vctrs_unspecified_class); SET_STRING_ELT(vctrs_unspecified_class, 0, Rf_mkChar("vctrs_unspecified")); } vctrs_shared_empty_uns = vec_unspecified(0); R_PreserveObject(vctrs_shared_empty_uns); MARK_NOT_MUTABLE(vctrs_shared_empty_uns); syms_vec_ptype_finalise_dispatch = r_sym("vec_ptype_finalise_dispatch"); fns_vec_ptype_finalise_dispatch = r_eval(syms_vec_ptype_finalise_dispatch, ns); } vctrs/src/encoding.c0000644000176200001440000001043215156537555014155 0ustar liggesusers#include "vctrs.h" #include "decl/encoding-decl.h" /* * Recursively encode character vectors as UTF-8 * * A CHARSXP is left untouched if: * - It is the NA_STRING * - It is ASCII, which means the encoding will be "unknown", but is valid UTF-8 * - It is marked as UTF-8 * * Attributes are re-encoded as well. * * ASCII strings will never get marked with an encoding when they go * through `Rf_mkCharLenCE()`, but they will get marked as ASCII. Since * UTF-8 is fully compatible with ASCII, they are treated like UTF-8. * * This converts vectors that are completely marked as Latin-1 to UTF-8, rather * than leaving them as Latin-1. This ensures that two vectors can be compared * consistently if they have both been re-encoded. * * Bytes-encoded vectors are not supported, as they cannot be * converted to UTF-8 by `Rf_translateCharUTF8()`. * * This function never modifies `x` in place. */ r_obj* obj_encode_utf8(r_obj* x) { r_obj* out; switch (r_typeof(x)) { case R_TYPE_character: out = chr_encode_utf8(x); break; case R_TYPE_list: out = list_encode_utf8(x); break; default: out = x; break; } if (r_attrib_has_any(out)) { // Only `KEEP()` if there are attributes KEEP(out); // Pass down ownership to avoid a reclone if attributes change bool owned = x != out; out = obj_attrib_encode_utf8(out, owned); FREE(1); } return out; } // ----------------------------------------------------------------------------- static r_obj* chr_encode_utf8(r_obj* x) { r_ssize size = r_length(x); r_ssize start = chr_find_encoding_start(x, size); if (size == start) { return x; } x = KEEP(r_clone(x)); r_obj* const* p_x = r_chr_cbegin(x); const void* vmax = vmaxget(); for (r_ssize i = start; i < size; ++i) { r_obj* const elt = p_x[i]; if (!str_is_ascii_or_utf8(elt)) { r_chr_poke(x, i, str_as_utf8(elt)); } } vmaxset(vmax); FREE(1); return x; } static inline r_ssize chr_find_encoding_start(r_obj* x, r_ssize size) { r_obj* const* p_x = r_chr_cbegin(x); for (r_ssize i = 0; i < size; ++i) { r_obj* const elt = p_x[i]; if (!str_is_ascii_or_utf8(elt)) { return i; } } return size; } // ----------------------------------------------------------------------------- static r_obj* list_encode_utf8(r_obj* x) { bool owned = false; r_keep_loc pi; KEEP_HERE(x, &pi); r_ssize size = r_length(x); r_obj* const* p_x = r_list_cbegin(x); for (r_ssize i = 0; i < size; ++i) { r_obj* const elt_old = p_x[i]; r_obj* const elt_new = obj_encode_utf8(elt_old); if (elt_old == elt_new) { continue; } KEEP(elt_new); if (!owned) { x = r_clone(x); KEEP_AT(x, pi); p_x = r_list_cbegin(x); owned = true; } r_list_poke(x, i, elt_new); FREE(1); } FREE(1); return x; } // ----------------------------------------------------------------------------- struct cb_data { r_obj** p_out; r_keep_loc shelter; bool* p_owned; }; static r_obj* obj_attrib_encode_utf8_cb(r_obj* tag, r_obj* old, void* data) { struct cb_data* p_data = (struct cb_data*) data; r_obj* new = obj_encode_utf8(old); if (old == new) { return NULL; } KEEP(new); if (!(*p_data->p_owned)) { // Shallow clones `out` and its attributes *p_data->p_out = r_clone(*p_data->p_out); KEEP_AT(*p_data->p_out, p_data->shelter); *p_data->p_owned = true; } r_attrib_poke(*p_data->p_out, tag, new); FREE(1); return NULL; } static r_obj* obj_attrib_encode_utf8(r_obj* x, bool owned) { // `out` pointer may be updated in place by callback r_obj* out = x; r_keep_loc shelter; KEEP_HERE(out, &shelter); struct cb_data data = { .p_out = &out, .shelter = shelter, .p_owned = &owned }; r_attrib_map(x, obj_attrib_encode_utf8_cb, &data); FREE(1); return out; } // ----------------------------------------------------------------------------- // Testing SEXP ffi_obj_encode_utf8(SEXP x) { return obj_encode_utf8(x); } // Testing r_obj* ffi_chr_is_ascii_or_utf8(r_obj* x) { const r_ssize size = r_length(x); r_obj* const* v_x = r_chr_cbegin(x); r_obj* out = KEEP(r_alloc_logical(size)); int* v_out = r_lgl_begin(out); for (r_ssize i = 0; i < size; ++i) { v_out[i] = str_is_ascii_or_utf8(v_x[i]); } FREE(1); return out; } vctrs/src/utils-dispatch.c0000644000176200001440000000520015113325071015276 0ustar liggesusers#include "vctrs.h" #include "decl/utils-dispatch-decl.h" // [[ register() ]] r_obj* ffi_class_type(r_obj* x) { return r_chr(class_type_as_str(class_type(x))); } enum vctrs_class_type class_type(r_obj* x) { if (!r_is_object(x)) { return VCTRS_CLASS_none; } r_obj* cls = KEEP(r_class(x)); // Avoid corrupt objects where `x` is an object, but the class is NULL if (cls == r_null) { FREE(1); return VCTRS_CLASS_none; } enum vctrs_class_type type = class_type_impl(cls); FREE(1); return type; } static enum vctrs_class_type class_type_impl(r_obj* cls) { int n = r_length(cls); r_obj* const* p = r_chr_cbegin(cls); // First check for bare types for which we know how many strings are // the classes composed of switch (n) { case 1: { r_obj* p0 = p[0]; if (p0 == strings_data_frame) { return VCTRS_CLASS_bare_data_frame; } else if (p0 == strings_factor) { return VCTRS_CLASS_bare_factor; } else if (p0 == strings_date) { return VCTRS_CLASS_bare_date; } else if (p0 == strings.AsIs) { return VCTRS_CLASS_bare_asis; } break; } case 2: { r_obj* p0 = p[0]; r_obj* p1 = p[1]; if (p0 == strings_ordered && p1 == strings_factor) { return VCTRS_CLASS_bare_ordered; } if (p1 == strings_posixt) { if (p0 == strings_posixct) { return VCTRS_CLASS_bare_posixct; } else if (p0 == strings_posixlt) { return VCTRS_CLASS_bare_posixlt; } } break; } case 3: { if (p[0] == strings_tbl_df && p[1] == strings_tbl && p[2] == strings_data_frame) { return VCTRS_CLASS_bare_tibble; } break; }} // Now check for inherited classes p = p + n - 1; r_obj* last = *p; if (last == strings_data_frame) { return VCTRS_CLASS_data_frame; } else if (last == strings_list) { return VCTRS_CLASS_list; } return VCTRS_CLASS_unknown; } static const char* class_type_as_str(enum vctrs_class_type type) { switch (type) { case VCTRS_CLASS_list: return "list"; case VCTRS_CLASS_data_frame: return "data_frame"; case VCTRS_CLASS_bare_asis: return "bare_asis"; case VCTRS_CLASS_bare_data_frame: return "bare_data_frame"; case VCTRS_CLASS_bare_tibble: return "bare_tibble"; case VCTRS_CLASS_bare_factor: return "bare_factor"; case VCTRS_CLASS_bare_ordered: return "bare_ordered"; case VCTRS_CLASS_bare_date: return "bare_date"; case VCTRS_CLASS_bare_posixct: return "bare_posixct"; case VCTRS_CLASS_bare_posixlt: return "bare_posixlt"; case VCTRS_CLASS_unknown: return "unknown"; case VCTRS_CLASS_none: return "none"; } never_reached("class_type_as_str"); } vctrs/src/subscript-loc.c0000644000176200001440000005607514315060310015145 0ustar liggesusers#include "vctrs.h" #include "decl/subscript-loc-decl.h" r_obj* vec_as_location(r_obj* subscript, r_ssize n, r_obj* names) { const struct location_opts opts = { 0 }; return vec_as_location_opts(subscript, n, names, &opts); } r_obj* vec_as_location_ctxt(r_obj* subscript, r_ssize n, r_obj* names, struct vctrs_arg* arg, struct r_lazy call) { struct location_opts opts = { .subscript_opts = { .subscript_arg = arg, .call = call } }; return vec_as_location_opts(subscript, n, names, &opts); } r_obj* vec_as_location_opts(r_obj* subscript, r_ssize n, r_obj* names, const struct location_opts* opts) { ERR err = NULL; subscript = vec_as_subscript_opts(subscript, &opts->subscript_opts, &err); KEEP2(subscript, err); if (err) { r_cnd_signal(err); r_stop_unreachable(); } r_obj* out = r_null; switch (r_typeof(subscript)) { case R_TYPE_null: out = r_globals.empty_int; break; case R_TYPE_logical: out = lgl_as_location(subscript, n, opts); break; case R_TYPE_integer: out = int_as_location(subscript, n, opts); break; case R_TYPE_double: out = dbl_as_location(subscript, n, opts); break; case R_TYPE_character: out = chr_as_location(subscript, names, opts); break; default: r_stop_unimplemented_type(r_typeof(subscript)); } FREE(2); return out; } static r_obj* lgl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { r_ssize subscript_n = r_length(subscript); if (subscript_n == n) { bool na_propagate = false; switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: na_propagate = true; break; case SUBSCRIPT_MISSING_REMOVE: break; case SUBSCRIPT_MISSING_ERROR: { if (lgl_any_na(subscript)) { stop_subscript_missing(subscript, opts); } break; } } return r_lgl_which(subscript, na_propagate); } /* A single `TRUE` or `FALSE` index is recycled to the full vector * size. This means `TRUE` is synonym for missing index (subscript.e. no * subsetting) and `FALSE` is synonym for empty index. * * We could return the missing argument as sentinel to avoid * materialising the index vector for the `TRUE` case but this would * make `vec_as_location()` an option type just to optimise a rather * uncommon case. */ if (subscript_n == 1) { int elt = r_lgl_get(subscript, 0); r_ssize recycle_size = n; r_obj* out = r_null; r_keep_loc out_shelter; KEEP_HERE(out, &out_shelter); if (elt == r_globals.na_lgl) { switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: { out = r_alloc_integer(n); KEEP_AT(out, out_shelter); r_int_fill(out, r_globals.na_int, n); break; } case SUBSCRIPT_MISSING_REMOVE: { out = r_copy(r_globals.empty_int); KEEP_AT(out, out_shelter); recycle_size = 0; break; } case SUBSCRIPT_MISSING_ERROR: { stop_subscript_missing(subscript, opts); } } } else if (elt) { out = r_alloc_integer(n); KEEP_AT(out, out_shelter); r_int_fill_seq(out, 1, n); } else { out = r_copy(r_globals.empty_int); KEEP_AT(out, out_shelter); recycle_size = 0; } r_obj* nms = KEEP(r_names(subscript)); if (nms != R_NilValue) { r_obj* recycled_nms = r_alloc_character(recycle_size); r_attrib_poke_names(out, recycled_nms); r_chr_fill(recycled_nms, r_chr_get(nms, 0), recycle_size); } FREE(2); return out; } r_obj* n_obj = KEEP(r_int(n)); stop_indicator_size(subscript, n_obj, opts); r_stop_unreachable(); } static r_obj* int_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { const int* data = r_int_cbegin(subscript); r_ssize loc_n = r_length(subscript); // Zeros need to be filtered out from the subscript vector. // `int_invert_location()` filters them out for negative indices, but // positive indices need to go through and `int_filter_zero()`. r_ssize n_zero = 0; r_ssize n_oob = 0; r_ssize n_missing = 0; for (r_ssize i = 0; i < loc_n; ++i, ++data) { int elt = *data; if (elt == r_globals.na_int) { switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: break; case SUBSCRIPT_MISSING_REMOVE: ++n_missing; break; case SUBSCRIPT_MISSING_ERROR: stop_subscript_missing(subscript, opts); } } else if (elt == 0) { switch (opts->loc_zero) { case LOC_ZERO_REMOVE: ++n_zero; break; case LOC_ZERO_ERROR: stop_location_zero(subscript, opts); case LOC_ZERO_IGNORE: break; } } else if (elt < 0) { switch (opts->loc_negative) { case LOC_NEGATIVE_INVERT: return int_invert_location(subscript, n, opts); case LOC_NEGATIVE_ERROR: stop_location_negative(subscript, opts); case LOC_NEGATIVE_IGNORE: { if (abs(elt) > n) { switch (opts->loc_oob) { case LOC_OOB_ERROR: stop_subscript_oob_location(subscript, n, opts); case LOC_OOB_EXTEND: stop_subscript_negative_oob_location(subscript, n, opts); case LOC_OOB_REMOVE: ++n_oob; break; } } break; } } } else if (elt > n) { switch (opts->loc_oob) { case LOC_OOB_ERROR: stop_subscript_oob_location(subscript, n, opts); case LOC_OOB_EXTEND: ++n_oob; break; case LOC_OOB_REMOVE: ++n_oob; break; } } } r_keep_loc subscript_shelter; KEEP_HERE(subscript, &subscript_shelter); if (n_missing > 0) { subscript = int_filter_missing(subscript, n_missing); KEEP_AT(subscript, subscript_shelter); } if (n_zero > 0) { subscript = int_filter_zero(subscript, n_zero); KEEP_AT(subscript, subscript_shelter); } if (n_oob > 0) { switch (opts->loc_oob) { case LOC_OOB_ERROR: { r_stop_internal("An error should have been thrown on the first OOB value."); } case LOC_OOB_EXTEND: { int_check_consecutive(subscript, n, n_oob, opts); break; } case LOC_OOB_REMOVE: { subscript = int_filter_oob(subscript, n, n_oob); KEEP_AT(subscript, subscript_shelter); break; } } } FREE(1); return subscript; } static r_obj* int_invert_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { const int* data = r_int_cbegin(subscript); r_ssize loc_n = r_length(subscript); r_obj* sel = KEEP(r_alloc_logical(n)); r_lgl_fill(sel, 1, n); int* sel_data = r_lgl_begin(sel); for (r_ssize i = 0; i < loc_n; ++i, ++data) { int j = *data; if (j == r_globals.na_int) { // Following base R by erroring on `missing = "propagate"`, e.g. `1[c(NA, -1)]`. // Doesn't make sense to invert an `NA`, so we can't meaningfully propagate. switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: stop_location_negative_missing(subscript, opts); case SUBSCRIPT_MISSING_REMOVE: continue; case SUBSCRIPT_MISSING_ERROR: stop_location_negative_missing(subscript, opts); } } if (j >= 0) { if (j == 0) { switch (opts->loc_zero) { case LOC_ZERO_REMOVE: continue; case LOC_ZERO_IGNORE: continue; case LOC_ZERO_ERROR: stop_location_zero(subscript, opts); } } else { stop_location_negative_positive(subscript, opts); } } j = -j; if (j > n) { switch (opts->loc_oob) { case LOC_OOB_REMOVE: { continue; } case LOC_OOB_EXTEND: case LOC_OOB_ERROR: { // Setting `oob` to `"error"` and `"extend"` result in errors here, // because extending with a negative subscript is nonsensical stop_subscript_negative_oob_location(subscript, n, opts); } } } sel_data[j - 1] = 0; } r_obj* out = lgl_as_location(sel, n, opts); FREE(1); return out; } static r_obj* int_filter(r_obj* subscript, r_ssize n_filter, int value) { const r_ssize size = r_length(subscript); const int* v_subscript = r_int_cbegin(subscript); r_obj* out = KEEP(r_alloc_integer(size - n_filter)); int* v_out = r_int_begin(out); r_obj* names = r_names(subscript); const bool has_names = names != r_null; r_obj* const* v_names = NULL; r_obj* out_names = r_null; if (has_names) { v_names = r_chr_cbegin(names); out_names = r_alloc_character(size - n_filter); r_attrib_poke_names(out, out_names); } r_ssize j = 0; for (r_ssize i = 0; i < size; ++i) { const int elt = v_subscript[i]; if (elt != value) { v_out[j] = elt; if (has_names) { r_chr_poke(out_names, j, v_names[i]); } ++j; } } FREE(1); return out; } static r_obj* int_filter_zero(r_obj* subscript, r_ssize n_zero) { return int_filter(subscript, n_zero, 0); } static r_obj* int_filter_missing(r_obj* subscript, r_ssize n_missing) { return int_filter(subscript, n_missing, r_globals.na_int); } static r_obj* int_filter_oob(r_obj* subscript, r_ssize n, r_ssize n_oob) { const r_ssize n_subscript = r_length(subscript); const r_ssize n_out = n_subscript - n_oob; const int* v_subscript = r_int_cbegin(subscript); r_obj* out = KEEP(r_alloc_integer(n_out)); int* v_out = r_int_begin(out); r_obj* names = r_names(subscript); const bool has_names = names != r_null; r_obj* const* v_names = NULL; r_obj* out_names = r_null; if (has_names) { v_names = r_chr_cbegin(names); out_names = r_alloc_character(n_out); r_attrib_poke_names(out, out_names); } r_ssize j = 0; for (r_ssize i = 0; i < n_subscript; ++i) { const int elt = v_subscript[i]; if (abs(elt) <= n || elt == r_globals.na_int) { v_out[j] = elt; if (has_names) { r_chr_poke(out_names, j, v_names[i]); } ++j; } } FREE(1); return out; } static void int_check_consecutive(r_obj* subscript, r_ssize n, r_ssize n_extend, const struct location_opts* opts) { r_obj* extended = KEEP(r_alloc_integer(n_extend)); int* p_extended = r_int_begin(extended); int i_extend = 0; int new_n = n; int* p_subscript = r_int_begin(subscript); r_ssize n_subscript = Rf_length(subscript); for (r_ssize i = 0; i < n_subscript; ++i) { int elt = p_subscript[i]; // Missing value also covered here if (elt <= n) { continue; } // Special case: appending in ascending sequence at the end // should not require any sorting if (elt - 1 == new_n) { ++new_n; --n_extend; } else { p_extended[i_extend++] = elt - 1; } } if (n_extend != i_extend) { r_stop_internal("int_check_consecutive", "n_extend (%d) != i_extend (%d).", n_extend, i_extend); } if (i_extend == 0) { FREE(1); return; } // Only the first i_extend entries of the array are populated, // the rest is never touched. qsort(p_extended, i_extend, sizeof(int), &qsort_int_compare_scalar); for (r_ssize i = 0; i < i_extend; ++i) { int elt = p_extended[i]; if (elt != new_n + i) { stop_location_oob_non_consecutive(subscript, n, opts); } } FREE(1); } static r_obj* dbl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts) { subscript = KEEP(vec_cast(subscript, r_globals.empty_int, vec_args.empty, vec_args.empty, r_lazy_null)); subscript = int_as_location(subscript, n, opts); FREE(1); return subscript; } static r_obj* chr_as_location(r_obj* subscript, r_obj* names, const struct location_opts* opts) { if (names == R_NilValue) { r_abort("Can't use character names to index an unnamed vector."); } if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector."); } bool remove_missing = false; r_obj* matched = KEEP(Rf_match(names, subscript, r_globals.na_int)); r_attrib_poke_names(matched, r_names(subscript)); r_ssize n = r_length(matched); int* p = r_int_begin(matched); r_obj* const * ip = r_chr_cbegin(subscript); for (r_ssize k = 0; k < n; ++k) { const r_obj* elt = ip[k]; if (elt == r_strs.empty) { // `""` never matches, even if `names` contains a `""` name stop_subscript_empty(subscript, opts); } if (elt == r_globals.na_str) { // `NA_character_` never matches, even if `names` contains a missing name p[k] = r_globals.na_int; switch (opts->missing) { case SUBSCRIPT_MISSING_PROPAGATE: continue; case SUBSCRIPT_MISSING_REMOVE: remove_missing = true; continue; case SUBSCRIPT_MISSING_ERROR: stop_subscript_missing(subscript, opts); } } if (p[k] == r_globals.na_int) { stop_subscript_oob_name(subscript, names, opts); } } if (remove_missing) { if (opts->missing != SUBSCRIPT_MISSING_REMOVE) { r_stop_internal("`missing = 'remove'` must be set if `n_missing > 0`."); } r_obj* not_missing = KEEP(vec_detect_complete(matched)); matched = KEEP(vec_slice(matched, not_missing)); FREE(2); } KEEP(matched); FREE(2); return matched; } // [[ register() ]] r_obj* ffi_as_location(r_obj* subscript, r_obj* ffi_n, r_obj* names, r_obj* loc_negative, r_obj* loc_oob, r_obj* loc_zero, r_obj* missing, r_obj* frame) { r_ssize n = 0; if (ffi_n == r_null && r_typeof(subscript) == R_TYPE_character) { n = r_length(subscript); } else { if (r_is_object(ffi_n) || r_typeof(ffi_n) != R_TYPE_integer) { ffi_n = vec_cast(ffi_n, r_globals.empty_int, vec_args.n, vec_args.empty, (struct r_lazy) { .x = frame, .env = r_null }); } KEEP(ffi_n); if (r_length(ffi_n) != 1) { r_stop_internal("`n` must be a scalar number."); } n = r_int_get(ffi_n, 0); FREE(1); } struct r_lazy arg_ = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_); struct r_lazy call = (struct r_lazy) { .x = syms_call, .env = frame }; struct location_opts opts = { .subscript_opts = { .subscript_arg = &arg, .call = call }, .missing = parse_subscript_arg_missing(missing, call), .loc_negative = parse_loc_negative(loc_negative, call), .loc_oob = parse_loc_oob(loc_oob, call), .loc_zero = parse_loc_zero(loc_zero, call) }; return vec_as_location_opts(subscript, n, names, &opts); } static enum subscript_missing parse_subscript_arg_missing(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_subscript_arg_missing(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "propagate")) return SUBSCRIPT_MISSING_PROPAGATE; if (!strcmp(str, "remove")) return SUBSCRIPT_MISSING_REMOVE; if (!strcmp(str, "error")) return SUBSCRIPT_MISSING_ERROR; stop_subscript_arg_missing(call); r_stop_unreachable(); } static enum num_loc_negative parse_loc_negative(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_bad_negative(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "invert")) return LOC_NEGATIVE_INVERT; if (!strcmp(str, "error")) return LOC_NEGATIVE_ERROR; if (!strcmp(str, "ignore")) return LOC_NEGATIVE_IGNORE; stop_bad_negative(call); r_stop_unreachable(); } static enum num_loc_oob parse_loc_oob(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_bad_oob(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "error")) return LOC_OOB_ERROR; if (!strcmp(str, "remove")) return LOC_OOB_REMOVE; if (!strcmp(str, "extend")) return LOC_OOB_EXTEND; stop_bad_oob(call); r_stop_unreachable(); } static enum num_loc_zero parse_loc_zero(r_obj* x, struct r_lazy call) { if (r_typeof(x) != R_TYPE_character || r_length(x) == 0) { stop_bad_zero(call); } const char* str = r_chr_get_c_string(x, 0); if (!strcmp(str, "remove")) return LOC_ZERO_REMOVE; if (!strcmp(str, "error")) return LOC_ZERO_ERROR; if (!strcmp(str, "ignore")) return LOC_ZERO_IGNORE; stop_bad_zero(call); r_stop_unreachable(); } static void stop_subscript_arg_missing(struct r_lazy call) { r_abort_call(call.env, "`missing` must be one of \"propagate\", \"remove\", or \"error\"."); } static void stop_bad_negative(struct r_lazy call) { r_abort_call(call.env, "`negative` must be one of \"invert\", \"error\", or \"ignore\"."); } static void stop_bad_oob(struct r_lazy call) { r_abort_call(call.env, "`oob` must be one of \"error\", \"remove\", or \"extend\"."); } static void stop_bad_zero(struct r_lazy call) { r_abort_call(call.env, "`zero` must be one of \"remove\", \"error\", or \"ignore\"."); } static void stop_subscript_missing(r_obj* i, const struct location_opts* opts) { r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask2(r_sym("stop_subscript_missing"), syms_i, i, syms_call, call); r_stop_unreachable(); } static void stop_subscript_empty(r_obj* i, const struct location_opts* opts) { r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask2(r_sym("stop_subscript_empty"), syms_i, i, syms_call, call); r_stop_unreachable(); } static void stop_location_negative_missing(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_negative_missing"), syms_i, i, syms_subscript_arg, arg, syms_call, call, syms_subscript_action, get_opts_action(&opts->subscript_opts)); r_stop_unreachable(); } static void stop_location_negative_positive(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_negative_positive"), syms_i, i, syms_subscript_arg, arg, syms_call, call, syms_subscript_action, get_opts_action(&opts->subscript_opts)); r_stop_unreachable(); } static void stop_subscript_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts) { r_obj* size_obj = KEEP(r_int(size)); r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask6(r_sym("stop_subscript_oob"), syms_i, i, syms_subscript_type, chrs_numeric, syms_size, size_obj, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_subscript_negative_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts) { struct location_opts error_opts = *opts; error_opts.subscript_opts.action = SUBSCRIPT_ACTION_NEGATE; stop_subscript_oob_location(i, size, &error_opts); } static void stop_subscript_oob_name(r_obj* i, r_obj* names, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask6(r_sym("stop_subscript_oob"), syms_i, i, syms_subscript_type, chrs_character, syms_names, names, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_location_negative(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_negative"), syms_i, i, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_location_zero(r_obj* i, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask4(r_sym("stop_location_zero"), syms_i, i, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_indicator_size(r_obj* i, r_obj* n, const struct location_opts* opts) { r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask5(r_sym("stop_indicator_size"), syms_i, i, syms_n, n, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); r_stop_unreachable(); } static void stop_location_oob_non_consecutive(r_obj* i, r_ssize size, const struct location_opts* opts) { r_obj* size_obj = KEEP(r_int(size)); r_obj* arg = KEEP(vctrs_arg(opts->subscript_opts.subscript_arg)); r_obj* call = KEEP(r_lazy_eval(opts->subscript_opts.call)); vctrs_eval_mask5(r_sym("stop_location_oob_non_consecutive"), syms_i, i, syms_size, size_obj, syms_subscript_action, get_opts_action(&opts->subscript_opts), syms_subscript_arg, arg, syms_call, call); FREE(1); r_stop_unreachable(); } void vctrs_init_subscript_loc(r_obj* ns) { } vctrs/src/rank.c0000644000176200001440000001440715120272011013276 0ustar liggesusers#include "vctrs.h" enum ties { TIES_min, TIES_max, TIES_sequential, TIES_dense }; enum incomplete { INCOMPLETE_rank, INCOMPLETE_na }; #include "decl/rank-decl.h" // [[ register() ]] r_obj* vctrs_rank(r_obj* x, r_obj* ties, r_obj* incomplete, r_obj* direction, r_obj* na_value, r_obj* nan_distinct, r_obj* chr_proxy_collate) { const enum ties c_ties = parse_ties(ties); const enum incomplete c_incomplete = parse_incomplete(incomplete); const bool c_nan_distinct = r_as_bool(nan_distinct); return vec_rank( x, c_ties, c_incomplete, direction, na_value, c_nan_distinct, chr_proxy_collate ); } static r_obj* vec_rank(r_obj* x, enum ties ties_type, enum incomplete incomplete_type, r_obj* direction, r_obj* na_value, bool nan_distinct, r_obj* chr_proxy_collate) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, vec_args.x, r_lazy_null); r_ssize size = vec_size(x); r_keep_loc pi_x; KEEP_HERE(x, &pi_x); r_obj* complete = r_null; r_keep_loc pi_complete; KEEP_HERE(complete, &pi_complete); int* v_complete = NULL; r_ssize rank_size = size; bool rank_incomplete_with_na = (incomplete_type == INCOMPLETE_na); if (rank_incomplete_with_na) { // Slice out complete values of `x` to rank. // Retain the logical vector for constructing `out`. complete = vec_detect_complete(x); KEEP_AT(complete, pi_complete); v_complete = r_lgl_begin(complete); bool all_complete = r_lgl_all(complete); if (all_complete) { // No incomplete values to rank rank_incomplete_with_na = false; } else { x = vec_slice(x, complete); KEEP_AT(x, pi_x); rank_size = vec_size(x); } } r_obj* rank = KEEP(r_alloc_integer(rank_size)); int* v_rank = r_int_begin(rank); r_obj* info = KEEP(vec_order_info(x, direction, na_value, nan_distinct, chr_proxy_collate)); r_obj* order = r_list_get(info, 0); const int* v_order = r_int_cbegin(order); r_obj* group_sizes = r_list_get(info, 1); const int* v_group_sizes = r_int_cbegin(group_sizes); r_ssize n_groups = r_length(group_sizes); switch (ties_type) { case TIES_min: vec_rank_min(v_order, v_group_sizes, n_groups, v_rank); break; case TIES_max: vec_rank_max(v_order, v_group_sizes, n_groups, v_rank); break; case TIES_sequential: vec_rank_sequential(v_order, v_group_sizes, n_groups, v_rank); break; case TIES_dense: vec_rank_dense(v_order, v_group_sizes, n_groups, v_rank); break; } r_obj* out = r_null; if (rank_incomplete_with_na) { out = KEEP(r_alloc_integer(size)); int* v_out = r_int_begin(out); r_ssize j = 0; for (r_ssize i = 0; i < size; ++i) { v_out[i] = v_complete[i] ? v_rank[j++] : r_globals.na_int; } FREE(1); } else { out = rank; } FREE(4); return out; } // ----------------------------------------------------------------------------- static void vec_rank_min(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 1; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; } rank += group_size; } } static void vec_rank_max(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 0; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; rank += group_size; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; } } } static void vec_rank_sequential(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 1; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; ++rank; } } } static void vec_rank_dense(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank) { r_ssize k = 0; r_ssize rank = 1; for (r_ssize i = 0; i < n_groups; ++i) { const r_ssize group_size = v_group_sizes[i]; for (r_ssize j = 0; j < group_size; ++j) { r_ssize loc = v_order[k] - 1; v_rank[loc] = rank; ++k; } ++rank; } } // ----------------------------------------------------------------------------- static inline enum ties parse_ties(r_obj* ties) { if (!r_is_string(ties)) { r_stop_internal("`ties` must be a string."); } const char* c_ties = r_chr_get_c_string(ties, 0); if (!strcmp(c_ties, "min")) return TIES_min; if (!strcmp(c_ties, "max")) return TIES_max; if (!strcmp(c_ties, "sequential")) return TIES_sequential; if (!strcmp(c_ties, "dense")) return TIES_dense; r_stop_internal( "`ties` must be one of: \"min\", \"max\", \"sequential\", or \"dense\"." ); } // ----------------------------------------------------------------------------- static inline enum incomplete parse_incomplete(r_obj* incomplete) { if (!r_is_string(incomplete)) { r_stop_internal("`incomplete` must be a string."); } const char* c_incomplete = r_chr_get_c_string(incomplete, 0); if (!strcmp(c_incomplete, "rank")) return INCOMPLETE_rank; if (!strcmp(c_incomplete, "na")) return INCOMPLETE_na; r_stop_internal( "`incomplete` must be either \"rank\" or \"na\"." ); } // ----------------------------------------------------------------------------- // Treats missing values as `true` static inline bool r_lgl_all(r_obj* x) { if (r_typeof(x) != R_TYPE_logical) { r_stop_internal("`x` must be a logical vector."); } const int* v_x = r_lgl_cbegin(x); r_ssize size = r_length(x); for (r_ssize i = 0; i < size; ++i) { if (!v_x[i]) { return false; } } return true; } vctrs/src/type-info.h0000644000176200001440000000344315072256373014303 0ustar liggesusers#ifndef VCTRS_TYPE_INFO_H #define VCTRS_TYPE_INFO_H #include enum vctrs_type { VCTRS_TYPE_null = 0, VCTRS_TYPE_unspecified, VCTRS_TYPE_logical, VCTRS_TYPE_integer, VCTRS_TYPE_double, VCTRS_TYPE_complex, VCTRS_TYPE_character, VCTRS_TYPE_raw, VCTRS_TYPE_list, VCTRS_TYPE_dataframe, VCTRS_TYPE_scalar, VCTRS_TYPE_s3 = 255 }; /** * Proxy info * * @member inner If a `proxy_method` was found, the result of invoking * the method. Otherwise, the original data. * @member type If a `proxy_method` was found, the vector type of the * proxy data. Otherwise, the vector type of the original data. * This is never `vctrs_type_s3`. * @member had_proxy_method Whether or not a `proxy_method` was found, * which is looked up by [vec_proxy_method()]. * * NOTE: Resist the urge to add a `shelter` here. `vec_proxy_info()` is called * in EXTREMELY tight loops, like `list_sizes()`, `vec_size_common()`, and * `vec_ptype_common()`. The overhead of creating and protecting a `shelter` * list is very noticeable! Instead we use `inner` to denote that this struct * "wraps" an inner object, and we `KEEP()` that directly at call sites (#2042). */ struct vctrs_proxy_info { r_obj* inner; enum vctrs_type type; bool had_proxy_method; }; /** * Return type information of a vector's proxy * * `vec_proxy_info()` returns the vctrs type of `x` or its proxy if it has one. * This never returns `vctrs_type_s3`, as we invoke `vec_proxy()` on classed * objects and assume the result is a native type. */ struct vctrs_proxy_info vec_proxy_info(r_obj* x); enum vctrs_type vec_typeof(r_obj* x); enum vctrs_type vec_proxy_typeof(r_obj* x); const char* vec_type_as_str(enum vctrs_type type); r_no_return void stop_unimplemented_vctrs_type(const char* fn, enum vctrs_type type); #endif vctrs/src/rlang/0000755000176200001440000000000015157552632013320 5ustar liggesusersvctrs/src/rlang/obj.h0000644000176200001440000000502015157273666014247 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_OBJ_H #define RLANG_OBJ_H #include #include "rlang-types.h" #define r_missing_arg R_MissingArg static inline r_ssize r_length(r_obj* x) { return Rf_xlength(x); } static inline enum r_type r_typeof(r_obj* x) { return (enum r_type) TYPEOF(x); } void _r_preserve(r_obj* x); void _r_unpreserve(r_obj* x); static r_unused r_obj* _r_placeholder = NULL; #define r_preserve(X) \ (R_PreserveObject(_r_placeholder = X), \ (_r_preserve)(_r_placeholder), \ (void) NULL) #define r_unpreserve(X) \ (R_ReleaseObject(_r_placeholder = X), \ (_r_unpreserve)(_r_placeholder), \ (void) NULL) static inline void r_mark_shared(r_obj* x) { MARK_NOT_MUTABLE(x); } static inline bool r_is_shared(r_obj* x) { return MAYBE_REFERENCED(x); } static inline void _r_preserve_global(r_obj* x) { (_r_preserve)(x); r_mark_shared(x); } #define r_preserve_global(X) \ (R_PreserveObject(_r_placeholder = X), \ (_r_preserve_global)(_r_placeholder), \ (void) NULL) static inline bool r_is_object(r_obj* x) { return Rf_isObject(x); } static inline bool r_inherits(r_obj* x, const char* tag) { return Rf_inherits(x, tag); } static inline r_obj* r_copy(r_obj* x) { return Rf_duplicate(x); } static inline r_obj* r_clone(r_obj* x) { return Rf_shallow_duplicate(x); } // These also clone names r_obj* r_vec_clone(r_obj* x); r_obj* r_vec_clone_shared(r_obj* x); static inline r_obj* r_type_as_string(enum r_type type) { return Rf_type2str(type); } static inline r_obj* r_type_as_character(enum r_type type) { r_obj* str = KEEP(r_type_as_string(type)); r_obj* out = Rf_ScalarString(str); return FREE(1), out; } static inline const char* r_type_as_c_string(enum r_type type) { return CHAR(Rf_type2str(type)); } static inline enum r_type r_c_str_as_r_type(const char* type) { return (enum r_type) Rf_str2type(type); } enum r_type r_chr_as_r_type(r_obj* type); static inline bool r_is_symbolic(r_obj* x) { return r_typeof(x) == LANGSXP || r_typeof(x) == SYMSXP; } static inline void r_obj_print(r_obj* x) { Rf_PrintValue(x); } static inline bool r_is_identical(r_obj* x, r_obj* y) { // 16 corresponds to base::identical()'s defaults // Do we need less conservative versions? return R_compute_identical(x, y, 16); } r_obj* r_obj_address(r_obj* x); extern r_obj* (*r_obj_encode_utf8)(r_obj* x); r_obj* r_as_label(r_obj* x); #endif vctrs/src/rlang/dyn-array.h0000644000176200001440000001267215157273666015416 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_DYN_ARRAY_H #define RLANG_DYN_ARRAY_H #include "rlang-types.h" #include "c-utils.h" #include "cnd.h" #include "vec.h" struct r_dyn_array { r_obj* shelter; r_ssize count; r_ssize capacity; int growth_factor; r_obj* data; void* v_data; const void* v_data_const; // private: enum r_type type; r_ssize elt_byte_size; void (*barrier_set)(r_obj* x, r_ssize i, r_obj* value); }; struct r_dyn_array* r_new_dyn_vector(enum r_type type, r_ssize capacity); struct r_dyn_array* r_new_dyn_array(r_ssize elt_byte_size, r_ssize capacity); void r_dyn_resize(struct r_dyn_array* p_arr, r_ssize capacity); void r_dyn_push_back(struct r_dyn_array* p_arr, const void* p_elt); r_obj* r_dyn_unwrap(struct r_dyn_array* p_arr); static inline void* r_dyn_pointer(struct r_dyn_array* p_arr, r_ssize i) { if (p_arr->barrier_set) { r_abort("Can't take mutable pointer of barrier vector."); } r_ssize offset = i * p_arr->elt_byte_size; return ((unsigned char*) p_arr->v_data) + offset; } static inline void* r_dyn_begin(struct r_dyn_array* p_arr) { return r_dyn_pointer(p_arr, 0); } static inline void* r_dyn_last(struct r_dyn_array* p_arr) { return r_dyn_pointer(p_arr, p_arr->count - 1); } static inline void* r_dyn_end(struct r_dyn_array* p_arr) { return r_dyn_pointer(p_arr, p_arr->count); } static inline const void* r_dyn_cpointer(struct r_dyn_array* p_arr, r_ssize i) { r_ssize offset = i * p_arr->elt_byte_size; return ((const unsigned char*) p_arr->v_data_const) + offset; } static inline const void* r_dyn_cbegin(struct r_dyn_array* p_arr) { return r_dyn_cpointer(p_arr, 0); } static inline const void* r_dyn_clast(struct r_dyn_array* p_arr) { return r_dyn_cpointer(p_arr, p_arr->count - 1); } static inline const void* r_dyn_cend(struct r_dyn_array* p_arr) { return r_dyn_cpointer(p_arr, p_arr->count); } #define R_DYN_GET(TYPE, X, I) (*((TYPE*) r_dyn_pointer((X), (I)))) #define R_DYN_POKE(TYPE, X, I, VAL) (*((TYPE*) r_dyn_pointer((X), (I))) = (VAL)) static inline int r_dyn_lgl_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const int*) p_vec->v_data_const)[i]; } static inline int r_dyn_int_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const int*) p_vec->v_data_const)[i]; } static inline double r_dyn_dbl_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const double*) p_vec->v_data_const)[i]; } static inline r_complex r_dyn_cpl_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const r_complex*) p_vec->v_data_const)[i]; } static inline char r_dyn_raw_get(struct r_dyn_array* p_vec, r_ssize i) { return ((const char*) p_vec->v_data_const)[i]; } static inline r_obj* r_dyn_chr_get(struct r_dyn_array* p_vec, r_ssize i) { return ((r_obj* const *) p_vec->v_data_const)[i]; } static inline r_obj* r_dyn_list_get(struct r_dyn_array* p_vec, r_ssize i) { return ((r_obj* const *) p_vec->v_data_const)[i]; } static inline void r_dyn_lgl_poke(struct r_dyn_array* p_vec, r_ssize i, int value) { ((int*) p_vec->v_data)[i] = value; } static inline void r_dyn_int_poke(struct r_dyn_array* p_vec, r_ssize i, int value) { ((int*) p_vec->v_data)[i] = value; } static inline void r_dyn_dbl_poke(struct r_dyn_array* p_vec, r_ssize i, double value) { ((double*) p_vec->v_data)[i] = value; } static inline void r_dyn_cpl_poke(struct r_dyn_array* p_vec, r_ssize i, r_complex value) { ((r_complex*) p_vec->v_data)[i] = value; } static inline void r_dyn_raw_poke(struct r_dyn_array* p_vec, r_ssize i, char value) { ((char*) p_vec->v_data)[i] = value; } static inline void r_dyn_chr_poke(struct r_dyn_array* p_vec, r_ssize i, r_obj* value) { r_chr_poke(p_vec->data, i, value); } static inline void r_dyn_list_poke(struct r_dyn_array* p_vec, r_ssize i, r_obj* value) { r_list_poke(p_vec->data, i, value); } static inline void* const * r_dyn_pop_back(struct r_dyn_array* p_arr) { void* const * out = (void* const *) r_dyn_clast(p_arr); --p_arr->count; return out; } static inline r_ssize r__dyn_increment(struct r_dyn_array* p_arr) { r_ssize loc = p_arr->count++; if (p_arr->count > p_arr->capacity) { r_ssize new_capacity = r_ssize_mult(p_arr->capacity, p_arr->growth_factor); r_dyn_resize(p_arr, new_capacity); } return loc; } static inline void r_dyn_lgl_push_back(struct r_dyn_array* p_vec, int elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_lgl_poke(p_vec, loc, elt); } static inline void r_dyn_int_push_back(struct r_dyn_array* p_vec, int elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_int_poke(p_vec, loc, elt); } static inline void r_dyn_dbl_push_back(struct r_dyn_array* p_vec, double elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_dbl_poke(p_vec, loc, elt); } static inline void r_dyn_cpl_push_back(struct r_dyn_array* p_vec, r_complex elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_cpl_poke(p_vec, loc, elt); } static inline void r_dyn_raw_push_back(struct r_dyn_array* p_vec, char elt) { r_ssize loc = r__dyn_increment(p_vec); r_dyn_raw_poke(p_vec, loc, elt); } static inline void r_dyn_chr_push_back(struct r_dyn_array* p_vec, r_obj* elt) { KEEP(elt); r_ssize loc = r__dyn_increment(p_vec); r_dyn_chr_poke(p_vec, loc, elt); FREE(1); } static inline void r_dyn_list_push_back(struct r_dyn_array* p_vec, r_obj* elt) { KEEP(elt); r_ssize loc = r__dyn_increment(p_vec); r_dyn_list_poke(p_vec, loc, elt); FREE(1); } #endif vctrs/src/rlang/debug.c0000644000176200001440000000120415157273666014556 0ustar liggesusers#include "rlang.h" void r_sexp_inspect(r_obj* x) { r_obj* call = KEEP(r_parse(".Internal(inspect(x))")); r_eval_with_x(call, x, r_envs.base); FREE(1); } void r_browse(r_obj* x) { r_env_bind(r_envs.global, r_sym(".debug"), x); r_printf("Object saved in `.debug`:\n"); r_obj_print(x); r_obj* frame = KEEP(r_peek_frame()); r_browse_at(frame); FREE(1); } void r_browse_at(r_obj* env) { // The NULL expression is needed because of a limitation in ESS r_parse_eval("{ browser(); NULL }", env); } void r_dbg_str(r_obj* x) { r_obj* call = KEEP(r_parse("str(x)")); r_eval_with_x(call, x, r_ns_env("utils")); FREE(1); } vctrs/src/rlang/arg.h0000644000176200001440000000045715157273666014257 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_ARG_H #define RLANG_ARG_H #include "rlang-types.h" extern int (*r_arg_match)(r_obj* arg, r_obj* values, struct r_lazy error_arg, struct r_lazy error_call); #endif vctrs/src/rlang/vec-chr.h0000644000176200001440000000355115157273666015033 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_VECTOR_CHR_H #define RLANG_VECTOR_CHR_H #include "globals.h" #include "rlang-types.h" static inline const char* r_str_c_string(r_obj* str) { return CHAR(str); } bool r_chr_has(r_obj* chr, const char* c_string); bool r_chr_has_any(r_obj* chr, const char** c_strings); r_ssize r_chr_detect_index(r_obj* chr, const char* c_string); void r_chr_fill(r_obj* chr, r_obj* value, r_ssize n); static inline r_obj* r_str_as_character(r_obj* x) { return Rf_ScalarString(x); } /* * A symbol is always in the native encoding. This means that UTF-8 * data frame names undergo a lossy translation when they are * transformed to symbols to create a data mask. To deal with this, we * translate all serialised unicode tags back to UTF-8. This way the * UTF-8 -> native -> UTF-8 translation that occurs during the * character -> symbol -> character conversion fundamental for data * masking is transparent and lossless for the end user. * * Starting from R 4.0, `installChar()` warns when translation to * native encoding is lossy. This warning is disruptive for us since * we correctly translate strings behind the scene. To work around * this, we call `translateChar()` which doesn't warn (at least * currently). If the pointers are the same, no translation is * needed and we can call `installChar()`, which preserves the * current encoding of the string. Otherwise we intern the symbol * with `install()` without encoding. */ static inline r_obj* r_str_as_symbol(r_obj* str) { const char* str_native = Rf_translateChar(str); if (str_native == CHAR(str)) { return Rf_installChar(str); } else { return Rf_install(str_native); } } static inline bool r_str_is_name(r_obj* str) { if (str == r_globals.na_str) { return false; } if (str == r_strs.empty) { return false; } return true; } #endif vctrs/src/rlang/arg.c0000644000176200001440000000051715157273666014247 0ustar liggesusers#include "rlang.h" int (*r_arg_match)(r_obj* arg, r_obj* values, struct r_lazy error_arg, struct r_lazy error_call); void r_init_library_arg(void) { r_arg_match = (int (*)(r_obj*, r_obj*, struct r_lazy, struct r_lazy)) r_peek_c_callable("rlang", "rlang_arg_match_2"); } vctrs/src/rlang/rlang.h0000644000176200001440000000375415157273702014603 0ustar liggesusers#ifndef RLANG_RLANG_H #define RLANG_RLANG_H /* * `_ISOC99_SOURCE` is defined to avoid warnings on Windows UCRT builds where * usage of `PRIx64` in Microsoft's `printf()` can generate the warnings shown * below. Defining this before including `` forces usage of MinGW's * custom `printf()`, which is C99 compliant. * warning: unknown conversion type character 'l' in format [-Wformat] * warning: too many arguments for format [-Wformat-extra-args] * * The conventional define for this is `__USE_MINGW_ANSI_STDIO`, but according * to the thread below it is recommended to instead use a feature test macro * (such as `_ISOC99_SOURCE`) which will indirectly define the internal * `__USE_MINGW_ANSI_STDIO` macro for us. * https://osdn.net/projects/mingw/lists/archive/users/2019-January/000199.html */ #ifndef _ISOC99_SOURCE #define _ISOC99_SOURCE #endif #define R_NO_REMAP #include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export #include "rlang-types.h" // IWYU pragma: export r_obj* r_init_library(r_obj* ns); r_ssize r_arg_as_ssize(r_obj* n, const char* arg); static inline r_ssize r_as_ssize(r_obj* n) { return r_arg_as_ssize(n, "n"); } extern bool _r_use_local_precious_list; // IWYU pragma: begin_exports #include "obj.h" #include "globals.h" #include "arg.h" #include "attrib.h" #include "debug.h" #include "c-utils.h" #include "call.h" #include "cnd.h" #include "dict.h" #include "df.h" #include "dyn-array.h" #include "dyn-list-of.h" #include "env.h" #include "env-binding.h" #include "eval.h" #include "export.h" #include "fn.h" #include "formula.h" #include "node.h" #include "parse.h" #include "quo.h" #include "session.h" #include "stack.h" #include "state.h" #include "sym.h" #include "vec.h" #include "vec-chr.h" #include "vec-lgl.h" #include "vendor.h" #include "walk.h" // IWYU pragma: end_exports #define r_abort_lazy_call(LAZY, ...) \ r_abort_call(KEEP(r_lazy_eval(LAZY)), __VA_ARGS__) #endif vctrs/src/rlang/parse.c0000644000176200001440000000132515157273666014606 0ustar liggesusers#include "rlang.h" #include static void abort_parse(r_obj* code, const char* why) { if (r_peek_option("rlang__verbose_errors") != r_null) { r_obj_print(code); } r_abort("Internal error: %s", why); } r_obj* r_parse(const char* str) { r_obj* str_ = KEEP(r_chr(str)); ParseStatus status; r_obj* out = KEEP(R_ParseVector(str_, -1, &status, r_null)); if (status != PARSE_OK) { abort_parse(str_, "Parsing failed"); } if (r_length(out) != 1) { abort_parse(str_, "Expected a single expression"); } out = r_list_get(out, 0); FREE(2); return out; } r_obj* r_parse_eval(const char* str, r_obj* env) { r_obj* out = r_eval(KEEP(r_parse(str)), env); FREE(1); return out; } vctrs/src/rlang/dyn-list-of.c0000644000176200001440000001562615157273666015652 0ustar liggesusers#include #include "decl/dyn-list-of-decl.h" #define R_DYN_LOF_GROWTH_FACTOR 2 #define R_DYN_LOF_INIT_SIZE 32 enum shelter_dyn_list_of { SHELTER_DYN_LOF_raw, SHELTER_DYN_LOF_reserve, SHELTER_DYN_LOF_arr_locs, SHELTER_DYN_LOF_extra_array, SHELTER_DYN_LOF_extra_shelter_array, SHELTER_DYN_LOF_moved_arr, SHELTER_DYN_LOF_moved_shelter_arr, SHELTER_DYN_LOF_arrays, SHELTER_DYN_LOF_SIZE }; struct r_dyn_list_of* r_new_dyn_list_of(enum r_type type, r_ssize capacity, r_ssize width) { switch (type) { case R_TYPE_character: case R_TYPE_list: r_abort("Can't create a dynamic list of barrier vectors."); default: break; } r_obj* shelter = KEEP(r_alloc_list(SHELTER_DYN_LOF_SIZE)); r_obj* lof_raw = r_alloc_raw(sizeof(struct r_dyn_list_of)); r_list_poke(shelter, SHELTER_DYN_LOF_raw, lof_raw); struct r_dyn_array* p_moved_arr = r_new_dyn_array(sizeof(struct r_dyn_array*), R_DYN_LOF_INIT_SIZE); r_list_poke(shelter, SHELTER_DYN_LOF_moved_arr, p_moved_arr->shelter); struct r_dyn_array* p_moved_shelter_arr = r_new_dyn_vector(R_TYPE_list, R_DYN_LOF_INIT_SIZE); r_list_poke(shelter, SHELTER_DYN_LOF_moved_shelter_arr, p_moved_shelter_arr->shelter); r_obj* reserve = r_alloc_vector(type, r_ssize_mult(capacity, width)); r_list_poke(shelter, SHELTER_DYN_LOF_reserve, reserve); void* v_reserve = r_vec_begin(reserve); r_obj* arr_locs = r_alloc_raw(sizeof(r_ssize) * capacity); r_list_poke(shelter, SHELTER_DYN_LOF_arr_locs, arr_locs); r_ssize* v_arr_locs = r_raw_begin(arr_locs); R_MEM_SET(r_ssize, v_arr_locs, -1, capacity); struct r_dyn_array* p_arrays = r_new_dyn_array(sizeof(struct r_pair_ptr_ssize), capacity); r_list_poke(shelter, SHELTER_DYN_LOF_arrays, p_arrays->shelter); struct r_dyn_list_of* p_lof = r_raw_begin(lof_raw); *p_lof = (struct r_dyn_list_of) { .shelter = shelter, .count = 0, .capacity = capacity, .growth_factor = R_DYN_LOF_GROWTH_FACTOR, .v_data = r_dyn_begin(p_arrays), // private: .width = width, .type = type, .elt_byte_size = r_vec_elt_sizeof0(type), .reserve = reserve, .v_reserve = v_reserve, .p_moved_arr = p_moved_arr, .p_moved_shelter_arr = p_moved_shelter_arr, .arr_locs = arr_locs, .v_arr_locs = v_arr_locs, .p_arrays = p_arrays, }; FREE(1); return p_lof; } r_obj* r_lof_unwrap(struct r_dyn_list_of* p_lof) { r_obj* out = KEEP(r_alloc_list(p_lof->count)); enum r_type type = p_lof->type; r_ssize n = p_lof->count; struct r_pair_ptr_ssize* v_arrays = r_dyn_begin(p_lof->p_arrays); for (r_ssize i = 0; i < n; ++i) { struct r_pair_ptr_ssize array = v_arrays[i]; r_list_poke(out, i, r_vec_n(type, array.ptr, array.size)); } FREE(1); return out; } static void r_lof_resize(struct r_dyn_list_of* p_lof, r_ssize capacity) { r_ssize count = p_lof->count; // Resize reserve r_obj* reserve = r_vec_resize0(p_lof->type, p_lof->reserve, r_ssize_mult(capacity, p_lof->width)); r_list_poke(p_lof->shelter, SHELTER_DYN_LOF_reserve, reserve); p_lof->reserve = reserve; p_lof->v_reserve = r_vec_begin0(p_lof->type, reserve); p_lof->capacity = capacity; // Resize array indirections r_obj* arr_locs = r_raw_resize(p_lof->arr_locs, r_ssize_mult(sizeof(r_ssize), capacity)); r_list_poke(p_lof->shelter, SHELTER_DYN_LOF_arr_locs, arr_locs); r_ssize* v_arr_locs = r_raw_begin(arr_locs); r_ssize n_new = capacity - count; R_MEM_SET(r_ssize, v_arr_locs + count, -1, n_new); p_lof->arr_locs = arr_locs; p_lof->v_arr_locs = v_arr_locs; // Resize addresses and update them to point to the new memory r_dyn_resize(p_lof->p_arrays, capacity); struct r_pair_ptr_ssize* v_data = r_dyn_begin(p_lof->p_arrays); p_lof->v_data = v_data; unsigned char* v_reserve_u = (unsigned char*) p_lof->v_reserve; r_ssize bytes = p_lof->width * p_lof->elt_byte_size; for (r_ssize i = 0; i < count; ++i) { // Preserve addresses of moved arrays if (v_arr_locs[i] < 0) { r_ssize offset = i * bytes; v_data[i].ptr = v_reserve_u + offset; } } } void r_lof_push_back(struct r_dyn_list_of* p_lof) { r_ssize count = p_lof->count + 1; if (count > p_lof->capacity) { r_ssize new_size = r_ssize_mult(p_lof->capacity, R_DYN_LOF_GROWTH_FACTOR); r_lof_resize(p_lof, new_size); } p_lof->count = count; unsigned char* v_reserve_u = (unsigned char*) p_lof->v_reserve; r_ssize offset = (count - 1) * p_lof->width * p_lof->elt_byte_size; struct r_pair_ptr_ssize info = { .ptr = v_reserve_u + offset, .size = 0 }; r_dyn_push_back(p_lof->p_arrays, &info); } void r_lof_arr_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt) { if (i >= p_lof->count) { r_stop_internal("Location %d does not exist.", i); } if (reserve_push_back(p_lof, i, p_elt)) { return; } struct r_dyn_array* p_arr = p_lof->p_moved_arr; r_ssize arr_i = p_lof->v_arr_locs[i]; if (arr_i >= p_arr->count) { r_stop_internal("Location %d does not exist in the extra array", arr_i); } struct r_dyn_array* p_inner_arr = R_DYN_GET(struct r_dyn_array*, p_arr, arr_i); r_dyn_push_back(p_inner_arr, p_elt); // Also update pointer in case of resize R_DYN_POKE(struct r_pair_ptr_ssize, p_lof->p_arrays, i, ((struct r_pair_ptr_ssize) { .ptr = r_dyn_begin(p_inner_arr), .size = p_inner_arr->count })); } static bool reserve_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt) { if (p_lof->v_arr_locs[i] >= 0) { return false; } struct r_pair_ptr_ssize* p_arr_info = r_dyn_pointer(p_lof->p_arrays, i); if (p_arr_info->size >= p_lof->width) { // Inner array is getting too big for the reserve. Move it to a // dynamic array. reserve_move(p_lof, i, p_elt); return false; } r_ssize count = ++p_arr_info->size; r_ssize offset = (i * p_lof->width + count - 1) * p_lof->elt_byte_size; void* p = ((unsigned char*) p_lof->v_reserve) + offset; if (p_elt) { r_memcpy(p, p_elt, p_lof->elt_byte_size); } else { r_memset(p, 0, p_lof->elt_byte_size); } return true; } static void reserve_move(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt) { struct r_dyn_array* p_moved_arr = p_lof->p_moved_arr; r_ssize n = p_lof->width; struct r_dyn_array* p_new = r_new_dyn_vector(p_lof->type, p_lof->width); r_dyn_list_push_back(p_lof->p_moved_shelter_arr, p_new->shelter); r_dyn_push_back(p_moved_arr, &p_new); void* v_new = r_dyn_begin(p_new); void* v_old = R_DYN_GET(struct r_pair_ptr_ssize, p_lof->p_arrays, i).ptr; r_memcpy(v_new, v_old, r_ssize_mult(n, p_lof->elt_byte_size)); p_new->count = n; R_DYN_POKE(struct r_pair_ptr_ssize, p_lof->p_arrays, i, ((struct r_pair_ptr_ssize) { .ptr = v_new, .size = n })); p_lof->v_arr_locs[i] = p_moved_arr->count - 1; } vctrs/src/rlang/node.h0000644000176200001440000000427415157273666014434 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_NODE_H #define RLANG_NODE_H #include "rlang-types.h" static inline r_obj* r_node_car(r_obj* x) { return CAR(x); } static inline r_obj* r_node_cdr(r_obj* x) { return CDR(x); } static inline r_obj* r_node_tag(r_obj* x) { return TAG(x); } static inline r_obj* r_node_caar(r_obj* x) { return CAAR(x); } static inline r_obj* r_node_cadr(r_obj* x) { return CADR(x); } static inline r_obj* r_node_cdar(r_obj* x) { return CDAR(x); } static inline r_obj* r_node_cddr(r_obj* x) { return CDDR(x); } static inline void r_node_poke_car(r_obj* x, r_obj* newcar) { SETCAR(x, newcar); } static inline void r_node_poke_cdr(r_obj* x, r_obj* newcdr) { SETCDR(x, newcdr); } static inline void r_node_poke_tag(r_obj* x, r_obj* tag) { SET_TAG(x, tag); } static inline void r_node_poke_caar(r_obj* x, r_obj* newcaar) { SETCAR(CAR(x), newcaar); } static inline void r_node_poke_cadr(r_obj* x, r_obj* newcar) { SETCADR(x, newcar); } static inline void r_node_poke_cdar(r_obj* x, r_obj* newcdar) { SETCDR(CAR(x), newcdar); } static inline void r_node_poke_cddr(r_obj* x, r_obj* newcdr) { SETCDR(CDR(x), newcdr); } static inline r_obj* r_new_node(r_obj* car, r_obj* cdr) { return Rf_cons(car, cdr); } static inline r_obj* r_new_node3(r_obj* car, r_obj* cdr, r_obj* tag) { r_obj* out = Rf_cons(car, cdr); SET_TAG(out, tag); return out; } r_obj* r_new_pairlist(const struct r_pair* args, int n, r_obj** tail); #define r_pairlist Rf_list1 #define r_pairlist2 Rf_list2 #define r_pairlist3 Rf_list3 #define r_pairlist4 Rf_list4 #define r_pairlist5 Rf_list5 r_obj* r_pairlist_rev(r_obj* node); // Used by `r_attrib_get()` via `r_pairlist_get()`, // so we want it to be fully inlined static inline r_obj* r_pairlist_find(r_obj* node, r_obj* tag) { while (node != r_null) { if (r_node_tag(node) == tag) { return node; } node = r_node_cdr(node); } return r_null; } static inline r_obj* r_pairlist_get(r_obj* node, r_obj* tag) { return r_node_car(r_pairlist_find(node, tag)); } static inline r_obj* r_pairlist_tail(r_obj* x) { r_obj* cdr = r_null; while ((cdr = r_node_cdr(x)) != r_null) { x = cdr; } return x; } r_obj* r_node_tree_clone(r_obj* x); #endif vctrs/src/rlang/cpp/0000755000176200001440000000000015157273666014111 5ustar liggesusersvctrs/src/rlang/cpp/rlang.cpp0000644000176200001440000000002315157273666015713 0ustar liggesusers#include "vec.cpp" vctrs/src/rlang/cpp/vec.cpp0000644000176200001440000000073115157273666015373 0ustar liggesusers#include #include extern "C" { int* r_int_unique0(int* v_data, r_ssize size) { try { return std::unique(v_data, v_data + size); } catch (...) { rcc_abort("r_int_unique0"); } } bool r_list_all_of0(r_obj* const * v_first, r_ssize size, bool (*predicate)(r_obj* x)) { try { return std::all_of(v_first, v_first + size, predicate); } catch (...) { rcc_abort("r_list_all_of"); } } } vctrs/src/rlang/call.h0000644000176200001440000000067215157273666014420 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_LANG_H #define RLANG_LANG_H #include "rlang-types.h" #define r_new_call Rf_lcons #define r_call Rf_lang1 #define r_call2 Rf_lang2 #define r_call3 Rf_lang3 #define r_call4 Rf_lang4 #define r_call5 Rf_lang5 bool r_is_call(r_obj* x, const char* name); bool r_is_call_any(r_obj* x, const char** names, int n); r_obj* r_expr_protect(r_obj* x); r_obj* r_call_clone(r_obj* x); #endif vctrs/src/rlang/fn.c0000644000176200001440000000161415157273666014100 0ustar liggesusers#include "rlang.h" r_obj* rlang_formula_formals = NULL; r_obj* r_as_function(r_obj* x, const char* arg) { switch (r_typeof(x)) { case R_TYPE_closure: case R_TYPE_builtin: case R_TYPE_special: return x; case R_TYPE_call: if (r_node_car(x) == r_syms.tilde && r_node_cddr(x) == r_null) { r_obj* env = r_attrib_get(x, r_syms.dot_environment); if (env == r_null) { r_abort("Can't transform formula to function because it doesn't have an environment."); } return r_new_function(rlang_formula_formals, r_node_cadr(x), env); } // else fallthrough; default: r_abort("Can't convert `%s` to a function", arg); } } void r_init_library_fn(void) { const char* formals_code = "formals(function(..., .x = ..1, .y = ..2, . = ..1) NULL)"; rlang_formula_formals = r_parse_eval(formals_code, r_envs.base); r_preserve_global(rlang_formula_formals); } vctrs/src/rlang/formula.c0000644000176200001440000000343415157273666015144 0ustar liggesusers#include "rlang.h" r_obj* r_f_rhs(r_obj* f) { if (r_typeof(f) != LANGSXP) { r_abort("`x` must be a formula"); } switch (r_length(f)) { case 2: return r_node_cadr(f); case 3: return CADDR(f); default: r_abort("Invalid formula"); } } r_obj* r_f_lhs(r_obj* f) { if (r_typeof(f) != LANGSXP) { r_abort("`x` must be a formula"); } switch (r_length(f)) { case 2: return r_null; case 3: return r_node_cadr(f); default: r_abort("Invalid formula"); } } r_obj* r_f_env(r_obj* f) { return r_attrib_get(f, r_sym(".Environment")); } bool r_f_has_env(r_obj* f) { return r_is_environment(r_f_env(f)); } bool r_is_formula(r_obj* x, int scoped, int lhs) { if (r_typeof(x) != R_TYPE_call) { return false; } if (r_node_car(x) != r_syms.tilde) { return false; } if (scoped >= 0) { bool has_env = r_typeof(r_f_env(x)) == R_TYPE_environment; bool has_class = r_inherits(x, "formula"); if (scoped != (has_env && has_class)) { return false; } } if (lhs >= 0) { int has_lhs = r_length(x) > 2; if (lhs != has_lhs) { return false; } } return true; } r_obj* new_raw_formula(r_obj* lhs, r_obj* rhs, r_obj* env) { static r_obj* tilde_sym = NULL; if (!tilde_sym) { tilde_sym = r_sym("~"); } if (!r_is_environment(env) && env != r_null) { r_abort("`env` must be an environment"); } r_obj* f; r_obj* args; if (lhs == r_null) { args = KEEP(r_pairlist(rhs)); } else { args = KEEP(r_pairlist2(lhs, rhs)); } f = KEEP(r_new_call(tilde_sym, args)); r_attrib_poke(f, r_sym(".Environment"), env); FREE(2); return f; } r_obj* r_new_formula(r_obj* lhs, r_obj* rhs, r_obj* env) { r_obj* f = KEEP(new_raw_formula(lhs, rhs, env)); r_attrib_poke_class(f, r_chr("formula")); FREE(1); return f; } vctrs/src/rlang/globals.c0000644000176200001440000000642115157273666015121 0ustar liggesusers#include "rlang-types.h" #include "sym.h" struct r_globals r_globals; struct r_globals_chrs r_chrs; struct r_globals_classes r_classes; struct r_globals_strs r_strs; struct r_globals_syms r_syms; struct r_globals_envs r_envs; r_obj* r_true = NULL; r_obj* r_false = NULL; void r_init_library_globals(r_obj* ns) { r_preserve_global(r_classes.data_frame = r_chr("data.frame")); const char* v_tibble_class[] = { "tbl_df", "tbl", "data.frame" }; r_preserve_global(r_globals.empty_lgl = r_alloc_logical(0)); r_preserve_global(r_globals.empty_int = r_alloc_integer(0)); r_preserve_global(r_globals.empty_dbl = r_alloc_double(0)); r_preserve_global(r_globals.empty_cpl = r_alloc_complex(0)); r_preserve_global(r_globals.empty_raw = r_alloc_raw(0)); r_preserve_global(r_globals.empty_chr = r_alloc_character(0)); r_preserve_global(r_globals.empty_list = r_alloc_list(0)); r_globals.na_lgl = NA_LOGICAL; r_globals.na_int = NA_INTEGER; r_globals.na_dbl = NA_REAL; r_globals.na_cpl = (r_complex) { .r = NA_REAL, .i = NA_REAL }; r_globals.na_str = NA_STRING; r_preserve_global(r_chrs.empty_string = r_chr("")); r_preserve_global(r_chrs.full = r_chr("full")); r_classes.tibble = r_chr_n(v_tibble_class, R_ARR_SIZEOF(v_tibble_class)); r_preserve_global(r_classes.tibble); r_strs.dots = r_sym_string(r_syms.dots); r_strs.condition = r_sym_string(r_syms.condition); r_strs.empty = r_chr_get(r_chrs.empty_string, 0); r_strs.error = r_sym_string(r_syms.error); r_strs.interrupt = r_sym_string(r_syms.interrupt); r_strs.na = r_globals.na_str; r_strs.message = r_sym_string(r_syms.message); r_strs.warning = r_sym_string(r_syms.warning); r_preserve_global(r_false = r_lgl(0)); r_preserve_global(r_true = r_lgl(1)); r_envs.empty = R_EmptyEnv; r_envs.base = R_BaseEnv; r_envs.global = R_GlobalEnv; r_envs.ns = ns; } void r_init_library_globals_syms(void) { r_syms.abort = r_sym("abort"); r_syms.arg = r_sym("arg"); r_syms.brace = R_BraceSymbol; r_syms.brackets = R_BracketSymbol; r_syms.brackets2 = R_Bracket2Symbol; r_syms.call = r_sym("call"); r_syms.class_ = R_ClassSymbol; r_syms.colon2 = R_DoubleColonSymbol; r_syms.colon3 = R_TripleColonSymbol; r_syms.condition = r_sym("condition"); r_syms.dots = R_DotsSymbol; r_syms.error = r_sym("error"); r_syms.error_arg = r_sym("error_arg"); r_syms.error_call = r_sym("error_call"); r_syms.error_call_flag = r_sym(".__error_call__."); r_syms.expr = r_sym("expr"); r_syms.interrupt = r_sym("interrupt"); r_syms.missing = R_MissingArg; r_syms.message = r_sym("message"); r_syms.names = R_NamesSymbol; r_syms.options = r_sym("options"); r_syms.dim = R_DimSymbol; r_syms.dim_names = R_DimNamesSymbol; r_syms.row_names = R_RowNamesSymbol; r_syms.stack_overflow_error = r_sym("stackOverflowError"); r_syms.unbound = R_UnboundValue; r_syms.warning = r_sym("warning"); r_syms.dot_environment = r_sym(".Environment"); r_syms.dot_fn = r_sym(".fn"); r_syms.dot_x = r_sym(".x"); r_syms.dot_y = r_sym(".y"); r_syms.function = r_sym("function"); r_syms.srcfile = r_sym("srcfile"); r_syms.srcref = r_sym("srcref"); r_syms.tilde = r_sym("~"); r_syms.w = r_sym("w"); r_syms.wholeSrcref = r_sym("wholeSrcref"); r_syms.x = r_sym("x"); r_syms.y = r_sym("y"); r_syms.z = r_sym("z"); } vctrs/src/rlang/env.c0000644000176200001440000001563715157273666014277 0ustar liggesusers#include "rlang.h" #include "decl/env-decl.h" r_obj* rlang_ns_env; r_obj* r_ns_env(const char* pkg) { r_obj* pkg_sym = r_sym(pkg); if (!r_env_has(R_NamespaceRegistry, pkg_sym)) { r_abort("Can't find namespace `%s`", pkg); } return r_env_get(R_NamespaceRegistry, pkg_sym); } r_obj* r_base_ns_get(const char* name) { return r_env_get(r_envs.base, r_sym(name)); } r_obj* rlang_ns_get(const char* name) { return r_env_get(rlang_ns_env, r_sym(name)); } r_obj* r_alloc_environment(r_ssize size, r_obj* parent) { #if R_VERSION < R_Version(4, 1, 0) parent = parent ? parent : r_envs.empty; r_node_poke_car(new_env__parent_node, parent); size = size ? size : 29; r_node_poke_car(new_env__size_node, r_int(size)); r_obj* env = r_eval(new_env_call, r_envs.base); // Free for gc r_node_poke_car(new_env__parent_node, r_null); return env; #else const int hash = 1; return R_NewEnv(parent, hash, size); #endif } r_obj* r_env_as_list(r_obj* env) { return eval_with_x(env2list_call, env); } r_obj* r_env_clone(r_obj* env, r_obj* parent) { if (parent == NULL) { parent = r_env_parent(env); } // This better reproduces the behaviour of `list2env()` which in // turn affects how bindings are stored in the hash table and the // default sort of the character vector generated by `names()`. size_t size = R_MAX(r_length(env), 29); r_obj* out = KEEP(r_alloc_environment(size, parent)); r_env_coalesce(out, env); FREE(1); return out; } void r_env_coalesce(r_obj* env, r_obj* from) { r_obj* syms = KEEP(r_env_syms(from)); r_obj* types = KEEP(r_env_binding_types(from, syms)); if (types == r_null) { env_coalesce_plain(env, from, syms); FREE(2); return; } r_ssize n = r_length(syms); r_obj* const * v_syms = r_list_cbegin(syms); enum r_env_binding_type* v_types = (enum r_env_binding_type*) r_int_begin(types); for (r_ssize i = 0; i < n; ++i) { r_obj* sym = v_syms[i]; if (r_env_has(env, sym)) { continue; } switch (v_types[i]) { case R_ENV_BINDING_TYPE_unbound: break; case R_ENV_BINDING_TYPE_value: r_env_bind(env, sym, KEEP(r_env_get(from, sym))); FREE(1); break; case R_ENV_BINDING_TYPE_delayed: r_env_bind_delayed( env, sym, KEEP(r_env_binding_delayed_expr(from, sym)), KEEP(r_env_binding_delayed_env(from, sym)) ); FREE(2); break; case R_ENV_BINDING_TYPE_forced: r_env_bind_forced( env, sym, KEEP(r_env_binding_forced_expr(from, sym)), KEEP(r_env_get(from, sym)) ); FREE(2); break; case R_ENV_BINDING_TYPE_missing: r_env_bind_missing(env, sym); break; case R_ENV_BINDING_TYPE_active: r_env_bind_active(env, sym, KEEP(r_env_binding_active_fn(from, sym))); FREE(1); break; } } FREE(2); return; } static void env_coalesce_plain(r_obj* env, r_obj* from, r_obj* syms) { r_ssize n = r_length(syms); r_obj* const * v_syms = r_list_cbegin(syms); for (r_ssize i = 0; i < n; ++i) { r_obj* sym = v_syms[i]; if (r_env_has(env, sym)) { continue; } r_env_bind(env, sym, KEEP(r_env_get(from, sym))); FREE(1); } return; } r_obj* r_list_as_environment(r_obj* x, r_obj* parent) { parent = parent ? parent : r_envs.empty; return eval_with_xy(list2env_call, x, parent); } #if RLANG_USE_R_EXISTS bool r__env_has(r_obj* env, r_obj* sym) { // `exists("")` errors on older R if (sym == R_MissingArg) { return Rf_findVarInFrame3(env, sym, FALSE) != r_syms.unbound; } r_obj* nm = KEEP(r_sym_as_utf8_character(sym)); r_obj* out = eval_with_xyz(exists_call, env, nm, r_false); FREE(1); return r_as_bool(out); } bool r__env_has_anywhere(r_obj* env, r_obj* sym) { // `exists("")` errors on older R if (sym == R_MissingArg) { return Rf_findVar(sym, env) != r_syms.unbound; } r_obj* nm = KEEP(r_sym_as_utf8_character(sym)); r_obj* out = eval_with_xyz(exists_call, env, nm, r_true); FREE(1); return r_as_bool(out); } #endif bool r_env_inherits(r_obj* env, r_obj* ancestor, r_obj* top) { top = top ? top : r_envs.empty; if (r_typeof(env) != R_TYPE_environment) { r_abort("`env` must be an environment"); } if (r_typeof(ancestor) != R_TYPE_environment) { r_abort("`ancestor` must be an environment"); } if (r_typeof(top) != R_TYPE_environment) { r_abort("`top` must be an environment"); } if (env == r_envs.empty) { return false; } while (env != top && env != r_envs.empty) { if (env == ancestor) { return true; } env = r_env_parent(env); } return env == ancestor; } r_obj* r_env_until(r_obj* env, r_obj* sym, r_obj* last) { r_obj* stop = r_envs.empty; if (last != r_envs.empty) { stop = r_env_parent(last); } while (true) { if (env == r_envs.empty) { return r_envs.empty; } if (r_env_has(env, sym)) { return env; } r_obj* next = r_env_parent(env); if (next == r_envs.empty || next == stop) { return r_envs.empty; } env = next; } } r_obj* r_env_get_anywhere(r_obj* env, r_obj* sym) { env = r_env_until(env, sym, r_envs.empty); return r_env_get(env, sym); } r_obj* r_env_get_until(r_obj* env, r_obj* sym, r_obj* last) { env = r_env_until(env, sym, last); return r_env_get(env, sym); } bool r_env_has_until(r_obj* env, r_obj* sym, r_obj* last) { env = r_env_until(env, sym, last); return r_env_has(env, sym); } bool r_env_has_missing(r_obj* env, r_obj* sym) { // That's a special primitive so no need to protect `sym` r_obj* call = KEEP(r_call2(missing_prim, sym)); r_obj* out = r_eval(call, env); FREE(1); return r_as_bool(out); } void r_init_rlang_ns_env(void) { rlang_ns_env = r_ns_env("rlang"); } void r_init_library_env(void) { #if R_VERSION < R_Version(4, 1, 0) new_env_call = r_parse_eval("as.call(list(new.env, TRUE, NULL, NULL))", r_envs.base); r_preserve(new_env_call); new_env__parent_node = r_node_cddr(new_env_call); new_env__size_node = r_node_cdr(new_env__parent_node); #endif missing_prim = r_parse_eval("missing", r_envs.base); env2list_call = r_parse("as.list.environment(x, all.names = TRUE)"); r_preserve(env2list_call); list2env_call = r_parse("list2env(x, envir = NULL, parent = y, hash = TRUE)"); r_preserve(list2env_call); exists_call = r_parse("exists(y, envir = x, inherits = z)"); r_preserve(exists_call); remove_call = r_parse("remove(list = y, envir = x, inherits = z)"); r_preserve(remove_call); r_methods_ns_env = r_parse_eval("asNamespace('methods')", r_envs.base); } r_obj* rlang_ns_env = NULL; r_obj* r_methods_ns_env = NULL; #if R_VERSION < R_Version(4, 1, 0) static r_obj* new_env_call = NULL; static r_obj* new_env__parent_node = NULL; static r_obj* new_env__size_node = NULL; #endif static r_obj* exists_call = NULL; static r_obj* remove_call = NULL; static r_obj* env2list_call = NULL; static r_obj* list2env_call = NULL; static r_obj* missing_prim = NULL; vctrs/src/rlang/vec-lgl.h0000644000176200001440000000034315157273666015031 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_VECTOR_LGL_H #define RLANG_VECTOR_LGL_H #include "rlang-types.h" r_ssize r_lgl_sum(r_obj* x, bool na_true); r_obj* r_lgl_which(r_obj* x, bool na_propagate); #endif vctrs/src/rlang/c-utils.h0000644000176200001440000001077715157273666015074 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_C_UTILS_H #define RLANG_C_UTILS_H #include "rlang-types.h" #include #include #include #include #include "cnd.h" #define R_ARR_SIZEOF(X) sizeof(X) / sizeof(X[0]) #define R_MIN(a, b) ((a) < (b) ? (a) : (b)) #define R_MAX(a, b) ((a) > (b) ? (a) : (b)) // Like `memset()` with support for multi-byte types #define R_MEM_SET(TYPE, PTR, VALUE, N) do { \ TYPE* v = (PTR); \ TYPE value = (VALUE); \ size_t n = (N); \ for (size_t i = 0; i < n; ++i) { \ v[i] = value; \ } \ } while(0) void* r_shelter_deref(r_obj* x); // Allow integers up to 2^52, same as R_XLEN_T_MAX when long vector // support is enabled #define RLANG_MAX_DOUBLE_INT 4503599627370496 #define RLANG_MIN_DOUBLE_INT -4503599627370496 static inline bool r_dbl_is_whole(double x) { if (x > RLANG_MAX_DOUBLE_INT || x < RLANG_MIN_DOUBLE_INT) { return false; } // C99 guarantees existence of the int_least_N_t types, even on // machines that don't support arithmetic on width N: if (x != (int_least64_t) x) { return false; } return true; } // Adapted from CERT C coding standards static inline intmax_t r__intmax_add(intmax_t x, intmax_t y) { if ((y > 0 && x > (INTMAX_MAX - y)) || (y < 0 && x < (INTMAX_MIN - y))) { r_stop_internal("Values too large to be added."); } return x + y; } static inline intmax_t r__intmax_subtract(intmax_t x, intmax_t y) { if ((y > 0 && x < (INTMAX_MIN + y)) || (y < 0 && x > (INTMAX_MAX + y))) { r_stop_internal("Subtraction resulted in overflow or underflow."); } return x - y; } static inline r_ssize r_ssize_add(r_ssize x, r_ssize y) { intmax_t out = r__intmax_add(x, y); if (out > R_SSIZE_MAX) { r_stop_internal("Result too large for an `r_ssize`."); } return (r_ssize) out; } static inline r_ssize r_ssize_mult(r_ssize x, r_ssize y) { if (x > 0) { if (y > 0) { if (x > (R_SSIZE_MAX / y)) { goto error; } } else { if (y < (R_SSIZE_MIN / x)) { goto error; } } } else { if (y > 0) { if (x < (R_SSIZE_MIN / y)) { goto error; } } else { if ( (x != 0) && (y < (R_SSIZE_MAX / x))) { goto error; } } } return x * y; error: r_stop_internal("Result too large for an `r_ssize`."); } static inline int r_int_min(int x, int y) { return (y < x) ? y : x; } static inline int r_int_max(int x, int y) { return (y < x) ? x : y; } static inline r_ssize r_ssize_min(r_ssize x, r_ssize y) { return (y < x) ? y : x; } static inline r_ssize r_ssize_max(r_ssize x, r_ssize y) { return (y < x) ? x : y; } static inline int r_ssize_as_integer(r_ssize x) { if (x > INT_MAX || x < INT_MIN) { r_stop_internal("Result can't be represented as `int`."); } return (int) x; } static inline double r_ssize_as_double(r_ssize x) { if (x > DBL_MAX || x < -DBL_MAX) { r_stop_internal("Result can't be represented as `double`."); } return (double) x; } static inline r_ssize r_double_as_ssize(double x) { if (x > R_SSIZE_MAX || x < R_SSIZE_MIN) { r_stop_internal("Result can't be represented as `r_ssize`."); } return (r_ssize) x; } static inline double r_double_mult(double x, double y) { double out = x * y; if (!isfinite(out)) { r_stop_internal("Can't multiply double values."); } return out; } // Slightly safer version of `memcpy()` for use with R object memory // // Prefer this over `memcpy()`, especially when providing pointers to R object // memory. As of R 4.5.0, `DATAPTR()` and friends return `(void*) 1` on 0-length // R objects, so we must be extremely careful to never use dereference those // pointers. In particular, it is not safe to call `memcpy(dest, src, 0)` on // some machines (likely with sanitizers active) when either `dest` or `src` // resolve to `(void*) 1`. // // https://github.com/r-lib/vctrs/pull/1968 // https://github.com/r-devel/r-svn/blob/9976c3d7f08c754593d01ba8380afb6be803dde2/src/main/memory.c#L4137-L4150 static inline void r_memcpy(void* dest, const void* src, size_t count) { if (count) { memcpy(dest, src, count); } } // Slightly safer version of `memset()` for use with R object memory // // See `r_memcpy()` for rationale static inline void r_memset(void* dest, int value, size_t count) { if (count) { memset(dest, value, count); } } #endif vctrs/src/rlang/session.c0000644000176200001440000000276015157273666015163 0ustar liggesusers#include "rlang.h" r_obj* eval_with_x(r_obj* call, r_obj* x); static r_obj* is_installed_call = NULL; bool r_is_installed(const char* pkg) { r_obj* installed = eval_with_x(is_installed_call, KEEP(r_chr(pkg))); bool out = *r_lgl_begin(installed); FREE(1); return out; } static r_obj* has_colour_call = NULL; bool r_has_colour(void) { if (!r_is_installed("crayon")) { return false; } return *r_lgl_begin(r_eval(has_colour_call, r_envs.base)); } void r_init_library_session(void) { is_installed_call = r_parse("requireNamespace(x, quietly = TRUE)"); r_preserve(is_installed_call); has_colour_call = r_parse("crayon::has_color()"); r_preserve(has_colour_call); } #ifdef _WIN32 # include # include r_obj* r_getppid(void) { DWORD pid = GetCurrentProcessId(); HANDLE handle = NULL; PROCESSENTRY32W pe = { 0 }; pe.dwSize = sizeof(PROCESSENTRY32W); handle = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); if (handle == INVALID_HANDLE_VALUE) { r_abort("Can't query parent pid."); } if (Process32FirstW(handle, &pe)) { do { if (pe.th32ProcessID == pid) { DWORD ppid = pe.th32ParentProcessID; CloseHandle(handle); return r_int(ppid); } } while (Process32NextW(handle, &pe)); } /* Should not get here */ CloseHandle(handle); r_stop_internal("Can't find my own process."); return r_null; } #else # include r_obj* r_getppid(void) { return r_int(getppid()); } #endif vctrs/src/rlang/sym.c0000644000176200001440000000263115157273666014305 0ustar liggesusers#include #include "rlang.h" // In old R versions `as.name()` does not translate to native which // loses the encoding. This symbol constructor always translates. r_obj* r_new_symbol(r_obj* x, int* err) { switch (r_typeof(x)) { case SYMSXP: return x; case R_TYPE_character: if (r_length(x) == 1) { const char* string = Rf_translateChar(r_chr_get(x, 0)); return r_sym(string); } // else fallthrough default: { if (err) { *err = -1; return r_null; } else { const char* type = r_type_as_c_string(r_typeof(x)); r_abort("Can't create a symbol with a %s", type); } }} } bool r_is_symbol(r_obj* x, const char* string) { if (r_typeof(x) != SYMSXP) { return false; } else { return strcmp(CHAR(PRINTNAME(x)), string) == 0; } } bool r_is_symbol_any(r_obj* x, const char** strings, int n) { if (r_typeof(x) != SYMSXP) { return false; } const char* name = CHAR(PRINTNAME(x)); for (int i = 0; i < n; ++i) { if (strcmp(name, strings[i]) == 0) { return true; } } return false; } r_obj* (*r_sym_as_utf8_character)(r_obj* x) = NULL; r_obj* (*r_sym_as_utf8_string)(r_obj* x) = NULL; void r_init_library_sym(void) { r_sym_as_utf8_character = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_sym_as_character"); r_sym_as_utf8_string = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_sym_as_string"); } vctrs/src/rlang/vec-lgl.c0000644000176200001440000000556215157273666015034 0ustar liggesusers#include "rlang.h" #include r_ssize r_lgl_sum(r_obj* x, bool na_true) { if (r_typeof(x) != R_TYPE_logical) { r_abort("Internal error: Excepted logical vector in `r_lgl_sum()`"); } const r_ssize n = r_length(x); const int* v_x = r_lgl_cbegin(x); // This can't overflow since `sum` is necessarily smaller or equal // to the vector length expressed in `r_ssize` r_ssize sum = 0; if (na_true) { for (r_ssize i = 0; i < n; ++i) { sum += (bool) v_x[i]; } } else { for (r_ssize i = 0; i < n; ++i) { sum += (v_x[i] == 1); } } return sum; } r_obj* r_lgl_which(r_obj* x, bool na_propagate) { const enum r_type type = r_typeof(x); if (type != R_TYPE_logical) { r_stop_unexpected_type(type); } const r_ssize n = r_length(x); const int* v_x = r_lgl_cbegin(x); const r_ssize out_n = r_lgl_sum(x, na_propagate); if (out_n > INT_MAX) { r_stop_internal("Can't fit result in an integer vector."); } r_obj* out = KEEP(r_alloc_integer(out_n)); int* v_out = r_int_begin(out); r_obj* names = r_names(x); const bool has_names = (names != r_null); if (na_propagate) { if (has_names) { // Mark `NA` locations with negative location for extracting names later for (r_ssize i = 0, j = 0; i < n && j < out_n; ++i) { const int x_elt = v_x[i]; const bool missing = x_elt == r_globals.na_lgl; const int elt = missing * (-i - 1) + !missing * x_elt * (i + 1); v_out[j] = elt; j += (bool) elt; } } else { for (r_ssize i = 0, j = 0; i < n && j < out_n; ++i) { const int x_elt = v_x[i]; const bool missing = x_elt == r_globals.na_lgl; const int elt = missing * r_globals.na_int + !missing * x_elt * (i + 1); v_out[j] = elt; j += (bool) elt; } } } else { for (r_ssize i = 0, j = 0; i < n && j < out_n; ++i) { const int x_elt = v_x[i]; v_out[j] = i + 1; j += (x_elt == 1); } } if (has_names) { r_obj* const* v_names = r_chr_cbegin(names); r_obj* out_names = r_alloc_character(out_n); r_attrib_poke_names(out, out_names); if (na_propagate) { // `v_out` contains negative locations which tells you the location of the // name to extract while also serving as a signal of where `NA`s should go // in the finalized output for (r_ssize i = 0; i < out_n; ++i) { const int loc = v_out[i]; const int abs_loc = abs(loc); const bool same = (loc == abs_loc); v_out[i] = same * loc + !same * r_globals.na_int; r_chr_poke(out_names, i, v_names[abs_loc - 1]); } } else { // `v_out` doesn't contain `NA`, so we can use the locations directly for (r_ssize i = 0; i < out_n; ++i) { const int loc = v_out[i] - 1; r_chr_poke(out_names, i, v_names[loc]); } } } FREE(1); return out; } vctrs/src/rlang/stack.c0000644000176200001440000000504515157273666014604 0ustar liggesusers#include "rlang.h" #include "decl/stack-decl.h" void r_on_exit(r_obj* expr, r_obj* frame) { static r_obj* on_exit_prim = NULL; if (!on_exit_prim) { on_exit_prim = r_base_ns_get("on.exit"); } r_obj* args = r_pairlist2(expr, r_lgl(1)); r_obj* lang = KEEP(r_new_call(on_exit_prim, args)); r_eval(lang, frame); FREE(1); } r_obj* r_peek_frame(void) { return r_eval(peek_frame_call, r_envs.base); } r_obj* r_caller_env(r_obj* n) { if (r_typeof(n) != R_TYPE_environment) { r_stop_internal("`n` must be an environment."); } return r_eval(caller_env_call, n); } static r_obj* sys_frame_call = NULL; static r_obj* sys_call_call = NULL; static int* sys_frame_n_addr = NULL; static int* sys_call_n_addr = NULL; r_obj* r_sys_frame(int n, r_obj* frame) { int n_kept = 0; if (!frame) { frame = r_peek_frame(); KEEP_N(frame, &n_kept); } *sys_frame_n_addr = n; SEXP value = r_eval(sys_frame_call, frame); FREE(n_kept); return value; } r_obj* r_sys_call(int n, r_obj* frame) { int n_kept = 0; if (!frame) { frame = r_peek_frame(); KEEP_N(frame, &n_kept); } *sys_call_n_addr = n; SEXP value = r_eval(sys_call_call, frame); FREE(n_kept); return value; } static r_obj* generate_sys_call(const char* name, int** n_addr) { r_obj* sys_n = KEEP(r_int(0)); *n_addr = r_int_begin(sys_n); r_obj* sys_args = KEEP(r_new_node(sys_n, r_null)); r_obj* sys_call = KEEP(r_new_call(r_base_ns_get(name), sys_args)); r_preserve(sys_call); FREE(3); return sys_call; } void r_init_library_stack(void) { // `sys.frame(sys.nframe())` doesn't work because `sys.nframe()` // returns the number of the frame in which evaluation occurs. It // doesn't return the number of frames on the stack. So we'd need // to evaluate it in the last frame on the stack which is what we // are looking for to begin with. We use instead this workaround: // Call `sys.frame()` from a closure to push a new frame on the // stack, and use negative indexing to get the previous frame. r_obj* current_frame_body = KEEP(r_parse("sys.frame(-1)")); r_obj* current_frame_fn = KEEP(r_new_function(r_null, current_frame_body, r_envs.base)); peek_frame_call = r_new_call(current_frame_fn, r_null); r_preserve(peek_frame_call); FREE(2); sys_frame_call = generate_sys_call("sys.frame", &sys_frame_n_addr); sys_call_call = generate_sys_call("sys.call", &sys_call_n_addr); caller_env_call = r_parse("parent.frame()"); r_preserve_global(caller_env_call); } static r_obj* peek_frame_call = NULL; static r_obj* caller_env_call = NULL; vctrs/src/rlang/eval.c0000644000176200001440000001100715157273666014421 0ustar liggesusers#include "rlang.h" r_obj* r_eval_with_x(r_obj* call, r_obj* x, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(1, parent)); r_env_bind(env, r_syms.x, x); r_obj* out = r_eval(call, env); FREE(1); return out; } r_obj* r_eval_with_xy(r_obj* call, r_obj* x, r_obj* y, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(2, parent)); r_env_bind(env, r_syms.x, x); r_env_bind(env, r_syms.y, y); r_obj* out = r_eval(call, env); FREE(1); return out; } r_obj* r_eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(3, parent)); r_env_bind(env, r_syms.x, x); r_env_bind(env, r_syms.y, y); r_env_bind(env, r_syms.z, z); r_obj* out = r_eval(call, env); FREE(1); return out; } r_obj* r_eval_with_wxyz(r_obj* call, r_obj* w, r_obj* x, r_obj* y, r_obj* z, r_obj* parent) { r_obj* env = KEEP(r_alloc_environment(4, parent)); r_env_bind(env, r_syms.w, w); r_env_bind(env, r_syms.x, x); r_env_bind(env, r_syms.y, y); r_env_bind(env, r_syms.z, z); r_obj* out = r_eval(call, env); FREE(1); return out; } // Evaluate call with a preallocated environment containing a single // `x` binding and inheriting from base env. // // Since this has side effects, it should not be used when there is a // chance of recursing into the C library. It should only be used to // evaluate pure R calls or functions from other packages, such as the // base package. static r_obj* shared_x_env; static r_obj* shared_xy_env; static r_obj* shared_xyz_env; r_obj* eval_with_x(r_obj* call, r_obj* x) { r_env_bind(shared_x_env, r_syms.x, x); r_obj* out = KEEP(r_eval(call, shared_x_env)); // Release for gc r_env_bind(shared_x_env, r_syms.x, r_null); FREE(1); return out; } r_obj* eval_with_xy(r_obj* call, r_obj* x, r_obj* y) { r_env_bind(shared_xy_env, r_syms.x, x); r_env_bind(shared_xy_env, r_syms.y, y); r_obj* out = KEEP(r_eval(call, shared_xy_env)); // Release for gc r_env_bind(shared_xy_env, r_syms.x, r_null); r_env_bind(shared_xy_env, r_syms.y, r_null); FREE(1); return out; } r_obj* eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z) { r_env_bind(shared_xyz_env, r_syms.x, x); r_env_bind(shared_xyz_env, r_syms.y, y); r_env_bind(shared_xyz_env, r_syms.z, z); r_obj* out = KEEP(r_eval(call, shared_xyz_env)); // Release for gc r_env_bind(shared_xyz_env, r_syms.x, r_null); r_env_bind(shared_xyz_env, r_syms.y, r_null); r_env_bind(shared_xyz_env, r_syms.z, r_null); FREE(1); return out; } r_obj* r_exec_mask_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* parent) { r_obj* mask = KEEP(r_alloc_environment(n + 1, parent)); r_obj* call = KEEP(r_exec_mask_n_call_poke(fn_sym, fn, args, n, mask)); r_obj* out = r_eval(call, mask); FREE(2); return out; } r_obj* r_exec_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env) { r_obj* call = KEEP(r_exec_mask_n_call_poke(fn_sym, fn, args, n, env)); r_obj* out = r_eval(call, env); FREE(1); return out; } // Create a call from arguments and poke elements with a non-NULL // symbol in `env`. Symbolic arguments are protected from evaluation // with `quote()`. r_obj* r_exec_mask_n_call_poke(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env) { if (fn_sym != r_null) { r_env_bind(env, fn_sym, fn); fn = fn_sym; } r_obj* list = KEEP(r_new_pairlist(args, n, NULL)); r_obj* node = list; while (node != r_null) { r_obj* car = r_node_car(node); r_obj* tag = r_node_tag(node); if (tag == r_null) { // If symbol is not supplied, protect symbolic arguments from // evaluation. If supplied this is not needed because of the // masking. r_node_poke_car(node, r_expr_protect(car)); } else { // If symbol is supplied, assign the value in the environment and // use the symbol instead of the value in the list of arguments r_env_bind(env, tag, car); r_node_poke_car(node, tag); } node = r_node_cdr(node); } r_obj* call = r_new_call(fn, list); FREE(1); return call; } void r_init_library_eval(void) { r_lazy_missing_arg = (struct r_lazy) { .x = r_missing_arg, .env = r_null }; } struct r_lazy r_lazy_null = { 0 }; struct r_lazy r_lazy_missing_arg = { 0 }; vctrs/src/rlang/dyn-array.c0000644000176200001440000000575315157273666015413 0ustar liggesusers#include #include "dyn-array.h" #define R_DYN_ARRAY_GROWTH_FACTOR 2 static r_obj* dyn_array_class = NULL; struct r_dyn_array* r_new_dyn_vector(enum r_type type, r_ssize capacity) { r_obj* shelter = KEEP(r_alloc_list(2)); r_attrib_poke_class(shelter, dyn_array_class); r_obj* vec_raw = r_alloc_raw(sizeof(struct r_dyn_array)); r_list_poke(shelter, 0, vec_raw); r_obj* vec_data = r_alloc_vector(type, capacity); r_list_poke(shelter, 1, vec_data); struct r_dyn_array* p_vec = r_raw_begin(vec_raw); p_vec->shelter = shelter; p_vec->count = 0; p_vec->capacity = capacity; p_vec->growth_factor = R_DYN_ARRAY_GROWTH_FACTOR; p_vec->type = type; p_vec->elt_byte_size = r_vec_elt_sizeof0(type); p_vec->data = vec_data; switch (type) { case R_TYPE_character: p_vec->v_data = NULL; p_vec->barrier_set = &r_chr_poke; break; case R_TYPE_list: p_vec->v_data = NULL; p_vec->barrier_set = &r_list_poke; break; default: p_vec->barrier_set = NULL; p_vec->v_data = r_vec_begin0(type, vec_data); break; } p_vec->v_data_const = r_vec_cbegin0(type, vec_data); FREE(1); return p_vec; } r_obj* r_dyn_unwrap(struct r_dyn_array* p_arr) { if (p_arr->type == R_TYPE_raw) { return r_raw_resize(p_arr->data, p_arr->count * p_arr->elt_byte_size); } else { return r_vec_resize0(p_arr->type, p_arr->data, p_arr->count); } } struct r_dyn_array* r_new_dyn_array(r_ssize elt_byte_size, r_ssize capacity) { r_ssize arr_byte_size = r_ssize_mult(capacity, elt_byte_size); struct r_dyn_array* p_arr = r_new_dyn_vector(R_TYPE_raw, arr_byte_size); p_arr->capacity = capacity; p_arr->elt_byte_size = elt_byte_size; return p_arr; } void r_dyn_push_back(struct r_dyn_array* p_arr, const void* p_elt) { r_ssize loc = r__dyn_increment(p_arr); if (p_arr->barrier_set) { r_obj* value = *((r_obj* const *) p_elt); p_arr->barrier_set(p_arr->data, loc, value); } else if (p_elt) { r_memcpy(r_dyn_last(p_arr), p_elt, p_arr->elt_byte_size); } else { r_memset(r_dyn_last(p_arr), 0, p_arr->elt_byte_size); } } void r_dyn_resize(struct r_dyn_array* p_arr, r_ssize capacity) { enum r_type type = p_arr->type; r_ssize capacity_multiplier = p_arr->type == R_TYPE_raw ? r_ssize_mult(p_arr->elt_byte_size, capacity) : capacity; r_obj* data = r_vec_resize0(type, r_list_get(p_arr->shelter, 1), capacity_multiplier); r_list_poke(p_arr->shelter, 1, data); p_arr->count = r_ssize_min(p_arr->count, capacity); p_arr->capacity = capacity; p_arr->data = data; switch (type) { case R_TYPE_character: case R_TYPE_list: break; default: p_arr->v_data = r_vec_begin0(type, data); break; } p_arr->v_data_const = r_vec_cbegin0(type, data); } void r_init_library_dyn_array(void) { r_preserve_global(dyn_array_class = r_chr("rlang_dyn_array")); } vctrs/src/rlang/env-binding.h0000644000176200001440000000303715157273666015703 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_ENV_BINDING_H #define RLANG_ENV_BINDING_H #include "rlang-types.h" enum r_env_binding_type { R_ENV_BINDING_TYPE_unbound = 0, R_ENV_BINDING_TYPE_value = 1, R_ENV_BINDING_TYPE_missing = 2, R_ENV_BINDING_TYPE_delayed = 3, R_ENV_BINDING_TYPE_forced = 4, R_ENV_BINDING_TYPE_active = 5 }; enum r_env_binding_type r_env_binding_type(r_obj* env, r_obj* sym); r_obj* r_env_binding_types(r_obj* env, r_obj* syms); r_obj* r_env_syms(r_obj* env); r_obj* r_env_get(r_obj* env, r_obj* sym); // Binding constructors static inline void r_env_bind(r_obj* env, r_obj* sym, r_obj* value) { // See rchk concerns in https://github.com/r-lib/rlang/commit/28ce7b01 KEEP(value); Rf_defineVar(sym, value, env); FREE(1); } // Silently ignores bindings that are not defined in `env`. static inline void r_env_unbind(r_obj* env, r_obj* sym) { R_removeVarFromFrame(sym, env); } void r_env_bind_active(r_obj* env, r_obj* sym, r_obj* fn); void r_env_bind_delayed(r_obj* env, r_obj* sym, r_obj* expr, r_obj* eval_env); void r_env_bind_forced(r_obj* env, r_obj* sym, r_obj* expr, r_obj* value); void r_env_bind_missing(r_obj* env, r_obj* sym); // Delayed binding accessors r_obj* r_env_binding_delayed_expr(r_obj* env, r_obj* sym); r_obj* r_env_binding_delayed_env(r_obj* env, r_obj* sym); // Forced binding accessors r_obj* r_env_binding_forced_expr(r_obj* env, r_obj* sym); // Active binding accessors r_obj* r_env_binding_active_fn(r_obj* env, r_obj* sym); void r_init_library_env_binding(void); #endif vctrs/src/rlang/rlang.hpp0000644000176200001440000000065515157273666015151 0ustar liggesusers#ifndef RLANG_RLANG_HPP #define RLANG_RLANG_HPP #include #define R_NO_REMAP #include extern "C" { #include } static inline r_no_return void rcc_abort(const char* fn) { try { throw; } catch (const std::exception& err) { r_abort(err.what()); } catch (...) { r_obj* call = KEEP(r_call(r_sym(fn))); (r_stop_internal)("", -1, call, "Caught unknown C++ exception."); } } #endif vctrs/src/rlang/dyn-list-of.h0000644000176200001440000000325615157273666015653 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_DYN_LIST_OF_H #define RLANG_DYN_LIST_OF_H #include "rlang-types.h" #include "dyn-array.h" struct r_dyn_list_of { r_obj* shelter; r_ssize count; r_ssize capacity; int growth_factor; // Contains the addresses and sizes of each element of the // list-of. If you copy that pointer, consider it invalid after a // push because it might have moved in memory due to a resize. struct r_pair_ptr_ssize* v_data; // private: r_ssize width; enum r_type type; r_ssize elt_byte_size; r_obj* reserve; void* v_reserve; struct r_dyn_array* p_moved_arr; struct r_dyn_array* p_moved_shelter_arr; r_obj* arr_locs; r_ssize* v_arr_locs; struct r_dyn_array* p_arrays; }; struct r_dyn_list_of* r_new_dyn_list_of(enum r_type type, r_ssize capacity, r_ssize width); r_obj* r_lof_unwrap(struct r_dyn_list_of* p_lof); void r_lof_push_back(struct r_dyn_list_of* p_lof); void r_lof_arr_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt); static inline void* r_lof_arr_ptr(struct r_dyn_list_of* p_lof, r_ssize i, r_ssize j) { r_ssize offset = j * p_lof->elt_byte_size; struct r_pair_ptr_ssize* v_arrays = (struct r_pair_ptr_ssize*) r_dyn_pointer(p_lof->p_arrays, i); return ((unsigned char*) v_arrays->ptr) + offset; } static inline void* r_lof_arr_ptr_front(struct r_dyn_list_of* p_lof, r_ssize i) { return r_lof_arr_ptr(p_lof, i, 0); } static inline void* r_lof_arr_ptr_back(struct r_dyn_list_of* p_lof, r_ssize i) { return r_lof_arr_ptr(p_lof, i, p_lof->count - 1); } #endif vctrs/src/rlang/stack.h0000644000176200001440000000057515157273666014614 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_STACK_H #define RLANG_STACK_H #include "rlang-types.h" void r_on_exit(r_obj* expr, r_obj* frame); r_obj* r_peek_frame(void); r_obj* r_caller_env(r_obj* n); r_obj* r_sys_frame(int n, r_obj* frame); r_obj* r_sys_call(int n, r_obj* frame); static inline void r_yield_interrupt(void) { R_CheckUserInterrupt(); } #endif vctrs/src/rlang/env.h0000644000176200001440000000476015157273666014277 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_ENV_H #define RLANG_ENV_H #include "rlang-types.h" #include "cnd.h" #include "globals.h" #include "obj.h" #define RLANG_USE_R_EXISTS (R_VERSION < R_Version(4, 2, 0)) extern r_obj* r_methods_ns_env; static inline r_obj* r_env_names(r_obj* env) { return R_lsInternal3(env, TRUE, FALSE); } static inline r_ssize r_env_length(r_obj* env) { if (r_typeof(env) != R_TYPE_environment) { r_abort("Expected an environment"); } return Rf_xlength(env); } static inline r_obj* r_env_parent(r_obj* env) { if (env == r_envs.empty) { r_stop_internal("Can't take the parent of the empty environment."); } #if R_VERSION >= R_Version(4, 5, 0) return R_ParentEnv(env); #else return ENCLOS(env); #endif } static inline bool r_is_environment(r_obj* x) { return TYPEOF(x) == ENVSXP; } static inline bool r_is_namespace(r_obj* x) { return R_IsNamespaceEnv(x); } r_obj* r_env_until(r_obj* env, r_obj* sym, r_obj* last); r_obj* r_env_get_anywhere(r_obj* env, r_obj* sym); r_obj* r_env_get_until(r_obj* env, r_obj* sym, r_obj* last); bool r_env_has_until(r_obj* env, r_obj* sym, r_obj* last); static inline bool r_env_has(r_obj* env, r_obj* sym) { #if RLANG_USE_R_EXISTS bool r__env_has(r_obj*, r_obj*); return r__env_has(env, sym); #else return R_existsVarInFrame(env, sym); #endif } static inline bool r_env_has_anywhere(r_obj* env, r_obj* sym) { #if RLANG_USE_R_EXISTS bool r__env_has_anywhere(r_obj*, r_obj*); return r__env_has_anywhere(env, sym); #else while (env != r_envs.empty) { if (r_env_has(env, sym)) { return true; } env = r_env_parent(env); } return false; #endif } bool r_env_has_missing(r_obj* env, r_obj* sym); r_obj* r_ns_env(const char* pkg); r_obj* r_base_ns_get(const char* name); r_obj* r_alloc_environment(r_ssize size, r_obj* parent); static inline r_obj* r_alloc_empty_environment(r_obj* parent) { // Non-hashed environment. // Very fast and useful when you aren't getting/setting from the result. #if R_VERSION >= R_Version(4, 1, 0) const int hash = 0; const int size = 0; // Not used when `hash = 0` return R_NewEnv(parent, hash, size); #else r_obj* env = Rf_allocSExp(R_TYPE_environment); SET_ENCLOS(env, parent); return env; #endif } r_obj* r_env_as_list(r_obj* x); r_obj* r_list_as_environment(r_obj* x, r_obj* parent); r_obj* r_env_clone(r_obj* env, r_obj* parent); void r_env_coalesce(r_obj* env, r_obj* from); bool r_env_inherits(r_obj* env, r_obj* ancestor, r_obj* top); #endif vctrs/src/rlang/cnd.c0000644000176200001440000001136115157273666014241 0ustar liggesusers#include "rlang.h" #include "decl/cnd-decl.h" #define BUFSIZE 8192 #define INTERP(BUF, FMT, DOTS) \ { \ va_list dots; \ va_start(dots, FMT); \ vsnprintf(BUF, BUFSIZE, FMT, dots); \ va_end(dots); \ \ BUF[BUFSIZE - 1] = '\0'; \ } static r_obj* msg_call = NULL; void r_inform(const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_eval_with_x(msg_call, KEEP(r_chr(buf)), r_envs.ns); FREE(1); } static r_obj* wng_call = NULL; void r_warn(const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_eval_with_x(wng_call, KEEP(r_chr(buf)), r_envs.ns); FREE(1); } static r_obj* err_call = NULL; void r_abort(const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_obj* message = KEEP(r_chr(buf)); // Evaluate in a mask but forward error call to the current frame r_obj* frame = KEEP(r_peek_frame()); r_obj* mask = KEEP(r_alloc_environment(2, frame)); r_env_bind(mask, r_syms.error_call_flag, frame); struct r_pair args[] = { { r_syms.message, message } }; r_exec_n(r_null, r_syms.abort, args, R_ARR_SIZEOF(args), mask); while (1); // No return } r_no_return void r_abort_n(const struct r_pair* args, int n) { r_exec_mask_n(r_null, r_syms.abort, args, n, r_peek_frame()); r_stop_unreachable(); } r_no_return void r_abort_call(r_obj* call, const char* fmt, ...) { char buf[BUFSIZE]; INTERP(buf, fmt, ...); r_obj* message = KEEP(r_chr(buf)); struct r_pair args[] = { { r_syms.message, message }, { r_syms.call, call } }; r_obj* frame = KEEP(r_peek_frame()); r_exec_mask_n(r_null, r_syms.abort, args, R_ARR_SIZEOF(args), frame); r_stop_unreachable(); } void r_cnd_signal(r_obj* cnd) { r_eval_with_x(cnd_signal_call, cnd, r_envs.base); } // For `R_interrupts_suspended` #include #include #ifdef _WIN32 #include void r_interrupt(void) { UserBreak = 1; R_CheckUserInterrupt(); } #else #include void r_interrupt(void) { Rf_onintr(); } #endif enum r_cnd_type r_cnd_type(r_obj* cnd) { r_obj* classes = r_class(cnd); if (r_typeof(cnd) != R_TYPE_list || r_typeof(classes) != R_TYPE_character) { goto error; } r_obj* const * v_classes = r_chr_cbegin(classes); r_ssize n_classes = r_length(classes); for (r_ssize i = n_classes - 2; i >= 0; --i) { r_obj* class_str = v_classes[i]; if (class_str == r_strs.error) { return R_CND_TYPE_error; } if (class_str == r_strs.warning) { return R_CND_TYPE_warning; } if (class_str == r_strs.message) { return R_CND_TYPE_message; } if (class_str == r_strs.interrupt) { return R_CND_TYPE_interrupt; } } if (r_inherits(cnd, "condition")) { return R_CND_TYPE_condition; } error: r_abort("`cnd` is not a condition object."); } void r_init_library_cnd(void) { msg_call = r_parse("message(x)"); r_preserve(msg_call); wng_call = r_parse("warning(x, call. = FALSE)"); r_preserve(wng_call); err_call = r_parse("rlang::abort(x)"); r_preserve(err_call); cnd_signal_call = r_parse("rlang::cnd_signal(x)"); r_preserve(cnd_signal_call); // Silence "'noreturn' attribute does not apply to types warning". // It seems like GCC doesn't handle attributes in casts so we need // to cast through a typedef. // https://stackoverflow.com/questions/9441262/function-pointer-to-attribute-const-function typedef r_no_return void (*r_stop_internal_t)(const char*, int, r_obj*, const char* fmt, ...); r_stop_internal = (r_stop_internal_t) R_GetCCallable("rlang", "rlang_stop_internal2"); r_format_error_arg = (const char* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_format_error_arg"); r_obj_type_friendly_full = (const char* (*)(r_obj*, bool, bool)) r_peek_c_callable("rlang", "rlang_obj_type_friendly_full"); } r_no_return void (*r_stop_internal)(const char* file, int line, r_obj* call, const char* fmt, ...) = NULL; static r_obj* cnd_signal_call = NULL; const char* (*r_format_error_arg)(r_obj* arg) = NULL; const char* (*r_obj_type_friendly_full)(r_obj* x, bool value, bool length) = NULL; const char* r_format_lazy_error_arg(struct r_lazy arg) { r_obj* ffi_arg = KEEP(r_lazy_eval(arg)); const char* out = r_format_error_arg(ffi_arg); FREE(1); return out; } vctrs/src/rlang/env-binding.c0000644000176200001440000001577115157273666015706 0ustar liggesusers#include "rlang.h" #include "env.h" #include "decl/env-binding-decl.h" // https://bugs.r-project.org/show_bug.cgi?id=18928 #define RLANG_HAS_R_BINDING_API (R_VERSION >= R_Version(4, 6, 0)) #if !RLANG_HAS_R_BINDING_API static inline r_obj* env_find(r_obj* env, r_obj* sym) { return Rf_findVarInFrame3(env, sym, FALSE); } #endif static r_obj* new_binding_types(r_ssize n) { r_obj* types = r_alloc_integer(n); int* types_ptr = r_int_begin(types); r_memset(types_ptr, 0, n * sizeof *types_ptr); return types; } static inline r_obj* binding_as_sym(bool list, r_obj* bindings, r_ssize i) { if (list) { r_obj* out = r_list_get(bindings, i); if (r_typeof(out) != R_TYPE_symbol) { r_abort("Binding must be a symbol."); } return out; } else { return r_str_as_symbol(r_chr_get(bindings, i)); } } static r_ssize detect_special_binding(r_obj* env, r_obj* bindings, bool symbols) { r_ssize n = r_length(bindings); for (r_ssize i = 0; i < n; ++i) { r_obj* sym = binding_as_sym(symbols, bindings, i); enum r_env_binding_type type = r_env_binding_type(env, sym); if (type == R_ENV_BINDING_TYPE_active || type == R_ENV_BINDING_TYPE_delayed) { return i; } } return -1; } // Returns NULL if all values to spare an alloc r_obj* r_env_binding_types(r_obj* env, r_obj* bindings) { if (r_typeof(env) != R_TYPE_environment) { r_abort("Expected environment in promise binding predicate."); } bool symbols; switch (r_typeof(bindings)) { case R_TYPE_list: symbols = true; break; case R_TYPE_character: symbols = false; break; default: r_abort("Internal error: Unexpected `bindings` type in `r_env_binding_types()`"); } r_ssize i = detect_special_binding(env, bindings, symbols); if (i < 0) { return r_null; } r_ssize n = r_length(bindings); r_obj* types = KEEP(new_binding_types(n)); int* types_ptr = r_int_begin(types); // Fill value type for bindings before first special binding for (r_ssize j = 0; j < i; ++j) { *types_ptr = R_ENV_BINDING_TYPE_value; ++types_ptr; } while (i < n) { r_obj* sym = binding_as_sym(symbols, bindings, i); *types_ptr = r_env_binding_type(env, sym); ++i; ++types_ptr; } FREE(1); return types; } // This does an extra alloc, see https://bugs.r-project.org/show_bug.cgi?id=18928#c2 r_obj* r_env_syms(r_obj* env) { r_obj* nms = KEEP(r_env_names(env)); r_ssize n = r_length(nms); r_obj* out = KEEP(r_alloc_list(n)); r_obj* const * v_nms = r_chr_cbegin(nms); for (r_ssize i = 0; i < n; ++i) { r_list_poke(out, i, r_str_as_symbol(v_nms[i])); } FREE(2); return out; } // Binding type API // Implements future R API from https://bugs.r-project.org/show_bug.cgi?id=18928 enum r_env_binding_type r_env_binding_type(r_obj* env, r_obj* sym) { #if RLANG_HAS_R_BINDING_API switch (R_GetBindingType(sym, env)) { case R_BindingTypeUnbound: return R_ENV_BINDING_TYPE_unbound; case R_BindingTypeValue: return R_ENV_BINDING_TYPE_value; case R_BindingTypeMissing: return R_ENV_BINDING_TYPE_missing; case R_BindingTypeDelayed: return R_ENV_BINDING_TYPE_delayed; case R_BindingTypeForced: return R_ENV_BINDING_TYPE_forced; case R_BindingTypeActive: return R_ENV_BINDING_TYPE_active; } r_stop_unreachable(); #else // Active binding check must come first since `r_env_find()` triggers them if (R_BindingIsActive(sym, env)) { return R_ENV_BINDING_TYPE_active; } r_obj* value = env_find(env, sym); if (value == r_syms.unbound) { return R_ENV_BINDING_TYPE_unbound; } if (value == r_missing_arg) { return R_ENV_BINDING_TYPE_missing; } if (r_typeof(value) == R_TYPE_promise) { bool forced; rlang_promise_unwrap(value, &forced); if (forced) { return R_ENV_BINDING_TYPE_forced; } return R_ENV_BINDING_TYPE_delayed; } return R_ENV_BINDING_TYPE_value; #endif } r_obj* r_env_get(r_obj* env, r_obj* sym) { enum r_env_binding_type type = r_env_binding_type(env, sym); if (type == R_ENV_BINDING_TYPE_unbound) { r_abort("object '%s' not found", r_sym_c_string(sym)); } if (type == R_ENV_BINDING_TYPE_missing) { return r_missing_arg; } #if R_VERSION >= R_Version(4, 5, 0) return R_getVar(sym, env, TRUE); #else r_obj* value = env_find(env, sym); if (r_typeof(value) == R_TYPE_dots) { return value; } // Handles value, delayed, forced, and active bindings return Rf_eval(sym, env); #endif } // Binding constructors void r_env_bind_active(r_obj* env, r_obj* sym, r_obj* fn) { KEEP(fn); r_env_unbind(env, sym); R_MakeActiveBinding(sym, fn, env); FREE(1); } void r_env_bind_delayed(r_obj* env, r_obj* sym, r_obj* expr, r_obj* eval_env) { #if RLANG_HAS_R_BINDING_API R_MakeDelayedBinding(sym, expr, eval_env, env); #else r_obj* promise = KEEP(Rf_allocSExp(PROMSXP)); SET_PRCODE(promise, expr); SET_PRENV(promise, eval_env); SET_PRVALUE(promise, r_syms.unbound); Rf_defineVar(sym, promise, env); FREE(1); #endif } void r_env_bind_forced(r_obj* env, r_obj* sym, r_obj* expr, r_obj* value) { #if RLANG_HAS_R_BINDING_API R_MakeForcedBinding(sym, expr, value, env); #else r_obj* promise = KEEP(Rf_allocSExp(PROMSXP)); SET_PRCODE(promise, expr); SET_PRENV(promise, r_null); SET_PRVALUE(promise, value); Rf_defineVar(sym, promise, env); FREE(1); #endif } void r_env_bind_missing(r_obj* env, r_obj* sym) { #if RLANG_HAS_R_BINDING_API R_MakeMissingBinding(sym, env); #else Rf_defineVar(sym, r_missing_arg, env); #endif } // Delayed binding accessors r_obj* r_env_binding_delayed_expr(r_obj* env, r_obj* sym) { #if RLANG_HAS_R_BINDING_API return R_DelayedBindingExpression(sym, env); #else r_obj* value = env_find(env, sym); if (r_typeof(value) != R_TYPE_promise) { r_abort("not a delayed binding"); } bool forced; r_obj* inner = rlang_promise_unwrap(value, &forced); if (forced) { r_abort("not a delayed binding"); } return R_PromiseExpr(inner); #endif } r_obj* r_env_binding_delayed_env(r_obj* env, r_obj* sym) { #if RLANG_HAS_R_BINDING_API return R_DelayedBindingEnvironment(sym, env); #else r_obj* value = env_find(env, sym); if (r_typeof(value) != R_TYPE_promise) { r_abort("not a delayed binding"); } bool forced; r_obj* inner = rlang_promise_unwrap(value, &forced); if (forced) { r_abort("not a delayed binding"); } return PRENV(inner); #endif } // Forced binding accessors r_obj* r_env_binding_forced_expr(r_obj* env, r_obj* sym) { #if RLANG_HAS_R_BINDING_API return R_ForcedBindingExpression(sym, env); #else r_obj* value = env_find(env, sym); if (r_typeof(value) != R_TYPE_promise) { r_abort("not a forced binding"); } bool forced; r_obj* inner = rlang_promise_unwrap(value, &forced); if (!forced) { r_abort("not a forced binding"); } return R_PromiseExpr(inner); #endif } // Use `r_env_get()` to get the value of a forced binding // Active binding accessors r_obj* r_env_binding_active_fn(r_obj* env, r_obj* sym) { return R_ActiveBindingFunction(sym, env); } vctrs/src/rlang/call.c0000644000176200001440000000223215157273666014405 0ustar liggesusers#include "rlang.h" static r_obj* quote_prim = NULL; bool r_is_call(r_obj* x, const char* name) { if (r_typeof(x) != LANGSXP) { return false; } else { return name == NULL || r_is_symbol(r_node_car(x), name); } } bool r_is_call_any(r_obj* x, const char** names, int n) { if (r_typeof(x) != LANGSXP) { return false; } else { return r_is_symbol_any(r_node_car(x), names, n); } } r_obj* r_expr_protect(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_symbol: case R_TYPE_call: case R_TYPE_promise: return r_call2(quote_prim, x); default: return x; } } static inline bool is_node(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_call: case R_TYPE_pairlist: return true; default: return false; } } r_obj* r_call_clone(r_obj* x) { if (!is_node(x)) { r_abort("Input must be a call."); } x = KEEP(r_clone(x)); r_obj* rest = x; while (rest != r_null) { r_obj* head = r_node_car(rest); if (is_node(head)) { r_node_poke_car(rest, r_call_clone(head)); } rest = r_node_cdr(rest); } FREE(1); return x; } void r_init_library_call(void) { quote_prim = r_base_ns_get("quote"); } vctrs/src/rlang/df.c0000644000176200001440000000271515157273666014071 0ustar liggesusers#include "rlang.h" #include "decl/df-decl.h" r_obj* r_alloc_df_list(r_ssize n_rows, r_obj* names, const enum r_type* v_types, r_ssize types_size) { r_obj* out = KEEP(r_alloc_list(types_size)); if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector."); } if (r_length(names) != types_size) { r_abort("`names` must match the number of columns."); } r_attrib_poke_names(out, names); for (r_ssize i = 0; i < types_size; ++i) { // A nil type stands for no column allocation enum r_type type = v_types[i]; if (type != R_TYPE_null) { r_obj* col = r_alloc_vector(type, n_rows); r_list_poke(out, i, col); } } FREE(1); return out; } void r_init_data_frame(r_obj* x, r_ssize n_rows) { init_compact_rownames(x, n_rows); r_attrib_poke(x, r_syms.class_, r_classes.data_frame); } void r_init_tibble(r_obj* x, r_ssize n_rows) { r_init_data_frame(x, n_rows); r_attrib_poke(x, r_syms.class_, r_classes.tibble); } static void init_compact_rownames(r_obj* x, r_ssize n_rows) { r_obj* rn = KEEP(new_compact_rownames(n_rows)); r_attrib_poke(x, r_syms.row_names, rn); FREE(1); } static r_obj* new_compact_rownames(r_ssize n_rows) { if (n_rows <= 0) { return r_globals.empty_int; } r_obj* out = r_alloc_integer(2); int* p_out = r_int_begin(out); p_out[0] = r_globals.na_int; p_out[1] = -n_rows; return out; } vctrs/src/rlang/c-utils.c0000644000176200001440000000103115157273666015046 0ustar liggesusers#include void* r_shelter_deref(r_obj* x) { enum r_type type = r_typeof(x); switch (type) { case R_TYPE_list: if (r_length(x) < 1) { r_abort("Shelter must have at least one element"); } x = r_list_get(x, 0); type = r_typeof(x); break; case R_TYPE_pairlist: x = r_node_car(x); type = r_typeof(x); break; case R_TYPE_raw: break; default: r_stop_unimplemented_type(type); } if (type != R_TYPE_raw) { r_stop_unexpected_type(type); } return r_raw_begin(x); } vctrs/src/rlang/walk.h0000644000176200001440000000745715157273666014453 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_INTERNAL_WALK_H #define RLANG_INTERNAL_WALK_H #include "rlang-types.h" #include "cnd.h" /** * Direction of iteration * * Each non-leaf node of the sexp tree is visited twice: First before * visiting the children, and again after the children have been * visited. See * about * this iteration process. * * There are three directions: * - Incoming: The first time a non-leaf node is visited. * - Leaf: After reaching a leaf node, the direction changes from * incoming to outgoing. * - Outgoing: The second time a non-leaf node is visited on the way back. */ enum r_sexp_it_direction { R_SEXP_IT_DIRECTION_leaf = 0, R_SEXP_IT_DIRECTION_incoming, R_SEXP_IT_DIRECTION_outgoing }; enum r_sexp_it_relation { R_SEXP_IT_RELATION_none = -1, R_SEXP_IT_RELATION_root = 0, R_SEXP_IT_RELATION_attrib, // Nodes R_SEXP_IT_RELATION_node_car, R_SEXP_IT_RELATION_node_cdr, R_SEXP_IT_RELATION_node_tag, R_SEXP_IT_RELATION_symbol_string, R_SEXP_IT_RELATION_symbol_value, R_SEXP_IT_RELATION_symbol_internal, R_SEXP_IT_RELATION_function_fmls, R_SEXP_IT_RELATION_function_body, R_SEXP_IT_RELATION_function_env, R_SEXP_IT_RELATION_environment_frame, R_SEXP_IT_RELATION_environment_enclos, R_SEXP_IT_RELATION_environment_hashtab, R_SEXP_IT_RELATION_promise_value, R_SEXP_IT_RELATION_promise_expr, R_SEXP_IT_RELATION_promise_env, R_SEXP_IT_RELATION_pointer_prot, R_SEXP_IT_RELATION_pointer_tag, // Vectors R_SEXP_IT_RELATION_list_elt, R_SEXP_IT_RELATION_character_elt, R_SEXP_IT_RELATION_expression_elt }; enum r_sexp_it_raw_relation { R_SEXP_IT_RAW_RELATION_root = 0, R_SEXP_IT_RAW_RELATION_attrib, R_SEXP_IT_RAW_RELATION_node_tag, R_SEXP_IT_RAW_RELATION_node_car, R_SEXP_IT_RAW_RELATION_node_cdr, R_SEXP_IT_RAW_RELATION_vector_elt }; struct r_sexp_iterator { r_obj* shelter; bool skip_incoming; r_obj* x; enum r_type type; int depth; r_obj* parent; enum r_sexp_it_relation rel; r_ssize i; enum r_sexp_it_direction dir; /* private: */ struct r_dyn_array* p_stack; }; struct r_sexp_iterator* r_new_sexp_iterator(r_obj* root); bool r_sexp_next(struct r_sexp_iterator* p_it); bool r_sexp_skip(struct r_sexp_iterator* p_it); static inline enum r_sexp_it_raw_relation r_sexp_it_raw_relation(enum r_sexp_it_relation rel) { switch (rel) { case R_SEXP_IT_RELATION_root: return R_SEXP_IT_RAW_RELATION_root; case R_SEXP_IT_RELATION_attrib: return R_SEXP_IT_RAW_RELATION_attrib; case R_SEXP_IT_RELATION_node_car: case R_SEXP_IT_RELATION_symbol_string: case R_SEXP_IT_RELATION_environment_frame: case R_SEXP_IT_RELATION_function_fmls: case R_SEXP_IT_RELATION_promise_value: return R_SEXP_IT_RAW_RELATION_node_car; case R_SEXP_IT_RELATION_node_cdr: case R_SEXP_IT_RELATION_symbol_value: case R_SEXP_IT_RELATION_environment_enclos: case R_SEXP_IT_RELATION_function_body: case R_SEXP_IT_RELATION_promise_expr: case R_SEXP_IT_RELATION_pointer_prot: return R_SEXP_IT_RAW_RELATION_node_cdr; case R_SEXP_IT_RELATION_node_tag: case R_SEXP_IT_RELATION_symbol_internal: case R_SEXP_IT_RELATION_environment_hashtab: case R_SEXP_IT_RELATION_function_env: case R_SEXP_IT_RELATION_promise_env: case R_SEXP_IT_RELATION_pointer_tag: return R_SEXP_IT_RAW_RELATION_node_tag; case R_SEXP_IT_RELATION_list_elt: case R_SEXP_IT_RELATION_character_elt: case R_SEXP_IT_RELATION_expression_elt: return R_SEXP_IT_RAW_RELATION_vector_elt; default: r_abort("Unimplemented type."); } } const char* r_sexp_it_direction_as_c_string(enum r_sexp_it_direction dir); const char* r_sexp_it_relation_as_c_string(enum r_sexp_it_relation rel); const char* r_sexp_it_raw_relation_as_c_string(enum r_sexp_it_raw_relation rel); #endif vctrs/src/rlang/globals.h0000644000176200001440000000364115157273666015127 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_GLOBALS_H #define RLANG_GLOBALS_H #include "rlang-types.h" struct r_globals { r_obj* empty_lgl; r_obj* empty_int; r_obj* empty_dbl; r_obj* empty_cpl; r_obj* empty_raw; r_obj* empty_chr; r_obj* empty_list; int na_lgl; int na_int; double na_dbl; r_complex na_cpl; r_obj* na_str; }; struct r_globals_chrs { r_obj* empty_string; r_obj* full; }; struct r_globals_classes { r_obj* data_frame; r_obj* tibble; }; struct r_globals_strs { r_obj* dots; r_obj* condition; r_obj* empty; r_obj* error; r_obj* interrupt; r_obj* message; r_obj* na; r_obj* warning; }; struct r_globals_syms { r_obj* abort; r_obj* arg; r_obj* brace; r_obj* brackets; r_obj* brackets2; r_obj* call; // `_` is required to avoid conflicts with the C++ keyword `class`. // See https://github.com/r-lib/rlang/pull/1359 for details. r_obj* class_; r_obj* condition; r_obj* dots; r_obj* dot_environment; r_obj* dot_fn; r_obj* dot_x; r_obj* dot_y; r_obj* error; r_obj* error_arg; r_obj* error_call; r_obj* error_call_flag; r_obj* expr; r_obj* function; r_obj* interrupt; r_obj* message; r_obj* missing; r_obj* names; r_obj* options; r_obj* colon2; r_obj* colon3; r_obj* srcfile; r_obj* srcref; r_obj* dim; r_obj* dim_names; r_obj* row_names; r_obj* stack_overflow_error; r_obj* tilde; r_obj* unbound; r_obj* w; r_obj* warning; r_obj* wholeSrcref; r_obj* x; r_obj* y; r_obj* z; }; struct r_globals_envs { r_obj* empty; r_obj* base; r_obj* global; r_obj* ns; // The namespace of the embedding package }; extern struct r_globals r_globals; extern struct r_globals_chrs r_chrs; extern struct r_globals_classes r_classes; extern struct r_globals_strs r_strs; extern struct r_globals_syms r_syms; extern struct r_globals_envs r_envs; extern r_obj* r_true; extern r_obj* r_false; #endif vctrs/src/rlang/walk.c0000644000176200001440000003134015157273666014432 0ustar liggesusers#include #include "walk.h" #define SEXP_STACK_INIT_SIZE 256 enum sexp_iterator_type { SEXP_ITERATOR_TYPE_node, SEXP_ITERATOR_TYPE_pointer, SEXP_ITERATOR_TYPE_vector, SEXP_ITERATOR_TYPE_atomic }; enum sexp_iterator_state { SEXP_ITERATOR_STATE_done, SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_tag, SEXP_ITERATOR_STATE_car, SEXP_ITERATOR_STATE_cdr, SEXP_ITERATOR_STATE_elt }; struct sexp_stack_info { r_obj* x; enum r_type type; const enum sexp_iterator_state* p_state; r_obj* const * v_arr; r_obj* const * v_arr_end; int depth; r_obj* parent; enum r_sexp_it_relation rel; enum r_sexp_it_direction dir; }; #include "decl/walk-decl.h" static const enum sexp_iterator_state node_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_tag, SEXP_ITERATOR_STATE_car, SEXP_ITERATOR_STATE_cdr, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state pointer_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_tag, SEXP_ITERATOR_STATE_cdr, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state vector_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_elt, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state structure_states[] = { SEXP_ITERATOR_STATE_attrib, SEXP_ITERATOR_STATE_done }; static const enum sexp_iterator_state done_state[] = { SEXP_ITERATOR_STATE_done }; struct r_sexp_iterator* r_new_sexp_iterator(r_obj* root) { r_obj* shelter = KEEP(r_alloc_list(3)); r_obj* it = r_alloc_raw(sizeof(struct r_sexp_iterator)); r_list_poke(shelter, 0, it); struct r_sexp_iterator* p_it = r_raw_begin(it); struct r_dyn_array* p_stack = r_new_dyn_array(sizeof(struct sexp_stack_info), SEXP_STACK_INIT_SIZE); r_list_poke(shelter, 1, p_stack->shelter); // Slot 2 holds a pairlist chain of collected attribute pairlists // that need protection for the lifetime of the iterator enum r_type type = r_typeof(root); enum sexp_iterator_type it_type = sexp_iterator_type(type, root); bool has_attrib = sexp_has_attrib(type, root); struct sexp_stack_info root_info = { .x = root, .type = type, .depth = -1, .parent = r_null, .rel = R_SEXP_IT_RELATION_root }; if (it_type == SEXP_ITERATOR_TYPE_atomic && !has_attrib) { root_info.p_state = NULL; root_info.dir = R_SEXP_IT_DIRECTION_leaf; } else { init_incoming_stack_info(&root_info, it_type, has_attrib); } r_dyn_push_back(p_stack, &root_info); *p_it = (struct r_sexp_iterator) { .shelter = shelter, .p_stack = p_stack, .x = r_null, .parent = r_null, }; FREE(1); return p_it; } /* * An incoming node has a state indicating which edge we're at. An * outgoing node just need to be visited again and then popped. A * leaf node is just visited once and then popped. */ bool r_sexp_next(struct r_sexp_iterator* p_it) { struct r_dyn_array* p_stack = p_it->p_stack; if (!p_stack->count) { return false; } struct sexp_stack_info* p_info = (struct sexp_stack_info*) r_dyn_last(p_stack); if (p_it->skip_incoming) { p_it->skip_incoming = false; if (p_it->dir == R_SEXP_IT_DIRECTION_incoming) { r_dyn_pop_back(p_stack); return r_sexp_next(p_it); } } // In the normal case, if we push an "incoming" node on the stack it // means that we have already visited it and we are now visiting its // children. The root node is signalled with a depth of -1 so it can // be visited first before being visited as an incoming node. bool root = (p_info->depth == -1); if (!root && p_info->dir == R_SEXP_IT_DIRECTION_incoming) { return sexp_next_incoming(p_it, p_info); } r_ssize i = -1; if (p_info->v_arr) { i = p_info->v_arr_end - p_info->v_arr; } p_it->x = p_info->x; p_it->type = p_info->type; p_it->depth = p_info->depth; p_it->parent = p_info->parent; p_it->rel = p_info->rel; p_it->i = i; p_it->dir = p_info->dir; if (root) { ++p_it->depth; ++p_info->depth; // Incoming visit for the root node if (p_it->dir == R_SEXP_IT_DIRECTION_incoming) { return true; } } r_dyn_pop_back(p_stack); return true; } static bool sexp_next_incoming(struct r_sexp_iterator* p_it, struct sexp_stack_info* p_info) { enum sexp_iterator_state state = *p_info->p_state; r_obj* x = p_info->x; enum r_type type = p_info->type; struct sexp_stack_info child = { 0 }; child.parent = x; child.depth = p_info->depth + 1; switch (state) { case SEXP_ITERATOR_STATE_attrib: { // Allocates a new pairlist to avoid direct ATTRIB() access. // Could use R_mapAttrib() instead but would require refactoring. r_obj* collected = r_attrib_collect(x); // Protect collected pairlist in the iterator's shelter chain r_obj* chain = r_new_node(collected, r_list_get(p_it->shelter, 2)); r_list_poke(p_it->shelter, 2, chain); child.x = collected; child.rel = R_SEXP_IT_RELATION_attrib; break; } case SEXP_ITERATOR_STATE_elt: child.x = *p_info->v_arr; child.rel = R_SEXP_IT_RELATION_list_elt; break; case SEXP_ITERATOR_STATE_tag: child.x = sexp_node_tag(type, x, &child.rel); break; case SEXP_ITERATOR_STATE_car: child.x = sexp_node_car(type, x, &child.rel); break; case SEXP_ITERATOR_STATE_cdr: child.x = sexp_node_cdr(type, x, &child.rel); break; case SEXP_ITERATOR_STATE_done: r_stop_unreachable(); } child.type = r_typeof(child.x); bool has_attrib = sexp_has_attrib(child.type, child.x); enum sexp_iterator_type it_type = sexp_iterator_type(child.type, child.x); if (it_type == SEXP_ITERATOR_TYPE_atomic && !has_attrib) { child.p_state = NULL; child.dir = R_SEXP_IT_DIRECTION_leaf; } else { init_incoming_stack_info(&child, it_type, has_attrib); // Push incoming node on the stack so it can be visited again, // either to descend its children or to visit it again on the // outgoing trip r_dyn_push_back(p_it->p_stack, &child); } // Bump state for next iteration if (state == SEXP_ITERATOR_STATE_elt) { ++p_info->v_arr; if (p_info->v_arr == p_info->v_arr_end) { p_info->p_state = done_state; } } else { ++p_info->p_state; } // Flip incoming to outgoing if we're done visiting children after // this iteration. We don't leave a done node on the stack because // that would break the invariant that there are remaining nodes to // visit when `n > 0` and that the stack can be popped. if (*p_info->p_state == SEXP_ITERATOR_STATE_done) { p_info->dir = R_SEXP_IT_DIRECTION_outgoing; } r_ssize i = -1; if (child.v_arr) { i = child.v_arr_end - child.v_arr; } p_it->x = child.x; p_it->type = child.type; p_it->depth = child.depth; p_it->parent = child.parent; p_it->rel = child.rel; p_it->i = i; p_it->dir = child.dir; return true; } static inline void init_incoming_stack_info(struct sexp_stack_info* p_info, enum sexp_iterator_type it_type, bool has_attrib) { p_info->dir = R_SEXP_IT_DIRECTION_incoming; switch (it_type) { case SEXP_ITERATOR_TYPE_atomic: p_info->p_state = structure_states; break; case SEXP_ITERATOR_TYPE_node: p_info->p_state = node_states + !has_attrib; break; case SEXP_ITERATOR_TYPE_pointer: p_info->p_state = pointer_states + !has_attrib; break; case SEXP_ITERATOR_TYPE_vector: p_info->v_arr = r_vec_cbegin(p_info->x); p_info->v_arr_end = p_info->v_arr + r_length(p_info->x); p_info->p_state = vector_states + !has_attrib; break; } } static inline enum sexp_iterator_type sexp_iterator_type(enum r_type type, r_obj* x) { switch (type) { case R_TYPE_closure: case R_TYPE_environment: case R_TYPE_promise: case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: return SEXP_ITERATOR_TYPE_node; case R_TYPE_pointer: return SEXP_ITERATOR_TYPE_pointer; case R_TYPE_list: case R_TYPE_expression: case R_TYPE_character: if (r_length(x)) { return SEXP_ITERATOR_TYPE_vector; } else { return SEXP_ITERATOR_TYPE_atomic; } default: return SEXP_ITERATOR_TYPE_atomic; } } static inline bool sexp_has_attrib(enum r_type type, r_obj* x) { // Strings have private data stored in attributes if (type == R_TYPE_string) { return false; } else { return r_attrib_has_any(x); } } static inline r_obj* sexp_node_car(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel) { switch (type) { case R_TYPE_closure: *p_rel = R_SEXP_IT_RELATION_function_fmls; return r_fn_formals(x); case R_TYPE_environment: *p_rel = R_SEXP_IT_RELATION_environment_frame; return FRAME(x); case R_TYPE_promise: *p_rel = R_SEXP_IT_RELATION_promise_value; return PRVALUE(x); case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: *p_rel = R_SEXP_IT_RELATION_node_car; return CAR(x); case R_TYPE_pointer: default: *p_rel = -1; return r_null; } } static inline r_obj* sexp_node_cdr(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel) { switch (type) { case R_TYPE_closure: *p_rel = R_SEXP_IT_RELATION_function_body; return r_fn_body(x); case R_TYPE_environment: *p_rel = R_SEXP_IT_RELATION_environment_enclos; return r_env_parent(x); case R_TYPE_promise: *p_rel = R_SEXP_IT_RELATION_promise_expr; return PREXPR(x); case R_TYPE_pointer: *p_rel = R_SEXP_IT_RELATION_pointer_prot; return EXTPTR_PROT(x); case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: *p_rel = R_SEXP_IT_RELATION_node_cdr; return CDR(x); default: *p_rel = -1; return r_null; } } static inline r_obj* sexp_node_tag(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel) { switch (type) { case R_TYPE_closure: *p_rel = R_SEXP_IT_RELATION_function_env; return r_fn_env(x); case R_TYPE_environment: *p_rel = R_SEXP_IT_RELATION_environment_hashtab; return HASHTAB(x); case R_TYPE_promise: *p_rel = R_SEXP_IT_RELATION_promise_env; return PRENV(x); case R_TYPE_pointer: *p_rel = R_SEXP_IT_RELATION_pointer_tag; return EXTPTR_TAG(x); case R_TYPE_pairlist: case R_TYPE_call: case R_TYPE_dots: *p_rel = R_SEXP_IT_RELATION_node_tag; return TAG(x); default: *p_rel = -1; return r_null; } } const char* r_sexp_it_direction_as_c_string(enum r_sexp_it_direction dir) { switch (dir) { case R_SEXP_IT_DIRECTION_leaf: return "leaf"; case R_SEXP_IT_DIRECTION_incoming: return "incoming"; case R_SEXP_IT_DIRECTION_outgoing: return "outgoing"; default: r_stop_unreachable(); } } const char* r_sexp_it_relation_as_c_string(enum r_sexp_it_relation rel) { switch (rel) { case R_SEXP_IT_RELATION_root: return "root"; case R_SEXP_IT_RELATION_attrib: return "attrib"; case R_SEXP_IT_RELATION_node_car: return "node_car"; case R_SEXP_IT_RELATION_node_cdr: return "node_cdr"; case R_SEXP_IT_RELATION_node_tag: return "node_tag"; case R_SEXP_IT_RELATION_symbol_string: return "symbol_string"; case R_SEXP_IT_RELATION_symbol_value: return "symbol_value"; case R_SEXP_IT_RELATION_symbol_internal: return "symbol_internal"; case R_SEXP_IT_RELATION_function_fmls: return "function_fmls"; case R_SEXP_IT_RELATION_function_body: return "function_body"; case R_SEXP_IT_RELATION_function_env: return "function_env"; case R_SEXP_IT_RELATION_environment_frame: return "environment_frame"; case R_SEXP_IT_RELATION_environment_enclos: return "environment_enclos"; case R_SEXP_IT_RELATION_environment_hashtab: return "environment_hashtab"; case R_SEXP_IT_RELATION_promise_value: return "promise_value"; case R_SEXP_IT_RELATION_promise_expr: return "promise_expr"; case R_SEXP_IT_RELATION_promise_env: return "promise_env"; case R_SEXP_IT_RELATION_pointer_prot: return "pointer_prot"; case R_SEXP_IT_RELATION_pointer_tag: return "pointer_tag"; case R_SEXP_IT_RELATION_list_elt: return "list_elt"; case R_SEXP_IT_RELATION_character_elt: return "character_elt"; case R_SEXP_IT_RELATION_expression_elt: return "expression_elt"; case R_SEXP_IT_RELATION_none: r_stop_internal("r_sexp_it_relation_as_c_string", "Found `R_SEXP_IT_RELATION_none`."); default: r_stop_unreachable(); } } const char* r_sexp_it_raw_relation_as_c_string(enum r_sexp_it_raw_relation rel) { switch (rel) { case R_SEXP_IT_RAW_RELATION_root: return "root"; case R_SEXP_IT_RAW_RELATION_attrib: return "attrib"; case R_SEXP_IT_RAW_RELATION_node_car: return "node_car"; case R_SEXP_IT_RAW_RELATION_node_cdr: return "node_cdr"; case R_SEXP_IT_RAW_RELATION_node_tag: return "node_tag"; case R_SEXP_IT_RAW_RELATION_vector_elt: return "vector_elt"; default: r_stop_unreachable(); } } vctrs/src/rlang/quo.c0000644000176200001440000000113215157273666014274 0ustar liggesusers#include "rlang.h" r_obj* (*r_quo_get_expr)(r_obj* quo); r_obj* (*r_quo_set_expr)(r_obj* quo, r_obj* expr); r_obj* (*r_quo_get_env)(r_obj* quo); r_obj* (*r_quo_set_env)(r_obj* quo, r_obj* env); void r_init_library_quo(void) { r_quo_get_expr = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_quo_get_expr"); r_quo_set_expr = (r_obj* (*)(r_obj*, r_obj*)) r_peek_c_callable("rlang", "rlang_quo_set_expr"); r_quo_get_env = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_quo_get_env"); r_quo_set_env = (r_obj* (*)(r_obj*, r_obj*)) r_peek_c_callable("rlang", "rlang_quo_set_env"); } vctrs/src/rlang/vec-chr.c0000644000176200001440000000356015157273666015026 0ustar liggesusers#include #include "rlang.h" r_ssize r_chr_detect_index(r_obj* chr, const char* c_string) { r_ssize n = r_length(chr); for (r_ssize i = 0; i != n; ++i) { const char* cur = CHAR(r_chr_get(chr, i)); if (strcmp(cur, c_string) == 0) { return i; } } return -1; } bool r_chr_has(r_obj* chr, const char* c_string) { r_ssize idx = r_chr_detect_index(chr, c_string); return idx >= 0; } bool r_chr_has_any(r_obj* chr, const char** c_strings) { r_ssize n = r_length(chr); for (r_ssize i = 0; i != n; ++i) { const char* cur = CHAR(r_chr_get(chr, i)); while (*c_strings) { if (strcmp(cur, *c_strings) == 0) { return true; } ++c_strings; } } return false; } void r_chr_fill(r_obj* chr, r_obj* value, r_ssize n) { for (r_ssize i = 0; i < n; ++i) { r_chr_poke(chr, i, value); } } static void validate_chr_setter(r_obj* chr, r_obj* r_string) { if (r_typeof(chr) != R_TYPE_character) { r_abort("`chr` must be a character vector"); } if (r_typeof(r_string) != R_TYPE_string) { r_abort("`r_string` must be an internal R string"); } } // From rlang/vec.c void r_vec_poke_n(r_obj* x, r_ssize offset, r_obj* y, r_ssize from, r_ssize n); r_obj* chr_prepend(r_obj* chr, r_obj* r_string) { if (chr == r_null) { return r_str_as_character(r_string); } else { validate_chr_setter(chr, r_string); } int n = r_length(chr); r_obj* out = KEEP(r_alloc_character(n + 1)); r_vec_poke_n(out, 1, chr, 0, n); r_chr_poke(out, 0, r_string); FREE(1); return out; } r_obj* chr_append(r_obj* chr, r_obj* r_str) { if (chr == r_null) { return r_str_as_character(r_str); } validate_chr_setter(chr, r_str); int n = r_length(chr); r_obj* out = KEEP(r_alloc_character(n + 1)); r_vec_poke_n(out, 0, chr, 0, n); r_chr_poke(out, n, r_str); FREE(1); return out; } vctrs/src/rlang/formula.h0000644000176200001440000000043515157273666015147 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_FORMULA_H #define RLANG_FORMULA_H #include "rlang-types.h" bool r_is_formula(r_obj* x, int scoped, int lhs); r_obj* r_f_rhs(r_obj* f); r_obj* r_f_lhs(r_obj* f); r_obj* r_f_env(r_obj* f); bool r_f_has_env(r_obj* f); #endif vctrs/src/rlang/state.h0000644000176200001440000000107215157273666014620 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_STATE_H #define RLANG_STATE_H #include "call.h" #include "eval.h" #include "globals.h" #include "node.h" #include "rlang-types.h" #include "sym.h" static inline r_obj* r_peek_option(const char* name) { return Rf_GetOption1(Rf_install(name)); } static inline void r_poke_option(const char* name, r_obj* value) { r_obj* args = KEEP(r_new_node(value, r_null)); r_node_poke_tag(args, r_sym(name)); r_obj* call = KEEP(r_new_call(r_syms.options, args)); r_eval(call, r_envs.base); FREE(2); } #endif vctrs/src/rlang/vec.c0000644000176200001440000002032615157273666014253 0ustar liggesusers#include "rlang.h" #include #include r_obj* r_chr_n(const char* const * strings, r_ssize n) { r_obj* out = KEEP(r_alloc_character(n)); for (r_ssize i = 0; i < n; ++i) { r_chr_poke(out, i, r_str(strings[i])); } FREE(1); return out; } #define RESIZE(R_TYPE, C_TYPE, CONST_DEREF, DEREF) \ do { \ r_ssize old_size = r_length(x); \ if (old_size == new_size) { \ return x; \ } \ if (!ALTREP(x) && new_size < old_size) { \ return vec_shrink(x, new_size, old_size); \ } \ \ const C_TYPE* p_x = CONST_DEREF(x); \ r_obj* out = KEEP(r_alloc_vector(R_TYPE, new_size)); \ C_TYPE* p_out = DEREF(out); \ \ r_ssize cpy_size = (new_size > old_size) ? old_size : new_size; \ r_memcpy(p_out, p_x, cpy_size * sizeof(C_TYPE)); \ \ FREE(1); \ return out; \ } while (0) #define RESIZE_BARRIER(R_TYPE, CONST_DEREF, SET) \ do { \ r_ssize old_size = r_length(x); \ if (old_size == new_size) { \ return x; \ } \ if (!ALTREP(x) && new_size < old_size) { \ return vec_shrink(x, new_size, old_size); \ } \ \ r_obj* const * p_x = CONST_DEREF(x); \ r_obj* out = KEEP(r_alloc_vector(R_TYPE, new_size)); \ \ r_ssize cpy_size = (new_size > old_size) ? old_size : new_size; \ for (r_ssize i = 0; i < cpy_size; ++i) { \ SET(out, i, p_x[i]); \ } \ \ FREE(1); \ return out; \ } while (0) // Assumption on older R: `new_size` smaller than `old_size` static inline r_obj* vec_shrink(r_obj* x, r_ssize new_size, r_ssize old_size) { #if R_VERSION >= R_Version(4, 6, 0) if (R_isResizable(x)) { R_resizeVector(x, new_size); return x; } else { return Rf_xlengthgets(x, new_size); } #else SETLENGTH(x, new_size); SET_TRUELENGTH(x, old_size); SET_GROWABLE_BIT(x); return x; #endif } // Compared to `Rf_xlengthgets()` this does not initialise the new // extended locations with `NA` r_obj* r_lgl_resize(r_obj* x, r_ssize new_size) { RESIZE(R_TYPE_logical, int, r_lgl_cbegin, r_lgl_begin); } r_obj* r_int_resize(r_obj* x, r_ssize new_size) { RESIZE(R_TYPE_integer, int, r_int_cbegin, r_int_begin); } r_obj* r_dbl_resize(r_obj* x, r_ssize new_size) { RESIZE(R_TYPE_double, double, r_dbl_cbegin, r_dbl_begin); } r_obj* r_cpl_resize(r_obj* x, r_ssize new_size) { RESIZE(R_TYPE_complex, r_complex, r_cpl_cbegin, r_cpl_begin); } r_obj* r_raw_resize(r_obj* x, r_ssize new_size) { RESIZE(R_TYPE_raw, unsigned char, r_raw_cbegin, r_raw_begin); } r_obj* r_chr_resize(r_obj* x, r_ssize new_size) { RESIZE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke); } r_obj* r_list_resize(r_obj* x, r_ssize new_size) { RESIZE_BARRIER(R_TYPE_list, r_list_cbegin, r_list_poke); } #undef RESIZE #undef RESIZE_BARRIER r_obj* r_list_compact(r_obj* x) { r_ssize n = r_length(x); r_obj* inc = KEEP(r_alloc_logical(n)); int* v_inc = r_int_begin(inc); r_obj* const * v_x = r_list_cbegin(x); r_ssize new_n = 0; for (r_ssize i = 0; i < n; ++i) { v_inc[i] = v_x[i] != r_null; new_n += v_inc[i]; } r_obj* out = KEEP(r_alloc_list(new_n)); for (r_ssize i = 0, count = 0; i < n; ++i) { if (v_inc[i]) { r_list_poke(out, count, v_x[i]); ++count; } } FREE(2); return out; } r_obj* r_list_of_as_ptr_ssize(r_obj* xs, enum r_type type, struct r_pair_ptr_ssize** p_v_out) { if (r_typeof(xs) != R_TYPE_list) { r_abort("`xs` must be a list."); } r_ssize n = r_length(xs); r_obj* shelter = KEEP(r_alloc_raw(sizeof(struct r_pair_ptr_ssize) * n)); struct r_pair_ptr_ssize* v_out = r_raw_begin(shelter); r_obj* const * v_xs = r_list_cbegin(xs); for (r_ssize i = 0; i < n; ++i) { r_obj* x = v_xs[i]; if (r_typeof(x) != type) { r_abort("`xs` must be a list of vectors of type `%s`.", r_type_as_c_string(type)); } v_out[i] = (struct r_pair_ptr_ssize) { .ptr = r_int_begin(x), .size = r_length(x) }; } FREE(1); *p_v_out = v_out; return shelter; } // FIXME: Does this have a place in the library? void r_vec_poke_n(r_obj* x, r_ssize offset, r_obj* y, r_ssize from, r_ssize n) { if ((r_length(x) - offset) < n) { r_abort("Can't copy data to `x` because it is too small"); } if ((r_length(y) - from) < n) { r_abort("Can't copy data from `y` because it is too small"); } switch (r_typeof(x)) { case R_TYPE_logical: { int* src_data = r_lgl_begin(y); int* dest_data = r_lgl_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_integer: { int* src_data = r_int_begin(y); int* dest_data = r_int_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_double: { double* src_data = r_dbl_begin(y); double* dest_data = r_dbl_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_complex: { r_complex* src_data = r_cpl_begin(y); r_complex* dest_data = r_cpl_begin(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_raw: { unsigned char* src_data = RAW(y); unsigned char* dest_data = RAW(x); for (r_ssize i = 0; i != n; ++i) dest_data[i + offset] = src_data[i + from]; break; } case R_TYPE_character: { r_obj* elt; for (r_ssize i = 0; i != n; ++i) { elt = r_chr_get(y, i + from); r_chr_poke(x, i + offset, elt); } break; } case R_TYPE_list: { r_obj* elt; for (r_ssize i = 0; i != n; ++i) { elt = r_list_get(y, i + from); r_list_poke(x, i + offset, elt); } break; } default: r_abort("Copy requires vectors"); } } void r_vec_poke_range(r_obj* x, r_ssize offset, r_obj* y, r_ssize from, r_ssize to) { r_vec_poke_n(x, offset, y, from, to - from + 1); } bool _r_is_finite(r_obj* x) { r_ssize n = r_length(x); switch(r_typeof(x)) { case R_TYPE_integer: { const int* p_x = r_int_cbegin(x); for (r_ssize i = 0; i < n; ++i) { if (p_x[i] == r_globals.na_int) { return false; } } break; } case R_TYPE_double: { const double* p_x = r_dbl_cbegin(x); for (r_ssize i = 0; i < n; ++i) { if (!isfinite(p_x[i])) { return false; } } break; } case R_TYPE_complex: { const r_complex* p_x = r_cpl_cbegin(x); for (r_ssize i = 0; i < n; ++i) { if (!isfinite(p_x[i].r) || !isfinite(p_x[i].i)) { return false; } } break; } default: r_abort("Internal error: expected a numeric vector"); } return true; } vctrs/src/rlang/session.h0000644000176200001440000000032715157273666015165 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_SESSION_H #define RLANG_SESSION_H #include "rlang-types.h" bool r_is_installed(const char* pkg); bool r_has_colour(void); r_obj* r_getppid(void); #endif vctrs/src/rlang/export.h0000644000176200001440000000072215157273666015022 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_EXPORT_H #define RLANG_EXPORT_H #include "rlang-types.h" typedef DL_FUNC r_void_fn; static inline r_void_fn r_peek_c_callable(const char* pkg, const char* callable) { return R_GetCCallable(pkg, callable); } static inline r_obj* r_new_fn_ptr(r_void_fn p) { return R_MakeExternalPtrFn(p, r_null, r_null); } static inline r_void_fn r_fn_ptr_addr(r_obj* p) { return R_ExternalPtrAddrFn(p); } #endif vctrs/src/rlang/export.c0000644000176200001440000000046315157273666015017 0ustar liggesusers#include "rlang.h" #include "export.h" #include r_obj* rlang_namespace(const char* ns) { r_obj* ns_string = KEEP(Rf_mkString(ns)); r_obj* call = KEEP(r_sym("getNamespace")); call = KEEP(Rf_lang2(call, ns_string)); r_obj* ns_env = r_eval(call, R_BaseEnv); FREE(3); return ns_env; } vctrs/src/rlang/df.h0000644000176200001440000000060315157273666014070 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_DF_H #define RLANG_DF_H #include "rlang-types.h" r_obj* r_alloc_df_list(r_ssize n_rows, r_obj* names, const enum r_type* v_types, r_ssize types_size); void r_init_data_frame(r_obj* x, r_ssize n_nows); void r_init_tibble(r_obj* x, r_ssize n_rows); #endif vctrs/src/rlang/dict.c0000644000176200001440000002024515157273666014421 0ustar liggesusers#include #include "dict.h" #define DICT_LOAD_THRESHOLD 0.75 #define DICT_GROWTH_FACTOR 2 static size_t size_round_power_2(size_t size); #include "decl/dict-decl.h" #define DICT_DEREF(D) r_list_cbegin(D) #define DICT_KEY(V) r_list_get(V, 0) #define DICT_VALUE(V) r_list_get(V, 1) #define DICT_CDR(V) r_list_get(V, 2) #define DICT_POKE_KEY(D, K) r_list_poke(D, 0, K) #define DICT_POKE_VALUE(D, V) r_list_poke(D, 1, V) #define DICT_POKE_CDR(D, N) r_list_poke(D, 2, N) #define V_DICT_KEY(V) (V)[0] #define V_DICT_VALUE(V) (V)[1] #define V_DICT_CDR(V) (V)[2] static r_obj* new_dict_node(r_obj* key, r_obj* value) { r_obj* bucket = r_alloc_list(3); DICT_POKE_KEY(bucket, key); DICT_POKE_VALUE(bucket, value); return bucket; } struct r_dict* r_new_dict(r_ssize size) { if (size <= 0) { r_abort("`size` of dictionary must be positive."); } size = size_round_power_2(size); r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* dict_raw = r_alloc_raw0(sizeof(struct r_dict)); r_list_poke(shelter, 0, dict_raw); struct r_dict* p_dict = r_raw_begin(dict_raw); p_dict->shelter = shelter; p_dict->buckets = r_alloc_list(size); r_list_poke(shelter, 1, p_dict->buckets); p_dict->p_buckets = r_list_cbegin(p_dict->buckets); p_dict->n_buckets = size; r_attrib_poke(shelter, r_syms.class_, r_chr("rlang_dict")); FREE(1); return p_dict; } void r_dict_resize(struct r_dict* p_dict, r_ssize size) { if (size < 0) { size = p_dict->n_buckets * DICT_GROWTH_FACTOR; } struct r_dict* p_new_dict = r_new_dict(size); KEEP(p_new_dict->shelter); r_ssize n = r_length(p_dict->buckets); r_obj* const * p_buckets = p_dict->p_buckets; for (r_ssize i = 0; i < n; ++i) { r_obj* bucket = p_buckets[i]; while (bucket != r_null) { r_obj* const * v_bucket = DICT_DEREF(bucket); r_obj* key = V_DICT_KEY(v_bucket); r_obj* value = V_DICT_VALUE(v_bucket); r_dict_put(p_new_dict, key, value); bucket = V_DICT_CDR(v_bucket); } } // Update all data in place except the shelter and the raw sexp // which must stay validly protected by the callers r_obj* old_shelter = p_dict->shelter; r_list_poke(old_shelter, 1, r_list_get(p_new_dict->shelter, 1)); r_memcpy(p_dict, p_new_dict, sizeof(*p_dict)); p_dict->shelter = old_shelter; FREE(1); } static size_t size_round_power_2(size_t size) { size_t out = 1; while (out < size) { out <<= 1; } return out; } static r_ssize dict_hash(const struct r_dict* p_dict, r_obj* key) { uint64_t hash = r_xxh3_64bits(&key, sizeof(r_obj*)); return hash % p_dict->n_buckets; } // Returns previous value of `key` if it existed or a C `NULL` r_obj* r_dict_poke(struct r_dict* p_dict, r_obj* key, r_obj* value) { r_ssize hash; r_obj* parent; r_obj* node = dict_find_node_info(p_dict, key, &hash, &parent); if (node != r_null) { r_obj* old = DICT_VALUE(node); DICT_POKE_VALUE(node, value); return old; } else { dict_push(p_dict, hash, parent, key, value); return NULL; } } // Returns `false` if `key` already exists in the dictionary, `true` // otherwise bool r_dict_put(struct r_dict* p_dict, r_obj* key, r_obj* value) { r_ssize hash; r_obj* parent; r_obj* node = dict_find_node_info(p_dict, key, &hash, &parent); if (node != r_null) { return false; } else { dict_push(p_dict, hash, parent, key, value); return true; } } static void dict_push(struct r_dict* p_dict, r_ssize hash, r_obj* parent, r_obj* key, r_obj* value) { r_obj* node = KEEP(new_dict_node(key, value)); if (parent == r_null) { // Empty bucket r_list_poke(p_dict->buckets, hash, node); } else { DICT_POKE_CDR(parent, node); } ++p_dict->n_entries; float load = (float) p_dict->n_entries / (float) p_dict->n_buckets; if (!p_dict->prevent_resize && load > DICT_LOAD_THRESHOLD) { r_dict_resize(p_dict, -1); } FREE(1); } // Returns `true` if key existed and was deleted. Returns `false` if // the key could not be deleted because it did not exist in the dict. bool r_dict_del(struct r_dict* p_dict, r_obj* key) { r_ssize hash; r_obj* parent; r_obj* node = dict_find_node_info(p_dict, key, &hash, &parent); if (node == r_null) { return false; } r_obj* node_cdr = DICT_CDR(node); if (parent == r_null) { r_list_poke(p_dict->buckets, hash, node_cdr); } else { DICT_POKE_CDR(parent, node_cdr); } return true; } bool r_dict_has(struct r_dict* p_dict, r_obj* key) { return dict_find_node(p_dict, key) != r_null; } r_obj* r_dict_get(struct r_dict* p_dict, r_obj* key) { r_obj* out = r_dict_get0(p_dict, key); if (!out) { r_abort("Can't find key in dictionary."); } return out; } /* The 0-suffixed variant returns a C `NULL` if the object doesn't exist. The regular variant throws an error in that case. */ r_obj* r_dict_get0(struct r_dict* p_dict, r_obj* key) { r_obj* node = dict_find_node(p_dict, key); if (node == r_null) { return NULL; } else { return DICT_VALUE(node); } } static r_obj* dict_find_node(struct r_dict* p_dict, r_obj* key) { r_ssize i = dict_hash(p_dict, key); r_obj* bucket = p_dict->p_buckets[i]; while (bucket != r_null) { r_obj* const * v_bucket = DICT_DEREF(bucket); if (V_DICT_KEY(v_bucket) == key) { return bucket; } bucket = V_DICT_CDR(v_bucket); } return r_null; } // Also returns hash and parent node if any static r_obj* dict_find_node_info(struct r_dict* p_dict, r_obj* key, r_ssize* hash, r_obj** parent) { r_ssize i = dict_hash(p_dict, key); *hash = i; r_obj* bucket = p_dict->p_buckets[i]; *parent = r_null; while (bucket != r_null) { r_obj* const * v_bucket = DICT_DEREF(bucket); if (V_DICT_KEY(v_bucket) == key) { return bucket; } *parent = bucket; bucket = V_DICT_CDR(v_bucket); } return r_null; } struct r_dict_iterator* r_new_dict_iterator(struct r_dict* p_dict) { r_obj* shelter = r_alloc_raw(sizeof(struct r_dict_iterator)); struct r_dict_iterator* p_it = r_raw_begin(shelter); p_it->shelter = shelter; p_it->key = r_null; p_it->value = r_null; p_it->i = 0; p_it->n = p_dict->n_buckets; p_it->v_buckets = p_dict->p_buckets; if (p_it->n == 0) { r_stop_internal("Empty dictionary."); } p_it->node = p_it->v_buckets[0]; return p_it; } bool r_dict_next(struct r_dict_iterator* p_it) { if (p_it->v_buckets == NULL) { return false; } r_obj* node = p_it->node; while (node == r_null) { r_ssize i = ++p_it->i; if (i >= p_it->n) { p_it->v_buckets = NULL; return false; } node = p_it->v_buckets[i]; p_it->node = node; } r_obj* const * v_node = DICT_DEREF(node); p_it->key = V_DICT_KEY(v_node); p_it->value = V_DICT_VALUE(v_node); p_it->node = V_DICT_CDR(v_node); return true; } static const char* v_dict_it_df_names_c_strings[] = { "key", "value" }; static const enum r_type v_dict_it_df_types[] = { R_TYPE_list, R_TYPE_list }; enum dict_it_df_locs { DICT_IT_DF_LOCS_key, DICT_IT_DF_LOCS_value }; #define DICT_IT_DF_SIZE R_ARR_SIZEOF(v_dict_it_df_types) r_obj* r_dict_as_df_list(struct r_dict* p_dict) { r_obj* nms = KEEP(r_chr_n(v_dict_it_df_names_c_strings, DICT_IT_DF_SIZE)); r_obj* out = KEEP(r_alloc_df_list(p_dict->n_entries, nms, v_dict_it_df_types, DICT_IT_DF_SIZE)); r_obj* key = r_list_get(out, DICT_IT_DF_LOCS_key); r_obj* value = r_list_get(out, DICT_IT_DF_LOCS_value); struct r_dict_iterator* p_it = r_new_dict_iterator(p_dict); KEEP(p_it->shelter); for (r_ssize i = 0; r_dict_next(p_it); ++i) { r_list_poke(key, i, p_it->key); r_list_poke(value, i, p_it->value); } FREE(3); return out; } r_obj* r_dict_as_list(struct r_dict* p_dict) { r_obj* out = KEEP(r_alloc_list(p_dict->n_entries)); struct r_dict_iterator* p_it = r_new_dict_iterator(p_dict); KEEP(p_it->shelter); for (r_ssize i = 0; r_dict_next(p_it); ++i) { r_list_poke(out, i, p_it->value); } FREE(2); return out; } vctrs/src/rlang/rlang.c0000644000176200001440000000606315157273702014572 0ustar liggesusers#include #include #include "arg.c" #include "attrib.c" #include "call.c" #include "cnd.c" #include "c-utils.c" #include "debug.c" #include "dict.c" #include "df.c" #include "dyn-array.c" #include "dyn-list-of.c" #include "env.c" #include "env-binding.c" #include "eval.c" #include "export.c" #include "fn.c" #include "formula.c" #include "globals.c" #include "node.c" #include "obj.c" #include "parse.c" #include "quo.c" #include "session.c" #include "stack.c" #include "sym.c" #include "vec.c" #include "vec-chr.c" #include "vec-lgl.c" #include "vendor.c" #ifdef RLANG_USE_PRIVATE_ACCESSORS #include "walk.c" #endif // Allows long vectors to be indexed with doubles r_ssize r_arg_as_ssize(r_obj* n, const char* arg) { switch (r_typeof(n)) { case R_TYPE_double: { if (r_length(n) != 1) { goto invalid; } double out = r_dbl_get(n, 0); if (out > R_SSIZE_MAX) { r_abort("`%s` is too large a number.", arg); } if (out != (int_least64_t) out) { r_abort("`%s` must be a whole number, not a decimal number.", arg); } return (r_ssize) floor(out); } case R_TYPE_integer: { if (r_length(n) != 1) { goto invalid; } return (r_ssize) r_int_get(n, 0); } invalid: default: r_abort("`%s` must be a scalar integer or double.", arg); } } static r_obj* shared_x_env; static r_obj* shared_xy_env; static r_obj* shared_xyz_env; // This *must* be called before making any calls to the functions // provided in the library. Register this function in your init file // and `.Call()` it from your `.onLoad()` hook. r_obj* r_init_library(r_obj* ns) { if (!R_IsNamespaceEnv(ns)) { Rf_errorcall(r_null, "Can't initialise rlang library.\n" "x `ns` must be a namespace environment."); } // Local precious lists are disabled by default because rchk // requires the base precious list and we don't want to // double-preserve. Still enable it on CI to get that part of the // code tested. _r_use_local_precious_list = getenv("RLIB_USE_LOCAL_PRECIOUS_LIST") || getenv("CI"); // Need to be first r_init_library_vendor(); // Needed for xxh used in `r_preserve()` r_init_library_globals_syms(); r_init_library_obj(ns); r_init_library_globals(ns); shared_x_env = r_parse_eval("new.env(hash = FALSE, parent = baseenv(), size = 1L)", r_envs.base); r_preserve(shared_x_env); shared_xy_env = r_parse_eval("new.env(hash = FALSE, parent = baseenv(), size = 1L)", r_envs.base); r_preserve(shared_xy_env); shared_xyz_env = r_parse_eval("new.env(hash = FALSE, parent = baseenv(), size = 1L)", r_envs.base); r_preserve(shared_xyz_env); r_init_library_sym(); r_init_library_eval(); r_init_library_env(); r_init_rlang_ns_env(); r_init_library_arg(); r_init_library_call(); r_init_library_cnd(); r_init_library_dyn_array(); r_init_library_fn(); r_init_library_quo(); r_init_library_session(); r_init_library_stack(); // Return a SEXP so the init function can be called from R return r_null; } bool _r_use_local_precious_list = false; vctrs/src/rlang/rlang-types.h0000644000176200001440000000770015157273666015751 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_RLANG_TYPES_H #define RLANG_RLANG_TYPES_H #define R_NO_REMAP #include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export // Use `r_visible` to mark your init function. Then users can compile // with `-fvisibility=hidden -DHAVE_VISIBILITY_ATTRIBUTE` to link to // your library (as opposed to dynamically loading it) without risking // symbol clashes. #define r_visible attribute_visible extern #ifdef __GNUC__ # define r_unused __attribute__ ((unused)) #else # define r_unused #endif #define r_no_return __attribute__ ((noreturn)) typedef struct SEXPREC r_obj; typedef Rcomplex r_complex; typedef R_xlen_t r_ssize; #define R_SSIZE_MAX R_XLEN_T_MAX #define R_SSIZE_MIN (-R_XLEN_T_MAX) #ifdef LONG_VECTOR_SUPPORT # define R_PRI_SSIZE "td" #else # define R_PRI_SSIZE "d" #endif enum r_type { R_TYPE_null = 0, R_TYPE_symbol = 1, R_TYPE_pairlist = 2, R_TYPE_closure = 3, R_TYPE_environment = 4, R_TYPE_promise = 5, R_TYPE_call = 6, R_TYPE_special = 7, R_TYPE_builtin = 8, R_TYPE_string = 9, R_TYPE_logical = 10, R_TYPE_integer = 13, R_TYPE_double = 14, R_TYPE_complex = 15, R_TYPE_character = 16, R_TYPE_dots = 17, R_TYPE_any = 18, R_TYPE_list = 19, R_TYPE_expression = 20, R_TYPE_bytecode = 21, R_TYPE_pointer = 22, R_TYPE_weakref = 23, R_TYPE_raw = 24, R_TYPE_s4 = 25, R_TYPE_new = 30, R_TYPE_free = 31, R_TYPE_function = 99 }; #define r_null R_NilValue struct r_pair { r_obj* x; r_obj* y; }; struct r_triple { r_obj* x; r_obj* y; r_obj* z; }; struct r_pair_ptr_ssize { void* ptr; r_ssize size; }; struct r_pair_callback { r_obj* (*fn)(void* data); void* data; }; struct r_lazy { r_obj* x; r_obj* env; }; #define KEEP PROTECT #define FREE UNPROTECT #define KEEP2(x, y) (KEEP(x), KEEP(y)) #define KEEP_N(x, n) (++(*n), KEEP(x)) #define r_keep_loc PROTECT_INDEX #define KEEP_AT REPROTECT #define KEEP_HERE PROTECT_WITH_INDEX #define KEEP_WHILE(X, EXPR) do { \ KEEP(X); \ EXPR; \ FREE(1); \ } while (0) #define RLANG_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) // Polyfills for R API #if R_VERSION < R_Version(4, 5, 0) static inline int ANY_ATTRIB(SEXP x) { return ATTRIB(x) != R_NilValue; } static inline void CLEAR_ATTRIB(SEXP x) { SET_ATTRIB(x, R_NilValue); SET_OBJECT(x, 0); UNSET_S4_OBJECT(x); } #endif #if 1 || R_VERSION < R_Version(4, 6, 0) static inline bool rlang_promise_is_forced(r_obj* x) { return PRVALUE(x) != R_UnboundValue; } // Unwrap nested promises to the innermost one. // Sets `*forced` to TRUE if the innermost promise is forced. // Uses Floyd's cycle detection to guard against promise loops. static inline r_obj* rlang_promise_unwrap(r_obj* x, bool *forced) { r_obj* slow = x; bool advance_slow = false; while (TRUE) { r_obj* expr = PREXPR(x); if (TYPEOF(expr) != PROMSXP) { *forced = rlang_promise_is_forced(x); return x; } x = expr; if (x == slow) { Rf_error("Cycle detected in promise chain"); } if (advance_slow) { slow = PREXPR(slow); } advance_slow = !advance_slow; } } #endif #if R_VERSION < R_Version(4, 6, 0) static inline SEXP R_mapAttrib(SEXP x, SEXP (*FUN)(SEXP, SEXP, void *), void *data) { PROTECT_INDEX api; SEXP a = ATTRIB(x); SEXP val = NULL; PROTECT_WITH_INDEX(a, &api); while (a != R_NilValue) { SEXP tag = PROTECT(TAG(a)); SEXP attr = PROTECT(CAR(a)); val = FUN(tag, attr, data); UNPROTECT(2); if (val != NULL) break; REPROTECT(a = CDR(a), api); } UNPROTECT(1); return val; } #endif #endif vctrs/src/rlang/quo.h0000644000176200001440000000050315157273666014302 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_QUO_H #define RLANG_QUO_H #include "rlang-types.h" extern r_obj* (*r_quo_get_expr)(r_obj* quo); extern r_obj* (*r_quo_set_expr)(r_obj* quo, r_obj* expr); extern r_obj* (*r_quo_get_env)(r_obj* quo); extern r_obj* (*r_quo_set_env)(r_obj* quo, r_obj* env); #endif vctrs/src/rlang/dict.h0000644000176200001440000000246315157273666014430 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_DICT_H #define RLANG_DICT_H #include "rlang-types.h" /** * This is a simple hash table of `r_obj*`. It is structured like R * environments and uses xxhash for hashing. */ struct r_dict { r_obj* shelter; /* private: */ r_obj* buckets; r_obj* const * p_buckets; r_ssize n_buckets; r_ssize n_entries; // For testing collisions bool prevent_resize; }; struct r_dict* r_new_dict(r_ssize size); r_obj* r_dict_poke(struct r_dict* p_dict, r_obj* key, r_obj* value); bool r_dict_put(struct r_dict* p_dict, r_obj* key, r_obj* value); bool r_dict_del(struct r_dict* p_dict, r_obj* key); bool r_dict_has(struct r_dict* p_dict, r_obj* key); r_obj* r_dict_get(struct r_dict* p_dict, r_obj* key); r_obj* r_dict_get0(struct r_dict* p_dict, r_obj* key); // Pass a negative size to resize by the default growth factor void r_dict_resize(struct r_dict* p_dict, r_ssize size); r_obj* r_dict_as_df_list(struct r_dict* p_dict); r_obj* r_dict_as_list(struct r_dict* p_dict); struct r_dict_iterator { r_obj* shelter; r_obj* key; r_obj* value; /* private: */ r_ssize i; r_ssize n; r_obj* const * v_buckets; r_obj* node; }; struct r_dict_iterator* r_new_dict_iterator(struct r_dict* p_dict); bool r_dict_next(struct r_dict_iterator* p_it); #endif vctrs/src/rlang/decl/0000755000176200001440000000000015157322654014226 5ustar liggesusersvctrs/src/rlang/decl/walk-decl.h0000644000176200001440000000162615157273666016257 0ustar liggesusersstatic inline enum sexp_iterator_type sexp_iterator_type(enum r_type type, r_obj* x); static inline bool sexp_has_attrib(enum r_type type, r_obj* x); static inline r_obj* sexp_node_car(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel); static inline r_obj* sexp_node_cdr(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel); static inline r_obj* sexp_node_tag(enum r_type type, r_obj* x, enum r_sexp_it_relation* p_rel); static inline void init_incoming_stack_info(struct sexp_stack_info* p_info, enum sexp_iterator_type it_type, bool has_attrib); static bool sexp_next_incoming(struct r_sexp_iterator* p_it, struct sexp_stack_info* p_info); vctrs/src/rlang/decl/obj-decl.h0000644000176200001440000000023315157273666016064 0ustar liggesusersstatic r_obj* new_precious_stack(r_obj* x); static int push_precious(r_obj* stack); static int pop_precious(r_obj* stack); static r_obj* as_label_call; vctrs/src/rlang/decl/df-decl.h0000644000176200001440000000016215157273666015704 0ustar liggesusersstatic void init_compact_rownames(r_obj* x, r_ssize n_rows); static r_obj* new_compact_rownames(r_ssize n_rows); vctrs/src/rlang/decl/dyn-list-of-decl.h0000644000176200001440000000024415157273666017461 0ustar liggesusersstatic bool reserve_push_back(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt); static void reserve_move(struct r_dyn_list_of* p_lof, r_ssize i, void* p_elt); vctrs/src/rlang/decl/dict-decl.h0000644000176200001440000000062615157273666016243 0ustar liggesusersstatic r_obj* dict_find_node_info(struct r_dict* dict, r_obj* key, r_ssize* hash, r_obj** parent); static r_obj* dict_find_node(struct r_dict* dict, r_obj* key); static void dict_push(struct r_dict* p_dict, r_ssize hash, r_obj* parent, r_obj* key, r_obj* value); vctrs/src/rlang/decl/env-decl.h0000644000176200001440000000101015157273666016074 0ustar liggesusersr_obj* eval_with_x(r_obj* call, r_obj* x); r_obj* eval_with_xy(r_obj* call, r_obj* x, r_obj* y); r_obj* eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z); #if R_VERSION < R_Version(4, 1, 0) static r_obj* new_env_call; static r_obj* new_env__parent_node; static r_obj* new_env__size_node; #endif static r_obj* exists_call; static r_obj* remove_call; static r_obj* env2list_call; static r_obj* list2env_call; static r_obj* missing_prim; static void env_coalesce_plain(r_obj* env, r_obj* from, r_obj* nms); vctrs/src/rlang/decl/cnd-decl.h0000644000176200001440000000003715157273666016060 0ustar liggesusersstatic r_obj* cnd_signal_call; vctrs/src/rlang/decl/env-binding-decl.h0000644000176200001440000000003415157273666017511 0ustar liggesusersextern r_obj* rlang_ns_env; vctrs/src/rlang/decl/stack-decl.h0000644000176200001440000000016415157273666016422 0ustar liggesusers// From env.c r_obj* rlang_ns_get(const char* name); static r_obj* peek_frame_call; static r_obj* caller_env_call; vctrs/src/rlang/node.c0000644000176200001440000000255515157273666014427 0ustar liggesusers#include "rlang.h" r_obj* r_new_pairlist(const struct r_pair* args, int n, r_obj** tail) { r_obj* shelter = KEEP(r_new_node(R_NilValue, R_NilValue)); r_obj* node = shelter; for (int i = 0; i < n; ++i) { struct r_pair arg = args[i]; r_obj* tag = arg.x; r_obj* car = arg.y; r_obj* cdr = r_new_node(car, r_null); r_node_poke_tag(cdr, tag); r_node_poke_cdr(node, cdr); node = cdr; } if (n && tail) { *tail = node; } FREE(1); return r_node_cdr(shelter); } // Shallow copy of a node tree. Other objects are not cloned. r_obj* r_node_tree_clone(r_obj* x) { enum r_type type = r_typeof(x); if (type != R_TYPE_pairlist && type != R_TYPE_call) { return x; } x = KEEP(r_clone(x)); r_obj* rest = x; while (rest != r_null) { r_obj* head = r_node_car(rest); enum r_type head_type = r_typeof(head); if (head_type == R_TYPE_pairlist || head_type == R_TYPE_call) { r_node_poke_car(rest, r_node_tree_clone(head)); } rest = r_node_cdr(rest); } FREE(1); return x; } r_obj* r_pairlist_rev(r_obj* node) { if (node == r_null) { return node; } r_obj* prev = r_null; r_obj* tail = node; r_obj* next; while (tail != r_null) { next = r_node_cdr(tail); r_node_poke_cdr(tail, prev); prev = tail; tail = next; } return prev; } vctrs/src/rlang/cnd.h0000644000176200001440000000440015157273666014242 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_CND_H #define RLANG_CND_H #include "rlang-types.h" #include "obj.h" void r_inform(const char* fmt, ...); void r_warn(const char* fmt, ...); void r_interrupt(void); void r_no_return r_abort(const char* fmt, ...); void r_no_return r_abort_n(const struct r_pair* args, int n); void r_no_return r_abort_call(r_obj* call, const char* fmt, ...); // Formats input as an argument, using cli if available. Returns a // vmax-protected string. extern const char* (*r_format_error_arg)(r_obj* arg); const char* r_format_lazy_error_arg(struct r_lazy arg); // Return vmax-protected strings extern const char* (*r_obj_type_friendly_full)(r_obj* x, bool value, bool length); static inline const char* r_obj_type_friendly(r_obj* x) { return r_obj_type_friendly_full(x, true, false); } extern r_no_return void (*r_stop_internal)(const char* file, int line, r_obj* call, const char* fmt, ...); r_obj* r_peek_frame(void); #define r_stop_internal(...) \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ __VA_ARGS__) #define r_stop_unreachable() \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ "Reached the unreachable") #define r_stop_unimplemented_type(TYPE) \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ "Unimplemented type `%s`.", Rf_type2char(TYPE)) #define r_stop_unexpected_type(TYPE) \ (r_stop_internal)(__FILE__, __LINE__, r_peek_frame(), \ "Unexpected type `%s`.", Rf_type2char(TYPE)) static inline bool r_is_condition(r_obj* x) { return r_typeof(x) == R_TYPE_list && r_inherits(x, "condition"); } void r_cnd_signal(r_obj* cnd); void r_cnd_inform(r_obj* cnd, bool mufflable); void r_cnd_warn(r_obj* cnd, bool mufflable); void r_cnd_abort(r_obj* cnd, bool mufflable); enum r_cnd_type { R_CND_TYPE_condition = 0, R_CND_TYPE_message = 1, R_CND_TYPE_warning = 2, R_CND_TYPE_error = 3, R_CND_TYPE_interrupt = 4 }; enum r_cnd_type r_cnd_type(r_obj* cnd); #endif vctrs/src/rlang/obj.c0000644000176200001440000000576015157273666014255 0ustar liggesusers#include "rlang.h" #define PRECIOUS_DICT_INIT_SIZE 256 static struct r_dict* p_precious_dict = NULL; #include "decl/obj-decl.h" r_obj* r_vec_clone(r_obj* x) { r_obj* out = KEEP(r_clone(x)); r_obj* names = r_names(x); if (names != r_null) { r_attrib_poke_names(out, r_clone(names)); } FREE(1); return out; } r_obj* r_vec_clone_shared(r_obj* x) { if (r_is_shared(x)) { return r_vec_clone(x); } r_obj* names = r_names(x); if (names != r_null && r_is_shared(names)) { r_attrib_poke_names(x, r_clone(names)); return x; } return x; } void (_r_preserve)(r_obj* x) { if (!_r_use_local_precious_list) { return; } r_obj* stack = r_dict_get0(p_precious_dict, x); if (!stack) { stack = KEEP(new_precious_stack(x)); r_dict_put(p_precious_dict, x, stack); FREE(1); } push_precious(stack); } void (_r_unpreserve)(r_obj* x) { if (!_r_use_local_precious_list) { return; } r_obj* stack = r_dict_get0(p_precious_dict, x); if (!stack) { r_abort("Can't unpreserve `x` because it was not being preserved."); } int n = pop_precious(stack); if (n < 0) { r_stop_internal("`n` unexpectedly < 0."); } if (n == 0) { r_dict_del(p_precious_dict, x); } } static r_obj* new_precious_stack(r_obj* x) { r_obj* stack = KEEP(r_alloc_list(2)); // Store (0) protection count and (1) element to protect r_list_poke(stack, 0, r_int(0)); r_list_poke(stack, 1, x); FREE(1); return stack; } static int push_precious(r_obj* stack) { r_obj* n = r_list_get(stack, 0); int* p_n = r_int_begin(n); return ++(*p_n); } static int pop_precious(r_obj* stack) { r_obj* n = r_list_get(stack, 0); int* p_n = r_int_begin(n); return --(*p_n); } // For unit tests struct r_dict* rlang__precious_dict(void) { return p_precious_dict; } enum r_type r_chr_as_r_type(r_obj* type) { if (!r_is_string(type)) { r_abort("`type` must be a character string."); } return r_c_str_as_r_type(r_chr_get_c_string(type, 0)); } const char* obj_address_formatter = "%p"; r_obj* r_obj_address(r_obj* x) { static char buf[1000]; snprintf(buf, 1000, obj_address_formatter, (void*) x); return Rf_mkChar(buf); } r_obj* (*r_obj_encode_utf8)(r_obj* x) = NULL; r_obj* r_as_label(r_obj* x) { return r_eval_with_x(as_label_call, x, r_ns_env("rlang")); } void r_init_library_obj(r_obj* ns) { p_precious_dict = r_new_dict(PRECIOUS_DICT_INIT_SIZE); KEEP(p_precious_dict->shelter); r_env_bind(ns, r_sym(".__rlang_lib_precious_dict__."), p_precious_dict->shelter); FREE(1); // The Microsoft C library doesn't implement the hexadecimal // formatter correctly const char* null_addr = r_str_c_string(r_obj_address(r_null)); if (null_addr[0] != '0' || null_addr[1] != 'x') { obj_address_formatter = "0x%p"; } r_obj_encode_utf8 = (r_obj* (*)(r_obj*)) r_peek_c_callable("rlang", "rlang_obj_encode_utf8"); as_label_call = r_parse("as_label(x)"); r_preserve_global(as_label_call); } static r_obj* as_label_call = NULL; vctrs/src/rlang/vendor.c0000644000176200001440000000032115157273666014764 0ustar liggesusers#include "rlang.h" uint64_t (*r_xxh3_64bits)(const void*, size_t); void r_init_library_vendor(void) { r_xxh3_64bits = (uint64_t (*)(const void*, size_t)) r_peek_c_callable("rlang", "rlang_xxh3_64bits"); } vctrs/src/rlang/attrib.c0000644000176200001440000000223715157273666014764 0ustar liggesusers#include "rlang.h" static r_obj* r_attrib_get_cb(r_obj* tag, r_obj* val, void* data) { if (tag == *(r_obj**) data) { return val; } return NULL; } r_obj* r_attrib_get(r_obj* x, r_obj* tag) { r_obj* out = r_attrib_map(x, &r_attrib_get_cb, &tag); return out ? out : r_null; } // Collect attributes into a pairlist using `R_mapAttrib` static r_obj* r_attrib_collect_cb(r_obj* tag, r_obj* val, void* data) { r_obj** p_tail = (r_obj**) data; r_obj* node = r_new_node(val, r_null); r_node_poke_tag(node, tag); r_node_poke_cdr(*p_tail, node); *p_tail = node; return NULL; } r_obj* r_attrib_collect(r_obj* x) { r_obj* sentinel = KEEP(r_new_node(r_null, r_null)); r_obj* tail = sentinel; r_attrib_map(x, &r_attrib_collect_cb, &tail); FREE(1); return r_node_cdr(sentinel); } bool r_is_named(r_obj* x) { r_obj* nms = r_names(x); if (r_typeof(nms) != R_TYPE_character) { return false; } if (r_chr_has(nms, "")) { return false; } return true; } void r_attrib_poke_classes(r_obj* x, const char** classes, r_ssize n) { r_obj* classes_chr = KEEP(r_chr_n(classes, n)); r_attrib_poke(x, r_syms.class_, classes_chr); FREE(1); } vctrs/src/rlang/parse.h0000644000176200001440000000031715157273666014613 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_PARSE_H #define RLANG_PARSE_H #include "rlang-types.h" r_obj* r_parse(const char* str); r_obj* r_parse_eval(const char* str, r_obj* env); #endif vctrs/src/rlang/vec.h0000644000176200001440000003046115157273666014261 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_VECTOR_H #define RLANG_VECTOR_H #include #include "rlang-types.h" #include "c-utils.h" #include "cnd.h" #include "globals.h" #include "obj.h" static inline int* r_lgl_begin(r_obj* x) { return LOGICAL(x); } static inline int* r_int_begin(r_obj* x) { return INTEGER(x); } static inline double* r_dbl_begin(r_obj* x) { return REAL(x); } static inline r_complex* r_cpl_begin(r_obj* x) { return COMPLEX(x); } static inline void* r_raw_begin(r_obj* x) { return RAW(x); } static inline const int* r_int_cbegin(r_obj* x) { return (const int*) INTEGER(x); } static inline const int* r_lgl_cbegin(r_obj* x) { return (const int*) LOGICAL(x); } static inline const double* r_dbl_cbegin(r_obj* x) { return (const double*) REAL(x); } static inline const r_complex* r_cpl_cbegin(r_obj* x) { return (const r_complex*) COMPLEX(x); } static inline const void* r_raw_cbegin(r_obj* x) { return (const void*) RAW(x); } static inline r_obj* const * r_chr_cbegin(r_obj* x) { return STRING_PTR_RO(x); } static inline r_obj* const * r_list_cbegin(r_obj* x) { #if (R_VERSION >= R_Version(4, 5, 0)) return VECTOR_PTR_RO(x); #else return ((r_obj* const *) DATAPTR_RO(x)); #endif } static inline void* r_vec_begin0(enum r_type type, r_obj* x) { switch (type) { case R_TYPE_logical: return r_lgl_begin(x); case R_TYPE_integer: return r_int_begin(x); case R_TYPE_double: return r_dbl_begin(x); case R_TYPE_complex: return r_cpl_begin(x); case R_TYPE_raw: return r_raw_begin(x); default: r_stop_unimplemented_type(type); } } static inline void* r_vec_begin(r_obj* x) { return r_vec_begin0(r_typeof(x), x); } static inline const void* r_vec_cbegin0(enum r_type type, r_obj* x) { switch (type) { case R_TYPE_logical: return r_lgl_cbegin(x); case R_TYPE_integer: return r_int_cbegin(x); case R_TYPE_double: return r_dbl_cbegin(x); case R_TYPE_complex: return r_cpl_cbegin(x); case R_TYPE_raw: return r_raw_cbegin(x); case R_TYPE_character: return r_chr_cbegin(x); case R_TYPE_list: return r_list_cbegin(x); default: r_stop_unimplemented_type(type); } } static inline const void* r_vec_cbegin(r_obj* x) { return r_vec_cbegin0(r_typeof(x), x); } static inline int r_vec_elt_sizeof0(enum r_type type) { switch (type) { case R_TYPE_logical: return sizeof(int); case R_TYPE_integer: return sizeof(int); case R_TYPE_double: return sizeof(double); case R_TYPE_complex: return sizeof(r_complex); case R_TYPE_raw: return sizeof(char); case R_TYPE_character: return sizeof(r_obj*); case R_TYPE_list: return sizeof(r_obj*); default: r_stop_unimplemented_type(type); } } static inline int r_vec_elt_sizeof(r_obj* x) { return r_vec_elt_sizeof0(r_typeof(x)); } static inline int r_lgl_get(r_obj* x, r_ssize i) { return LOGICAL(x)[i]; } static inline int r_int_get(r_obj* x, r_ssize i) { return INTEGER(x)[i]; } static inline double r_dbl_get(r_obj* x, r_ssize i) { return REAL(x)[i]; } static inline r_complex r_cpl_get(r_obj* x, r_ssize i) { return COMPLEX(x)[i]; } static inline char r_raw_get(r_obj* x, r_ssize i) { return RAW(x)[i]; } static inline r_obj* r_chr_get(r_obj* x, r_ssize i) { return STRING_ELT(x, i); } static inline const char* r_chr_get_c_string(r_obj* x, r_ssize i) { return CHAR(r_chr_get(x, i)); } static inline r_obj* r_list_get(r_obj* x, r_ssize i) { return VECTOR_ELT(x, i); } static inline void r_lgl_poke(r_obj* x, r_ssize i, int y) { LOGICAL(x)[i] = y; } static inline void r_int_poke(r_obj* x, r_ssize i, int y) { INTEGER(x)[i] = y; } static inline void r_dbl_poke(r_obj* x, r_ssize i, double y) { REAL(x)[i] = y; } static inline void r_cpl_poke(r_obj* x, r_ssize i, r_complex y) { COMPLEX(x)[i] = y; } static inline void r_raw_poke(r_obj* x, r_ssize i, char y) { RAW(x)[i] = y; } static inline void r_chr_poke(r_obj* x, r_ssize i, r_obj* y) { SET_STRING_ELT(x, i, y); } static inline void r_list_poke(r_obj* x, r_ssize i, r_obj* y) { SET_VECTOR_ELT(x, i, y); } #define r_chr_poke(X, I, Y) SET_STRING_ELT(X, I, Y) #define r_list_poke(X, I, Y) SET_VECTOR_ELT(X, I, Y) static inline r_obj* r_alloc_vector(enum r_type type, r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(type, n); #else return Rf_allocVector(type, n); #endif } static inline r_obj* r_alloc_logical(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_logical, n); #else return Rf_allocVector(R_TYPE_logical, n); #endif } static inline r_obj* r_alloc_integer(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_integer, n); #else return Rf_allocVector(R_TYPE_integer, n); #endif } static inline r_obj* r_alloc_double(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_double, n); #else return Rf_allocVector(R_TYPE_double, n); #endif } static inline r_obj* r_alloc_complex(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_complex, n); #else return Rf_allocVector(R_TYPE_complex, n); #endif } static inline r_obj* r_alloc_raw(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_raw, n); #else return Rf_allocVector(R_TYPE_raw, n); #endif } static inline r_obj* r_alloc_character(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_character, n); #else return Rf_allocVector(R_TYPE_character, n); #endif } static inline r_obj* r_alloc_list(r_ssize n) { #if R_VERSION >= R_Version(4, 6, 0) return R_allocResizableVector(R_TYPE_list, n); #else return Rf_allocVector(R_TYPE_list, n); #endif } static inline r_obj* r_alloc_raw0(r_ssize n) { r_obj* out = r_alloc_raw(n); unsigned char* p_out = (unsigned char*) r_raw_begin(out); r_memset(p_out, 0, n); return out; } static inline r_obj* r_lgl(bool x) { return Rf_ScalarLogical(x); } static inline r_obj* r_int(int x) { return Rf_ScalarInteger(x); } static inline r_obj* r_dbl(double x) { return Rf_ScalarReal(x); } static inline r_obj* r_cpl(r_complex x) { return Rf_ScalarComplex(x); } static inline r_obj* r_raw(char x) { return Rf_ScalarRaw(x); } static inline r_obj* r_str(const char* c_string) { return Rf_mkCharCE(c_string, CE_UTF8); } static inline r_obj* r_chr(const char* c_string) { r_obj* out = KEEP(r_alloc_character(1)); r_chr_poke(out, 0, r_str(c_string)); FREE(1); return out; } static inline r_obj* r_list(r_obj* x) { r_obj* out = r_alloc_list(1); r_list_poke(out, 0, x); return out; } r_obj* r_chr_n(const char* const * strings, r_ssize n); static inline r_obj* r_len(r_ssize x) { if (x > INT_MAX) { return r_dbl(x); } else { return r_int(x); } } // FIXME: Redundant with `r_lgl()` static inline r_obj* r_shared_lgl(bool x) { if (x) { return r_true; } else { return r_false; } } static inline bool _r_has_correct_length(r_obj* x, r_ssize n) { return n < 0 || r_length(x) == n; } extern bool _r_is_finite(r_obj* x); static inline bool _r_is_double(r_obj* x, r_ssize n, int finite) { if (r_typeof(x) != R_TYPE_double || !_r_has_correct_length(x, n)) { return false; } if (finite >= 0 && (bool) finite != _r_is_finite(x)) { return false; } return true; } static inline bool _r_is_complex(r_obj* x, r_ssize n, int finite) { if (r_typeof(x) != R_TYPE_complex || !_r_has_correct_length(x, n)) { return false; } if (finite >= 0 && (bool) finite != _r_is_finite(x)) { return false; } return true; } static inline bool r_is_bool(r_obj* x) { return r_typeof(x) == R_TYPE_logical && r_length(x) == 1 && r_lgl_get(x, 0) != r_globals.na_lgl; } static inline bool r_is_int(r_obj* x) { return r_typeof(x) == R_TYPE_integer && r_length(x) == 1 && r_int_get(x, 0) != r_globals.na_int; } static inline bool r_is_true(r_obj* x) { return r_is_bool(x) && r_lgl_get(x, 0); } static inline bool r_is_false(r_obj* x) { return r_is_bool(x) && !r_lgl_get(x, 0); } static inline bool r_is_string(r_obj* x) { return r_typeof(x) == R_TYPE_character && r_length(x) == 1 && r_chr_get(x, 0) != R_NaString; } static inline bool r_arg_as_bool(r_obj* x, const char* arg) { if (!r_is_bool(x)) { r_abort("`%s` must be `TRUE` or `FALSE`.", arg); } return r_lgl_get(x, 0); } static inline bool r_as_bool(r_obj* x) { return r_arg_as_bool(x, "x"); } static inline int r_arg_as_int(r_obj* x, const char* arg) { if (!r_is_int(x)) { r_abort("`%s` must be a single integer value.", arg); } return r_int_get(x, 0); } static inline int r_as_int(r_obj* x) { return r_arg_as_int(x, "x"); } static inline double r_arg_as_double(r_obj* x, const char* arg) { // TODO: Coercion of int and lgl values if (!_r_is_double(x, 1, -1)) { r_abort("`%s` must be a single double value.", arg); } return r_dbl_get(x, 0); } static inline double r_as_double(r_obj* x) { return r_arg_as_double(x, "x"); } static inline r_complex r_arg_as_complex(r_obj* x, const char* arg) { if (!_r_is_complex(x, 1, 1)) { r_abort("`%s` must be a single complex value.", arg); } return r_cpl_get(x, 0); } static inline r_complex r_as_complex(r_obj* x) { return r_arg_as_complex(x, "x"); } static inline char r_arg_as_char(r_obj* x, const char* arg) { if (r_typeof(x) != R_TYPE_raw && r_length(x) != 1) { r_abort("`%s` must be a single raw value.", arg); } return r_raw_get(x, 0); } static inline char r_as_char(r_obj* x) { return r_arg_as_char(x, "x"); } r_obj* r_lgl_resize(r_obj* x, r_ssize new_size); r_obj* r_int_resize(r_obj* x, r_ssize new_size); r_obj* r_dbl_resize(r_obj* x, r_ssize new_size); r_obj* r_cpl_resize(r_obj* x, r_ssize new_size); r_obj* r_raw_resize(r_obj* x, r_ssize new_size); r_obj* r_chr_resize(r_obj* x, r_ssize new_size); r_obj* r_list_resize(r_obj* x, r_ssize new_size); static inline r_obj* r_vec_resize0(enum r_type type, r_obj* x, r_ssize new_size) { switch (type) { case R_TYPE_logical: return r_lgl_resize(x, new_size); case R_TYPE_integer: return r_int_resize(x, new_size); case R_TYPE_double: return r_dbl_resize(x, new_size); case R_TYPE_complex: return r_cpl_resize(x, new_size); case R_TYPE_raw: return r_raw_resize(x, new_size); case R_TYPE_character: return r_chr_resize(x, new_size); case R_TYPE_list: return r_list_resize(x, new_size); default: r_stop_unimplemented_type(type); } } static inline r_obj* r_vec_resize(r_obj* x, r_ssize new_size) { return r_vec_resize0(r_typeof(x), x, new_size); } static inline r_obj* r_vec_n(enum r_type type, void* v_src, r_ssize n) { switch (type) { case R_TYPE_logical: case R_TYPE_integer: case R_TYPE_double: case R_TYPE_complex: case R_TYPE_raw: { r_obj* out = r_alloc_vector(type, n); r_memcpy(r_vec_begin(out), v_src, n * r_vec_elt_sizeof0(type)); return out; } case R_TYPE_character: case R_TYPE_list: r_abort("TODO: barrier types in `r_vec_n()`"); default: r_stop_unimplemented_type(type); } } static inline r_obj* r_lgl_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_logical, v_src, n); } static inline r_obj* r_int_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_integer, v_src, n); } static inline r_obj* r_dbl_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_double, v_src, n); } static inline r_obj* r_cpl_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_complex, v_src, n); } static inline r_obj* r_raw_n(int* v_src, r_ssize n) { return r_vec_n(R_TYPE_raw, v_src, n); } static inline r_obj* r_copy_in_raw(const void* src, size_t size) { r_obj* out = r_alloc_raw(size); r_memcpy(r_raw_begin(out), src, size); return out; } static inline void r_int_fill_iota0(int* p_x, int start, r_ssize n) { for (r_ssize i = 0; i < n; ++i) { p_x[i] = start++; } } static inline void r_int_fill_iota(r_obj* x) { r_int_fill_iota0(r_int_begin(x), 0, r_length(x)); } r_obj* r_list_compact(r_obj* x); r_obj* r_list_of_as_ptr_ssize(r_obj* xs, enum r_type type, struct r_pair_ptr_ssize** p_v_out); // From cpp/vec.cpp int* r_int_unique0(int* v_data, r_ssize size); bool r_list_all_of0(r_obj* const * v_first, r_ssize size, bool (*predicate)(r_obj* x)); static inline int* r_int_unique(r_obj* x) { return r_int_unique0(r_int_begin(x), r_length(x)); } static inline bool r_list_all_of(r_obj* x, bool (*predicate)(r_obj* x)) { return r_list_all_of0(r_list_cbegin(x), r_length(x), predicate); } #endif vctrs/src/rlang/fn.h0000644000176200001440000000246415157273666014111 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_FN_H #define RLANG_FN_H #include "rlang-types.h" #include "obj.h" static inline r_obj* r_fn_formals(r_obj* fn) { #if R_VERSION >= R_Version(4, 5, 0) return R_ClosureFormals(fn); #else return FORMALS(fn); #endif } // Identical to `R_BytecodeExpr(R_ClosureBody(fn))`, which we always want // since it matches the R level `body()` static inline r_obj* r_fn_body(r_obj* fn) { return R_ClosureExpr(fn); } static inline r_obj* r_fn_env(r_obj* fn) { #if R_VERSION >= R_Version(4, 5, 0) return R_ClosureEnv(fn); #else return CLOENV(fn); #endif } static inline r_obj* r_new_function(r_obj* formals, r_obj* body, r_obj* env) { #if R_VERSION >= R_Version(4, 5, 0) return R_mkClosure(formals, body, env); #else SEXP fn = Rf_allocSExp(R_TYPE_closure); SET_FORMALS(fn, formals); SET_BODY(fn, body); SET_CLOENV(fn, env); return fn; #endif } r_obj* r_as_function(r_obj* x, const char* arg); static inline bool r_is_function(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_closure: case R_TYPE_builtin: case R_TYPE_special: return true; default: return false; } } static inline bool r_is_primitive(r_obj* x) { switch (r_typeof(x)) { case R_TYPE_builtin: case R_TYPE_special: return true; default: return false; } } #endif vctrs/src/rlang/debug.h0000644000176200001440000000040515157273666014565 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_DEBUG_H #define RLANG_DEBUG_H #include "rlang-types.h" #define r_printf Rprintf void r_sexp_inspect(r_obj* x); void r_browse(r_obj* x); void r_browse_at(r_obj* env); void r_dbg_str(r_obj* x); #endif vctrs/src/rlang/eval.h0000644000176200001440000001146715157273666014440 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_EVAL_H #define RLANG_EVAL_H #include "rlang-types.h" #include "c-utils.h" #include "call.h" static inline r_obj* r_eval(r_obj* expr, r_obj* env) { return Rf_eval(expr, env); } r_obj* r_eval_with_x(r_obj* call, r_obj* x, r_obj* parent); r_obj* r_eval_with_xy(r_obj* call, r_obj* x, r_obj* y, r_obj* parent); r_obj* r_eval_with_xyz(r_obj* call, r_obj* x, r_obj* y, r_obj* z, r_obj* parent); r_obj* r_eval_with_wxyz(r_obj* call, r_obj* w, r_obj* x, r_obj* y, r_obj* z, r_obj* parent); r_obj* r_exec_mask_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* parent); r_obj* r_exec_n(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env); r_obj* r_exec_mask_n_call_poke(r_obj* fn_sym, r_obj* fn, const struct r_pair* args, int n, r_obj* env); static inline r_obj* r_exec_mask1(r_obj* fn_sym, r_obj* fn, r_obj* x_sym, r_obj* x, r_obj* env) { struct r_pair args[] = { { x_sym, x } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask2(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask3(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask4(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask5(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 }, { x5_sym, x5 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask6(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 }, { x5_sym, x5 }, { x6_sym, x6 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_exec_mask7(r_obj* fn_sym, r_obj* fn, r_obj* x1_sym, r_obj* x1, r_obj* x2_sym, r_obj* x2, r_obj* x3_sym, r_obj* x3, r_obj* x4_sym, r_obj* x4, r_obj* x5_sym, r_obj* x5, r_obj* x6_sym, r_obj* x6, r_obj* x7_sym, r_obj* x7, r_obj* env) { struct r_pair args[] = { { x1_sym, x1 }, { x2_sym, x2 }, { x3_sym, x3 }, { x4_sym, x4 }, { x5_sym, x5 }, { x6_sym, x6 }, { x7_sym, x7 } }; return r_exec_mask_n(fn_sym, fn, args, R_ARR_SIZEOF(args), env); } static inline r_obj* r_lazy_eval(struct r_lazy lazy) { if (!lazy.env) { // Unitialised lazy variable return r_null; } else if (lazy.env == r_null) { // Forced lazy variable return lazy.x; } else { return r_eval(lazy.x, lazy.env); } } extern struct r_lazy r_lazy_null; extern struct r_lazy r_lazy_missing_arg; static inline r_obj* r_lazy_eval_protect(struct r_lazy lazy) { r_obj* out = KEEP(r_lazy_eval(lazy)); out = r_expr_protect(out); FREE(1); return out; } static inline bool r_lazy_is_null(struct r_lazy call) { return !call.x && !call.env; } #endif vctrs/src/rlang/sym.h0000644000176200001440000000125315157273666014311 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_SYM_H #define RLANG_SYM_H #include "rlang-types.h" // The results of `r_sym_as_` functions must be protected extern r_obj* (*r_sym_as_utf8_character)(r_obj* x); extern r_obj* (*r_sym_as_utf8_string)(r_obj* x); r_obj* r_new_symbol(r_obj* x, int* err); static inline r_obj* r_sym(const char* c_string) { return Rf_install(c_string); } static inline r_obj* r_sym_string(r_obj* sym) { return PRINTNAME(sym); } static inline const char* r_sym_c_string(r_obj* sym) { return CHAR(PRINTNAME(sym)); } bool r_is_symbol(r_obj* sym, const char* string); bool r_is_symbol_any(r_obj* x, const char** strings, int n); #endif vctrs/src/rlang/vendor.h0000644000176200001440000000023415157273666014774 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_VENDOR_H #define RLANG_VENDOR_H extern uint64_t (*r_xxh3_64bits)(const void*, size_t); #endif vctrs/src/rlang/attrib.h0000644000176200001440000000363415157273666014773 0ustar liggesusers// IWYU pragma: private; include "rlang.h" #ifndef RLANG_ATTRIB_H #define RLANG_ATTRIB_H #include "rlang-types.h" #include "globals.h" static inline bool r_attrib_has_any(r_obj* x) { return ANY_ATTRIB(x); } // Collect attributes into a fresh pairlist r_obj* r_attrib_collect(r_obj* x); typedef r_obj* (r_attrib_map_fn)(r_obj* tag, r_obj* value, void* data); // Map a callback to each attribute of an object. Prefer this to collecting for // performance-critical applications. static inline r_obj* r_attrib_map(r_obj* x, r_attrib_map_fn* fn, void* data) { return R_mapAttrib(x, fn, data); } static inline void r_attrib_zap(r_obj* x, r_obj* tag) { Rf_setAttrib(x, tag, r_null); } static inline void r_attrib_zap_all(r_obj* x) { CLEAR_ATTRIB(x); } static inline void r_attrib_clone_from(r_obj* to, r_obj* from) { SHALLOW_DUPLICATE_ATTRIB(to, from); } // Unlike Rf_getAttrib(), this doesn't allocate, but in practice requires // protection because rchk considers the return value to be a fresh pointer. r_obj* r_attrib_get(r_obj* x, r_obj* tag); static inline r_obj* r_class(r_obj* x) { return r_attrib_get(x, r_syms.class_); } void r_attrib_poke_classes(r_obj* x, const char** classes, r_ssize n); static inline r_obj* r_dim(r_obj* x) { return r_attrib_get(x, r_syms.dim); } static inline r_obj* r_dim_names(r_obj* x) { return r_attrib_get(x, r_syms.dim_names); } static inline r_obj* r_names(r_obj* x) { return r_attrib_get(x, r_syms.names); } bool r_is_named(r_obj* x); // Defined as macros so rchk can see that `X` protects `VALUE` #define r_attrib_poke(X, SYM, VALUE) Rf_setAttrib(X, SYM, VALUE) #define r_attrib_poke_class(X, VALUE) Rf_setAttrib(X, r_syms.class_, VALUE) #define r_attrib_poke_dim(X, VALUE) Rf_setAttrib(X, r_syms.dim, VALUE) #define r_attrib_poke_dim_names(X, VALUE) Rf_setAttrib(X, r_syms.dim_names, VALUE) #define r_attrib_poke_names(X, VALUE) Rf_setAttrib(X, r_syms.names, VALUE) #endif vctrs/src/empty.c0000644000176200001440000000164014401377400013505 0ustar liggesusers#include "vctrs.h" #include "decl/empty-decl.h" // [[ register() ]] r_obj* vctrs_list_drop_empty(r_obj* x) { return list_drop_empty(x); } static r_obj* list_drop_empty(r_obj* x) { if (!obj_is_list(x)) { r_abort("`x` must be a list."); } r_ssize i = 0; const r_ssize size = vec_size(x); r_obj* const* v_x = r_list_cbegin(x); // Locate first element to drop for (; i < size; ++i) { if (vec_size(v_x[i]) == 0) { break; } } if (i == size) { // Nothing to drop return x; } r_obj* keep = KEEP(r_alloc_logical(size)); int* v_keep = r_lgl_begin(keep); for (r_ssize j = 0; j < i; ++j) { // Keep everything before first element to drop v_keep[j] = true; } // `i` should be dropped so handle that here v_keep[i] = false; ++i; for (; i < size; ++i) { v_keep[i] = vec_size(v_x[i]) != 0; } r_obj* out = vec_slice(x, keep); FREE(1); return out; } vctrs/src/complete.c0000644000176200001440000001555015156001116014160 0ustar liggesusers#include "vctrs.h" // ----------------------------------------------------------------------------- static SEXP vec_slice_complete(SEXP x); // [[ register() ]] SEXP vctrs_slice_complete(SEXP x) { return vec_slice_complete(x); } static SEXP vec_locate_complete(SEXP x); static SEXP vec_slice_complete(SEXP x) { SEXP loc = PROTECT(vec_locate_complete(x)); // Skip `vec_as_location()` in `vec_slice()` SEXP out = vec_slice_unsafe(x, loc); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_locate_complete(SEXP x) { return vec_locate_complete(x); } static SEXP vec_locate_complete(SEXP x) { SEXP where = PROTECT(vec_detect_complete(x)); SEXP out = r_lgl_which(where, false); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_detect_complete(SEXP x) { return vec_detect_complete(x); } static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out); // [[ include("complete.h") ]] SEXP vec_detect_complete(SEXP x) { SEXP proxy = PROTECT(vec_proxy_equal(x)); R_len_t size = vec_size(proxy); SEXP out = PROTECT(r_new_logical(size)); int* p_out = LOGICAL(out); // Initialize assuming fully complete for (R_len_t i = 0; i < size; ++i) { p_out[i] = 1; } vec_detect_complete_switch(proxy, size, p_out); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- static inline void lgl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void int_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void dbl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void cpl_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void chr_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void raw_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void list_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void df_detect_complete(SEXP x, R_len_t size, int* p_out); static inline void vec_detect_complete_switch(SEXP x, R_len_t size, int* p_out) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_integer: int_detect_complete(x, size, p_out); break; case VCTRS_TYPE_double: dbl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_complex: cpl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_character: chr_detect_complete(x, size, p_out); break; case VCTRS_TYPE_raw: raw_detect_complete(x, size, p_out); break; case VCTRS_TYPE_list: list_detect_complete(x, size, p_out); break; case VCTRS_TYPE_dataframe: df_detect_complete(x, size, p_out); break; case VCTRS_TYPE_null: break; case VCTRS_TYPE_scalar: stop_scalar_type(x, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_detect_complete", vec_proxy_typeof(x)); } } // ----------------------------------------------------------------------------- /* * Avoid the temptation to add an extra if branch at the start of the for * loop like: * * ``` * if (!p_out[i]) { * continue; * } * ``` * * In theory this avoids calculations if we already know the row is incomplete, * but in practice it can wreck performance. I imagine it is due to the cost * of the extra branch + the volatility of this value, causing the result of * the branch to be "guessed" incorrectly many times. For example, the vctrs * result here gets 6x slower (i.e. slower than the R solution) by adding that * branch. * * ``` * # Place many NA values randomly in the first column * first <- sample(c(1, NA, 3), size = 1e6, replace = TRUE) * cols <- rep_len(list(rep(1, 1e6)), 100) * cols <- c(list(first), cols) * names(cols) <- paste0("a", 1:length(cols)) * df <- new_data_frame(cols) * bench::mark(vec_detect_complete(df), complete.cases(df)) * ``` */ #define VEC_DETECT_COMPLETE(CTYPE, CONST_DEREF, IS_MISSING) { \ const CTYPE* p_x = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < size; ++i) { \ const CTYPE elt = p_x[i]; \ \ if (IS_MISSING(elt)) { \ p_out[i] = 0; \ } \ } \ } static inline void lgl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(int, LOGICAL_RO, lgl_is_missing); } static inline void int_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(int, INTEGER_RO, int_is_missing); } static inline void dbl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(double, REAL_RO, dbl_is_missing); } static inline void cpl_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(Rcomplex, COMPLEX_RO, cpl_is_missing); } static inline void chr_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(SEXP, STRING_PTR_RO, chr_is_missing); } static inline void raw_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(unsigned char, RAW_RO, raw_is_missing); } static inline void list_detect_complete(SEXP x, R_len_t size, int* p_out) { VEC_DETECT_COMPLETE(SEXP, VECTOR_PTR_RO, list_is_missing); } #undef VEC_DETECT_COMPLETE // ----------------------------------------------------------------------------- static inline void col_detect_complete_switch(SEXP x, R_len_t size, int* p_out); static inline void df_detect_complete(SEXP x, R_len_t size, int* p_out) { r_ssize n_cols = r_length(x); const SEXP* p_x = VECTOR_PTR_RO(x); for (r_ssize i = 0; i < n_cols; ++i) { col_detect_complete_switch(p_x[i], size, p_out); } } static inline void col_detect_complete_switch(SEXP x, R_len_t size, int* p_out) { switch (vec_proxy_typeof(x)) { case VCTRS_TYPE_logical: lgl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_integer: int_detect_complete(x, size, p_out); break; case VCTRS_TYPE_double: dbl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_complex: cpl_detect_complete(x, size, p_out); break; case VCTRS_TYPE_character: chr_detect_complete(x, size, p_out); break; case VCTRS_TYPE_raw: raw_detect_complete(x, size, p_out); break; case VCTRS_TYPE_list: list_detect_complete(x, size, p_out); break; case VCTRS_TYPE_dataframe: r_stop_internal("Data frame columns should have been flattened by now."); case VCTRS_TYPE_null: r_abort("Unexpected `NULL` column found in a data frame."); case VCTRS_TYPE_scalar: stop_scalar_type(x, vec_args.empty, r_lazy_null); default: stop_unimplemented_vctrs_type("vec_detect_complete", vec_proxy_typeof(x)); } } vctrs/src/dim.c0000644000176200001440000000037014315060310013110 0ustar liggesusers#include "vctrs.h" // [[ register() ]] SEXP vctrs_dim(SEXP x) { return vec_dim(x); } // [[ register() ]] SEXP vctrs_dim_n(SEXP x) { return r_int(vec_dim_n(x)); } // [[ register() ]] SEXP vctrs_has_dim(SEXP x) { return r_lgl(has_dim(x)); } vctrs/src/size-common.c0000644000176200001440000001421215157322033014607 0ustar liggesusers#include "vctrs.h" #include "decl/size-common-decl.h" struct size_common_reduce_opts { // Updated at each iteration. // Allows us to reuse `vec_size()` info from the previous iteration. r_ssize current_size; const struct r_lazy call; }; // [[ register(external = TRUE) ]] r_obj* ffi_size_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); struct r_lazy internal_call = { .x = env, .env = r_null }; r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* size = r_node_car(args); args = r_node_cdr(args); r_obj* absent = r_node_car(args); if (size != r_null) { r_ssize out = vec_as_short_length(size, vec_args.dot_size, internal_call); return r_int(out); } if (absent != r_null && (r_typeof(absent) != R_TYPE_integer || r_length(absent) != 1)) { r_abort_lazy_call(internal_call, "%s must be a single integer.", r_c_str_format_error_arg(".absent")); } r_ssize common = vec_size_common(xs, -1, &arg, call); r_obj* out; if (common < 0) { if (absent == r_null) { r_abort_lazy_call(internal_call, "%s must be supplied when %s is empty.", r_c_str_format_error_arg(".absent"), r_c_str_format_error_arg("...")); } out = absent; } else { out = r_int(common); } return out; } r_ssize vec_size_common( r_obj* xs, r_ssize absent, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { struct size_common_reduce_opts reduce_opts = { .current_size = -1, .call = call }; // Interested in `reduce_opts.current_size`, // not in the returned `r_obj*` from `reduce()` reduce( r_null, vec_args.empty, p_xs_arg, xs, &size2_common, &reduce_opts ); r_ssize out = reduce_opts.current_size; if (out == -1) { out = absent; } return out; } /** * `vec_size2()` implementation * * `left` works the same as `vec_typeof2_impl()` * * @param left Output parameter. Set to 1 when the common size comes * from the left, 0 when it comes from the right, and -1 when it * comes from both sides. This means that "left" is the default * when coerced to a boolean value. */ static inline r_obj* vec_size2_impl( r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, int* left ) { // `NULL` handling rules: // - If `x` and `y` are `NULL`, do nothing // - If `x` is `NULL`, use `y` // - If `y` is `NULL`, use `x` // // The first rule is important to ensure that this works // `vec_size_common(NULL, .absent = 5L)` if (x == r_null) { if (y == r_null) { *left = -1; return x; } else { *left = 0; return y; } } if (y == r_null) { if (x == r_null) { r_stop_unreachable(); } else { *left = 1; return x; } } // Now apply common size rules // - Same size, use `x` // - Size 1 `x`, use `y` // - Size 1 `y`, use `x` if (x_size == y_size) { *left = -1; return x; } if (x_size == 1) { *left = 0; return y; } if (y_size == 1) { *left = 1; return x; } stop_incompatible_size( x, y, x_size, y_size, p_x_arg, p_y_arg, call ); } // Size2 computation // // `reduce_opts->current_size` updates when we switch to `y` static r_obj* size2_common( r_obj* x, r_obj* y, struct counters* counters, void* data ) { struct size_common_reduce_opts* reduce_opts = data; const r_ssize x_size = reduce_opts->current_size; const r_ssize y_size = vec_size_params(y, counters->next_arg, reduce_opts->call); int left = -1; r_obj* out = vec_size2_impl( x, y, x_size, y_size, counters->curr_arg, counters->next_arg, reduce_opts->call, &left ); if (!left) { counters_shift(counters); reduce_opts->current_size = y_size; } return out; } // [[ register(external = TRUE) ]] r_obj* ffi_recycle_common(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* env) { args = r_node_cdr(args); struct r_lazy call = { .x = syms.dot_call, .env = env }; struct r_lazy internal_call = { .x = env, .env = r_null }; struct r_lazy xs_arg_lazy = { .x = syms.dot_arg, .env = env }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* ffi_size = r_node_car(args); r_ssize size; if (ffi_size == r_null) { size = vec_size_common(xs, -1, &xs_arg, call); } else { size = vec_as_short_length( ffi_size, vec_args.dot_size, internal_call ); } r_obj* out = vec_recycle_common(xs, size, &xs_arg, call); return out; } r_obj* vec_recycle_common( r_obj* xs, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { if (size < 0) { return xs; } r_obj* const* v_xs = r_list_cbegin(xs); r_obj* xs_names = KEEP(r_names(xs)); const r_ssize xs_size = r_length(xs); r_ssize xs_index = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_xs_arg, xs_names, xs_size, &xs_index ); KEEP(p_x_arg->shelter); // If all elements are of size `size`, there is nothing to do // and we can avoid an allocation for (r_ssize i = 0; i < xs_size; ++i) { r_obj* x = v_xs[i]; if (!vec_is_size(x, size, VCTRS_ALLOW_NULL_yes, p_x_arg, call)) { break; } ++xs_index; } if (xs_index == xs_size) { FREE(2); return xs; } // Otherwise we need a new list r_obj* out = KEEP(r_alloc_list(xs_size)); r_attrib_poke_names(out, xs_names); // Copy over everything before `xs_index` for (r_ssize i = 0; i < xs_index; ++i) { r_obj* x = v_xs[i]; r_list_poke(out, i, x); } // Recycle everything at and after `xs_index` for (r_ssize i = xs_index; i < xs_size; ++i) { r_obj* x = v_xs[i]; r_list_poke(out, i, vec_recycle(x, size, p_x_arg, call)); ++xs_index; } FREE(3); return out; } vctrs/src/size.c0000644000176200001440000001612315156020761013326 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/size-decl.h" // [[ register() ]] r_obj* ffi_size(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = frame, .env = r_null }; return r_len(vec_size_params(x, vec_args.x, call)); } r_ssize vec_size(r_obj* x) { return vec_size_params(x, vec_args.x, lazy_calls.vec_size); } r_ssize vec_size_params( r_obj* x, struct vctrs_arg* p_x_arg, struct r_lazy call ) { struct vctrs_proxy_info info = vec_proxy_info(x); KEEP(info.inner); r_obj* data = info.inner; r_ssize size; switch (info.type) { case VCTRS_TYPE_null: size = 0; break; case VCTRS_TYPE_logical: case VCTRS_TYPE_integer: case VCTRS_TYPE_double: case VCTRS_TYPE_complex: case VCTRS_TYPE_character: case VCTRS_TYPE_raw: case VCTRS_TYPE_list: size = vec_raw_size(data); break; case VCTRS_TYPE_dataframe: size = df_size(data); break; default: stop_scalar_type(x, p_x_arg, call); } FREE(1); return size; } static r_ssize vec_raw_size(r_obj* x) { r_obj* dimensions = r_dim(x); if (dimensions == r_null || r_length(dimensions) == 0) { return r_length(x); } if (r_typeof(dimensions) != R_TYPE_integer) { r_stop_internal("Corrupt vector, `dim` attribute is not an integer vector."); } return r_int_get(dimensions, 0); } // [[ register() ]] r_obj* ffi_list_sizes(r_obj* x, r_obj* frame) { struct r_lazy call = { .x = frame, .env = r_null }; return list_sizes(x, vec_args.x, call); } r_obj* list_sizes( r_obj* xs, struct vctrs_arg* p_xs_arg, struct r_lazy call ) { obj_check_list(xs, p_xs_arg, call); r_ssize size = vec_size(xs); r_obj* const * v_xs = r_list_cbegin(xs); r_obj* out = KEEP(r_alloc_integer(size)); int* v_out = r_int_begin(out); r_obj* names = vec_names(xs); r_attrib_poke_names(out, names); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg_vec(p_xs_arg, xs, &i); KEEP(p_x_arg->shelter); for (; i < size; ++i) { r_obj* x = v_xs[i]; v_out[i] = vec_size_params(x, p_x_arg, call); } FREE(2); return out; } r_obj* df_rownames_size_cb(r_obj* tag, r_obj* value, void* _data) { if (tag == r_syms.row_names) { // Found row names return value; } else { // Continue iterating return NULL; } } // For performance, avoid Rf_getAttrib() because it automatically transforms // automatic compact rownames into an ALTREP intseq. r_ssize df_rownames_size(r_obj* x) { r_obj* row_names = r_attrib_map(x, df_rownames_size_cb, NULL); if (row_names == NULL) { return -1; } return rownames_size(row_names); } r_ssize df_size(r_obj* x) { r_ssize n = df_rownames_size(x); if (n < 0) { r_stop_internal("Corrupt data frame: row.names are missing"); } return n; } // Supports bare lists as well r_ssize df_raw_size(r_obj* x) { r_ssize n = df_rownames_size(x); if (n >= 0) { return n; } return df_raw_size_from_list(x); } r_ssize df_raw_size_from_list(r_obj* x) { if (r_length(x) >= 1) { return vec_size(r_list_get(x, 0)); } else { return 0; } } // [[ register() ]] SEXP vctrs_df_size(SEXP x) { return r_int(df_raw_size(x)); } r_obj* vec_recycle( r_obj* x, r_ssize size, struct vctrs_arg* p_x_arg, struct r_lazy call ) { if (x == r_null) { return r_null; } r_ssize n_x = vec_size(x); if (n_x == size) { return x; } if (n_x == 1L) { r_obj* i = KEEP(compact_rep(1, size)); r_obj* out = vec_slice_unsafe(x, i); FREE(1); return out; } stop_recycle_incompatible_size(n_x, size, p_x_arg, call); } // [[ register() ]] r_obj* ffi_recycle(r_obj* x, r_obj* size_obj, r_obj* frame) { if (x == r_null || size_obj == r_null) { return r_null; } struct r_lazy recycle_call = { .x = frame, .env = r_null }; size_obj = KEEP(vec_cast(size_obj, r_globals.empty_int, vec_args.empty, vec_args.empty, recycle_call)); R_len_t size = r_int_get(size_obj, 0); FREE(1); struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; return vec_recycle(x, size, &x_arg, call); } r_obj* vec_recycle_fallback(r_obj* x, r_ssize size, struct vctrs_arg* x_arg, struct r_lazy call) { if (x == r_null) { return r_null; } r_ssize x_size = vec_size(x); if (x_size == size) { return x; } if (x_size == 1) { r_obj* subscript = KEEP(r_alloc_integer(size)); r_int_fill(subscript, 1, size); r_obj* out = vec_slice_fallback(x, subscript); FREE(1); return out; } stop_recycle_incompatible_size(x_size, size, x_arg, call); } r_obj* ffi_as_short_length(r_obj* n, r_obj* frame) { struct r_lazy call = { .x = frame, .env = r_null }; struct r_lazy arg_lazy = { .x = syms.arg, .env = frame }; struct vctrs_arg arg = new_lazy_arg(&arg_lazy); return r_len(vec_as_short_length(n, &arg, call)); } r_ssize vec_as_short_length(r_obj* n, struct vctrs_arg* p_arg, struct r_lazy call) { r_ssize out = vec_as_ssize(n, p_arg, call); if (out < 0) { r_abort_lazy_call(call, "%s must be a positive number or zero.", vec_arg_format(p_arg)); } if (out > INT_MAX) { // Ideally we'd mention long vector support in an info bullets r_abort_lazy_call(call, "%s is too large a number and long vectors are not supported.", vec_arg_format(p_arg)); } return out; } // Adapted from `r_arg_as_ssize()` r_ssize vec_as_ssize(r_obj* n, struct vctrs_arg* p_arg, struct r_lazy call) { if (r_is_object(n)) { struct cast_opts cast_opts = { .x = n, .to = r_globals.empty_dbl, .p_x_arg = p_arg, .call = call }; ERR err = NULL; n = vec_cast_e(&cast_opts, &err); if (err) { goto invalid; } } KEEP(n); switch (r_typeof(n)) { case R_TYPE_double: { if (r_length(n) != 1) { goto invalid; } double out = r_dbl_get(n, 0); if (out == r_globals.na_int) { goto invalid; } if (out != floor(out)) { r_abort_lazy_call(call, "%s must be a whole number, not a fractional number.", vec_arg_format(p_arg)); } if (out > R_SSIZE_MAX) { r_abort_lazy_call(call, "%s is too large a number.", vec_arg_format(p_arg)); } FREE(1); return (r_ssize) out; } case R_TYPE_integer: { if (r_length(n) != 1) { goto invalid; } int out = r_int_get(n, 0); if (out == r_globals.na_int) { goto invalid; } FREE(1); return (r_ssize) out; } invalid: default: r_abort_lazy_call(call, "%s must be a single number, not %s.", vec_arg_format(p_arg), r_obj_type_friendly_length(n)); } } vctrs/src/ptype2-dispatch.c0000644000176200001440000001562615156001116015374 0ustar liggesusers#include "vctrs.h" #include "type-factor.h" #include "type-tibble.h" #include "decl/ptype2-dispatch-decl.h" r_obj* vec_ptype2_dispatch_native( r_obj* x, r_obj* y, enum vctrs_type x_type, enum vctrs_type y_type, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left ) { enum vctrs_type2_s3 type2_s3 = vec_typeof2_s3_impl(x, y, x_type, y_type, left); switch (type2_s3) { case VCTRS_TYPE2_S3_character_bare_factor: case VCTRS_TYPE2_S3_character_bare_ordered: return r_globals.empty_chr; case VCTRS_TYPE2_S3_bare_factor_bare_factor: return fct_ptype2( x, y, p_x_arg, p_y_arg ); case VCTRS_TYPE2_S3_bare_ordered_bare_ordered: return ord_ptype2( x, y, p_x_arg, p_y_arg, call, s3_fallback ); case VCTRS_TYPE2_S3_bare_date_bare_date: return vctrs_shared_empty_date; case VCTRS_TYPE2_S3_bare_date_bare_posixct: case VCTRS_TYPE2_S3_bare_date_bare_posixlt: return date_datetime_ptype2(x, y); case VCTRS_TYPE2_S3_bare_posixct_bare_posixct: case VCTRS_TYPE2_S3_bare_posixct_bare_posixlt: case VCTRS_TYPE2_S3_bare_posixlt_bare_posixlt: return datetime_datetime_ptype2(x, y); case VCTRS_TYPE2_S3_dataframe_bare_tibble: case VCTRS_TYPE2_S3_bare_tibble_bare_tibble: return tib_ptype2( x, y, p_x_arg, p_y_arg, call, s3_fallback ); default: return r_null; } } // @param from_dispatch Used to implement special behaviour when // `vec_default_ptype2()` is invoked directly from the dispatch // mechanism as opposed from a method. static inline r_obj* vec_ptype2_default_full(r_obj* x, r_obj* y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call, enum s3_fallback s3_fallback, bool from_dispatch) { r_obj* ffi_s3_fallback = KEEP(r_int(s3_fallback)); r_obj* ffi_x_arg = KEEP(vctrs_arg(x_arg)); r_obj* ffi_y_arg = KEEP(vctrs_arg(y_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(call)); r_obj* out = vctrs_eval_mask7(syms_vec_ptype2_default, syms_x, x, syms_y, y, syms_x_arg, ffi_x_arg, syms_y_arg, ffi_y_arg, syms_call, ffi_call, syms_from_dispatch, r_lgl(from_dispatch), syms_s3_fallback, ffi_s3_fallback); FREE(4); return out; } r_obj* vec_ptype2_default(r_obj* x, r_obj* y, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call, enum s3_fallback s3_fallback) { return vec_ptype2_default_full(x, y, x_arg, y_arg, call, s3_fallback, false); } r_obj* vec_ptype2_dispatch_s3( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { x = KEEP(vec_ptype(x, p_x_arg, call)); y = KEEP(vec_ptype(y, p_y_arg, call)); r_obj* method_sym = r_null; r_obj* method = s3_find_method_xy("vec_ptype2", x, y, vctrs_method_table, &method_sym); // Compatibility with legacy double dispatch mechanism if (method == r_null) { r_obj* x_method_sym = r_null; r_obj* x_method = KEEP(s3_find_method2( "vec_ptype2", x, vctrs_method_table, &x_method_sym )); if (x_method != r_null) { // Only `x_method`s contained within a package will // have an S3 methods table to look in r_obj* x_table = s3_get_table(r_fn_env(x_method)); if (x_table != r_null) { const char* x_method_str = r_sym_c_string(x_method_sym); method = s3_find_method2( x_method_str, y, x_table, &method_sym ); } } FREE(1); } KEEP(method); if (method == r_null) { r_obj* out = vec_ptype2_default_full( x, y, p_x_arg, p_y_arg, call, s3_fallback, true ); FREE(3); return out; } r_obj* ffi_x_arg = KEEP(vctrs_arg(p_x_arg)); r_obj* ffi_y_arg = KEEP(vctrs_arg(p_y_arg)); r_obj* out = vec_invoke_coerce_method( method_sym, method, syms_x, x, syms_y, y, syms_x_arg, ffi_x_arg, syms_y_arg, ffi_y_arg, call, s3_fallback ); FREE(5); return out; } r_obj* vec_invoke_coerce_method(r_obj* method_sym, r_obj* method, r_obj* x_sym, r_obj* x, r_obj* y_sym, r_obj* y, r_obj* x_arg_sym, r_obj* x_arg, r_obj* y_arg_sym, r_obj* y_arg, struct r_lazy lazy_call, enum s3_fallback s3_fallback) { r_obj* call = KEEP(r_lazy_eval(lazy_call)); if (s3_fallback != S3_FALLBACK_false) { r_obj* ffi_s3_fallback = KEEP(r_int(s3_fallback)); r_obj* out = vctrs_dispatch6(method_sym, method, x_sym, x, y_sym, y, x_arg_sym, x_arg, y_arg_sym, y_arg, syms_call, call, syms_s3_fallback, ffi_s3_fallback); FREE(2); return out; } else { r_obj* out = vctrs_dispatch5(method_sym, method, x_sym, x, y_sym, y, x_arg_sym, x_arg, y_arg_sym, y_arg, syms_call, call); FREE(1); return out; } } // [[ register() ]] r_obj* ffi_ptype2_dispatch_native(r_obj* x, r_obj* y, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; const enum s3_fallback s3_fallback = s3_fallback_from_opts(opts); int _; r_obj* out = vec_ptype2_dispatch_native( x, y, vec_typeof(x), vec_typeof(y), &x_arg, &y_arg, call, s3_fallback, &_ ); if (out == r_null) { out = vec_ptype2_default_full( x, y, &x_arg, &y_arg, call, s3_fallback, true ); return out; } else { return out; } } void vctrs_init_ptype2_dispatch(r_obj* ns) { syms_vec_ptype2_default = r_sym("vec_default_ptype2"); } static r_obj* syms_vec_ptype2_default = NULL; vctrs/src/poly-op.c0000644000176200001440000001150115047425317013753 0ustar liggesusers#include "vctrs.h" #include "decl/poly-op-decl.h" // TODO: Remove in favor of inlining `p_*_compare_na_equal()` // through the use of a macro. As is, this prevents significant // inlining optimizations. Currently only used in `interval.c`. poly_binary_int_fn* poly_p_compare_na_equal(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_compare_na_equal; case VCTRS_TYPE_logical: return p_lgl_compare_na_equal; case VCTRS_TYPE_integer: return p_int_compare_na_equal; case VCTRS_TYPE_double: return p_dbl_compare_na_equal; case VCTRS_TYPE_complex: return p_cpl_compare_na_equal; case VCTRS_TYPE_character: return p_chr_compare_na_equal; case VCTRS_TYPE_raw: return p_raw_compare_na_equal; case VCTRS_TYPE_list: return p_list_compare_na_equal; case VCTRS_TYPE_dataframe: return p_df_compare_na_equal; default: stop_unimplemented_vctrs_type("poly_p_compare_na_equal", type); } } // TODO: Remove in favor of inlining `p_*_is_missing()` // through the use of a macro. As is, this prevents significant // inlining optimizations. Currently only used in `interval.c`. poly_unary_bool_fn* poly_p_is_missing(enum vctrs_type type) { switch (type) { case VCTRS_TYPE_null: return p_nil_is_missing; case VCTRS_TYPE_logical: return p_lgl_is_missing; case VCTRS_TYPE_integer: return p_int_is_missing; case VCTRS_TYPE_double: return p_dbl_is_missing; case VCTRS_TYPE_complex: return p_cpl_is_missing; case VCTRS_TYPE_character: return p_chr_is_missing; case VCTRS_TYPE_raw: return p_raw_is_missing; case VCTRS_TYPE_list: return p_list_is_missing; case VCTRS_TYPE_dataframe: return p_df_is_missing; default: stop_unimplemented_vctrs_type("poly_p_is_missing", type); } } struct poly_vec* new_poly_vec(r_obj* proxy, enum vctrs_type type) { r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* self = r_alloc_raw(sizeof(struct poly_vec)); r_list_poke(shelter, 0, self); r_list_poke(shelter, 1, proxy); struct poly_vec* p_poly_vec = r_raw_begin(self); p_poly_vec->shelter = shelter; p_poly_vec->vec = proxy; p_poly_vec->type = type; switch (type) { case VCTRS_TYPE_null: init_nil_poly_vec(p_poly_vec); break; case VCTRS_TYPE_logical: init_lgl_poly_vec(p_poly_vec); break; case VCTRS_TYPE_integer: init_int_poly_vec(p_poly_vec); break; case VCTRS_TYPE_double: init_dbl_poly_vec(p_poly_vec); break; case VCTRS_TYPE_complex: init_cpl_poly_vec(p_poly_vec); break; case VCTRS_TYPE_character: init_chr_poly_vec(p_poly_vec); break; case VCTRS_TYPE_raw: init_raw_poly_vec(p_poly_vec); break; case VCTRS_TYPE_list: init_list_poly_vec(p_poly_vec); break; case VCTRS_TYPE_dataframe: init_df_poly_vec(p_poly_vec); break; default: stop_unimplemented_vctrs_type("new_poly_vec", type); } FREE(1); return p_poly_vec; } static void init_nil_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = NULL; } static void init_lgl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_lgl_cbegin(p_poly_vec->vec); } static void init_int_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_int_cbegin(p_poly_vec->vec); } static void init_dbl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_dbl_cbegin(p_poly_vec->vec); } static void init_cpl_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_cpl_cbegin(p_poly_vec->vec); } static void init_chr_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_chr_cbegin(p_poly_vec->vec); } static void init_raw_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_raw_cbegin(p_poly_vec->vec); } static void init_list_poly_vec(struct poly_vec* p_poly_vec) { p_poly_vec->p_vec = (const void*) r_list_cbegin(p_poly_vec->vec); } static void init_df_poly_vec(struct poly_vec* p_poly_vec) { r_obj* df = p_poly_vec->vec; r_ssize n_col = r_length(df); r_obj* shelter = KEEP(r_alloc_list(4)); r_list_poke(shelter, 0, p_poly_vec->shelter); p_poly_vec->shelter = shelter; r_obj* data_handle = KEEP(r_alloc_raw(sizeof(struct poly_df_data))); struct poly_df_data* data = (struct poly_df_data*) r_raw_begin(data_handle); r_list_poke(shelter, 1, data_handle); r_obj* col_type_handle = KEEP(r_alloc_raw(n_col * sizeof(enum vctrs_type))); enum vctrs_type* v_col_type = (enum vctrs_type*) r_raw_begin(col_type_handle); r_list_poke(shelter, 2, col_type_handle); r_obj* col_ptr_handle = KEEP(r_alloc_raw(n_col * sizeof(void*))); const void** v_col_ptr = (const void**) r_raw_begin(col_ptr_handle); r_list_poke(shelter, 3, col_ptr_handle); for (r_ssize i = 0; i < n_col; ++i) { r_obj* col = r_list_get(df, i); v_col_type[i] = vec_proxy_typeof(col); v_col_ptr[i] = r_vec_cbegin(col); } data->v_col_type = v_col_type; data->v_col_ptr = v_col_ptr; data->n_col = n_col; p_poly_vec->p_vec = (void*) data; FREE(4); } vctrs/src/vctrs.h0000644000176200001440000001771715156537555013552 0ustar liggesusers#ifndef VCTRS_H #define VCTRS_H #include "vctrs-core.h" // IWYU pragma: export // Vector types ------------------------------------------------- #include "type-info.h" // IWYU pragma: export #include "arg-counter.h" // IWYU pragma: export #include "arg.h" // IWYU pragma: export #include "assert.h" // IWYU pragma: export #include "c.h" // IWYU pragma: export #include "cast-bare.h" // IWYU pragma: export #include "cast-dispatch.h" // IWYU pragma: export #include "cast.h" // IWYU pragma: export #include "compare.h" // IWYU pragma: export #include "complete.h" // IWYU pragma: export #include "conditions.h" // IWYU pragma: export #include "dictionary.h" // IWYU pragma: export #include "dim.h" // IWYU pragma: export #include "encoding.h" // IWYU pragma: export #include "equal.h" // IWYU pragma: export #include "expand.h" // IWYU pragma: export #include "lazy.h" // IWYU pragma: export #include "list-combine.h" // IWYU pragma: export #include "match-compare.h" // IWYU pragma: export #include "match-joint.h" // IWYU pragma: export #include "missing.h" // IWYU pragma: export #include "names.h" // IWYU pragma: export #include "order-collate.h" // IWYU pragma: export #include "order-groups.h" // IWYU pragma: export #include "order-sortedness.h" // IWYU pragma: export #include "order.h" // IWYU pragma: export #include "ownership.h" // IWYU pragma: export #include "poly-op.h" // IWYU pragma: export #include "proxy.h" // IWYU pragma: export #include "proxy-restore.h" // IWYU pragma: export #include "ptype-common.h" // IWYU pragma: export #include "ptype.h" // IWYU pragma: export #include "ptype2-dispatch.h" // IWYU pragma: export #include "ptype2.h" // IWYU pragma: export #include "rep.h" // IWYU pragma: export #include "runs.h" // IWYU pragma: export #include "set.h" // IWYU pragma: export #include "shape.h" // IWYU pragma: export #include "size-common.h" // IWYU pragma: export #include "size.h" // IWYU pragma: export #include "slice-assign.h" // IWYU pragma: export #include "slice.h" // IWYU pragma: export #include "slice-chop.h" // IWYU pragma: export #include "strides.h" // IWYU pragma: export #include "subscript-loc.h" // IWYU pragma: export #include "subscript.h" // IWYU pragma: export #include "typeof2.h" // IWYU pragma: export #include "typeof2-s3.h" // IWYU pragma: export #include "utils-dispatch.h" // IWYU pragma: export #include "utils.h" // IWYU pragma: export #include "unspecified.h" // IWYU pragma: export // Vector methods ------------------------------------------------ enum vctrs_proxy_kind { VCTRS_PROXY_KIND_equal = 0, VCTRS_PROXY_KIND_compare, VCTRS_PROXY_KIND_order }; SEXP vec_proxy(SEXP x); SEXP vec_proxy_equal(SEXP x); SEXP vec_proxy_compare(SEXP x); SEXP vec_proxy_order(SEXP x); SEXP vec_proxy_unwrap(SEXP x); SEXP vec_slice_shaped(enum vctrs_type type, SEXP x, SEXP index); bool vec_requires_fallback(SEXP x, struct vctrs_proxy_info info); r_obj* vec_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); SEXP vec_names(SEXP x); SEXP vec_proxy_names(SEXP x); SEXP vec_group_loc(SEXP x); SEXP vec_match_params(SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline SEXP vec_match(SEXP needles, SEXP haystack) { return vec_match_params(needles, haystack, true, NULL, NULL, r_lazy_null); } SEXP vec_in( SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* p_needles_arg, struct vctrs_arg* p_haystack_arg, struct r_lazy call ); bool is_data_frame(SEXP x); SEXP vec_unique(SEXP x); bool duplicated_any(SEXP names); // Data frame column iteration ---------------------------------- // Used in functions that treat data frames as vectors of rows, but // iterate over columns. Examples are `vec_equal()` and // `vec_compare()`. /** * @member row_known A boolean array of size `n_row`. Allocated on the R heap. * Initially, all values are initialized to `false`. As we iterate along the * columns, we flip the corresponding row's `row_known` value to `true` if we * can determine the `out` value for that row from the current columns. * Once a row's `row_known` value is `true`, we never check that row again * as we continue through the columns. * @member p_row_known A pointer to the boolean array stored in `row_known`. * Initialized with `(bool*) RAW(info.row_known)`. * @member remaining The number of `row_known` values that are still `false`. * If this hits `0` before we traverse the entire data frame, we can exit * immediately because all `out` values are already known. * @member size The number of rows in the data frame. */ struct df_short_circuit_info { SEXP row_known; bool* p_row_known; PROTECT_INDEX row_known_pi; R_len_t remaining; R_len_t size; }; #define PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, p_n) do { \ PROTECT_WITH_INDEX((p_info)->row_known, &(p_info)->row_known_pi); \ *(p_n) += 1; \ } while (0) static inline struct df_short_circuit_info new_df_short_circuit_info(R_len_t size, bool lazy) { SEXP row_known; bool* p_row_known; if (lazy) { row_known = PROTECT(R_NilValue); p_row_known = NULL; } else { row_known = PROTECT(Rf_allocVector(RAWSXP, size * sizeof(bool))); p_row_known = (bool*) RAW(row_known); // To begin with, no rows have a known comparison value r_memset(p_row_known, false, size * sizeof(bool)); } struct df_short_circuit_info info = { .row_known = row_known, .p_row_known = p_row_known, .remaining = size, .size = size }; UNPROTECT(1); return info; } static inline void init_lazy_df_short_circuit_info(struct df_short_circuit_info* p_info) { if (p_info->row_known != R_NilValue) { return; } p_info->row_known = Rf_allocVector(RAWSXP, p_info->size * sizeof(bool)); REPROTECT(p_info->row_known, p_info->row_known_pi); p_info->p_row_known = (bool*) RAW(p_info->row_known); } // Factor methods ----------------------------------------------- SEXP chr_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg); SEXP chr_as_ordered(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* to_arg); SEXP fct_as_character(SEXP x, struct vctrs_arg* x_arg); SEXP fct_as_factor(SEXP x, SEXP to, bool* lossy, struct vctrs_arg* x_arg, struct vctrs_arg* to_arg); SEXP ord_as_character(SEXP x, struct vctrs_arg* x_arg); // Datetime methods --------------------------------------------- SEXP date_as_date(SEXP x); SEXP date_as_posixct(SEXP x, SEXP to); SEXP date_as_posixlt(SEXP x, SEXP to); SEXP posixct_as_date(SEXP x, bool* lossy); SEXP posixlt_as_date(SEXP x, bool* lossy); SEXP posixct_as_posixct(SEXP x, SEXP to); SEXP posixlt_as_posixct(SEXP x, SEXP to); SEXP posixct_as_posixlt(SEXP x, SEXP to); SEXP posixlt_as_posixlt(SEXP x, SEXP to); SEXP vec_date_restore(SEXP x, SEXP to, const enum vctrs_ownership ownership); SEXP vec_posixct_restore(SEXP x, SEXP to, const enum vctrs_ownership ownership); SEXP vec_posixlt_restore(SEXP x, SEXP to, const enum vctrs_ownership ownership); SEXP date_datetime_ptype2(SEXP x, SEXP y); SEXP datetime_datetime_ptype2(SEXP x, SEXP y); // Growable vector ---------------------------------------------- struct growable { SEXP x; SEXPTYPE type; void* array; PROTECT_INDEX idx; int n; int capacity; }; struct growable new_growable(SEXPTYPE type, int capacity); SEXP growable_values(struct growable* g); static inline void growable_push_int(struct growable* g, int i) { if (g->n == g->capacity) { g->capacity *= 2; g->x = Rf_lengthgets(g->x, g->capacity); REPROTECT(g->x, g->idx); g->array = INTEGER(g->x); } int* p = (int*) g->array; p[g->n] = i; ++(g->n); } #define PROTECT_GROWABLE(g, n) do { \ PROTECT_WITH_INDEX((g)->x, &((g)->idx)); \ *n += 1; \ } while(0) #define UNPROTECT_GROWABLE(g) do { UNPROTECT(1);} while(0) #endif vctrs/src/names.h0000644000176200001440000000424415156001116013456 0ustar liggesusers#ifndef VCTRS_NAMES_H #define VCTRS_NAMES_H #include "vctrs-core.h" r_obj* vec_names(r_obj* x); r_obj* vec_names2(r_obj* x); r_obj* vec_proxy_names(r_obj* x); r_obj* vec_unique_names(r_obj* x, bool quiet); r_obj* vec_unique_colnames(r_obj* x, bool quiet); r_obj* outer_names(r_obj* names, r_obj* outer, r_ssize n); r_obj* apply_name_spec(r_obj* name_spec, r_obj* outer, r_obj* inner, r_ssize n); bool name_spec_is_inner(r_obj* name_spec); enum name_repair_type { NAME_REPAIR_none = 0, NAME_REPAIR_minimal, NAME_REPAIR_unique, NAME_REPAIR_universal, NAME_REPAIR_check_unique, NAME_REPAIR_custom = 99 }; struct name_repair_opts { r_obj* shelter; enum name_repair_type type; struct r_lazy name_repair_arg; r_obj* fn; bool quiet; struct r_lazy call; }; struct name_repair_opts new_name_repair_opts(r_obj* name_repair, struct r_lazy name_repair_arg, bool quiet, struct r_lazy call); r_obj* vec_as_universal_names(r_obj* names, bool quiet); r_obj* vec_as_custom_names(r_obj* names, const struct name_repair_opts* opts); extern r_obj* name_spec_inner; extern struct name_repair_opts unique_repair_default_opts; extern struct name_repair_opts unique_repair_silent_opts; extern struct name_repair_opts no_repair_opts; static struct name_repair_opts const * const p_unique_repair_default_opts = &unique_repair_default_opts; static struct name_repair_opts const * const p_unique_repair_silent_opts = &unique_repair_silent_opts; static struct name_repair_opts const * const p_no_repair_opts = &no_repair_opts; r_obj* vec_as_names(r_obj* names, const struct name_repair_opts* opts); const char* name_repair_arg_as_c_string(enum name_repair_type type); bool is_unique_names(r_obj* names); r_obj* vec_as_unique_names(r_obj* names, bool quiet); r_obj* r_seq_chr(const char* prefix, r_ssize n); r_obj* r_chr_paste_prefix(r_obj* names, const char* prefix, const char* sep); r_obj* vec_set_names(r_obj* x, r_obj* names, const enum vctrs_ownership ownership); r_obj* vec_proxy_set_names(r_obj* x, r_obj* names, const enum vctrs_ownership ownership); #endif vctrs/src/ownership.h0000644000176200001440000000146215056611175014403 0ustar liggesusers#ifndef VCTRS_OWNERSHIP_H #define VCTRS_OWNERSHIP_H #include "vctrs-core.h" #include "utils.h" // Wrapper around `r_clone_referenced()` that knows about `ownership` // // For example, if we have `VCTRS_OWNERSHIP_deep` ownership over a data frame // and one of its columns passes through here, we won't clone it even though it // has a refcount >0. // // For `VCTRS_OWNERSHIP_foreign`, we only clone if not already referenced. The // goal is generally to avoid cloning temporary values passed to vctrs // functions. static inline SEXP vec_clone_referenced(SEXP x, const enum vctrs_ownership ownership) { switch (ownership) { case VCTRS_OWNERSHIP_foreign: return r_clone_referenced(x); case VCTRS_OWNERSHIP_shallow: return x; case VCTRS_OWNERSHIP_deep: return x; default: r_stop_unreachable(); } } #endif vctrs/src/subscript.h0000644000176200001440000000327214362266120014377 0ustar liggesusers#ifndef VCTRS_SUBSCRIPT_H #define VCTRS_SUBSCRIPT_H #include "vctrs-core.h" #include "utils.h" enum subscript_action { SUBSCRIPT_ACTION_DEFAULT = 0, SUBSCRIPT_ACTION_SUBSET, SUBSCRIPT_ACTION_EXTRACT, SUBSCRIPT_ACTION_ASSIGN, SUBSCRIPT_ACTION_RENAME, SUBSCRIPT_ACTION_REMOVE, SUBSCRIPT_ACTION_NEGATE }; enum subscript_type_action { SUBSCRIPT_TYPE_ACTION_CAST = 0, SUBSCRIPT_TYPE_ACTION_ERROR }; struct subscript_opts { enum subscript_action action; enum subscript_type_action logical; enum subscript_type_action numeric; enum subscript_type_action character; struct vctrs_arg* subscript_arg; struct r_lazy call; }; static inline struct subscript_opts new_subscript_opts_assign(void) { return (struct subscript_opts) { .action = SUBSCRIPT_ACTION_ASSIGN }; } SEXP vec_as_subscript_opts(SEXP subscript, const struct subscript_opts* opts, ERR* err); static inline SEXP subscript_type_action_chr(enum subscript_type_action action) { switch (action) { case SUBSCRIPT_TYPE_ACTION_CAST: return chrs_cast; case SUBSCRIPT_TYPE_ACTION_ERROR: return chrs_error; } never_reached("subscript_type_action_chr"); } static inline SEXP get_opts_action(const struct subscript_opts* opts) { switch (opts->action) { case SUBSCRIPT_ACTION_DEFAULT: return R_NilValue; case SUBSCRIPT_ACTION_SUBSET: return chrs_subset; case SUBSCRIPT_ACTION_EXTRACT: return chrs_extract; case SUBSCRIPT_ACTION_ASSIGN: return chrs_assign; case SUBSCRIPT_ACTION_RENAME: return chrs_rename; case SUBSCRIPT_ACTION_REMOVE: return chrs_remove; case SUBSCRIPT_ACTION_NEGATE: return chrs_negate; } never_reached("get_opts_action"); } #endif vctrs/src/hash.h0000644000176200001440000000511615072256373013313 0ustar liggesusers#ifndef VCTRS_HASH_H #define VCTRS_HASH_H #include "vctrs-core.h" #define HASH_MISSING 1 // ---------------------------------------------------------------------------- // Vector void vec_hash_fill(r_obj* x, r_ssize size, bool na_equal, uint32_t* v_out); // ---------------------------------------------------------------------------- // Object // Must be exposed for `list_hash_scalar()` uint32_t obj_hash(r_obj* x); // ---------------------------------------------------------------------------- // Hash utilities // boost::hash_combine from https://stackoverflow.com/questions/35985960 static inline uint32_t hash_combine(uint32_t x, uint32_t y) { return x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); } // 32-bit mixer from murmurhash // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L68 static inline uint32_t uint32_hash(uint32_t x) { x ^= x >> 16; x *= 0x85ebca6b; x ^= x >> 13; x *= 0xc2b2ae35; x ^= x >> 16; return x; } // 64-bit mixer from murmurhash // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L81 static inline uint32_t uint64_hash(uint64_t x) { x ^= x >> 33; x *= UINT64_C(0xff51afd7ed558ccd); x ^= x >> 33; x *= UINT64_C(0xc4ceb9fe1a85ec53); x ^= x >> 33; return x; } // ---------------------------------------------------------------------------- // Scalars static inline uint32_t lgl_hash_scalar(int x) { return uint32_hash(x); } static inline uint32_t int_hash_scalar(int x) { return uint32_hash(x); } static inline uint32_t dbl_hash_scalar(double x) { // Seems like something designed specifically for doubles should work better // but I haven't been able to find anything // Hash all NAs and NaNs to same value (i.e. ignoring significand) double value; switch (dbl_classify(x)) { case VCTRS_DBL_number: value = x; break; case VCTRS_DBL_missing: value = NA_REAL; break; case VCTRS_DBL_nan: value = R_NaN; break; default: r_stop_unreachable(); } // Treat positive/negative 0 as equivalent if (value == 0.0) { value = 0.0; } union { double d; uint64_t i; } value_union; value_union.d = value; return uint64_hash(value_union.i); } static inline uint32_t cpl_hash_scalar(r_complex x) { uint32_t hash = 0; hash = hash_combine(hash, dbl_hash_scalar(x.r)); hash = hash_combine(hash, dbl_hash_scalar(x.i)); return hash; } static inline uint32_t raw_hash_scalar(Rbyte x) { return uint32_hash(x); } static inline uint32_t chr_hash_scalar(r_obj* x) { return uint64_hash((uintptr_t) x); } static inline uint32_t list_hash_scalar(r_obj* x) { return obj_hash(x); } #endif vctrs/src/type-date-time.c0000644000176200001440000003224515156005042015202 0ustar liggesusers#include "vctrs.h" static SEXP new_date(SEXP x); static SEXP new_datetime(SEXP x, SEXP tzone); static SEXP new_empty_datetime(SEXP tzone); static SEXP date_validate(SEXP x); static SEXP datetime_validate(SEXP x); static SEXP datetime_validate_tzone(SEXP x); static SEXP datetime_validate_type(SEXP x); static SEXP datetime_rezone(SEXP x, SEXP tzone); static SEXP tzone_get(SEXP x); static SEXP tzone_union(SEXP x_tzone, SEXP y_tzone); static bool tzone_equal(SEXP x_tzone, SEXP y_tzone); static SEXP r_as_date(SEXP x); static SEXP r_as_posixct(SEXP x, SEXP tzone); static SEXP r_as_posixlt(SEXP x, SEXP tzone); static SEXP r_date_as_character(SEXP x); static SEXP r_chr_date_as_posixct(SEXP x, SEXP tzone); static SEXP r_chr_date_as_posixlt(SEXP x, SEXP tzone); static SEXP posixlt_as_posixct_impl(SEXP x, SEXP tzone); static SEXP posixct_as_posixlt_impl(SEXP x, SEXP tzone); // ----------------------------------------------------------------------------- // ptype2 // [[ include("vctrs.h") ]] SEXP date_datetime_ptype2(SEXP x, SEXP y) { SEXP x_class = PROTECT(Rf_getAttrib(x, R_ClassSymbol)); SEXP x_first_class = STRING_ELT(x_class, 0); SEXP tzone = (x_first_class == strings_date) ? tzone_get(y) : tzone_get(x); PROTECT(tzone); SEXP out = new_empty_datetime(tzone); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP datetime_datetime_ptype2(SEXP x, SEXP y) { SEXP x_tzone = PROTECT(tzone_get(x)); SEXP y_tzone = PROTECT(tzone_get(y)); // Never allocates SEXP tzone = tzone_union(x_tzone, y_tzone); SEXP out = new_empty_datetime(tzone); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // cast // [[ include("vctrs.h") ]] SEXP date_as_date(SEXP x) { return date_validate(x); } // [[ include("vctrs.h") ]] SEXP date_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); // Date -> character -> POSIXct // This is the only way to retain the same clock time SEXP out = PROTECT(r_date_as_character(x)); out = PROTECT(r_chr_date_as_posixct(out, tzone)); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP date_as_posixlt(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); // Date -> character -> POSIXlt // This is the only way to retain the same clock time SEXP out = PROTECT(r_date_as_character(x)); out = PROTECT(r_chr_date_as_posixlt(out, tzone)); UNPROTECT(3); return out; } static SEXP posixt_as_date(SEXP ct, SEXP lt, bool* lossy); // [[ include("vctrs.h") ]] SEXP posixct_as_date(SEXP x, bool* lossy) { SEXP ct = PROTECT(datetime_validate(x)); SEXP tzone = PROTECT(tzone_get(ct)); SEXP lt = PROTECT(posixct_as_posixlt_impl(ct, tzone)); SEXP out = posixt_as_date(ct, lt, lossy); UNPROTECT(3); return out; } // [[ include("vctrs.h") ]] SEXP posixlt_as_date(SEXP x, bool* lossy) { SEXP lt = x; SEXP tzone = PROTECT(tzone_get(lt)); SEXP ct = PROTECT(posixlt_as_posixct_impl(lt, tzone)); SEXP out = posixt_as_date(ct, lt, lossy); UNPROTECT(2); return out; } // POSIXct is required for lossy checking. // POSIXlt is required for converting to Date. // `as.Date.POSIXct()` must go through `as.POSIXlt()`, so the POSIXct // time alone is not enough. static SEXP posixt_as_date(SEXP ct, SEXP lt, bool* lossy) { ct = PROTECT(datetime_validate(ct)); const double* p_ct = REAL(ct); SEXP out = PROTECT(r_as_date(lt)); SEXP roundtrip = PROTECT(date_as_posixct(out, ct)); const double* p_roundtrip = REAL(roundtrip); const R_len_t size = Rf_length(out); for (R_len_t i = 0; i < size; ++i) { const double ct_elt = p_ct[i]; // `NaN` and `NA` always convert without issue if (isnan(ct_elt)) { continue; } const double roundtrip_elt = p_roundtrip[i]; if (ct_elt != roundtrip_elt) { *lossy = true; UNPROTECT(3); return R_NilValue; } } UNPROTECT(3); return out; } static SEXP posixct_as_posixct_impl(SEXP x, SEXP tzone); // [[ include("vctrs.h") ]] SEXP posixct_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixct_as_posixct_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixct_as_posixct_impl(SEXP x, SEXP tzone) { x = PROTECT(datetime_validate(x)); SEXP out = datetime_rezone(x, tzone); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP posixlt_as_posixct(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixlt_as_posixct_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixlt_as_posixct_impl(SEXP x, SEXP tzone) { SEXP x_tzone = PROTECT(tzone_get(x)); x = PROTECT(r_as_posixct(x, x_tzone)); SEXP out = posixct_as_posixct_impl(x, tzone); UNPROTECT(2); return out; } // [[ include("vctrs.h") ]] SEXP posixct_as_posixlt(SEXP x, SEXP to) { SEXP tzone = PROTECT(tzone_get(to)); SEXP out = posixct_as_posixlt_impl(x, tzone); UNPROTECT(1); return out; } static SEXP posixct_as_posixlt_impl(SEXP x, SEXP tzone) { return r_as_posixlt(x, tzone); } // [[ include("vctrs.h") ]] SEXP posixlt_as_posixlt(SEXP x, SEXP to) { SEXP x_tzone = PROTECT(tzone_get(x)); SEXP to_tzone = PROTECT(tzone_get(to)); if (tzone_equal(x_tzone, to_tzone)) { UNPROTECT(2); return x; } SEXP out = x; // `as.POSIXlt.default()` doesn't respect `tz` so we have to do: // POSIXlt -> POSIXct -> POSIXct -> POSIXlt out = PROTECT(posixlt_as_posixct_impl(out, x_tzone)); out = PROTECT(posixct_as_posixct_impl(out, to_tzone)); out = PROTECT(posixct_as_posixlt_impl(out, to_tzone)); UNPROTECT(5); return out; } // ----------------------------------------------------------------------------- // restore // [[ include("vctrs.h") ]] SEXP vec_date_restore(SEXP x, SEXP to, const enum vctrs_ownership ownership) { SEXP out = PROTECT(vec_restore_default(x, to, ownership)); out = date_validate(out); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP vec_posixct_restore(SEXP x, SEXP to, const enum vctrs_ownership ownership) { SEXP out = PROTECT(vec_restore_default(x, to, ownership)); out = datetime_validate(out); UNPROTECT(1); return out; } // [[ include("vctrs.h") ]] SEXP vec_posixlt_restore(SEXP x, SEXP to, const enum vctrs_ownership ownership) { SEXP out = PROTECT(vec_restore_default(x, to, ownership)); out = datetime_validate_tzone(out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_new_date(SEXP x) { return new_date(x); } static SEXP new_date(SEXP x) { if (TYPEOF(x) != REALSXP) { Rf_errorcall(R_NilValue, "`x` must be a double vector."); } SEXP names = PROTECT(r_names(x)); SEXP out = PROTECT(r_clone_referenced(x)); r_attrib_zap_all(out); r_attrib_poke_names(out, names); r_attrib_poke_class(out, classes_date); UNPROTECT(2); return out; } // [[ register() ]] SEXP vctrs_new_datetime(SEXP x, SEXP tzone) { return new_datetime(x, tzone); } static SEXP new_datetime(SEXP x, SEXP tzone) { if (TYPEOF(x) != REALSXP) { Rf_errorcall(R_NilValue, "`x` must be a double vector."); } // Convenience special case where we allow a // null `tzone` to represent local time if (tzone == R_NilValue) { tzone = chrs_empty; } if (TYPEOF(tzone) != STRSXP) { Rf_errorcall(R_NilValue, "`tzone` must be a character vector or `NULL`."); } SEXP names = PROTECT(r_names(x)); SEXP out = PROTECT(r_clone_referenced(x)); r_attrib_zap_all(out); r_attrib_poke_names(out, names); r_attrib_poke_class(out, classes_posixct); r_attrib_poke(out, syms_tzone, tzone); UNPROTECT(2); return out; } static SEXP new_empty_datetime(SEXP tzone) { return new_datetime(r_globals.empty_dbl, tzone); } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_date_validate(SEXP x) { return date_validate(x); } // Ensure that a `Date` is internally stored as a double vector static SEXP date_validate(SEXP x) { switch (TYPEOF(x)) { case REALSXP: return x; case INTSXP: // Keeps attributes return Rf_coerceVector(x, REALSXP); default: r_stop_internal("Corrupt `Date` with unknown type %s.", Rf_type2char(TYPEOF(x))); } } // [[ register() ]] SEXP vctrs_datetime_validate(SEXP x) { return datetime_validate(x); } // Ensure that a `POSIXct` is internally stored as a double vector. // Also checks that the `tzone` attribute is non-NULL. static SEXP datetime_validate(SEXP x) { x = PROTECT(datetime_validate_tzone(x)); x = PROTECT(datetime_validate_type(x)); UNPROTECT(2); return x; } static SEXP datetime_validate_tzone(SEXP x) { SEXP tzone = Rf_getAttrib(x, syms_tzone); if (tzone != R_NilValue) { return x; } x = PROTECT(r_clone_referenced(x)); Rf_setAttrib(x, syms_tzone, chrs_empty); UNPROTECT(1); return x; } static SEXP datetime_validate_type(SEXP x) { switch (TYPEOF(x)) { case REALSXP: return x; case INTSXP: // Keeps attributes return Rf_coerceVector(x, REALSXP); default: r_stop_internal("Corrupt `POSIXct` with unknown type %s.", Rf_type2char(TYPEOF(x))); } never_reached("datetime_validate_type"); } // ----------------------------------------------------------------------------- // Same underlying numeric representation, different `tzone` static SEXP datetime_rezone(SEXP x, SEXP tzone) { SEXP x_tzone = PROTECT(tzone_get(x)); if (tzone_equal(x_tzone, tzone)) { UNPROTECT(1); return x; } SEXP out = PROTECT(r_clone_referenced(x)); Rf_setAttrib(out, syms_tzone, tzone); UNPROTECT(2); return out; } // ----------------------------------------------------------------------------- // Time zone utilities static SEXP tzone_get(SEXP x) { SEXP tzone = PROTECT(Rf_getAttrib(x, syms_tzone)); if (tzone == R_NilValue) { UNPROTECT(1); return chrs_empty; } R_len_t size = Rf_length(tzone); if (size == 1) { UNPROTECT(1); return tzone; } if (size == 0) { Rf_errorcall(R_NilValue, "Corrupt datetime with 0-length `tzone` attribute"); } // If there are multiple, only take the first SEXP out = PROTECT(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(out, 0, STRING_ELT(tzone, 0)); UNPROTECT(2); return out; } // `tzone_get()` is guaranteed to return 1 element static inline bool tzone_is_local(SEXP tzone) { return STRING_ELT(tzone, 0) == strings_empty; } static SEXP tzone_union(SEXP x_tzone, SEXP y_tzone) { if (tzone_is_local(x_tzone)) { return y_tzone; } else { return x_tzone; } } // `tzone_get()` is guaranteed to return 1 element static bool tzone_equal(SEXP x_tzone, SEXP y_tzone) { // Equal objects? if (x_tzone == y_tzone) { return true; } // Equal CHARSXPs? SEXP x_string = STRING_ELT(x_tzone, 0); SEXP y_string = STRING_ELT(y_tzone, 0); if (x_string == y_string) { return true; } // Equal C char? const char* x_tzone_char = CHAR(x_string); const char* y_tzone_char = CHAR(y_string); return !strcmp(x_tzone_char, y_tzone_char); } // ----------------------------------------------------------------------------- static SEXP syms_tz = NULL; static SEXP syms_as_date = NULL; static SEXP fns_as_date = NULL; static SEXP r_as_date(SEXP x) { return vctrs_dispatch1(syms_as_date, fns_as_date, syms_x, x); } static SEXP syms_as_posixct = NULL; static SEXP fns_as_posixct = NULL; static SEXP r_as_posixct(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_as_posixct, fns_as_posixct, syms_x, x, syms_tz, tzone); } static SEXP syms_as_posixlt = NULL; static SEXP fns_as_posixlt = NULL; static SEXP r_as_posixlt(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_as_posixlt, fns_as_posixlt, syms_x, x, syms_tz, tzone); } static SEXP syms_date_as_character = NULL; static SEXP fns_date_as_character = NULL; static SEXP r_date_as_character(SEXP x) { return vctrs_dispatch1(syms_date_as_character, fns_date_as_character, syms_x, x); } static SEXP syms_chr_date_as_posixct = NULL; static SEXP fns_chr_date_as_posixct = NULL; static SEXP r_chr_date_as_posixct(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_chr_date_as_posixct, fns_chr_date_as_posixct, syms_x, x, syms_tzone, tzone); } static SEXP syms_chr_date_as_posixlt = NULL; static SEXP fns_chr_date_as_posixlt = NULL; static SEXP r_chr_date_as_posixlt(SEXP x, SEXP tzone) { return vctrs_dispatch2(syms_chr_date_as_posixlt, fns_chr_date_as_posixlt, syms_x, x, syms_tzone, tzone); } // ----------------------------------------------------------------------------- void vctrs_init_type_date_time(SEXP ns) { syms_tz = Rf_install("tz"); syms_as_date = Rf_install("as.Date"); syms_as_posixct = Rf_install("as.POSIXct"); syms_as_posixlt = Rf_install("as.POSIXlt"); syms_date_as_character = Rf_install("date_as_character"); syms_chr_date_as_posixct = Rf_install("chr_date_as_posixct"); syms_chr_date_as_posixlt = Rf_install("chr_date_as_posixlt"); fns_as_date = r_env_get(R_BaseEnv, syms_as_date); fns_as_posixct = r_env_get(R_BaseEnv, syms_as_posixct); fns_as_posixlt = r_env_get(R_BaseEnv, syms_as_posixlt); fns_date_as_character = r_env_get(ns, syms_date_as_character); fns_chr_date_as_posixct = r_env_get(ns, syms_chr_date_as_posixct); fns_chr_date_as_posixlt = r_env_get(ns, syms_chr_date_as_posixlt); } vctrs/src/vec-bool.h0000644000176200001440000000506015060045711014061 0ustar liggesusers#ifndef VCTRS_VEC_BOOL_H #define VCTRS_VEC_BOOL_H #include struct r_vector_bool { r_obj* shelter; r_obj* data; bool* v_data; r_ssize n; }; static inline struct r_vector_bool* r_new_vector_bool(r_ssize n) { r_obj* shelter = KEEP(r_alloc_list(2)); r_obj* vec = r_alloc_raw(sizeof(struct r_vector_bool)); r_list_poke(shelter, 0, vec); r_obj* data = r_alloc_raw(n * sizeof(bool)); r_list_poke(shelter, 1, data); struct r_vector_bool* p_vec = r_raw_begin(vec); p_vec->shelter = shelter; p_vec->data = data; p_vec->v_data = r_raw_begin(data); p_vec->n = n; FREE(1); return p_vec; } static inline bool* r_vector_bool_begin(struct r_vector_bool* p_vec) { return p_vec->v_data; } static inline const bool* r_vector_bool_cbegin(struct r_vector_bool* p_vec) { return (const bool*) p_vec->v_data; } static inline r_ssize r_vector_bool_length(struct r_vector_bool* p_vec) { return p_vec->n; } static inline void p_bool_fill(bool* v_x, r_ssize size, bool value) { for (r_ssize i = 0; i < size; ++i) { v_x[i] = value; } } static inline void r_vector_bool_fill(struct r_vector_bool* p_vec, bool value) { bool* v_data = r_vector_bool_begin(p_vec); const r_ssize size = r_vector_bool_length(p_vec); p_bool_fill(v_data, size, value); } static inline bool p_bool_any(const bool* v_x, r_ssize size) { for (r_ssize i = 0; i < size; ++i) { if (v_x[i]) { return true; } } return false; } static inline bool r_vector_bool_any(struct r_vector_bool* p_vec) { const bool* v_data = r_vector_bool_cbegin(p_vec); const r_ssize size = r_vector_bool_length(p_vec); return p_bool_any(v_data, size); } static inline r_ssize p_bool_sum(const bool* v_x, r_ssize size) { r_ssize out = 0; for (r_ssize i = 0; i < size; ++i) { out += v_x[i]; } return out; } static inline r_ssize r_vector_bool_sum(struct r_vector_bool* p_vec) { const bool* v_data = r_vector_bool_cbegin(p_vec); const r_ssize size = r_vector_bool_length(p_vec); return p_bool_sum(v_data, size); } static inline r_obj* p_bool_which(const bool* v_x, r_ssize size) { const r_ssize out_size = p_bool_sum(v_x, size); r_obj* out = KEEP(r_alloc_integer(out_size)); int* v_out = r_int_begin(out); for (r_ssize i = 0, j = 0; i < size && j < out_size; ++i) { v_out[j] = i + 1; j += v_x[i]; } FREE(1); return out; } static inline r_obj* r_vector_bool_which(struct r_vector_bool* p_vec) { const bool* v_data = r_vector_bool_cbegin(p_vec); const r_ssize size = r_vector_bool_length(p_vec); return p_bool_which(v_data, size); } #endif vctrs/src/type-data-frame.h0000644000176200001440000000252415132161317015336 0ustar liggesusers#ifndef VCTRS_TYPE_DATA_FRAME_H #define VCTRS_TYPE_DATA_FRAME_H #include "vctrs-core.h" #include "cast.h" #include "names.h" #include "ptype2.h" r_obj* new_data_frame(r_obj* x, r_ssize n); void init_data_frame(r_obj* x, r_ssize n); void init_tibble(r_obj* x, r_ssize n); void init_compact_rownames(r_obj* x, r_ssize n); static inline r_obj* df_rownames(r_obj* x) { return r_attrib_get(x, R_RowNamesSymbol); } bool is_native_df(r_obj* x); r_obj* df_poke(r_obj* x, r_ssize i, r_obj* value); r_obj* df_poke_at(r_obj* x, r_obj* name, r_obj* value); r_obj* df_flatten(r_obj* x); r_obj* df_repair_names(r_obj* x, struct name_repair_opts* name_repair); r_obj* df_cast_opts(const struct cast_opts* opts); static inline r_obj* df_cast(r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg) { const struct cast_opts opts = { .x = x, .to = to, .p_x_arg = p_x_arg, .p_to_arg = p_to_arg }; return df_cast_opts(&opts); } enum rownames_type { ROWNAMES_TYPE_automatic, ROWNAMES_TYPE_automatic_compact, ROWNAMES_TYPE_identifiers }; enum rownames_type rownames_type(r_obj* rn); r_ssize rownames_size(r_obj* rn); r_obj* df_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); #endif vctrs/src/names.c0000644000176200001440000006540215157322033013461 0ustar liggesusers#include #include "vctrs.h" #include "type-data-frame.h" #include "decl/names-decl.h" // 3 leading '.' + 1 trailing '\0' + 24 characters #define MAX_IOTA_SIZE 28 r_obj* vec_as_names(r_obj* names, const struct name_repair_opts* opts) { if (!opts) { return names; } switch (opts->type) { case NAME_REPAIR_none: return names; case NAME_REPAIR_minimal: return ffi_as_minimal_names(names); case NAME_REPAIR_unique: return vec_as_unique_names(names, opts->quiet); case NAME_REPAIR_universal: return vec_as_universal_names(names, opts->quiet); case NAME_REPAIR_check_unique: return check_unique_names(names, opts); case NAME_REPAIR_custom: return vec_as_custom_names(names, opts); } r_stop_unreachable(); } r_obj* ffi_vec_as_names(r_obj* names, r_obj* repair, r_obj* ffi_quiet, r_obj* frame) { if (!r_is_bool(ffi_quiet)) { r_abort("`quiet` must a boolean value."); } bool quiet = r_lgl_get(ffi_quiet, 0); struct r_lazy call = (struct r_lazy) { .x = r_syms.call, .env = frame }; struct r_lazy repair_arg = { .x = syms.repair_arg, .env = frame }; struct name_repair_opts repair_opts = new_name_repair_opts(repair, repair_arg, quiet, call); KEEP(repair_opts.shelter); r_obj* out = vec_as_names(names, &repair_opts); FREE(1); return out; } struct repair_error_info { r_obj* shelter; r_obj* repair_arg; r_obj* call; r_obj* input_error_repair_arg; r_obj* input_error_call; }; struct repair_error_info new_repair_error_info(struct name_repair_opts* p_opts) { struct repair_error_info out; out.shelter = r_new_list(4); KEEP(out.shelter); out.repair_arg = r_lazy_eval(p_opts->name_repair_arg); r_list_poke(out.shelter, 0, out.repair_arg); out.call = r_lazy_eval(p_opts->call); r_list_poke(out.shelter, 1, out.call); // If this is NULL, the `repair` value has been hard-coded by the // frontend. Input errors are internal, and we provide no // recommendation to fix user errors by providing a different value // for `repair`. if (out.repair_arg == r_null) { out.input_error_repair_arg = chrs.repair; r_list_poke(out.shelter, 2, out.input_error_repair_arg); out.input_error_call = r_call(r_sym("vec_as_names")); r_list_poke(out.shelter, 3, out.input_error_call); } else { out.input_error_repair_arg = r_lazy_eval(p_opts->name_repair_arg); r_list_poke(out.shelter, 2, out.input_error_repair_arg); out.input_error_call = r_lazy_eval(p_opts->call); r_list_poke(out.shelter, 3, out.input_error_call); } FREE(1); return out; } r_obj* vec_as_universal_names(r_obj* names, bool quiet) { r_obj* quiet_obj = KEEP(r_lgl(quiet)); r_obj* out = vctrs_dispatch2(syms_as_universal_names, fns_as_universal_names, syms_names, names, syms_quiet, quiet_obj); FREE(1); return out; } static r_obj* check_unique_names(r_obj* names, const struct name_repair_opts* opts) { r_obj* ffi_arg = KEEP(r_lazy_eval(opts->name_repair_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(opts->call)); r_obj* out = KEEP(vctrs_dispatch3(syms_check_unique_names, fns_check_unique_names, syms_names, names, r_syms.arg, ffi_arg, syms_call, ffi_call)); // Restore visibility r_eval(r_null, r_envs.empty); FREE(3); return out; } r_obj* vec_as_custom_names(r_obj* names, const struct name_repair_opts* opts) { names = KEEP(ffi_as_minimal_names(names)); // Don't use vctrs dispatch utils because we match argument positionally r_obj* call = KEEP(r_call2(syms_repair, syms_names)); r_obj* mask = KEEP(r_alloc_empty_environment(R_GlobalEnv)); r_env_bind(mask, syms_repair, opts->fn); r_env_bind(mask, syms_names, names); r_obj* out = KEEP(r_eval(call, mask)); vec_validate_minimal_names(out, r_length(names), opts->call); FREE(4); return out; } static r_obj* vec_names_impl(r_obj* x, bool proxy) { bool has_class = r_is_object(x); if (has_class && r_inherits(x, "data.frame")) { // Only return row names if they are character. Data frames with // automatic row names are treated as unnamed. r_obj* rn = df_rownames(x); if (rownames_type(rn) == ROWNAMES_TYPE_identifiers) { return rn; } else { return r_null; } } if (vec_bare_dim(x) == r_null) { if (!proxy && has_class) { return vctrs_dispatch1(syms_names, fns_names, syms_x, x); } else { return r_names(x); } } r_obj* dimnames = KEEP(r_attrib_get(x, r_syms.dim_names)); if (dimnames == r_null || r_length(dimnames) < 1) { FREE(1); return r_null; } r_obj* out = r_list_get(dimnames, 0); FREE(1); return out; } // [[ register() ]] r_obj* vec_names(r_obj* x) { return vec_names_impl(x, false); } r_obj* vec_proxy_names(r_obj* x) { return vec_names_impl(x, true); } r_obj* vec_names2(r_obj* x) { r_obj* names = vec_names(x); if (names == r_null) { return r_alloc_character(vec_size(x)); } else { return names; } } r_obj* ffi_as_minimal_names(r_obj* names) { if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector"); } r_ssize i = 0; r_ssize n = r_length(names); r_obj* const * v_names = r_chr_cbegin(names); for (; i < n; ++i) { if (v_names[i] == r_globals.na_str) { break; } } if (i == n) { return names; } names = KEEP(r_clone(names)); for (; i < n; ++i) { if (v_names[i] == r_globals.na_str) { r_chr_poke(names, i, strings_empty); } } FREE(1); return names; } r_obj* ffi_minimal_names(r_obj* x) { r_obj* names = KEEP(vec_names(x)); if (names == r_null) { names = r_alloc_character(vec_size(x)); } else { names = ffi_as_minimal_names(names); } FREE(1); return names; } // From dictionary.c r_obj* vctrs_duplicated(r_obj* x); // [[ include("vctrs.h") ]] r_obj* vec_as_unique_names(r_obj* names, bool quiet) { if (is_unique_names(names) && !any_has_suffix(names)) { return names; } else { return(as_unique_names_impl(names, quiet)); } } // [[ include("vctrs.h") ]] bool is_unique_names(r_obj* names) { if (r_typeof(names) != R_TYPE_character) { r_abort("`names` must be a character vector"); } r_ssize n = r_length(names); r_obj* const * v_names = r_chr_cbegin(names); if (duplicated_any(names)) { return false; } for (r_ssize i = 0; i < n; ++i) { if (needs_suffix(v_names[i])) { return false; } } return true; } bool any_has_suffix(r_obj* names) { r_ssize n = r_length(names); r_obj* const * v_names = r_chr_cbegin(names); for (r_ssize i = 0; i < n; ++i) { if (suffix_pos(r_str_c_string(v_names[i])) >= 0) { return true; } } return false; } r_obj* as_unique_names_impl(r_obj* names, bool quiet) { r_ssize n = r_length(names); r_obj* new_names = KEEP(r_clone(names)); r_obj* const * v_new_names = r_chr_cbegin(new_names); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = v_new_names[i]; // Set `NA` and dots values to "" so they get replaced by `...n` // later on if (needs_suffix(elt)) { elt = strings_empty; r_chr_poke(new_names, i, elt); continue; } // Strip `...n` suffixes const char* nm = r_str_c_string(elt); int pos = suffix_pos(nm); if (pos >= 0) { elt = Rf_mkCharLenCE(nm, pos, Rf_getCharCE(elt)); r_chr_poke(new_names, i, elt); continue; } } // Append all duplicates with a suffix r_obj* dups = KEEP(vctrs_duplicated(new_names)); const int* dups_ptr = r_lgl_cbegin(dups); for (r_ssize i = 0; i < n; ++i) { r_obj* elt = v_new_names[i]; if (elt != strings_empty && !dups_ptr[i]) { continue; } const char* name = r_str_c_string(elt); int size = strlen(name); int buf_size = size + MAX_IOTA_SIZE; R_CheckStack2(buf_size); char buf[buf_size]; buf[0] = '\0'; r_memcpy(buf, name, size); int remaining = buf_size - size; int needed = snprintf(buf + size, remaining, "...%d", (int) i + 1); if (needed >= remaining) { stop_large_name(); } r_chr_poke(new_names, i, Rf_mkCharLenCE(buf, size + needed, Rf_getCharCE(elt))); } if (!quiet) { describe_repair(names, new_names); } FREE(2); return new_names; } r_obj* vctrs_as_unique_names(r_obj* names, r_obj* quiet) { r_obj* out = KEEP(vec_as_unique_names(names, r_lgl_get(quiet, 0))); FREE(1); return out; } r_obj* vctrs_is_unique_names(r_obj* names) { bool out = is_unique_names(names); return r_lgl(out); } static bool is_dotdotint(const char* name) { int n = strlen(name); if (n < 3) { return false; } if (name[0] != '.' || name[1] != '.') { return false; } if (name[2] == '.') { name += 3; } else { name += 2; } return (bool) strtol(name, NULL, 10); } static ptrdiff_t suffix_pos(const char* name) { int n = strlen(name); const char* suffix_end = NULL; int in_dots = 0; bool in_digits = false; for (const char* ptr = name + n - 1; ptr >= name; --ptr) { char c = *ptr; if (in_digits) { if (c == '.') { in_digits = false; in_dots = 1; continue; } if (isdigit(c)) { continue; } goto done; } switch (in_dots) { case 0: if (isdigit(c)) { in_digits = true; continue; } goto done; case 1: case 2: if (c == '.') { ++in_dots; continue; } goto done; case 3: suffix_end = ptr + 1; if (isdigit(c)) { in_dots = 0; in_digits = true; continue; } goto done; default: r_stop_internal("Unexpected state."); }} done: if (suffix_end) { return suffix_end - name; } else { return -1; } } static void stop_large_name(void) { r_abort("Can't tidy up name because it is too large."); } static bool needs_suffix(r_obj* str) { return str == r_globals.na_str || str == strings_dots || str == strings_empty || is_dotdotint(r_str_c_string(str)); } r_obj* ffi_unique_names(r_obj* x, r_obj* quiet) { return vec_unique_names(x, LOGICAL(quiet)[0]); } r_obj* vec_unique_names(r_obj* x, bool quiet) { r_obj* names = KEEP(vec_names(x)); r_obj* out = vec_unique_names_impl(names, vec_size(x), quiet); FREE(1); return out; } r_obj* vec_unique_colnames(r_obj* x, bool quiet) { r_obj* names = KEEP(colnames(x)); r_obj* out = vec_unique_names_impl(names, Rf_ncols(x), quiet); FREE(1); return out; } static r_obj* vec_unique_names_impl(r_obj* names, r_ssize n, bool quiet) { r_obj* out; if (names == r_null) { out = KEEP(names_iota(n)); if (!quiet) { describe_repair(names, out); } } else { out = KEEP(vec_as_unique_names(names, quiet)); } FREE(1); return(out); } static r_obj* names_iota(r_ssize n) { char buf[MAX_IOTA_SIZE]; r_obj* nms = r_chr_iota(n, buf, MAX_IOTA_SIZE, "..."); if (nms == r_null) { r_abort("Too many names to repair."); } return nms; } static void describe_repair(r_obj* old_names, r_obj* new_names) { r_obj* call = KEEP(r_call3(r_sym("describe_repair"), old_names, new_names)); r_eval(call, vctrs_ns_env); // To reset visibility when called from a `.External2()` r_eval(r_null, r_envs.empty); FREE(1); } r_obj* ffi_outer_names(r_obj* names, r_obj* outer, r_obj* n) { if (names != r_null && r_typeof(names) != R_TYPE_character) { r_stop_internal("`names` must be `NULL` or a string."); } if (!r_is_number(n)) { r_stop_internal("`n` must be a single integer."); } if (outer != r_null) { outer = r_chr_get(outer, 0); } return outer_names(names, outer, r_int_get(n, 0)); } r_obj* outer_names(r_obj* names, r_obj* outer, r_ssize n) { if (outer == r_null) { return names; } if (r_typeof(outer) != R_TYPE_string) { r_stop_internal("`outer` must be a scalar string."); } if (outer == strings_empty || outer == r_globals.na_str) { return names; } if (r_is_empty_names(names)) { if (n == 1) { return r_str_as_character(outer); } else { return r_seq_chr(r_str_c_string(outer), n); } } else { return r_chr_paste_prefix(names, r_str_c_string(outer), ".."); } } r_obj* ffi_apply_name_spec(r_obj* name_spec, r_obj* outer, r_obj* inner, r_obj* n) { return apply_name_spec(name_spec, r_chr_get(outer, 0), inner, r_int_get(n, 0)); } // Applies a `name_spec` and returns one of the following: // - `NULL` // - A character vector of length 1 // - A character vector of length `n` // // In the case of: // // ```r // list_combine( // list(outer = c(a = 1)), // list(1:2), // name_spec = "{outer}_{inner}" // ) // ``` // // The `n` will be 2 but `inner` will only be length 1, "a". We apply `name_spec` // to get `"outer_a"` and return that, expecting that the caller recycles that // explicitly if required, or uses something like `chr_assign()` which can // efficiently recycle internally. r_obj* apply_name_spec(r_obj* name_spec, r_obj* outer, r_obj* inner, r_ssize n) { if (r_inherits(name_spec, "rlang_zap")) { return r_null; } if (name_spec_is_inner(name_spec)) { // Ignore `outer` entirely return inner; } if (outer == r_null) { // `outer` doesn't exist, no need to apply `name_spec` return inner; } if (r_typeof(outer) != R_TYPE_string) { r_stop_internal("`outer` must be a scalar string."); } if (outer == strings_empty || outer == r_globals.na_str) { if (inner == r_null) { return chrs_empty; } else { return inner; } } if (r_is_empty_names(inner)) { if (n == 0) { return r_globals.empty_chr; } if (n == 1) { return r_str_as_character(outer); } inner = KEEP(r_seq(1, n + 1)); } else { inner = KEEP(inner); } switch (r_typeof(name_spec)) { case R_TYPE_closure: break; case R_TYPE_character: name_spec = glue_as_name_spec(name_spec); break; default: name_spec = r_as_function(name_spec, ".name_spec"); break; case R_TYPE_null: { const char* reason; if (n > 1) { reason = "a vector of length > 1"; } else { reason = "a named vector"; } r_abort("Can't merge the outer name `%s` with %s.\n" "Please supply a `.name_spec` specification.", r_str_c_string(outer), reason); }} KEEP(name_spec); r_obj* outer_chr = KEEP(r_str_as_character(outer)); r_obj* out = KEEP(vctrs_dispatch2(syms_dot_name_spec, name_spec, syms_outer, outer_chr, syms_inner, inner)); if (out != r_null) { if (r_typeof(out) != R_TYPE_character) { r_abort("`.name_spec` must return a character vector."); } vec_check_recyclable(out, n, VCTRS_ALLOW_NULL_no, vec_args.empty, r_lazy_null); } FREE(4); return out; } static r_obj* glue_as_name_spec(r_obj* spec) { if (!r_is_string(spec)) { r_abort("Glue specification in `.name_spec` must be a single string."); } return vctrs_dispatch1(syms_glue_as_name_spec, fns_glue_as_name_spec, syms_internal_spec, spec); } bool name_spec_is_inner(r_obj* name_spec) { if (!r_is_string(name_spec)) { return false; } const char* name_spec_c_string = r_chr_get_c_string(name_spec, 0); return !strcmp(name_spec_c_string, "inner"); } #define VCTRS_PASTE_BUFFER_MAX_SIZE 4096 char vctrs_paste_buffer[VCTRS_PASTE_BUFFER_MAX_SIZE]; r_obj* r_chr_paste_prefix(r_obj* names, const char* prefix, const char* sep) { int n_prot = 0; names = KEEP_N(r_clone(names), &n_prot); r_ssize n = r_length(names); int outer_len = strlen(prefix); int names_len = r_chr_max_len(names); int sep_len = strlen(sep); int total_len = outer_len + names_len + sep_len + 1; char* buf = vctrs_paste_buffer; if (total_len > VCTRS_PASTE_BUFFER_MAX_SIZE) { r_obj* buf_box = KEEP_N( r_alloc_raw(total_len * sizeof(char)), &n_prot ); buf = (char*) RAW(buf_box); } buf[total_len - 1] = '\0'; char* bufp = buf; r_memcpy(bufp, prefix, outer_len); bufp += outer_len; for (int i = 0; i < sep_len; ++i) { *bufp++ = sep[i]; } r_obj* const* p_names = r_chr_cbegin(names); for (r_ssize i = 0; i < n; ++i) { const char* inner = r_str_c_string(p_names[i]); int inner_n = strlen(inner); r_memcpy(bufp, inner, inner_n); bufp[inner_n] = '\0'; r_chr_poke(names, i, r_str(buf)); } FREE(n_prot); return names; } r_obj* ffi_chr_paste_prefix(r_obj* names, r_obj* prefix, r_obj* sep) { return r_chr_paste_prefix(names, r_chr_get_c_string(prefix, 0), r_chr_get_c_string(sep, 0)); } r_obj* r_seq_chr(const char* prefix, r_ssize n) { int total_len = 24 + strlen(prefix) + 1; R_CheckStack2(total_len); char buf[total_len]; return r_chr_iota(n, buf, total_len, prefix); } static r_obj* set_rownames_dispatch(r_obj* x, r_obj* names) { return vctrs_dispatch2(syms_set_rownames_dispatch, fns_set_rownames_dispatch, syms_x, x, syms_names, names); } static r_obj* set_names_dispatch(r_obj* x, r_obj* names) { return vctrs_dispatch2(syms_set_names_dispatch, fns_set_names_dispatch, syms_x, x, syms_names, names); } static void check_names(r_obj* x, r_obj* names) { if (names == r_null) { return; } if (r_typeof(names) != R_TYPE_character) { r_abort( "`names` must be a character vector, not a %s.", r_type_as_c_string(r_typeof(names)) ); } r_ssize x_size = vec_size(x); r_ssize names_size = vec_size(names); if (x_size != names_size) { r_abort( "The size of `names`, %i, must be the same as the size of `x`, %i.", names_size, x_size ); } } r_obj* vec_set_rownames(r_obj* x, r_obj* names, bool proxy, const enum vctrs_ownership ownership) { if (!proxy && r_is_object(x)) { return set_rownames_dispatch(x, names); } r_obj* dim_names = r_attrib_get(x, r_syms.dim_names); // Early exit when no new row names and no existing row names if (names == r_null) { if (dim_names == r_null || r_list_get(dim_names, 0) == r_null) { return x; } } // Okay, now protect `dim_names` KEEP(dim_names); x = KEEP(vec_clone_referenced(x, ownership)); if (dim_names == r_null) { dim_names = KEEP(r_alloc_list(vec_dim_n(x))); } else { // Also clone attribute dim_names = KEEP(r_clone(dim_names)); } r_list_poke(dim_names, 0, names); r_attrib_poke(x, r_syms.dim_names, dim_names); FREE(3); return x; } r_obj* vec_set_df_rownames(r_obj* x, r_obj* names, bool proxy, const enum vctrs_ownership ownership) { if (names == r_null) { if (rownames_type(df_rownames(x)) != ROWNAMES_TYPE_identifiers) { return(x); } x = KEEP(vec_clone_referenced(x, ownership)); init_compact_rownames(x, vec_size(x)); FREE(1); return x; } // Repair row names silently if (!proxy) { names = vec_as_names(names, p_unique_repair_silent_opts); } KEEP(names); x = KEEP(vec_clone_referenced(x, ownership)); r_attrib_poke(x, r_syms.row_names, names); FREE(2); return x; } // FIXME: Do we need to get the vec_proxy() and only fall back if it doesn't // exist? See #526 and #531 for discussion and the related issue. r_obj* vec_set_names_impl(r_obj* x, r_obj* names, bool proxy, const enum vctrs_ownership ownership) { check_names(x, names); if (is_data_frame(x)) { return vec_set_df_rownames(x, names, proxy, ownership); } if (has_dim(x)) { return vec_set_rownames(x, names, proxy, ownership); } if (!proxy && r_is_object(x)) { return set_names_dispatch(x, names); } // Early exit if no new names and no existing names if (names == r_null && r_attrib_get(x, r_syms.names) == r_null) { return x; } switch (ownership) { case VCTRS_OWNERSHIP_foreign: { // We likely need to clone, but to do this we will use `names<-` // which can perform a cheaper ALTREP shallow duplication x = KEEP(set_names_dispatch(x, names)); break; } case VCTRS_OWNERSHIP_shallow: case VCTRS_OWNERSHIP_deep: { // This ends up skipping the cloning altogether x = KEEP(vec_clone_referenced(x, ownership)); r_attrib_poke(x, r_syms.names, names); break; } default: r_stop_unreachable(); } FREE(1); return x; } r_obj* vec_set_names(r_obj* x, r_obj* names, const enum vctrs_ownership ownership) { return vec_set_names_impl(x, names, false, ownership); } r_obj* vec_proxy_set_names(r_obj* x, r_obj* names, const enum vctrs_ownership ownership) { return vec_set_names_impl(x, names, true, ownership); } r_obj* ffi_vec_set_names(r_obj* x, r_obj* names) { // Comes from the R side, so `VCTRS_OWNERSHIP_foreign` return vec_set_names(x, names, VCTRS_OWNERSHIP_foreign); } r_obj* vctrs_validate_name_repair_arg(r_obj* arg) { struct name_repair_opts opts = new_name_repair_opts(arg, r_lazy_null, true, r_lazy_null); if (opts.type == NAME_REPAIR_custom) { return opts.fn; } else if (r_length(arg) != 1) { return r_str_as_character(r_str(name_repair_arg_as_c_string(opts.type))); } else { return arg; } } void stop_name_repair(struct name_repair_opts* p_opts) { struct repair_error_info info = new_repair_error_info(p_opts); KEEP(info.shelter); r_abort_call(info.input_error_call, "%s must be a string or a function. See `?vctrs::vec_as_names`.", r_format_error_arg(info.input_error_repair_arg)); } struct name_repair_opts new_name_repair_opts(r_obj* name_repair, struct r_lazy name_repair_arg, bool quiet, struct r_lazy call) { struct name_repair_opts opts = { .shelter = r_null, .type = 0, .fn = r_null, .name_repair_arg = name_repair_arg, .quiet = quiet, .call = call }; switch (r_typeof(name_repair)) { case R_TYPE_character: { if (!r_length(name_repair)) { stop_name_repair(&opts); } r_obj* c = r_chr_get(name_repair, 0); if (c == strings_none) { opts.type = NAME_REPAIR_none; } else if (c == strings_minimal) { opts.type = NAME_REPAIR_minimal; } else if (c == strings_unique) { opts.type = NAME_REPAIR_unique; } else if (c == strings_universal) { opts.type = NAME_REPAIR_universal; } else if (c == strings_check_unique) { opts.type = NAME_REPAIR_check_unique; } else if (c == strings_unique_quiet) { opts.type = NAME_REPAIR_unique; opts.quiet = true; } else if (c == strings_universal_quiet) { opts.type = NAME_REPAIR_universal; opts.quiet = true; } else { struct repair_error_info info = new_repair_error_info(&opts); KEEP(info.shelter); r_abort_call(info.input_error_call, "%s can't be \"%s\". See `?vctrs::vec_as_names`.", r_format_error_arg(info.input_error_repair_arg), r_str_c_string(c)); } return opts; } case R_TYPE_call: opts.fn = r_as_function(name_repair, ".name_repair"); opts.shelter = opts.fn; opts.type = NAME_REPAIR_custom; return opts; case R_TYPE_closure: opts.fn = name_repair; opts.type = NAME_REPAIR_custom; return opts; default: stop_name_repair(&opts); } r_stop_unreachable(); } const char* name_repair_arg_as_c_string(enum name_repair_type type) { switch (type) { case NAME_REPAIR_none: return "none"; case NAME_REPAIR_minimal: return "minimal"; case NAME_REPAIR_unique: return "unique"; case NAME_REPAIR_universal: return "universal"; case NAME_REPAIR_check_unique: return "check_unique"; case NAME_REPAIR_custom: return "custom"; } r_stop_unreachable(); } static void vec_validate_minimal_names(r_obj* names, r_ssize n, struct r_lazy call) { if (names == r_null) { r_abort_lazy_call(call, "Names repair functions can't return `NULL`."); } if (r_typeof(names) != R_TYPE_character) { r_abort_lazy_call(call, "Names repair functions must return a character vector."); } if (n >= 0 && r_length(names) != n) { r_abort_lazy_call(call, "Repaired names have length %d instead of length %d.", r_length(names), n); } if (r_chr_has_string(names, r_globals.na_str)) { r_abort_lazy_call(call, "Names repair functions can't return `NA` values."); } } r_obj* vctrs_validate_minimal_names(r_obj* names, r_obj* n_) { r_ssize n = -1; if (r_typeof(n_) == R_TYPE_integer) { if (r_length(n_) != 1) { r_stop_internal("`n` must be a single number."); } n = r_int_get(n_, 0); } vec_validate_minimal_names(names, n, r_lazy_null); return names; } r_obj* name_spec_inner = NULL; struct name_repair_opts unique_repair_default_opts; struct name_repair_opts unique_repair_silent_opts; struct name_repair_opts no_repair_opts; void vctrs_init_names(r_obj* ns) { syms_set_rownames_dispatch = r_sym("set_rownames_dispatch"); syms_set_names_dispatch = r_sym("set_names_dispatch"); syms_as_universal_names = r_sym("as_universal_names"); syms_check_unique_names = r_sym("validate_unique"); fns_set_rownames_dispatch = r_env_get(ns, syms_set_rownames_dispatch); fns_set_names_dispatch = r_env_get(ns, syms_set_names_dispatch); fns_as_universal_names = r_env_get(ns, syms_as_universal_names); fns_check_unique_names = r_env_get(ns, syms_check_unique_names); syms_glue_as_name_spec = r_sym("glue_as_name_spec"); fns_glue_as_name_spec = r_env_get(ns, syms_glue_as_name_spec); syms_internal_spec = r_sym("_spec"); name_spec_inner = r_chr("inner"); r_preserve(name_spec_inner); unique_repair_default_opts.type = NAME_REPAIR_unique; unique_repair_default_opts.fn = r_null; unique_repair_default_opts.quiet = false; unique_repair_silent_opts.type = NAME_REPAIR_unique; unique_repair_silent_opts.fn = r_null; unique_repair_silent_opts.quiet = true; no_repair_opts.type = NAME_REPAIR_none; no_repair_opts.fn = r_null; no_repair_opts.quiet = true; } static r_obj* syms_as_universal_names = NULL; static r_obj* syms_check_unique_names = NULL; static r_obj* syms_glue_as_name_spec = NULL; static r_obj* syms_internal_spec = NULL; static r_obj* syms_set_rownames_dispatch = NULL; static r_obj* syms_set_names_dispatch = NULL; static r_obj* fns_as_universal_names = NULL; static r_obj* fns_check_unique_names = NULL; static r_obj* fns_glue_as_name_spec = NULL; static r_obj* fns_set_rownames_dispatch = NULL; static r_obj* fns_set_names_dispatch = NULL; vctrs/src/cast-bare.h0000644000176200001440000000062614315060310014211 0ustar liggesusers#ifndef VCTRS_CAST_BARE_H #define VCTRS_CAST_BARE_H #include "vctrs-core.h" r_obj* int_as_double(r_obj* x, bool* lossy); r_obj* lgl_as_double(r_obj* x, bool* lossy); r_obj* dbl_as_integer(r_obj* x, bool* lossy); r_obj* lgl_as_integer(r_obj* x, bool* lossy); r_obj* chr_as_logical(r_obj* x, bool* lossy); r_obj* dbl_as_logical(r_obj* x, bool* lossy); r_obj* int_as_logical(r_obj* x, bool* lossy); #endif vctrs/src/altrep-rle.c0000644000176200001440000001127315113325071014417 0ustar liggesusers#include "altrep-rle.h" // Initialised at load time R_altrep_class_t altrep_rle_class; SEXP altrep_rle_is_materialized(SEXP x) { return Rf_ScalarLogical(R_altrep_data2(x) != R_NilValue); } SEXP altrep_rle_Make(SEXP input) { SEXP res = R_new_altrep(altrep_rle_class, input, R_NilValue); MARK_NOT_MUTABLE(res); return res; } // ALTREP methods ------------------- // The length of the object inline R_xlen_t altrep_rle_Length(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return Rf_xlength(data2); } R_xlen_t sz = 0; SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); for (R_xlen_t i = 0; i < Rf_xlength(rle); ++i) { sz += rle_p[i]; } return sz; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_rle_Inspect(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) { Rprintf("vctrs_altrep_rle (len=%" R_PRIdXLEN_T ", materialized=%s)\n", altrep_rle_Length(x), R_altrep_data2(x) != R_NilValue ? "T" : "F"); return TRUE; } // ALTSTRING methods ----------------- // the element at the index `i` SEXP altrep_rle_string_Elt(SEXP vec, R_xlen_t i) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return STRING_ELT(data2, i); } SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); SEXP nms = Rf_getAttrib(rle, Rf_install("names")); R_xlen_t idx = 0; while (i >= 0 && idx < Rf_xlength(rle)) { i -= rle_p[idx++]; } return STRING_ELT(nms, idx - 1); } R_xlen_t find_rle_index(int* rle_data, R_xlen_t i, R_xlen_t size) { R_xlen_t idx = 0; while (i >= 0 && idx < size) { i -= rle_data[idx++]; } return idx - 1; } // This is a simple implementation, a more complex one would produce a // altrep_rle object as well SEXP altrep_rle_Extract_subset(SEXP x, SEXP indx, SEXP call) { SEXP data2 = R_altrep_data2(x); // If the vector is already materialized, just fall back to the default // implementation if (data2 != R_NilValue) { return NULL; } SEXP data1 = R_altrep_data1(x); int* index_data = INTEGER(indx); R_xlen_t index_n = Rf_length(indx); int* rle_data = INTEGER(data1); R_xlen_t rle_n = Rf_length(data1); SEXP nms = PROTECT(Rf_getAttrib(data1, Rf_install("names"))); SEXP out = PROTECT(Rf_allocVector(STRSXP, index_n)); for (R_len_t i = 0; i < index_n; ++i) { int index_elt = index_data[i]; if (index_elt == NA_INTEGER) { SET_STRING_ELT(out, i, NA_STRING); continue; } --index_elt; R_xlen_t rle_idx = find_rle_index(rle_data, index_elt, rle_n); SET_STRING_ELT(out, i, STRING_ELT(nms, rle_idx)); } UNPROTECT(2); return out; } // --- Altvec SEXP altrep_rle_string_Materialize(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 != R_NilValue) { return data2; } R_xlen_t sz = altrep_rle_Length(vec); SEXP rle = R_altrep_data1(vec); int* rle_p = INTEGER(rle); SEXP out = PROTECT(Rf_allocVector(STRSXP, sz)); R_xlen_t idx = 0; SEXP nms = Rf_getAttrib(rle, Rf_install("names")); for (R_xlen_t i = 0; i < Rf_xlength(rle); ++i) { for (R_xlen_t j = 0; j < rle_p[i]; ++j) { SET_STRING_ELT(out, idx++, STRING_ELT(nms, i)); } } UNPROTECT(1); R_set_altrep_data2(vec, out); return out; } void* altrep_rle_Dataptr(SEXP vec, Rboolean writeable) { if (writeable) { r_stop_internal("Can't get writeable `DATAPTR()` to ``"); } else { // R promises not to write to this array, but we still have to return a // `void*` pointer rather than a `const void*` pointer. `STRING_PTR()` is // non-API so we use `STRING_PTR_RO()` and cast. This is really a bad ALTREP // API. It should have been separated into `void* Dataptr()` and `const // void* Dataptr_ro()`. return (void*) STRING_PTR_RO(altrep_rle_string_Materialize(vec)); } } const void* altrep_rle_Dataptr_or_null(SEXP vec) { SEXP data2 = R_altrep_data2(vec); if (data2 == R_NilValue) { return NULL; } else { return r_chr_cbegin(data2); } } void vctrs_init_altrep_rle(DllInfo* dll) { altrep_rle_class = R_make_altstring_class("altrep_rle", "vctrs", dll); // altrep R_set_altrep_Length_method(altrep_rle_class, altrep_rle_Length); R_set_altrep_Inspect_method(altrep_rle_class, altrep_rle_Inspect); // altvec R_set_altvec_Dataptr_method(altrep_rle_class, altrep_rle_Dataptr); R_set_altvec_Dataptr_or_null_method(altrep_rle_class, altrep_rle_Dataptr_or_null); R_set_altvec_Extract_subset_method(altrep_rle_class, altrep_rle_Extract_subset); // altstring R_set_altstring_Elt_method(altrep_rle_class, altrep_rle_string_Elt); } vctrs/src/proxy.c0000644000176200001440000001725715121047364013546 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/proxy-decl.h" r_obj* vec_proxy(r_obj* x) { return vec_proxy_2(x, false); } r_obj* vec_proxy_recurse(r_obj* x) { return vec_proxy_2(x, true); } static inline r_obj* vec_proxy_2(r_obj* x, bool recurse) { switch (vec_typeof(x)) { case VCTRS_TYPE_s3: { r_obj* x_proxy_method = KEEP(vec_proxy_method(x)); r_obj* out = KEEP(vec_proxy_invoke(x, x_proxy_method)); if (recurse && is_data_frame(out)) { out = df_proxy_recurse(out); } FREE(2); return out; } // Avoid `KEEP()` in the most common paths (data frames and unclassed atomics) case VCTRS_TYPE_dataframe: { return recurse ? df_proxy_recurse(x) : x; } default: return x; } } // Recurse into data frames static r_obj* df_proxy_recurse(r_obj* x) { r_obj* out = KEEP(r_clone(x)); r_ssize n = r_length(out); r_obj* const * v_out = r_list_cbegin(out); for (r_ssize i = 0; i < n; ++i) { r_list_poke(out, i, vec_proxy_recurse(v_out[i])); } FREE(1); return out; } // [[ register() ]] r_obj* vec_proxy_equal(r_obj* x) { r_obj* out = KEEP(vec_proxy_equal_impl(x)); if (is_data_frame(out)) { // Automatically proxy df-proxies recursively. // Also flattens and unwraps them (#1537, #1664). out = df_proxy(out, VCTRS_PROXY_KIND_equal); } FREE(1); return out; } // [[ register() ]] r_obj* vec_proxy_compare(r_obj* x) { r_obj* out = KEEP(vec_proxy_compare_impl(x)); if (is_data_frame(out)) { // Automatically proxy df-proxies recursively. // Also flattens and unwraps them (#1537, #1664). out = df_proxy(out, VCTRS_PROXY_KIND_compare); } FREE(1); return out; } // [[ register() ]] r_obj* vec_proxy_order(r_obj* x) { r_obj* out = KEEP(vec_proxy_order_impl(x)); if (is_data_frame(out)) { // Automatically proxy df-proxies recursively. // Also flattens and unwraps them (#1537, #1664). out = df_proxy(out, VCTRS_PROXY_KIND_order); } FREE(1); return out; } // Non-recursive variants called by the fallback path to ensure we only // fallback on the container itself (like a df or rcrd) and not its elements // (like columns or fields) #define VEC_PROXY_KIND_IMPL(METHOD, INVOKE) do { \ r_obj* method = KEEP(METHOD(x)); \ r_obj* out = INVOKE(x, method); \ FREE(1); \ return out; \ } while (0) \ static inline r_obj* vec_proxy_equal_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_equal_method, vec_proxy_equal_invoke); } static inline r_obj* vec_proxy_compare_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_compare_method, vec_proxy_compare_invoke); } static inline r_obj* vec_proxy_order_impl(r_obj* x) { VEC_PROXY_KIND_IMPL(vec_proxy_order_method, vec_proxy_order_invoke); } #undef VEC_PROXY_KIND_IMPL r_obj* vec_proxy_method(r_obj* x) { return s3_find_method("vec_proxy", x, vctrs_method_table); } // This should be faster than normal dispatch but also means that // proxy methods can't call `NextMethod()`. This could be changed if // it turns out a problem. r_obj* vec_proxy_invoke(r_obj* x, r_obj* method) { if (method == r_null) { return x; } else { return vctrs_dispatch1(syms_vec_proxy, method, syms_x, x); } } static inline r_obj* vec_proxy_method_impl(r_obj* x, const char* generic, r_obj* fn_proxy_array) { r_obj* cls = KEEP(s3_get_class(x)); r_obj* method = s3_class_find_method(generic, cls, vctrs_method_table); if (method != r_null) { FREE(1); return method; } /* FIXME: Stopgap check for bare arrays */ /* which equality functions don't handle well */ if (vec_dim_n(x) > 1) { FREE(1); return fn_proxy_array; } FREE(1); return r_null; } static inline r_obj* vec_proxy_equal_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_equal", fns_vec_proxy_equal_array); } static inline r_obj* vec_proxy_compare_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_compare", fns_vec_proxy_compare_array); } static inline r_obj* vec_proxy_order_method(r_obj* x) { return vec_proxy_method_impl(x, "vec_proxy_order", fns_vec_proxy_order_array); } static inline r_obj* vec_proxy_invoke_impl(r_obj* x, r_obj* method, r_obj* vec_proxy_sym, r_obj* (*vec_proxy_impl_fn)(r_obj*)) { if (method != r_null) { return vctrs_dispatch1(vec_proxy_sym, method, syms_x, x); } /* Fallback on S3 objects with no proxy */ if (vec_typeof(x) == VCTRS_TYPE_s3) { return vec_proxy_impl_fn(x); } else { return x; } } static inline r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_equal, vec_proxy); } static inline r_obj* vec_proxy_compare_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_compare, &vec_proxy_equal_impl); } static inline r_obj* vec_proxy_order_invoke(r_obj* x, r_obj* method) { return vec_proxy_invoke_impl(x, method, syms_vec_proxy_order, &vec_proxy_compare_impl); } #define DF_PROXY(PROXY) do { \ const r_ssize n_cols = r_length(x); \ r_obj* const* v_x = r_list_cbegin(x); \ \ for (r_ssize i = 0; i < n_cols; ++i) { \ r_obj* col = v_x[i]; \ r_list_poke(x, i, PROXY(col)); \ } \ } while (0) static inline r_obj* df_proxy(r_obj* x, enum vctrs_proxy_kind kind) { // Always clone to avoid modifying the original object, even if it is one // we freshly created in C, because we often work with both the proxy and the // original object within the same function (#1837) x = KEEP(r_clone(x)); switch (kind) { case VCTRS_PROXY_KIND_equal: DF_PROXY(vec_proxy_equal); break; case VCTRS_PROXY_KIND_compare: DF_PROXY(vec_proxy_compare); break; case VCTRS_PROXY_KIND_order: DF_PROXY(vec_proxy_order); break; } x = KEEP(df_flatten(x)); x = vec_proxy_unwrap(x); FREE(2); return x; } r_obj* ffi_df_proxy(r_obj* x, r_obj* kind) { if (!r_is_number(kind)) { r_stop_internal("`kind` must be a single integer."); } enum vctrs_proxy_kind c_kind = r_int_get(kind, 0); return df_proxy(x, c_kind); } r_obj* vec_proxy_unwrap(r_obj* x) { if (r_typeof(x) == R_TYPE_list && r_length(x) == 1 && is_data_frame(x)) { x = vec_proxy_unwrap(r_list_get(x, 0)); } return x; } r_obj* ffi_as_not_s4(r_obj* x) { return r_as_not_s4(x); } void vctrs_init_data(r_obj* ns) { syms_vec_proxy = r_sym("vec_proxy"); syms_vec_proxy_equal = r_sym("vec_proxy_equal"); syms_vec_proxy_equal_array = r_sym("vec_proxy_equal.array"); syms_vec_proxy_compare = r_sym("vec_proxy_compare"); syms_vec_proxy_compare_array = r_sym("vec_proxy_compare.array"); syms_vec_proxy_order = r_sym("vec_proxy_order"); syms_vec_proxy_order_array = r_sym("vec_proxy_order.array"); fns_vec_proxy_equal_array = r_env_get(ns, syms_vec_proxy_equal_array); fns_vec_proxy_compare_array = r_env_get(ns, syms_vec_proxy_compare_array); fns_vec_proxy_order_array = r_env_get(ns, syms_vec_proxy_order_array); } r_obj* syms_vec_proxy = NULL; r_obj* syms_vec_proxy_equal = NULL; r_obj* syms_vec_proxy_equal_array = NULL; r_obj* syms_vec_proxy_compare = NULL; r_obj* syms_vec_proxy_compare_array = NULL; r_obj* syms_vec_proxy_order = NULL; r_obj* syms_vec_proxy_order_array = NULL; r_obj* fns_vec_proxy_equal_array = NULL; r_obj* fns_vec_proxy_compare_array = NULL; r_obj* fns_vec_proxy_order_array = NULL; vctrs/src/compare.c0000644000176200001440000001676515156537555014034 0ustar liggesusers#include "vctrs.h" #include #include "decl/compare-decl.h" static void stop_not_comparable(r_obj* x, r_obj* y, const char* message) { r_abort("`x` and `y` are not comparable: %s", message); } // ----------------------------------------------------------------------------- #define COMPARE(CTYPE, CBEGIN, SCALAR_COMPARE) \ do { \ r_obj* out = KEEP(r_alloc_integer(size)); \ int* v_out = r_int_begin(out); \ \ CTYPE const* v_x = CBEGIN(x); \ CTYPE const* v_y = CBEGIN(y); \ \ for (r_ssize i = 0; i < size; ++i) { \ v_out[i] = SCALAR_COMPARE(v_x[i], v_y[i]); \ } \ \ FREE(3); \ return out; \ } \ while (0) r_obj* vec_compare(r_obj* x, r_obj* y, bool na_equal) { r_ssize size = vec_size(x); enum vctrs_type type = vec_proxy_typeof(x); if (type != vec_proxy_typeof(y) || size != vec_size(y)) { stop_not_comparable(x, y, "must have the same types and lengths"); } x = KEEP(obj_encode_utf8(x)); y = KEEP(obj_encode_utf8(y)); if (type == VCTRS_TYPE_dataframe) { r_obj* out = df_compare(x, y, na_equal, size); FREE(2); return out; } if (na_equal) { switch (type) { case VCTRS_TYPE_logical: COMPARE(int, r_lgl_cbegin, lgl_compare_na_equal); case VCTRS_TYPE_integer: COMPARE(int, r_int_cbegin, int_compare_na_equal); case VCTRS_TYPE_double: COMPARE(double, r_dbl_cbegin, dbl_compare_na_equal); case VCTRS_TYPE_character: COMPARE(r_obj*, r_chr_cbegin, chr_compare_na_equal); case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare", type); } } else { switch (type) { case VCTRS_TYPE_logical: COMPARE(int, r_lgl_cbegin, lgl_compare_na_propagate); case VCTRS_TYPE_integer: COMPARE(int, r_int_cbegin, int_compare_na_propagate); case VCTRS_TYPE_double: COMPARE(double, r_dbl_cbegin, dbl_compare_na_propagate); case VCTRS_TYPE_character: COMPARE(r_obj*, r_chr_cbegin, chr_compare_na_propagate); case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare", type); } } } #undef COMPARE r_obj* ffi_vec_compare(r_obj* x, r_obj* y, r_obj* ffi_na_equal) { const bool na_equal = r_bool_as_int(ffi_na_equal); return vec_compare(x, y, na_equal); } // ----------------------------------------------------------------------------- static r_obj* df_compare(r_obj* x, r_obj* y, bool na_equal, r_ssize size) { int nprot = 0; r_obj* out = KEEP_N(r_alloc_integer(size), &nprot); int* v_out = r_int_begin(out); // Initialize to "equality" value and only change if we learn that it differs. // This also determines the zero column result. r_memset(v_out, 0, size * sizeof(int)); struct df_short_circuit_info info = new_df_short_circuit_info(size, false); struct df_short_circuit_info* p_info = &info; PROTECT_DF_SHORT_CIRCUIT_INFO(p_info, &nprot); df_compare_impl(v_out, p_info, x, y, na_equal); FREE(nprot); return out; } static void df_compare_impl(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal) { r_ssize n_col = r_length(x); if (n_col != r_length(y)) { stop_not_comparable(x, y, "must have the same number of columns"); } for (r_ssize i = 0; i < n_col; ++i) { r_obj* x_col = r_list_get(x, i); r_obj* y_col = r_list_get(y, i); vec_compare_col(v_out, p_info, x_col, y_col, na_equal); // If we know all comparison values, break if (p_info->remaining == 0) { break; } } } // ----------------------------------------------------------------------------- #define COMPARE_COL(CTYPE, CBEGIN, SCALAR_COMPARE) \ do { \ CTYPE const* v_x = CBEGIN(x); \ CTYPE const* v_y = CBEGIN(y); \ \ for (r_ssize i = 0; i < p_info->size; ++i) { \ if (p_info->p_row_known[i]) { \ continue; \ } \ \ int cmp = SCALAR_COMPARE(v_x[i], v_y[i]); \ \ if (cmp != 0) { \ v_out[i] = cmp; \ p_info->p_row_known[i] = true; \ --p_info->remaining; \ \ if (p_info->remaining == 0) { \ break; \ } \ } \ } \ } \ while (0) static void vec_compare_col(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal) { enum vctrs_type type = vec_proxy_typeof(x); if (type == VCTRS_TYPE_dataframe) { df_compare_impl(v_out, p_info, x, y, na_equal); return; } if (na_equal) { switch (type) { case VCTRS_TYPE_logical: COMPARE_COL(int, r_lgl_cbegin, lgl_compare_na_equal); break; case VCTRS_TYPE_integer: COMPARE_COL(int, r_int_cbegin, int_compare_na_equal); break; case VCTRS_TYPE_double: COMPARE_COL(double, r_dbl_cbegin, dbl_compare_na_equal); break; case VCTRS_TYPE_character: COMPARE_COL(r_obj*, r_chr_cbegin, chr_compare_na_equal); break; case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare_col", type); } } else { switch (type) { case VCTRS_TYPE_logical: COMPARE_COL(int, r_lgl_cbegin, lgl_compare_na_propagate); break; case VCTRS_TYPE_integer: COMPARE_COL(int, r_int_cbegin, int_compare_na_propagate); break; case VCTRS_TYPE_double: COMPARE_COL(double, r_dbl_cbegin, dbl_compare_na_propagate); break; case VCTRS_TYPE_character: COMPARE_COL(r_obj*, r_chr_cbegin, chr_compare_na_propagate); break; case VCTRS_TYPE_complex: r_abort("Can't compare complexes."); case VCTRS_TYPE_scalar: r_abort("Can't compare scalars."); case VCTRS_TYPE_list: r_abort("Can't compare lists."); default: stop_unimplemented_vctrs_type("vec_compare_col", type); } } } #undef COMPARE_COL vctrs/src/dim.h0000644000176200001440000000134715156010305013125 0ustar liggesusers#ifndef VCTRS_DIM_H #define VCTRS_DIM_H #include "vctrs-core.h" #include "utils.h" // These versions return NULL and 0 for bare vectors. // This is useful to distinguish them from 1D arrays. static inline SEXP vec_bare_dim(SEXP x) { return r_dim(x); } static inline R_len_t vec_bare_dim_n(SEXP x) { return Rf_length(vec_bare_dim(x)); } static inline SEXP vec_dim(SEXP x) { SEXP dim = vec_bare_dim(x); if (dim == R_NilValue) { dim = r_int(Rf_length(x)); } return dim; } static inline R_len_t vec_dim_n(SEXP x) { SEXP dim = vec_bare_dim(x); if (dim == R_NilValue) { return 1; } return Rf_length(dim); } static inline bool has_dim(SEXP x) { return r_attrib_has_any(x) && r_dim(x) != R_NilValue; } #endif vctrs/src/shape.h0000644000176200001440000000105715120272011013445 0ustar liggesusers#ifndef VCTRS_SHAPE_H #define VCTRS_SHAPE_H #include "vctrs-core.h" // Computes the common shape of `x` and `y` and attaches it as the // dimensions of `ptype`. If `x` and `y` are both atomic with `NULL` dimensions, // then no dimensions are attached and `ptype` is returned unmodified. r_obj* vec_shaped_ptype( r_obj* ptype, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ); r_obj* vec_shape_broadcast( r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call ); #endif vctrs/src/rlang.c0000644000176200001440000000025315157273666013473 0ustar liggesusers// This is an include point for the implementations of the rlang // library. It should be included in a single and separate compilation // unit. #include "rlang/rlang.c" vctrs/src/recode.h0000644000176200001440000000131515065005761013621 0ustar liggesusers#ifndef VCTRS_RECODE_H #define VCTRS_RECODE_H #include "vctrs-core.h" #include "list-combine.h" r_obj* vec_recode_values( r_obj* x, r_obj* from, r_obj* to, r_obj* default_, enum list_combine_unmatched unmatched, bool from_as_list_of_vectors, bool to_as_list_of_vectors, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_from_arg, struct vctrs_arg* p_to_arg, struct vctrs_arg* p_default_arg, r_obj* ptype, struct r_lazy error_call ); r_obj* vec_replace_values( r_obj* x, r_obj* from, r_obj* to, bool from_as_list_of_vectors, bool to_as_list_of_vectors, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_from_arg, struct vctrs_arg* p_to_arg, struct r_lazy error_call ); #endif vctrs/src/type-factor.h0000644000176200001440000000066315156001116014611 0ustar liggesusers#ifndef VCTRS_TYPE_FACTOR_H #define VCTRS_TYPE_FACTOR_H #include "vctrs-core.h" #include "cast.h" #include "ptype2.h" SEXP fct_ptype2( SEXP x, SEXP y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ); r_obj* ord_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); SEXP ord_as_ordered(const struct cast_opts* opts); #endif vctrs/src/slice-assign.h0000644000176200001440000000631315156001116014733 0ustar liggesusers#ifndef VCTRS_SLICE_ASSIGN_H #define VCTRS_SLICE_ASSIGN_H #include "vctrs-core.h" /** * Optional `value` slicing * * - For `ASSIGNMENT_SLICE_VALUE_no`, assignment is treated the standard `[<-` * way, i.e. `x[i] <- value`. * * `value` must be size 1 or the same size as `i` after `i` has been converted * to a positive integer location vector with `vec_as_location()` (but note * that we don't actually make that conversion in the * `VCTRS_INDEX_STYLE_condition` case, we just count the number of `TRUE` and * `NA` values to perform the size check between `i` and `value`). * * - For `ASSIGNMENT_SLICE_VALUE_yes`, assignment proceeds as an optimized form * of: `x[i] <- value[i]`. Internally, we avoid actually materializing the * slice of `value`. * * `value` must be size 1 or the same size as `x`. */ enum assignment_slice_value { ASSIGNMENT_SLICE_VALUE_no, ASSIGNMENT_SLICE_VALUE_yes }; struct vec_assign_opts { bool assign_names; bool ignore_outer_names; enum vctrs_ownership ownership; enum assignment_slice_value slice_value; struct vctrs_arg* x_arg; struct vctrs_arg* value_arg; struct r_lazy call; }; struct vec_proxy_assign_opts { bool assign_names; bool ignore_outer_names; enum vctrs_ownership ownership; enum assignment_slice_value slice_value; enum vctrs_index_style index_style; struct vctrs_arg* x_arg; struct vctrs_arg* value_arg; struct r_lazy call; // Whether the `proxy` was proxied recursively or not bool recursively_proxied; }; r_obj* vec_assign_opts( r_obj* x, r_obj* index, r_obj* value, const struct vec_assign_opts* p_opts ); r_obj* vec_proxy_assign_opts( r_obj* proxy, r_obj* index, r_obj* value, const struct vec_proxy_assign_opts* p_opts ); r_obj* chr_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); r_obj* list_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); r_obj* df_assign( r_obj* x, r_obj* index, r_obj* value, const struct vec_proxy_assign_opts* p_opts ); r_obj* vec_assign_shaped( r_obj* proxy, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); bool is_condition_index(r_obj* index); void check_condition_index( r_obj* x, struct vctrs_arg* p_x_arg, struct r_lazy call ); void list_check_all_condition_indices( r_obj* xs, struct vctrs_arg* p_xs_arg, struct r_lazy call ); void check_recyclable_against_index( r_obj* value, r_obj* index, r_ssize size, enum assignment_slice_value slice_value, enum vctrs_index_style index_style, struct vctrs_arg* p_value_arg, struct r_lazy call ); // Exposed for `slice-assign-array.c` void check_assign_sizes( r_obj* x, r_obj* index, r_ssize value_size, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); // Exposed for `slice-assign-array.c` static inline bool should_slice_value(enum assignment_slice_value slice_value) { return slice_value == ASSIGNMENT_SLICE_VALUE_yes; } #endif vctrs/src/list-combine.c0000644000176200001440000014400715157322033014742 0ustar liggesusers#include "list-combine.h" #include "vctrs.h" #include "vec-bool.h" #include "decl/list-combine-decl.h" r_obj* ffi_list_combine( r_obj* ffi_xs, r_obj* ffi_indices, r_obj* ffi_size, r_obj* ffi_default, r_obj* ffi_unmatched, r_obj* ffi_multiple, r_obj* ffi_slice_xs, r_obj* ffi_ptype, r_obj* ffi_name_spec, r_obj* ffi_name_repair, r_obj* ffi_frame ) { // On the R side it's `x_arg` to go with `x`, but on the C side we use `xs` struct r_lazy xs_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg xs_arg = new_lazy_arg(&xs_arg_lazy); struct r_lazy indices_arg_lazy = { .x = syms.indices_arg, .env = ffi_frame }; struct vctrs_arg indices_arg = new_lazy_arg(&indices_arg_lazy); struct r_lazy default_arg_lazy = { .x = syms.default_arg, .env = ffi_frame }; struct vctrs_arg default_arg = new_lazy_arg(&default_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; const r_ssize size = r_arg_as_ssize(ffi_size, "size"); const enum list_combine_unmatched unmatched = parse_list_combine_unmatched(ffi_unmatched, error_call); const enum list_combine_multiple multiple = parse_list_combine_multiple(ffi_multiple, error_call); // On the R side it's `slice_x` to go with `x`, but on the C side we use `xs` const enum assignment_slice_value slice_xs = r_arg_as_bool(ffi_slice_xs, "slice_x") ? ASSIGNMENT_SLICE_VALUE_yes : ASSIGNMENT_SLICE_VALUE_no; struct name_repair_opts name_repair_opts = new_name_repair_opts( ffi_name_repair, r_lazy_null, false, error_call ); KEEP(name_repair_opts.shelter); r_obj* out = list_combine( ffi_xs, ffi_indices, size, ffi_default, unmatched, multiple, slice_xs, ffi_ptype, ffi_name_spec, &name_repair_opts, &xs_arg, &indices_arg, &default_arg, error_call ); FREE(1); return out; } r_obj* list_combine( r_obj* xs, r_obj* indices, r_ssize size, r_obj* default_, enum list_combine_unmatched unmatched, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_indices_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ) { const enum s3_fallback s3_fallback = r_is_true(r_peek_option("vctrs:::base_c_in_progress")) ? S3_FALLBACK_false : S3_FALLBACK_true; // `list_combine_impl()` supports `NULL` `indices` for `vec_c()` and // `list_unchop()`, which `list_combine()` does not, so we early check here // for that. This can technically be hit by users so we want a good error // message. `vec_c()` and `list_unchop()` use this to sequentially combine // `xs`, but `list_combine()` requires `indices` to be a list. obj_check_list(indices, p_indices_arg, error_call); const enum vctrs_index_style indices_style = compute_indices_style(indices, size); const bool has_indices = true; const bool has_default = default_ != r_null; return list_combine_impl( xs, has_indices, indices, indices_style, size, has_default, default_, unmatched, multiple, slice_xs, ptype, name_spec, p_name_repair_opts, p_xs_arg, p_indices_arg, p_default_arg, error_call, s3_fallback ); } /** * `vec_c()` backport */ r_obj* list_combine_for_vec_c( r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ) { r_obj* indices = r_null; return list_combine_for_list_unchop( xs, indices, ptype, name_spec, p_name_repair_opts, p_xs_arg, error_call ); } /** * `list_unchop()` backport */ r_obj* list_combine_for_list_unchop( r_obj* xs, r_obj* indices, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ) { const enum s3_fallback s3_fallback = r_is_true(r_peek_option("vctrs:::base_c_in_progress")) ? S3_FALLBACK_false : S3_FALLBACK_true; bool has_indices = indices != r_null; struct vctrs_arg* p_indices_arg = vec_args.indices; // If `!has_indices`, `list_combine()` will compute the size from the sizes of // `xs` and will ignore whatever we put here. r_ssize size = 0; if (has_indices) { // Sums the length of each `index` to compute the `size` // // This was the way that `list_unchop()` would compute the output size when // `indices` were provided. In `list_combine()`, the `size` is explicitly // required to account for a few edge cases and to work well with `default`. // // Note that `list_as_locations()` in `list_combine()` isn't allowed to // change the `index` size, which is the only reason this works from a // theoretical point of view. obj_check_list(indices, p_indices_arg, error_call); r_obj* const* v_indices = r_list_cbegin(indices); r_ssize indices_size = vec_size(indices); for (r_ssize i = 0; i < indices_size; ++i) { size += r_length(v_indices[i]); } } bool has_default = false; r_obj* default_ = r_null; struct vctrs_arg* p_default_arg = vec_args.empty; enum list_combine_unmatched unmatched = LIST_COMBINE_UNMATCHED_default; enum list_combine_multiple multiple = LIST_COMBINE_MULTIPLE_last; const enum assignment_slice_value slice_xs = ASSIGNMENT_SLICE_VALUE_no; const enum vctrs_index_style indices_style = VCTRS_INDEX_STYLE_location; r_obj* out = KEEP(list_combine_impl( xs, has_indices, indices, indices_style, size, has_default, default_, unmatched, multiple, slice_xs, ptype, name_spec, p_name_repair_opts, p_xs_arg, p_indices_arg, p_default_arg, error_call, s3_fallback )); if (vec_is_unspecified(out) && r_is_object(out)) { // The following `list_c()` and `list_unchop()` cases historically return // `NULL` because they don't have a `size` argument to maintain an invariant // for. `list_combine()` returns `unspecified` for these, because there // could be a `size > 0` argument supplied and we need to retain the size // invariant. We rectify the difference here. // // ``` // vec_c() // vec_c(NULL) // list_unchop(list()) // list_unchop(list(), indices = list()) // list_unchop(list(NULL)) // list_unchop(list(NULL), indices = list(integer())) // ``` // // This is an ambiguous edge case that we've currently defined as also // returning `NULL`. `list_combine()` returns `unspecified[2]` here, but // that's clearer because `size` has to be explicitly provided. // // ``` // list_unchop(list(NULL), indices = list(1:2)) // ``` // // We still want these cases to return `NA` even though they are technically // "unspecified" outputs, so we explicitly check if the output is an S3 object // as well, i.e. its an explicit `"vctrs_unspecified"` and not just a logical // vector of `NA`s. // // ``` // vec_c(NA) // list_unchop(list(NA), indices = list(1)) // ``` out = r_null; } FREE(1); return out; } /** * Actual implementation for `list_combine()` * * Exposes `s3_fallback` here for use in the fallback */ static r_obj* list_combine_impl( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_unmatched unmatched, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_indices_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call, enum s3_fallback s3_fallback ) { int n_protect = 0; obj_check_list(xs, p_xs_arg, error_call); r_obj* const* v_xs = r_list_cbegin(xs); r_ssize xs_size = vec_size(xs); // This is impossible with the exposed API, but let's sanity check if (!has_indices) { switch (multiple) { case LIST_COMBINE_MULTIPLE_last: break; case LIST_COMBINE_MULTIPLE_first: r_stop_internal("`multiple = 'first'` can't be set with sequential combination."); break; default: r_stop_unreachable(); } } if (has_indices) { // Apply size/type checking to `indices` before possibly early exiting from // needing to apply a fallback obj_check_list(indices, p_indices_arg, error_call); vec_check_size(indices, xs_size, VCTRS_ALLOW_NULL_no, p_indices_arg, error_call); } // Sizes are reused by the sequential path when advancing the compact-seq index. // It's more efficient to build them once even though it requires an allocation. r_obj* xs_sizes = NULL; r_ssize* v_xs_sizes = NULL; if (!has_indices) { // Infer `size` from `xs` for `vec_c()` and `list_unchop(indices = NULL)` // sequential approach size = 0; xs_sizes = KEEP_N(r_alloc_raw(xs_size * sizeof(r_ssize)), &n_protect); v_xs_sizes = r_raw_begin(xs_sizes); for (r_ssize i = 0; i < xs_size; ++i) { r_obj* x = v_xs[i]; r_ssize x_size = vec_size(x); size += x_size; v_xs_sizes[i] = x_size; } } if (has_indices && indices_style == VCTRS_INDEX_STYLE_location) { // Validate and convert `indices` if they exist. // // Note that we don't allow an individual `index` vector to change size // during validation. This is the only reason we can "infer" the output // size from the sum of the lengths of the `indices` before doing validation // and conversion. In particular: // - `NULL` indices become `integer()` (both size `0`). // - We don't allow character or logical indices. // - We don't allow negative or zero indices (these change the size). // - We don't allow oob indices (makes no sense since we inferred the size from lengths). // - Numeric `NA` propagates. // // Note that `compact_seq()` objects are valid `index` values for // `list_combine()`. We have tests to ensure it can handle them! // // There is nothing to validate for condition indices, they are logical vectors // where all 3 possible values are handled and we've already checked their sizes // match `size` in `compute_indices_style()`. const bool allow_compact = true; indices = KEEP_N(list_as_locations(indices, size, r_null, allow_compact), &n_protect); } // Perform `unmatched` check // (before fallback cases!) switch (unmatched) { case LIST_COMBINE_UNMATCHED_default: { // Will use `default` in unmatched locations break; } case LIST_COMBINE_UNMATCHED_error: { if (default_ != r_null) { r_abort_lazy_call( error_call, "Can't set %s when `unmatched = \"error\"`.", vec_arg_format(p_default_arg) ); } if (!has_indices) { r_stop_internal("`indices` should have been required if `unmatched` was set."); } check_any_unmatched(indices, indices_style, size, error_call); break; } default: { r_stop_unreachable(); } } ptype = KEEP_N( ptype_common_with_default( ptype, xs, has_default, default_, p_xs_arg, p_default_arg, error_call, s3_fallback ), &n_protect ); if (needs_list_combine_common_class_fallback(ptype)) { r_obj* out = list_combine_common_class_fallback( xs, has_indices, indices, indices_style, size, has_default, default_, multiple, slice_xs, ptype, name_spec, p_name_repair_opts, p_xs_arg, p_indices_arg, p_default_arg, error_call ); FREE(n_protect); return out; } if (needs_list_combine_homogeneous_fallback(xs, has_default, default_, ptype)) { r_obj* out = list_combine_homogeneous_fallback( xs, has_indices, indices, indices_style, size, has_default, default_, multiple, slice_xs, name_spec, p_xs_arg, p_default_arg, error_call ); FREE(n_protect); return out; } if (ptype == r_null) { // Even when there are no inputs and we can't determine a `ptype`, the user // will have supplied a `size`, so as an invariant we should return // something with this `size`. Our size preserving identity type is // ``, so we use that. // // We catch `` in `vec_c()` and `list_unchop()` and return // `NULL` in those cases instead, because that is what they have historically // returned and you can't supply `size` there, so this only happens in the // empty input cases of those functions. // // Assuming `NULL` is roughly equivalent to `unspecified(0)`, this gives us: // // ``` // vec_c() // #> NULL // list_unchop(list(), indices = list()) // #> NULL // list_combine(list(), indices = list(), size = 0) // #> unspecified[0] # Consistent with size != 0 case. // list_combine(list(), indices = list(), size = 5) // #> unspecified[5] # Preserves size, good. // ``` // // The most theoretically correct thing may be to return `unspecified(0)` // from `vec_c()` and `list_unchop()` as well, but we make a concious effort // to avoid exposing this type to users when we can. Since those functions // don't have the `size` invariant issue, `NULL` seems to be a good // alternative. // https://github.com/r-lib/vctrs/issues/1980 ptype = vctrs_shared_empty_uns; } const bool assign_names = !r_inherits(name_spec, "rlang_zap"); r_obj* xs_names = KEEP_N(r_names(xs), &n_protect); const bool xs_is_named = xs_names != r_null && !is_data_frame(ptype); r_keep_loc out_pi; r_obj* out = vec_init(ptype, size); KEEP_HERE(out, &out_pi); ++n_protect; out = vec_proxy_recurse(out); KEEP_AT(out, out_pi); // - We own the `proxy` container // - We own `proxy` recursively // - We call `vec_proxy_recurse()` so must restore recursively const struct vec_restore_opts restore_opts = { .ownership = VCTRS_OWNERSHIP_deep, .recursively_proxied = true }; const struct vec_proxy_assign_opts proxy_assign_opts = { .ownership = VCTRS_OWNERSHIP_deep, .recursively_proxied = true, .slice_value = slice_xs, .index_style = indices_style, .assign_names = assign_names, .ignore_outer_names = true, .call = error_call }; r_keep_loc out_names_pi; r_obj* out_names = r_null; KEEP_HERE(out_names, &out_names_pi); ++n_protect; r_ssize xs_i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_xs_arg, xs_names, xs_size, &xs_i ); KEEP_N(p_x_arg->shelter, &n_protect); struct cast_opts cast_opts = { .to = ptype, .p_x_arg = p_x_arg, .call = error_call, .s3_fallback = s3_fallback }; r_keep_loc x_pi; r_obj* x = r_null; KEEP_HERE(x, &x_pi); ++n_protect; r_keep_loc index_pi; r_obj* index = r_null; KEEP_HERE(index, &index_pi); ++n_protect; // For the sequential path r_ssize start = 0; int* v_index = NULL; if (!has_indices) { // Sequential path reuses the same compact sequence `index` index = compact_seq(0, 0, true); KEEP_AT(index, index_pi); v_index = r_int_begin(index); } for (r_ssize i = 0; i < xs_size; ++i) { switch (multiple) { case LIST_COMBINE_MULTIPLE_last: xs_i = i; break; case LIST_COMBINE_MULTIPLE_first: xs_i = xs_size - 1 - i; break; default: r_stop_unreachable(); } x = v_xs[xs_i]; if (x == r_null) { continue; } r_ssize index_size; // Advance `index` if (has_indices) { index = r_list_get(indices, xs_i); index_size = vec_subscript_size(index); } else { index_size = v_xs_sizes[xs_i]; init_compact_seq(v_index, start, index_size, true); } // When we have `indices`, `x`'s size must be compatible with the `index`'s // size. This is dependent on `slice_xs` and `indices_style`. // // When we don't have `indices`, we derive the index sizes from // `x` itself so there is no reason to recheck the size. // // We don't actually recycle `x` because both `vec_proxy_assign_opts()` and // `chr_assign()` efficiently recycle size 1 inputs. if (has_indices) { check_recyclable_against_index( x, index, size, slice_xs, indices_style, p_x_arg, error_call ); } // Handle optional names assignment if (assign_names) { r_obj* outer = xs_is_named ? r_chr_get(xs_names, xs_i) : r_null; r_obj* inner = KEEP(vec_names(x)); r_obj* x_names = KEEP(apply_name_spec(name_spec, outer, inner, index_size)); if (has_indices && x_names == r_null && out_names != r_null) { // We don't have names on this element, but `out_names` will exist. // Someone before us may have written to `out_names` at this `index` by // providing an overlapping `index`, so we must clear that. x_names = r_chrs.empty_string; } if (x_names != r_null) { R_LAZY_ALLOC(out_names, out_names_pi, R_TYPE_character, size); out_names = chr_assign( out_names, index, x_names, VCTRS_OWNERSHIP_deep, slice_xs, indices_style ); KEEP_AT(out_names, out_names_pi); } FREE(2); } cast_opts.x = x; x = vec_cast_opts(&cast_opts); KEEP_AT(x, x_pi); out = vec_proxy_assign_opts(out, index, x, &proxy_assign_opts); KEEP_AT(out, out_pi); if (!has_indices) { start += index_size; } } if (has_default) { // `default` uses a slightly modified form of `proxy_assign_opts` and // `cast_opts` // - `default` is size 1 or size of the output, so uses // `ASSIGNMENT_SLICE_VALUE_yes`. // - `default`'s index is always built using a special compact condition // vector, so uses `VCTRS_INDEX_STYLE_condition`. // - `default` has its own special `p_default_arg`. struct vec_proxy_assign_opts default_proxy_assign_opts = proxy_assign_opts; default_proxy_assign_opts.index_style = VCTRS_INDEX_STYLE_condition; default_proxy_assign_opts.slice_value = ASSIGNMENT_SLICE_VALUE_yes; struct cast_opts default_cast_opts = cast_opts; default_cast_opts.p_x_arg = p_default_arg; // Compute `default` compact condition index index = compute_default_index(indices, indices_style, size); KEEP_AT(index, index_pi); // `default` recycles against the output size, not the `index` vec_check_recyclable(default_, size, VCTRS_ALLOW_NULL_no, p_default_arg, error_call); // Handle optional names assignment if (assign_names) { // `outer` names don't exist, but `name_spec` could still `zap()` any `inner` names r_obj* outer = r_null; r_obj* inner = KEEP(vec_names(default_)); r_obj* x_names = KEEP(apply_name_spec(name_spec, outer, inner, size)); if (x_names != r_null) { R_LAZY_ALLOC(out_names, out_names_pi, R_TYPE_character, size); out_names = chr_assign( out_names, index, x_names, VCTRS_OWNERSHIP_deep, default_proxy_assign_opts.slice_value, default_proxy_assign_opts.index_style ); KEEP_AT(out_names, out_names_pi); } FREE(2); } default_cast_opts.x = default_; x = vec_cast_opts(&default_cast_opts); KEEP_AT(x, x_pi); out = vec_proxy_assign_opts(out, index, x, &default_proxy_assign_opts); KEEP_AT(out, out_pi); } if ( s3_fallback == S3_FALLBACK_true && is_data_frame(out) && needs_df_list_combine_common_class_fallback(out) ) { // Perform the common class fallback on any columns of the // data frame that require it df_list_combine_common_class_fallback( out, xs, has_indices, indices, indices_style, size, has_default, default_, multiple, slice_xs, ptype, name_spec, p_name_repair_opts, p_indices_arg, error_call ); } out = vec_restore_opts(out, ptype, &restore_opts); KEEP_AT(out, out_pi); if (out_names != r_null) { out_names = KEEP(vec_as_names(out_names, p_name_repair_opts)); out = vec_set_names(out, out_names, restore_opts.ownership); FREE(1); } else if (!assign_names) { // FIXME: `vec_ptype2()` doesn't consistently zaps names, so `out` // might have been initialised with names. This branch can be // removed once #1020 is resolved. out = vec_set_names(out, r_null, restore_opts.ownership); } FREE(n_protect); return out; } // ------------------------------------------------------------------------------------------- static bool needs_list_combine_common_class_fallback(r_obj* ptype) { if (!vec_is_common_class_fallback(ptype)) { return false; } // Suboptimal: Prevent infinite recursion through `vctrs_vctr` method r_obj* cls = r_attrib_get(ptype, syms_fallback_class); cls = r_chr_get(cls, r_length(cls) - 1); return cls != strings_vctrs_vctr; } // Common class fallback combination method // // If it doesn't look like we know how to handle a class, // we still provide a fallback approach in some cases. // // `vec_ptype_common()` knows to return a special "common type" `ptype` object // in these cases, which is then detectable by `vec_is_common_class_fallback()`. // // Attached to that `ptype` is the class name of interest that we are // working with. // // - If that class has a `c()` method, we invoke it. // // - Otherwise, we try `list_combine()` again, this time without the // ability to fallback. This only works if our other fallback case is hit, // which is when every object in the list is of a homogenous type, in which // case we again call `c()` if that homogenous type has a `c()` method, or we // fall through and let `list_combine()` try to run and push the // homogenous attributes onto the final output. static r_obj* list_combine_common_class_fallback( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_indices_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ) { r_obj* cls = KEEP(r_attrib_get(ptype, syms_fallback_class)); bool implements_c = class_implements_base_c(cls); FREE(1); if (implements_c) { return base_list_combine_fallback( xs, has_indices, indices, indices_style, size, has_default, default_, multiple, slice_xs, name_spec, p_xs_arg, p_default_arg, error_call ); } else { // Throw out the `vctrs:::common_class_fallback` ptype, // it's served its purpose by getting us here ptype = r_null; const enum s3_fallback s3_fallback = S3_FALLBACK_false; // Should cause a common type error, unless another fallback // kicks in (for instance, homogeneous class with homogeneous // attributes) vec_ptype_common( xs, ptype, PTYPE_FINALISE_DEFAULT, s3_fallback, p_xs_arg, error_call ); // We will have already checked `unmatched` before the fallback // is invoked, so no need to check it again enum list_combine_unmatched unmatched = LIST_COMBINE_UNMATCHED_default; // Suboptimal: Call `list_combine_impl()` again to // combine vector with homogeneous class fallback return list_combine_impl( xs, has_indices, indices, indices_style, size, has_default, default_, unmatched, multiple, slice_xs, ptype, name_spec, p_name_repair_opts, p_xs_arg, p_indices_arg, p_default_arg, error_call, s3_fallback ); } } // ------------------------------------------------------------------------------------------- // To check if a data frame needs common class fallback treatment, we // recursively look through the columns for `vec_is_common_class_fallback()` to // be `true` on any column. bool needs_df_list_combine_common_class_fallback(r_obj* x) { r_ssize n_cols = r_length(x); r_obj* const * v_x = r_list_cbegin(x); for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = v_x[i]; if (vec_is_common_class_fallback(col)) { return true; } if (is_data_frame(col) && needs_df_list_combine_common_class_fallback(col)) { return true; } } return false; } // If a column of a data frame requires common class treatment, then it has not // actually been assigned into `out` yet. We pluck out the relevant column from // each element of `xs` and perform a common class fallback combination approach // on that column to form the final column, then push it into place in `out`. void df_list_combine_common_class_fallback( r_obj* out, r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_indices_arg, struct r_lazy error_call ) { int n_protect = 0; r_ssize n_cols = r_length(out); r_obj* ptype_orig = ptype; if (!is_data_frame(ptype)) { ptype = KEEP_N(vec_proxy(ptype), &n_protect); if (!is_data_frame(ptype)) { r_stop_internal("Expected fallback target to have a df proxy."); } } if (r_length(ptype) != n_cols || r_typeof(out) != R_TYPE_list || r_typeof(ptype) != R_TYPE_list) { r_stop_internal("`ptype` and `out` must be lists of the same length."); } for (r_ssize i = 0; i < n_cols; ++i) { r_obj* col = r_list_get(out, i); r_obj* ptype_col = r_list_get(ptype, i); if (is_data_frame(col) && needs_df_list_combine_common_class_fallback(ptype_col)) { // Recurse into df-cols r_obj* out_col = r_list_get(out, i); r_obj* xs_col = KEEP(list_pluck(xs, i)); r_obj* default_col = has_default ? r_list_get(default_, i) : r_null; df_list_combine_common_class_fallback( out_col, xs_col, has_indices, indices, indices_style, size, has_default, default_col, multiple, slice_xs, ptype_col, name_spec, p_name_repair_opts, p_indices_arg, error_call ); FREE(1); } else if (needs_list_combine_common_class_fallback(ptype_col)) { r_obj* xs_col = KEEP(list_pluck(xs, i)); r_obj* default_col = has_default ? r_list_get(default_, i) : r_null; struct vctrs_arg* p_xs_col_arg = vec_args.empty; struct vctrs_arg* p_default_col_arg = vec_args.empty; r_obj* out_col = list_combine_common_class_fallback( xs_col, has_indices, indices, indices_style, size, has_default, default_col, multiple, slice_xs, ptype_col, name_spec, p_name_repair_opts, p_xs_col_arg, p_indices_arg, p_default_col_arg, error_call ); r_list_poke(out, i, out_col); if (vec_size(out_col) != size) { r_stop_internal( "`c()` method returned a vector of unexpected size %d instead of %d.", vec_size(out_col), size ); } // Remove fallback vector from the ptype so it doesn't get in // the way of restoration later on r_list_poke(ptype_orig, i, vec_ptype_final(out_col, vec_args.empty, error_call)); FREE(1); } } FREE(n_protect); } // ------------------------------------------------------------------------------------------- static bool needs_list_combine_homogeneous_fallback( r_obj* xs, bool has_default, r_obj* default_, r_obj* ptype ) { r_obj* first = list_first_non_null(xs, NULL); if (first == r_null && has_default) { // i.e. `list_combine(x = list(), default = foobar(1))` first = default_; } if (!obj_is_vector(first, VCTRS_ALLOW_NULL_no)) { return false; } // Never fall back for `vctrs_vctr` classes to avoid infinite // recursion through `c.vctrs_vctr()` if (r_inherits(first, "vctrs_vctr")) { return false; } r_obj* ptype_class = KEEP(r_class(ptype)); if (!obj_has_class(first, ptype_class)) { // Cheap test before consulting `vec_ptype2()` and `c()` methods FREE(1); return false; } bool out = !vec_implements_ptype2(first) && list_all_have_class(xs, ptype_class) && (has_default ? obj_has_class(default_, ptype_class) : true) && vec_implements_base_c(first); FREE(1); return out; } // To perform homogeneous fallback, we invoke `c()` because we've // checked in `needs_list_combine_homogeneous_fallback()` that // this class does implement a `c()` method, so we trust it. static r_obj* list_combine_homogeneous_fallback( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* name_spec, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ) { return base_list_combine_fallback( xs, has_indices, indices, indices_style, size, has_default, default_, multiple, slice_xs, name_spec, p_xs_arg, p_default_arg, error_call ); } static bool list_all_have_class(r_obj* xs, r_obj* class) { r_ssize size = r_length(xs); r_obj* const* v_xs = r_list_cbegin(xs); for (r_ssize i = 0; i < size; ++i) { r_obj* x = v_xs[i]; if (x == r_null) { // Allow `NULL`s continue; } if (!obj_has_class(x, class)) { return false; } } return true; } static bool obj_has_class(r_obj* x, r_obj* class) { r_obj* x_class = KEEP(r_class(x)); bool out = obj_equal(x_class, class); FREE(1); return out; } // ------------------------------------------------------------------------------------------- /** * Core routine for R level list combine fallback * * If `indices` aren't involved, this just calls out R level `c()` to combine * the `xs` in order, with some special handling for `NULL` and `unspecified`. * * If `indices` are involved, we have to recreate some of the behavior we get in * the "main" path, like computing the total output size and recycling `xs` to * each `index` size. We still use `c()` to combine sequentially, but then we * reslice the combined results to put them in the order specified by the * `indices`. */ static r_obj* base_list_combine_fallback( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* name_spec, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ) { if (!has_indices) { // Sequential combination, nothing fancy here return base_c_invoke(xs, name_spec, error_call); } // Otherwise we have `indices`. We need to recreate a bunch of the "main" path // logic, and then combine all `xs` together and reorder using the `indices`. // // We end up doing something like: // // ``` // vec_slice_fallback(base_c(!!!xs), order(vec_c(!!!indices))) // ``` // Normalize `indices` to the location style, because that's what the fallback // is designed to handle. It's also the style we convert `default` to. switch (indices_style) { case VCTRS_INDEX_STYLE_location: { indices = list_location_to_location_indices(indices); break; } case VCTRS_INDEX_STYLE_condition: { indices_style = VCTRS_INDEX_STYLE_location; indices = list_condition_to_location_indices(indices); break; } default: r_stop_unreachable(); } KEEP(indices); // Normalize and check `xs` sizes // // - If `slice_xs = no`, each `x` must be size 1 or the size of the `index` // - Size 1 must be recycled up to the size of the `index` // - If `slice_xs = yes`, each `x` must be size 1 or size `size` // - Size 1 must be recycled up to the size of the `index` // - Size `size` must be sliced down to the size of the `index` switch (slice_xs) { case ASSIGNMENT_SLICE_VALUE_no: { xs = vec_recycle_xs_fallback(xs, indices, p_xs_arg, error_call); break; } case ASSIGNMENT_SLICE_VALUE_yes: { xs = vec_slice_xs_fallback(xs, indices, size, p_xs_arg, error_call); break; } default: r_stop_unreachable(); } KEEP(xs); // Reverse `xs` and `indices` if required for `multiple` // // - Done after recycling/slicing of `xs` because `p_xs_arg` is used there // and we need to generate correct index locations in errors. // - Done before `default` handling because `default` is always pushed // at the end. switch (multiple) { case LIST_COMBINE_MULTIPLE_last: { // Nothing to do, this is the standard behavior break; } case LIST_COMBINE_MULTIPLE_first: { xs = KEEP(vec_reverse(xs)); indices = KEEP(vec_reverse(indices)); FREE(2); break; } } KEEP(xs); KEEP(indices); if (has_default) { // Materialize the `default`'s index in location style, as that is what // we normalized `indices` to. r_obj* default_index = KEEP(compute_default_index(indices, indices_style, size)); default_index = KEEP(compact_condition_materialize_location(default_index)); // `default` recycles against the output size const r_ssize default_size = vec_size(default_); const r_ssize default_index_size = r_length(default_index); // Other `xs` have been sliced already, we now need to sliced `default`, // which is always provided in `slice_xs = yes` style. if (default_size == 1) { // Recycle "up" to the size of the index default_ = vec_recycle_fallback(default_, default_index_size, p_default_arg, error_call); } else if (default_size == size) { // Slice "down" to the size of the index default_ = vec_slice_fallback(default_, default_index); } else { // `default` is the wrong size, error vec_check_recyclable(default_, size, VCTRS_ALLOW_NULL_no, p_default_arg, error_call); } KEEP(default_); // Append the default to `xs` and `indices` before the fallback xs = KEEP(push_default(xs, default_)); indices = KEEP(push_default_index(indices, default_index)); FREE(5); } KEEP(xs); KEEP(indices); // Remove all `NULL`s from `xs` and their corresponding slot in `indices`. // // `base_c_invoke()` does this as well, but we need to remove the `indices` slot // at the same time. // // - Done after `default_index` computation, so `default_index` doesn't capture // dropped indices. // - Done after `vec_recycle_xs_fallback()` so we have correct indices in recycling // error messages. if (vec_any_missing(xs)) { r_obj* complete = KEEP(vec_detect_complete(xs)); complete = KEEP(r_lgl_which(complete, false)); xs = KEEP(vec_slice_unsafe(xs, complete)); indices = KEEP(vec_slice_unsafe(indices, complete)); FREE(4); } KEEP(xs); KEEP(indices); r_obj* out = KEEP(base_c_invoke(xs, name_spec, error_call)); r_obj* index = KEEP(build_fallback_index(indices, size, error_call)); out = vec_slice_fallback(out, index); FREE(10); return out; } static r_obj* base_c_invoke( r_obj* xs, r_obj* name_spec, struct r_lazy error_call ) { if (vctrs_debug_verbose) { r_obj* x = list_first_non_null(xs, NULL); r_printf( "Falling back to `base::c()` for class `%s`.\n", r_chr_get_c_string(r_class(x), 0) ); } if (name_spec_is_inner(name_spec)) { // We don't support most `name_spec` options in the fallback, // but we do allow this one because it is extremely useful // and easy to implement name_spec = r_null; if (r_names(xs) != r_null) { // Remove outer names, but remember we likely don't own `xs`! xs = KEEP(r_clone_referenced(xs)); r_attrib_poke_names(xs, r_null); FREE(1); } } KEEP(xs); if (name_spec != r_null) { stop_name_spec_in_fallback(xs, error_call); } r_obj* ffi_call = KEEP(r_call2(r_sym("base_c_invoke"), xs)); r_obj* out = r_eval(ffi_call, vctrs_ns_env); FREE(2); return out; } static void stop_name_spec_in_fallback(r_obj* xs, struct r_lazy error_call) { r_obj* common_class = KEEP(r_class(list_first_non_null(xs, NULL))); const char* class_str = r_chr_get_c_string(common_class, 0); r_abort_lazy_call( error_call, "Can't use a name specification with non-vctrs types.\n" "vctrs methods must be implemented for class `%s`.\n" "See .", class_str ); } static r_obj* push_default( r_obj* xs, r_obj* default_ ) { const r_ssize xs_size = vec_size(xs); r_obj* xs_names = KEEP(r_names(xs)); xs = KEEP(r_list_resize(xs, xs_size + 1)); r_list_poke(xs, xs_size, default_); if (xs_names != r_null) { xs_names = r_chr_resize(xs_names, xs_size + 1); r_attrib_poke_names(xs, xs_names); r_chr_poke(xs_names, xs_size, r_strs.empty); } FREE(2); return xs; } static r_obj* push_default_index( r_obj* indices, r_obj* default_index ) { const r_ssize indices_size = vec_size(indices); r_obj* indices_names = KEEP(r_names(indices)); indices = KEEP(r_list_resize(indices, indices_size + 1)); r_list_poke(indices, indices_size, default_index); if (indices_names != r_null) { indices_names = r_chr_resize(indices_names, indices_size + 1); r_attrib_poke_names(indices, indices_names); r_chr_poke(indices_names, indices_size, r_strs.empty); } FREE(2); return indices; } static r_obj* build_fallback_index(r_obj* indices, r_ssize size, struct r_lazy error_call) { const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = r_null, .call = error_call }; r_obj* index = KEEP(vec_c( indices, r_globals.empty_int, name_spec_inner, &name_repair_opts, vec_args.indices, error_call )); const int* v_index = r_int_cbegin(index); // Not necessarily same as `size`! // // ``` // local_c_foobar() // list_combine( // list(foobar("a"), NULL, foobar("b")), // indices = list(2, 3, 1), // size = 3 // ) // ``` // // Implies `size` of 3 but `NULL` causes the `3` // index to get dropped, so `index_size` is `2`. const r_ssize index_size = r_length(index); r_obj* locations = KEEP(r_alloc_integer(size)); int* v_locations = r_int_begin(locations); // Initialize with missing to handle locations that are never selected for (r_ssize i = 0; i < size; ++i) { v_locations[i] = r_globals.na_int; } // At each index location, put the order value, note that results in "last // wins" behavior when multiple indices overwrite the same location for (r_ssize i = 0; i < index_size; ++i) { const int elt = v_index[i]; if (elt != r_globals.na_int) { v_locations[elt - 1] = i + 1; } } FREE(2); return locations; } /** * Recycles each element of `xs` to match the size * of the corresponding `indices` index. * * Used for `slice_xs = no`. */ static r_obj* vec_recycle_xs_fallback( r_obj* xs, r_obj* indices, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ) { r_ssize xs_size = vec_size(xs); r_obj* xs_names = KEEP(r_names(xs)); xs = KEEP(r_clone_referenced(xs)); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_xs_arg, xs_names, xs_size, &i ); KEEP(p_x_arg->shelter); r_obj* const* v_xs = r_list_cbegin(xs); r_obj* const* v_indices = r_list_cbegin(indices); for (; i < xs_size; ++i) { r_obj* x = v_xs[i]; r_ssize index_size = r_length(v_indices[i]); r_list_poke(xs, i, vec_recycle_fallback(x, index_size, p_x_arg, error_call)); } FREE(3); return xs; } /** * Slices each element of `xs` to match the size * of the corresponding `indices` index. * * Used for `slice_xs = yes`. */ static r_obj* vec_slice_xs_fallback( r_obj* xs, r_obj* indices, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ) { r_ssize xs_size = vec_size(xs); r_obj* xs_names = KEEP(r_names(xs)); xs = KEEP(r_clone_referenced(xs)); r_ssize i = 0; struct vctrs_arg* p_x_arg = new_subscript_arg( p_xs_arg, xs_names, xs_size, &i ); KEEP(p_x_arg->shelter); r_obj* const* v_xs = r_list_cbegin(xs); r_obj* const* v_indices = r_list_cbegin(indices); for (; i < xs_size; ++i) { r_obj* x = v_xs[i]; r_obj* index = v_indices[i]; const r_ssize x_size = vec_size(x); const r_ssize index_size = r_length(index); if (x_size == 1) { // Recycle "up" to the size of the index x = vec_recycle_fallback(x, index_size, p_x_arg, error_call); } else if (x_size == size) { // Slice "down" to the size of the index x = vec_slice_fallback(x, index); } else { // `x` is the wrong size, error vec_check_recyclable(x, size, VCTRS_ALLOW_NULL_no, p_x_arg, error_call); } r_list_poke(xs, i, x); } FREE(3); return xs; } // Converts compact `VCTRS_INDEX_STYLE_location` indices to // materialized `VCTRS_INDEX_STYLE_location` indices for the fallback. static r_obj* list_location_to_location_indices(r_obj* indices) { // Only clone if at least one index is compact, which is rare bool has_compact = false; const r_ssize indices_size = r_length(indices); r_obj* const* v_indices = r_list_cbegin(indices); for (r_ssize i = 0; i < indices_size; ++i) { r_obj* index = v_indices[i]; if (is_compact_seq(index)) { has_compact = true; break; } } if (!has_compact) { // Nothing to do, we are already in expanded location form return indices; } // We probably don't own these, the user provides them. indices = KEEP(r_clone_referenced(indices)); v_indices = r_list_cbegin(indices); for (r_ssize i = 0; i < indices_size; ++i) { r_obj* index = v_indices[i]; if (is_compact_seq(index)) { r_list_poke(indices, i, vec_subscript_materialize(index)); } } FREE(1); return indices; } // Converts `VCTRS_INDEX_STYLE_condition` indices to // `VCTRS_INDEX_STYLE_location` indices for the fallback. static r_obj* list_condition_to_location_indices(r_obj* indices) { // We probably don't own these, the user provides them. indices = KEEP(r_clone_referenced(indices)); const r_ssize indices_size = r_length(indices); r_obj* const* v_indices = r_list_cbegin(indices); // Because we want `c(FALSE, NA, TRUE)` to become `c(NA, TRUE)` // for assignment and size checking purposes const bool na_propagate = true; for (r_ssize i = 0; i < indices_size; ++i) { r_obj* index = v_indices[i]; r_list_poke(indices, i, r_lgl_which(index, na_propagate)); } FREE(1); return indices; } // Determines if the vector `x` implements an S3/S4 method for the `c()` generic static bool vec_implements_base_c(r_obj* x) { if (!r_is_object(x)) { return false; } if (r_is_s4(x)) { return s4_find_method(x, s4_c_method_table) != r_null; } else { return s3_find_method("c", x, base_method_table) != r_null; } } // Determines if the class vector `cls` implements an S3/S4 method for the `c()` generic // (inheritance is taken into account) static bool class_implements_base_c(r_obj* cls) { if (s3_class_find_method("c", cls, base_method_table) != r_null) { return true; } if (s4_class_find_method(cls, s4_c_method_table) != r_null) { return true; } return false; } // ------------------------------------------------------------------------------------------- enum list_combine_unmatched parse_list_combine_unmatched(r_obj* unmatched, struct r_lazy error_call) { if (!r_is_string(unmatched)) { r_abort_lazy_call( error_call, "`unmatched` must be a string, not %s.", r_obj_type_friendly(unmatched) ); } const char* c_unmatched = r_chr_get_c_string(unmatched, 0); if (!strcmp(c_unmatched, "default")) return LIST_COMBINE_UNMATCHED_default; if (!strcmp(c_unmatched, "error")) return LIST_COMBINE_UNMATCHED_error; r_abort_lazy_call( error_call, "`unmatched` must be either \"default\" or \"error\", not \"%s\".", c_unmatched ); } enum list_combine_multiple parse_list_combine_multiple(r_obj* multiple, struct r_lazy error_call) { if (!r_is_string(multiple)) { r_abort_lazy_call( error_call, "`multiple` must be a string, not %s.", r_obj_type_friendly(multiple) ); } const char* c_multiple = r_chr_get_c_string(multiple, 0); if (!strcmp(c_multiple, "last")) return LIST_COMBINE_MULTIPLE_last; if (!strcmp(c_multiple, "first")) return LIST_COMBINE_MULTIPLE_first; r_abort_lazy_call( error_call, "`multiple` must be either \"last\" or \"first\", not \"%s\".", c_multiple ); } // ------------------------------------------------------------------------------------------- /** * Compute the `indices` index style * * The index style is "all or nothing" for simplicity. * * - If all index vectors are simple logical condition index vectors, we use * `VCTRS_INDEX_STYLE_condition`. * - Otherwise we use `VCTRS_INDEX_STYLE_location`, which then goes through * `list_as_locations()` which requires that all index vectors be positive * integer location vectors to begin with. */ static enum vctrs_index_style compute_indices_style(r_obj* indices, r_ssize size) { r_obj* const* v_indices = r_list_cbegin(indices); r_ssize indices_size = vec_size(indices); for (r_ssize i = 0; i < indices_size; ++i) { r_obj* index = v_indices[i]; if (!is_condition_index(index)) { return VCTRS_INDEX_STYLE_location; } if (r_length(index) != size) { return VCTRS_INDEX_STYLE_location; } } return VCTRS_INDEX_STYLE_condition; } // ------------------------------------------------------------------------------------------- static void check_any_unmatched( r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, struct r_lazy error_call ) { r_obj* default_index = KEEP(compute_default_index(indices, indices_style, size)); const bool* v_default_index = compact_condition_cbegin(default_index); if (p_bool_any(v_default_index, size)) { r_obj* loc = KEEP(compact_condition_materialize_location(default_index)); stop_combine_unmatched(loc, error_call); } FREE(1); } static void stop_combine_unmatched(r_obj* loc, struct r_lazy error_call) { r_obj* syms[3] = { syms_loc, syms_call, NULL }; r_obj* args[3] = { loc, KEEP(r_lazy_eval_protect(error_call)), NULL }; r_obj* ffi_call = KEEP(r_call_n(syms_stop_combine_unmatched, syms, args)); Rf_eval(ffi_call, vctrs_ns_env); never_reached("stop_combine_unmatched"); } // Returns a compact_condition index static r_obj* compute_default_index( r_obj* indices, enum vctrs_index_style indices_style, r_ssize size ) { const r_ssize indices_size = r_length(indices); r_obj* const* v_indices = r_list_cbegin(indices); r_obj* out = KEEP(new_compact_condition(size)); bool* v_out = compact_condition_begin(out); // Initialize mark everything as unmatched p_bool_fill(v_out, size, true); // Unmark matched locations according to the index style switch (indices_style) { case VCTRS_INDEX_STYLE_location: { for (r_ssize i = 0; i < indices_size; ++i) { r_obj* index = v_indices[i]; if (is_compact_seq(index)) { const int* v_index = r_int_cbegin(index); const r_ssize start = v_index[0]; const r_ssize size = v_index[1]; const r_ssize step = v_index[2]; r_ssize loc = start; for (r_ssize j = 0; j < size; ++j) { v_out[loc] = false; loc += step; } } else { const r_ssize size = r_length(index); const int* v_index = r_int_cbegin(index); for (r_ssize j = 0; j < size; ++j) { const int loc = v_index[j]; if (loc != r_globals.na_int) { // If not `NA`, mark location as matched by at least 1 index v_out[loc - 1] = false; } } } } break; } case VCTRS_INDEX_STYLE_condition: { for (r_ssize i = 0; i < indices_size; ++i) { r_obj* index = v_indices[i]; const int* v_index = r_lgl_cbegin(index); for (r_ssize j = 0; j < size; ++j) { const int elt = v_index[j]; // If `TRUE`, mark location as matched by at least 1 index. // Specially optimized to be branchless, which does greatly help. v_out[j] &= (elt != 1); } } break; } default: { r_stop_unreachable(); } } FREE(1); return out; } // ------------------------------------------------------------------------------------------- // `ptype` determination is complicated by the fact that both `xs` and `default` // will contribute to the output type, and we want the best error messages // possible. We can't just fold `default` into `xs` because we don't get // a chance to use `p_default_arg`. We could materialize `p_default_arg` // and use it as a name on the `xs` list, but that means it will get combined // with `p_xs_arg` when there is an error, which we don't want. static r_obj* ptype_common_with_default( r_obj* ptype, r_obj* xs, bool has_default, r_obj* default_, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call, enum s3_fallback s3_fallback ) { if (ptype != r_null) { // Performs scalar checks and whatnot return vec_ptype_final(ptype, vec_args.ptype, error_call); } // Okay `ptype` is `NULL`. We determine it from `xs` and `default`. // Use only `xs` and `p_xs_arg` first for best errors. // Not finalising `ptype` yet in case we need to incorporate `default`! ptype = KEEP(vec_ptype_common( xs, ptype, PTYPE_FINALISE_false, s3_fallback, p_xs_arg, error_call )); // Now incorporate `default` and `p_default_arg` if required if (has_default) { int _; ptype = vec_ptype2( ptype, default_, vec_args.empty, p_default_arg, error_call, s3_fallback, &_ ); } KEEP(ptype); // Now finalise after incorporating `default` ptype = KEEP(vec_ptype_finalise(ptype)); FREE(3); return ptype; } // ------------------------------------------------------------------------------------------- vctrs/src/match-compare.h0000644000176200001440000001264015156001116015072 0ustar liggesusers#ifndef VCTRS_MATCH_COMPARE_H #define VCTRS_MATCH_COMPARE_H #include "vctrs-core.h" #include "compare.h" #include "poly-op.h" #include "type-complex.h" /* * These comparison operators are designed to match the comparison order * returned from: * `vec_order(x, direction = "asc", na_value = "smallest", nan_distinct = nan_distinct)` * * They are intended for internal use in `vec_joint_xtfrm()`, which uses that * exact setup to call `vec_order_info()`. * * In particular, double and complex types match the ordering results from * using `nan_distinct`. If `false`, they are treated equally. If `true`, * since this is ascending order and `NA` values are the smallest value, it * places `NA` before `NaN` followed by real numbers to match `vec_order()`. */ // ----------------------------------------------------------------------------- static inline int lgl_order_compare_na_equal(int x, int y, bool nan_distinct) { return lgl_compare_na_equal(x, y); } static inline int int_order_compare_na_equal(int x, int y, bool nan_distinct) { return int_compare_na_equal(x, y); } static inline int dbl_order_compare_na_equal(double x, double y, bool nan_distinct) { enum vctrs_dbl x_class = dbl_classify(x); enum vctrs_dbl y_class = dbl_classify(y); switch (x_class) { case VCTRS_DBL_number: { switch (y_class) { case VCTRS_DBL_number: return dbl_compare_scalar(x, y); case VCTRS_DBL_missing: return 1; case VCTRS_DBL_nan: return 1; } } case VCTRS_DBL_missing: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return 0; case VCTRS_DBL_nan: return nan_distinct ? -1 : 0; } } case VCTRS_DBL_nan: { switch (y_class) { case VCTRS_DBL_number: return -1; case VCTRS_DBL_missing: return nan_distinct ? 1 : 0; case VCTRS_DBL_nan: return 0; } } } r_stop_unreachable(); } static inline int cpl_order_compare_na_equal(r_complex x, r_complex y, bool nan_distinct) { x = cpl_normalise_missing(x); y = cpl_normalise_missing(y); const int cmp = dbl_order_compare_na_equal(x.r, y.r, nan_distinct); if (cmp == 0) { return dbl_order_compare_na_equal(x.i, y.i, nan_distinct); } else { return cmp; } } static inline int chr_order_compare_na_equal(r_obj* x, r_obj* y, bool nan_distinct) { return chr_compare_na_equal(x, y); } // ----------------------------------------------------------------------------- #define P_ORDER_COMPARE_NA_EQUAL(CTYPE, ORDER_COMPARE_NA_EQUAL) do { \ return ORDER_COMPARE_NA_EQUAL(((CTYPE const*) p_x)[i], ((CTYPE const*) p_y)[j], nan_distinct); \ } while (0) static inline int p_lgl_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(int, lgl_order_compare_na_equal); } static inline int p_int_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(int, int_order_compare_na_equal); } static inline int p_dbl_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(double, dbl_order_compare_na_equal); } static inline int p_cpl_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(r_complex, cpl_order_compare_na_equal); } static inline int p_chr_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct) { P_ORDER_COMPARE_NA_EQUAL(r_obj*, chr_order_compare_na_equal); } #undef P_ORDER_COMPARE_NA_EQUAL static inline int p_order_compare_na_equal(const void* p_x, r_ssize i, const void* p_y, r_ssize j, bool nan_distinct, const enum vctrs_type type) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_integer: return p_int_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_double: return p_dbl_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_complex: return p_cpl_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); case VCTRS_TYPE_character: return p_chr_order_compare_na_equal(p_x, i, p_y, j, nan_distinct); default: stop_unimplemented_vctrs_type("p_order_compare_na_equal", type); } } static inline int p_df_order_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, bool nan_distinct) { struct poly_df_data* x_data = (struct poly_df_data*) x; struct poly_df_data* y_data = (struct poly_df_data*) y; r_ssize n_col = x_data->n_col; enum vctrs_type* v_col_type = x_data->v_col_type; const void** v_x_col_ptr = x_data->v_col_ptr; const void** v_y_col_ptr = y_data->v_col_ptr; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { int cmp = p_order_compare_na_equal( v_x_col_ptr[col], i, v_y_col_ptr[col], j, nan_distinct, v_col_type[col] ); if (cmp == 0) { // Equal values for this column continue; } // Difference detected return cmp; } // All columns were equal return 0; } // ----------------------------------------------------------------------------- #endif vctrs/src/proxy.h0000644000176200001440000000055714315060310013534 0ustar liggesusers#ifndef VCTRS_PROXY_H #define VCTRS_PROXY_H #include "vctrs-core.h" r_obj* vec_proxy(r_obj* x); r_obj* vec_proxy_equal(r_obj* x); r_obj* vec_proxy_compare(r_obj* x); r_obj* vec_proxy_order(r_obj* x); r_obj* vec_proxy_recurse(r_obj* x); r_obj* vec_proxy_method(r_obj* x); r_obj* vec_proxy_invoke(r_obj* x, r_obj* method); r_obj* vec_proxy_unwrap(r_obj* x); #endif vctrs/src/cast-dispatch.h0000644000176200001440000000051514315060310015074 0ustar liggesusers#ifndef VCTRS_CAST_DISPATCH_H #define VCTRS_CAST_DISPATCH_H #include "vctrs-core.h" #include "cast.h" r_obj* vec_cast_dispatch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy); #endif vctrs/src/order-sortedness.h0000644000176200001440000000670315120272011015652 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #ifndef VCTRS_ORDER_SORTEDNESS_H #define VCTRS_ORDER_SORTEDNESS_H #include "vctrs-core.h" #include "order-groups.h" // ----------------------------------------------------------------------------- enum vctrs_sortedness { VCTRS_SORTEDNESS_unsorted, VCTRS_SORTEDNESS_sorted, VCTRS_SORTEDNESS_reversed, }; // ----------------------------------------------------------------------------- enum vctrs_sortedness dbl_sortedness(const double* p_x, r_ssize size, bool decreasing, bool na_last, bool nan_distinct, struct group_infos* p_group_infos); enum vctrs_sortedness int_sortedness(const int* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); enum vctrs_sortedness chr_sortedness(const SEXP* p_x, r_ssize size, bool decreasing, bool na_last, struct group_infos* p_group_infos); // ----------------------------------------------------------------------------- void ord_resolve_sortedness(enum vctrs_sortedness sortedness, r_ssize size, int* p_o); void ord_resolve_sortedness_chunk(enum vctrs_sortedness sortedness, r_ssize size, int* p_o); // ----------------------------------------------------------------------------- /* * Compare `x` to `y` lexicographically in a C-locale * * - `direction` is `1` for ascending and `-1` for descending * - `na_order` is `1` if `na_last = true` and `-1` if `na_last = false` */ static inline int str_cmp_maybe_na( SEXP x, SEXP y, const char* x_string, const char* y_string, const int direction, const int na_order ) { // Same pointer - including `NA`s if (x == y) { return 0; } if (x == NA_STRING) { return na_order; } if (y == NA_STRING) { return -na_order; } return direction * strcmp(x_string, y_string); } /* * Compare `x` to `y` lexicographically in a C-locale with `pass` information * * Guaranteed to never be `NA` */ static inline int str_cmp( const char* x, const char* y, const int direction ) { // Same pointer // In our research it seems like `strcmp()` doesn't optimize this check, // since it would be rare for most `strcmp()` usage. But for R's interned // strings it definitely matters for us. if (x == y) { return 0; } else { return direction * strcmp(x, y); } } static inline bool str_ge( const char* x, const char* y, const int direction ) { return str_cmp(x, y, direction) >= 0; } // ----------------------------------------------------------------------------- #endif vctrs/src/ptype.c0000644000176200001440000000641615156007671013526 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/ptype-decl.h" // [[ register() ]] r_obj* ffi_ptype(r_obj* x, r_obj* x_arg_ffi, r_obj* frame) { struct vctrs_arg x_arg = vec_as_arg(x_arg_ffi); struct r_lazy call = { .x = r_syms.call, .env = frame }; return vec_ptype(x, &x_arg, call); } r_obj* vec_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call) { switch (vec_typeof(x)) { case VCTRS_TYPE_null: return r_null; case VCTRS_TYPE_unspecified: return vctrs_shared_empty_uns; case VCTRS_TYPE_logical: return vec_ptype_slice(x, r_globals.empty_lgl); case VCTRS_TYPE_integer: return vec_ptype_slice(x, r_globals.empty_int); case VCTRS_TYPE_double: return vec_ptype_slice(x, r_globals.empty_dbl); case VCTRS_TYPE_complex: return vec_ptype_slice(x, r_globals.empty_cpl); case VCTRS_TYPE_character: return vec_ptype_slice(x, r_globals.empty_chr); case VCTRS_TYPE_raw: return vec_ptype_slice(x, r_globals.empty_raw); case VCTRS_TYPE_list: return vec_ptype_slice(x, r_globals.empty_list); case VCTRS_TYPE_dataframe: return df_ptype(x, true); case VCTRS_TYPE_s3: return s3_ptype(x, x_arg, call); case VCTRS_TYPE_scalar: stop_scalar_type(x, x_arg, call); } r_stop_unreachable(); } static r_obj* col_ptype(r_obj* x) { return vec_ptype(x, vec_args.empty, r_lazy_null); } static inline r_obj* vec_ptype_slice(r_obj* x, r_obj* empty) { if (r_attrib_has_any(x)) { // Slicing preserves attributes return vec_slice(x, r_null); } else { return empty; } } static r_obj* s3_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call) { switch (class_type(x)) { case VCTRS_CLASS_bare_tibble: return df_ptype(x, true); case VCTRS_CLASS_data_frame: return df_ptype(x, false); case VCTRS_CLASS_bare_data_frame: r_stop_internal("Bare data frames should be handled by `vec_ptype()`."); case VCTRS_CLASS_none: r_stop_internal("Non-S3 classes should be handled by `vec_ptype()`."); default: break; } r_obj* method = KEEP(vec_ptype_method(x)); r_obj* out; if (method == r_null) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, x_arg, call); out = vec_slice(x, r_null); } else { out = vec_ptype_invoke(x, method); } FREE(1); return out; } static inline r_obj* vec_ptype_method(r_obj* x) { r_obj* cls = KEEP(s3_get_class(x)); r_obj* method = s3_class_find_method("vec_ptype", cls, vctrs_method_table); FREE(1); return method; } static inline r_obj* vec_ptype_invoke(r_obj* x, r_obj* method) { return vctrs_dispatch1(syms_vec_ptype, method, syms_x, x); } r_obj* df_ptype(r_obj* x, bool bare) { r_obj* row_nms = KEEP(df_rownames(x)); r_obj* ptype = r_null; if (bare) { ptype = KEEP(bare_df_map(x, &col_ptype)); } else { ptype = KEEP(df_map(x, &col_ptype)); } if (r_typeof(row_nms) == R_TYPE_character) { r_attrib_poke(ptype, r_syms.row_names, r_globals.empty_chr); } FREE(2); return ptype; } r_obj* vec_ptype_final(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call) { r_obj* out = KEEP(vec_ptype(x, x_arg, call)); out = vec_ptype_finalise(out); FREE(1); return out; } void vctrs_init_ptype(r_obj* ns) { syms_vec_ptype = r_sym("vec_ptype"); } static r_obj* syms_vec_ptype = NULL; vctrs/src/set.h0000644000176200001440000000217114362266120013151 0ustar liggesusers#ifndef VCTRS_SET_H #define VCTRS_SET_H #include "vctrs-core.h" r_obj* vec_set_intersect(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_obj* vec_set_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_obj* vec_set_union(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); r_obj* vec_set_symmetric_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call); #endif vctrs/src/cast-bare.c0000644000176200001440000000664015156001116014211 0ustar liggesusers#include "vctrs.h" r_obj* int_as_logical(r_obj* x, bool* lossy) { int* data = r_int_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_logical(n)); int* out_data = r_lgl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; if (elt == r_globals.na_int) { *out_data = r_globals.na_lgl; continue; } if (elt != 0 && elt != 1) { *lossy = true; FREE(1); return r_null; } *out_data = elt; } FREE(1); return out; } r_obj* dbl_as_logical(r_obj* x, bool* lossy) { double* data = r_dbl_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_logical(n)); int* out_data = r_lgl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { double elt = *data; if (isnan(elt)) { *out_data = r_globals.na_lgl; continue; } if (elt != 0 && elt != 1) { *lossy = true; FREE(1); return r_null; } *out_data = (int) elt; } FREE(1); return out; } r_obj* chr_as_logical(r_obj* x, bool* lossy) { r_obj* const* x_p = r_chr_cbegin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_logical(n)); int* p_out = r_lgl_begin(out); for (r_ssize i = 0; i < n; ++i) { r_obj* str = x_p[i]; if (str == r_globals.na_str) { p_out[i] = r_globals.na_lgl; continue; } const char* elt = r_str_c_string(str); switch (elt[0]) { case 'T': if (elt[1] == '\0' || strcmp(elt, "TRUE") == 0) { p_out[i] = 1; continue; } break; case 'F': if (elt[1] == '\0' || strcmp(elt, "FALSE") == 0) { p_out[i] = 0; continue; } break; case 't': if (strcmp(elt, "true") == 0) { p_out[i] = 1; continue; } break; case 'f': if (strcmp(elt, "false") == 0) { p_out[i] = 0; continue; } break; default: break; } *lossy = true; FREE(1); return r_null; } FREE(1); return out; } r_obj* lgl_as_integer(r_obj* x, bool* lossy) { return Rf_coerceVector(x, INTSXP); } r_obj* dbl_as_integer(r_obj* x, bool* lossy) { double* data = r_dbl_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_integer(n)); int* out_data = r_int_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { double elt = *data; if (elt <= INT_MIN || elt >= INT_MAX + 1.0) { *lossy = true; FREE(1); return r_null; } if (isnan(elt)) { *out_data = r_globals.na_int; continue; } int value = (int) elt; if (value != elt) { *lossy = true; FREE(1); return r_null; } *out_data = value; } FREE(1); return out; } r_obj* lgl_as_double(r_obj* x, bool* lossy) { int* data = r_lgl_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_double(n)); double* out_data = r_dbl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; *out_data = (elt == r_globals.na_lgl) ? r_globals.na_dbl : elt; } FREE(1); return out; } r_obj* int_as_double(r_obj* x, bool* lossy) { int* data = r_int_begin(x); r_ssize n = r_length(x); r_obj* out = KEEP(r_alloc_double(n)); double* out_data = r_dbl_begin(out); for (r_ssize i = 0; i < n; ++i, ++data, ++out_data) { int elt = *data; *out_data = (elt == r_globals.na_int) ? r_globals.na_dbl : elt; } FREE(1); return out; } vctrs/src/ptype2.c0000644000176200001440000002074315132161317013577 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/ptype2-decl.h" // [[ register() ]] r_obj* ffi_ptype2_opts(r_obj* x, r_obj* y, r_obj* ffi_opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame, }; enum s3_fallback s3_fallback = s3_fallback_from_opts(ffi_opts); int _; return vec_ptype2( x, y, &x_arg, &y_arg, call, s3_fallback, &_ ); } r_obj* vec_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left ) { return vec_ptype2_impl( x, y, p_x_arg, p_y_arg, call, s3_fallback, left, true ); } static r_obj* vec_ptype2_impl( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left, bool first_pass ) { const enum vctrs_type x_type = vec_typeof(x); const enum vctrs_type y_type = vec_typeof(y); if (x_type == VCTRS_TYPE_null) { // When `x` and `y` are `NULL`, keep using `x` name (1) // When `x` is `NULL` but `y` isn't, switch to `y` name (0) *left = y_type == VCTRS_TYPE_null; return vec_ptype_or_s3_fallback(y, p_y_arg, x_type, call, s3_fallback); } if (y_type == VCTRS_TYPE_null) { // When `x` and `y` are `NULL`, keep using `x` name (1) // When `y` is `NULL` but `x` isn't, keep using `x` name (1) *left = 1; return vec_ptype_or_s3_fallback(x, p_x_arg, x_type, call, s3_fallback); } if (x_type == VCTRS_TYPE_unspecified) { // When `x` and `y` are unspecified, keep using `x` name (1) // When `x` is unspecified by `y` isn't, switch to `y` name (0) *left = y_type == VCTRS_TYPE_unspecified; return vec_ptype_or_s3_fallback(y, p_y_arg, y_type, call, s3_fallback); } if (y_type == VCTRS_TYPE_unspecified) { // When `x` and `y` are unspecified, keep using `x` name (1) // When `y` is unspecified but `x` isn't, keep using `x` name (1) *left = 1; return vec_ptype_or_s3_fallback(x, p_x_arg, x_type, call, s3_fallback); } if (x_type == VCTRS_TYPE_scalar) { stop_scalar_type(x, p_x_arg, call); } if (y_type == VCTRS_TYPE_scalar) { stop_scalar_type(y, p_y_arg, call); } if (x_type != VCTRS_TYPE_s3 && y_type != VCTRS_TYPE_s3) { return vec_ptype2_switch_native( x, y, x_type, y_type, p_x_arg, p_y_arg, call, s3_fallback, left ); } if (x_type == VCTRS_TYPE_s3 || y_type == VCTRS_TYPE_s3) { r_obj* out = KEEP(vec_ptype2_dispatch_native( x, y, x_type, y_type, p_x_arg, p_y_arg, call, s3_fallback, left )); if (out != r_null) { out = vec_shaped_ptype(out, x, y, p_x_arg, p_y_arg); FREE(1); return out; } FREE(1); } // Try native dispatch again with prototypes, in case the prototype // is another type. FIXME: Use R-level callback instead. if (first_pass) { x = KEEP(vec_ptype(x, p_x_arg, call)); y = KEEP(vec_ptype(y, p_y_arg, call)); r_obj* out = vec_ptype2_impl( x, y, p_x_arg, p_y_arg, call, s3_fallback, left, false ); FREE(2); return out; } return vec_ptype2_dispatch_s3( x, y, p_x_arg, p_y_arg, call, s3_fallback ); } static r_obj* vec_ptype2_switch_native( r_obj* x, r_obj* y, enum vctrs_type x_type, enum vctrs_type y_type, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left ) { enum vctrs_type2 type2 = vec_typeof2_impl(x_type, y_type, left); switch (type2) { case VCTRS_TYPE2_null_null: return r_null; case VCTRS_TYPE2_logical_logical: return vec_shaped_ptype(r_globals.empty_lgl, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_logical_integer: case VCTRS_TYPE2_integer_integer: return vec_shaped_ptype(r_globals.empty_int, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_logical_double: case VCTRS_TYPE2_integer_double: case VCTRS_TYPE2_double_double: return vec_shaped_ptype(r_globals.empty_dbl, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_integer_complex: case VCTRS_TYPE2_double_complex: case VCTRS_TYPE2_complex_complex: return vec_shaped_ptype(r_globals.empty_cpl, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_character_character: return vec_shaped_ptype(r_globals.empty_chr, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_raw_raw: return vec_shaped_ptype(r_globals.empty_raw, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_list_list: return vec_shaped_ptype(r_globals.empty_list, x, y, p_x_arg, p_y_arg); case VCTRS_TYPE2_dataframe_dataframe: return df_ptype2( x, y, p_x_arg, p_y_arg, call, s3_fallback ); default: return vec_ptype2_dispatch_s3( x, y, p_x_arg, p_y_arg, call, s3_fallback ); } } /** * Return `vec_ptype()`, allowing for common class fallback * * This is normally the `vec_ptype()` of the input, but if the common class * fallback is enabled we return the `vec_ptype2()` of this input with itself. * This way we may return a fallback sentinel which can be treated specially, * for instance in `vec_c(NA, x, NA)`. */ r_obj* vec_ptype_or_s3_fallback( r_obj* x, struct vctrs_arg* p_x_arg, enum vctrs_type x_type, struct r_lazy call, enum s3_fallback s3_fallback ) { if (s3_fallback == S3_FALLBACK_true && x_type == VCTRS_TYPE_s3) { int _; return vec_ptype2( x, x, p_x_arg, p_x_arg, r_lazy_null, s3_fallback, &_ ); } return vec_ptype(x, p_x_arg, call); } struct is_coercible_data { r_obj* x; r_obj* y; struct vctrs_arg* p_x_arg; struct vctrs_arg* p_y_arg; struct r_lazy call; enum s3_fallback s3_fallback; r_obj* out; }; static void vec_is_coercible_cb(void* data_) { struct is_coercible_data* data = (struct is_coercible_data*) data_; int _; data->out = vec_ptype2( data->x, data->y, data->p_x_arg, data->p_y_arg, data->call, data->s3_fallback, &_ ); } static void vec_is_coercible_e( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, ERR* err ) { struct is_coercible_data data = { .x = x, .y = y, .p_x_arg = p_x_arg, .p_y_arg = p_y_arg, .call = call, .s3_fallback = s3_fallback, .out = r_null }; *err = r_try_catch(&vec_is_coercible_cb, &data, syms_vctrs_error_incompatible_type, NULL, NULL); } bool vec_is_coercible( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { ERR err = NULL; vec_is_coercible_e( x, y, p_x_arg, p_y_arg, call, s3_fallback, &err ); return !err; } // [[ register() ]] r_obj* ffi_is_coercible(r_obj* x, r_obj* y, r_obj* opts, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; const enum s3_fallback s3_fallback = s3_fallback_from_opts(opts); return r_lgl(vec_is_coercible( x, y, &x_arg, &y_arg, call, s3_fallback )); } // [[ register() ]] r_obj* ffi_ptype2(r_obj* x, r_obj* y, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); struct r_lazy call = { .x = syms_call, .env = frame }; int _; return vec_ptype2( x, y, &x_arg, &y_arg, call, S3_FALLBACK_false, &_ ); } // Order on R side is important enum s3_fallback s3_fallback_from_opts(r_obj* opts) { return (enum s3_fallback) r_int_get(r_list_get(opts, 0), 0); } void vctrs_init_ptype2(r_obj* ns) { } vctrs/src/utils-dispatch.h0000644000176200001440000000131715113325071015310 0ustar liggesusers#ifndef VCTRS_UTILS_DISPATCH_H #define VCTRS_UTILS_DISPATCH_H #include "vctrs-core.h" enum vctrs_class_type { VCTRS_CLASS_list, VCTRS_CLASS_data_frame, VCTRS_CLASS_bare_asis, VCTRS_CLASS_bare_data_frame, VCTRS_CLASS_bare_tibble, VCTRS_CLASS_bare_factor, VCTRS_CLASS_bare_ordered, VCTRS_CLASS_bare_date, VCTRS_CLASS_bare_posixct, VCTRS_CLASS_bare_posixlt, VCTRS_CLASS_unknown, VCTRS_CLASS_none }; enum vctrs_class_type class_type(r_obj* x); static inline bool class_type_is_data_frame(enum vctrs_class_type type) { switch (type) { case VCTRS_CLASS_data_frame: case VCTRS_CLASS_bare_data_frame: case VCTRS_CLASS_bare_tibble: return true; default: return false; } } #endif vctrs/src/decl/0000755000176200001440000000000015157322654013123 5ustar liggesusersvctrs/src/decl/names-decl.h0000644000176200001440000000221515042720152015270 0ustar liggesusersstatic r_obj* syms_as_universal_names; static r_obj* syms_check_unique_names; static r_obj* fns_as_universal_names; static r_obj* fns_check_unique_names; static r_obj* syms_glue_as_name_spec; static r_obj* fns_glue_as_name_spec; static r_obj* syms_internal_spec; static r_obj* syms_set_rownames_dispatch; static r_obj* fns_set_rownames_dispatch; static r_obj* syms_set_names_dispatch; static r_obj* fns_set_names_dispatch; static void describe_repair(r_obj* old_names, r_obj* new_names); static r_obj* check_unique_names(r_obj* names, const struct name_repair_opts* opts); static void vec_validate_minimal_names(r_obj* names, r_ssize n, struct r_lazy call); r_obj* ffi_as_minimal_names(r_obj* names); static bool any_has_suffix(r_obj* names); static r_obj* as_unique_names_impl(r_obj* names, bool quiet); static void stop_large_name(void); static bool is_dotdotint(const char* name); static ptrdiff_t suffix_pos(const char* name); static bool needs_suffix(r_obj* str); static r_obj* names_iota(r_ssize n); static r_obj* vec_unique_names_impl(r_obj* names, r_ssize n, bool quiet); static r_obj* glue_as_name_spec(r_obj* spec); vctrs/src/decl/rep-decl.h0000644000176200001440000000335714363556517015004 0ustar liggesusersstatic inline void stop_rep_times_size(struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void check_rep_times(int times, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void check_rep_each_times(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline bool multiply_would_overflow(r_ssize x, r_ssize y); static inline bool plus_would_overflow(r_ssize x, r_ssize y); static inline void stop_rep_size_oob(struct r_lazy call); static r_obj* vec_rep_each_uniform(r_obj* x, int times, struct r_lazy error_call, struct vctrs_arg* p_times_arg); static r_obj* vec_rep_each_impl(r_obj* x, r_obj* times, const r_ssize times_size, struct r_lazy error_call, struct vctrs_arg* p_times_arg); static inline void stop_rep_times_negative(struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_times_missing(struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_times_oob(int times, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_each_times_negative(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_each_times_missing(r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static inline void stop_rep_each_times_oob(int times, r_ssize i, struct r_lazy call, struct vctrs_arg* p_times_arg); static r_obj* vec_unrep(r_obj* x, struct r_lazy error_call); vctrs/src/decl/missing-decl.h0000644000176200001440000000423614315060310015636 0ustar liggesusersstatic inline r_obj* proxy_detect_missing(r_obj* proxy); static inline r_obj* lgl_detect_missing(r_obj* x); static inline r_obj* int_detect_missing(r_obj* x); static inline r_obj* dbl_detect_missing(r_obj* x); static inline r_obj* cpl_detect_missing(r_obj* x); static inline r_obj* raw_detect_missing(r_obj* x); static inline r_obj* chr_detect_missing(r_obj* x); static inline r_obj* list_detect_missing(r_obj* x); static inline r_obj* df_detect_missing(r_obj* x); static inline r_ssize col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize lgl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize int_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize dbl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize cpl_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize raw_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize chr_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize list_col_detect_missing(r_obj* x, r_ssize* v_loc, r_ssize loc_size); static inline r_ssize proxy_first_missing(r_obj* proxy); static inline r_ssize lgl_first_missing(r_obj* x); static inline r_ssize int_first_missing(r_obj* x); static inline r_ssize dbl_first_missing(r_obj* x); static inline r_ssize cpl_first_missing(r_obj* x); static inline r_ssize raw_first_missing(r_obj* x); static inline r_ssize chr_first_missing(r_obj* x); static inline r_ssize list_first_missing(r_obj* x); static inline r_ssize df_first_missing(r_obj* x); static inline const unsigned char* r_uchar_cbegin(r_obj* x); vctrs/src/decl/type-data-frame-decl.h0000644000176200001440000000302015156537555017163 0ustar liggesusersstatic r_obj* syms_df_lossy_cast; static r_obj* fns_df_lossy_cast; static r_obj* new_compact_rownames(r_ssize n); static r_ssize df_size_from_n(r_obj* n); static r_obj* c_data_frame_class(r_obj* cls); static void attrib_append_row_names(r_obj* x, r_obj* row_names); static r_obj* data_frame(r_obj* x, r_ssize size, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call); static r_obj* df_list(r_obj* x, r_ssize size, bool unpack, const struct name_repair_opts* p_name_repair_opts, struct r_lazy error_call); static r_obj* df_list_drop_null(r_obj* x); static r_obj* df_list_unpack(r_obj* x); static void init_bare_data_frame(r_obj* x, r_ssize n); static r_obj* df_ptype2_match( r_obj* x, r_obj* y, r_obj* x_names, r_obj* y_names, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); static r_obj* df_ptype2_loop( r_obj* x, r_obj* y, r_obj* names, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ); static r_obj* df_cast_match(const struct cast_opts* opts, r_obj* x_names, r_obj* to_names); static r_obj* df_cast_loop(const struct cast_opts* opts, r_obj* names); static r_ssize df_flatten_loop(r_obj* x, r_obj* out, r_obj* out_names, r_ssize counter); vctrs/src/decl/compare-decl.h0000644000176200001440000000073114315060310015607 0ustar liggesusersstatic r_obj* df_compare(r_obj* x, r_obj* y, bool na_equal, r_ssize size); static void df_compare_impl(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal); static void vec_compare_col(int* v_out, struct df_short_circuit_info* p_info, r_obj* x, r_obj* y, bool na_equal); vctrs/src/decl/encoding-decl.h0000644000176200001440000000033115156537555015773 0ustar liggesusersstatic r_obj* chr_encode_utf8(r_obj* x); static inline r_ssize chr_find_encoding_start(r_obj* x, r_ssize size); static r_obj* list_encode_utf8(r_obj* x); static r_obj* obj_attrib_encode_utf8(r_obj* x, bool owned); vctrs/src/decl/dictionary-decl.h0000644000176200001440000000222515061010234016325 0ustar liggesusersstatic inline uint32_t dict_key_size(SEXP x); static inline void vctrs_unique_loc_loop(struct dictionary* d, struct growable* g, R_len_t n); static inline bool duplicated_any_loop(struct dictionary* d, R_len_t n); static inline void vctrs_n_distinct_loop(struct dictionary* d, R_len_t n); static inline void vctrs_id_loop(struct dictionary* d, R_len_t n, int* p_out); static inline void vec_match_loop( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ); static inline void vec_match_loop_propagate( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ); static inline void vec_in_loop( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ); static inline void vec_in_loop_propagate( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ); static inline void load_with_haystack(struct dictionary* d, R_len_t n_haystack); static inline void vctrs_count_loop(struct dictionary* d, R_len_t n, int* p_count); static inline void vctrs_duplicated_loop(struct dictionary* d, R_len_t n, uint32_t* p_hashes, int* p_out); vctrs/src/decl/subscript-decl.h0000644000176200001440000000167514315060310016207 0ustar liggesusersstatic r_obj* fns_cnd_body_subscript_dim; static r_obj* new_error_subscript_type(r_obj* subscript, const struct subscript_opts* opts, r_obj* body); static enum subscript_type_action parse_subscript_arg_type(r_obj* x, const char* kind); static r_obj* obj_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err); static r_obj* dbl_cast_subscript(r_obj* subscript, const struct subscript_opts* opts, ERR* err); static r_obj* dbl_cast_subscript_fallback(r_obj* subscript, const struct subscript_opts* opts, ERR* err); static r_obj* syms_new_dbl_cast_subscript_body; static r_obj* syms_lossy_err; static r_obj* syms_new_error_subscript_type; vctrs/src/decl/match-decl.h0000644000176200001440000003061314511320527015266 0ustar liggesusers// Initialised at load time struct vctrs_arg args_incomplete_; static struct vctrs_arg* const args_incomplete = &args_incomplete_; struct vctrs_arg args_no_match_; static struct vctrs_arg* const args_no_match = &args_no_match_; struct vctrs_arg args_remaining_; static struct vctrs_arg* const args_remaining = &args_remaining_; static r_obj* vec_locate_matches(r_obj* needles, r_obj* haystack, r_obj* condition, r_obj* filter, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool nan_distinct, r_obj* chr_proxy_collate, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call); static r_obj* df_locate_matches(r_obj* needles, r_obj* haystack, r_obj* needles_complete, r_obj* haystack_complete, r_ssize size_needles, r_ssize size_haystack, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call); static void df_locate_matches_recurse(r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack); static void df_locate_matches_with_containers(int n_containers, const int* v_container_ids, r_ssize col, r_ssize loc_lower_bound_o_needles, r_ssize loc_upper_bound_o_needles, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack, const struct poly_df_data* p_needles, const struct poly_df_data* p_haystack, const struct poly_df_data* p_needles_complete, const struct poly_df_data* p_haystack_complete, const int* v_o_needles, const int* v_o_haystack, const struct vctrs_incomplete* incomplete, enum vctrs_multiple multiple, bool any_filters, const enum vctrs_filter* v_filters, const enum vctrs_ops* v_ops, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, int* v_loc_filter_match_o_haystack); static inline r_ssize int_locate_upper_incomplete(const int* v_haystack_complete, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static inline r_ssize int_locate_lower_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static inline r_ssize int_locate_upper_duplicate(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static inline struct vctrs_match_bounds int_locate_match(int val_needle, const int* v_haystack, const int* v_o_haystack, r_ssize loc_lower_bound_o_haystack, r_ssize loc_upper_bound_o_haystack); static r_obj* df_joint_xtfrm_by_col(r_obj* x, r_obj* y, r_ssize x_size, r_ssize y_size, r_ssize n_cols, bool nan_distinct, r_obj* chr_proxy_collate); static r_obj* df_detect_complete_by_col(r_obj* x, r_ssize x_size, r_ssize n_cols); static inline void parse_condition(r_obj* condition, r_ssize n_cols, enum vctrs_ops* v_ops); static inline struct vctrs_no_match parse_no_match(r_obj* no_match, struct r_lazy call); static inline struct vctrs_remaining parse_remaining(r_obj* remaining, struct r_lazy call); static inline struct vctrs_incomplete parse_incomplete(r_obj* incomplete, struct r_lazy call); static inline enum vctrs_multiple parse_multiple(r_obj* multiple, struct r_lazy call); static inline enum vctrs_relationship parse_relationship(r_obj* relationship, struct r_lazy call); static inline void parse_filter(r_obj* filter, r_ssize n_cols, enum vctrs_filter* v_filters); static r_obj* expand_compact_indices(const int* v_o_haystack, struct r_dyn_array* p_loc_first_match_o_haystack, struct r_dyn_array* p_size_match, struct r_dyn_array* p_loc_needles, bool skip_size_match, bool skip_loc_needles, const struct vctrs_incomplete* incomplete, const struct vctrs_no_match* no_match, const struct vctrs_remaining* remaining, enum vctrs_multiple multiple, enum vctrs_relationship relationship, r_ssize size_needles, r_ssize size_haystack, bool any_non_equi, bool has_loc_filter_match_o_haystack, const enum vctrs_filter* v_filters, const int* v_loc_filter_match_o_haystack, const struct poly_df_data* p_haystack, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy error_call); static r_obj* compute_nesting_container_info(r_obj* haystack, r_ssize size_haystack, const enum vctrs_ops* v_ops); static r_obj* compute_nesting_container_ids(r_obj* x, const int* v_order, const int* v_group_sizes, const int* v_outer_group_sizes, r_ssize size, r_ssize n_groups, bool has_outer_group_sizes); static inline bool p_nesting_container_df_compare_fully_ge_na_equal(const void* x, r_ssize i, const void* y, r_ssize j); static inline int p_matches_df_compare_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters); static inline bool p_matches_df_equal_na_equal(const void* x, r_ssize i, const void* y, r_ssize j, const enum vctrs_filter* v_filters); static inline r_ssize midpoint(r_ssize lhs, r_ssize rhs); static inline void stop_matches_overflow(double size, struct r_lazy call); static inline void stop_matches_nothing(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_remaining(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_incomplete(r_ssize i, struct vctrs_arg* needles_arg, struct r_lazy call); static inline void stop_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void warn_matches_multiple(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_relationship_one_to_one(r_ssize i, const char* which, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_relationship_one_to_many(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void stop_matches_relationship_many_to_one(r_ssize i, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); static inline void warn_matches_relationship_many_to_many(r_ssize i, r_ssize j, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call); vctrs/src/decl/ptype2-dispatch-decl.h0000644000176200001440000000004714315060310017201 0ustar liggesusersstatic r_obj* syms_vec_ptype2_default; vctrs/src/decl/interval-decl.h0000644000176200001440000000225614315060310016011 0ustar liggesusers// Initialized at load time struct vctrs_arg args_start_; static struct vctrs_arg* const args_start = &args_start_; struct vctrs_arg args_end_; static struct vctrs_arg* const args_end = &args_end_; struct vctrs_arg args_lower_; static struct vctrs_arg* const args_lower = &args_lower_; struct vctrs_arg args_upper_; static struct vctrs_arg* const args_upper = &args_upper_; static r_obj* vec_interval_group_info(r_obj* start, r_obj* end, bool abutting, enum vctrs_interval_missing missing, bool locations); static r_obj* vec_interval_complement(r_obj* start, r_obj* end, r_obj* lower, r_obj* upper); static r_obj* vec_interval_locate_containers(r_obj* start, r_obj* end); static inline r_obj* interval_order(r_obj* start, r_obj* end, r_obj* direction, r_obj* na_value, r_ssize size); static inline enum vctrs_interval_missing parse_missing(r_obj* missing); vctrs/src/decl/subscript-loc-decl.h0000644000176200001440000000631314315060310016754 0ustar liggesusersstatic r_obj* lgl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* int_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* int_invert_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* int_filter_zero(r_obj* subscript, r_ssize n_zero); static r_obj* int_filter_missing(r_obj* subscript, r_ssize n_missing); static r_obj* int_filter_oob(r_obj* subscript, r_ssize n, r_ssize n_oob); static void int_check_consecutive(r_obj* subscript, r_ssize n, r_ssize n_extend, const struct location_opts* opts); static r_obj* dbl_as_location(r_obj* subscript, r_ssize n, const struct location_opts* opts); static r_obj* chr_as_location(r_obj* subscript, r_obj* names, const struct location_opts* opts); static void stop_subscript_missing(r_obj* i, const struct location_opts* opts); static void stop_subscript_empty(r_obj* i, const struct location_opts* opts); static void stop_subscript_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts); static void stop_subscript_negative_oob_location(r_obj* i, r_ssize size, const struct location_opts* opts); static void stop_subscript_oob_name(r_obj* i, r_obj* names, const struct location_opts* opts); static void stop_location_negative(r_obj* i, const struct location_opts* opts); static void stop_location_zero(r_obj* i, const struct location_opts* opts); static void stop_indicator_size(r_obj* i, r_obj* n, const struct location_opts* opts); static void stop_location_negative_missing(r_obj* i, const struct location_opts* opts); static void stop_location_negative_positive(r_obj* i, const struct location_opts* opts); static void stop_location_oob_non_consecutive(r_obj* i, r_ssize size, const struct location_opts* opts); static enum subscript_missing parse_subscript_arg_missing(r_obj* x, struct r_lazy call); static enum num_loc_negative parse_loc_negative(r_obj* x, struct r_lazy call); static enum num_loc_oob parse_loc_oob(r_obj* x, struct r_lazy call); static enum num_loc_zero parse_loc_zero(r_obj* x, struct r_lazy call); static void stop_subscript_arg_missing(struct r_lazy call); static void stop_bad_negative(struct r_lazy call); static void stop_bad_oob(struct r_lazy call); static void stop_bad_zero(struct r_lazy call); vctrs/src/decl/list-unchop-decl.h0000644000176200001440000000033115072256373016443 0ustar liggesusersstatic r_obj* list_unchop( r_obj* xs, r_obj* indices, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call ); vctrs/src/decl/if-else-decl.h0000644000176200001440000000227615065005761015527 0ustar liggesusersstatic r_obj* generic_if_else( r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_obj* ptype, r_ssize size, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call ); static r_obj* atomic_if_else( r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_obj* ptype, r_ssize size, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call, bool has_missing ); static r_obj* atomic_if_else_switch( enum r_type type, r_obj* condition, r_obj* true_, r_obj* false_, r_obj* missing, r_ssize size, r_ssize true_size, r_ssize false_size, r_ssize missing_size, r_obj* true_names, r_obj* false_names, r_obj* missing_names, bool has_missing, bool has_true_names, bool has_false_names, bool has_missing_names ); static bool ptype_is_atomic(r_obj* ptype); static r_obj* ptype_finalize( r_obj* ptype, r_obj* true_, r_obj* false_, r_obj* missing, bool has_missing, struct vctrs_arg* p_true_arg, struct vctrs_arg* p_false_arg, struct vctrs_arg* p_missing_arg, struct r_lazy error_call ); vctrs/src/decl/unspecified-decl.h0000644000176200001440000000037315132161317016470 0ustar liggesusersstatic SEXP syms_vec_ptype_finalise_dispatch; static SEXP fns_vec_ptype_finalise_dispatch; static r_obj* vec_ptype_finalise_unspecified(r_obj* x); static r_obj* vec_ptype_finalise_dispatch(r_obj* x); static inline bool lgl_is_unspecified(SEXP x); vctrs/src/decl/rlang-dev-decl.h0000644000176200001440000000000014315060310016025 0ustar liggesusersvctrs/src/decl/rank-decl.h0000644000176200001440000000230014315060310015106 0ustar liggesusersstatic inline enum ties parse_ties(r_obj* ties); static inline enum incomplete parse_incomplete(r_obj* incomplete); static inline bool r_lgl_all(r_obj* x); static r_obj* vec_rank(r_obj* x, enum ties ties_type, enum incomplete incomplete_type, r_obj* direction, r_obj* na_value, bool nan_distinct, r_obj* chr_proxy_collate); static void vec_rank_min(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); static void vec_rank_max(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); static void vec_rank_sequential(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); static void vec_rank_dense(const int* v_order, const int* v_group_sizes, r_ssize n_groups, int* v_rank); vctrs/src/decl/cast-decl.h0000644000176200001440000000043714315060310015116 0ustar liggesusersstatic r_obj* vec_cast_switch_native(const struct cast_opts* opts, enum vctrs_type x_type, enum vctrs_type to_type, bool* lossy); static r_obj* vec_cast_dispatch_s3(const struct cast_opts* opts); vctrs/src/decl/ptype-decl.h0000644000176200001440000000062015113335375015334 0ustar liggesusersstatic SEXP syms_vec_ptype; static inline r_obj* vec_ptype_slice(r_obj* x, r_obj* empty); static r_obj* df_ptype(r_obj* x, bool bare); static r_obj* col_ptype(r_obj* x); static r_obj* s3_ptype(r_obj* x, struct vctrs_arg* x_arg, struct r_lazy call); static inline r_obj* vec_ptype_method(r_obj* x); static inline r_obj* vec_ptype_invoke(r_obj* x, r_obj* method); vctrs/src/decl/type-integer64-decl.h0000644000176200001440000000022614276722575016776 0ustar liggesusersstatic inline void int64_unpack(int64_t x, r_ssize i, double* v_left, double* v_right); static inline int64_t int64_pack(double left, double right); vctrs/src/decl/recode-decl.h0000644000176200001440000000123315065005761015434 0ustar liggesusersstatic r_obj* build_indices_for_to_as_list_of_vectors( r_obj* x, r_obj* from_flat, r_obj* from_flat_map, r_ssize x_size, r_ssize from_size ); static r_obj* build_xs_and_indices_for_to_as_vector( r_obj* x, r_obj* from_flat, r_obj* to, r_ssize x_size ); static r_obj* build_indices_for_single_to(r_obj* x, r_obj* from_flat); static r_obj* build_from_flat_map(r_obj* from, r_ssize from_size); static r_obj* build_repeated_to(r_obj* to, r_obj* from); static r_obj* ptype_finalize( r_obj* ptype, r_obj* to, r_obj* default_, bool to_as_list_of_vectors, struct vctrs_arg* p_to_arg, struct vctrs_arg* p_default_arg, struct r_lazy call ); vctrs/src/decl/parallel-decl.h0000644000176200001440000000276715113325071015775 0ustar liggesusersstatic r_obj* vec_parallel( r_obj* xs, enum vec_parallel_missing missing, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call, enum vec_parallel_variant parallel ); static inline void vec_parallel_init(const int* v_x, enum vec_parallel_missing missing, r_ssize size, int* v_out); static inline void vec_parallel_init_missing_as_na(const int* v_x, r_ssize size, int* v_out); static inline void vec_parallel_init_missing_as_false(const int* v_x, r_ssize size, int* v_out); static inline void vec_parallel_init_missing_as_true(const int* v_x, r_ssize size, int* v_out); static inline void vec_pany_fill(const int* v_x, enum vec_parallel_missing missing, r_ssize size, int* v_out); static inline void vec_pall_fill(const int* v_x, enum vec_parallel_missing missing, r_ssize size, int* v_out); static inline void vec_pany_fill_missing_as_na(const int* v_x, r_ssize size, int* v_out); static inline void vec_pall_fill_missing_as_na(const int* v_x, r_ssize size, int* v_out); static inline void vec_pany_fill_missing_as_false(const int* v_x, r_ssize size, int* v_out); static inline void vec_pall_fill_missing_as_false(const int* v_x, r_ssize size, int* v_out); static inline void vec_pany_fill_missing_as_true(const int* v_x, r_ssize size, int* v_out); static inline void vec_pall_fill_missing_as_true(const int* v_x, r_ssize size, int* v_out); static enum vec_parallel_missing parse_vec_parallel_missing(r_obj* missing, struct r_lazy error_call); static r_ssize compute_size(r_ssize size, r_obj* xs); vctrs/src/decl/match-joint-decl.h0000644000176200001440000000053014350637775016423 0ustar liggesusersstatic inline r_obj* vec_joint_proxy_order(r_obj* x, r_obj* y); static inline r_obj* vec_joint_proxy_order_independent(r_obj* x, r_obj* y); static inline r_obj* vec_joint_proxy_order_dependent(r_obj* x, r_obj* y); static inline r_obj* vec_joint_proxy_order_s3(r_obj* x, r_obj* y); static inline r_obj* df_joint_proxy_order(r_obj* x, r_obj* y); vctrs/src/decl/slice-interleave-decl.h0000644000176200001440000000031315075743736017440 0ustar liggesusersstatic r_ssize list_interleave_x_size_used(r_obj* const* v_x, r_ssize x_size); static r_obj* list_interleave_indices( r_obj* const* v_x, r_ssize x_size, r_ssize x_size_used, r_ssize elt_size ); vctrs/src/decl/order-decl.h0000644000176200001440000002056215120272011015276 0ustar liggesusersstatic inline bool parse_nan_distinct(SEXP nan_distinct); static SEXP vec_order_info_impl( SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, bool group_sizes ); static SEXP vec_locate_sorted_groups( SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate ); static inline size_t vec_compute_n_bytes_lazy_raw(SEXP x, const enum vctrs_type type); static inline size_t vec_compute_n_bytes_lazy_counts(SEXP x, const enum vctrs_type type); static SEXP parse_na_value(SEXP na_value); static SEXP parse_direction(SEXP direction); static SEXP vec_order_expand_args(SEXP x, SEXP decreasing, SEXP na_largest); static SEXP vec_order_compute_na_last(SEXP na_largest, SEXP decreasing); static void vec_order_switch( SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void df_order( SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void vec_order_base_switch( SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void int_order( SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void lgl_order( SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void dbl_order( SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void cpl_order( SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void chr_order( SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct group_infos* p_group_infos ); static void int_adjust( const bool decreasing, const bool na_last, const r_ssize size, void* p_x ); static void int_compute_range( const int* p_x, r_ssize size, int* p_x_min, uint32_t* p_range ); static void int_order_counting( const int* p_x, r_ssize size, int x_min, uint32_t range, bool initialized, bool decreasing, bool na_last, int* p_o, int* p_o_aux, struct group_infos* p_group_infos ); static void int_order_insertion( const r_ssize size, uint32_t* p_x, int* p_o, struct group_infos* p_group_infos ); static void int_order_radix( const r_ssize size, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos ); static inline uint32_t int_map_to_uint32(int x); static uint8_t int_compute_skips(const uint32_t* p_x, r_ssize size, bool* p_skips); static void int_order_radix_recurse( const r_ssize size, const uint8_t pass, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos ); static inline uint8_t int_extract_uint32_byte(uint32_t x, uint8_t shift); static void dbl_order_chunk_impl( bool decreasing, bool na_last, bool nan_distinct, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void dbl_order_impl( const double* p_x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static void dbl_adjust( const bool decreasing, const bool na_last, const bool nan_distinct, const r_ssize size, void* p_x ); static void dbl_order_insertion( const r_ssize size, uint64_t* p_x, int* p_o, struct group_infos* p_group_infos ); static void dbl_order_radix( const r_ssize size, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos ); static inline void dbl_adjust_nan_identical( const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64 ); static inline void dbl_adjust_nan_distinct( const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64 ); static inline uint64_t dbl_map_to_uint64(double x); static inline uint64_t dbl_flip_uint64(uint64_t x); static uint8_t dbl_compute_skips(const uint64_t* p_x, r_ssize size, bool* p_skips); static void dbl_order_radix_recurse( const r_ssize size, const uint8_t pass, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos ); static inline uint8_t dbl_extract_uint64_byte(uint64_t x, uint8_t shift); static struct r_ssize_int_pair chr_extract_without_missings( r_ssize size, const SEXP* p_x, const char** p_x_strings ); static void chr_handle_missings( r_ssize size, r_ssize n_missing, const bool na_last, const SEXP* p_x, int* p_o, int* p_o_aux ); static void chr_order_radix( const r_ssize size, const bool decreasing, const int max_string_size, const char** p_x, int* p_o, const char** p_x_aux, int* p_o_aux, uint8_t* p_bytes, struct group_infos* p_group_infos ); static void chr_order_radix_recurse( const r_ssize size, const bool decreasing, const int pass, const int max_string_size, const char** p_x, int* p_o, const char** p_x_aux, int* p_o_aux, uint8_t* p_bytes, struct group_infos* p_group_infos ); static void chr_order_insertion( const r_ssize size, const bool decreasing, const char** p_x, int* p_o, struct group_infos* p_group_infos ); static inline bool chr_all_same( const char** p_x, const r_ssize size ); static inline bool chr_all_same_byte( const char** p_x, const r_ssize size ); static inline bool str_ge( const char* x, const char* y, const int direction ); static void vec_order_chunk_switch( bool decreasing, bool na_last, bool nan_distinct, r_ssize size, const enum vctrs_type type, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ); static inline size_t df_compute_n_bytes_lazy_raw(SEXP x); static size_t df_compute_n_bytes_lazy_counts(SEXP x); static SEXP df_expand_args(SEXP x, SEXP args); static SEXP expand_arg(SEXP arg, const int* p_expansions, r_ssize arg_size, r_ssize size); static int vec_decreasing_expansion(SEXP x); static int df_decreasing_expansion(SEXP x); static int parse_na_value_one(SEXP x); static int parse_direction_one(SEXP x); vctrs/src/decl/group-decl.h0000644000176200001440000000051515047425317015334 0ustar liggesusersstatic inline void vctrs_group_id_loop(struct dictionary* d, R_len_t n, int* p_out); static inline R_len_t vctrs_group_rle_loop(struct dictionary* d, R_len_t n, int* p_g, int* p_l); static inline SEXP new_group_rle(SEXP g, SEXP l, R_len_t n); static inline void vec_group_loc_loop(struct dictionary* d, R_len_t n, int* p_groups); vctrs/src/decl/poly-op-decl.h0000644000176200001440000000103415047425317015574 0ustar liggesusersstatic void init_nil_poly_vec(struct poly_vec* p_poly_vec); static void init_lgl_poly_vec(struct poly_vec* p_poly_vec); static void init_int_poly_vec(struct poly_vec* p_poly_vec); static void init_dbl_poly_vec(struct poly_vec* p_poly_vec); static void init_cpl_poly_vec(struct poly_vec* p_poly_vec); static void init_chr_poly_vec(struct poly_vec* p_poly_vec); static void init_raw_poly_vec(struct poly_vec* p_poly_vec); static void init_list_poly_vec(struct poly_vec* p_poly_vec); static void init_df_poly_vec(struct poly_vec* p_poly_vec); vctrs/src/decl/hash-decl.h0000644000176200001440000000355215156537736015141 0ustar liggesusersstatic inline uint32_t sexp_hash(r_obj* x); static inline uint32_t lgl_hash(r_obj* x); static inline uint32_t int_hash(r_obj* x); static inline uint32_t dbl_hash(r_obj* x); static inline uint32_t cpl_hash(r_obj* x); static inline uint32_t raw_hash(r_obj* x); static inline uint32_t chr_hash(r_obj* x); static inline uint32_t list_hash(r_obj* x); static inline uint32_t expr_hash(r_obj* x); static inline uint32_t node_hash(r_obj* x); static inline uint32_t fn_hash(r_obj* x); static inline uint32_t attrib_hash(r_obj* x); static inline r_obj* attrib_hash_cb(r_obj* tag, r_obj* value, void* data); static inline void lgl_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void int_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void dbl_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void cpl_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void chr_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void raw_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void list_hash_fill_na_equal(r_obj* x, r_ssize size, uint32_t* v_out); static inline void lgl_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void int_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void dbl_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void cpl_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void chr_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void raw_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void list_hash_fill_na_propagate(r_obj* x, r_ssize size, uint32_t* v_out); static inline void df_hash_fill(r_obj* x, r_ssize size, bool na_equal, uint32_t* v_out); vctrs/src/decl/bind-decl.h0000644000176200001440000000253715075506244015122 0ustar liggesusersstatic r_obj* vec_rbind(r_obj* xs, r_obj* ptype, r_obj* id, struct name_repair_opts* name_repair, r_obj* name_spec, struct r_lazy error_call); static r_obj* as_df_row(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call); static r_obj* as_df_row_impl(r_obj* x, struct name_repair_opts* name_repair, struct r_lazy error_call); static struct name_repair_opts validate_bind_name_repair(r_obj* name_repair, bool allow_minimal); static r_obj* vec_cbind(r_obj* xs, r_obj* ptype, r_obj* size, struct name_repair_opts* name_repair, struct r_lazy error_call); static r_obj* cbind_names_to(bool has_names, r_obj* names_to, r_obj* ptype, struct r_lazy error_call); static r_obj* as_df_col(r_obj* x, r_obj* outer, bool* allow_pack, struct r_lazy error_call); static r_obj* cbind_container_type(r_obj* x, void* data); static r_obj* syms_vec_cbind_frame_ptype; static r_obj* fns_vec_cbind_frame_ptype; static r_obj* shaped_as_df_col(r_obj* x, r_obj* outer); static r_obj* vec_as_df_col(r_obj* x, r_obj* outer); vctrs/src/decl/case-when-decl.h0000644000176200001440000000007615072256373016056 0ustar liggesusersstatic r_ssize compute_size(r_ssize size, r_obj* conditions); vctrs/src/decl/slice-decl.h0000644000176200001440000000006515113325071015265 0ustar liggesusersr_obj* vec_slice_altrep(r_obj* x, r_obj* subscript); vctrs/src/decl/ptype-common-decl.h0000644000176200001440000000024014315060310016603 0ustar liggesusersstatic r_obj* ptype2_common(r_obj* current, r_obj* next, struct counters* counters, void* data); vctrs/src/decl/slice-assign-decl.h0000644000176200001440000000256015057563011016556 0ustar liggesusersstatic r_obj* syms_vec_assign_fallback; static r_obj* fns_vec_assign_fallback; static r_obj* vec_assign_fallback( r_obj* x, r_obj* index, r_obj* value, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); static r_obj* vec_proxy_assign_names( r_obj* proxy, r_obj* index, r_obj* value_proxy, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); static r_obj* lgl_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); static r_obj* int_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); static r_obj* dbl_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); static r_obj* cpl_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); static r_obj* raw_assign( r_obj* x, r_obj* index, r_obj* value, enum vctrs_ownership ownership, enum assignment_slice_value slice_value, enum vctrs_index_style index_style ); vctrs/src/decl/shape-decl.h0000644000176200001440000000075615065005761015304 0ustar liggesusersstatic inline r_obj* vec_shape2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ); static inline r_obj* dims_shape(r_obj* dimensions); static inline r_obj* dims_shape2( r_obj* x_dimensions, r_obj* y_dimensions, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ); static inline int dim2( int x_dimension, int y_dimension, int axis, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ); vctrs/src/decl/slice-chop-decl.h0000644000176200001440000000140214402367170016216 0ustar liggesusersstatic r_obj* vec_chop_base(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop_shaped(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop_df(r_obj* x, struct vctrs_proxy_info info, struct vctrs_chop_indices* p_indices); static r_obj* chop_fallback(r_obj* x, struct vctrs_chop_indices* p_indices); static r_obj* chop_fallback_shaped(r_obj* x, struct vctrs_chop_indices* p_indices); static r_obj* vec_as_chop_sizes(r_obj* sizes, r_ssize size); vctrs/src/decl/equal-decl.h0000644000176200001440000000067115157004241015301 0ustar liggesusersstatic inline bool obj_vec_equal(r_obj* x, r_obj* y, enum r_type type); static inline bool obj_expr_equal(r_obj* x, r_obj* y); static inline bool obj_node_equal(r_obj* x, r_obj* y); static inline bool obj_fn_equal(r_obj* x, r_obj* y); static inline bool obj_attrib_equal(r_obj* x, r_obj* y); static r_obj* obj_attrib_equal_cb(r_obj* tag, r_obj* value, void* data); static r_obj* obj_attrib_count_cb(r_obj* _tag, r_obj* _value, void* data); vctrs/src/decl/runs-decl.h0000644000176200001440000000466714364250244015177 0ustar liggesusersstatic r_obj* vec_detect_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call); static r_obj* vec_locate_run_bounds(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call); static struct r_vector_bool* vec_detect_run_bounds_bool(r_obj* x, enum vctrs_run_bound which, struct r_lazy error_call); static inline void lgl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void int_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void dbl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void cpl_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void chr_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void raw_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void list_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void df_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void lgl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void int_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void dbl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void cpl_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void chr_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void raw_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline void list_col_detect_run_bounds_bool(r_obj* x, r_ssize size, enum vctrs_run_bound which, bool* v_out); static inline r_ssize compute_iter_loc(r_ssize size, enum vctrs_run_bound which); static inline r_ssize compute_iter_step(enum vctrs_run_bound which); static inline enum vctrs_run_bound as_run_bound(r_obj* which, struct r_lazy error_call); vctrs/src/decl/set-decl.h0000644000176200001440000000143115047425317014771 0ustar liggesusersstatic inline void vec_set_intersect_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_marked ); static inline void vec_set_difference_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_marked ); static inline r_ssize vec_set_union_x_loop( struct dictionary* x_dict, r_ssize x_size, bool* v_marked ); static inline r_ssize vec_set_union_y_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_marked ); static inline struct r_ssize_pair vec_set_symmetric_difference_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_x_marked, bool* v_y_marked ); vctrs/src/decl/type-info-decl.h0000644000176200001440000000010014373202700016066 0ustar liggesusersstatic enum vctrs_type vec_base_typeof(r_obj* x, bool proxied); vctrs/src/decl/size-common-decl.h0000644000176200001440000000014115075743736016444 0ustar liggesusersstatic r_obj* size2_common( r_obj* x, r_obj* y, struct counters* counters, void* data ); vctrs/src/decl/proxy-restore-decl.h0000644000176200001440000000021315056611175017034 0ustar liggesusersstatic r_obj* syms_vec_restore_dispatch; static r_obj* fns_vec_restore_dispatch; static r_obj* vec_restore_dispatch(r_obj* x, r_obj* to); vctrs/src/decl/ptype2-decl.h0000644000176200001440000000067515120513137015420 0ustar liggesusersstatic r_obj* vec_ptype2_impl( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left, bool first_pass ); static r_obj* vec_ptype2_switch_native( r_obj* x, r_obj* y, enum vctrs_type x_type, enum vctrs_type y_type, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback, int* left ); vctrs/src/decl/arg-decl.h0000644000176200001440000000100114315060310014721 0ustar liggesusersstatic int fill_arg_buffer(struct vctrs_arg* arg, char* buf, r_ssize cur_size, r_ssize tot_size); static r_ssize counter_arg_fill(void* data, char* buf, r_ssize remaining); static r_ssize wrapper_arg_fill(void* data, char* buf, r_ssize remaining); static r_ssize lazy_arg_fill(void* data, char* buf, r_ssize remaining); static r_ssize subscript_arg_fill(void* p_data, char* buf, r_ssize remaining); static bool is_empty_arg(struct vctrs_arg* arg); vctrs/src/decl/empty-decl.h0000644000176200001440000000005114315060310015312 0ustar liggesusersstatic r_obj* list_drop_empty(r_obj* x); vctrs/src/decl/typeof2-s3-decl.h0000644000176200001440000000033714315060310016076 0ustar liggesusersstatic enum vctrs_type2_s3 vec_typeof2_s3_impl2(SEXP x, SEXP y, enum vctrs_type type_y, int* left); vctrs/src/decl/expand-decl.h0000644000176200001440000000007614362266120015453 0ustar liggesusersstatic inline enum vctrs_expand_vary parse_vary(r_obj* vary); vctrs/src/decl/list-combine-decl.h0000644000176200001440000000714015113325071016554 0ustar liggesusersstatic r_obj* list_combine_impl( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_unmatched unmatched, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_indices_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call, enum s3_fallback s3_fallback ); static bool needs_list_combine_common_class_fallback(r_obj* ptype); static r_obj* list_combine_common_class_fallback( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_indices_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ); static bool needs_list_combine_homogeneous_fallback( r_obj* xs, bool has_default, r_obj* default_, r_obj* ptype ); static r_obj* list_combine_homogeneous_fallback( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* name_spec, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ); static bool list_all_have_class(r_obj* xs, r_obj* class); static bool obj_has_class(r_obj* x, r_obj* class); static r_obj* base_list_combine_fallback( r_obj* xs, bool has_indices, r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, bool has_default, r_obj* default_, enum list_combine_multiple multiple, enum assignment_slice_value slice_xs, r_obj* name_spec, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ); static r_obj* base_c_invoke( r_obj* xs, r_obj* name_spec, struct r_lazy error_call ); static void stop_name_spec_in_fallback(r_obj* xs, struct r_lazy error_call); static r_obj* build_fallback_index(r_obj* indices, r_ssize size, struct r_lazy error_call); static r_obj* vec_recycle_xs_fallback( r_obj* xs, r_obj* indices, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ); static r_obj* vec_slice_xs_fallback( r_obj* xs, r_obj* indices, r_ssize size, struct vctrs_arg* p_xs_arg, struct r_lazy error_call ); static r_obj* list_location_to_location_indices(r_obj* indices); static r_obj* list_condition_to_location_indices(r_obj* indices); static bool vec_implements_base_c(r_obj* x); static bool class_implements_base_c(r_obj* cls); static enum vctrs_index_style compute_indices_style(r_obj* indices, r_ssize size); static void check_any_unmatched( r_obj* indices, enum vctrs_index_style indices_style, r_ssize size, struct r_lazy error_call ); static void stop_combine_unmatched(r_obj* loc, struct r_lazy error_call); static r_obj* compute_default_index( r_obj* indices, enum vctrs_index_style indices_style, r_ssize size ); static r_obj* push_default( r_obj* xs, r_obj* default_ ); static r_obj* push_default_index( r_obj* indices, r_obj* default_index ); static r_obj* ptype_common_with_default( r_obj* ptype, r_obj* xs, bool has_default, r_obj* default_, struct vctrs_arg* p_xs_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call, enum s3_fallback s3_fallback ); vctrs/src/decl/size-decl.h0000644000176200001440000000004715113325071015140 0ustar liggesusersstatic r_ssize vec_raw_size(r_obj* x); vctrs/src/decl/utils-dispatch-decl.h0000644000176200001440000000025214416313204017121 0ustar liggesusersenum vctrs_class_type class_type(r_obj* x); static enum vctrs_class_type class_type_impl(r_obj* cls); static const char* class_type_as_str(enum vctrs_class_type type); vctrs/src/decl/proxy-decl.h0000644000176200001440000000203215065005761015352 0ustar liggesusersr_obj* syms_vec_proxy; r_obj* syms_vec_proxy_equal; r_obj* syms_vec_proxy_equal_array; r_obj* syms_vec_proxy_compare; r_obj* syms_vec_proxy_compare_array; r_obj* syms_vec_proxy_order; r_obj* syms_vec_proxy_order_array; r_obj* fns_vec_proxy_equal_array; r_obj* fns_vec_proxy_compare_array; r_obj* fns_vec_proxy_order_array; static inline r_obj* vec_proxy_2(r_obj* x, bool recurse); static inline r_obj* vec_proxy_equal_impl(r_obj* x); static inline r_obj* vec_proxy_compare_impl(r_obj* x); static inline r_obj* vec_proxy_order_impl(r_obj* x); static inline r_obj* vec_proxy_equal_method(r_obj* x); static inline r_obj* vec_proxy_equal_invoke(r_obj* x, r_obj* method); static inline r_obj* vec_proxy_compare_method(r_obj* x); static inline r_obj* vec_proxy_compare_invoke(r_obj* x, r_obj* method); static inline r_obj* vec_proxy_order_method(r_obj* x); static inline r_obj* vec_proxy_order_invoke(r_obj* x, r_obj* method); static inline r_obj* df_proxy(r_obj* x, enum vctrs_proxy_kind kind); static inline r_obj* df_proxy_recurse(r_obj* x); vctrs/src/shape.c0000644000176200001440000001330515157322033013451 0ustar liggesusers#include "vctrs.h" #include "decl/shape-decl.h" r_obj* ffi_vec_shaped_ptype(r_obj* ptype, r_obj* x, r_obj* y, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); return vec_shaped_ptype(ptype, x, y, &x_arg, &y_arg); } r_obj* vec_shaped_ptype( r_obj* ptype, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ) { r_obj* shape = vec_shape2(x, y, p_x_arg, p_y_arg); if (shape == r_null) { return ptype; } // Only `KEEP()` if we have to KEEP(shape); ptype = KEEP(r_clone_referenced(ptype)); r_attrib_poke_dim(ptype, shape); FREE(2); return ptype; } // ----------------------------------------------------------------------------- r_obj* ffi_vec_shape2(r_obj* x, r_obj* y, r_obj* frame) { struct r_lazy x_arg_ = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_); struct r_lazy y_arg_ = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_); return vec_shape2(x, y, &x_arg, &y_arg); } // This function is fairly important for performance, because `vec_shaped_ptype()` // is called on every ptype2 iteration. This example runs roughly 15% faster if // we completely avoid any `KEEP()` and `FREE()` calls in the happy path, which // justifies the somewhat ugly code flow. // // ```r // x <- as.list(1:1e6) // vec_ptype_common(!!!x) // ``` // // rchk asserts that `r_dim()` creates a potentially fresh variable, even though // we are mostly confident it does not static inline r_obj* vec_shape2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ) { r_obj* x_dimensions = r_dim(x); if (x_dimensions == r_null) { r_obj* y_dimensions = r_dim(y); if (y_dimensions == r_null) { return r_null; } else { KEEP(y_dimensions); r_obj* out = dims_shape(y_dimensions); FREE(1); return out; } } else { KEEP(x_dimensions); r_obj* y_dimensions = r_dim(y); if (y_dimensions == r_null) { r_obj* out = dims_shape(x_dimensions); FREE(1); return out; } else { KEEP(y_dimensions); r_obj* out = dims_shape2(x_dimensions, y_dimensions, x, y, p_x_arg, p_y_arg); FREE(2); return out; } } } // ----------------------------------------------------------------------------- // Sets the first axis to zero static inline r_obj* dims_shape(r_obj* dimensions) { if (r_length(dimensions) == 0) { r_stop_internal("`dimensions` must have length."); } if (r_typeof(dimensions) != R_TYPE_integer) { r_stop_internal("`dimensions` must be an integer vector."); } if (r_int_get(dimensions, 0) == 0) { // Already a shape, no clone required return dimensions; } dimensions = KEEP(r_clone_referenced(dimensions)); r_int_begin(dimensions)[0] = 0; FREE(1); return dimensions; } // ----------------------------------------------------------------------------- /* * Returns a dimensions vector where the first dimension length is forcibly set * to 0, and the rest are the common shape of `x` and `y`. */ static inline r_obj* dims_shape2( r_obj* x_dimensions, r_obj* y_dimensions, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ) { r_ssize x_dimensionality = r_length(x_dimensions); r_ssize y_dimensionality = r_length(y_dimensions); r_obj* max_dimensions; r_ssize max_dimensionality; r_ssize min_dimensionality; if (x_dimensionality >= y_dimensionality) { max_dimensions = x_dimensions; max_dimensionality = x_dimensionality; min_dimensionality = y_dimensionality; } else { max_dimensions = y_dimensions; max_dimensionality = y_dimensionality; min_dimensionality = x_dimensionality; } // Sanity check, should never be true if (max_dimensionality == 0) { r_stop_internal("`max_dimensionality` must have length."); } const int* p_x_dimensions = r_int_cbegin(x_dimensions); const int* p_y_dimensions = r_int_cbegin(y_dimensions); const int* p_max_dimensions = r_int_cbegin(max_dimensions); r_obj* out = KEEP(r_alloc_integer(max_dimensionality)); int* p_out = r_int_begin(out); // Set the first axis to zero p_out[0] = 0; // Start loop at the second axis r_ssize i = 1; for (; i < min_dimensionality; ++i) { const int axis = i + 1; const int x_dimension = p_x_dimensions[i]; const int y_dimension = p_y_dimensions[i]; p_out[i] = dim2(x_dimension, y_dimension, axis, x, y, p_x_arg, p_y_arg); } for (; i < max_dimensionality; ++i) { p_out[i] = p_max_dimensions[i]; } FREE(1); return out; } static inline int dim2( int x_dimension, int y_dimension, int axis, r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg ) { if (x_dimension == y_dimension) { return x_dimension; } else if (x_dimension == 1) { return y_dimension; } else if (y_dimension == 1) { return x_dimension; } else { stop_incompatible_shape(x, y, x_dimension, y_dimension, axis, p_x_arg, p_y_arg); } } // ----------------------------------------------------------------------------- r_obj* vec_shape_broadcast( r_obj* x, r_obj* to, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_to_arg, struct r_lazy call ) { r_obj* ffi_x_arg = KEEP(vctrs_arg(p_x_arg)); r_obj* ffi_to_arg = KEEP(vctrs_arg(p_to_arg)); r_obj* ffi_call = KEEP(r_lazy_eval(call)); r_obj* out = vctrs_eval_mask5( r_sym("shape_broadcast"), r_syms.x, x, r_sym("to"), to, syms.x_arg, ffi_x_arg, syms.to_arg, ffi_to_arg, r_syms.call, ffi_call ); FREE(3); return out; } vctrs/src/proxy-restore.c0000644000176200001440000002123315157273651015225 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/proxy-restore-decl.h" r_obj* ffi_vec_restore(r_obj* x, r_obj* to) { // Never own user objects, so foreign ownership. // Hooked to `vec_restore()`, which is called after an R level non-recursive // `vec_proxy()`, so not recursively proxied. struct vec_restore_opts opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; return vec_restore_opts(x, to, &opts); } // Exposed for testing r_obj* ffi_vec_restore_recurse(r_obj* x, r_obj* to) { // Never own user objects, but we are restoring recursively here for testing purposes struct vec_restore_opts opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = true }; return vec_restore_opts(x, to, &opts); } // Restoration with full options // // This gives you the ability to perform a recursive restore, which is needed // when `vec_proxy_recurse()` is used, like in `vec_c()` and `vec_rbind()`. In // that case you also specify `VCTRS_OWNERSHIP_deep` to indicate full ownership // to avoid copies (of data frame columns in particular) during the restoration // process. r_obj* vec_restore_opts( r_obj* x, r_obj* to, const struct vec_restore_opts* p_opts ) { enum vctrs_class_type to_type = class_type(to); switch (to_type) { case VCTRS_CLASS_bare_factor: case VCTRS_CLASS_bare_ordered: case VCTRS_CLASS_none: return vec_restore_default(x, to, p_opts->ownership); case VCTRS_CLASS_bare_date: return vec_date_restore(x, to, p_opts->ownership); case VCTRS_CLASS_bare_posixct: return vec_posixct_restore(x, to, p_opts->ownership); case VCTRS_CLASS_bare_posixlt: return vec_posixlt_restore(x, to, p_opts->ownership); case VCTRS_CLASS_bare_data_frame: case VCTRS_CLASS_bare_tibble: return vec_bare_df_restore(x, to, p_opts); case VCTRS_CLASS_data_frame: return vec_df_restore(x, to, p_opts); default: if (p_opts->recursively_proxied && is_data_frame(x)) { return vec_df_restore(x, to, p_opts); } else { return vec_restore_dispatch(x, to); } } } static r_obj* vec_restore_dispatch(r_obj* x, r_obj* to) { return vctrs_dispatch2(syms_vec_restore_dispatch, fns_vec_restore_dispatch, syms_x, x, syms_to, to); } struct vec_restore_collect_data { r_obj* names; r_obj* dim; r_obj* dim_names; r_obj* row_names; }; static r_obj* vec_restore_collect_cb(r_obj* tag, r_obj* value, void* data) { struct vec_restore_collect_data* p_data = (struct vec_restore_collect_data*) data; if (tag == r_syms.names) { p_data->names = value; return NULL; } if (tag == r_syms.dim) { p_data->dim = value; return NULL; } if (tag == r_syms.dim_names) { p_data->dim_names = value; return NULL; } if (tag == r_syms.row_names) { p_data->row_names = value; return NULL; } return NULL; } // Default algorithm to restore `x` to the type of `to` // // 4 attributes from `x` are retained: // - `names` // - `dim` // - `dimnames` // - `row.names` // // All other `x` attributes are cleared and are replaced with `to`'s attributes. // // Duplicates `x` as needed according to `ownership`. Recursive ownership is // never useful here, because we only touch the container, not the elements inside. r_obj* vec_restore_default(r_obj* x, r_obj* to, enum vctrs_ownership ownership) { if (x == to) { // Rare but useful pointer comparison return x; } if (!r_attrib_has_any(x) && !r_attrib_has_any(to)) { // No one has attributes (nothing to clear, nothing to add). // Don't need to worry about OBJECT/S4 bit, you'd always have at least one attribute. return x; } // Note: Could this be `R_tryWrap()` one day? For backwards compatibility with // R < 4.6, maybe we would have our own ALTREP wrapper class implementation? // https://github.com/wch/r-source/commit/84293ec070c219b9ad2df44ae84d3f0f58d5ce7c x = KEEP(vec_clone_referenced(x, ownership)); // In one pass, collect attributes we want to retain from `x` struct vec_restore_collect_data data = { .names = r_null, .dim = r_null, .dim_names = r_null, .row_names = r_null }; r_attrib_map(x, vec_restore_collect_cb, &data); r_obj* names = KEEP(data.names); r_obj* dim = KEEP(data.dim); r_obj* dim_names = KEEP(data.dim_names); r_obj* row_names = KEEP(data.row_names); if (dim == r_null) { // This is a vector, clear any `dim` or `dim_names` dim_names = r_null; } else { // This is an array, clear any `names` or `row_names` names = r_null; row_names = r_null; } // We don't actually retain row names if `to` isn't a data frame if (row_names != r_null && !is_data_frame(to)) { row_names = r_null; } // Copy over all attributes from `to` // // Uses `SHALLOW_DUPLICATE_ATTRIB()`. Notably: // - Shallow clones attribute pairlist from `to` // - `SET_ATTRIB()` for efficiency and avoids // `Rf_getAttrib()`/`Rf_setAttrib()` funny business // - `SET_OBJECT()` // - `SET_S4_OBJECT()` / `UNSET_S4_OBJECT()` r_attrib_clone_from(x, to); // Retain specific attributes from `x` // // We must set all 4! If `to` had `names` but `x` does not, then `names` will // be `r_null` and will clear the `names` brought over by // `r_attrib_clone_from()`. r_attrib_poke(x, r_syms.names, names); r_attrib_poke(x, r_syms.dim, dim); r_attrib_poke(x, r_syms.dim_names, dim_names); r_attrib_poke(x, r_syms.row_names, row_names); FREE(5); return x; } r_obj* ffi_vec_restore_default(r_obj* x, r_obj* to) { // Never own user objects return vec_restore_default(x, to, VCTRS_OWNERSHIP_foreign); } r_obj* vec_df_restore(r_obj* x, r_obj* to, const struct vec_restore_opts* p_opts) { r_obj* out = KEEP(vec_bare_df_restore(x, to, p_opts)); out = vec_restore_dispatch(out, to); FREE(1); return out; } r_obj* vec_bare_df_restore(r_obj* x, r_obj* to, const struct vec_restore_opts* p_opts) { if (r_typeof(x) != R_TYPE_list) { r_stop_internal("Attempt to restore data frame from a %s.", r_type_as_c_string(r_typeof(x))); } int n_prot = 0; if (!is_data_frame(to)) { to = KEEP_N(vec_proxy(to), &n_prot); if (!is_data_frame(to)) { r_stop_internal("Expected restoration target to have a df proxy."); } } if (p_opts->recursively_proxied) { r_ssize n_cols = r_length(x); if (n_cols != r_length(to)) { r_stop_internal("Shape of `x` doesn't match `to` in recursive df restoration."); }; r_obj* const * v_x = r_list_cbegin(x); r_obj* const * v_to = r_list_cbegin(to); // During restoration, if we have deep ownership over `x` we can // propagate that ownership to the columns, otherwise we have no // known ownership over the columns enum vctrs_ownership col_ownership; switch (p_opts->ownership) { case VCTRS_OWNERSHIP_foreign: col_ownership = VCTRS_OWNERSHIP_foreign; break; case VCTRS_OWNERSHIP_shallow: col_ownership = VCTRS_OWNERSHIP_foreign; break; case VCTRS_OWNERSHIP_deep: col_ownership = VCTRS_OWNERSHIP_deep; break; default: r_stop_unreachable(); } const struct vec_restore_opts col_opts = { .ownership = col_ownership, .recursively_proxied = p_opts->recursively_proxied }; for (r_ssize i = 0; i < n_cols; ++i) { r_obj* x_restored = vec_restore_opts(v_x[i], v_to[i], &col_opts); r_list_poke(x, i, x_restored); } } x = KEEP(vec_restore_default(x, to, p_opts->ownership)); if (r_attrib_get(x, r_syms.names) == r_null) { r_obj* names = KEEP(r_alloc_character(r_length(x))); r_attrib_poke(x, r_syms.names, names); FREE(1); } r_obj* rownames = KEEP(df_rownames(x)); if (rownames == r_null) { r_ssize size = df_raw_size(x); init_compact_rownames(x, size); } else if (rownames_type(rownames) == ROWNAMES_TYPE_identifiers) { rownames = KEEP(vec_as_names(rownames, p_unique_repair_silent_opts)); x = vec_proxy_set_names(x, rownames, p_opts->ownership); FREE(1); } FREE(2); FREE(n_prot); return x; } // Mapped to `vec_restore.data.frame()`, which is called after non-recursive R // level `vec_proxy()`, so we don't need recursion here r_obj* ffi_vec_bare_df_restore(r_obj* x, r_obj* to) { // Never own user objects const struct vec_restore_opts opts = { .ownership = VCTRS_OWNERSHIP_foreign, .recursively_proxied = false }; return vec_bare_df_restore(x, to, &opts); } void vctrs_init_proxy_restore(r_obj* ns) { syms_vec_restore_dispatch = r_sym("vec_restore_dispatch"); fns_vec_restore_dispatch = r_eval(syms_vec_restore_dispatch, ns); } static r_obj* syms_vec_restore_dispatch = NULL; static r_obj* fns_vec_restore_dispatch = NULL; vctrs/src/assert.h0000644000176200001440000001142015120272011013641 0ustar liggesusers#ifndef VCTRS_ASSERT_H #define VCTRS_ASSERT_H #include "vctrs-core.h" #include "conditions.h" #include "dim.h" #include "size.h" #include "utils-dispatch.h" // ---------------------------------------------------------------------- /** * Whether or not `NULL` values are allowed */ enum vctrs_allow_null { VCTRS_ALLOW_NULL_no = 0, VCTRS_ALLOW_NULL_yes = 1 }; static inline bool allow_null_as_bool(enum vctrs_allow_null allow_null) { return (bool) allow_null; } static inline enum vctrs_allow_null arg_as_allow_null(r_obj* x, const char* arg) { return r_arg_as_bool(x, arg) ? VCTRS_ALLOW_NULL_yes : VCTRS_ALLOW_NULL_no; } // ---------------------------------------------------------------------- // Vector check static inline bool obj_is_vector(r_obj* x, enum vctrs_allow_null allow_null) { if (x == r_null) { return allow_null_as_bool(allow_null); } struct vctrs_proxy_info info = vec_proxy_info(x); return info.type != VCTRS_TYPE_scalar; } static inline void obj_check_vector( r_obj* x, enum vctrs_allow_null allow_null, struct vctrs_arg* p_x_arg, struct r_lazy call ) { if (!obj_is_vector(x, allow_null)) { stop_scalar_type(x, p_x_arg, call); } } bool list_all_vectors( r_obj* xs, enum vctrs_allow_null allow_null ); void list_check_all_vectors( r_obj* xs, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ); // ---------------------------------------------------------------------- // Size check static inline bool vec_is_size( r_obj* x, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_x_arg, struct r_lazy call ) { const r_ssize x_size = vec_size_params(x, p_x_arg, call); if (x_size == size) { return true; } if (allow_null_as_bool(allow_null) && x == r_null) { return true; } return false; } static inline void vec_check_size( r_obj* x, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_x_arg, struct r_lazy call ) { if (!vec_is_size(x, size, allow_null, p_x_arg, call)) { const r_ssize x_size = vec_size_params(x, p_x_arg, call); stop_assert_size(x_size, size, p_x_arg, call); } } bool list_all_size( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ); void list_check_all_size( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ); // ---------------------------------------------------------------------- // List check r_no_return void stop_non_list_type( r_obj* x, struct vctrs_arg* arg, struct r_lazy call ); static inline bool obj_is_list(r_obj* x) { // Require `x` to be a list internally if (r_typeof(x) != R_TYPE_list) { return false; } // List arrays are not lists for vctrs purposes. We have pretty deep // assumptions that if an object is a list, then `r_length(x) == vec_size(x)`. // See `list_drop_empty()` and `list_combine()` for examples of // implementations that would be broken if this wasn't true. if (has_dim(x)) { return false; } // Unclassed R_TYPE_list are lists if (!r_is_object(x)) { return true; } const enum vctrs_class_type type = class_type(x); // Classed R_TYPE_list are only lists if the last class is explicitly `"list"` // or if it is a bare "AsIs" type return (type == VCTRS_CLASS_list) || (type == VCTRS_CLASS_bare_asis); } static inline void obj_check_list( r_obj* x, struct vctrs_arg* p_x_arg, struct r_lazy call ) { if (!obj_is_list(x)) { stop_non_list_type(x, p_x_arg, call); } } // ---------------------------------------------------------------------- // Recyclable check static inline bool vec_is_recyclable( r_obj* x, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_x_arg, struct r_lazy call ) { const r_ssize x_size = vec_size_params(x, p_x_arg, call); if (x_size == size || x_size == 1) { return true; } if (allow_null_as_bool(allow_null) && x == r_null) { return true; } return false; } static inline r_ssize vec_check_recyclable( r_obj* x, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_x_arg, struct r_lazy call ) { const r_ssize x_size = vec_size_params(x, p_x_arg, call); if (x_size == size || x_size == 1) { return x_size; } // It is up to the caller to be prepared to handle this! if (allow_null_as_bool(allow_null) && x == r_null) { return 0; } stop_recycle_incompatible_size(x_size, size, p_x_arg, call); } bool list_all_recyclable( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ); void list_check_all_recyclable( r_obj* xs, r_ssize size, enum vctrs_allow_null allow_null, struct vctrs_arg* p_xs_arg, struct r_lazy call ); #endif vctrs/src/type-integer64.c0000644000176200001440000001010514315060310015122 0ustar liggesusers#include "vctrs.h" #include "decl/type-integer64-decl.h" #define r_na_llong LLONG_MIN static const char* v_integer64_proxy_df_names_c_strings[] = { "left", "right" }; static const enum r_type v_integer64_proxy_df_types[] = { R_TYPE_double, R_TYPE_double }; enum integer64_proxy_df_locs { INTEGER64_PROXY_DF_LOCS_left, INTEGER64_PROXY_DF_LOCS_right }; #define INTEGER64_PROXY_DF_SIZE R_ARR_SIZEOF(v_integer64_proxy_df_types) // [[ register() ]] r_obj* vctrs_integer64_proxy(r_obj* x) { if (r_typeof(x) != R_TYPE_double) { r_stop_internal("`x` must be a double."); } if (r_attrib_get(x, R_DimSymbol) != r_null) { r_stop_internal("`x` should not have a `dim` attribute."); } r_ssize size = r_length(x); // Casting `const double*` to `const long long*` is UB, but we are mimicking // what bit64 is doing, so if this ever breaks it means that bit64 is broken. const long long* v_x = (const long long*) r_dbl_cbegin(x); r_obj* nms = KEEP(r_chr_n( v_integer64_proxy_df_names_c_strings, INTEGER64_PROXY_DF_SIZE )); r_obj* out = KEEP(r_alloc_df_list( size, nms, v_integer64_proxy_df_types, INTEGER64_PROXY_DF_SIZE )); r_init_data_frame(out, size); r_obj* left = r_list_get(out, INTEGER64_PROXY_DF_LOCS_left); r_obj* right = r_list_get(out, INTEGER64_PROXY_DF_LOCS_right); double* v_left = r_dbl_begin(left); double* v_right = r_dbl_begin(right); for (r_ssize i = 0; i < size; ++i) { const long long elt = v_x[i]; if (elt == r_na_llong) { v_left[i] = r_globals.na_dbl; v_right[i] = r_globals.na_dbl; continue; } const int64_t elt_i64 = (int64_t) elt; int64_unpack(elt_i64, i, v_left, v_right); } FREE(2); return out; } // [[ register() ]] r_obj* vctrs_integer64_restore(r_obj* x) { if (!is_data_frame(x)) { r_stop_internal("`x` must be a data frame."); } if (r_length(x) != 2) { r_stop_internal("`x` must have two columns."); } r_obj* left = r_list_get(x, INTEGER64_PROXY_DF_LOCS_left); r_obj* right = r_list_get(x, INTEGER64_PROXY_DF_LOCS_right); const double* v_left = r_dbl_cbegin(left); const double* v_right = r_dbl_cbegin(right); r_ssize size = r_length(left); r_obj* out = KEEP(r_alloc_double(size)); // See above comment about UB in this cast long long* v_out = (long long*) r_dbl_begin(out); r_attrib_poke_class(out, r_chr("integer64")); for (r_ssize i = 0; i < size; ++i) { const double left = v_left[i]; const double right = v_right[i]; if (isnan(left)) { v_out[i] = r_na_llong; continue; } v_out[i] = (long long) int64_pack(left, right); } FREE(1); return out; } // ----------------------------------------------------------------------------- /* * This pair of functions facilitates: * - Splitting an `int64_t` into two `uint32_t` values, maintaining order * - Combining those two `uint32_t` values back into the original `int32_t` * * The two `uint32_t` values are stored in two doubles. This allows us to store * it in a two column data frame that vctrs knows how to work with, and we can * use the standard `NA_real_` as the missing value without fear of conflicting * with any other valid `int64_t` value. * * Unsigned 32-bit integers are used because bit shifting is undefined on signed * types. * * An arithmetic shift of `- INT64_MIN` is done to remap the int64_t value * into uint64_t space, while maintaining order. This relies on unsigned * arithmetic overflow behavior, which is well-defined. */ static inline void int64_unpack(int64_t x, r_ssize i, double* v_left, double* v_right) { const uint64_t x_u64 = ((uint64_t) x) - INT64_MIN; const uint32_t left_u32 = (uint32_t) (x_u64 >> 32); const uint32_t right_u32 = (uint32_t) x_u64; v_left[i] = (double) left_u32; v_right[i] = (double) right_u32; } static inline int64_t int64_pack(double left, double right) { const uint32_t left_u32 = (uint32_t) left; const uint32_t right_u32 = (uint32_t) right; const uint64_t out_u64 = ((uint64_t) left_u32) << 32 | right_u32; const int64_t out = (int64_t) (out_u64 + INT64_MIN); return out; } vctrs/src/slice-array.c0000644000176200001440000004642615056611175014604 0ustar liggesusers#include "vctrs.h" #define SLICE_SHAPED_INDEX(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ const CTYPE* x_data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_data) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ *out_data = NA_VALUE; \ continue; \ } \ \ loc += step; \ *out_data = x_data[loc]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ \ int size_index = p_info->p_index[0]; \ if (size_index == NA_INTEGER) { \ R_len_t out_n = p_info->shape_elem_n * p_info->index_n; \ for (R_len_t i = 0; i < out_n; ++i, ++out_data) { \ *out_data = NA_VALUE; \ } \ UNPROTECT(2); \ return(out); \ } \ \ const CTYPE* x_data = CONST_DEREF(x); \ \ /* Convert to C index */ \ size_index = size_index - 1; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += size_index; \ const CTYPE elt_x_data = x_data[loc]; \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_data) { \ *out_data = elt_x_data; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF) \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ CTYPE* out_data = DEREF(out); \ \ R_len_t start = p_info->p_index[0]; \ R_len_t n = p_info->p_index[1]; \ R_len_t step = p_info->p_index[2]; \ \ const CTYPE* x_data = CONST_DEREF(x); \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++out_data, loc += step) { \ *out_data = x_data[loc]; \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_SHAPED(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (is_compact_rep(index)) { \ SLICE_SHAPED_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(index)) { \ SLICE_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ } else { \ SLICE_SHAPED_INDEX(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } static SEXP lgl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(LGLSXP, int, LOGICAL, LOGICAL_RO, NA_LOGICAL); } static SEXP int_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(INTSXP, int, INTEGER, INTEGER_RO, NA_INTEGER); } static SEXP dbl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(REALSXP, double, REAL, REAL_RO, NA_REAL); } static SEXP cpl_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(CPLXSXP, Rcomplex, COMPLEX, COMPLEX_RO, vctrs_shared_na_cpl); } static SEXP raw_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_SHAPED(RAWSXP, Rbyte, RAW, RAW_RO, 0); } #undef SLICE_SHAPED #undef SLICE_SHAPED_COMPACT_REP #undef SLICE_SHAPED_COMPACT_SEQ #undef SLICE_SHAPED_INDEX #define SLICE_BARRIER_SHAPED_INDEX(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE) \ const CTYPE* x_data = CONST_DEREF(x); \ \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ R_len_t out_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_loc) { \ const int step = p_info->p_steps[j]; \ \ if (step == NA_INTEGER) { \ SET(out, out_loc, NA_VALUE); \ continue; \ } \ \ loc += step; \ SEXP elt = x_data[loc]; \ SET(out, out_loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED_COMPACT_REP(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE) \ const CTYPE* x_data = CONST_DEREF(x); \ \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ int size_index = p_info->p_index[0]; \ if (size_index == NA_INTEGER) { \ R_len_t out_n = p_info->shape_elem_n * p_info->index_n; \ for (R_len_t i = 0; i < out_n; ++i) { \ SET(out, i, NA_VALUE); \ } \ UNPROTECT(2); \ return(out); \ } \ \ R_len_t out_loc = 0; \ \ /* Convert to C index */ \ size_index = size_index - 1; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += size_index; \ const SEXP elt_x_data = x_data[loc]; \ \ for (R_len_t j = 0; j < p_info->index_n; ++j, ++out_loc) { \ SET(out, out_loc, elt_x_data); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, CONST_DEREF, SET) \ const CTYPE* x_data = CONST_DEREF(x); \ \ SEXP out_dim = PROTECT(Rf_shallow_duplicate(p_info->dim)); \ INTEGER(out_dim)[0] = p_info->index_n; \ \ SEXP out = PROTECT(Rf_allocArray(RTYPE, out_dim)); \ \ R_len_t start = p_info->p_index[0]; \ R_len_t n = p_info->p_index[1]; \ R_len_t step = p_info->p_index[2]; \ \ R_len_t out_loc = 0; \ \ for (R_len_t i = 0; i < p_info->shape_elem_n; ++i) { \ R_len_t loc = vec_strided_loc( \ p_info->p_shape_index, \ p_info->p_strides, \ p_info->shape_n \ ); \ \ loc += start; \ \ for (R_len_t j = 0; j < n; ++j, ++out_loc, loc += step) { \ SEXP elt = x_data[loc]; \ SET(out, out_loc, elt); \ } \ \ vec_shape_index_increment(p_info); \ } \ \ UNPROTECT(2); \ return out #define SLICE_BARRIER_SHAPED(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE) \ if (is_compact_rep(index)) { \ SLICE_BARRIER_SHAPED_COMPACT_REP(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE); \ } else if (is_compact_seq(index)) { \ SLICE_BARRIER_SHAPED_COMPACT_SEQ(RTYPE, CTYPE, CONST_DEREF, SET); \ } else { \ SLICE_BARRIER_SHAPED_INDEX(RTYPE, CTYPE, CONST_DEREF, SET, NA_VALUE); \ } static SEXP chr_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_BARRIER_SHAPED(STRSXP, SEXP, STRING_PTR_RO, SET_STRING_ELT, NA_STRING); } static SEXP list_slice_shaped(SEXP x, SEXP index, struct strides_info* p_info) { SLICE_BARRIER_SHAPED(VECSXP, SEXP, VECTOR_PTR_RO, SET_VECTOR_ELT, R_NilValue); } #undef SLICE_BARRIER_SHAPED #undef SLICE_BARRIER_SHAPED_COMPACT_REP #undef SLICE_BARRIER_SHAPED_COMPACT_SEQ #undef SLICE_BARRIER_SHAPED_INDEX SEXP vec_slice_shaped_base(enum vctrs_type type, SEXP x, SEXP index, struct strides_info* p_info) { switch (type) { case VCTRS_TYPE_logical: return lgl_slice_shaped(x, index, p_info); case VCTRS_TYPE_integer: return int_slice_shaped(x, index, p_info); case VCTRS_TYPE_double: return dbl_slice_shaped(x, index, p_info); case VCTRS_TYPE_complex: return cpl_slice_shaped(x, index, p_info); case VCTRS_TYPE_character: return chr_slice_shaped(x, index, p_info); case VCTRS_TYPE_raw: return raw_slice_shaped(x, index, p_info); case VCTRS_TYPE_list: return list_slice_shaped(x, index, p_info); default: stop_unimplemented_vctrs_type("vec_slice_shaped_base", type); } } SEXP vec_slice_shaped(enum vctrs_type type, SEXP x, SEXP index) { int n_protect = 0; struct strides_info info = new_strides_info(x, index); PROTECT_STRIDES_INFO(&info, &n_protect); SEXP out = vec_slice_shaped_base(type, x, index, &info); UNPROTECT(n_protect); return out; } vctrs/src/set.c0000644000176200001440000007216315156537555013173 0ustar liggesusers#include "vctrs.h" struct r_ssize_pair { r_ssize x; r_ssize y; }; #include "decl/set-decl.h" // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_intersect(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_intersect(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_intersect(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2( x, y, x_arg, y_arg, call, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(obj_encode_utf8(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(obj_encode_utf8(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); struct dictionary* y_dict = new_dictionary_partial(y_proxy); PROTECT_DICT(y_dict, &n_prot); r_obj* marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_marked = (bool*) r_raw_begin(marked_shelter); r_memset(v_marked, 0, x_size * sizeof(bool)); vec_set_intersect_loop(x_dict, y_dict, x_size, y_size, v_marked); r_ssize n_marked = 0; for (r_ssize i = 0; i < x_size; ++i) { n_marked += v_marked[i]; } r_obj* loc = KEEP_N(r_alloc_integer(n_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } r_obj* out = vec_slice_unsafe(x, loc); FREE(n_prot); return out; } #define VEC_SET_INTERSECT_LOOP(DICT_HASH_SCALAR, DICT_HASH_WITH) \ do { \ /* Load dictionary with `x`. */ \ /* Key values point to first time we saw that `x` value. */ \ for (r_ssize i = 0; i < x_size; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(x_dict, i); \ \ if (x_dict->key[hash] == DICT_EMPTY) { \ dict_put(x_dict, hash, i); \ } \ } \ \ /* Mark unique elements of `x` that are also in `y` */ \ for (r_ssize i = 0; i < y_size; ++i) { \ const uint32_t hash = DICT_HASH_WITH(x_dict, y_dict, i); \ const r_ssize loc = x_dict->key[hash]; \ \ if (loc != DICT_EMPTY) { \ v_marked[loc] = true; \ } \ } \ } \ while (0) static inline void vec_set_intersect_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_marked ) { switch (x_dict->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_SET_INTERSECT_LOOP(nil_dict_hash_scalar, nil_dict_hash_with); break; case VCTRS_TYPE_logical: VEC_SET_INTERSECT_LOOP(lgl_dict_hash_scalar, lgl_dict_hash_with); break; case VCTRS_TYPE_integer: VEC_SET_INTERSECT_LOOP(int_dict_hash_scalar, int_dict_hash_with); break; case VCTRS_TYPE_double: VEC_SET_INTERSECT_LOOP(dbl_dict_hash_scalar, dbl_dict_hash_with); break; case VCTRS_TYPE_complex: VEC_SET_INTERSECT_LOOP(cpl_dict_hash_scalar, cpl_dict_hash_with); break; case VCTRS_TYPE_character: VEC_SET_INTERSECT_LOOP(chr_dict_hash_scalar, chr_dict_hash_with); break; case VCTRS_TYPE_raw: VEC_SET_INTERSECT_LOOP(raw_dict_hash_scalar, raw_dict_hash_with); break; case VCTRS_TYPE_list: VEC_SET_INTERSECT_LOOP(list_dict_hash_scalar, list_dict_hash_with); break; case VCTRS_TYPE_dataframe: VEC_SET_INTERSECT_LOOP(df_dict_hash_scalar, df_dict_hash_with); break; default: stop_unimplemented_vctrs_type("vec_set_intersect_loop", x_dict->p_poly_vec->type); } } #undef VEC_SET_INTERSECT_LOOP // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_difference(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_difference(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2( x, y, x_arg, y_arg, call, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(obj_encode_utf8(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(obj_encode_utf8(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); struct dictionary* y_dict = new_dictionary_partial(y_proxy); PROTECT_DICT(y_dict, &n_prot); r_obj* marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_marked = (bool*) r_raw_begin(marked_shelter); vec_set_difference_loop(x_dict, y_dict, x_size, y_size, v_marked); r_ssize n_marked = 0; for (r_ssize i = 0; i < x_size; ++i) { n_marked += v_marked[i]; } r_obj* loc = KEEP_N(r_alloc_integer(n_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } r_obj* out = vec_slice_unsafe(x, loc); FREE(n_prot); return out; } #define VEC_SET_DIFFERENCE_LOOP(DICT_HASH_SCALAR, DICT_HASH_WITH) \ do { \ /* Load dictionary with `x`. */ \ /* Key values point to first time we saw that `x` value. */ \ /* Mark those first seen locations as potential results. */ \ for (r_ssize i = 0; i < x_size; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(x_dict, i); \ const bool first_time = x_dict->key[hash] == DICT_EMPTY; \ \ if (first_time) { \ dict_put(x_dict, hash, i); \ } \ \ v_marked[i] = first_time; \ } \ \ /* If we've seen the `y` element in `x`, unmark it */ \ for (r_ssize i = 0; i < y_size; ++i) { \ const uint32_t hash = DICT_HASH_WITH(x_dict, y_dict, i); \ const r_ssize loc = x_dict->key[hash]; \ \ if (loc != DICT_EMPTY) { \ v_marked[loc] = false; \ } \ } \ } \ while (0) static inline void vec_set_difference_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_marked ) { switch (x_dict->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_SET_DIFFERENCE_LOOP(nil_dict_hash_scalar, nil_dict_hash_with); break; case VCTRS_TYPE_logical: VEC_SET_DIFFERENCE_LOOP(lgl_dict_hash_scalar, lgl_dict_hash_with); break; case VCTRS_TYPE_integer: VEC_SET_DIFFERENCE_LOOP(int_dict_hash_scalar, int_dict_hash_with); break; case VCTRS_TYPE_double: VEC_SET_DIFFERENCE_LOOP(dbl_dict_hash_scalar, dbl_dict_hash_with); break; case VCTRS_TYPE_complex: VEC_SET_DIFFERENCE_LOOP(cpl_dict_hash_scalar, cpl_dict_hash_with); break; case VCTRS_TYPE_character: VEC_SET_DIFFERENCE_LOOP(chr_dict_hash_scalar, chr_dict_hash_with); break; case VCTRS_TYPE_raw: VEC_SET_DIFFERENCE_LOOP(raw_dict_hash_scalar, raw_dict_hash_with); break; case VCTRS_TYPE_list: VEC_SET_DIFFERENCE_LOOP(list_dict_hash_scalar, list_dict_hash_with); break; case VCTRS_TYPE_dataframe: VEC_SET_DIFFERENCE_LOOP(df_dict_hash_scalar, df_dict_hash_with); break; default: stop_unimplemented_vctrs_type("vec_set_difference_loop", x_dict->p_poly_vec->type); } } #undef VEC_SET_DIFFERENCE_LOOP // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_union(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_union(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_union(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2( x, y, x_arg, y_arg, call, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(obj_encode_utf8(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(obj_encode_utf8(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); struct dictionary* y_dict = new_dictionary(y_proxy); PROTECT_DICT(y_dict, &n_prot); r_obj* marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_marked = (bool*) r_raw_begin(marked_shelter); const r_ssize n_x_marked = vec_set_union_x_loop( x_dict, x_size, v_marked ); r_obj* loc = KEEP_N(r_alloc_integer(n_x_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } // Go ahead and slice out `x` x = KEEP_N(vec_slice_unsafe(x, loc), &n_prot); // Resize `v_marked` for use with `y` marked_shelter = KEEP_N(r_raw_resize(marked_shelter, y_size * sizeof(bool)), &n_prot); v_marked = (bool*) r_raw_begin(marked_shelter); const r_ssize n_y_marked = vec_set_union_y_loop( x_dict, y_dict, x_size, y_size, v_marked ); loc = KEEP_N(r_int_resize(loc, n_y_marked), &n_prot); v_loc = r_int_begin(loc); j = 0; for (r_ssize i = 0; i < y_size; ++i) { if (v_marked[i]) { v_loc[j] = i + 1; ++j; } } y = KEEP_N(vec_slice_unsafe(y, loc), &n_prot); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = r_null }; r_obj* args = KEEP_N(r_alloc_list(2), &n_prot); r_list_poke(args, 0, x); r_list_poke(args, 1, y); r_obj* out = vec_c( args, ptype, r_null, &name_repair_opts, vec_args.empty, r_lazy_null ); FREE(n_prot); return out; } #define VEC_SET_UNION_X_LOOP(DICT_HASH_SCALAR) \ do { \ /* Load dictionary with `x`. */ \ /* Key values point to first time we saw that `x` value. */ \ /* Mark those first seen locations as definite results. */ \ for (r_ssize i = 0; i < x_size; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(x_dict, i); \ const bool first_time = x_dict->key[hash] == DICT_EMPTY; \ \ if (first_time) { \ dict_put(x_dict, hash, i); \ } \ \ v_marked[i] = first_time; \ } \ \ return x_dict->used; \ } \ while (0) static inline r_ssize vec_set_union_x_loop( struct dictionary* x_dict, r_ssize x_size, bool* v_marked ) { switch (x_dict->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_SET_UNION_X_LOOP(nil_dict_hash_scalar); case VCTRS_TYPE_logical: VEC_SET_UNION_X_LOOP(lgl_dict_hash_scalar); case VCTRS_TYPE_integer: VEC_SET_UNION_X_LOOP(int_dict_hash_scalar); case VCTRS_TYPE_double: VEC_SET_UNION_X_LOOP(dbl_dict_hash_scalar); case VCTRS_TYPE_complex: VEC_SET_UNION_X_LOOP(cpl_dict_hash_scalar); case VCTRS_TYPE_character: VEC_SET_UNION_X_LOOP(chr_dict_hash_scalar); case VCTRS_TYPE_raw: VEC_SET_UNION_X_LOOP(raw_dict_hash_scalar); case VCTRS_TYPE_list: VEC_SET_UNION_X_LOOP(list_dict_hash_scalar); case VCTRS_TYPE_dataframe: VEC_SET_UNION_X_LOOP(df_dict_hash_scalar); default: stop_unimplemented_vctrs_type("vec_set_union_x_loop", x_dict->p_poly_vec->type); } } #undef VEC_SET_UNION_X_LOOP #define VEC_SET_UNION_Y_LOOP(DICT_HASH_SCALAR, DICT_HASH_WITH) \ do { \ /* Load dictionary with `y`. */ \ /* Key values point to first time we saw that `y` value. */ \ /* Mark those first seen locations as possible results. */ \ for (r_ssize i = 0; i < y_size; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(y_dict, i); \ const bool first_time = y_dict->key[hash] == DICT_EMPTY; \ \ if (first_time) { \ dict_put(y_dict, hash, i); \ } \ \ v_marked[i] = first_time; \ } \ \ r_ssize n_marked = y_dict->used; \ \ /* Check if unique elements of `y` are in `x`. */ \ /* If they are, unmark them. */ \ for (r_ssize i = 0; i < y_size; ++i) { \ if (!v_marked[i]) { \ continue; \ } \ \ const uint32_t hash = DICT_HASH_WITH(x_dict, y_dict, i); \ const bool in_x = x_dict->key[hash] != DICT_EMPTY; \ \ v_marked[i] = !in_x; \ n_marked -= in_x; \ } \ \ return n_marked; \ } \ while (0) static inline r_ssize vec_set_union_y_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_marked ) { switch (x_dict->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_SET_UNION_Y_LOOP(nil_dict_hash_scalar, nil_dict_hash_with); case VCTRS_TYPE_logical: VEC_SET_UNION_Y_LOOP(lgl_dict_hash_scalar, lgl_dict_hash_with); case VCTRS_TYPE_integer: VEC_SET_UNION_Y_LOOP(int_dict_hash_scalar, int_dict_hash_with); case VCTRS_TYPE_double: VEC_SET_UNION_Y_LOOP(dbl_dict_hash_scalar, dbl_dict_hash_with); case VCTRS_TYPE_complex: VEC_SET_UNION_Y_LOOP(cpl_dict_hash_scalar, cpl_dict_hash_with); case VCTRS_TYPE_character: VEC_SET_UNION_Y_LOOP(chr_dict_hash_scalar, chr_dict_hash_with); case VCTRS_TYPE_raw: VEC_SET_UNION_Y_LOOP(raw_dict_hash_scalar, raw_dict_hash_with); case VCTRS_TYPE_list: VEC_SET_UNION_Y_LOOP(list_dict_hash_scalar, list_dict_hash_with); case VCTRS_TYPE_dataframe: VEC_SET_UNION_Y_LOOP(df_dict_hash_scalar, df_dict_hash_with); default: stop_unimplemented_vctrs_type("vec_set_union_y_loop", x_dict->p_poly_vec->type); } } #undef VEC_SET_UNION_Y_LOOP // ----------------------------------------------------------------------------- r_obj* ffi_vec_set_symmetric_difference(r_obj* x, r_obj* y, r_obj* ptype, r_obj* frame) { struct r_lazy call = { .x = r_syms.error_call, .env = frame }; struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy y_arg_lazy = { .x = syms.y_arg, .env = frame }; struct vctrs_arg y_arg = new_lazy_arg(&y_arg_lazy); return vec_set_symmetric_difference(x, y, ptype, &x_arg, &y_arg, call); } r_obj* vec_set_symmetric_difference(r_obj* x, r_obj* y, r_obj* ptype, struct vctrs_arg* x_arg, struct vctrs_arg* y_arg, struct r_lazy call) { int n_prot = 0; if (ptype == r_null) { int _; ptype = vec_ptype2( x, y, x_arg, y_arg, call, S3_FALLBACK_false, &_ ); KEEP_N(ptype, &n_prot); ptype = vec_ptype_finalise(ptype); KEEP_N(ptype, &n_prot); } x = vec_cast_params( x, ptype, x_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(x, &n_prot); y = vec_cast_params( y, ptype, y_arg, vec_args.empty, call, S3_FALLBACK_false ); KEEP_N(y, &n_prot); r_obj* x_proxy = KEEP_N(vec_proxy_equal(x), &n_prot); x_proxy = KEEP_N(obj_encode_utf8(x_proxy), &n_prot); r_obj* y_proxy = KEEP_N(vec_proxy_equal(y), &n_prot); y_proxy = KEEP_N(obj_encode_utf8(y_proxy), &n_prot); const r_ssize x_size = vec_size(x_proxy); const r_ssize y_size = vec_size(y_proxy); struct dictionary* x_dict = new_dictionary(x_proxy); PROTECT_DICT(x_dict, &n_prot); struct dictionary* y_dict = new_dictionary(y_proxy); PROTECT_DICT(y_dict, &n_prot); r_obj* x_marked_shelter = KEEP_N(r_alloc_raw(x_size * sizeof(bool)), &n_prot); bool* v_x_marked = (bool*) r_raw_begin(x_marked_shelter); r_obj* y_marked_shelter = KEEP_N(r_alloc_raw(y_size * sizeof(bool)), &n_prot); bool* v_y_marked = (bool*) r_raw_begin(y_marked_shelter); const struct r_ssize_pair n_marked = vec_set_symmetric_difference_loop( x_dict, y_dict, x_size, y_size, v_x_marked, v_y_marked ); const r_ssize n_x_marked = n_marked.x; const r_ssize n_y_marked = n_marked.y; r_obj* loc = KEEP_N(r_alloc_integer(n_x_marked), &n_prot); int* v_loc = r_int_begin(loc); r_ssize j = 0; for (r_ssize i = 0; i < x_size; ++i) { if (v_x_marked[i]) { v_loc[j] = i + 1; ++j; } } // Slice out `x`, then reuse `loc` for slicing `y` x = KEEP_N(vec_slice_unsafe(x, loc), &n_prot); loc = KEEP_N(r_int_resize(loc, n_y_marked), &n_prot); v_loc = r_int_begin(loc); j = 0; for (r_ssize i = 0; i < y_size; ++i) { if (v_y_marked[i]) { v_loc[j] = i + 1; ++j; } } y = KEEP_N(vec_slice_unsafe(y, loc), &n_prot); const struct name_repair_opts name_repair_opts = { .type = NAME_REPAIR_none, .fn = r_null }; r_obj* args = KEEP_N(r_alloc_list(2), &n_prot); r_list_poke(args, 0, x); r_list_poke(args, 1, y); r_obj* out = vec_c( args, ptype, r_null, &name_repair_opts, vec_args.empty, r_lazy_null ); FREE(n_prot); return out; } #define VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(DICT_HASH_SCALAR, DICT_HASH_WITH) \ do { \ /* Load dictionary with `x`. */ \ /* Key values point to first time we saw that `x` value. */ \ /* Mark those first seen locations as possible results. */ \ for (r_ssize i = 0; i < x_size; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(x_dict, i); \ const bool first_time = x_dict->key[hash] == DICT_EMPTY; \ \ if (first_time) { \ dict_put(x_dict, hash, i); \ } \ \ v_x_marked[i] = first_time; \ } \ \ /* Load dictionary with `y`. */ \ /* Key values point to first time we saw that `y` value. */ \ /* Mark those first seen locations as possible results. */ \ for (r_ssize i = 0; i < y_size; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(y_dict, i); \ const bool first_time = y_dict->key[hash] == DICT_EMPTY; \ \ if (first_time) { \ dict_put(y_dict, hash, i); \ } \ \ v_y_marked[i] = first_time; \ } \ \ r_ssize n_x_marked = x_dict->used; \ r_ssize n_y_marked = y_dict->used; \ \ /* Check if unique elements of `y` are in `x`. */ \ /* If they are, unmark them from both `x` and `y`. */ \ for (r_ssize i = 0; i < y_size; ++i) { \ if (!v_y_marked[i]) { \ continue; \ } \ \ const uint32_t hash = DICT_HASH_WITH(x_dict, y_dict, i); \ const r_ssize loc = x_dict->key[hash]; \ const bool in_x = loc != DICT_EMPTY; \ \ if (in_x) { \ v_x_marked[loc] = false; \ v_y_marked[i] = false; \ --n_x_marked; \ --n_y_marked; \ } \ } \ \ struct r_ssize_pair n_marked = { \ .x = n_x_marked, \ .y = n_y_marked \ }; \ \ return n_marked; \ } \ while (0) static inline struct r_ssize_pair vec_set_symmetric_difference_loop( struct dictionary* x_dict, struct dictionary* y_dict, r_ssize x_size, r_ssize y_size, bool* v_x_marked, bool* v_y_marked ) { switch (x_dict->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(nil_dict_hash_scalar, nil_dict_hash_with); break; case VCTRS_TYPE_logical: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(lgl_dict_hash_scalar, lgl_dict_hash_with); break; case VCTRS_TYPE_integer: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(int_dict_hash_scalar, int_dict_hash_with); break; case VCTRS_TYPE_double: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(dbl_dict_hash_scalar, dbl_dict_hash_with); break; case VCTRS_TYPE_complex: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(cpl_dict_hash_scalar, cpl_dict_hash_with); break; case VCTRS_TYPE_character: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(chr_dict_hash_scalar, chr_dict_hash_with); break; case VCTRS_TYPE_raw: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(raw_dict_hash_scalar, raw_dict_hash_with); break; case VCTRS_TYPE_list: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(list_dict_hash_scalar, list_dict_hash_with); break; case VCTRS_TYPE_dataframe: VEC_SET_SYMMETRIC_DIFFERENCE_LOOP(df_dict_hash_scalar, df_dict_hash_with); break; default: stop_unimplemented_vctrs_type("vec_set_symmetric_difference_loop", x_dict->p_poly_vec->type); } } #undef VEC_SET_SYMMETRIC_DIFFERENCE_LOOP vctrs/src/missing.h0000644000176200001440000001116215047425317014035 0ustar liggesusers#ifndef VCTRS_MISSING_H #define VCTRS_MISSING_H #include "vctrs-core.h" #include "poly-op.h" // ----------------------------------------------------------------------------- r_obj* vec_detect_missing(r_obj* x); bool vec_any_missing(r_obj* x); r_ssize vec_first_missing(r_obj* x); // ----------------------------------------------------------------------------- static inline bool lgl_is_missing(int x) { return x == r_globals.na_int; } static inline bool int_is_missing(int x) { return x == r_globals.na_int; } static inline bool dbl_is_missing(double x) { return isnan(x); } static inline bool cpl_is_missing(r_complex x) { return dbl_is_missing(x.r) || dbl_is_missing(x.i); } static inline bool chr_is_missing(r_obj* x) { return x == r_globals.na_str; } static inline bool raw_is_missing(unsigned char x) { return false; } static inline bool list_is_missing(r_obj* x) { return x == r_null; } // ----------------------------------------------------------------------------- #define P_IS_MISSING(CTYPE, IS_MISSING) do { \ return IS_MISSING(((CTYPE const*) p_x)[i]); \ } while (0) static r_no_return inline bool p_nil_is_missing(const void* p_x, r_ssize i) { r_stop_internal("Can't check NULL for missingness."); } static inline bool p_lgl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(int, lgl_is_missing); } static inline bool p_int_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(int, int_is_missing); } static inline bool p_dbl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(double, dbl_is_missing); } static inline bool p_cpl_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(r_complex, cpl_is_missing); } static inline bool p_chr_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(r_obj*, chr_is_missing); } static inline bool p_raw_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(unsigned char, raw_is_missing); } static inline bool p_list_is_missing(const void* p_x, r_ssize i) { P_IS_MISSING(r_obj*, list_is_missing); } #undef P_IS_MISSING // No support for df-cols, as they should be flattened static inline bool p_col_is_missing( const void* p_x, r_ssize i, const enum vctrs_type type ) { switch (type) { case VCTRS_TYPE_logical: return p_lgl_is_missing(p_x, i); case VCTRS_TYPE_integer: return p_int_is_missing(p_x, i); case VCTRS_TYPE_double: return p_dbl_is_missing(p_x, i); case VCTRS_TYPE_complex: return p_cpl_is_missing(p_x, i); case VCTRS_TYPE_character: return p_chr_is_missing(p_x, i); case VCTRS_TYPE_raw: return p_raw_is_missing(p_x, i); case VCTRS_TYPE_list: return p_list_is_missing(p_x, i); default: stop_unimplemented_vctrs_type("p_col_is_missing", type); } } static inline bool p_df_is_missing(const void* p_x, r_ssize i) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_col_ptr = p_x_data->v_col_ptr; r_ssize n_col = p_x_data->n_col; // df-cols should already be flattened for (r_ssize col = 0; col < n_col; ++col) { if (!p_col_is_missing(v_col_ptr[col], i, v_col_type[col])) { return false; } } return true; } // ----------------------------------------------------------------------------- static r_no_return inline bool p_nil_is_incomplete(const void* p_x, r_ssize i) { p_nil_is_missing(p_x, i); } static inline bool p_lgl_is_incomplete(const void* p_x, r_ssize i) { return p_lgl_is_missing(p_x, i); } static inline bool p_int_is_incomplete(const void* p_x, r_ssize i) { return p_int_is_missing(p_x, i); } static inline bool p_dbl_is_incomplete(const void* p_x, r_ssize i) { return p_dbl_is_missing(p_x, i); } static inline bool p_cpl_is_incomplete(const void* p_x, r_ssize i) { return p_cpl_is_missing(p_x, i); } static inline bool p_chr_is_incomplete(const void* p_x, r_ssize i) { return p_chr_is_missing(p_x, i); } static inline bool p_raw_is_incomplete(const void* p_x, r_ssize i) { return p_raw_is_missing(p_x, i); } static inline bool p_list_is_incomplete(const void* p_x, r_ssize i) { return p_list_is_missing(p_x, i); } static inline bool p_df_is_incomplete(const void* p_x, r_ssize i) { struct poly_df_data* p_x_data = (struct poly_df_data*) p_x; enum vctrs_type* v_col_type = p_x_data->v_col_type; const void** v_col_ptr = p_x_data->v_col_ptr; r_ssize n_col = p_x_data->n_col; // df-cols should already be flattened, // so we only need missingness of each column, not completeness for (r_ssize col = 0; col < n_col; ++col) { if (p_col_is_missing(v_col_ptr[col], i, v_col_type[col])) { return true; } } return false; } // ----------------------------------------------------------------------------- #endif vctrs/src/dictionary.c0000644000176200001440000007574615156537555014557 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" #include "decl/dictionary-decl.h" // Initialised at load time struct vctrs_arg args_needles; struct vctrs_arg args_haystack; // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 static inline uint32_t u32_safe_ceil2(uint32_t x) { // Return 2^0 when `x` is 0 x += (x == 0); x--; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x++; if (x == 0) { // INT32_MAX+2 <= x <= UINT32_MAX (i.e. 2^31+1 <= x <= 2^32-1) would attempt // to ceiling to 2^32, which is 1 greater than `UINT32_MAX`, resulting in // overflow wraparound to 0. r_stop_internal("`x` results in an `uint32_t` overflow."); } return x; } // Dictonary object ------------------------------------------------------------ static struct dictionary* new_dictionary_opts(SEXP x, struct dictionary_opts* opts); // Dictionaries must be protected in consistent stack order with // `PROTECT_DICT()` struct dictionary* new_dictionary(SEXP x) { struct dictionary_opts opts = { .partial = false, .na_equal = true }; return new_dictionary_opts(x, &opts); } struct dictionary* new_dictionary_partial(SEXP x) { struct dictionary_opts opts = { .partial = true, .na_equal = true }; return new_dictionary_opts(x, &opts); } static struct dictionary* new_dictionary_params(SEXP x, bool partial, bool na_equal) { struct dictionary_opts opts; opts.partial = partial; opts.na_equal = na_equal; return new_dictionary_opts(x, &opts); } static struct dictionary* new_dictionary_opts(SEXP x, struct dictionary_opts* opts) { int nprot = 0; SEXP out = PROTECT_N(Rf_allocVector(RAWSXP, sizeof(struct dictionary)), &nprot); struct dictionary* d = (struct dictionary*) RAW(out); d->protect = out; enum vctrs_type type = vec_proxy_typeof(x); struct poly_vec* p_poly_vec = new_poly_vec(x, type); KEEP_N(p_poly_vec->shelter, &nprot); d->p_poly_vec = p_poly_vec; d->used = 0; if (opts->partial) { d->key = NULL; d->size = 0; } else { uint32_t size = dict_key_size(x); d->key = (R_len_t*) R_alloc(size, sizeof(R_len_t)); for (uint32_t i = 0; i < size; ++i) { d->key[i] = DICT_EMPTY; } d->size = size; } R_len_t n = vec_size(x); if (n) { d->hash = (uint32_t*) R_alloc(n, sizeof(uint32_t)); if (!(d->hash)) { Rf_errorcall(R_NilValue, "Can't allocate hash lookup table. Please free memory."); } r_memset(d->hash, 0, n * sizeof(uint32_t)); vec_hash_fill(x, n, opts->na_equal, d->hash); } else { d->hash = NULL; } UNPROTECT(nprot); return d; } // Assume worst case, that every value is distinct, aiming for a load factor // of at most 50%. We round up to power of 2 to ensure quadratic probing // strategy works. Maximum power of 2 we can store in a uint32_t is 2^31, // as 2^32 is 1 greater than the max uint32_t value, so we clamp sizes that // would result in 2^32 to INT32_MAX to ensure that our maximum ceiling value // is only 2^31. This will increase the max load factor above 50% for `x` with // length greater than 1073741824 (2147483648 * .50), but it ensures that // it can run. See https://github.com/r-lib/vctrs/pull/1760 for further // discussion of why 50% was chosen. static inline uint32_t dict_key_size(SEXP x) { const R_len_t x_size = vec_size(x); if (x_size > R_LEN_T_MAX) { // Ensure we catch the switch to supporting long vectors in `vec_size()` r_stop_internal("Dictionary functions do not support long vectors."); } const double load_adjusted_size = x_size / 0.50; if (load_adjusted_size > UINT32_MAX) { r_stop_internal("Can't safely cast load adjusted size to a `uint32_t`."); } uint32_t size = (uint32_t)load_adjusted_size; // Clamp to `INT32_MAX` to avoid overflow in `u32_safe_ceil2()`, // at the cost of an increased maximum load factor for long input size = size > INT32_MAX ? INT32_MAX : size; size = u32_safe_ceil2(size); size = (size < 16) ? 16 : size; if (x_size > size) { // Should never happen with `R_len_t` sizes. // This is a defensive check that will be useful when we support long vectors. r_stop_internal("Hash table size must be at least as large as input to avoid a load factor of >100%."); } // Rprintf("size: %u\n", size); return size; } // R interface ----------------------------------------------------------------- // TODO: rename to match R function names // TODO: separate out into individual files SEXP vctrs_unique_loc(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); struct growable g = new_growable(INTSXP, 256); PROTECT_GROWABLE(&g, &nprot); vctrs_unique_loc_loop(d, &g, n); SEXP out = growable_values(&g); UNPROTECT(nprot); return out; } #define VCTRS_UNIQUE_LOC_LOOP(DICT_HASH_WITH) \ do { \ for (int i = 0; i < n; ++i) { \ uint32_t hash = DICT_HASH_WITH(d, d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ growable_push_int(g, i + 1); \ } \ } \ } \ while (0) static inline void vctrs_unique_loc_loop(struct dictionary* d, struct growable* g, R_len_t n) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_UNIQUE_LOC_LOOP(nil_dict_hash_with); break; case VCTRS_TYPE_logical: VCTRS_UNIQUE_LOC_LOOP(lgl_dict_hash_with); break; case VCTRS_TYPE_integer: VCTRS_UNIQUE_LOC_LOOP(int_dict_hash_with); break; case VCTRS_TYPE_double: VCTRS_UNIQUE_LOC_LOOP(dbl_dict_hash_with); break; case VCTRS_TYPE_complex: VCTRS_UNIQUE_LOC_LOOP(cpl_dict_hash_with); break; case VCTRS_TYPE_character: VCTRS_UNIQUE_LOC_LOOP(chr_dict_hash_with); break; case VCTRS_TYPE_raw: VCTRS_UNIQUE_LOC_LOOP(raw_dict_hash_with); break; case VCTRS_TYPE_list: VCTRS_UNIQUE_LOC_LOOP(list_dict_hash_with); break; case VCTRS_TYPE_dataframe: VCTRS_UNIQUE_LOC_LOOP(df_dict_hash_with); break; default: stop_unimplemented_vctrs_type("vctrs_unique_loc_loop", d->p_poly_vec->type); } } #undef VCTRS_UNIQUE_LOC_LOOP // [[ include("vctrs.h") ]] SEXP vec_unique(SEXP x) { SEXP index = PROTECT(vctrs_unique_loc(x)); SEXP out = vec_slice_unsafe(x, index); UNPROTECT(1); return out; } SEXP vctrs_duplicated_any(SEXP x) { bool out = duplicated_any(x); return Rf_ScalarLogical(out); } // [[ include("vctrs.h") ]] bool duplicated_any(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); bool out = duplicated_any_loop(d, n); UNPROTECT(nprot); return out; } #define DUPLICATED_ANY_LOOP(DICT_HASH_SCALAR) \ do { \ for (int i = 0; i < n; ++i) { \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ } else { \ return true; \ } \ } \ \ return false; \ } \ while (0) static inline bool duplicated_any_loop(struct dictionary* d, R_len_t n) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: DUPLICATED_ANY_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: DUPLICATED_ANY_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: DUPLICATED_ANY_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: DUPLICATED_ANY_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: DUPLICATED_ANY_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: DUPLICATED_ANY_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: DUPLICATED_ANY_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: DUPLICATED_ANY_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: DUPLICATED_ANY_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("duplicated_any_loop", d->p_poly_vec->type); } } #undef DUPLICATED_ANY_LOOP SEXP vctrs_n_distinct(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); vctrs_n_distinct_loop(d, n); UNPROTECT(nprot); return Rf_ScalarInteger(d->used); } #define VCTRS_N_DISTINCT_LOOP(DICT_HASH_SCALAR) \ do { \ for (int i = 0; i < n; ++i) { \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ } \ } \ } \ while (0) static inline void vctrs_n_distinct_loop(struct dictionary* d, R_len_t n) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_N_DISTINCT_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: VCTRS_N_DISTINCT_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: VCTRS_N_DISTINCT_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: VCTRS_N_DISTINCT_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: VCTRS_N_DISTINCT_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: VCTRS_N_DISTINCT_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: VCTRS_N_DISTINCT_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: VCTRS_N_DISTINCT_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: VCTRS_N_DISTINCT_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vctrs_n_distinct_loop", d->p_poly_vec->type); } } #undef VCTRS_N_DISTINCT_LOOP SEXP vctrs_id(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n), &nprot); int* p_out = INTEGER(out); vctrs_id_loop(d, n, p_out); UNPROTECT(nprot); return out; } #define VCTRS_ID_LOOP(DICT_HASH_SCALAR) \ do { \ for (int i = 0; i < n; ++i) { \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ } \ \ p_out[i] = d->key[hash] + 1; \ } \ } \ while (0) static inline void vctrs_id_loop(struct dictionary* d, R_len_t n, int* p_out) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_ID_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: VCTRS_ID_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: VCTRS_ID_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: VCTRS_ID_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: VCTRS_ID_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: VCTRS_ID_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: VCTRS_ID_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: VCTRS_ID_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: VCTRS_ID_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vctrs_id_loop", d->p_poly_vec->type); } } #undef VCTRS_ID_LOOP // [[ register() ]] SEXP vctrs_match(SEXP needles, SEXP haystack, SEXP na_equal, SEXP frame) { struct r_lazy call = { .x = frame, .env = r_null }; struct r_lazy needles_arg_ = { .x = syms.needles_arg, .env = frame }; struct vctrs_arg needles_arg = new_lazy_arg(&needles_arg_); struct r_lazy haystack_arg_ = { .x = syms.haystack_arg, .env = frame }; struct vctrs_arg haystack_arg = new_lazy_arg(&haystack_arg_); return vec_match_params(needles, haystack, r_bool_as_int(na_equal), &needles_arg, &haystack_arg, call); } SEXP vec_match_params(SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* needles_arg, struct vctrs_arg* haystack_arg, struct r_lazy call) { int nprot = 0; int _; SEXP type = vec_ptype2( needles, haystack, needles_arg, haystack_arg, call, S3_FALLBACK_false, &_ ); PROTECT_N(type, &nprot); type = PROTECT_N(vec_ptype_finalise(type), &nprot); needles = vec_cast_params( needles, type, needles_arg, vec_args.empty, call, S3_FALLBACK_false ); PROTECT_N(needles, &nprot); haystack = vec_cast_params( haystack, type, haystack_arg, vec_args.empty, call, S3_FALLBACK_false ); PROTECT_N(haystack, &nprot); needles = PROTECT_N(vec_proxy_equal(needles), &nprot); needles = PROTECT_N(obj_encode_utf8(needles), &nprot); haystack = PROTECT_N(vec_proxy_equal(haystack), &nprot); haystack = PROTECT_N(obj_encode_utf8(haystack), &nprot); R_len_t n_haystack = vec_size(haystack); R_len_t n_needle = vec_size(needles); struct dictionary* d = new_dictionary_params(haystack, false, na_equal); PROTECT_DICT(d, &nprot); // Load dictionary with haystack load_with_haystack(d, n_haystack); struct dictionary* d_needles = new_dictionary_params(needles, true, na_equal); PROTECT_DICT(d_needles, &nprot); // Locate needles SEXP out = PROTECT_N(Rf_allocVector(INTSXP, n_needle), &nprot); int* p_out = INTEGER(out); if (na_equal) { vec_match_loop(p_out, d, d_needles, n_needle); } else { vec_match_loop_propagate(p_out, d, d_needles, n_needle); } UNPROTECT(nprot); return out; } #define VEC_MATCH_LOOP(DICT_HASH_WITH) \ do { \ for (R_len_t i = 0; i < n_needle; ++i) { \ uint32_t hash = DICT_HASH_WITH(d, d_needles, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ /* TODO: Return `no_match` instead */ \ p_out[i] = NA_INTEGER; \ } else { \ p_out[i] = d->key[hash] + 1; \ } \ } \ } \ while (0) static inline void vec_match_loop( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_MATCH_LOOP(nil_dict_hash_with); break; case VCTRS_TYPE_logical: VEC_MATCH_LOOP(lgl_dict_hash_with); break; case VCTRS_TYPE_integer: VEC_MATCH_LOOP(int_dict_hash_with); break; case VCTRS_TYPE_double: VEC_MATCH_LOOP(dbl_dict_hash_with); break; case VCTRS_TYPE_complex: VEC_MATCH_LOOP(cpl_dict_hash_with); break; case VCTRS_TYPE_character: VEC_MATCH_LOOP(chr_dict_hash_with); break; case VCTRS_TYPE_raw: VEC_MATCH_LOOP(raw_dict_hash_with); break; case VCTRS_TYPE_list: VEC_MATCH_LOOP(list_dict_hash_with); break; case VCTRS_TYPE_dataframe: VEC_MATCH_LOOP(df_dict_hash_with); break; default: stop_unimplemented_vctrs_type("vec_match_loop", d->p_poly_vec->type); } } #undef VEC_MATCH_LOOP #define VEC_MATCH_LOOP_PROPAGATE(DICT_HASH_WITH, DICT_IS_INCOMPLETE) \ do { \ for (R_len_t i = 0; i < n_needle; ++i) { \ if (DICT_IS_INCOMPLETE(d_needles, i)) { \ p_out[i] = NA_INTEGER; \ continue; \ } \ \ uint32_t hash = DICT_HASH_WITH(d, d_needles, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ /* TODO: Return `no_match` instead */ \ p_out[i] = NA_INTEGER; \ } else { \ p_out[i] = d->key[hash] + 1; \ } \ } \ } \ while (0) static inline void vec_match_loop_propagate( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_MATCH_LOOP_PROPAGATE(nil_dict_hash_with, nil_dict_is_incomplete); break; case VCTRS_TYPE_logical: VEC_MATCH_LOOP_PROPAGATE(lgl_dict_hash_with, lgl_dict_is_incomplete); break; case VCTRS_TYPE_integer: VEC_MATCH_LOOP_PROPAGATE(int_dict_hash_with, int_dict_is_incomplete); break; case VCTRS_TYPE_double: VEC_MATCH_LOOP_PROPAGATE(dbl_dict_hash_with, dbl_dict_is_incomplete); break; case VCTRS_TYPE_complex: VEC_MATCH_LOOP_PROPAGATE(cpl_dict_hash_with, cpl_dict_is_incomplete); break; case VCTRS_TYPE_character: VEC_MATCH_LOOP_PROPAGATE(chr_dict_hash_with, chr_dict_is_incomplete); break; case VCTRS_TYPE_raw: VEC_MATCH_LOOP_PROPAGATE(raw_dict_hash_with, raw_dict_is_incomplete); break; case VCTRS_TYPE_list: VEC_MATCH_LOOP_PROPAGATE(list_dict_hash_with, list_dict_is_incomplete); break; case VCTRS_TYPE_dataframe: VEC_MATCH_LOOP_PROPAGATE(df_dict_hash_with, df_dict_is_incomplete); break; default: stop_unimplemented_vctrs_type("vec_match_loop_propagate", d->p_poly_vec->type); } } #undef VEC_MATCH_LOOP_PROPAGATE SEXP vctrs_in(SEXP needles, SEXP haystack, SEXP na_equal_, SEXP frame) { struct r_lazy needles_arg_ = { .x = syms.needles_arg, .env = frame }; struct vctrs_arg needles_arg = new_lazy_arg(&needles_arg_); struct r_lazy haystack_arg_ = { .x = syms.haystack_arg, .env = frame }; struct vctrs_arg haystack_arg = new_lazy_arg(&haystack_arg_); struct r_lazy call = { .x = frame, .env = r_null }; return vec_in( needles, haystack, r_bool_as_int(na_equal_), &needles_arg, &haystack_arg, call ); } // [[ register() ]] SEXP vec_in( SEXP needles, SEXP haystack, bool na_equal, struct vctrs_arg* p_needles_arg, struct vctrs_arg* p_haystack_arg, struct r_lazy call ) { int nprot = 0; int _; SEXP type = vec_ptype2( needles, haystack, p_needles_arg, p_haystack_arg, call, S3_FALLBACK_false, &_ ); PROTECT_N(type, &nprot); type = PROTECT_N(vec_ptype_finalise(type), &nprot); needles = vec_cast_params( needles, type, p_needles_arg, vec_args.empty, call, S3_FALLBACK_false ); PROTECT_N(needles, &nprot); haystack = vec_cast_params( haystack, type, p_haystack_arg, vec_args.empty, call, S3_FALLBACK_false ); PROTECT_N(haystack, &nprot); needles = PROTECT_N(vec_proxy_equal(needles), &nprot); needles = PROTECT_N(obj_encode_utf8(needles), &nprot); haystack = PROTECT_N(vec_proxy_equal(haystack), &nprot); haystack = PROTECT_N(obj_encode_utf8(haystack), &nprot); R_len_t n_haystack = vec_size(haystack); R_len_t n_needle = vec_size(needles); struct dictionary* d = new_dictionary_params(haystack, false, na_equal); PROTECT_DICT(d, &nprot); // Load dictionary with haystack load_with_haystack(d, n_haystack); struct dictionary* d_needles = new_dictionary_params(needles, true, na_equal); PROTECT_DICT(d_needles, &nprot); // Locate needles SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, n_needle), &nprot); int* p_out = LOGICAL(out); if (na_equal) { vec_in_loop(p_out, d, d_needles, n_needle); } else { vec_in_loop_propagate(p_out, d, d_needles, n_needle); } UNPROTECT(nprot); return out; } #define VEC_IN_LOOP(DICT_HASH_WITH) \ do { \ for (int i = 0; i < n_needle; ++i) { \ uint32_t hash = DICT_HASH_WITH(d, d_needles, i); \ p_out[i] = (d->key[hash] != DICT_EMPTY); \ } \ } \ while (0) static inline void vec_in_loop( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_IN_LOOP(nil_dict_hash_with); break; case VCTRS_TYPE_logical: VEC_IN_LOOP(lgl_dict_hash_with); break; case VCTRS_TYPE_integer: VEC_IN_LOOP(int_dict_hash_with); break; case VCTRS_TYPE_double: VEC_IN_LOOP(dbl_dict_hash_with); break; case VCTRS_TYPE_complex: VEC_IN_LOOP(cpl_dict_hash_with); break; case VCTRS_TYPE_character: VEC_IN_LOOP(chr_dict_hash_with); break; case VCTRS_TYPE_raw: VEC_IN_LOOP(raw_dict_hash_with); break; case VCTRS_TYPE_list: VEC_IN_LOOP(list_dict_hash_with); break; case VCTRS_TYPE_dataframe: VEC_IN_LOOP(df_dict_hash_with); break; default: stop_unimplemented_vctrs_type("vec_in_loop", d->p_poly_vec->type); } } #undef VEC_IN_LOOP #define VEC_IN_LOOP_PROPAGATE(DICT_HASH_WITH, DICT_IS_INCOMPLETE) \ do { \ for (int i = 0; i < n_needle; ++i) { \ if (DICT_IS_INCOMPLETE(d_needles, i)) { \ p_out[i] = NA_LOGICAL; \ } else { \ uint32_t hash = DICT_HASH_WITH(d, d_needles, i); \ p_out[i] = (d->key[hash] != DICT_EMPTY); \ } \ } \ } \ while (0) static inline void vec_in_loop_propagate( int* p_out, struct dictionary* d, struct dictionary* d_needles, R_len_t n_needle ) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VEC_IN_LOOP_PROPAGATE(nil_dict_hash_with, nil_dict_is_incomplete); break; case VCTRS_TYPE_logical: VEC_IN_LOOP_PROPAGATE(lgl_dict_hash_with, lgl_dict_is_incomplete); break; case VCTRS_TYPE_integer: VEC_IN_LOOP_PROPAGATE(int_dict_hash_with, int_dict_is_incomplete); break; case VCTRS_TYPE_double: VEC_IN_LOOP_PROPAGATE(dbl_dict_hash_with, dbl_dict_is_incomplete); break; case VCTRS_TYPE_complex: VEC_IN_LOOP_PROPAGATE(cpl_dict_hash_with, cpl_dict_is_incomplete); break; case VCTRS_TYPE_character: VEC_IN_LOOP_PROPAGATE(chr_dict_hash_with, chr_dict_is_incomplete); break; case VCTRS_TYPE_raw: VEC_IN_LOOP_PROPAGATE(raw_dict_hash_with, raw_dict_is_incomplete); break; case VCTRS_TYPE_list: VEC_IN_LOOP_PROPAGATE(list_dict_hash_with, list_dict_is_incomplete); break; case VCTRS_TYPE_dataframe: VEC_IN_LOOP_PROPAGATE(df_dict_hash_with, df_dict_is_incomplete); break; default: stop_unimplemented_vctrs_type("vec_in_loop_propagate", d->p_poly_vec->type); } } #undef VEC_IN_LOOP_PROPAGATE #define LOAD_WITH_HAYSTACK(DICT_HASH_SCALAR) \ do { \ for (int i = 0; i < n_haystack; ++i) { \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ } \ } \ } \ while (0) static inline void load_with_haystack(struct dictionary* d, R_len_t n_haystack) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: LOAD_WITH_HAYSTACK(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: LOAD_WITH_HAYSTACK(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: LOAD_WITH_HAYSTACK(int_dict_hash_scalar); break; case VCTRS_TYPE_double: LOAD_WITH_HAYSTACK(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: LOAD_WITH_HAYSTACK(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: LOAD_WITH_HAYSTACK(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: LOAD_WITH_HAYSTACK(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: LOAD_WITH_HAYSTACK(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: LOAD_WITH_HAYSTACK(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vec_match_loop", d->p_poly_vec->type); } } #undef LOAD_WITH_HAYSTACK SEXP vctrs_count(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP count = PROTECT_N(Rf_allocVector(INTSXP, d->size), &nprot); int* p_count = INTEGER(count); // Load dictionary and accumulate `p_count` vctrs_count_loop(d, n, p_count); // Create output SEXP out_loc = PROTECT_N(Rf_allocVector(INTSXP, d->used), &nprot); int* p_out_loc = INTEGER(out_loc); // Reuse `count` storage, which will be narrowed SEXP out_count = count; int* p_out_count = p_count; int i = 0; for (uint32_t hash = 0; hash < d->size; ++hash) { if (d->key[hash] == DICT_EMPTY) { continue; } p_out_loc[i] = d->key[hash] + 1; p_out_count[i] = p_count[hash]; i++; } out_count = PROTECT_N(r_int_resize(out_count, d->used), &nprot); SEXP out = PROTECT_N(Rf_allocVector(VECSXP, 2), &nprot); SET_VECTOR_ELT(out, 0, out_loc); SET_VECTOR_ELT(out, 1, out_count); SEXP names = PROTECT_N(Rf_allocVector(STRSXP, 2), &nprot); SET_STRING_ELT(names, 0, Rf_mkChar("loc")); SET_STRING_ELT(names, 1, Rf_mkChar("count")); Rf_setAttrib(out, R_NamesSymbol, names); init_data_frame(out, d->used); UNPROTECT(nprot); return out; } #define VCTRS_COUNT_LOOP(DICT_HASH_SCALAR) \ do { \ for (int i = 0; i < n; ++i) { \ uint32_t hash = DICT_HASH_SCALAR(d, i); \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ p_count[hash] = 0; \ } \ \ p_count[hash]++; \ } \ } \ while (0) static inline void vctrs_count_loop(struct dictionary* d, R_len_t n, int* p_count) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_COUNT_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: VCTRS_COUNT_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: VCTRS_COUNT_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: VCTRS_COUNT_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: VCTRS_COUNT_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: VCTRS_COUNT_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: VCTRS_COUNT_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: VCTRS_COUNT_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: VCTRS_COUNT_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vctrs_count_loop", d->p_poly_vec->type); } } #undef VCTRS_COUNT_LOOP SEXP vctrs_duplicated(SEXP x) { int nprot = 0; R_len_t n = vec_size(x); x = PROTECT_N(vec_proxy_equal(x), &nprot); x = PROTECT_N(obj_encode_utf8(x), &nprot); struct dictionary* d = new_dictionary(x); PROTECT_DICT(d, &nprot); SEXP out = PROTECT_N(Rf_allocVector(LGLSXP, n), &nprot); int* p_out = LOGICAL(out); r_memset(p_out, 0, n * sizeof(int)); uint32_t* p_hashes = (uint32_t*) R_alloc(n, sizeof(uint32_t)); vctrs_duplicated_loop(d, n, p_hashes, p_out); UNPROTECT(nprot); return out; } #define VCTRS_DUPLICATED_LOOP(DICT_HASH_SCALAR) \ do { \ /* Forward pass */ \ for (R_len_t i = 0; i < n; ++i) { \ const uint32_t hash = DICT_HASH_SCALAR(d, i); \ p_hashes[i] = hash; \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ } else { \ p_out[i] = 1; \ } \ } \ \ for (uint32_t i = 0; i < d->size; ++i) { \ d->key[i] = DICT_EMPTY; \ } \ \ /* Reverse pass */ \ for (R_len_t i = n - 1; i >= 0; --i) { \ const uint32_t hash = p_hashes[i]; \ \ if (d->key[hash] == DICT_EMPTY) { \ dict_put(d, hash, i); \ } else { \ p_out[i] = 1; \ } \ } \ } \ while (0) static inline void vctrs_duplicated_loop(struct dictionary* d, R_len_t n, uint32_t* p_hashes, int* p_out) { switch (d->p_poly_vec->type) { case VCTRS_TYPE_null: VCTRS_DUPLICATED_LOOP(nil_dict_hash_scalar); break; case VCTRS_TYPE_logical: VCTRS_DUPLICATED_LOOP(lgl_dict_hash_scalar); break; case VCTRS_TYPE_integer: VCTRS_DUPLICATED_LOOP(int_dict_hash_scalar); break; case VCTRS_TYPE_double: VCTRS_DUPLICATED_LOOP(dbl_dict_hash_scalar); break; case VCTRS_TYPE_complex: VCTRS_DUPLICATED_LOOP(cpl_dict_hash_scalar); break; case VCTRS_TYPE_character: VCTRS_DUPLICATED_LOOP(chr_dict_hash_scalar); break; case VCTRS_TYPE_raw: VCTRS_DUPLICATED_LOOP(raw_dict_hash_scalar); break; case VCTRS_TYPE_list: VCTRS_DUPLICATED_LOOP(list_dict_hash_scalar); break; case VCTRS_TYPE_dataframe: VCTRS_DUPLICATED_LOOP(df_dict_hash_scalar); break; default: stop_unimplemented_vctrs_type("vctrs_duplicated_loop", d->p_poly_vec->type); } } #undef VCTRS_DUPLICATED_LOOP void vctrs_init_dictionary(SEXP ns) { args_needles = new_wrapper_arg(NULL, "needles"); args_haystack = new_wrapper_arg(NULL, "haystack"); } vctrs/src/recode.c0000644000176200001440000005354115120513137013615 0ustar liggesusers#include "recode.h" #include "vctrs.h" #include "vec-int.h" #include "decl/recode-decl.h" r_obj* vec_recode_values( r_obj* x, r_obj* from, r_obj* to, r_obj* default_, enum list_combine_unmatched unmatched, bool from_as_list_of_vectors, bool to_as_list_of_vectors, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_from_arg, struct vctrs_arg* p_to_arg, struct vctrs_arg* p_default_arg, r_obj* ptype, struct r_lazy error_call ) { int n_prot = 0; const bool has_default = default_ != r_null; // Vector checks ---- obj_check_vector(x, VCTRS_ALLOW_NULL_no, p_x_arg, error_call); if (from_as_list_of_vectors) { obj_check_list(from, p_from_arg, error_call); list_check_all_vectors(from, VCTRS_ALLOW_NULL_no, p_from_arg, error_call); } else { obj_check_vector(from, VCTRS_ALLOW_NULL_no, p_from_arg, error_call); } if (to_as_list_of_vectors) { obj_check_list(to, p_to_arg, error_call); list_check_all_vectors(to, VCTRS_ALLOW_NULL_no, p_to_arg, error_call); } else { obj_check_vector(to, VCTRS_ALLOW_NULL_no, p_to_arg, error_call); } if (has_default) { obj_check_vector(default_, VCTRS_ALLOW_NULL_no, p_default_arg, error_call); } // Type checks ---- // Finalize `ptype` up front // Remember that `default` gets a chance to participate in `ptype` determination ptype = KEEP_N( ptype_finalize( ptype, to, default_, to_as_list_of_vectors, p_to_arg, p_default_arg, error_call ), &n_prot ); // Cast `to` to the determined common type // Errors can happen here if user supplied `ptype`, short circuiting common type checks if (to_as_list_of_vectors) { to = KEEP_N(vec_cast_common(to, ptype, p_to_arg, error_call), &n_prot); } else { to = KEEP_N(vec_cast(to, ptype, p_to_arg, vec_args.empty, error_call), &n_prot); } // Cast `default` to determined common type if (has_default) { default_ = KEEP_N( vec_cast( default_, ptype, p_default_arg, vec_args.empty, error_call ), &n_prot ); } // Cast `from` to the type of `x` if (from_as_list_of_vectors) { // TODO: `vec_cast_common()` should take `to_arg` so we can use `p_x_arg` here from = KEEP_N(vec_cast_common(from, x, p_from_arg, error_call), &n_prot); } else { from = KEEP_N(vec_cast(from, x, p_from_arg, p_x_arg, error_call), &n_prot); } // Size checks ---- const r_ssize x_size = vec_size(x); r_ssize from_size = vec_size(from); const r_ssize to_size = vec_size(to); // Check size of `to` as recyclable against the size of `from`, // regardless of `from_as_list_of_vectors` and `to_as_list_of_vectors` vec_check_recyclable(to, from_size, VCTRS_ALLOW_NULL_no, p_to_arg, error_call); if (to_as_list_of_vectors) { // Each element of `to` should be recyclable against `x`. list_check_all_recyclable(to, x_size, VCTRS_ALLOW_NULL_no, p_to_arg, error_call); } if (has_default) { // `default` should be recycled against `x`. vec_check_recyclable(default_, x_size, VCTRS_ALLOW_NULL_no, p_default_arg, error_call); } // Implementation ---- // At this point we don't expect any errors, all vector, type, and size // checks have been done. So `p_*_arg` and `error_call` are not used. // Try `to_as_list_of_vectors` optimization // // All size 1 `to` values is a common `dplyr::recode_values()` case if (to_as_list_of_vectors) { if (list_all_size(to, 1, VCTRS_ALLOW_NULL_no, p_to_arg, error_call)) { // Optimize `to` to a flat vector and drop any outer names on the list, we've // already done casting checks and we don't want them on the flat form. We // don't expect any errors here. to_as_list_of_vectors = false; to = KEEP_N( vec_c( to, ptype, name_spec_inner, p_no_repair_opts, vec_args.empty, r_lazy_null ), &n_prot ); } } // Flatten `from` to a vector if it is a list r_obj* from_flat; if (from_as_list_of_vectors) { from_flat = KEEP_N( vec_c( from, x, name_spec_inner, p_no_repair_opts, vec_args.empty, r_lazy_null ), &n_prot ); } else { from_flat = from; } // Compute `xs` and `indices` for `list_combine()` r_obj* xs; r_obj* indices; if (to_as_list_of_vectors) { if (to_size == 1) { xs = to; indices = KEEP_N(build_indices_for_single_to(x, from_flat), &n_prot); } else { xs = to; r_obj* from_flat_map = r_null; if (from_as_list_of_vectors && from_size != vec_size(from_flat)) { // `from` was flattened and that changed its size, we need a "map" back // into the unflattened form to place indices correctly from_flat_map = KEEP_N(build_from_flat_map(from, from_size), &n_prot); } indices = KEEP_N( build_indices_for_to_as_list_of_vectors( x, from_flat, from_flat_map, x_size, from_size ), &n_prot ); } } else { if (to_size == 1) { xs = KEEP_N(r_list(to), &n_prot); indices = KEEP_N(build_indices_for_single_to(x, from_flat), &n_prot); } else { if (from_as_list_of_vectors && from_size != vec_size(from_flat)) { // `from` was flattened and that changed its size, we need to // repeat `to` elements to match the new flattened size to = KEEP_N(build_repeated_to(to, from), &n_prot); } r_obj* result = KEEP_N( build_xs_and_indices_for_to_as_vector( x, from_flat, to, x_size ), &n_prot ); xs = r_list_get(result, 0); indices = r_list_get(result, 1); } } // The `indices` are actually built in such a way that `"first"` is // the effective behavior here, so it doesn't actually matter what // we provide. Regardless, we want `case_when()` like behavior. const enum list_combine_multiple multiple = LIST_COMBINE_MULTIPLE_last; // - With `to_as_list_of_vectors`, each `to` element was size 1 or `x_size` // so can use `ASSIGNMENT_SLICE_VALUE_yes`. // - With `!to_as_list_of_vectors`, `to` was size 1 or `from_size` but has // already been resliced to the `indices` size. enum assignment_slice_value slice_xs = to_as_list_of_vectors ? ASSIGNMENT_SLICE_VALUE_yes : ASSIGNMENT_SLICE_VALUE_no; r_obj* out = list_combine( xs, indices, x_size, default_, unmatched, multiple, slice_xs, ptype, name_spec_inner, p_no_repair_opts, vec_args.empty, vec_args.empty, vec_args.empty, error_call ); FREE(n_prot); return out; } r_obj* vec_replace_values( r_obj* x, r_obj* from, r_obj* to, bool from_as_list_of_vectors, bool to_as_list_of_vectors, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_from_arg, struct vctrs_arg* p_to_arg, struct r_lazy error_call ) { obj_check_vector(x, VCTRS_ALLOW_NULL_no, p_x_arg, error_call); r_obj* default_ = x; struct vctrs_arg* p_default_arg = p_x_arg; const enum list_combine_unmatched unmatched = LIST_COMBINE_UNMATCHED_default; r_obj* ptype = KEEP(vec_ptype_final(x, p_x_arg, error_call)); r_obj* out = KEEP(vec_recode_values( x, from, to, default_, unmatched, from_as_list_of_vectors, to_as_list_of_vectors, p_x_arg, p_from_arg, p_to_arg, p_default_arg, ptype, error_call )); // `vec_recode_values()` creates a new vector and names come from any of `to` // or `default`, but `vec_replace_values()` updates an existing vector and // should act like `[<-` and `base::replace()`, retaining existing names. // `out` is totally fresh, so we can claim deep ownership over it (though we // only require shallow ownership to set names). r_obj* names = KEEP(vec_names(x)); out = vec_set_names(out, names, VCTRS_OWNERSHIP_deep); FREE(3); return out; } r_obj* ffi_vec_recode_values( r_obj* ffi_x, r_obj* ffi_from, r_obj* ffi_to, r_obj* ffi_default, r_obj* ffi_unmatched, r_obj* ffi_from_as_list_of_vectors, r_obj* ffi_to_as_list_of_vectors, r_obj* ffi_ptype, r_obj* ffi_frame ) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy from_arg_lazy = { .x = syms.from_arg, .env = ffi_frame }; struct vctrs_arg from_arg = new_lazy_arg(&from_arg_lazy); struct r_lazy to_arg_lazy = { .x = syms.to_arg, .env = ffi_frame }; struct vctrs_arg to_arg = new_lazy_arg(&to_arg_lazy); struct r_lazy default_arg_lazy = { .x = syms.default_arg, .env = ffi_frame }; struct vctrs_arg default_arg = new_lazy_arg(&default_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; const bool from_as_list_of_vectors = r_arg_as_bool(ffi_from_as_list_of_vectors, "from_as_list_of_vectors"); const bool to_as_list_of_vectors = r_arg_as_bool(ffi_to_as_list_of_vectors, "to_as_list_of_vectors"); const enum list_combine_unmatched unmatched = parse_list_combine_unmatched(ffi_unmatched, error_call); return vec_recode_values( ffi_x, ffi_from, ffi_to, ffi_default, unmatched, from_as_list_of_vectors, to_as_list_of_vectors, &x_arg, &from_arg, &to_arg, &default_arg, ffi_ptype, error_call ); } r_obj* ffi_vec_replace_values( r_obj* ffi_x, r_obj* ffi_from, r_obj* ffi_to, r_obj* ffi_from_as_list_of_vectors, r_obj* ffi_to_as_list_of_vectors, r_obj* ffi_frame ) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy from_arg_lazy = { .x = syms.from_arg, .env = ffi_frame }; struct vctrs_arg from_arg = new_lazy_arg(&from_arg_lazy); struct r_lazy to_arg_lazy = { .x = syms.to_arg, .env = ffi_frame }; struct vctrs_arg to_arg = new_lazy_arg(&to_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; const bool from_as_list_of_vectors = r_arg_as_bool(ffi_from_as_list_of_vectors, "from_as_list_of_vectors"); const bool to_as_list_of_vectors = r_arg_as_bool(ffi_to_as_list_of_vectors, "to_as_list_of_vectors"); return vec_replace_values( ffi_x, ffi_from, ffi_to, from_as_list_of_vectors, to_as_list_of_vectors, &x_arg, &from_arg, &to_arg, error_call ); } // Form `indices` when `to` is a list of vectors // // # Example // // ``` // x = c(1, 2, 3, 4, 5, 1) // // from = list( // c(1, 3), // c(4, 2, 1) // ) // // # Expected output // indices = list( // c(1, 3, 6), // c(2, 4) // ) // ``` // // To get to our expected output, we flatten `from` into `from_flat` but track // its original sizes through a `from_flat_map` // // ``` // from_flat = c(1, 3, 4, 2, 1) // from_flat_map = c(0, 0, 1, 1, 1) // ``` // // Then we match `x` against `from_flat` to only perform a single match call, // and to ensure that the "first" match wins. // // ``` // loc = c(1, 4, 2, 3, NA, 1) // ``` // // We loop over `loc` to determine the size of each `index` that goes in the output. // // ``` // loc = c(1, 4, 2, 3, NA, 1) // _ _ _ These go with index 1, size 3 // _ _ These go with index 2, size 2 // ``` // // We determine that, say, `loc` 4 (1-indexed) from above goes with index 2 by looking at // `from_flat_map[4 - 1]` and seeing that that gives us 1 (0-indexed) meaning it goes // with index 2. // // We then construct the `indices` of the right size using this first pass, then // do a second pass that finally accumulates the `indices` in the right buckets, // using the same `from_flat_map` explanation as above. // // # Safety // // Expect that all type and size checks have been done. static r_obj* build_indices_for_to_as_list_of_vectors( r_obj* x, r_obj* from_flat, r_obj* from_flat_map, r_ssize x_size, r_ssize from_size ) { r_obj* indices = KEEP(r_alloc_list(from_size)); const bool has_from_flat_map = from_flat_map != r_null; const r_ssize* v_from_flat_map = has_from_flat_map ? r_raw_cbegin(from_flat_map) : NULL; // Want `na_equal = true` for `vec_recode_values(x, from = NA, to = to)` const bool na_equal = true; // Find `x` values in `from_flat` r_obj* loc = KEEP(vec_match_params( x, from_flat, na_equal, vec_args.empty, vec_args.empty, r_lazy_null )); const int* v_loc = r_int_cbegin(loc); r_obj* index_sizes = KEEP(r_alloc_raw(from_size * sizeof(r_ssize))); r_ssize* v_index_sizes = r_raw_begin(index_sizes); r_memset(v_index_sizes, 0, from_size * sizeof(r_ssize)); // First pass sums the `index` sizes. Using growables has too much // ambiguity when there are a large number of `from` elements. for (r_ssize i = 0; i < x_size; ++i) { int loc = v_loc[i]; if (loc != r_globals.na_int) { loc = has_from_flat_map ? v_from_flat_map[loc - 1] : loc - 1; ++v_index_sizes[loc]; } } // Direct pointer to the integer vectors we store in `indices` r_obj* v_indices = KEEP(r_alloc_raw(from_size * sizeof(int*))); int** v_v_indices = r_raw_begin(v_indices); // Allocate each `index` for (r_ssize i = 0; i < from_size; ++i) { const r_ssize index_size = v_index_sizes[i]; r_obj* index = r_alloc_integer(index_size); r_list_poke(indices, i, index); v_v_indices[i] = r_int_begin(index); } // Set `v_index_sizes` to `0`. We are going to reuse this // to now track the location we are inserting at for a particular index. r_memset(v_index_sizes, 0, from_size * sizeof(r_ssize)); r_ssize* v_index_locs = v_index_sizes; v_index_sizes = NULL; // Second pass inserts the `index` values for (r_ssize i = 0; i < x_size; ++i) { int loc = v_loc[i]; if (loc != r_globals.na_int) { loc = has_from_flat_map ? v_from_flat_map[loc - 1] : loc - 1; int* v_index = v_v_indices[loc]; const r_ssize index_loc = v_index_locs[loc]; ++v_index_locs[loc]; v_index[index_loc] = i + 1; } } FREE(4); return indices; } // Form `xs` and `indices` when `to` is a vector // // # Example // // Imagine you have: // // ``` // vec_recode_values( // x = c(1, 2, 3, 2), // from = c(3, 2), // to = c(-1, -2) // ) // #> [1] NA -2 -1 -2 // ``` // // To get this output, you `vec_match(x, from)` to get: // // ``` // vec_match(x, from) // #> [1] NA 2 1 2 // ``` // // Then compute `index_into_to` and `index_into_out` as: // // ``` // index_into_to = c(2, 1, 2) // index_into_out = c(2, 3, 4) // // vec_slice(to, index_into_to) // #> [1] -2 -1 -2 // ``` // // Now you can take `list(to)` and `list(index_into_out)` as `xs` and `indices` into // `list_combine()`. In theory the output could just be: // // ``` // vec_slice(to, vec_match(x, from)) // ``` // // but this is simplifying over a few things: // // - No `default` support // - No `unmatched` support // - We always make a "fresh" output container in `list_combine()` with // `vec_init()`, which has implications for clearing extraneous that might // have been on `to`. Ideally we do drop any extraneous attributes not // relevant for the class before returning the attribute. // // So in the end it seems simpler to just go through `list_combine()` for // all cases. // // # Safety // // Expect that all type and size checks have been done. static r_obj* build_xs_and_indices_for_to_as_vector( r_obj* x, r_obj* from_flat, r_obj* to, r_ssize x_size ) { r_obj* out = KEEP(r_alloc_list(2)); const bool na_equal = true; // Find locations where `to` will be utilized // Want `na_equal = true` for `vec_recode_values(x, from = NA, to = to)` r_obj* loc = KEEP(vec_match_params( x, from_flat, na_equal, vec_args.empty, vec_args.empty, r_lazy_null )); // TODO: Might be nice if `vec_match()` optionally reported the number of matches // as an `n` attribute like `vec_group_id()`, not sure how that interacts with // `na_equal = false` though, do propagated `NA` count as matches? const r_ssize size = r_int_count_complete(loc); r_obj* xs = r_alloc_list(1); r_list_poke(out, 0, xs); r_obj* indices = r_alloc_list(1); r_list_poke(out, 1, indices); // `to_size != 1` otherwise we would have used // `build_indices_for_single_to()`, so `to` instead has size of `from_size` // and must be resliced and placed in the correct order. if (size == x_size) { // It's fairly common to match every value in `x` when recoding, // so we optimize this case. You don't need to drop the `NA`s // out of the index before slicing (there aren't any), and you // can use a compact-seq for the assignment index. r_obj* index = compact_seq(0, x_size, true); r_list_poke(indices, 0, index); r_list_poke(xs, 0, vec_slice_unsafe(to, loc)); } else { const int* v_loc = r_int_cbegin(loc); r_obj* index = r_alloc_integer(size); r_list_poke(indices, 0, index); int* v_index = r_int_begin(index); for (r_ssize i = 0, j = 0; i < x_size && j < size; ++i) { const int elt = v_loc[i]; v_index[j] = elt; j += (elt != r_globals.na_int); } r_list_poke(xs, 0, vec_slice_unsafe(to, index)); // Now actually fill `index` for use with `list_combine()` for (r_ssize i = 0, j = 0; i < x_size && j < size; ++i) { const int elt = v_loc[i]; v_index[j] = i + 1; j += (elt != r_globals.na_int); } } FREE(2); return out; } // Form `indices` when `to` is length 1 // // This applies for all of these: // - `to = 1`, wrapped into `to = list(1)` // - `to = list(1)` // - `to = list(vec)`, where `vec` is `x_size` // // When you just have 1 `to` value, any match maps straight to this `to` value, // there is nothing to "reslice" like in the other cases so we get to skip that // step. // // # Safety // // Expect that all type and size checks have been done. static r_obj* build_indices_for_single_to(r_obj* x, r_obj* from_flat) { r_obj* indices = KEEP(r_alloc_list(1)); // Want `na_equal = true` for `vec_recode_values(x, from = NA, to = to)` const bool na_equal = true; // TODO: For a lower memory footprint, we could teach `list_combine()` how to // take `compact_condition` as `indices` elements and then give `vec_in()` an // option to return one of these. For 10 mil integer elements, this reduces // memory usage by 30mb and is ~10% faster. That also makes giving a "single // `to`" its own path even more compelling. // // Find locations where `to` will be utilized r_obj* index = vec_in( x, from_flat, na_equal, vec_args.empty, vec_args.empty, r_lazy_null ); r_list_poke(indices, 0, index); FREE(1); return indices; } // Builds a map from `from_flat` into `from` // // # Example // // When `from` is a flat vector, you have as many `index`es as `from` elements, // and `vec_match()` locations map directly to `index` locations. // // ``` // from = c(1, 3, 4, 2, 5) // index_locs = c(0, 1, 2, 3, 4) // ``` // // When `from` is a list of vectors, you have as many `index`es as `from` vectors. // We flatten `from` to perform the `vec_match()`, but then we need a little help // to map the match locations back to the `from` vector's `index` location. // // ``` // from = list(c(1, 3), c(4, 2, 5)) // from_flat = c(1, 3, 4, 2, 5) // from_flat_map = c(0, 0, 1, 1, 1) // index_locs = c(0, 1) // ``` static r_obj* build_from_flat_map(r_obj* from, r_ssize from_size) { r_obj* const* v_from = r_list_cbegin(from); // Cache sizes rather than calling `vec_size()` again, as that's // a little faster r_obj* from_sizes = KEEP(r_alloc_raw(from_size * sizeof(r_ssize))); r_ssize* v_from_sizes = r_raw_begin(from_sizes); r_ssize from_flat_map_size = 0; for (r_ssize i = 0; i < from_size; ++i) { r_obj* elt = v_from[i]; const r_ssize elt_size = vec_size(elt); v_from_sizes[i] = elt_size; from_flat_map_size += elt_size; } r_obj* from_flat_map = KEEP(r_alloc_raw(from_flat_map_size * sizeof(r_ssize))); r_ssize* v_from_flat_map = r_raw_begin(from_flat_map); r_ssize k = 0; for (r_ssize i = 0; i < from_size; ++i) { const r_ssize elt_size = v_from_sizes[i]; for (r_ssize j = 0; j < elt_size; ++j) { v_from_flat_map[k] = i; ++k; } } FREE(2); return from_flat_map; } // Repeats vector `to` to match `from_flat` // // # Example // // ``` // from = list(c(1, 3), c(2, 4, 5)) // to = c(0, 1) // ``` // // We flatten `from` and then repeat `to` to match its flattened form // so they stay aligned. // // ``` // from_flat = c(1, 3, 2, 4, 5) // to = c(0, 0, 1, 1, 1) // ``` static r_obj* build_repeated_to(r_obj* to, r_obj* from) { r_obj* from_sizes = KEEP(list_sizes(from, vec_args.empty, r_lazy_null)); to = vec_rep_each(to, from_sizes, r_lazy_null, vec_args.empty, vec_args.empty); FREE(1); return to; } static r_obj* ptype_finalize( r_obj* ptype, r_obj* to, r_obj* default_, bool to_as_list_of_vectors, struct vctrs_arg* p_to_arg, struct vctrs_arg* p_default_arg, struct r_lazy error_call ) { if (ptype != r_null) { // Performs scalar checks and whatnot return vec_ptype_final(ptype, vec_args.ptype, error_call); } // Otherwise `ptype` is `NULL`, and we determine it from `to` and `default` r_keep_loc ptype_pi; KEEP_HERE(ptype, &ptype_pi); if (to_as_list_of_vectors) { // Use only `to` and `p_to_arg` first for best errors // Not finalising `ptype` yet in case we need to incorporate `default`! ptype = vec_ptype_common( to, r_null, PTYPE_FINALISE_false, S3_FALLBACK_false, p_to_arg, error_call ); KEEP_AT(ptype, ptype_pi); // Now incorporate `default` and `p_default_arg` int _; ptype = vec_ptype2( default_, ptype, p_default_arg, vec_args.empty, error_call, S3_FALLBACK_false, &_ ); KEEP_AT(ptype, ptype_pi); } else { int _; ptype = vec_ptype2( to, default_, p_to_arg, p_default_arg, error_call, S3_FALLBACK_false, &_ ); KEEP_AT(ptype, ptype_pi); } // Finalize on the way out ptype = vec_ptype_finalise(ptype); FREE(1); return ptype; } vctrs/src/c.c0000644000176200001440000000216115113325071012566 0ustar liggesusers#include "vctrs.h" r_obj* ffi_vec_c(r_obj* ffi_call, r_obj* op, r_obj* args, r_obj* frame) { args = r_node_cdr(args); r_obj* xs = r_node_car(args); args = r_node_cdr(args); r_obj* ptype = r_node_car(args); args = r_node_cdr(args); r_obj* name_spec = r_node_car(args); args = r_node_cdr(args); r_obj* name_repair = r_node_car(args); struct r_lazy error_arg_lazy = { .x = syms.dot_error_arg, .env = frame }; struct vctrs_arg error_arg = new_lazy_arg(&error_arg_lazy); struct r_lazy error_call = { .x = syms.dot_error_call, .env = frame }; struct name_repair_opts name_repair_opts = new_name_repair_opts( name_repair, r_lazy_null, false, error_call ); KEEP(name_repair_opts.shelter); r_obj* out = vec_c(xs, ptype, name_spec, &name_repair_opts, &error_arg, error_call); FREE(1); return out; } r_obj* vec_c( r_obj* xs, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* name_repair, struct vctrs_arg* p_error_arg, struct r_lazy error_call ) { return list_combine_for_vec_c( xs, ptype, name_spec, name_repair, p_error_arg, error_call ); } vctrs/src/type-tibble.c0000644000176200001440000000300715120513137014564 0ustar liggesusers#include "vctrs.h" #include "type-data-frame.h" r_obj* tib_ptype2( r_obj* x, r_obj* y, struct vctrs_arg* p_x_arg, struct vctrs_arg* p_y_arg, struct r_lazy call, enum s3_fallback s3_fallback ) { r_obj* out = KEEP(df_ptype2( x, y, p_x_arg, p_y_arg, call, s3_fallback )); r_attrib_poke_class(out, classes_tibble); FREE(1); return out; } // [[ register() ]] r_obj* ffi_tib_ptype2(r_obj* x, r_obj* y, r_obj* ffi_x_arg_, r_obj* ffi_y_arg_, r_obj* frame) { struct vctrs_arg x_arg = vec_as_arg(ffi_x_arg_); struct vctrs_arg y_arg = vec_as_arg(ffi_y_arg_); struct r_lazy call = { .x = r_syms.call, .env = frame }; return tib_ptype2( x, y, &x_arg, &y_arg, call, S3_FALLBACK_false ); } // [[ include("type-tibble.h") ]] SEXP tib_cast(const struct cast_opts* opts) { SEXP out = PROTECT(df_cast_opts(opts)); Rf_setAttrib(out, R_ClassSymbol, classes_tibble); UNPROTECT(1); return out; } // [[ register() ]] r_obj* ffi_tib_cast(r_obj* x, r_obj* to, r_obj* ffi_x_arg, r_obj* ffi_to_arg, r_obj* frame) { struct vctrs_arg x_arg = vec_as_arg(ffi_x_arg); struct vctrs_arg to_arg = vec_as_arg(ffi_to_arg); const struct cast_opts opts = { .x = x, .to = to, .p_x_arg = &x_arg, .p_to_arg = &to_arg, .call = { .x = r_syms.call, .env = frame } }; return tib_cast(&opts); } vctrs/src/order.c0000644000176200001440000031134315156537555013507 0ustar liggesusers/* * The implementation of vec_order() is based on data.table’s forder() and their * earlier contribution to R’s order(). See LICENSE.note for more information. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at https://mozilla.org/MPL/2.0/. * * Copyright (c) 2020, RStudio * Copyright (c) 2020, Data table team */ #include "vctrs.h" #include "type-data-frame.h" #include "type-complex.h" #include "decl/order-decl.h" // ----------------------------------------------------------------------------- /* * High level description of `vec_order()` * * Heavily inspired by `radixsort.c` in base R and `forder()` from data.table * https://github.com/wch/r-source/blob/trunk/src/main/radixsort.c * https://github.com/Rdatatable/data.table/blob/master/src/forder.c * * Additional resources about radix sorting: * http://codercorner.com/RadixSortRevisited.htm * http://stereopsis.com/radix.html * * The very end of this has a MSB radix sort implementation * https://eternallyconfuzzled.com/sorting-c-introduction-to-the-automatic-ordering-of-data * * ----------------------------------------------------------------------------- * Integers * * This uses a combination of 3 ordering algorithms. * * - `int_order_insertion()` - An insertion sort is used when `x` is very * small. This has less overhead than the counting or radix sort and is * faster for small input. * * - `int_order_counting()` - A counting sort is used when `x` has a range * of less than `INT_ORDER_COUNTING_RANGE_BOUNDARY`. For integers with a * small range like this, the bucketing in the counting sort can be very * fast when compared with the recursive multipass approach of the radix sort. * * - `int_order_radix()` - A radix sort is used for everything else. * This is a MSB radix sort. It orders the vector 1 byte (8 bits) at a time, * so for a 4 byte int this makes a maximum of 4 passes over each integer. * It orders from most significant byte to least significant. After each * pass, there are 256 buckets (1 for each possible byte value). Each bucket * is then ordered separately on the next byte. This happens recursively for * the 4 passes. When the buckets get small enough in size, the insertion * sort is used to finish them off. * * For radix sorting, we have to use unsigned types for bit shifting to * work reliably. We map `int` to `uint32_t` in a way that preserves order, * and also handle `na_last` and `decreasing` in this mapping. This all happens * in `int_adjust()`. It is assumed and checked at load time that * `sizeof(int) == 4`. * * ----------------------------------------------------------------------------- * Doubles * * This uses a combination of 2 ordering algorithms: * * - `dbl_order_insertion()` - An insertion sort is used when `x` is very small. * * - `dbl_order_radix()` - This is similar to `int_order_radix()`, see above, * but makes a max of 8 passes over the data. * * For doubles, we assume `sizeof(double) == 8`, which should pretty much be * ensured by IEEE 754 specifications. * * For the mapping here, it is possible to map `double -> uint64_t` in an * order preserving way. This is very cool, and involves always flipping the * sign bit of the value, and flipping all other bits if the value was negative. * This is described more in: http://stereopsis.com/radix.html. * This is implemented in `dbl_adjust()` which also handles `na_last` and * `decreasing`. For `na_last`, we treat `NA_real_` and `NaN` equivalently. * Base R does as well, but data.table does not. * * ----------------------------------------------------------------------------- * Characters * * This uses a combination of 2 ordering algorithms: * * - `chr_order_insertion()` - An insertion sort is used when `x` is very small. * * - `chr_order_radix()` - Same principle as integer/double ordering, but we * iterate 1 character at a time. We assume a C locale here. Any non-ASCII and * non-UTF-8 strings are translated up front by `obj_encode_utf8()`. We have a * number of short cuts for trying to exit out of this early so that we don't * have to look at every byte of the string. * * ----------------------------------------------------------------------------- * Logicals * * Uses the same infrastructure as integers. Because the number of possible * unique values is low, this will always use either an insertion sort for * small vectors, or a counting sort for large ones. * * ----------------------------------------------------------------------------- * Complex * * We treat complex as a data frame of two double columns. We order the * real part first using `dbl_order_chunk()`, then order the imaginary part also * using `dbl_order_chunk()`. * * ----------------------------------------------------------------------------- * Data frames * * Multi-column data frame ordering uses the same principle as MSB ordering. * It starts with the first column (the most "significant" one) and orders it. * While ordering the column, group sizes are tracked ("groups" are duplicate * values in the column). The next column is broken into chunks corresponding * to these group sizes from the first column, and the chunks are ordered * individually. While ordering the chunks of the 2nd column, group sizes are * again tracked to use in subsequent columns. */ // ----------------------------------------------------------------------------- #define UINT8_MAX_SIZE (UINT8_MAX + 1) #define UINT8_MAX_SIZE_HALVED (UINT8_MAX + 1) / 2 /* * Maximum number of passes required to completely sort ints and doubles */ #define INT_MAX_RADIX_PASS 4 #define DBL_MAX_RADIX_PASS 8 /* * Maximum range allowed when deciding whether or not to use a counting sort * vs a radix sort. Counting sort is somewhat faster when less than this * boundary value. */ #define INT_ORDER_COUNTING_RANGE_BOUNDARY 100000 /* * Size of `x` that determines when an insertion sort should be used. Seems * to work better than 256 (from limited testing), base R uses 200. * Somewhat based on this post: * https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/ */ #define ORDER_INSERTION_BOUNDARY 128 /* * Adjustments for translating current `pass` into the current `radix` byte * that we need to shift to. */ #define PASS_TO_RADIX(X, MAX) (MAX - 1 - X) #define SHIFT_ADJUSTMENT -CHAR_BIT // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_order( SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate ) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); return vec_order(x, direction, na_value, c_nan_distinct, chr_proxy_collate); } // [[ include("order.h") ]] SEXP vec_order( SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate ) { const bool group_sizes = false; SEXP info = vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, group_sizes); return r_list_get(info, 0); } // ----------------------------------------------------------------------------- // [[ register() ]] SEXP vctrs_locate_sorted_groups( SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate ) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); return vec_locate_sorted_groups( x, direction, na_value, c_nan_distinct, chr_proxy_collate ); } static SEXP vec_locate_sorted_groups( SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate ) { SEXP info = KEEP(vec_order_info( x, direction, na_value, nan_distinct, chr_proxy_collate )); SEXP o = r_list_get(info, 0); const int* p_o = r_int_cbegin(o); SEXP sizes = r_list_get(info, 1); const int* p_sizes = r_int_cbegin(sizes); r_ssize n_groups = r_length(sizes); SEXP loc = KEEP(r_alloc_list(n_groups)); SEXP key_loc = KEEP(r_alloc_integer(n_groups)); int* p_key_loc = r_int_begin(key_loc); int start = 0; for (r_ssize i = 0; i < n_groups; ++i) { p_key_loc[i] = p_o[start]; const int size = p_sizes[i]; SEXP elt = r_alloc_integer(size); r_list_poke(loc, i, elt); int* p_elt = r_int_begin(elt); R_len_t k = 0; for (int j = 0; j < size; ++j) { p_elt[k] = p_o[start]; ++start; ++k; } } SEXP key = KEEP(vec_slice(x, key_loc)); // Construct output data frame SEXP out = KEEP(r_alloc_list(2)); r_list_poke(out, 0, key); r_list_poke(out, 1, loc); SEXP names = KEEP(r_alloc_character(2)); r_chr_poke(names, 0, strings_key); r_chr_poke(names, 1, strings_loc); r_attrib_poke(out, r_syms.names, names); out = new_data_frame(out, n_groups); FREE(6); return out; } // ----------------------------------------------------------------------------- /* * Returns a list of size three. * - The first element of the list contains the ordering as an integer vector. * - The second element of the list contains the group sizes as an integer * vector. * - The third element of the list contains the max group size as an integer. */ // [[ include("order.h") ]] SEXP vec_order_info( SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate ) { const bool group_sizes = true; return vec_order_info_impl(x, direction, na_value, nan_distinct, chr_proxy_collate, group_sizes); } // [[ register() ]] SEXP vctrs_order_info( SEXP x, SEXP direction, SEXP na_value, SEXP nan_distinct, SEXP chr_proxy_collate ) { bool c_nan_distinct = parse_nan_distinct(nan_distinct); return vec_order_info(x, direction, na_value, c_nan_distinct, chr_proxy_collate); } static SEXP vec_order_info_impl( SEXP x, SEXP direction, SEXP na_value, bool nan_distinct, SEXP chr_proxy_collate, bool group_sizes ) { // TODO call struct r_lazy call = r_lazy_null; int n_prot = 0; SEXP decreasing = PROTECT_N(parse_direction(direction), &n_prot); SEXP na_largest = PROTECT_N(parse_na_value(na_value), &n_prot); // Call on `x` before potentially flattening cols with `vec_proxy_order()` SEXP args = PROTECT_N(vec_order_expand_args(x, decreasing, na_largest), &n_prot); R_len_t arg_size = vec_size_common(args, 0, vec_args.empty, call); args = PROTECT_N(vec_recycle_common(args, arg_size, vec_args.empty, call), &n_prot); decreasing = VECTOR_ELT(args, 0); na_largest = VECTOR_ELT(args, 1); SEXP na_last = PROTECT_N(vec_order_compute_na_last(na_largest, decreasing), &n_prot); SEXP proxy = PROTECT_N(vec_proxy_order(x), &n_prot); proxy = PROTECT_N(obj_encode_utf8(proxy), &n_prot); proxy = PROTECT_N(proxy_apply_chr_proxy_collate(proxy, chr_proxy_collate), &n_prot); r_ssize size = vec_size(proxy); const enum vctrs_type type = vec_proxy_typeof(proxy); // Compute the maximum size required for auxiliary working memory const size_t n_bytes_lazy_raw = vec_compute_n_bytes_lazy_raw(proxy, type); // Auxiliary vectors to hold intermediate results while ordering. // If `x` is a data frame we allocate enough room for the largest column type. struct lazy_raw* p_lazy_x_chunk = new_lazy_raw(size, n_bytes_lazy_raw); PROTECT_LAZY_VEC(p_lazy_x_chunk, &n_prot); struct lazy_raw* p_lazy_x_aux = new_lazy_raw(size, n_bytes_lazy_raw); PROTECT_LAZY_VEC(p_lazy_x_aux, &n_prot); struct lazy_raw* p_lazy_o_aux = new_lazy_raw(size, sizeof(int)); PROTECT_LAZY_VEC(p_lazy_o_aux, &n_prot); struct lazy_raw* p_lazy_bytes = new_lazy_raw(size, sizeof(uint8_t)); PROTECT_LAZY_VEC(p_lazy_bytes, &n_prot); // Compute the maximum size of the `counts` vector needed during radix // ordering. 4 * 256 for integers, 8 * 256 for doubles, not used for characters // since the number of iterations depends on string length. size_t n_bytes_lazy_counts = vec_compute_n_bytes_lazy_counts(proxy, type); r_ssize size_lazy_counts = UINT8_MAX_SIZE * n_bytes_lazy_counts; struct lazy_raw* p_lazy_counts = new_lazy_raw(size_lazy_counts, sizeof(r_ssize)); PROTECT_LAZY_VEC(p_lazy_counts, &n_prot); // Determine if group tracking can be turned off. // We turn if off if ordering non-data frame input as long as // locations haven't been requested by the user. // It is more efficient to ignore it when possible. bool force_groups = group_sizes; bool ignore_groups = force_groups ? false : (is_data_frame(proxy) ? false : true); // Construct the two sets of group info needed for tracking groups. // We switch between them after each data frame column is processed. struct group_info* p_group_info0 = new_group_info(); PROTECT_GROUP_INFO(p_group_info0, &n_prot); struct group_info* p_group_info1 = new_group_info(); PROTECT_GROUP_INFO(p_group_info1, &n_prot); struct group_infos* p_group_infos = new_group_infos( p_group_info0, p_group_info1, size, force_groups, ignore_groups ); PROTECT_GROUP_INFOS(p_group_infos, &n_prot); struct order* p_order = new_order(size); PROTECT_ORDER(p_order, &n_prot); vec_order_switch( proxy, decreasing, na_last, nan_distinct, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); SEXP out = PROTECT_N(r_alloc_list(3), &n_prot); r_list_poke(out, 0, p_order->data); if (group_sizes) { struct group_info* p_group_info = groups_current(p_group_infos); SEXP sizes = p_group_info->data; sizes = r_int_resize(sizes, p_group_info->n_groups); r_list_poke(out, 1, sizes); r_list_poke(out, 2, r_int((int) p_group_info->max_group_size)); } UNPROTECT(n_prot); return out; } // ----------------------------------------------------------------------------- static void vec_order_switch( SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { if (type == VCTRS_TYPE_dataframe) { df_order( x, decreasing, na_last, nan_distinct, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); return; } if (r_length(decreasing) != 1) { Rf_errorcall( R_NilValue, "Internal error: Size of decreasing != 1, but " "`vec_order_expand_args()` didn't catch it." ); } if (r_length(na_last) != 1) { Rf_errorcall( R_NilValue, "Internal error: Size of na_last != 1, but " "`vec_order_expand_args()` didn't catch it." ); } bool c_decreasing = LOGICAL(decreasing)[0]; bool c_na_last = LOGICAL(na_last)[0]; vec_order_base_switch( x, c_decreasing, c_na_last, nan_distinct, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } // ----------------------------------------------------------------------------- // Used on bare vectors and the first column of data frame `x`s static void vec_order_base_switch( SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, const enum vctrs_type type, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { switch (type) { case VCTRS_TYPE_integer: { int_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_logical: { lgl_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_double: { dbl_order( x, decreasing, na_last, nan_distinct, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_complex: { cpl_order( x, decreasing, na_last, nan_distinct, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_character: { chr_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_group_infos ); break; } case VCTRS_TYPE_dataframe: { Rf_errorcall(R_NilValue, "Internal error: Data frames should have been handled by now"); } default: { Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } } // ----------------------------------------------------------------------------- /* * These are the main entry points for integer ordering. They are nearly * identical except `int_order()` assumes that `p_x` cannot be * modified directly and is user input. * * `int_order_chunk()` assumes `p_x` is modifiable by reference. It is called * when iterating over data frame columns and `p_x` is the 2nd or greater * column, in which case `p_x` is really a chunk of that column that has been * copied into `x_chunk`. * * `int_order()` assumes `p_x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly unless a * counting sort is going to be used, in which case `p_x` can be used directly. */ static void int_order_chunk( bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { void* p_x_chunk = p_lazy_x_chunk->p_data; const enum vctrs_sortedness sortedness = int_sortedness( p_x_chunk, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } if (size <= ORDER_INSERTION_BOUNDARY) { int_adjust(decreasing, na_last, size, p_x_chunk); int_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint32_t range; int x_min; int_compute_range(p_x_chunk, size, &x_min, &range); /* * If in counting order range and on the second or higher column, we will * need `p_o_aux` as working memory. At this point, `p_o` will have been * initialized from ordering the first column. */ if (range < INT_ORDER_COUNTING_RANGE_BOUNDARY) { const bool initialized = true; int_order_counting( p_x_chunk, size, x_min, range, initialized, decreasing, na_last, p_o, p_o_aux, p_group_infos ); return; } uint32_t* p_x_aux = (uint32_t*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); r_memset(p_counts, 0, p_lazy_counts->size); int_adjust(decreasing, na_last, size, p_x_chunk); int_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } static void int_order( SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { const int* p_x = INTEGER_RO(x); const enum vctrs_sortedness sortedness = int_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } if (size <= ORDER_INSERTION_BOUNDARY) { int* p_o = init_order(p_order); void* p_x_chunk = init_lazy_raw(p_lazy_x_chunk); r_memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); int_adjust(decreasing, na_last, size, p_x_chunk); int_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } uint32_t range; int x_min; int_compute_range(p_x, size, &x_min, &range); /* * If in counting order range and on the first column / single vector, * `p_o_aux` won't be used, so no need to initialize it. * * Also, `p_o` will be filled directly, so for performance we don't * initialize its order. */ if (range < INT_ORDER_COUNTING_RANGE_BOUNDARY) { const bool initialized = false; int* p_o = p_order->p_data; int* p_o_aux = NULL; int_order_counting( p_x, size, x_min, range, initialized, decreasing, na_last, p_o, p_o_aux, p_group_infos ); p_order->initialized = true; return; } int* p_o = init_order(p_order); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint32_t* p_x_aux = (uint32_t*) init_lazy_raw(p_lazy_x_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); r_memset(p_counts, 0, p_lazy_counts->size); // Copy `x` so we can modify in place void* p_x_chunk = init_lazy_raw(p_lazy_x_chunk); r_memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); int_adjust(decreasing, na_last, size, p_x_chunk); int_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } // ----------------------------------------------------------------------------- /* * - Shifts the integer elements of `p_x` in a way that correctly maintains * ordering for `na_last` and `decreasing` * * - After shifting, also maps the elements from `int32_t` to `uint32_t` and * stores them back in `p_x`. * * - Used before both the integer insertion sort and radix sort, which both * expect their input to already have been "adjusted" for `na_last` and * `decreasing` and expect a `uint32_t` pointer input. * * - If `na_last = true`, `NA` is always the maximum element, so we set it to * `UINT32_MAX`. In that case, we also shift all non-NA values down by 1 to * make room for it (defined by `na_shift`). * * - If `na_last = false`, we set `NA` to the minimum element of 0. * * - The multiplication by `direction` applies to non-NA values and correctly * orders inputs based on whether we are in a decreasing order or not. */ static void int_adjust( const bool decreasing, const bool na_last, const r_ssize size, void* p_x ) { const int direction = decreasing ? -1 : 1; const uint32_t na_u32 = na_last ? UINT32_MAX : 0; const int na_shift = na_last ? -1 : 0; const int* p_x_int = (const int*) p_x; uint32_t* p_x_u32 = (uint32_t*) p_x; for (r_ssize i = 0; i < size; ++i) { int elt = p_x_int[i]; if (elt == NA_INTEGER) { p_x_u32[i] = na_u32; continue; } elt = elt * direction + na_shift; p_x_u32[i] = int_map_to_uint32(elt); } } #define HEX_UINT32_SIGN_BIT 0x80000000u // Flipping the sign bit is all we need to do to map in an order preserving way. // [INT32_MIN, INT32_MAX] => [0, UINT32_MAX] static inline uint32_t int_map_to_uint32(int x) { return ((uint32_t) x) ^ HEX_UINT32_SIGN_BIT; } #undef HEX_UINT32_SIGN_BIT // ----------------------------------------------------------------------------- /* * `int_compute_range()` computes the range of all values in `p_x`. * It is used by counting sort to computes buckets with `p_x[i] - x_min`. * * - `p_range` and `p_x_min` are updated on the way out to retain both the * range and the minimum value. * * - `NA` values are skipped over. If all values are `NA`, we defer to radix * sort (which definitely can handle that case) by returning a `range` of the * maximum uint32 value (which will be greater than * INT_ORDER_COUNTING_RANGE_BOUNDARY). */ static void int_compute_range( const int* p_x, r_ssize size, int* p_x_min, uint32_t* p_range ) { uint32_t range = UINT32_MAX; int x_min = NA_INTEGER; int x_max = NA_INTEGER; r_ssize i = 0; // Find first non-NA value while (i < size) { const int elt = p_x[i]; if (elt == NA_INTEGER) { ++i; continue; } x_min = elt; x_max = elt; range = 0; // Bump to next `i` since we know this one's value ++i; break; } // All NAs - Return max range to signal to use radix sort if (x_min == NA_INTEGER) { *p_x_min = x_min; *p_range = range; return; } // Now that we have initial values, iterate through the rest // to compute the final min/max. for (r_ssize j = i; j < size; ++j) { const int elt = p_x[j]; if (elt == NA_INTEGER) { continue; } if (elt > x_max) { x_max = elt; } else if (elt < x_min) { x_min = elt; } } /* * - Max possible range is from * `c(.Machine$integer.max, -.Machine$integer.max)` which is exactly the * max of a `uint32_t`. * - We need to go up to `intmax_t` to avoid intermediate overflow. * - `+ 1` to get an inclusive range on both ends. */ range = (uint32_t) r__intmax_add(r__intmax_subtract(x_max, x_min), 1); *p_x_min = x_min; *p_range = range; } // ----------------------------------------------------------------------------- /* * The counting sort expects `p_x` to be unadjusted (i.e. `int_adjust()` has * not been used). It handles `decreasing` and `na_last` internally. * * Counting sort is used when `p_x` has a range less than * `INT_ORDER_COUNTING_RANGE_BOUNDARY`. In these cases radix sort * doesn't spread out values as much when looking at individual radixes. * * Counting sort does not modify `p_x` in any way. */ static void int_order_counting( const int* p_x, r_ssize size, int x_min, uint32_t range, bool initialized, bool decreasing, bool na_last, int* p_o, int* p_o_aux, struct group_infos* p_group_infos ) { // - Only allocate this once (counts are reset to 0 at end) // - Allocating as static allows us to allocate an array this large // - `+ 1` to ensure there is room for the extra `NA` bucket static r_ssize p_counts[INT_ORDER_COUNTING_RANGE_BOUNDARY + 1] = { 0 }; // `NA` values get counted in the last used bucket uint32_t na_bucket = range; r_ssize na_count = 0; // Sanity check if (range > INT_ORDER_COUNTING_RANGE_BOUNDARY) { Rf_errorcall(R_NilValue, "Internal error: `range > INT_ORDER_COUNTING_RANGE_BOUNDARY`."); } // Histogram pass for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; if (elt == NA_INTEGER) { ++na_count; } else { const uint32_t bucket = elt - x_min; ++p_counts[bucket]; } } // Add `NA` counts once at the end p_counts[na_bucket] = na_count; r_ssize cumulative = 0; // Handle decreasing/increasing by altering the order in which // counts are accumulated const int direction = decreasing ? -1 : 1; r_ssize j = decreasing ? range - 1 : 0; // `na_last = false` pushes NA counts to the front if (!na_last && na_count != 0) { p_counts[na_bucket] = cumulative; cumulative += na_count; groups_size_maybe_push(na_count, p_group_infos); } // Accumulate counts, skip zeros for (uint32_t i = 0; i < range; ++i) { r_ssize count = p_counts[j]; if (count == 0) { j += direction; continue; } // Insert current cumulative value, then increment p_counts[j] = cumulative; cumulative += count; // At this point we will handle this group completely groups_size_maybe_push(count, p_group_infos); j += direction; } // `na_last = true` pushes NA counts to the back if (na_last && na_count != 0) { p_counts[na_bucket] = cumulative; groups_size_maybe_push(na_count, p_group_infos); } // If order is not initialized, we are on the first column / atomic vector // and can place the order directly into the result. Much faster than // initializing, placing in `p_o_aux`, and copying back over. if (initialized) { for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; uint32_t bucket = (elt == NA_INTEGER) ? na_bucket : elt - x_min; const r_ssize loc = p_counts[bucket]++; p_o_aux[loc] = p_o[i]; } r_memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); } else { for (r_ssize i = 0; i < size; ++i) { const int elt = p_x[i]; uint32_t bucket = (elt == NA_INTEGER) ? na_bucket : elt - x_min; const r_ssize loc = p_counts[bucket]++; p_o[loc] = i + 1; } } // Reset counts for next column. // Only reset what we might have touched. // `+ 1` to reset the NA bucket too. r_memset(p_counts, 0, (range + 1) * sizeof(r_ssize)); } // ----------------------------------------------------------------------------- /* * `int_order_insertion()` is used in two ways: * - It is how we "finish off" radix sorts rather than deep recursion. * - If we have an original `x` input that is small enough, we just immediately * insertion sort it. * * For small inputs, it is much faster than deeply recursing with * radix ordering. * * Insertion ordering expects that `p_x` has been adjusted with `int_adjust()` * which takes care of `na_last` and `decreasing` and also maps `int32_t` to * `uint32_t` ahead of time. */ static void int_order_insertion( const r_ssize size, uint32_t* p_x, int* p_o, struct group_infos* p_group_infos ) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const uint32_t x_elt = p_x[i]; const int o_elt = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const uint32_t x_cmp_elt = p_x[j]; if (x_elt >= x_cmp_elt) { break; } int o_cmp_elt = p_o[j]; // Swap p_x[j + 1] = x_cmp_elt; p_o[j + 1] = o_cmp_elt; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_o[j + 1] = o_elt; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; uint32_t previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint32_t current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } // ----------------------------------------------------------------------------- /* * Integer radix ordering entry point * * Expects that `int_adjust()` has been called on `p_x`, which takes care * of `na_last` and `decreasing` and also maps `int32_t` to `uint32_t` once * up front so we don't have to do it for each radix pass. * * Sorts `p_x` and `p_o` in place */ static void int_order_radix( const r_ssize size, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos ) { bool p_skips[INT_MAX_RADIX_PASS]; uint8_t pass = int_compute_skips(p_x, size, p_skips); // Skipped all passes - Happens when `x` is 1 value repeated if (pass == INT_MAX_RADIX_PASS) { groups_size_maybe_push(size, p_group_infos); return; } int_order_radix_recurse( size, pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_skips, p_group_infos ); } // ----------------------------------------------------------------------------- /* * Recursive function for radix ordering. Orders the current byte, then iterates * over the sub groups and recursively calls itself on each subgroup to order * the next byte. */ static void int_order_radix_recurse( const r_ssize size, const uint8_t pass, uint32_t* p_x, int* p_o, uint32_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos ) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { int_order_insertion(size, p_x, p_o, p_group_infos); return; } // Skip passes where our up front check told us that all bytes were the same uint8_t next_pass = pass + 1; r_ssize* p_counts_next_pass = p_counts + UINT8_MAX_SIZE; while (next_pass < INT_MAX_RADIX_PASS && p_skips[next_pass]) { ++next_pass; p_counts_next_pass += UINT8_MAX_SIZE; } const uint8_t radix = PASS_TO_RADIX(pass, INT_MAX_RADIX_PASS); const uint8_t shift = radix * 8; uint8_t byte = 0; // Histogram for this pass for (r_ssize i = 0; i < size; ++i) { const uint32_t x_elt = p_x[i]; byte = int_extract_uint32_byte(x_elt, shift); p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass == INT_MAX_RADIX_PASS) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, recurse on next byte using the same `size` since // the group size hasn't changed int_order_radix_recurse( size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Replace with `cumulative` first, then bump `cumulative`. // `p_counts` now represents starting locations for each radix group. p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back over r_memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); r_memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } // Set to zero to clear for subsequent groups p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // Can get here in the case of ties, like c(1L, 1L), which have a // `group_size` of 2 in the last radix, but there is nothing left to // compare so we are done. if (next_pass == INT_MAX_RADIX_PASS) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup int_order_radix_recurse( group_size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); p_x += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * Do a parallel histogram run over all 4 passes to determine if any passes * can be skipped (because all bytes were the same) */ static uint8_t int_compute_skips(const uint32_t* p_x, r_ssize size, bool* p_skips) { uint8_t radix_start = PASS_TO_RADIX(0, INT_MAX_RADIX_PASS); uint8_t shift_start = radix_start * 8; for (uint8_t i = 0; i < INT_MAX_RADIX_PASS; ++i) { p_skips[i] = true; } uint8_t p_bytes[INT_MAX_RADIX_PASS]; const uint32_t elt0 = p_x[0]; // Get bytes of first element in MSD->LSD order. // Placed in `p_bytes` in a way that aligns with the `pass` variable for (uint8_t pass = 0, shift = shift_start; pass < INT_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { p_bytes[pass] = int_extract_uint32_byte(elt0, shift); } // Check to see which passes are skippable for (r_ssize i = 1; i < size; ++i) { uint8_t n_skips = INT_MAX_RADIX_PASS; const uint32_t elt = p_x[i]; for (uint8_t pass = 0, shift = shift_start; pass < INT_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { bool skip = p_skips[pass]; if (skip) { p_skips[pass] = (p_bytes[pass] == int_extract_uint32_byte(elt, shift)); } else { --n_skips; } } // No passes are skippable if (n_skips == 0) { break; } } uint8_t pass = 0; // Shift forward to the first pass with varying bytes while (pass < INT_MAX_RADIX_PASS && p_skips[pass]) { ++pass; } return pass; } // ----------------------------------------------------------------------------- // Bytes will be extracted 8 bits at a time. // This is a MSB radix sort, so they are extracted MSB->LSB. static inline uint8_t int_extract_uint32_byte(uint32_t x, uint8_t shift) { return (x >> shift) & UINT8_MAX; } // ----------------------------------------------------------------------------- /* * Entry points for logical ordering. These just use integer infrastructure. */ static void lgl_order_chunk( bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { int_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void lgl_order( SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { int_order( x, decreasing, na_last, size, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } // ----------------------------------------------------------------------------- /* * These are the main entry points for double ordering. They are nearly * identical except `dbl_order()` assumes that `p_x` cannot be * modified directly and is user input. * * `dbl_order_chunk()` assumes `p_x` is modifiable by reference. It is called * when iterating over data frame columns and `p_x` is the 2nd or greater * column, in which case `p_x` is really a chunk of that column that has been * copied into `x_chunk`. * * `dbl_order()` assumes `p_x` is user input which cannot be modified. * It copies `x` into another SEXP that can be modified directly. * * Unlike `int_order_chunk()`, there is no intermediate counting sort, as it is * sort of unclear how to compute the range of a double vector in the same * way, and even after adjusting to a `uint64_t`, it is unlikely that they * have a very small range of values. */ static void dbl_order_chunk( bool decreasing, bool na_last, bool nan_distinct, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { void* p_x_chunk = p_lazy_x_chunk->p_data; dbl_order_chunk_impl( decreasing, na_last, nan_distinct, size, p_x_chunk, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } static void dbl_order( SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { const double* p_x = REAL_RO(x); dbl_order_impl( p_x, decreasing, na_last, nan_distinct, size, true, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); } /* * Used by `dbl_order_chunk()` and by `cpl_order()` * * `dbl_order_chunk_impl()` also deals with sortedness since we don't have an up * front sortedness check on complex vectors. */ static void dbl_order_chunk_impl( bool decreasing, bool na_last, bool nan_distinct, r_ssize size, void* p_x, int* p_o, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { const enum vctrs_sortedness sortedness = dbl_sortedness( p_x, size, decreasing, na_last, nan_distinct, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } dbl_adjust(decreasing, na_last, nan_distinct, size, p_x); if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x, p_o, p_group_infos); return; } uint64_t* p_x_aux = (uint64_t*) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); r_memset(p_counts, 0, p_lazy_counts->size); dbl_order_radix( size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } /* * Used by `dbl_order()` and by `cpl_order()` * * Unlike `int_order_impl()`, `dbl_order_impl()` also deals with sortedness * since we don't have an up front sortedness check on complex vectors. * * When dealing with complex vectors, `p_x` and `p_lazy_x_chunk->p_data` will * already point to the same memory. In this case, we don't need to copy `p_x` * into `p_lazy_x_chunk`, so we set `copy = false` which tells * `dbl_order_impl()` to just use `p_lazy_x_chunk` directly. */ static void dbl_order_impl( const double* p_x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, bool copy, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { const enum vctrs_sortedness sortedness = dbl_sortedness( p_x, size, decreasing, na_last, nan_distinct, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } int* p_o = init_order(p_order); void* p_x_chunk; if (copy) { p_x_chunk = init_lazy_raw(p_lazy_x_chunk); r_memcpy(p_x_chunk, p_x, size * sizeof(*p_x)); } else { p_x_chunk = p_lazy_x_chunk->p_data; } dbl_adjust(decreasing, na_last, nan_distinct, size, p_x_chunk); if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x_chunk, p_o, p_group_infos); return; } uint64_t* p_x_aux = (uint64_t*) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); r_ssize* p_counts = (r_ssize*) init_lazy_raw(p_lazy_counts); r_memset(p_counts, 0, p_lazy_counts->size); dbl_order_radix( size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_group_infos ); } // ----------------------------------------------------------------------------- /* * When mapping double -> uint64_t: * * Smallest possible value comes from: * dbl_map_to_uint64(-Inf) -> 4503599627370495 * * One larger is: * dbl_map_to_uint64(-.Machine$double.xmax) -> 4503599627370496 * * Largest possible value comes from: * dbl_map_to_uint64(Inf) -> 18442240474082181120 * * One smaller is: * dbl_map_to_uint64(.Machine$double.xmax) -> 18442240474082181119 * * This gives us room to manually map: * If (!nan_distinct): * dbl_map_to_uint64(NA_real_) -> UINT64_MAX (or 0 if `na_last = false`) * dbl_map_to_uint64(NaN) -> UINT64_MAX (or 0 if `na_last = false`) * If (nan_distinct): * dbl_map_to_uint64(NA_real_) -> UINT64_MAX (or 0 if `na_last = false`) * dbl_map_to_uint64(NaN) -> UINT64_MAX - 1 (or 1 if `na_last = false`) * When using `nan_distinct`, NaN is always ordered between NA_real_ and * non-missing numbers, regardless of `decreasing`. */ static void dbl_adjust( const bool decreasing, const bool na_last, const bool nan_distinct, const r_ssize size, void* p_x ) { double* p_x_dbl = (double*) p_x; uint64_t* p_x_u64 = (uint64_t*) p_x; if (nan_distinct) { dbl_adjust_nan_distinct(decreasing, na_last, size, p_x_dbl, p_x_u64); } else { dbl_adjust_nan_identical(decreasing, na_last, size, p_x_dbl, p_x_u64); } } static inline void dbl_adjust_nan_identical( const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64 ) { const int direction = decreasing ? -1 : 1; const uint64_t na_u64 = na_last ? UINT64_MAX : 0; for (r_ssize i = 0; i < size; ++i) { double elt = p_x_dbl[i]; if (isnan(elt)) { p_x_u64[i] = na_u64; continue; } elt = elt * direction; p_x_u64[i] = dbl_map_to_uint64(elt); } } static inline void dbl_adjust_nan_distinct( const bool decreasing, const bool na_last, const r_ssize size, double* p_x_dbl, uint64_t* p_x_u64 ) { const int direction = decreasing ? -1 : 1; const uint64_t na_u64 = na_last ? UINT64_MAX : 0; const uint64_t nan_u64 = na_last ? UINT64_MAX - 1 : 1; for (r_ssize i = 0; i < size; ++i) { double elt = p_x_dbl[i]; const enum vctrs_dbl type = dbl_classify(elt); switch (type) { case VCTRS_DBL_number: { elt = elt * direction; p_x_u64[i] = dbl_map_to_uint64(elt); break; } case VCTRS_DBL_missing: { p_x_u64[i] = na_u64; break; } case VCTRS_DBL_nan: { p_x_u64[i] = nan_u64; break; } } } } static union { double d; uint64_t u64; } d_u64; /* * Map `double -> `uint64_t` retaining ordering. * * Assumes `x` is not a `NA_real_` or `NaN` value. * Correctly handles `Inf` and `-Inf`. */ static inline uint64_t dbl_map_to_uint64(double x) { // Catch `-0` vs `0` if (x == 0) { x = 0; } // Reinterpret as uint64_t without changing bytes d_u64.d = x; d_u64.u64 = dbl_flip_uint64(d_u64.u64); return d_u64.u64; } #define HEX_UINT64_SIGN 0x8000000000000000u #define HEX_UINT64_ONES 0xffffffffffffffffu // To retain ordering in mapping from double -> uint64_t we always have to // flip the sign bit, and for negative numbers we also flip all of the other // bits. Described more here: http://stereopsis.com/radix.html static inline uint64_t dbl_flip_uint64(uint64_t x) { const uint64_t mask = (x & HEX_UINT64_SIGN) ? HEX_UINT64_ONES : HEX_UINT64_SIGN; return x ^ mask; } #undef HEX_UINT64_SIGN #undef HEX_UINT64_ONES // ----------------------------------------------------------------------------- /* * `dbl_order_insertion()` is used in two ways: * - It is how we "finish off" radix sorts rather than deep recursion. * - If we have an original `x` input that is small enough, we just immediately * insertion sort it. * * For small inputs, it is much faster than deeply recursing with * radix ordering. * * Insertion ordering expects that `p_x` has been adjusted with `dbl_adjust()` * which takes care of `na_last` and `decreasing` and also maps `double` to * `uint64_t` ahead of time. * * It is essentially the same as `int_order_insertion()` with different types. */ static void dbl_order_insertion( const r_ssize size, uint64_t* p_x, int* p_o, struct group_infos* p_group_infos ) { // Don't think this can occur, but safer this way if (size == 0) { return; } for (r_ssize i = 1; i < size; ++i) { const uint64_t x_elt = p_x[i]; const int o_elt = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const uint64_t x_cmp_elt = p_x[j]; if (x_elt >= x_cmp_elt) { break; } int o_cmp_elt = p_o[j]; // Swap p_x[j + 1] = x_cmp_elt; p_o[j + 1] = o_cmp_elt; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = x_elt; p_o[j + 1] = o_elt; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; uint64_t previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint64_t current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } // ----------------------------------------------------------------------------- /* * Double radix ordering entry point * * Expects that `dbl_adjust()` has been called on `p_x`, which takes care * of `na_last` and `decreasing` and also maps `double` to `uint64_t` once * up front so we don't have to do it for each radix pass. * * Sorts `p_x` and `p_o` in place */ static void dbl_order_radix( const r_ssize size, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, struct group_infos* p_group_infos ) { bool p_skips[DBL_MAX_RADIX_PASS]; uint8_t pass = dbl_compute_skips(p_x, size, p_skips); // Skipped all passes - Happens when `x` is 1 value repeated if (pass == DBL_MAX_RADIX_PASS) { groups_size_maybe_push(size, p_group_infos); return; } dbl_order_radix_recurse( size, pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts, p_skips, p_group_infos ); } // ----------------------------------------------------------------------------- /* * Recursive function for radix ordering. Orders the current byte, then iterates * over the sub groups and recursively calls itself on each subgroup to order * the next byte. * * This needs 8 passes, unlike the 4 required by `int_order_radix()`. */ static void dbl_order_radix_recurse( const r_ssize size, const uint8_t pass, uint64_t* p_x, int* p_o, uint64_t* p_x_aux, int* p_o_aux, uint8_t* p_bytes, r_ssize* p_counts, bool* p_skips, struct group_infos* p_group_infos ) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { dbl_order_insertion(size, p_x, p_o, p_group_infos); return; } // Skip passes where our up front check told us that all bytes were the same uint8_t next_pass = pass + 1; r_ssize* p_counts_next_pass = p_counts + UINT8_MAX_SIZE; while (next_pass < DBL_MAX_RADIX_PASS && p_skips[next_pass]) { ++next_pass; p_counts_next_pass += UINT8_MAX_SIZE; } const uint8_t radix = PASS_TO_RADIX(pass, DBL_MAX_RADIX_PASS); const uint8_t shift = radix * 8; uint8_t byte = 0; // Histogram for (r_ssize i = 0; i < size; ++i) { const uint64_t x_elt = p_x[i]; byte = dbl_extract_uint64_byte(x_elt, shift); p_bytes[i] = byte; ++p_counts[byte]; } // Fast check to see if all bytes were the same. // If so, skip this `pass` since we learned nothing. // No need to accumulate counts and iterate over chunks, // we know all others are zero. if (p_counts[byte] == size) { // Reset count for other group chunks p_counts[byte] = 0; if (next_pass == DBL_MAX_RADIX_PASS) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, recurse on next byte using the same `size` since // the group size hasn't changed dbl_order_radix_recurse( size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); } return; } r_ssize cumulative = 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[i]; if (count == 0) { continue; } // Replace with `cumulative` first, then bump `cumulative`. // `p_counts` now represents starting locations for each radix group. p_counts[i] = cumulative; cumulative += count; } // Place into auxiliary arrays in the correct order, then copy back over for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back over r_memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); r_memcpy(p_x, p_x_aux, size * sizeof(*p_x_aux)); r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // Can get here in the case of ties, like c(1, 1), which have a // `group_size` of 2 in the last radix, but there is nothing left to // compare so we are done. if (next_pass == DBL_MAX_RADIX_PASS) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup dbl_order_radix_recurse( group_size, next_pass, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_counts_next_pass, p_skips, p_group_infos ); p_x += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * Detect completely skippable bytes * * There are 8 passes over a double, 1 for each byte. Often times for the entire * `x` vector a few of those passes are useless because all of the bytes are * the same. This does an up front computation in 1 pass over the data to * determine which bytes are completely skippable. * * It is worth noting that just because byte 0 wasn't skippable doesn't mean * that byte 1 isn't. With the way that doubles are mapped to uint64_t, it * is often the case that, for small doubles, bytes 0-2 aren't skippable but * the rest of them are (for example, this happens with doubles in the range * of 1:128). This provides a nice performance increase there. */ static uint8_t dbl_compute_skips(const uint64_t* p_x, r_ssize size, bool* p_skips) { uint8_t radix_start = PASS_TO_RADIX(0, DBL_MAX_RADIX_PASS); uint8_t shift_start = radix_start * 8; for (uint8_t i = 0; i < DBL_MAX_RADIX_PASS; ++i) { p_skips[i] = true; } uint8_t p_bytes[DBL_MAX_RADIX_PASS]; const uint64_t elt0 = p_x[0]; // Get bytes of first element in MSD->LSD order. // Placed in `p_bytes` in a way that aligns with the `pass` variable for (uint8_t pass = 0, shift = shift_start; pass < DBL_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { p_bytes[pass] = dbl_extract_uint64_byte(elt0, shift); } // Check to see which passes are skippable for (r_ssize i = 1; i < size; ++i) { uint8_t n_skips = DBL_MAX_RADIX_PASS; const uint64_t elt = p_x[i]; for (uint8_t pass = 0, shift = shift_start; pass < DBL_MAX_RADIX_PASS; ++pass, shift += SHIFT_ADJUSTMENT) { bool skip = p_skips[pass]; if (skip) { p_skips[pass] = (p_bytes[pass] == dbl_extract_uint64_byte(elt, shift)); } else { --n_skips; } } // No passes are skippable if (n_skips == 0) { break; } } uint8_t pass = 0; // Shift forward to the first pass with varying bytes while (pass < DBL_MAX_RADIX_PASS && p_skips[pass]) { ++pass; } return pass; } // ----------------------------------------------------------------------------- // Bytes will be extracted 8 bits at a time. // This is a MSB radix sort, so they are extracted MSB->LSB. static inline uint8_t dbl_extract_uint64_byte(uint64_t x, uint8_t shift) { return (x >> shift) & UINT8_MAX; } // ----------------------------------------------------------------------------- /* * `cpl_order()` uses the fact that Rcomplex is really just a rcrd * type of two double vectors. It orders first on the real vector, and then on * the imaginary vector. * * `cpl_order_chunk()` isn't required. It would only be called from data frames * when there is a complex column, but in those cases we split the column * into two double vectors (real / imaginary) and "rerun" the column using * `dbl_order_chunk()`. */ static void cpl_order( SEXP x, bool decreasing, bool na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { // We treat complex as a two column data frame, so we have to use group // information for at least the first column. // - If a complex atomic vector is used, `ignore_groups` will be true unless // the user also requested group information. // - If the first column of a df is a complex column, `ignore_groups` will // be false. bool reset_ignore_groups = false; if (p_group_infos->ignore_groups) { p_group_infos->ignore_groups = false; reset_ignore_groups = true; } const Rcomplex* p_x_cpl = COMPLEX_RO(x); // When a complex column is present, // `lazy_x_chunk` and `lazy_x_aux` are created to have the // size of a double vector. double* p_x_chunk_dbl = (double*) init_lazy_raw(p_lazy_x_chunk); // Handle the real portion first for (r_ssize i = 0; i < size; ++i) { p_x_chunk_dbl[i] = cpl_normalise_missing(p_x_cpl[i]).r; } /* * Call double ordering algorithm on real section. * * In this case, both `p_x_chunk_dbl` and `p_lazy_x_chunk` are passed through, * but we set `copy = false` which tells `dbl_order_impl()` not to copy * the input (`p_x_chunk_dbl`) over to the chunk vector of (`p_lazy_x_chunk`). * It has already been done when we extracted the real section. */ dbl_order_impl( p_x_chunk_dbl, decreasing, na_last, nan_distinct, size, false, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); // Ordering will now be initialized int* p_o = p_order->p_data; // Reset `ignore_groups` for the second pass if we don't need to track groups. // This happens if an atomic complex vector is passed in and the user // hasn't requested group information. if (reset_ignore_groups) { p_group_infos->ignore_groups = true; } // Get the number of group chunks from the first pass struct group_info* p_group_info_pre = groups_current(p_group_infos); r_ssize n_groups = p_group_info_pre->n_groups; // If there were no ties, we are completely done if (n_groups == size) { return; } // Swap to other group info to prepare for the imaginary section groups_swap(p_group_infos); // Fill with the imaginary portion. // Uses updated ordering to place it in sequential order. for (r_ssize i = 0; i < size; ++i) { const int loc = p_o[i] - 1; p_x_chunk_dbl[i] = cpl_normalise_missing(p_x_cpl[loc]).i; } // Iterate over the group chunks from the first pass for (r_ssize group = 0; group < n_groups; ++group) { r_ssize group_size = p_group_info_pre->p_data[group]; // Fast handling of simplest case if (group_size == 1) { ++p_x_chunk_dbl; ++p_o; groups_size_maybe_push(1, p_group_infos); continue; } dbl_order_chunk_impl( decreasing, na_last, nan_distinct, group_size, p_x_chunk_dbl, p_o, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); p_x_chunk_dbl += group_size; p_o += group_size; } } // ----------------------------------------------------------------------------- /* * These are the main entry points for character ordering. * * `chr_order_chunk()` assumes `p_lazy_x_chunk` holds a chunk worth of `SEXP`s * and is modifiable by reference. * * `chr_order()` assumes `x` is user input which cannot be modified. * It copies `x` into memory that can be modified directly. */ static void chr_order_chunk( bool decreasing, bool na_last, r_ssize size, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { SEXP* p_x_chunk_sexp = (SEXP*) p_lazy_x_chunk->p_data; const enum vctrs_sortedness sortedness = chr_sortedness( p_x_chunk_sexp, size, decreasing, na_last, p_group_infos ); if (sortedness != VCTRS_SORTEDNESS_unsorted) { ord_resolve_sortedness_chunk(sortedness, size, p_o); return; } const char** p_x_aux = (const char**) init_lazy_raw(p_lazy_x_aux); // Extract out `CHAR()` string pointers and `Rf_length()` string sizes, // skipping missings along the way, but returning the count of them and // the max string size const struct r_ssize_int_pair pair = chr_extract_without_missings( size, p_x_chunk_sexp, p_x_aux ); const r_ssize n_missing = pair.x; const int max_string_size = pair.y; if (n_missing != 0) { // If there are missings, handle them by pushing their ordering to the front // (or back, depending on `na_last`) of `p_o` int* p_o_aux = init_lazy_raw(p_lazy_o_aux); chr_handle_missings( size, n_missing, na_last, p_x_chunk_sexp, p_o, p_o_aux ); size -= n_missing; p_o = na_last ? p_o : (p_o + n_missing); } // Now forget about `p_x_chunk_sexp` and swap `p_x_aux` with `p_x_chunk` // to get back to the "standard" definition of these pointers //p_x_chunk_sexp = NULL; const char** p_x_chunk = (const char**) p_lazy_x_chunk->p_data; SWAP(const char**, p_x_chunk, p_x_aux); if (!na_last && n_missing != 0) { // Push `!na_last` group before sorting groups_size_maybe_push(n_missing, p_group_infos); } if (size <= ORDER_INSERTION_BOUNDARY) { chr_order_insertion( size, decreasing, p_x_chunk, p_o, p_group_infos ); } else { int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); chr_order_radix( size, decreasing, max_string_size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_group_infos ); } if (na_last && n_missing != 0) { // Push `na_last` group after sorting groups_size_maybe_push(n_missing, p_group_infos); } } static void chr_order( SEXP x, bool decreasing, bool na_last, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct group_infos* p_group_infos ) { const SEXP* p_x = STRING_PTR_RO(x); const enum vctrs_sortedness sortedness = chr_sortedness( p_x, size, decreasing, na_last, p_group_infos ); // Handle sorted cases and set ordering to initialized if (sortedness != VCTRS_SORTEDNESS_unsorted) { int* p_o = p_order->p_data; ord_resolve_sortedness(sortedness, size, p_o); p_order->initialized = true; return; } int* p_o = init_order(p_order); const char** p_x_chunk = (const char**) init_lazy_raw(p_lazy_x_chunk); // Extract out `CHAR()` string pointers and `Rf_length()` string sizes, // skipping missings along the way, but returning the count of them and // the max string size const struct r_ssize_int_pair pair = chr_extract_without_missings( size, p_x, p_x_chunk ); const r_ssize n_missing = pair.x; const int max_string_size = pair.y; if (n_missing != 0) { // If there are missings, handle them by pushing their ordering to the front // (or back, depending on `na_last`) of `p_o` int* p_o_aux = init_lazy_raw(p_lazy_o_aux); chr_handle_missings( size, n_missing, na_last, p_x, p_o, p_o_aux ); size -= n_missing; p_o = na_last ? p_o : (p_o + n_missing); } if (!na_last && n_missing != 0) { // Push `na_last` group before sorting groups_size_maybe_push(n_missing, p_group_infos); } if (size <= ORDER_INSERTION_BOUNDARY) { chr_order_insertion( size, decreasing, p_x_chunk, p_o, p_group_infos ); } else { const char** p_x_aux = (const char**) init_lazy_raw(p_lazy_x_aux); int* p_o_aux = (int*) init_lazy_raw(p_lazy_o_aux); uint8_t* p_bytes = (uint8_t*) init_lazy_raw(p_lazy_bytes); chr_order_radix( size, decreasing, max_string_size, p_x_chunk, p_o, p_x_aux, p_o_aux, p_bytes, p_group_infos ); } if (na_last && n_missing != 0) { // Push `na_last` group after sorting groups_size_maybe_push(n_missing, p_group_infos); } } // Extract info from `p_x` into `p_x_strings` // // Skips `NA`s, which are instead handled immediately by // `chr_handle_missings()`. This allows the radix code to be as simple as // possible, because `NA` handling in the hot radix path is detremental to // performance, and complex to handle! // // Returns `n_missing`, which is used by the caller as: // - An input to `chr_handle_missings()` // - An offset to `p_o` in the `!na_last` case // - An adjustment to the total `size` to account for `NA` removal // // Returns `max_string_size`, which is used by the caller as: // - The max number of `pass`es to take over the data static struct r_ssize_int_pair chr_extract_without_missings( r_ssize size, const SEXP* p_x, const char** p_x_strings ) { r_ssize n_missing = 0; int max_string_size = 0; r_ssize loc = 0; for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; if (elt == NA_STRING) { ++n_missing; } else { const int elt_string_size = (int) Rf_length(elt); if (max_string_size < elt_string_size) { max_string_size = elt_string_size; } p_x_strings[loc] = CHAR(elt); ++loc; } } return (struct r_ssize_int_pair) { .x = n_missing, .y = max_string_size }; } // Preemptively handle missing values in character ordering // // Rearranges `p_o` by placing all `NA`s up front or at the back depending on // `na_last`. This is done stably. // // After calling this: // - With `na_last`, `p_o` matches `p_x_strings` // - With `!na_last`, `p_o + n_missing` matches `p_x_strings` static void chr_handle_missings( r_ssize size, r_ssize n_missing, const bool na_last, const SEXP* p_x, int* p_o, int* p_o_aux ) { r_ssize loc_missing = na_last ? (size - n_missing) : 0; r_ssize loc_not_missing = na_last ? 0 : n_missing; for (r_ssize i = 0; i < size; ++i) { SEXP elt = p_x[i]; if (elt == NA_STRING) { p_o_aux[loc_missing] = p_o[i]; ++loc_missing; } else { p_o_aux[loc_not_missing] = p_o[i]; ++loc_not_missing; } } // Copy back r_memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); } static void chr_order_radix( const r_ssize size, const bool decreasing, const int max_string_size, const char** p_x, int* p_o, const char** p_x_aux, int* p_o_aux, uint8_t* p_bytes, struct group_infos* p_group_infos ) { int pass = 0; // If `pass == max_string_size == 0`, we are already done! // // This is needed when a vector of `NA` followed by all `""` are passed in. The // `NA` make it look unsorted, but then the `NA` are removed, so we are left // with a vector of `""` with `max_string_size == 0`. // // Without this, we can infloop because the `next_pass == max_string_size` // exit never occurs because `next_pass` starts above `max_string_size`. We // have a test to make sure we return the correct result. if (pass == max_string_size) { groups_size_maybe_push(size, p_group_infos); return; } chr_order_radix_recurse( size, decreasing, pass, max_string_size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_group_infos ); } /* * Recursive function for ordering the `p_x` strings * * For ASCII strings, 1 character aligns with 1 byte, so we can order them * 1 character at a time from left to right (MSB to LSB). * * For UTF-8 strings, the implementation of UTF-8 is done so that UTF-8 * characters are made up of between 1-4 bytes. Luckily, treating them as * a sequence of single bytes like we do for ASCII orders identically to * treating them as their full 1-4 byte sequence. * * Because these are variable length, some strings are shorter than others. * Shorter strings should order lower than longer strings if they are otherwise * equivalent, so we reserve the 0-th bucket of `p_counts` for counting * implicit empty strings. Normally this would be an issue because this is * the bucket for ASCII value 0, but this is the null value, which is not * allowed in R strings! * * Guaranteed to never see `NA`s. */ static void chr_order_radix_recurse( const r_ssize size, const bool decreasing, const int pass, const int max_string_size, const char** p_x, int* p_o, const char** p_x_aux, int* p_o_aux, uint8_t* p_bytes, struct group_infos* p_group_infos ) { // Exit as fast as possible if we are below the insertion order boundary if (size <= ORDER_INSERTION_BOUNDARY) { chr_order_insertion( size, decreasing, p_x, p_o, p_group_infos ); return; } const int next_pass = pass + 1; // Fast check to see if all bytes within this group are the same. If so, skip // this `pass` since we learned nothing. Unlike with other methods, it is useful // to do this check before the histogram because with strings there is often a // long common prefix, and it is faster to skip past that as quickly as possible, // avoiding the jumpiness of histogramming. if (chr_all_same_byte(p_x, size)) { if (next_pass == max_string_size) { // If we are already at the last pass, we are done groups_size_maybe_push(size, p_group_infos); } else { // Otherwise, advance `p_x` to the next byte and immediately recurse using // the same `size` since the group size hasn't changed for (r_ssize i = 0; i < size; ++i) { ++p_x[i]; } chr_order_radix_recurse( size, decreasing, next_pass, max_string_size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_group_infos ); } return; } // We don't carry along `p_counts` from an up front allocation since // the strings have variable length r_ssize p_counts[UINT8_MAX_SIZE] = { 0 }; // Histogram // // Histogramming strings is very special! // // Note that R strings never contain a nul terminator of `\0`, which is byte // `0`, in the string itself. However, all R strings end with a nul terminator // since they are C style strings. // // We utilize this fact when histogramming. If we hit a string of "ab" on // `pass = 2` then it will extract the nul terminator `\0` as byte `0` and // will therefore bucket all `"ab"` strings together. Then the check for // `nul_terminator_i` will detect this, keeping us from over recursing on // `"ab"` and indexing past the nul terminator! // // This trick avoids needing to track the string size alongside the string // pointer. We just need the `max_string_size` of the total number of times to // recurse, and the algorithm automatically stops early on shorter strings. // // We also advance the `char*` returned by `p_x[i]` to the next byte right // after we access it, for use by the next `pass`. This actually makes // `chr_order_insertion()` and `chr_all_same_byte()` a little simpler, because // we've already taken care of advancing past all the bytes that we know are // all the same. For strings like `"ab"` in the example above, this can advance // us past the `\0` nul terminator, but we will never access that location. for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = (uint8_t) *p_x[i]; ++p_x[i]; p_bytes[i] = byte; ++p_counts[byte]; } r_ssize cumulative = 0; // Handle decreasing/increasing by altering the order in which // counts are accumulated const int direction = decreasing ? -1 : 1; r_ssize j = decreasing ? UINT8_MAX_SIZE - 1 : 0; // Accumulate counts, skip zeros for (uint16_t i = 0; i < UINT8_MAX_SIZE; ++i) { r_ssize count = p_counts[j]; if (count == 0) { j += direction; continue; } // Insert current cumulative value, then increment p_counts[j] = cumulative; cumulative += count; j += direction; } // Place into auxiliary arrays in the correct order for (r_ssize i = 0; i < size; ++i) { const uint8_t byte = p_bytes[i]; const r_ssize loc = p_counts[byte]++; p_o_aux[loc] = p_o[i]; p_x_aux[loc] = p_x[i]; } // Copy back into `p_o` because our output is `p_o`, but recognize that we can // just swap the auxiliary data related to `x` to achieve the same idea there r_memcpy(p_o, p_o_aux, size * sizeof(*p_o_aux)); SWAP(const char**, p_x, p_x_aux); // Cumulative counts will be in reverse order if we were decreasing. We // reverse them to put them in the correct order for cumulative count diffing. // This works nicely because `UINT8_MAX_SIZE` is even. if (decreasing) { for (uint16_t i = 0; i < UINT8_MAX_SIZE_HALVED; ++i) { const r_ssize front = p_counts[i]; const r_ssize back = p_counts[UINT8_MAX_SIZE - 1 - i]; p_counts[i] = back; p_counts[UINT8_MAX_SIZE - 1 - i] = front; } } const uint16_t nul_terminator_i = decreasing ? UINT8_MAX_SIZE - 1 : 0; r_ssize last_cumulative_count = 0; // Recurse on subgroups as required for (uint16_t i = 0; last_cumulative_count < size && i < UINT8_MAX_SIZE; ++i) { const r_ssize cumulative_count = p_counts[i]; if (!cumulative_count) { continue; } // Set to zero to clear for subsequent groups p_counts[i] = 0; // Diff the accumulated counts to get the radix group size const r_ssize group_size = cumulative_count - last_cumulative_count; last_cumulative_count = cumulative_count; if (group_size == 1) { groups_size_maybe_push(1, p_group_infos); ++p_x; ++p_o; continue; } // If `i` is pointing to the `\0` byte bucket, we are done with this group. // `\0` is the end of string marker, so all strings in this bucket are the // same and we can't recurse further into them, otherwise we'd index OOB // (see the test that mentions `\0`). Happens with `c("abc", "abd", "a", // "a")` where after the first pass we haven't learned anything, and after // the second pass two of the strings are on `\0`. We avoid indexing OOB on // a third pass by exiting early when we see the `\0` group. // `chr_all_same()` would also catch this, but this check is faster and // clearer. if (i == nul_terminator_i) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // If we've made it to the end of the string, finalize the group. // We can get here with `c("xyz", "xyz", "xyx")` where we need the // last byte to break the tie, but we end up with a group size of 2. if (next_pass == max_string_size) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // If the upcoming subgroup is all the same string, we are done. This is a // very useful performance optimization for cases like `c("abcd", "def", // "abcd")` where after the first pass we have two groups, but every string // within each group is already the same so we don't need to continue // recursing. if (chr_all_same(p_x, group_size)) { groups_size_maybe_push(group_size, p_group_infos); p_x += group_size; p_o += group_size; continue; } // Order next byte of this subgroup chr_order_radix_recurse( group_size, decreasing, next_pass, max_string_size, p_x, p_o, p_x_aux, p_o_aux, p_bytes, p_group_infos ); p_x += group_size; p_o += group_size; } } /* * Insertion order for character vectors. This occurs in the radix ordering * once we drop below a certain chunk size. * * Guaranteed to never see `NA`s. */ static void chr_order_insertion( const r_ssize size, const bool decreasing, const char** p_x, int* p_o, struct group_infos* p_group_infos ) { // Don't think this can occur, but safer this way if (size == 0) { return; } const int direction = decreasing ? -1 : 1; for (r_ssize i = 1; i < size; ++i) { const char* elt = p_x[i]; const int elt_o = p_o[i]; r_ssize j = i - 1; while (j >= 0) { const char* cmp_elt = p_x[j]; if (str_ge(elt, cmp_elt, direction)) { break; } // It seems to help performance to avoid loading this // before the if statement, since it often isn't needed const int cmp_elt_o = p_o[j]; // Swap p_x[j + 1] = cmp_elt; p_o[j + 1] = cmp_elt_o; // Next --j; } // Place original elements in new location // closer to start of the vector p_x[j + 1] = elt; p_o[j + 1] = elt_o; } // We've ordered a small chunk, we need to push at least one group size. // Depends on the post-ordered results so we have to do this // in a separate loop. r_ssize group_size = 1; const char* previous = p_x[0]; for (r_ssize i = 1; i < size; ++i) { const char* current = p_x[i]; // Continue the current group run if (current == previous) { ++group_size; continue; } // Push current run size and reset size tracker groups_size_maybe_push(group_size, p_group_infos); group_size = 1; previous = current; } // Push final group run groups_size_maybe_push(group_size, p_group_infos); } static inline bool chr_all_same( const char** p_x, const r_ssize size ) { if (size == 0) { return true; } const char* first = p_x[0]; for (r_ssize i = 1; i < size; ++i) { if (first != p_x[i]) { return false; } } return true; } // Returns `true` if the upcoming byte is the same for every element of `p_x` // // Guaranteed to never see `NA`s static inline bool chr_all_same_byte( const char** p_x, const r_ssize size ) { if (size == 0) { return true; } const uint8_t first = (uint8_t) *p_x[0]; for (r_ssize i = 1; i < size; ++i) { const uint8_t this = (uint8_t) *p_x[i]; if (this != first) { return false; } } return true; } // ----------------------------------------------------------------------------- #define DF_ORDER_EXTRACT_CHUNK(CONST_DEREF, CTYPE) do { \ const CTYPE* p_col = CONST_DEREF(col); \ CTYPE* p_x_chunk = (CTYPE*) init_lazy_raw(p_lazy_x_chunk); \ \ /* Extract the next group chunk and place in */ \ /* sequential order for cache friendliness */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk[j] = p_col[loc]; \ } \ } while (0) #define DF_ORDER_EXTRACT_CHUNK_CPL() do { \ const Rcomplex* p_col = COMPLEX_RO(col); \ double* p_x_chunk = (double*) init_lazy_raw(p_lazy_x_chunk); \ \ if (complex_first_pass) { \ /* First pass - real */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk[j] = cpl_normalise_missing(p_col[loc]).r; \ } \ } else { \ /* Second pass - imaginary */ \ for (r_ssize j = 0; j < group_size; ++j) { \ const int loc = p_o_col[j] - 1; \ p_x_chunk[j] = cpl_normalise_missing(p_col[loc]).i; \ } \ } \ } while (0) /* * `df_order()` is the main user of `p_group_infos`. It uses the grouping * of the current column to break up the next column into sub groups. That * process is continued until either all columns have been processed or we * can tell all of the values apart. */ static void df_order( SEXP x, SEXP decreasing, SEXP na_last, bool nan_distinct, r_ssize size, struct order* p_order, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { r_ssize n_cols = r_length(x); bool recycle_decreasing; r_ssize n_decreasing = r_length(decreasing); int* p_decreasing = LOGICAL(decreasing); if (n_decreasing == 1) { recycle_decreasing = true; } else if (n_decreasing == n_cols) { recycle_decreasing = false; } else { Rf_errorcall( R_NilValue, "Internal error: `vec_order_expand_args()` should expand " "`decreasing` to have length 1 or length equal " "to the number of columns of `x` after calling `vec_proxy_order()`." ); } bool recycle_na_last; r_ssize n_na_last = r_length(na_last); int* p_na_last = LOGICAL(na_last); if (n_na_last == 1) { recycle_na_last = true; } else if (n_na_last == n_cols) { recycle_na_last = false; } else { Rf_errorcall( R_NilValue, "Internal error: `vec_order_expand_args()` should expand " "`na_last` to have length 1 or length equal " "to the number of columns of `x` after calling `vec_proxy_order()`." ); } // Special case no columns if (n_cols == 0) { init_order(p_order); if (size != 0) { groups_size_maybe_push(size, p_group_infos); } return; } SEXP col = VECTOR_ELT(x, 0); bool col_decreasing = p_decreasing[0]; bool col_na_last = p_na_last[0]; enum vctrs_type type = vec_proxy_typeof(col); // Apply on one column to fill `p_group_infos`. // First column is immutable and we must copy into `x_chunk`. vec_order_base_switch( col, col_decreasing, col_na_last, nan_distinct, size, type, p_order, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); // For complex, we have to rerun the column a second time on the // imaginary part. This is done by decrementing `i` after processing // the real part so the column is rerun. bool complex_first_pass = true; // Iterate over remaining columns by group chunk for (r_ssize i = 1; i < n_cols; ++i) { // Get the number of group chunks from previous column group info struct group_info* p_group_info_pre = groups_current(p_group_infos); r_ssize n_groups = p_group_info_pre->n_groups; // If there were no ties, we are completely done if (n_groups == size) { break; } if (!recycle_decreasing) { col_decreasing = p_decreasing[i]; } if (!recycle_na_last) { col_na_last = p_na_last[i]; } // Reset pointer between columns since we increment it as // we iterate through the groups, but need it to start from the beginning // on the next column. `p_o` is initialized now that we have already // processed at least one column. int* p_o_col = p_order->p_data; col = VECTOR_ELT(x, i); type = vec_proxy_typeof(col); // Turn off group tracking if: // - We are on the last column // - The user didn't request group information // - That column isn't the first pass of a complex column if (i == n_cols - 1 && !p_group_infos->force_groups && !complex_first_pass) { p_group_infos->ignore_groups = true; } // Swap to other group info to prepare for this column groups_swap(p_group_infos); // Iterate over this column's group chunks for (r_ssize group = 0; group < n_groups; ++group) { r_ssize group_size = p_group_info_pre->p_data[group]; // Fast handling of simplest case if (group_size == 1) { ++p_o_col; groups_size_maybe_push(1, p_group_infos); continue; } // Extract current chunk and place into `x_chunk` in sequential order switch (type) { case VCTRS_TYPE_integer: DF_ORDER_EXTRACT_CHUNK(INTEGER_RO, int); break; case VCTRS_TYPE_logical: DF_ORDER_EXTRACT_CHUNK(LOGICAL_RO, int); break; case VCTRS_TYPE_double: DF_ORDER_EXTRACT_CHUNK(REAL_RO, double); break; case VCTRS_TYPE_character: DF_ORDER_EXTRACT_CHUNK(STRING_PTR_RO, SEXP); break; case VCTRS_TYPE_complex: DF_ORDER_EXTRACT_CHUNK_CPL(); break; default: Rf_errorcall(R_NilValue, "Unknown data frame column type in `vec_order()`."); } vec_order_chunk_switch( col_decreasing, col_na_last, nan_distinct, group_size, type, p_o_col, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); p_o_col += group_size; } if (type == VCTRS_TYPE_complex) { if (complex_first_pass) { // Transition from first pass to second pass complex_first_pass = false; // Decrement `i` so we reuse this column --i; } else { // Reset flag for future complex columns complex_first_pass = true; } } } } #undef DF_ORDER_EXTRACT_CHUNK #undef DF_ORDER_EXTRACT_CHUNK_CPL // ----------------------------------------------------------------------------- /* * Switch function specifically for column chunks generated when * processing a data frame */ static void vec_order_chunk_switch( bool decreasing, bool na_last, bool nan_distinct, r_ssize size, const enum vctrs_type type, int* p_o, struct lazy_raw* p_lazy_x_chunk, struct lazy_raw* p_lazy_x_aux, struct lazy_raw* p_lazy_o_aux, struct lazy_raw* p_lazy_bytes, struct lazy_raw* p_lazy_counts, struct group_infos* p_group_infos ) { switch (type) { case VCTRS_TYPE_integer: { int_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_logical: { lgl_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_double: { dbl_order_chunk( decreasing, na_last, nan_distinct, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_complex: { // Complex types are run in two passes, once over real then over imaginary dbl_order_chunk( decreasing, na_last, nan_distinct, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_character: { chr_order_chunk( decreasing, na_last, size, p_o, p_lazy_x_chunk, p_lazy_x_aux, p_lazy_o_aux, p_lazy_bytes, p_lazy_counts, p_group_infos ); break; } case VCTRS_TYPE_dataframe: { Rf_errorcall(R_NilValue, "Internal error: df-cols should have already been flattened."); break; } default: { Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`"); } } } // ----------------------------------------------------------------------------- /* * Compute the minimum size required for `lazy_x_aux` and `lazy_x_chunk`. * * For complex, we split the vector into two double vectors. We only need to * allocate 1 double vector though, and it will be reused for both the real * and imaginary parts. */ static inline size_t vec_compute_n_bytes_lazy_raw( SEXP x, const enum vctrs_type type ) { switch (type) { case VCTRS_TYPE_integer: case VCTRS_TYPE_logical: return sizeof(int); case VCTRS_TYPE_double: return sizeof(double); case VCTRS_TYPE_complex: // Complex types will be split into two double vectors return sizeof(double); case VCTRS_TYPE_character: // Both `SEXP` and `const char*` are written to the working memory. // Should be the same size (8 bytes), both are pointers, but be defensive. return sizeof(SEXP) > sizeof(const char*) ? sizeof(SEXP) : sizeof(const char*); case VCTRS_TYPE_dataframe: return df_compute_n_bytes_lazy_raw(x); default: Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } // `x` should be a flattened df with no df-cols static inline size_t df_compute_n_bytes_lazy_raw(SEXP x) { r_ssize n_cols = r_length(x); size_t multiplier = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); const enum vctrs_type type = vec_proxy_typeof(col); size_t col_multiplier = vec_compute_n_bytes_lazy_raw(col, type); if (col_multiplier > multiplier) { multiplier = col_multiplier; } } return multiplier; } // ----------------------------------------------------------------------------- /* * Compute the minimum size required for `p_counts` * * - For integer, we use 4 passes. * - Logical uses integer radix sorting. * - For double, we use 8 passes. * - Complex uses double radix sorting. * - Character doesn't use `p_counts` because of the * variable number of passes. */ static inline size_t vec_compute_n_bytes_lazy_counts( SEXP x, const enum vctrs_type type ) { switch (type) { case VCTRS_TYPE_integer: case VCTRS_TYPE_logical: return INT_MAX_RADIX_PASS; case VCTRS_TYPE_double: case VCTRS_TYPE_complex: return DBL_MAX_RADIX_PASS; case VCTRS_TYPE_character: return 0; case VCTRS_TYPE_dataframe: return df_compute_n_bytes_lazy_counts(x); default: Rf_errorcall(R_NilValue, "This type is not supported by `vec_order()`."); } } // `x` should be a flattened df with no df-cols static size_t df_compute_n_bytes_lazy_counts(SEXP x) { r_ssize n_cols = r_length(x); size_t multiplier = 0; for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); const enum vctrs_type type = vec_proxy_typeof(col); size_t col_multiplier = vec_compute_n_bytes_lazy_counts(col, type); if (col_multiplier > multiplier) { multiplier = col_multiplier; } } return multiplier; } // ----------------------------------------------------------------------------- /* * `vec_order_expand_args()` checks the type and length of `decreasing` and * `na_largest` and possibly expands them. * * `x` is expected to be the original input, before `vec_proxy_order()` is * called on it. * * If `x` is not a data frame, `decreasing` and `na_largest` must be boolean * values. If `x` is something like a rcrd type with a multi-column data frame * proxy, then restricting to a boolean argument is correct, and works because * the single value will be recycled across the columns. * * If `x` is a data frame, and `decreasing` or `na_largest` is size 1, we return * it untouched and it will be recycled correctly. * * If `x` is a data frame and the size of the arg matches the number of * columns of `x`, we have to be careful to "expand" the arg to match * the number of columns of `x` that will exist after `vec_proxy_order()` * is called. It flattens df-cols which might either already exist in `x`, * or may arise from rcrd columns that have data frame proxies. The majority * of the code here is for tracking this expansion. */ static SEXP vec_order_expand_args( SEXP x, SEXP decreasing, SEXP na_largest ) { SEXP args = PROTECT(r_new_list(2)); SET_VECTOR_ELT(args, 0, decreasing); SET_VECTOR_ELT(args, 1, na_largest); // Don't check length here. These might be vectorized if `x` is a data frame. if (TYPEOF(decreasing) != LGLSXP) { Rf_errorcall(R_NilValue, "Internal error: `decreasing` must be logical"); } if (lgl_any_na(decreasing)) { Rf_errorcall(R_NilValue, "Internal error: `decreasing` can't contain missing values."); } if (TYPEOF(na_largest) != LGLSXP) { Rf_errorcall(R_NilValue, "Internal error: `na_largest` must be logical"); } if (lgl_any_na(na_largest)) { Rf_errorcall(R_NilValue, "Internal error: `na_largest` can't contain missing values."); } if (is_data_frame(x)) { args = df_expand_args(x, args); UNPROTECT(1); return args; } if (r_length(decreasing) != 1) { Rf_errorcall(R_NilValue, "`direction` must be a single value when `x` is not a data frame."); } if (r_length(na_largest) != 1) { Rf_errorcall(R_NilValue, "`na_value` must be a single value when `x` is not a data frame."); } UNPROTECT(1); return args; } static SEXP df_expand_args(SEXP x, SEXP args) { SEXP decreasing = VECTOR_ELT(args, 0); SEXP na_largest = VECTOR_ELT(args, 1); r_ssize n_decreasing = r_length(decreasing); r_ssize n_na_largest = r_length(na_largest); r_ssize n_cols = r_length(x); // They will be recycled correctly even if columns get flattened if (n_decreasing == 1 && n_na_largest == 1) { return args; } // Must start out with the same length as the number of columns if (n_decreasing != 1 && n_decreasing != n_cols) { Rf_errorcall( R_NilValue, "`direction` should have length 1 or length equal to the number of " "columns of `x` when `x` is a data frame." ); } if (n_na_largest != 1 && n_na_largest != n_cols) { Rf_errorcall( R_NilValue, "`na_value` should have length 1 or length equal to the number of " "columns of `x` when `x` is a data frame." ); } SEXP expansions = PROTECT(Rf_allocVector(INTSXP, n_cols)); int* p_expansions = INTEGER(expansions); int size = 0; bool needs_expansion = false; // Compute expansion factor for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); int expansion = vec_decreasing_expansion(col); if (expansion != 1) { needs_expansion = true; } p_expansions[i] = expansion; size += expansion; } if (!needs_expansion) { UNPROTECT(1); return args; } decreasing = expand_arg(decreasing, p_expansions, n_decreasing, size); SET_VECTOR_ELT(args, 0, decreasing); na_largest = expand_arg(na_largest, p_expansions, n_na_largest, size); SET_VECTOR_ELT(args, 1, na_largest); UNPROTECT(1); return args; } static SEXP expand_arg( SEXP arg, const int* p_expansions, r_ssize n_arg, r_ssize size ) { if (n_arg == 1) { return arg; } SEXP out = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_out = LOGICAL(out); int* p_arg = LOGICAL(arg); int k = 0; // Fill `out` with repeated `arg` values to match expanded size for (r_ssize i = 0; i < n_arg; ++i) { int col_arg = p_arg[i]; int expansion = p_expansions[i]; for (r_ssize j = 0; j < expansion; ++j) { p_out[k] = col_arg; ++k; } } UNPROTECT(1); return out; } static int vec_decreasing_expansion(SEXP x) { // Bare vectors if (!r_is_object(x) && !has_dim(x)) { return 1; } // Compute number of cols in df-cols, // and do proxy-compare on the cols as needed if (is_data_frame(x)) { return df_decreasing_expansion(x); } int expansion; // Otherwise we have an S3 column that could have a data frame // ordering proxy containing multiple columns, so we need to check for that SEXP proxy = PROTECT(vec_proxy_order(x)); // If the `proxy` is a data frame, the expansion factor is the // number of columns. Otherwise it is 1. if (is_data_frame(proxy)) { expansion = Rf_length(proxy); } else { expansion = 1; } UNPROTECT(1); return expansion; } // 0-col df-cols get dropped from the comparison proxy, so returning `0` here // when a df-col has no columns should be correct static int df_decreasing_expansion(SEXP x) { r_ssize n_cols = r_length(x); int out = 0; // Accumulate the expansion factors of the cols of the df-col for (r_ssize i = 0; i < n_cols; ++i) { SEXP col = VECTOR_ELT(x, i); out += vec_decreasing_expansion(col); } return out; } // ----------------------------------------------------------------------------- /* * `na_value` -> `na_largest` is parsed as: * largest -> TRUE * smallest -> FALSE * `na_largest` maps directly to `na_last` unless we are in decreasing order, * in which case `na_last = !na_largest`. */ static SEXP vec_order_compute_na_last( SEXP na_largest, SEXP decreasing ) { const r_ssize size = r_length(na_largest); if (size != r_length(decreasing)) { r_stop_internal( "`na_largest` and `decreasing` should already match in size." ); } SEXP na_last = PROTECT(r_new_logical(size)); int* p_na_last = LOGICAL(na_last); const int* p_na_largest = LOGICAL_RO(na_largest); const int* p_decreasing = LOGICAL_RO(decreasing); for (r_ssize i = 0; i < size; ++i) { p_na_last[i] = p_decreasing[i] ? !p_na_largest[i] : p_na_largest[i]; } UNPROTECT(1); return na_last; } // ----------------------------------------------------------------------------- static SEXP parse_na_value(SEXP na_value) { // Don't care about length here, checked later if (TYPEOF(na_value) != STRSXP) { Rf_errorcall(R_NilValue, "`na_value` must be a character vector."); } R_len_t size = Rf_length(na_value); const SEXP* p_na_value = STRING_PTR_RO(na_value); SEXP na_largest = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_na_largest = LOGICAL(na_largest); for (R_len_t i = 0; i < size; ++i) { p_na_largest[i] = parse_na_value_one(p_na_value[i]); } UNPROTECT(1); return na_largest; } static int parse_na_value_one(SEXP x) { if (x == NA_STRING) { Rf_errorcall(R_NilValue, "`na_value` can't be missing."); } const char* c_x = CHAR(x); if (!strcmp(c_x, "largest")) return 1; if (!strcmp(c_x, "smallest")) return 0; Rf_errorcall( R_NilValue, "`na_value` must contain only \"largest\" or \"smallest\"." ); } static SEXP parse_direction(SEXP direction) { // Don't care about length here, checked later if (TYPEOF(direction) != STRSXP) { Rf_errorcall(R_NilValue, "`direction` must be a character vector."); } R_len_t size = Rf_length(direction); const SEXP* p_direction = STRING_PTR_RO(direction); SEXP decreasing = PROTECT(Rf_allocVector(LGLSXP, size)); int* p_decreasing = LOGICAL(decreasing); for (R_len_t i = 0; i < size; ++i) { p_decreasing[i] = parse_direction_one(p_direction[i]); } UNPROTECT(1); return decreasing; } static int parse_direction_one(SEXP x) { if (x == NA_STRING) { Rf_errorcall(R_NilValue, "`direction` can't be missing."); } const char* c_x = CHAR(x); if (!strcmp(c_x, "asc")) return 0; if (!strcmp(c_x, "desc")) return 1; Rf_errorcall( R_NilValue, "`direction` must contain only \"asc\" or \"desc\"." ); } static inline bool parse_nan_distinct(SEXP nan_distinct) { if (TYPEOF(nan_distinct) != LGLSXP) { Rf_errorcall(R_NilValue, "`nan_distinct` must be a logical vector."); } if (Rf_length(nan_distinct) != 1) { Rf_errorcall(R_NilValue, "`nan_distinct` must be length 1."); } int c_nan_distinct = LOGICAL_RO(nan_distinct)[0]; if (c_nan_distinct == NA_LOGICAL) { Rf_errorcall(R_NilValue, "`nan_distinct` can't be missing."); } return (bool) c_nan_distinct; } vctrs/src/typeof2.h0000644000176200001440000000517714315060310013746 0ustar liggesusers#ifndef VCTRS_TYPEOF2_H #define VCTRS_TYPEOF2_H #include "vctrs-core.h" enum vctrs_type2 { VCTRS_TYPE2_null_null, VCTRS_TYPE2_null_unspecified, VCTRS_TYPE2_null_logical, VCTRS_TYPE2_null_integer, VCTRS_TYPE2_null_double, VCTRS_TYPE2_null_complex, VCTRS_TYPE2_null_character, VCTRS_TYPE2_null_raw, VCTRS_TYPE2_null_list, VCTRS_TYPE2_null_dataframe, VCTRS_TYPE2_null_s3, VCTRS_TYPE2_null_scalar, VCTRS_TYPE2_unspecified_unspecified, VCTRS_TYPE2_unspecified_logical, VCTRS_TYPE2_unspecified_integer, VCTRS_TYPE2_unspecified_double, VCTRS_TYPE2_unspecified_complex, VCTRS_TYPE2_unspecified_character, VCTRS_TYPE2_unspecified_raw, VCTRS_TYPE2_unspecified_list, VCTRS_TYPE2_unspecified_dataframe, VCTRS_TYPE2_unspecified_s3, VCTRS_TYPE2_unspecified_scalar, VCTRS_TYPE2_logical_logical, VCTRS_TYPE2_logical_integer, VCTRS_TYPE2_logical_double, VCTRS_TYPE2_logical_complex, VCTRS_TYPE2_logical_character, VCTRS_TYPE2_logical_raw, VCTRS_TYPE2_logical_list, VCTRS_TYPE2_logical_dataframe, VCTRS_TYPE2_logical_s3, VCTRS_TYPE2_logical_scalar, VCTRS_TYPE2_integer_integer, VCTRS_TYPE2_integer_double, VCTRS_TYPE2_integer_complex, VCTRS_TYPE2_integer_character, VCTRS_TYPE2_integer_raw, VCTRS_TYPE2_integer_list, VCTRS_TYPE2_integer_dataframe, VCTRS_TYPE2_integer_s3, VCTRS_TYPE2_integer_scalar, VCTRS_TYPE2_double_double, VCTRS_TYPE2_double_complex, VCTRS_TYPE2_double_character, VCTRS_TYPE2_double_raw, VCTRS_TYPE2_double_list, VCTRS_TYPE2_double_dataframe, VCTRS_TYPE2_double_s3, VCTRS_TYPE2_double_scalar, VCTRS_TYPE2_complex_complex, VCTRS_TYPE2_complex_character, VCTRS_TYPE2_complex_raw, VCTRS_TYPE2_complex_list, VCTRS_TYPE2_complex_dataframe, VCTRS_TYPE2_complex_s3, VCTRS_TYPE2_complex_scalar, VCTRS_TYPE2_character_character, VCTRS_TYPE2_character_raw, VCTRS_TYPE2_character_list, VCTRS_TYPE2_character_dataframe, VCTRS_TYPE2_character_s3, VCTRS_TYPE2_character_scalar, VCTRS_TYPE2_raw_raw, VCTRS_TYPE2_raw_list, VCTRS_TYPE2_raw_dataframe, VCTRS_TYPE2_raw_s3, VCTRS_TYPE2_raw_scalar, VCTRS_TYPE2_list_list, VCTRS_TYPE2_list_dataframe, VCTRS_TYPE2_list_s3, VCTRS_TYPE2_list_scalar, VCTRS_TYPE2_dataframe_dataframe, VCTRS_TYPE2_dataframe_s3, VCTRS_TYPE2_dataframe_scalar, VCTRS_TYPE2_S3_s3, VCTRS_TYPE2_S3_scalar, VCTRS_TYPE2_scalar_scalar }; enum vctrs_type2 vec_typeof2_impl(enum vctrs_type type_x, enum vctrs_type type_y, int* left); enum vctrs_type2 vec_typeof2(r_obj* x, r_obj* y); const char* vctrs_type2_as_str(enum vctrs_type2 type); #endif vctrs/src/slice-interleave.c0000644000176200001440000000636015113325071015604 0ustar liggesusers#include "slice-interleave.h" #include "vctrs.h" #include "decl/slice-interleave-decl.h" r_obj* ffi_list_interleave( r_obj* ffi_x, r_obj* ffi_size, r_obj* ffi_ptype, r_obj* ffi_name_spec, r_obj* ffi_name_repair, r_obj* ffi_frame ) { struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; const r_ssize size = (ffi_size == r_null) ? -1 : vec_as_short_length(ffi_size, vec_args.size, error_call); struct name_repair_opts name_repair_opts = new_name_repair_opts( ffi_name_repair, r_lazy_null, false, error_call ); KEEP(name_repair_opts.shelter); r_obj* out = list_interleave( ffi_x, size, ffi_ptype, ffi_name_spec, &name_repair_opts, &x_arg, error_call ); FREE(1); return out; } r_obj* list_interleave( r_obj* x, r_ssize size, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_x_arg, struct r_lazy error_call ) { obj_check_list(x, p_x_arg, error_call); const r_ssize elt_size = (size == -1) ? vec_size_common(x, 0, p_x_arg, error_call) : size; r_obj* const* v_x = r_list_cbegin(x); const r_ssize x_size = r_length(x); // `x_size` excluding `NULL`s const r_ssize x_size_used = list_interleave_x_size_used( v_x, x_size ); const r_ssize out_size = r_ssize_mult(x_size_used, elt_size); if (out_size > R_LEN_T_MAX) { r_abort( "Long vectors are not yet supported in `list_interleave()`. " "Result from interleaving would have size %td, which is larger " "than the maximum supported size of 2^31 - 1.", out_size ); } r_obj* indices = KEEP(list_interleave_indices( v_x, x_size, x_size_used, elt_size )); r_obj* default_ = r_null; struct vctrs_arg* p_indices_arg = vec_args.empty; struct vctrs_arg* p_default_arg = vec_args.empty; r_obj* out = list_combine( x, indices, out_size, default_, LIST_COMBINE_UNMATCHED_default, LIST_COMBINE_MULTIPLE_last, ASSIGNMENT_SLICE_VALUE_no, ptype, name_spec, p_name_repair_opts, p_x_arg, p_indices_arg, p_default_arg, error_call ); FREE(1); return out; } static r_ssize list_interleave_x_size_used(r_obj* const* v_x, r_ssize x_size) { r_ssize x_size_used = 0; for (r_ssize i = 0; i < x_size; ++i) { r_obj* elt = v_x[i]; if (elt == r_null) { continue; } ++x_size_used; } return x_size_used; } static r_obj* list_interleave_indices( r_obj* const* v_x, r_ssize x_size, r_ssize x_size_used, r_ssize elt_size ) { r_obj* indices = KEEP(r_alloc_list(x_size)); r_ssize start = 0; for (r_ssize i = 0; i < x_size; ++i) { r_obj* elt = v_x[i]; if (elt == r_null) { // Insert `integer()` index for `NULL`, don't advance `start` r_list_poke(indices, i, r_globals.empty_int); continue; } ++start; r_obj* index = r_alloc_integer(elt_size); r_list_poke(indices, i, index); int* v_index = r_int_begin(index); for (r_ssize j = 0; j < elt_size; ++j) { v_index[j] = start + x_size_used * j; } } FREE(1); return indices; } vctrs/src/ptype-common.h0000644000176200001440000000071215120272011014771 0ustar liggesusers#ifndef VCTRS_PTYPE_COMMON_H #define VCTRS_PTYPE_COMMON_H #include "vctrs-core.h" #include "ptype2.h" #include "unspecified.h" #include "utils.h" static inline bool vec_is_common_class_fallback(r_obj* ptype) { return r_inherits(ptype, c_strs_vctrs_common_class_fallback); } r_obj* vec_ptype_common( r_obj* dots, r_obj* ptype, enum ptype_finalise finalise, enum s3_fallback s3_fallback, struct vctrs_arg* p_arg, struct r_lazy call ); #endif vctrs/src/slice-interleave.h0000644000176200001440000000047715075743736015637 0ustar liggesusers#ifndef VCTRS_SLICE_INTERLEAVE_H #define VCTRS_SLICE_INTERLEAVE_H #include "vctrs-core.h" #include "names.h" r_obj* list_interleave( r_obj* x, r_ssize size, r_obj* ptype, r_obj* name_spec, const struct name_repair_opts* p_name_repair_opts, struct vctrs_arg* p_x_arg, struct r_lazy error_call ); #endif vctrs/NAMESPACE0000644000176200001440000004641515154276515012657 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("!",vctrs_vctr) S3method("!=",vctrs_vctr) S3method("$",vctrs_list_of) S3method("$",vctrs_rcrd) S3method("$",vctrs_sclr) S3method("$",vctrs_vctr) S3method("$<-",vctrs_list_of) S3method("$<-",vctrs_rcrd) S3method("$<-",vctrs_sclr) S3method("$<-",vctrs_vctr) S3method("%%",vctrs_vctr) S3method("%/%",vctrs_vctr) S3method("&",vctrs_vctr) S3method("*",vctrs_vctr) S3method("+",vctrs_vctr) S3method("-",vctrs_vctr) S3method("/",vctrs_vctr) S3method("<",vctrs_vctr) S3method("<=",vctrs_vctr) S3method("==",vctrs_vctr) S3method(">",vctrs_vctr) S3method(">=",vctrs_vctr) S3method("[",vctrs_rcrd) S3method("[",vctrs_sclr) S3method("[",vctrs_unspecified) S3method("[",vctrs_vctr) S3method("[<-",vctrs_list_of) S3method("[<-",vctrs_rcrd) S3method("[<-",vctrs_sclr) S3method("[<-",vctrs_vctr) S3method("[[",vctrs_list_of) S3method("[[",vctrs_rcrd) S3method("[[",vctrs_sclr) S3method("[[",vctrs_vctr) S3method("[[<-",vctrs_list_of) S3method("[[<-",vctrs_rcrd) S3method("[[<-",vctrs_sclr) S3method("[[<-",vctrs_vctr) S3method("^",vctrs_vctr) S3method("dim<-",vctrs_sclr) S3method("dim<-",vctrs_vctr) S3method("dimnames<-",vctrs_sclr) S3method("dimnames<-",vctrs_vctr) S3method("is.na<-",vctrs_sclr) S3method("is.na<-",vctrs_vctr) S3method("length<-",vctrs_rcrd) S3method("length<-",vctrs_vctr) S3method("levels<-",vctrs_sclr) S3method("levels<-",vctrs_vctr) S3method("names<-",vctrs_rcrd) S3method("names<-",vctrs_sclr) S3method("names<-",vctrs_vctr) S3method("|",vctrs_vctr) S3method(Complex,vctrs_sclr) S3method(Math,vctrs_sclr) S3method(Math,vctrs_vctr) S3method(Ops,vctrs_sclr) S3method(Summary,vctrs_sclr) S3method(Summary,vctrs_vctr) S3method(anyDuplicated,vctrs_sclr) S3method(anyDuplicated,vctrs_vctr) S3method(anyNA,vctrs_vctr) S3method(as.Date,vctrs_sclr) S3method(as.Date,vctrs_vctr) S3method(as.POSIXct,vctrs_sclr) S3method(as.POSIXct,vctrs_vctr) S3method(as.POSIXlt,vctrs_vctr) S3method(as.character,vctrs_list_of) S3method(as.character,vctrs_sclr) S3method(as.character,vctrs_vctr) S3method(as.data.frame,vctrs_sclr) S3method(as.data.frame,vctrs_vctr) S3method(as.double,vctrs_sclr) S3method(as.double,vctrs_vctr) S3method(as.integer,vctrs_sclr) S3method(as.integer,vctrs_vctr) S3method(as.list,vctrs_list_of) S3method(as.list,vctrs_sclr) S3method(as.list,vctrs_vctr) S3method(as.logical,vctrs_sclr) S3method(as.logical,vctrs_vctr) S3method(as_list_of,list) S3method(as_list_of,vctrs_list_of) S3method(c,vctrs_sclr) S3method(c,vctrs_vctr) S3method(can_fall_back,"vctrs:::common_class_fallback") S3method(can_fall_back,data.frame) S3method(can_fall_back,default) S3method(can_fall_back,ts) S3method(can_fall_back,vctrs_vctr) S3method(cnd_body,vctrs_error_cast_lossy) S3method(cnd_body,vctrs_error_combine_unmatched) S3method(cnd_body,vctrs_error_incompatible_size) S3method(cnd_body,vctrs_error_matches_incomplete) S3method(cnd_body,vctrs_error_matches_multiple) S3method(cnd_body,vctrs_error_matches_nothing) S3method(cnd_body,vctrs_error_matches_relationship_many_to_one) S3method(cnd_body,vctrs_error_matches_relationship_one_to_many) S3method(cnd_body,vctrs_error_matches_relationship_one_to_one) S3method(cnd_body,vctrs_error_matches_remaining) S3method(cnd_body,vctrs_error_names_cannot_be_dot_dot) S3method(cnd_body,vctrs_error_names_cannot_be_empty) S3method(cnd_body,vctrs_error_names_must_be_unique) S3method(cnd_body,vctrs_error_subscript_oob) S3method(cnd_body,vctrs_error_subscript_type) S3method(cnd_header,vctrs_error_cast_lossy) S3method(cnd_header,vctrs_error_combine_unmatched) S3method(cnd_header,vctrs_error_incompatible_size) S3method(cnd_header,vctrs_error_matches_incomplete) S3method(cnd_header,vctrs_error_matches_multiple) S3method(cnd_header,vctrs_error_matches_nothing) S3method(cnd_header,vctrs_error_matches_relationship_many_to_one) S3method(cnd_header,vctrs_error_matches_relationship_one_to_many) S3method(cnd_header,vctrs_error_matches_relationship_one_to_one) S3method(cnd_header,vctrs_error_matches_remaining) S3method(cnd_header,vctrs_error_names_cannot_be_dot_dot) S3method(cnd_header,vctrs_error_names_cannot_be_empty) S3method(cnd_header,vctrs_error_names_must_be_unique) S3method(cnd_header,vctrs_error_subscript_oob) S3method(cnd_header,vctrs_error_subscript_size) S3method(cnd_header,vctrs_error_subscript_type) S3method(diff,vctrs_vctr) S3method(duplicated,vctrs_sclr) S3method(duplicated,vctrs_vctr) S3method(format,hidden) S3method(format,vctrs_bytes) S3method(format,vctrs_group_rle) S3method(format,vctrs_list_of) S3method(format,vctrs_rcrd) S3method(format,vctrs_vctr) S3method(is.finite,vctrs_vctr) S3method(is.infinite,vctrs_vctr) S3method(is.na,vctrs_vctr) S3method(is.nan,vctrs_vctr) S3method(length,vctrs_rcrd) S3method(levels,vctrs_sclr) S3method(levels,vctrs_vctr) S3method(max,vctrs_vctr) S3method(mean,vctrs_vctr) S3method(median,vctrs_vctr) S3method(min,vctrs_vctr) S3method(na.exclude,vctrs_vctr) S3method(na.fail,vctrs_vctr) S3method(na.omit,vctrs_vctr) S3method(names,vctrs_rcrd) S3method(obj_print_data,default) S3method(obj_print_data,vctrs_list_of) S3method(obj_print_footer,default) S3method(obj_print_header,default) S3method(obj_print_header,vctrs_group_rle) S3method(obj_str_data,default) S3method(obj_str_data,vctrs_rcrd) S3method(obj_str_footer,default) S3method(obj_str_header,default) S3method(print,vctrs_bytes) S3method(print,vctrs_sclr) S3method(print,vctrs_unspecified) S3method(print,vctrs_vctr) S3method(quantile,vctrs_vctr) S3method(range,vctrs_vctr) S3method(rep,vctrs_rcrd) S3method(rep,vctrs_vctr) S3method(str,vctrs_vctr) S3method(summary,vctrs_sclr) S3method(summary,vctrs_vctr) S3method(t,vctrs_sclr) S3method(t,vctrs_vctr) S3method(unique,vctrs_sclr) S3method(unique,vctrs_vctr) S3method(vec_arith,Date) S3method(vec_arith,POSIXct) S3method(vec_arith,POSIXlt) S3method(vec_arith,default) S3method(vec_arith,difftime) S3method(vec_arith,factor) S3method(vec_arith,logical) S3method(vec_arith,numeric) S3method(vec_arith.Date,Date) S3method(vec_arith.Date,POSIXct) S3method(vec_arith.Date,POSIXlt) S3method(vec_arith.Date,default) S3method(vec_arith.Date,difftime) S3method(vec_arith.Date,numeric) S3method(vec_arith.POSIXct,Date) S3method(vec_arith.POSIXct,POSIXct) S3method(vec_arith.POSIXct,POSIXlt) S3method(vec_arith.POSIXct,default) S3method(vec_arith.POSIXct,difftime) S3method(vec_arith.POSIXct,numeric) S3method(vec_arith.POSIXlt,Date) S3method(vec_arith.POSIXlt,POSIXct) S3method(vec_arith.POSIXlt,POSIXlt) S3method(vec_arith.POSIXlt,default) S3method(vec_arith.POSIXlt,difftime) S3method(vec_arith.POSIXlt,numeric) S3method(vec_arith.difftime,Date) S3method(vec_arith.difftime,MISSING) S3method(vec_arith.difftime,POSIXct) S3method(vec_arith.difftime,POSIXlt) S3method(vec_arith.difftime,default) S3method(vec_arith.difftime,difftime) S3method(vec_arith.difftime,numeric) S3method(vec_arith.logical,default) S3method(vec_arith.logical,logical) S3method(vec_arith.logical,numeric) S3method(vec_arith.numeric,Date) S3method(vec_arith.numeric,POSIXct) S3method(vec_arith.numeric,POSIXlt) S3method(vec_arith.numeric,default) S3method(vec_arith.numeric,difftime) S3method(vec_arith.numeric,logical) S3method(vec_arith.numeric,numeric) S3method(vec_cast,Date) S3method(vec_cast,POSIXct) S3method(vec_cast,POSIXlt) S3method(vec_cast,character) S3method(vec_cast,character.factor) S3method(vec_cast,character.ordered) S3method(vec_cast,complex) S3method(vec_cast,data.frame) S3method(vec_cast,data.frame.data.table) S3method(vec_cast,data.table.data.frame) S3method(vec_cast,data.table.data.table) S3method(vec_cast,difftime) S3method(vec_cast,double) S3method(vec_cast,double.exclude) S3method(vec_cast,double.omit) S3method(vec_cast,exclude.double) S3method(vec_cast,exclude.exclude) S3method(vec_cast,exclude.integer) S3method(vec_cast,factor) S3method(vec_cast,factor.character) S3method(vec_cast,factor.factor) S3method(vec_cast,integer) S3method(vec_cast,integer.exclude) S3method(vec_cast,integer.omit) S3method(vec_cast,integer64) S3method(vec_cast,list) S3method(vec_cast,list.vctrs_list_of) S3method(vec_cast,logical) S3method(vec_cast,omit.double) S3method(vec_cast,omit.integer) S3method(vec_cast,omit.omit) S3method(vec_cast,ordered) S3method(vec_cast,ordered.character) S3method(vec_cast,ordered.ordered) S3method(vec_cast,raw) S3method(vec_cast,table.table) S3method(vec_cast,vctrs_list_of) S3method(vec_cast,vctrs_list_of.list) S3method(vec_cast,vctrs_rcrd) S3method(vec_cast,vctrs_rcrd.vctrs_rcrd) S3method(vec_cast,vctrs_vctr) S3method(vec_cast.Date,Date) S3method(vec_cast.Date,POSIXct) S3method(vec_cast.Date,POSIXlt) S3method(vec_cast.POSIXct,Date) S3method(vec_cast.POSIXct,POSIXct) S3method(vec_cast.POSIXct,POSIXlt) S3method(vec_cast.POSIXlt,Date) S3method(vec_cast.POSIXlt,POSIXct) S3method(vec_cast.POSIXlt,POSIXlt) S3method(vec_cast.character,character) S3method(vec_cast.complex,complex) S3method(vec_cast.complex,double) S3method(vec_cast.complex,integer) S3method(vec_cast.complex,logical) S3method(vec_cast.data.frame,data.frame) S3method(vec_cast.difftime,difftime) S3method(vec_cast.double,double) S3method(vec_cast.double,integer) S3method(vec_cast.double,integer64) S3method(vec_cast.double,logical) S3method(vec_cast.integer,double) S3method(vec_cast.integer,integer) S3method(vec_cast.integer,integer64) S3method(vec_cast.integer,logical) S3method(vec_cast.integer64,double) S3method(vec_cast.integer64,integer) S3method(vec_cast.integer64,integer64) S3method(vec_cast.integer64,logical) S3method(vec_cast.list,list) S3method(vec_cast.logical,double) S3method(vec_cast.logical,integer) S3method(vec_cast.logical,integer64) S3method(vec_cast.logical,logical) S3method(vec_cast.raw,raw) S3method(vec_cast.vctrs_list_of,vctrs_list_of) S3method(vec_cbind_frame_ptype,default) S3method(vec_cbind_frame_ptype,sf) S3method(vec_math,Date) S3method(vec_math,POSIXct) S3method(vec_math,POSIXlt) S3method(vec_math,default) S3method(vec_math,factor) S3method(vec_math,vctrs_rcrd) S3method(vec_proxy,"vctrs:::common_class_fallback") S3method(vec_proxy,AsIs) S3method(vec_proxy,Date) S3method(vec_proxy,POSIXct) S3method(vec_proxy,POSIXlt) S3method(vec_proxy,default) S3method(vec_proxy,exclude) S3method(vec_proxy,factor) S3method(vec_proxy,numeric_version) S3method(vec_proxy,omit) S3method(vec_proxy,ordered) S3method(vec_proxy,vctrs_list_of) S3method(vec_proxy,vctrs_rcrd) S3method(vec_proxy,vctrs_vctr) S3method(vec_proxy_compare,AsIs) S3method(vec_proxy_compare,POSIXlt) S3method(vec_proxy_compare,array) S3method(vec_proxy_compare,default) S3method(vec_proxy_compare,list) S3method(vec_proxy_compare,raw) S3method(vec_proxy_equal,AsIs) S3method(vec_proxy_equal,POSIXlt) S3method(vec_proxy_equal,array) S3method(vec_proxy_equal,default) S3method(vec_proxy_equal,integer64) S3method(vec_proxy_equal,numeric_version) S3method(vec_proxy_order,AsIs) S3method(vec_proxy_order,array) S3method(vec_proxy_order,default) S3method(vec_proxy_order,list) S3method(vec_proxy_order,raw) S3method(vec_ptype,POSIXlt) S3method(vec_ptype2,AsIs) S3method(vec_ptype2,Date) S3method(vec_ptype2,POSIXct) S3method(vec_ptype2,POSIXlt) S3method(vec_ptype2,character) S3method(vec_ptype2,character.factor) S3method(vec_ptype2,character.ordered) S3method(vec_ptype2,complex) S3method(vec_ptype2,data.frame) S3method(vec_ptype2,data.frame.data.table) S3method(vec_ptype2,data.table.data.frame) S3method(vec_ptype2,data.table.data.table) S3method(vec_ptype2,difftime) S3method(vec_ptype2,double) S3method(vec_ptype2,double.exclude) S3method(vec_ptype2,double.omit) S3method(vec_ptype2,exclude.double) S3method(vec_ptype2,exclude.exclude) S3method(vec_ptype2,exclude.integer) S3method(vec_ptype2,factor) S3method(vec_ptype2,factor.character) S3method(vec_ptype2,factor.factor) S3method(vec_ptype2,factor.ordered) S3method(vec_ptype2,integer) S3method(vec_ptype2,integer.exclude) S3method(vec_ptype2,integer.omit) S3method(vec_ptype2,integer64) S3method(vec_ptype2,list) S3method(vec_ptype2,list.vctrs_list_of) S3method(vec_ptype2,logical) S3method(vec_ptype2,omit.double) S3method(vec_ptype2,omit.integer) S3method(vec_ptype2,omit.omit) S3method(vec_ptype2,ordered) S3method(vec_ptype2,ordered.character) S3method(vec_ptype2,ordered.factor) S3method(vec_ptype2,ordered.ordered) S3method(vec_ptype2,raw) S3method(vec_ptype2,table.table) S3method(vec_ptype2,vctrs_list_of) S3method(vec_ptype2,vctrs_list_of.list) S3method(vec_ptype2.AsIs,AsIs) S3method(vec_ptype2.Date,Date) S3method(vec_ptype2.Date,POSIXct) S3method(vec_ptype2.Date,POSIXlt) S3method(vec_ptype2.POSIXct,Date) S3method(vec_ptype2.POSIXct,POSIXct) S3method(vec_ptype2.POSIXct,POSIXlt) S3method(vec_ptype2.POSIXlt,Date) S3method(vec_ptype2.POSIXlt,POSIXct) S3method(vec_ptype2.POSIXlt,POSIXlt) S3method(vec_ptype2.character,character) S3method(vec_ptype2.complex,complex) S3method(vec_ptype2.complex,double) S3method(vec_ptype2.complex,integer) S3method(vec_ptype2.data.frame,data.frame) S3method(vec_ptype2.difftime,difftime) S3method(vec_ptype2.double,complex) S3method(vec_ptype2.double,double) S3method(vec_ptype2.double,integer) S3method(vec_ptype2.double,logical) S3method(vec_ptype2.integer,complex) S3method(vec_ptype2.integer,double) S3method(vec_ptype2.integer,integer) S3method(vec_ptype2.integer,integer64) S3method(vec_ptype2.integer,logical) S3method(vec_ptype2.integer64,integer) S3method(vec_ptype2.integer64,integer64) S3method(vec_ptype2.integer64,logical) S3method(vec_ptype2.list,list) S3method(vec_ptype2.logical,double) S3method(vec_ptype2.logical,integer) S3method(vec_ptype2.logical,integer64) S3method(vec_ptype2.logical,logical) S3method(vec_ptype2.raw,raw) S3method(vec_ptype2.vctrs_list_of,vctrs_list_of) S3method(vec_ptype_abbr,"NULL") S3method(vec_ptype_abbr,AsIs) S3method(vec_ptype_abbr,Date) S3method(vec_ptype_abbr,POSIXct) S3method(vec_ptype_abbr,POSIXlt) S3method(vec_ptype_abbr,data.frame) S3method(vec_ptype_abbr,data.table) S3method(vec_ptype_abbr,default) S3method(vec_ptype_abbr,difftime) S3method(vec_ptype_abbr,factor) S3method(vec_ptype_abbr,integer64) S3method(vec_ptype_abbr,ordered) S3method(vec_ptype_abbr,table) S3method(vec_ptype_abbr,vctrs_list_of) S3method(vec_ptype_abbr,vctrs_unspecified) S3method(vec_ptype_finalise,default) S3method(vec_ptype_full,"NULL") S3method(vec_ptype_full,AsIs) S3method(vec_ptype_full,Date) S3method(vec_ptype_full,POSIXct) S3method(vec_ptype_full,POSIXlt) S3method(vec_ptype_full,default) S3method(vec_ptype_full,difftime) S3method(vec_ptype_full,factor) S3method(vec_ptype_full,integer64) S3method(vec_ptype_full,ordered) S3method(vec_ptype_full,table) S3method(vec_ptype_full,vctrs_list_of) S3method(vec_restore,AsIs) S3method(vec_restore,Date) S3method(vec_restore,POSIXct) S3method(vec_restore,POSIXlt) S3method(vec_restore,data.frame) S3method(vec_restore,default) S3method(vec_restore,exclude) S3method(vec_restore,factor) S3method(vec_restore,omit) S3method(vec_restore,ordered) S3method(vec_restore,table) S3method(vec_restore,vctrs_rcrd) S3method(vec_restore,vctrs_vctr) S3method(xtfrm,vctrs_sclr) S3method(xtfrm,vctrs_vctr) export("%0%") export("field<-") export("vec_slice<-") export(MISSING) export(allow_lossy_cast) export(as_list_of) export(data_frame) export(df_cast) export(df_list) export(df_ptype2) export(field) export(fields) export(is_list_of) export(list_all_recyclable) export(list_all_size) export(list_all_vectors) export(list_check_all_recyclable) export(list_check_all_size) export(list_check_all_vectors) export(list_combine) export(list_drop_empty) export(list_of) export(list_of_ptype) export(list_of_size) export(list_of_transpose) export(list_sizes) export(list_unchop) export(maybe_lossy_cast) export(n_fields) export(new_data_frame) export(new_date) export(new_datetime) export(new_duration) export(new_factor) export(new_list_of) export(new_ordered) export(new_rcrd) export(new_vctr) export(num_as_location) export(num_as_location2) export(obj_check_list) export(obj_check_vector) export(obj_is_list) export(obj_is_vector) export(obj_print) export(obj_print_data) export(obj_print_footer) export(obj_print_header) export(obj_str) export(obj_str_data) export(obj_str_footer) export(obj_str_header) export(s3_register) export(stop_incompatible_cast) export(stop_incompatible_op) export(stop_incompatible_size) export(stop_incompatible_type) export(tib_cast) export(tib_ptype2) export(unspecified) export(vec_any_missing) export(vec_arith) export(vec_arith.Date) export(vec_arith.POSIXct) export(vec_arith.POSIXlt) export(vec_arith.difftime) export(vec_arith.logical) export(vec_arith.numeric) export(vec_arith_base) export(vec_as_index) export(vec_as_location) export(vec_as_location2) export(vec_as_names) export(vec_as_names_legacy) export(vec_as_subscript) export(vec_as_subscript2) export(vec_assert) export(vec_assign) export(vec_c) export(vec_case_when) export(vec_cast) export(vec_cast.Date) export(vec_cast.POSIXct) export(vec_cast.POSIXlt) export(vec_cast.character) export(vec_cast.complex) export(vec_cast.data.frame) export(vec_cast.difftime) export(vec_cast.double) export(vec_cast.factor) export(vec_cast.integer) export(vec_cast.integer64) export(vec_cast.list) export(vec_cast.logical) export(vec_cast.ordered) export(vec_cast.raw) export(vec_cast.vctrs_list_of) export(vec_cast_common) export(vec_cbind) export(vec_cbind_frame_ptype) export(vec_check_list) export(vec_check_recyclable) export(vec_check_size) export(vec_chop) export(vec_compare) export(vec_count) export(vec_data) export(vec_default_cast) export(vec_default_ptype2) export(vec_detect_complete) export(vec_detect_missing) export(vec_duplicate_any) export(vec_duplicate_detect) export(vec_duplicate_id) export(vec_empty) export(vec_equal) export(vec_equal_na) export(vec_expand_grid) export(vec_fill_missing) export(vec_group_id) export(vec_group_loc) export(vec_group_rle) export(vec_identify_runs) export(vec_if_else) export(vec_in) export(vec_init) export(vec_init_along) export(vec_interleave) export(vec_is) export(vec_is_empty) export(vec_is_list) export(vec_locate_matches) export(vec_locate_sorted_groups) export(vec_match) export(vec_math) export(vec_math_base) export(vec_names) export(vec_names2) export(vec_order) export(vec_pall) export(vec_pany) export(vec_proxy) export(vec_proxy_compare) export(vec_proxy_equal) export(vec_proxy_order) export(vec_ptype) export(vec_ptype2) export(vec_ptype2.AsIs) export(vec_ptype2.Date) export(vec_ptype2.POSIXct) export(vec_ptype2.POSIXlt) export(vec_ptype2.character) export(vec_ptype2.complex) export(vec_ptype2.data.frame) export(vec_ptype2.difftime) export(vec_ptype2.double) export(vec_ptype2.factor) export(vec_ptype2.integer) export(vec_ptype2.integer64) export(vec_ptype2.list) export(vec_ptype2.logical) export(vec_ptype2.ordered) export(vec_ptype2.raw) export(vec_ptype2.vctrs_list_of) export(vec_ptype_abbr) export(vec_ptype_common) export(vec_ptype_finalise) export(vec_ptype_full) export(vec_ptype_show) export(vec_rank) export(vec_rbind) export(vec_recode_values) export(vec_recycle) export(vec_recycle_common) export(vec_rep) export(vec_rep_each) export(vec_repeat) export(vec_replace_values) export(vec_replace_when) export(vec_restore) export(vec_run_sizes) export(vec_seq_along) export(vec_set_difference) export(vec_set_intersect) export(vec_set_names) export(vec_set_symmetric_difference) export(vec_set_union) export(vec_size) export(vec_size_common) export(vec_slice) export(vec_sort) export(vec_split) export(vec_type) export(vec_type2) export(vec_type_common) export(vec_unchop) export(vec_unique) export(vec_unique_count) export(vec_unique_loc) export(vec_unrep) import(rlang) importFrom(stats,median) importFrom(stats,na.exclude) importFrom(stats,na.fail) importFrom(stats,na.omit) importFrom(stats,quantile) useDynLib(vctrs, .registration = TRUE) vctrs/LICENSE0000644000176200001440000000005314401442234012414 0ustar liggesusersYEAR: 2023 COPYRIGHT HOLDER: vctrs authors vctrs/NEWS.md0000644000176200001440000017226015157322033012522 0ustar liggesusers# vctrs 0.7.2 * `vec_restore()`'s default method now consistently clears unrecognized attributes from `x`. Previously: ```r x <- structure(1, foo = "bar") to <- 2 vec_restore(x, to) #> structure(1, foo = "bar") x <- structure(1, foo = "bar") to <- structure(2, a = "b") vec_restore(x, to) #> structure(1, a = "b") ``` Now both `vec_restore()` calls consistently clear the unrecognized `foo` attribute (#2157). * `vec_equal()` now considers two objects with the same attribute names and values equivalent even if the attribute ordering is different. This reflects the fact that attributes are generally viewed as a map rather than an ordered list (#2156). * Pairlist tags and attribute names are now incorporated during object hashing. In practice this can come up when list elements are hashed inside functions like `vec_unique()` (#2154). ```r # Only difference is the attribute name x <- list( structure(1, foo = 2), structure(1, bar = 2) ) # Used to return `x[1]`, now returns `x` vec_unique(x) ``` * vctrs is now fully compliant with the R C API. # vctrs 0.7.1 * Fixed some protection issues discovered by rchk. # vctrs 0.7.0 ## Features * New `vec_if_else()` for performing a vectorized if-else. It is exactly the same as `dplyr::if_else()`, but much faster and more memory efficient (#2030). * New `vec_case_when()` and `vec_replace_when()` for recoding and replacing using logical conditions (#2024). * New `vec_recode_values()` and `vec_replace_values()` for recoding and replacing values. In particular, this makes it easy to recode a vector using a lookup table (#2027). * New `list_combine()` for combining a list of vectors together according to a set of `indices`. We now recommend using: * `list_combine(x, indices = indices, size = size)` over `list_unchop(x, indices = indices)` * `vec_c(!!!x)` over `list_unchop(x)` `list_unchop()` is not being deprecated, we just no longer feel like it has the best name or the most correct API, and all future work will be put into improving `list_combine()`. `list_combine()` is already much more powerful than `list_unchop()`, with new `unmatched`, `multiple`, and `slice_x` (like `slice_value` of `vec_assign()`) arguments and the ability to provide logical `indices`. `list_combine()` is the engine that powers `vec_case_when()`, `vec_replace_when()`, `vec_recode_values()`, `vec_replace_values()`, and parts of `vec_if_else()`. * New `vec_pany()` and `vec_pall()`, parallel variants of `any()` and `all()` (in the same way that `pmin()` and `pmax()` are parallel variants of `min()` and `max()`). * New `list_of_transpose()` for transposing a `` (#2059). * New `list_of_ptype()` and `list_of_size()` accessors. * New `vec_check_recyclable()`, `list_all_recyclable()`, and `list_check_all_recyclable()`. * New `slice_value` argument for `vec_assign()` to optionally slice `value` by `i` before performing the assignment. It is an optimized form of `vec_slice(x, i) <- vec_slice(value, i)` that avoids materializing `vec_slice(value, i)` (#2009). * New `.size` argument for `list_of()` that can restrict the element size in addition to the element type. For example: ``` r # Restricts the element type, but not the size (default behavior) list_of(1:2, 3:4, .ptype = integer(), .size = zap()) # Restricts the element size, but not the type list_of(1:2, 3:4, .ptype = zap(), .size = 2) # Restricts the element type and size list_of(1:2, 3:4, .ptype = integer(), .size = 2) ``` * New `.name_spec = "inner"` option for `vec_c()`, `list_unchop()`, and `vec_rbind()`. This efficiently ignores all outer names, while retaining any inner names (#1988). * New `allow_null` argument for `list_all_vectors()`, `list_all_size()`, `list_check_all_vectors()`, and `list_check_all_size()`, which skips over `NULL` when performing their respective check (#1762). * New `.size` and `.error_call` arguments for `vec_interleave()`. * New `.finalise` argument for `vec_ptype_common()` that defaults to `TRUE`. Setting this to `FALSE` lets you opt out of prototype finalisation, which allows `vec_ptype_common()` to act like `vec_ptype()` and `vec_ptype2()`, which don't finalise. This can be useful in some advanced common type determination cases (#2100). * The following functions are no longer experimental: * `vec_fill_missing()` * `vec_group_id()` * `vec_group_loc()` * `vec_group_rle()` * `vec_locate_matches()` ## Bug fixes * data.table's `IDate` class now has `vec_proxy()` and `vec_restore()` methods, fixing a number of issues with that class (#1549, #1961, #1972, #1781). * `vec_detect_complete(NULL)` now returns `logical()`, consistent with `vec_detect_missing(NULL)` (#1916). * `vec_assign()` no longer modifies `POSIXlt` and `vctrs_rcrd` types in place (#1951). * `vec_interleave()` now reports the correct index in errors when `NULL`s are present. * `list_unchop()` now assigns names correctly when overlapping `indices` are involved (#2019). * `list_unchop()` now works in an edge case with a single `NA` recycled to size 0 (#1989). * `list_unchop()` now correctly respects `indices` when combining fallback data frame columns (#1975). * `vec_locate_sorted_groups()` and `vec_order_radix()` no longer crash on columns of type complex (tidyverse/dplyr#7708). * Hashing is now supported for lists containing complex or raw vectors, enabling functions like `vec_unique_loc()` to work on these objects (#1992, #2046). * `obj_check_vector()` now throws a clearer error message. In particular, special info bullets have been added that link out to FAQ pages and explain common issues around incompatible S3 lists and data frames (#2061). * `vec_rank()` now throws an improved error on non-vector types, like `NULL` (#1967). * `vec_ptype_common()` now reports more accurate error argument names (#2048). * Fixed the C level signature for the `exp_short_init_compact_seq()` callable. * Methods for the deprecated testthat function `is_informative_error()` have been removed (#2089). ## Performance * `vec_c()`, `list_unchop()`, `vec_size_common()`, `vec_recycle_common()`, `vec_ptype_common()`, `list_sizes()`, `list_check_all_vectors()`, and other vctrs functions that take a list of objects are now more performant, particularly when many small objects are provided (#2034, #2035, #2041, #2042, #2043, #2044, #2070). * `vec_match()`, `vec_in()`, `vec_group_loc()`, `vec_count()`, `vec_unique()` and other functions backed by a dictionary based implementation are often significantly faster, depending on the exact inputs used (#1976). * `vec_equal()` now efficiently internally recycles `x` and `y` elements of size 1 (#2028). * `list_unchop()` now efficiently internally recycles `x` elements of size 1 (#2013). * `vec_assign()` and `vec_slice<-()` now efficiently internally recycle `value` of size 1. * `vec_assign()` and `vec_slice<-()` are now more efficient with logical `i` (#2009). * `vec_cast()` with arrays no longer clones when no casting is required (#2006). ## Breaking changes * R >=4.0.0 is now required. This is still more permissive than the general tidyverse policy of supporting the [5 most recent versions of R](https://tidyverse.org/blog/2019/04/r-version-support/). * `obj_is_list()` now returns `FALSE` for list arrays. Functions such as `list_drop_empty()` and `list_combine()` validate their input using `obj_is_list()`, but aren't well defined on list arrays. * Assigning `NULL` into a `` via `x[[i]] <- NULL` now shortens the list to better align with base R and the existing `$<-` and `[<-` methods (#2112). * `as_list_of()` on an existing `` no longer has a `.ptype` argument for changing the type on the fly, as this feels incompatible with the new system that allows restricting both the type and size. If you really need this, coerce to a bare list with `as.list()` first, then coerce back to a `` using the `` method of `as_list_of()`. * Experimental "partial" type support has been removed. This idea never panned out and was not widely used. The following functions have been removed (#2101): * `is_partial()` * `new_partial()` * `partial_factor()` * `partial_frame()` * The deprecated C callable for `vec_is_vector()` has been removed. # vctrs 0.6.5 * Internal changes requested by CRAN around C level format strings (#1896). * Fixed tests related to changes to `dim<-()` in R-devel (#1889). # vctrs 0.6.4 * Fixed a performance issue with `vec_c()` and ALTREP vectors (in particular, the new ALTREP list vectors in R-devel) (#1884). * Fixed an issue with complex vector tests related to changes in R-devel (#1883). * Added a class to the `vec_locate_matches()` error that is thrown when an overflow would otherwise occur (#1845). * Fixed an issue with `vec_rank()` and 0-column data frames (#1863). # vctrs 0.6.3 * Fixed an issue where certain ALTREP row names were being materialized when passed to `new_data_frame()`. We've fixed this by removing a safeguard in `new_data_frame()` that performed a compatibility check when both `n` and `row.names` were provided. Because this is a low level function designed for performance, it is up to the caller to ensure these inputs are compatible (tidyverse/dplyr#6596). * Fixed an issue where `vec_set_*()` used with data frames could accidentally return an object with the type of the proxy rather than the type of the original inputs (#1837). * Fixed a rare `vec_locate_matches()` bug that could occur when using a max/min `filter` (tidyverse/dplyr#6835). # vctrs 0.6.2 * Fixed conditional S3 registration to avoid a CRAN check NOTE that appears in R >=4.3.0 (#1832). * Fixed tests to maintain compatibility with the next version of waldo (#1829). # vctrs 0.6.1 * Fixed a test related to `c.sfc()` changes in sf 1.0-10 (#1817). # vctrs 0.6.0 * New `vec_run_sizes()` for computing the size of each run within a vector. It is identical to the `times` column from `vec_unrep()`, but is faster if you don't need the run key (#1210). * New `sizes` argument to `vec_chop()` which allows you to partition a vector using an integer vector describing the size of each expected slice. It is particularly useful in combination with `vec_run_sizes()` and `list_sizes()` (#1210, #1598). * New `obj_is_vector()`, `obj_check_vector()`, and `vec_check_size()` validation helpers. We believe these are a better approach to vector validation than `vec_assert()` and `vec_is()`, which have been marked as questioning because the semantics of their `ptype` arguments are hard to define and can often be replaced by `vec_cast()` or a type predicate function like `rlang::is_logical()` (#1784). * `vec_is_list()` and `vec_check_list()` have been renamed to `obj_is_list()` and `obj_check_list()`, in line with the new `obj_is_vector()` helper. The old functions have been silently deprecated, but an official deprecation process will start in the next vctrs release (#1803). * `vec_locate_matches()` gains a new `relationship` argument that holistically handles multiple matches between `needles` and `haystack`. In particular, `relationship = "many-to-one"` replaces `multiple = "error"` and `multiple = "warning"`, which have been removed from the documentation and silently soft-deprecated. Official deprecation for those options will start in a future release (#1791). * `vec_locate_matches()` has changed its default `needles_arg` and `haystack_arg` values from `""` to `"needles"` and `"haystack"`, respectively. This generally generates more informative error messages (#1792). * `vec_chop()` has gained empty `...` between `x` and the optional `indices` argument. For backwards compatibility, supplying `vec_chop(x, indices)` without naming `indices` still silently works, but will be deprecated in a future release (#1813). * `vec_slice()` has gained an `error_call` argument (#1785). * The `numeric_version` type from base R is now better supported in equality, comparison, and order based operations (tidyverse/dplyr#6680). * R >=3.5.0 is now explicitly required. This is in line with the tidyverse policy of supporting the [5 most recent versions of R](https://tidyverse.org/blog/2019/04/r-version-support/). # vctrs 0.5.2 * New `vec_expand_grid()`, which is a lower level helper that is similar to `tidyr::expand_grid()` (#1325). * New `vec_set_intersect()`, `vec_set_difference()`, `vec_set_union()`, and `vec_set_symmetric_difference()` which compute set operations like `intersect()`, `setdiff()`, and `union()`, but the vctrs variants don't strip attributes and work with data frames (#1755, #1765). * `vec_identify_runs()` is now faster when used with data frames (#1684). * The maximum load factor of the internal dictionary was reduced from 77% to 50%, which improves performance of functions like `vec_match()`, `vec_set_intersect()`, and `vec_unique()` in some cases (#1760). * Fixed a bug with the internal `vec_order_radix()` function related to matrix columns (#1753). # vctrs 0.5.1 * Fix for CRAN checks. # vctrs 0.5.0 * vctrs is now compliant with `-Wstrict-prototypes` as requested by CRAN (#1729). * `vec_ptype2()` now consistently falls back to bare data frame in case of incompatible data frame subclasses. This is part of a general move towards relaxed coercion rules. * Common type and cast errors now inherit from `"vctrs_error_ptype2"` and `"vctrs_error_cast"` respectively. They are still both subclasses from `"vctrs_error_incompatible_type"` (which used to be their most specific class and is now a parent class). * New `list_all_size()` and `list_check_all_size()` to quickly determine if a list contains elements of a particular `size` (#1582). * `list_unchop()` has gained empty `...` to force optional arguments to be named (#1715). * `vec_rep_each(times = 0)` now works correctly with logical vectors that are considered unspecified and with named vectors (#1673). * `list_of()` was relaxed to make it easier to combine. It is now coercible with `list()` (#1161). When incompatible `list_of()` types are combined, the result is now a bare `list()`. Following this change, the role of `list_of()` is mainly to carry type information for potential optimisations, rather than to guarantee a certain type throughout an analysis. * `validate_list_of()` has been removed. It hasn't proven to be practically useful, and isn't used by any packages on CRAN (#1697). * Directed calls to `vec_c()`, like `vec_c(.ptype = )`, now mention the position of the problematic argument when there are cast errors (#1690). * `list_unchop()` no longer drops names in some cases when `indices` were supplied (#1689). * `"unique_quiet"` and `"universal_quiet"` are newly accepted by `vec_as_names(repair =)` and `vec_names2(repair =)`. These options exist to help users who call these functions indirectly, via another function which only exposes `repair` but not `quiet`. Specifying `repair = "unique_quiet"` is like specifying `repair = "unique", quiet = TRUE`. When the `"*_quiet"` options are used, any setting of `quiet` is silently overridden (@jennybc, #1629). `"unique_quiet"` and `"universal_quiet"` are also newly accepted for the name repair argument of several other functions that do not expose a `quiet` argument: `data_frame()`, `df_list()`, `vec_c()`, `list_unchop()`, `vec_interleave()`, `vec_rbind()`, and `vec_cbind()` (@jennybc, #1716). * `list_unchop()` has gained `error_call` and `error_arg` arguments (#1641, #1692). * `vec_c()` has gained `.error_call` and `.error_arg` arguments (#1641, #1692). * Improved the performance of list-of common type methods (#1686, #875). * The list-of method for `as_list_of()` now places the optional `.ptype` argument after the `...` (#1686). * `vec_rbind()` now applies `base::c()` fallback recursively within packed df-cols (#1331, #1462, #1640). * `vec_c()`, `vec_unchop()`, and `vec_rbind()` now proxy and restore recursively (#1107). This prevents `vec_restore()` from being called with partially filled vectors and improves performance (#1217, #1496). * New `vec_any_missing()` for quickly determining if a vector has any missing values (#1672). * `vec_equal_na()` has been renamed to `vec_detect_missing()` to align better with vctrs naming conventions. `vec_equal_na()` will stick around for a few minor versions, but has been formally soft-deprecated (#1672). * `vec_c(outer = c(inner = 1))` now produces correct error messages (#522). * If a data frame is returned as the proxy from `vec_proxy_equal()`, `vec_proxy_compare()`, or `vec_proxy_order()`, then the corresponding proxy function is now automatically applied recursively along all of the columns. Additionally, packed data frame columns will be unpacked, and 1 column data frames will be unwrapped. This ensures that the simplest possible types are provided to the native C algorithms, improving both correctness and performance (#1664). * When used with record vectors, `vec_proxy_compare()` and `vec_proxy_order()` now call the correct proxy function while recursing over the fields (#1664). * The experimental function `vec_list_cast()` has been removed from the package (#1382). * Native classes like dates and datetimes now accept dimensions (#1290, #1329). * `vec_compare()` now throws a more informative error when attempting to compare complex vectors (#1655). * `vec_rep()` and friends gain `error_call`, `x_arg`, and `times_arg` arguments so they can be embedded in frontends (#1303). * Record vectors now fail as expected when indexed along dimensions greater than 1 (#1295). * `vec_order()` and `vec_sort()` now have `...` between the required and optional arguments to make them easier to extend (#1647). * S3 vignette was extended to show how to make the polynomial class atomic instead of a list (#1030). * The experimental `n` argument of `vec_restore()` has been removed. It was only used to inform on the size of data frames in case a bare list is restored. It is now expected that bare lists be initialised to data frame so that the size is carried through row attributes. This makes the generic simpler and fixes some performance issues (#650). * The `anyNA()` method for `vctrs_vctr` (and thus `vctrs_list_of`) now supports the `recursive` argument (#1278). * `vec_as_location()` and `num_as_location()` have gained a `missing = "remove"` option (#1595). * `vec_as_location()` no longer matches `NA_character_` and `""` indices if those invalid names appear in `names` (#1489). * `vec_unchop()` has been renamed to `list_unchop()` to better indicate that it requires list input. `vec_unchop()` will stick around for a few minor versions, but has been formally soft-deprecated (#1209). * Lossy cast errors during scalar subscript validation now have the correct message (#1606). * Fixed confusing error message with logical `[[` subscripts (#1608). * New `vec_rank()` to compute various types of sample ranks (#1600). * `num_as_location()` now throws the right error when there are out-of-bounds negative values and `oob = "extend"` and `negative = "ignore"` are set (#1614, #1630). * `num_as_location()` now works correctly when a combination of `zero = "error"` and `negative = "invert"` are used (#1612). * `data_frame()` and `df_list()` have gained `.error_call` arguments (#1610). * `vec_locate_matches()` has gained an `error_call` argument (#1611). * `"select"` and `"relocate"` have been added as valid subscript actions to support tidyselect and dplyr (#1596). * `num_as_location()` has a new `oob = "remove"` argument to remove out-of-bounds locations (#1595). * `vec_rbind()` and `vec_cbind()` now have `.error_call` arguments (#1597). * `df_list()` has gained a new `.unpack` argument to optionally disable data frame unpacking (#1616). * `vec_check_list(arg = "")` now throws the correct error (#1604). * The `difftime` to `difftime` `vec_cast()` method now standardizes the internal storage type to double, catching potentially corrupt integer storage `difftime` vectors (#1602). * `vec_as_location2()` and `vec_as_subscript2()` more correctly utilize their `call` arguments (#1605). * `vec_count(sort = "count")` now uses a stable sorting method. This ensures that different keys with the same count are sorted in the order that they originally appeared in (#1588). * Lossy cast error conditions now show the correct message when `conditionMessage()` is called on them (#1592). * Fixed inconsistent reporting of conflicting inputs in `vec_ptype_common()` (#1570). * `vec_ptype_abbr()` and `vec_ptype_full()` now suffix 1d arrays with `[1d]`. * `vec_ptype_abbr()` and `vec_ptype_full()` methods are no longer inherited (#1549). * `vec_cast()` now throws the correct error when attempting to cast a subclassed data frame to a non-data frame type (#1568). * `vec_locate_matches()` now uses a more conservative heuristic when taking the joint ordering proxy. This allows it to work correctly with sf's sfc vectors and the classes from the bignum package (#1558). * An sfc method for `vec_proxy_order()` was added to better support the sf package. These vectors are generally treated like list-columns even though they don't explicitly have a `"list"` class, and the `vec_proxy_order()` method now forwards to the list method to reflect that (#1558). * `vec_proxy_compare()` now works correctly for raw vectors wrapped in `I()`. `vec_proxy_order()` now works correctly for raw and list vectors wrapped in `I()` (#1557). # vctrs 0.4.2 * HTML documentation fixes for CRAN checks. # vctrs 0.4.1 * OOB errors with `character()` indexes use "that don't exist" instead of "past the end" (#1543). * Fixed memory protection issues related to common type determination (#1551, tidyverse/tidyr#1348). # vctrs 0.4.0 * New experimental `vec_locate_sorted_groups()` for returning the locations of groups in sorted order. This is equivalent to, but faster than, calling `vec_group_loc()` and then sorting by the `key` column of the result. * New experimental `vec_locate_matches()` for locating where each observation in one vector matches one or more observations in another vector. It is similar to `vec_match()`, but returns all matches by default (rather than just the first), and can match on binary conditions other than equality. The algorithm is inspired by data.table's very fast binary merge procedure. * The `vec_proxy_equal()`, `vec_proxy_compare()`, and `vec_proxy_order()` methods for `vctrs_rcrd` are now applied recursively over the fields (#1503). * Lossy cast errors now inherit from incompatible type errors. * `vec_is_list()` now returns `TRUE` for `AsIs` lists (#1463). * `vec_assert()`, `vec_ptype2()`, `vec_cast()`, and `vec_as_location()` now use `caller_arg()` to infer a default `arg` value from the caller. This may result in unhelpful arguments being mentioned in error messages. In general, you should consider snapshotting vctrs error messages thrown in your package and supply `arg` and `call` arguments if the error context is not adequately reported to your users. * `vec_ptype_common()`, `vec_cast_common()`, `vec_size_common()`, and `vec_recycle_common()` gain `call` and `arg` arguments for specifying an error context. * `vec_compare()` can now compare zero column data frames (#1500). * `new_data_frame()` now errors on negative and missing `n` values (#1477). * `vec_order()` now correctly orders zero column data frames (#1499). * vctrs now depends on cli to help with error message generation. * New `vec_check_list()` and `list_check_all_vectors()` input checkers, and an accompanying `list_all_vectors()` predicate. * New `vec_interleave()` for combining multiple vectors together, interleaving their elements in the process (#1396). * `vec_equal_na(NULL)` now returns `logical(0)` rather than erroring (#1494). * `vec_as_location(missing = "error")` now fails with `NA` and `NA_character_` in addition to `NA_integer_` (#1420, @krlmlr). * Starting with rlang 1.0.0, errors are displayed with the contextual function call. Several vctrs operations gain a `call` argument that makes it possible to report the correct context in error messages. This concerns: - `vec_cast()` and `vec_ptype2()` - `vec_default_cast()` and `vec_default_ptype2()` - `vec_assert()` - `vec_as_names()` - `stop_` constructors like `stop_incompatible_type()` Note that default `vec_cast()` and `vec_ptype2()` methods automatically support this if they pass `...` to the corresponding `vec_default_` functions. If you throw a non-internal error from a non-default method, add a `call = caller_env()` argument in the method and pass it to `rlang::abort()`. * If `NA_character_` is specified as a name for `vctrs_vctr` objects, it is now automatically repaired to `""` (#780). * `""` is now an allowed name for `vctrs_vctr` objects and all its subclasses (`vctrs_list_of` in particular) (#780). * `list_of()` is now much faster when many values are provided. * `vec_as_location()` evaluates `arg` only in case of error, for performance (#1150, @krlmlr). * `levels.vctrs_vctr()` now returns `NULL` instead of failing (#1186, @krlmlr). * `vec_assert()` produces a more informative error when `size` is invalid (#1470). * `vec_duplicate_detect()` is a bit faster when there are many unique values. * `vec_proxy_order()` is described in `vignette("s3-vectors")` (#1373, @krlmlr). * `vec_chop()` now materializes ALTREP vectors before chopping, which is more efficient than creating many small ALTREP pieces (#1450). * New `list_drop_empty()` for removing empty elements from a list (#1395). * `list_sizes()` now propagates the names of the list onto the result. * Name repair messages are now signaled by `rlang::names_inform_repair()`. This means that the messages are now sent to stdout by default rather than to stderr, resulting in prettier messages. Additionally, name repair messages can now be silenced through the global option `rlib_name_repair_verbosity`, which is useful for testing purposes. See `?names_inform_repair` for more information (#1429). * `vctrs_vctr` methods for `na.omit()`, `na.exclude()`, and `na.fail()` have been added (#1413). * `vec_init()` is now slightly faster (#1423). * `vec_set_names()` no longer corrupts `vctrs_rcrd` types (#1419). * `vec_detect_complete()` now computes completeness for `vctrs_rcrd` types in the same way as data frames, which means that if any field is missing, the entire record is considered incomplete (#1386). * The `na_value` argument of `vec_order()` and `vec_sort()` now correctly respect missing values in lists (#1401). * `vec_rep()` and `vec_rep_each()` are much faster for `times = 0` and `times = 1` (@mgirlich, #1392). * `vec_equal_na()` and `vec_fill_missing()` now work with integer64 vectors (#1304). * The `xtfrm()` method for vctrs_vctr objects no longer accidentally breaks ties (#1354). * `min()`, `max()` and `range()` no longer throw an error if `na.rm = TRUE` is set and all values are `NA` (@gorcha, #1357). In this case, and where an empty input is given, it will return `Inf`/`-Inf`, or `NA` if `Inf` can't be cast to the input type. * `vec_group_loc()`, used for grouping in dplyr, now correctly handles vectors with billions of elements (up to `.Machine$integer.max`) (#1133). # vctrs 0.3.8 * Compatibility with next version of rlang. # vctrs 0.3.7 * `vec_ptype_abbr()` gains arguments to control whether to indicate named vectors with a prefix (`prefix_named`) and indicate shaped vectors with a suffix (`suffix_shape`) (#781, @krlmlr). * `vec_ptype()` is now an optional _performance_ generic. It is not necessary to implement, but if your class has a static prototype, you might consider implementing a custom `vec_ptype()` method that returns a constant to improve performance in some cases (such as common type imputation). * New `vec_detect_complete()`, inspired by `stats::complete.cases()`. For most vectors, this is identical to `!vec_equal_na()`. For data frames and matrices, this detects rows that only contain non-missing values. * `vec_order()` can now order complex vectors (#1330). * Removed dependency on digest in favor of `rlang::hash()`. * Fixed an issue where `vctrs_rcrd` objects were not being proxied correctly when used as a data frame column (#1318). * `register_s3()` is now licensed with the "unlicense" which makes it very clear that it's fine to copy and paste into your own package (@maxheld83, #1254). # vctrs 0.3.6 * Fixed an issue with tibble 3.0.0 where removing column names with `names(x) <- NULL` is now deprecated (#1298). * Fixed a GCC 11 issue revealed by CRAN checks. # vctrs 0.3.5 * New experimental `vec_fill_missing()` for filling in missing values with the previous or following value. It is similar to `tidyr::fill()`, but also works with data frames and has an additional `max_fill` argument to limit the number of sequential missing values to fill. * New `vec_unrep()` to compress a vector with repeated values. It is very similar to run length encoding, and works nicely alongside `vec_rep_each()` as a way to invert the compression. * `vec_cbind()` with only empty data frames now preserves the common size of the inputs in the result (#1281). * `vec_c()` now correctly returns a named result with named empty inputs (#1263). * vctrs has been relicensed as MIT (#1259). * Functions that make comparisons within a single vector, such as `vec_unique()`, or between two vectors, such as `vec_match()`, now convert all character input to UTF-8 before making comparisons (#1246). * New `vec_identify_runs()` which returns a vector of identifiers for the elements of `x` that indicate which run of repeated values they fall in (#1081). * Fixed an encoding translation bug with lists containing data frames which have columns where `vec_size()` is different from the low level `Rf_length()` (#1233). # vctrs 0.3.4 * Fixed a GCC sanitiser error revealed by CRAN checks. # vctrs 0.3.3 * The `table` class is now implemented as a wrapper type that delegates its coercion methods. It used to be restricted to integer tables (#1190). * Named one-dimensional arrays now behave consistently with simple vectors in `vec_names()` and `vec_rbind()`. * `new_rcrd()` now uses `df_list()` to validate the fields. This makes it more flexible as the fields can now be of any type supported by vctrs, including data frames. * Thanks to the previous change the `[[` method of records now preserves list fields (#1205). * `vec_data()` now preserves data frames. This is consistent with the notion that data frames are a primitive vector type in vctrs. This shouldn't affect code that uses `[[` and `length()` to manipulate the data. On the other hand, the vctrs primitives like `vec_slice()` will now operate rowwise when `vec_data()` returns a data frame. * `outer` is now passed unrecycled to name specifications. Instead, the return value is recycled (#1099). * Name specifications can now return `NULL`. The names vector will only be allocated if the spec function returns non-`NULL` during the concatenation. This makes it possible to ignore outer names without having to create an empty names vector when there are no inner names: ``` zap_outer_spec <- function(outer, inner) if (is_character(inner)) inner # `NULL` names rather than a vector of "" names(vec_c(a = 1:2, .name_spec = zap_outer_spec)) #> NULL # Names are allocated when inner names exist names(vec_c(a = 1:2, c(b = 3L), .name_spec = zap_outer_spec)) #> [1] "" "" "b" ``` * Fixed several performance issues in `vec_c()` and `vec_unchop()` with named vectors. * The restriction that S3 lists must have a list-based proxy to be considered lists by `vec_is_list()` has been removed (#1208). * New performant `data_frame()` constructor for creating data frames in a way that follows tidyverse semantics. Among other things, inputs are recycled using tidyverse recycling rules, strings are never converted to factors, list-columns are easier to create, and unnamed data frame input is automatically spliced. * New `df_list()` for safely and consistently constructing the data structure underlying a data frame, a named list of equal-length vectors. It is useful in combination with `new_data_frame()` for creating user-friendly constructors for data frame subclasses that use the tidyverse rules for recycling and determining types. * Fixed performance issue with `vec_order()` on classed vectors which affected `dplyr::group_by()` (tidyverse/dplyr#5423). * `vec_set_names()` no longer alters the input in-place (#1194). * New `vec_proxy_order()` that provides an ordering proxy for use in `vec_order()` and `vec_sort()`. The default method falls through to `vec_proxy_compare()`. Lists are special cased, and return an integer vector proxy that orders by first appearance. * List columns in data frames are no longer comparable through `vec_compare()`. * The experimental `relax` argument has been removed from `vec_proxy_compare()`. # vctrs 0.3.2 * Fixed a performance issue in `bind_rows()` with S3 columns (#1122, #1124, #1151, tidyverse/dplyr#5327). * `vec_slice()` now checks sizes of data frame columns in case the data structure is corrupt (#552). * The native routines in vctrs now dispatch and evaluate in the vctrs namespace. This improves the continuity of evaluation in backtraces. * `new_data_frame()` is now twice as fast when `class` is supplied. * New `vec_names2()`, `vec_names()` and `vec_set_names()` (#1173). # vctrs 0.3.1 * `vec_slice()` no longer restores attributes of foreign objects for which a `[` method exist. This fixes an issue with `ts` objects which were previously incorrectly restored. * The `as.list()` method for `vctrs_rcrd` objects has been removed in favor of directly using the method for `vctrs_vctr`, which calls `vec_chop()`. * `vec_c()` and `vec_rbind()` now fall back to `base::c()` if the inputs have a common class hierarchy for which a `c()` method is implemented but no self-to-self `vec_ptype2()` method is implemented. * `vec_rbind()` now internally calls `vec_proxy()` and `vec_restore()` on the data frame common type that is used to create the output (#1109). * `vec_as_location2("0")` now works correctly (#1131). * `?reference-faq-compatibility` is a new reference guide on vctrs primitives. It includes an overview of the fallbacks to base R generics implemented in vctrs for compatibility with existing classes. * The documentation of vctrs functions now includes a Dependencies section to reference which other vctrs operations are called from that function. By following the dependencies links recursively, you will find the vctrs primitives on which an operation relies. ## CRAN results * Fixed type declaration mismatches revealed by LTO build. * Fixed r-devel issue with new `c.factor()` method. # vctrs 0.3.0 This version features an overhaul of the coercion system to make it more consistent and easier to implement. See the _Breaking changes_ and _Type system_ sections for details. There are three new documentation topics if you'd like to learn how to implement coercion methods to make your class compatible with tidyverse packages like dplyr: * https://vctrs.r-lib.org/reference/theory-faq-coercion.html for an overview of the coercion mechanism in vctrs. * https://vctrs.r-lib.org/reference/howto-faq-coercion.html for a practical guide about implementing methods for vectors. * https://vctrs.r-lib.org/reference/howto-faq-coercion-data-frame.html for a practical guide about implementing methods for data frames. ## Reverse dependencies troubleshooting The following errors are caused by breaking changes. * `"Can't convert to ."` `vec_cast()` no longer converts to list. Use `vec_chop()` or `as.list()` instead. * `"Can't convert to ."` `vec_cast()` no longer converts to character. Use `as.character()`to deparse objects. * `"names for target but not for current"` Names of list-columns are now preserved by `vec_rbind()`. Adjust tests accordingly. ## Breaking changes * Double-dispatch methods for `vec_ptype2()` and `vec_cast()` are no longer inherited (#710). Class implementers must implement one set of methods for each compatible class. For example, a tibble subclass no longer inherits from the `vec_ptype2()` methods between `tbl_df` and `data.frame`. This means that you explicitly need to implement `vec_ptype2()` methods with `tbl_df` and `data.frame`. This change requires a bit more work from class maintainers but is safer because the coercion hierarchies are generally different from class hierarchies. See the S3 dispatch section of `?vec_ptype2` for more information. * `vec_cast()` is now restricted to the same conversions as `vec_ptype2()` methods (#606, #741). This change is motivated by safety and performance: - It is generally sloppy to generically convert arbitrary inputs to one type. Restricted coercions are more predictable and allow your code to fail earlier when there is a type issue. - When unrestricted conversions are useful, this is generally towards a known type. For example, `glue::glue()` needs to convert arbitrary inputs to the known character type. In this case, using double dispatch instead of a single dispatch generic like `as.character()` is wasteful. - To implement the useful semantics of coercible casts (already used in `vec_assign()`), two double dispatch were needed. Now it can be done with one double dispatch by calling `vec_cast()` directly. * `stop_incompatible_cast()` now throws an error of class `vctrs_error_incompatible_type` rather than `vctrs_error_incompatible_cast`. This means that `vec_cast()` also throws errors of this class, which better aligns it with `vec_ptype2()` now that they are restricted to the same conversions. * The `y` argument of `stop_incompatible_cast()` has been renamed to `to` to better match `to_arg`. ## Type system * Double-dispatch methods for `vec_ptype2()` and `vec_cast()` are now easier to implement. They no longer need any the boiler plate. Implementing a method for classes `foo` and `bar` is now as simple as: ``` #' @export vec_ptype2.foo.bar <- function(x, y, ...) new_foo() ``` vctrs also takes care of implementing the default and unspecified methods. If you have implemented these methods, they are no longer called and can now be removed. One consequence of the new dispatch mechanism is that `NextMethod()` is now completely unsupported. This is for the best as it never worked correctly in a double-dispatch setting. Parent methods must now be called manually. * `vec_ptype2()` methods now get zero-size prototypes as inputs. This guarantees that methods do not peek at the data to determine the richer type. * `vec_is_list()` no longer allows S3 lists that implement a `vec_proxy()` method to automatically be considered lists. A S3 list must explicitly inherit from `"list"` in the base class to be considered a list. * `vec_restore()` no longer restores row names if the target is not a data frame. This fixes an issue where `POSIXlt` objects would carry a `row.names` attribute after a proxy/restore roundtrip. * `vec_cast()` to and from data frames preserves the row names of inputs. * The internal function `vec_names()` now returns row names if the input is a data frame. Similarly, `vec_set_names()` sets row names on data frames. This is part of a general effort at making row names the vector names of data frames in vctrs. If necessary, the row names are repaired verbosely but without error to make them unique. This should be a mostly harmless change for users, but it could break unit tests in packages if they make assumptions about the row names. ## Compatibility and fallbacks * With the double dispatch changes, the coercion methods are no longer inherited from parent classes. This is because the coercion hierarchy is in principle different from the S3 hierarchy. A consequence of this change is that subclasses that don't implement coercion methods are now in principle incompatible. This is particularly problematic with subclasses of data frames for which throwing incompatible errors would be too incovenient for users. To work around this, we have implemented a fallback to the relevant base data frame class (either `data.frame` or `tbl_df`) in coercion methods (#981). This fallback is silent unless you set the `vctrs:::warn_on_fallback` option to `TRUE`. In the future we may extend this fallback principle to other base types when they are explicitly included in the class vector (such as `"list"`). * Improved support for foreign classes in the combining operations `vec_c()`, `vec_rbind()`, and `vec_unchop()`. A foreign class is a class that doesn't implement `vec_ptype2()`. When all the objects to combine have the same foreign class, one of these fallbacks is invoked: - If the class implements a `base::c()` method, the method is used for the combination. (FIXME: `vec_rbind()` currently doesn't use this fallback.) - Otherwise if the objects have identical attributes and the same base type, we consider them to be compatible. The vectors are concatenated and the attributes are restored (#776). These fallbacks do not make your class completely compatible with vctrs-powered packages, but they should help in many simple cases. * `vec_c()` and `vec_unchop()` now fall back to `base::c()` for S4 objects if the object doesn't implement `vec_ptype2()` but sets an S4 `c()` method (#919). ## Vector operations * `vec_rbind()` and `vec_c()` with data frame inputs now consistently preserve the names of list-columns, df-columns, and matrix-columns (#689). This can cause some false positives in unit tests, if they are sensitive to internal names (#1007). * `vec_rbind()` now repairs row names silently to avoid confusing messages when the row names are not informative and were not created on purpose. * `vec_rbind()` gains option to treat input names as row names. This is disabled by default (#966). * New `vec_rep()` and `vec_rep_each()` for repeating an entire vector and elements of a vector, respectively. These two functions provide a clearer interface for the functionality of `vec_repeat()`, which is now deprecated. * `vec_cbind()` now calls `vec_restore()` on inputs emptied of their columns before computing the common type. This has consequences for data frame classes with special columns that devolve into simpler classes when the columns are subsetted out. These classes are now always simplified by `vec_cbind()`. For instance, column-binding a grouped data frame with a data frame now produces a tibble (the simplified class of a grouped data frame). * `vec_match()` and `vec_in()` gain parameters for argument tags (#944). * The internal version of `vec_assign()` now has support for assigning names and inner names. For data frames, the names are assigned recursively. * `vec_assign()` gains `x_arg` and `value_arg` parameters (#918). * `vec_group_loc()`, which powers `dplyr::group_by()`, now has more efficient vector access (#911). * `vec_ptype()` gained an `x_arg` argument. * New `list_sizes()` for computing the size of every element in a list. `list_sizes()` is to `vec_size()` as `lengths()` is to `length()`, except that it only supports lists. Atomic vectors and data frames result in an error. * `new_data_frame()` infers size from row names when `n = NULL` (#894). * `vec_c()` now accepts `rlang::zap()` as `.name_spec` input. The returned vector is then always unnamed, and the names do not cause errors when they can't be combined. They are still used to create more informative messages when the inputs have incompatible types (#232). ## Classes * vctrs now supports the `data.table` class. The common type of a data frame and a data table is a data table. * `new_vctr()` now always appends a base `"list"` class to list `.data` to be compatible with changes to `vec_is_list()`. This affects `new_list_of()`, which now returns an object with a base class of `"list"`. * dplyr methods are now implemented for `vec_restore()`, `vec_ptype2()`, and `vec_cast()`. The user-visible consequence (and breaking change) is that row-binding a grouped data frame and a data frame or tibble now returns a grouped data frame. It would previously return a tibble. * The `is.na<-()` method for `vctrs_vctr` now supports numeric and character subscripts to indicate where to insert missing values (#947). * Improved support for vector-like S4 objects (#550, #551). * The base classes `AsIs` and `table` have vctrs methods (#904, #906). * `POSIXlt` and `POSIXct` vectors are handled more consistently (#901). * Ordered factors that do not have identical levels are now incompatible. They are now incompatible with all factors. ## Indexing and names * `vec_as_subscript()` now fails when the subscript is a matrix or an array, consistently with `vec_as_location()`. * Improved error messages in `vec_as_location()` when subscript is a matrix or array (#936). * `vec_as_location2()` properly picks up `subscript_arg` (tidyverse/tibble#735). * `vec_as_names()` now has more informative error messages when names are not unique (#882). * `vec_as_names()` gains a `repair_arg` argument that when set will cause `repair = "check_unique"` to generate an informative hint (#692). ## Conditions * `stop_incompatible_type()` now has an `action` argument for customizing whether the coercion error came from `vec_ptype2()` or `vec_cast()`. `stop_incompatible_cast()` is now a thin wrapper around `stop_incompatible_type(action = "convert")`. * `stop_` functions now take `details` after the dots. This argument can no longer be passed by position. * Supplying both `details` and `message` to the `stop_` functions is now an internal error. * `x_arg`, `y_arg`, and `to_arg` are now compulsory arguments in `stop_` functions like `stop_incompatible_type()`. * Lossy cast errors are now considered internal. Please don't test for the class or explicitly handle them. * New argument `loss_type` for the experimental function `maybe_lossy_cast()`. It can take the values "precision" or "generality" to indicate in the error message which kind of loss is the error about (double to integer loses precision, character to factor loses generality). * Coercion and recycling errors are now more consistent. ## CRAN results * Fixed clang-UBSAN error "nan is outside the range of representable values of type 'int'" (#902). * Fixed compilation of stability vignette following the date conversion changes on R-devel. # vctrs 0.2.4 * Factors and dates methods are now implemented in C for efficiency. * `new_data_frame()` now correctly updates attributes and supports merging of the `"names"` and `"row.names"` arguments (#883). * `vec_match()` gains an `na_equal` argument (#718). * `vec_chop()`'s `indices` argument has been restricted to positive integer vectors. Character and logical subscripts haven't proven useful, and this aligns `vec_chop()` with `vec_unchop()`, for which only positive integer vectors make sense. * New `vec_unchop()` for combining a list of vectors into a single vector. It is similar to `vec_c()`, but gives greater control over how the elements are placed in the output through the use of a secondary `indices` argument. * Breaking change: When `.id` is supplied, `vec_rbind()` now creates the identifier column at the start of the data frame rather than at the end. * `numeric_version` and `package_version` lists are now treated as vectors (#723). * `vec_slice()` now properly handles symbols and S3 subscripts. * `vec_as_location()` and `vec_as_subscript()` are now fully implemented in C for efficiency. * `num_as_location()` gains a new argument, `zero`, for controlling whether to `"remove"`, `"ignore"`, or `"error"` on zero values (#852). # vctrs 0.2.3 * The main feature of this release is considerable performance improvements with factors and dates. * `vec_c()` now falls back to `base::c()` if the vector doesn't implement `vec_ptype2()` but implements `c()`. This should improve the compatibility of vctrs-based functions with foreign classes (#801). * `new_data_frame()` is now faster. * New `vec_is_list()` for detecting if a vector is a list in the vctrs sense. For instance, objects of class `lm` are not lists. In general, classes need to explicitly inherit from `"list"` to be considered as lists by vctrs. * Unspecified vectors of `NA` can now be assigned into a list (#819). ``` x <- list(1, 2) vec_slice(x, 1) <- NA x #> [[1]] #> NULL #> #> [[2]] #> 2 ``` * `vec_ptype()` now errors on scalar inputs (#807). * `vec_ptype_finalise()` is now recursive over all data frame types, ensuring that unspecified columns are correctly finalised to logical (#800). * `vec_ptype()` now correctly handles unspecified columns in data frames, and will always return an unspecified column type (#800). * `vec_slice()` and `vec_chop()` now work correctly with `bit64::integer64()` objects when an `NA` subscript is supplied. By extension, this means that `vec_init()` now works with these objects as well (#813). * `vec_rbind()` now binds row names. When named inputs are supplied and `names_to` is `NULL`, the names define row names. If `names_to` is supplied, they are assigned in the column name as before. * `vec_cbind()` now uses the row names of the first named input. * The `c()` method for `vctrs_vctr` now throws an error when `recursive` or `use.names` is supplied (#791). # vctrs 0.2.2 * New `vec_as_subscript()` function to cast inputs to the base type of a subscript (logical, numeric, or character). `vec_as_index()` has been renamed to `vec_as_location()`. Use `num_as_location()` if you need more options to control how numeric subscripts are converted to a vector of locations. * New `vec_as_subscript2()`, `vec_as_location2()`, and `num_as_location2()` variants for validating scalar subscripts and locations (e.g. for indexing with `[[`). * `vec_as_location()` now preserves names of its inputs if possible. * `vec_ptype2()` methods for base classes now prevent inheritance. This makes sense because the subtyping graph created by `vec_ptype2()` methods is generally not the same as the inheritance relationships defined by S3 classes. For instance, subclasses are often a richer type than their superclasses, and should often be declared as supertypes (e.g. `vec_ptype2()` should return the subclass). We introduced this breaking change in a patch release because `new_vctr()` now adds the base type to the class vector by default, which caused `vec_ptype2()` to dispatch erroneously to the methods for base types. We'll finish switching to this approach in vctrs 0.3.0 for the rest of the base S3 classes (dates, data frames, ...). * `vec_equal_na()` now works with complex vectors. * `vctrs_vctr` class gains an `as.POSIXlt()` method (#717). * `vec_is()` now ignores names and row names (#707). * `vec_slice()` now support Altvec vectors (@jimhester, #696). * `vec_proxy_equal()` is now applied recursively across the columns of data frames (#641). * `vec_split()` no longer returns the `val` column as a `list_of`. It is now returned as a bare list (#660). * Complex numbers are now coercible with integer and double (#564). * zeallot has been moved from Imports to Suggests, meaning that `%<-%` is no longer re-exported from vctrs. * `vec_equal()` no longer propagates missing values when comparing list elements. This means that `vec_equal(list(NULL), list(NULL))` will continue to return `NA` because `NULL` is the missing element for a list, but now `vec_equal(list(NA), list(NA))` returns `TRUE` because the `NA` values are compared directly without checking for missingness. * Lists of expressions are now supported in `vec_equal()` and functions that compare elements, such as `vec_unique()` and `vec_match()`. This ensures that they work with the result of modeling functions like `glm()` and `mgcv::gam()` which store "family" objects containing expressions (#643). * `new_vctr()` gains an experimental `inherit_base_type` argument which determines whether or not the class of the underlying type will be included in the class. * `list_of()` now inherits explicitly from "list" (#593). * `vec_ptype()` has relaxed default behaviour for base types; now if two vectors both inherit from (e.g.) "character", the common type is also "character" (#497). * `vec_equal()` now correctly treats `NULL` as the missing value element for lists (#653). * `vec_cast()` now casts data frames to lists rowwise, i.e. to a list of data frames of size 1. This preserves the invariant of `vec_size(vec_cast(x, to)) == vec_size(x)` (#639). * Positive and negative 0 are now considered equivalent by all functions that check for equality or uniqueness (#637). * New experimental functions `vec_group_rle()` for returning run length encoded groups; `vec_group_id()` for constructing group identifiers from a vector; `vec_group_loc()` for computing the locations of unique groups in a vector (#514). * New `vec_chop()` for repeatedly slicing a vector. It efficiently captures the pattern of `map(indices, vec_slice, x = x)`. * Support for multiple character encodings has been added to functions that compare elements within a single vector, such as `vec_unique()`, and across multiple vectors, such as `vec_match()`. When multiple encodings are encountered, a translation to UTF-8 is performed before any comparisons are made (#600, #553). * Equality and ordering methods are now implemented for raw and complex vectors (@romainfrancois). # vctrs 0.2.1 Maintenance release for CRAN checks. # vctrs 0.2.0 With the 0.2.0 release, many vctrs functions have been rewritten with native C code to improve performance. Functions like `vec_c()` and `vec_rbind()` should now be fast enough to be used in packages. This is an ongoing effort, for instance the handling of factors and dates has not been rewritten yet. These classes still slow down vctrs primitives. The API in 0.2.0 has been updated, please see a list of breaking changes below. vctrs has now graduated from experimental to a maturing package. Please note that API changes are still planned for future releases, for instance `vec_ptype2()` and `vec_cast()` might need to return a sentinel instead of failing with an error when there is no common type or possible cast. ## Breaking changes * Lossy casts now throw errors of type `vctrs_error_cast_lossy`. Previously these were warnings. You can suppress these errors selectively with `allow_lossy_cast()` to get the partial cast results. To implement your own lossy cast operation, call the new exported function `maybe_lossy_cast()`. * `vec_c()` now fails when an input is supplied with a name but has internal names or is length > 1: ``` vec_c(foo = c(a = 1)) #> Error: Can't merge the outer name `foo` with a named vector. #> Please supply a `.name_spec` specification. vec_c(foo = 1:3) #> Error: Can't merge the outer name `foo` with a vector of length > 1. #> Please supply a `.name_spec` specification. ``` You can supply a name specification that describes how to combine the external name of the input with its internal names or positions: ``` # Name spec as glue string: vec_c(foo = c(a = 1), .name_spec = "{outer}_{inner}") # Name spec as a function: vec_c(foo = c(a = 1), .name_spec = function(outer, inner) paste(outer, inner, sep = "_")) vec_c(foo = c(a = 1), .name_spec = ~ paste(.x, .y, sep = "_")) ``` * `vec_empty()` has been renamed to `vec_is_empty()`. * `vec_dim()` and `vec_dims()` are no longer exported. * `vec_na()` has been renamed to `vec_init()`, as the primary use case is to initialize an output container. * `vec_slice<-` is now type stable (#140). It always returns the same type as the LHS. If needed, the RHS is cast to the correct type, but only if both inputs are coercible. See examples in `?vec_slice`. * We have renamed the `type` particle to `ptype`: - `vec_type()` => `vec_ptype()` - `vec_type2()` => `vec_ptype2()` - `vec_type_common()` => `vec_ptype_common()` Consequently, `vec_ptype()` was renamed to `vec_ptype_show()`. ## New features * New `vec_proxy()` generic. This is the main customisation point in vctrs along with `vec_restore()`. You should only implement it when your type is designed around a non-vector class (atomic vectors, bare lists, data frames). In this case, `vec_proxy()` should return such a vector class. The vctrs operations will be applied on the proxy and `vec_restore()` is called to restore the original representation of your type. The most common case where you need to implement `vec_proxy()` is for S3 lists. In vctrs, S3 lists are treated as scalars by default. This way we don't treat objects like model fits as vectors. To prevent vctrs from treating your S3 list as a scalar, unclass it from the `vec_proxy()` method. For instance here is the definition for `list_of`: ``` #' @export vec_proxy.vctrs_list_of <- function(x) { unclass(x) } ``` If you inherit from `vctrs_vctr` or `vctrs_rcrd` you don't need to implement `vec_proxy()`. * `vec_c()`, `vec_rbind()`, and `vec_cbind()` gain a `.name_repair` argument (#227, #229). * `vec_c()`, `vec_rbind()`, `vec_cbind()`, and all functions relying on `vec_ptype_common()` now have more informative error messages when some of the inputs have nested data frames that are not convergent: ``` df1 <- tibble(foo = tibble(bar = tibble(x = 1:3, y = letters[1:3]))) df2 <- tibble(foo = tibble(bar = tibble(x = 1:3, y = 4:6))) vec_rbind(df1, df2) #> Error: No common type for `..1$foo$bar$y` and `..2$foo$bar$y` . ``` * `vec_cbind()` now turns named data frames to packed columns. ```r data <- tibble::tibble(x = 1:3, y = letters[1:3]) data <- vec_cbind(data, packed = data) data # A tibble: 3 x 3 x y packed$x $y 1 1 a 1 a 2 2 b 2 b 3 3 c 3 c ``` Packed data frames are nested in a single column. This makes it possible to access it through a single name: ```r data$packed # A tibble: 3 x 2 x y 1 1 a 2 2 b 3 3 c ``` We are planning to use this syntax more widely in the tidyverse. * New `vec_is()` function to check whether a vector conforms to a prototype and/or a size. Unlike `vec_assert()`, it doesn't throw errors but returns `TRUE` or `FALSE` (#79). Called without a specific type or size, `vec_assert()` tests whether an object is a data vector or a scalar. S3 lists are treated as scalars by default. Implement a `vec_is_vector()` for your class to override this property (or derive from `vctrs_vctr`). * New `vec_order()` and `vec_sort()` for ordering and sorting generalised vectors. * New `.names_to` parameter for `vec_rbind()`. If supplied, this should be the name of a column where the names of the inputs are copied. This is similar to the `.id` parameter of `dplyr::bind_rows()`. * New `vec_seq_along()` and `vec_init_along()` create useful sequences (#189). * `vec_slice()` now preserves character row names, if present. * New `vec_split(x, by)` is a generalisation of `split()` that can divide a vector into groups formed by the unique values of another vector. Returns a two-column data frame containing unique values of `by` aligned with matching `x` values (#196). ## Other features and bug fixes * Using classed errors of class `"vctrs_error_assert"` for failed assertions, and of class `"vctrs_error_incompatible"` (with subclasses `_type`, `_cast` and `_op`) for errors on incompatible types (#184). * Character indexing is now only supported for named objects, an error is raised for unnamed objects (#171). * Predicate generics now consistently return logical vectors when passed a `vctrs_vctr` class. They used to restore the output to their input type (#251). * `list_of()` now has an `as.character()` method. It uses `vec_ptype_abbr()` to collapse complex objects into their type representation (tidyverse/tidyr#654). * New `stop_incompatible_size()` to signal a failure due to mismatched sizes. * New `validate_list_of()` (#193). * `vec_arith()` is consistent with base R when combining `difftime` and `date`, with a warning if casts are lossy (#192). * `vec_c()` and `vec_rbind()` now handle data.frame columns properly (@yutannihilation, #182). * `vec_cast(x, data.frame())` preserves the number of rows in `x`. * `vec_equal()` now handles missing values symmetrically (#204). * `vec_equal_na()` now returns `TRUE` for data frames and records when every component is missing, not when _any_ component is missing (#201). * `vec_init()` checks input is a vector. * `vec_proxy_compare()` gains an experimental `relax` argument, which allows data frames to be orderable even if all their columns are not (#210). * `vec_size()` now works with positive short row names. This fixes issues with data frames created with jsonlite (#220). * `vec_slice<-` now has a `vec_assign()` alias. Use `vec_assign()` when you don't want to modify the original input. * `vec_slice()` now calls `vec_restore()` automatically. Unlike the default `[` method from base R, attributes are preserved by default. * `vec_slice()` can correct slice 0-row data frames (#179). * New `vec_repeat()` for repeating each element of a vector the same number of times. * `vec_type2(x, data.frame())` ensures that the returned object has names that are a length-0 character vector. vctrs/inst/0000755000176200001440000000000015157322654012402 5ustar liggesusersvctrs/inst/include/0000755000176200001440000000000015113325071014011 5ustar liggesusersvctrs/inst/include/vctrs.c0000644000176200001440000000067115113325071015322 0ustar liggesusers#include "vctrs.h" // Maturing bool (*obj_is_vector)(SEXP) = NULL; R_len_t (*short_vec_size)(SEXP) = NULL; SEXP (*short_vec_recycle)(SEXP, R_len_t) = NULL; void vctrs_init_api(void) { obj_is_vector = (bool (*)(SEXP)) R_GetCCallable("vctrs", "obj_is_vector"); short_vec_size = (R_len_t (*)(SEXP)) R_GetCCallable("vctrs", "short_vec_size"); short_vec_recycle = (SEXP (*)(SEXP, R_len_t)) R_GetCCallable("vctrs", "short_vec_recycle"); } vctrs/inst/include/vctrs.h0000644000176200001440000000043015113325071015320 0ustar liggesusers#ifndef VCTRS_H #define VCTRS_H #include #include #include // Maturing extern bool (*obj_is_vector)(SEXP); extern R_len_t (*short_vec_size)(SEXP); extern SEXP (*short_vec_recycle)(SEXP, R_len_t); void vctrs_init_api(void); #endif vctrs/inst/doc/0000755000176200001440000000000015157322654013147 5ustar liggesusersvctrs/inst/doc/stability.Rmd0000644000176200001440000003142614376223322015617 0ustar liggesusers--- title: "Type and size stability" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Type and size stability} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the "shape" of the output you only need to know the "shape"s of the inputs. This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can't predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` ## Definitions We say a function is __type-stable__ iff: 1. You can predict the output type knowing only the input types. 1. The order of arguments in ... does not affect the output type. Similarly, a function is __size-stable__ iff: 1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size. Very few base R functions are size-stable, so I'll also define a slightly weaker condition. I'll call a function __length-stable__ iff: 1. You can predict the output _length_ knowing only the input _lengths_, or there is a single numeric input that specifies the output _length_. (But note that length-stable is not a particularly robust definition because `length()` returns a value for things that are not vectors.) We'll call functions that don't obey these principles __type-unstable__ and __size-unstable__ respectively. On top of type- and size-stability it's also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions. The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again. ### Examples To make these ideas concrete, let's apply them to a few base functions: 1. `mean()` is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error). 1. Surprisingly, `median()` is type-unstable: ```{r} vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ``` It is, however, size-stable, since it always returns a vector of length 1. 1. `sapply()` is type-unstable because you can't predict the output type only knowing the input types: ```{r} vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ``` It's not quite size-stable; `vec_size(sapply(x, f))` is `vec_size(x)` for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns). 1. `vapply()` is a type-stable version of `sapply()` because `vec_ptype_show(vapply(x, fn, template))` is always `vec_ptype_show(template)`. It is size-unstable for the same reasons as `sapply()`. 1. `c()` is type-unstable because `c(x, y)` doesn't always output the same type as `c(y, x)`. ```{r} vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ``` `c()` is *almost always* length-stable because `length(c(x, y))` *almost always* equals `length(x) + length(y)`. One common source of instability here is dealing with non-vectors (see the later section "Non-vectors"): ```{r} env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ``` 1. `paste(x1, x2)` is length-stable because `length(paste(x1, x2))` equals `max(length(x1), length(x2))`. However, it doesn't follow the usual arithmetic recycling rules because `paste(1:2, 1:3)` doesn't generate a warning. 1. `ifelse()` is length-stable because `length(ifelse(cond, true, false))` is always `length(cond)`. `ifelse()` is type-unstable because the output type depends on the value of `cond`: ```{r} vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ``` 1. `read.csv(file)` is type-unstable and size-unstable because, while you know it will return a data frame, you don't know what columns it will return or how many rows it will have. Similarly, `df[[i]]` is not type-stable because the result depends on the _value_ of `i`. There are many important functions that can not be made type-stable or size-stable! With this understanding of type- and size-stability in hand, we'll use them to analyse some base R functions in greater depth and then propose alternatives with better properties. ## `c()` and `vctrs::vec_c()` In this section we'll compare and contrast `c()` and `vec_c()`. `vec_c()` is both type- and size-stable because it possesses the following invariants: * `vec_ptype(vec_c(x, y))` equals `vec_ptype_common(x, y)`. * `vec_size(vec_c(x, y))` equals `vec_size(x) + vec_size(y)`. `c()` has another undesirable property in that it's not consistent with `unlist()`; i.e., `unlist(list(x, y))` does not always equal `c(x, y)`; i.e., base R has multiple sets of type-coercion rules. I won't consider this problem further here. I have two goals here: * To fully document the quirks of `c()`, hence motivating the development of an alternative. * To discuss non-obvious consequences of the type- and size-stability above. ### Atomic vectors If we only consider atomic vectors, `c()` is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical. ```{r} c(FALSE, 1L, 2.5) ``` `vec_c()` obeys similar rules: ```{r} vec_c(FALSE, 1L, 2.5) ``` But it does not automatically coerce to character vectors or lists: ```{r, error = TRUE} c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) ``` ### Incompatible vectors and non-vectors In general, most base methods do not throw an error: ```{r} c(10.5, factor("x")) ``` If the inputs aren't vectors, `c()` automatically puts them in a list: ```{r} c(mean, globalenv()) ``` For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list: ```{r, error = TRUE} c(getRversion(), "x") c("x", getRversion()) ``` `vec_c()` throws an error if the inputs are not vectors or not automatically coercible: ```{r, error = TRUE} vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") ``` ### Factors Combining two factors returns an integer vector: ```{r} fa <- factor("a") fb <- factor("b") c(fa, fb) ``` (This is documented in `c()` but is still undesirable.) `vec_c()` returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with `dplyr::bind_rows()`, which is stricter and is a common source of user difficulty.) ```{r} vec_c(fa, fb) vec_c(fb, fa) ``` ### Date-times `c()` strips the time zone associated with date-times: ```{r} datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ``` This behaviour is documented in `?DateTimeClasses` but is the source of considerable user pain. `vec_c()` preserves time zones: ```{r} vec_c(datetime_nz) ``` What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there's no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone: ```{r} datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ``` ### Dates and date-times Combining dates and date-times with `c()` gives silently incorrect results: ```{r} date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ``` This behaviour arises because neither `c.Date()` nor `c.POSIXct()` check that all inputs are of the same type. `vec_c()` uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time). ```{r} vec_c(date, datetime) vec_c(date, datetime_nz) ``` ### Missing values If a missing value comes at the beginning of the inputs, `c()` falls back to the internal behaviour, which strips all attributes: ```{r} c(NA, fa) c(NA, date) c(NA, datetime) ``` `vec_c()` takes a different approach treating a logical vector consisting only of `NA` as the `unspecified()` class which can be converted to any other 1d type: ```{r} vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ``` ### Data frames Because it is *almost always* length-stable, `c()` combines data frames column wise (into a list): ```{r} df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ``` `vec_c()` is size-stable, which implies it will row-bind data frames: ```{r} vec_c(df1, df2) ``` ### Matrices and arrays The same reasoning applies to matrices: ```{r} m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ``` One difference is that `vec_c()` will "broadcast" a vector to match the dimensions of a matrix: ```{r} c(m, 1) vec_c(m, 1) ``` ### Implementation The basic implementation of `vec_c()` is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with `vec_init()`, and then insert each input into the correct place in the output. ```{r, eval = FALSE} vec_c <- function(...) { args <- compact(list2(...)) ptype <- vec_ptype_common(!!!args) if (is.null(ptype)) return(NULL) ns <- map_int(args, vec_size) out <- vec_init(ptype, sum(ns)) pos <- 1 for (i in seq_along(ns)) { n <- ns[[i]] x <- vec_cast(args[[i]], to = ptype) vec_slice(out, pos:(pos + n - 1)) <- x pos <- pos + n } out } ``` (The real `vec_c()` is a bit more complicated in order to handle inner and outer names). ## `ifelse()` One of the functions that motivate the development of vctrs is `ifelse()`. It has the surprising property that the result value is "A vector of the same length and attributes (including dimensions and class) as `test`". To me, it seems more reasonable for the type of the output to be controlled by the type of the `yes` and `no` arguments. In `dplyr::if_else()` I swung too far towards strictness: it throws an error if `yes` and `no` are not the same type. This is annoying in practice because it requires typed missing values (`NA_character_` etc), and because the checks are only on the class (not the full prototype), it's easy to create invalid output. I found it much easier to understand what `ifelse()` _should_ do once I internalised the ideas of type- and size-stability: * The first argument must be logical. * `vec_ptype(if_else(test, yes, no))` equals `vec_ptype_common(yes, no)`. Unlike `ifelse()` this implies that `if_else()` must always evaluate both `yes` and `no` in order to figure out the correct type. I think this is consistent with `&&` (scalar operation, short circuits) and `&` (vectorised, evaluates both sides). * `vec_size(if_else(test, yes, no))` equals `vec_size_common(test, yes, no)`. I think the output could have the same size as `test` (i.e., the same behaviour as `ifelse`), but I _think_ as a general rule that your inputs should either be mutually recycling or not. This leads to the following implementation: ```{r} if_else <- function(test, yes, no) { if (!is_logical(test)) { abort("`test` must be a logical vector.") } c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ``` By using `vec_size()` and `vec_slice()`, this definition of `if_else()` automatically works with data.frames and matrices: ```{r} if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) ``` vctrs/inst/doc/s3-vector.Rmd0000644000176200001440000013076115132202160015426 0ustar liggesusers--- title: "S3 vectors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{S3 vectors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ``` This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you'll also need to provide methods that actually make the vector useful. I assume that you're already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of [the S3 chapter](https://adv-r.hadley.nz/s3.html) of *Advanced R*. This article refers to "vectors of numbers" as *double vectors*. Here, "double" stands for ["double precision floating point number"](https://en.wikipedia.org/wiki/Double-precision_floating-point_format), see also `double()`. ```{r setup} library(vctrs) library(rlang) library(zeallot) ``` This vignette works through five big topics: - The basics of creating a new vector class with vctrs. - The coercion and casting system. - The record and list-of types. - Equality and comparison proxies. - Arithmetic operators. They're collectively demonstrated with a number of simple S3 classes: - Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting. - Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions. - Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care. - Rational: a pair of integer vectors that defines a rational number like `2 / 3`. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for `+`, `-`, and friends. - Polynomial: a list of integer vectors that define polynomials like `1 + x - x^3`. Sorting such vectors correctly requires a custom equality method. - Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties. - Period and frequency: a pair of classes represent a period, or its inverse, frequency. This allows us to explore more arithmetic operators. ## Basics In this section you'll learn how to create a new vctrs class by calling `new_vctr()`. This creates an object with class `vctrs_vctr` which has a number of methods. These are designed to make your life as easy as possible. For example: - The `print()` and `str()` methods are defined in terms of `format()` so you get a pleasant, consistent display as soon as you've made your `format()` method. - You can immediately put your new vector class in a data frame because `as.data.frame.vctrs_vctr()` does the right thing. - Subsetting (`[`, `[[`, and `$`), `length<-`, and `rep()` methods automatically preserve attributes because they use `vec_restore()`. A default `vec_restore()` works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data. - Default subset-assignment methods (`[<-`, `[[<-`, and `$<-`) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages. ### Percent class In this section, I'll show you how to make a `percent` class, i.e., a double vector that is printed as a percentage. We start by defining a low-level [constructor](https://adv-r.hadley.nz/s3.html#s3-constrcutor) to check types and/or sizes and call `new_vctr()`. `percent` is built on a double vector of any length and doesn't have any attributes. ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ``` Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as [blob](https://blob.tidyverse.org/)), it's fine to use the package name without prefix as the class name. We then follow up with a user friendly [helper](https://adv-r.hadley.nz/s3.html#helpers). Here we'll use `vec_cast()` to allow it to accept anything coercible to a double: ```{r} percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype. ```{r} new_percent() percent() ``` For the convenience of your users, consider implementing an `is_percent()` function: ```{r} is_percent <- function(x) { inherits(x, "vctrs_percent") } ``` ### `format()` method The first method for every class should almost always be a `format()` method. This should return a character vector the same length as `x`. The easiest way to do this is to rely on one of R's low-level formatting functions like `formatC()`: ```{r} format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ``` ```{r, include = FALSE} # As of R 3.5, print.vctr can not find format.percent since it's not in # its lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ``` ```{r} x ``` (Note the use of `vec_data()` so `format()` doesn't get stuck in an infinite loop, and that I take a little care to not convert `NA` to `"NA"`; this leads to better printing.) The format method is also used by data frames, tibbles, and `str()`: ```{r} data.frame(x) ``` For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in `str()`: ```{r} vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ``` If you need more control over printing in tibbles, implement a method for `pillar::pillar_shaft()`. See `vignette("pillar", package = "vctrs")` for details. ## Casting and coercion The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens *implicitly* (e.g in `c()`) we call it **coercion**; when the change happens *explicitly* (e.g. with `as.integer(x)`), we call it **casting**. One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it's possible to make accurate predictions about what (e.g.) `c(x, y)` should do when `x` and `y` have different prototypes. vctrs achieves this goal through two generics: - `vec_ptype2(x, y)` defines possible set of coercions. It returns a prototype if `x` and `y` can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes. - `vec_cast(x, to)` defines the possible sets of casts. It returns `x` translated to have prototype `to`, or throws an error if the conversion isn't possible. The set of possible casts is a superset of possible coercions because they're requested explicitly. ### Double dispatch Both generics use [**double dispatch**](https://en.wikipedia.org/wiki/Double_dispatch) which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means: - You end up with method names with two classes, like `vec_ptype2.foo.bar()`. - You don't need to implement default methods (they would never be called if you do). - You can't call `NextMethod()`. ### Percent class {#percent} We'll make our percent class coercible back and forth with double vectors. `vec_ptype2()` provides a user friendly error message if the coercion doesn't exist and makes sure `NA` is handled in a standard way. `NA` is technically a logical vector, but we want to stand in for a missing value of any type. ```{r, error = TRUE} vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) ``` By default and in simple cases, an object of the same class is compatible with itself: ```{r} vec_ptype2(percent(), percent()) ``` However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we'll start by saying that a `vctrs_percent` combined with a `vctrs_percent` yields a `vctrs_percent`, which we indicate by returning a prototype generated by the constructor. ```{r} vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ``` Next we define methods that say that combining a `percent` and double should yield a `double`. We avoid returning a `percent` here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers. Because double dispatch is a bit of a hack, we need to provide two methods. It's your responsibility to ensure that each member of the pair returns the same result: if they don't you will get weird and unpredictable behaviour. The double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. If we implemented `vec_ptype2.vctrs_percent.numeric()`, it would never be called. ```{r} vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ``` We can check that we've implemented this correctly with `vec_ptype_show()`: ```{r} vec_ptype_show(percent(), double(), percent()) ``` The `vec_ptype2()` methods define which input is the richer type that vctrs should coerce to. However, they don't perform any conversion. This is the job of `vec_cast()`, which we implement next. We'll provide a method to cast a percent to a percent: ```{r} vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ``` And then for converting back and forth between doubles. To convert a double to a percent we use the `percent()` helper (not the constructor; this is unvalidated user input). To convert a `percent` to a double, we strip the attributes. Note that for historical reasons the order of argument in the signature is the opposite as for `vec_ptype2()`. The class for `to` comes first, and the class for `x` comes second. Again, the double dispatch mechanism requires us to refer to the underlying type, `double`, in the method name. Implementing `vec_cast.vctrs_percent.numeric()` has no effect. ```{r} vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ``` Then we can check this works with `vec_cast()`: ```{r} vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ``` Once you've implemented `vec_ptype2()` and `vec_cast()`, you get `vec_c()`, `[<-`, and `[[<-` implementations for free. ```{r, error = TRUE} vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x ``` You'll also get mostly correct behaviour for `c()`. The exception is when you use `c()` with a base R class: ```{r, error = TRUE} # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) ``` Unfortunately there's no way to fix this problem with the current design of `c()`. Again, as a convenience, consider providing an `as_percent()` function that makes use of the casts defined in your `vec_cast.vctrs_percent()` methods: ```{r} as_percent <- function(x) { vec_cast(x, new_percent()) } ``` Occasionally, it is useful to provide conversions that go beyond what's allowed in casting. For example, we could offer a parsing method for character vectors. In this case, `as_percent()` should be generic, the default method should cast, and then additional methods should implement more flexible conversion: ```{r} as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ``` ### Decimal class Now that you've seen the basics with a very simple S3 class, we'll gradually explore more complicated scenarios. This section creates a `decimal` class that prints with the specified number of decimal places. This is very similar to `percent` but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1). We start off as before, defining a low-level constructor, a user-friendly constructor, a `format()` method, and a `vec_ptype_abbr()`. Note that additional object attributes are simply passed along to `new_vctr()`: ```{r} new_decimal <- function(x = double(), digits = 2L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_integer(digits)) { abort("`digits` must be an integer vector.") } vec_check_size(digits, size = 1L) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ``` Note that I provide a little helper to extract the `digits` attribute. This makes the code a little easier to read and should not be exported. By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You'll see what to do if the attributes are data dependent in the next section. ```{r} x[1:2] x[[1]] ``` For the sake of exposition, we'll assume that `digits` is an important attribute of the class and should be included in the full type: ```{r} vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ``` Now consider `vec_cast()` and `vec_ptype2()`. Casting and coercing from one decimal to another requires a little thought as the values of the `digits` attribute might be different, and we need some way to reconcile them. Here I've decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error. ```{r} vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ``` Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal). ```{r} vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ``` If type `x` has greater resolution than `y`, there will be some inputs that lose precision. These should generate errors using `stop_lossy_cast()`. You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution. ```{r, error = TRUE} vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) ``` ### Cached sum class {#cached-sum} The next level up in complexity is an object that has data-dependent attributes. To explore this idea we'll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors: ```{r} new_cached_sum <- function(x = double(), sum = 0L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_double(sum)) { abort("`sum` must be a double vector.") } vec_check_size(sum, size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ``` For this class, we can use the default `format()` method, and instead, we'll customise the `obj_print_footer()` method. This is a good place to display user facing attributes. ```{r} obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ``` We'll also override `sum()` and `mean()` to use the attribute. This is easiest to do with `vec_math()`, which you'll learn about later. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ``` As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they'll work, but return the incorrect result: ```{r} x[1:2] ``` To fix this, you need to provide a `vec_restore()` method. Note that this method dispatches on the `to` argument. ```{r} vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ``` This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with `vec_data()` and then reapplying them again with `vec_restore()`. The default `vec_restore()` method copies over all attributes, which is not appropriate when the attributes depend on the data. Note that `vec_restore.class` is subtly different from `vec_cast.class.class()`. `vec_restore()` is used when restoring attributes that have been lost; `vec_cast()` is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with `new_vctr()`. `vec_restore.factor()` would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with `vec_cast()`. ## Record-style objects Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is `POSIXlt`, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override `length()` and subsetting methods to conceal this implementation detail. ```{r} x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ``` vctrs makes it easy to create new record-style classes using `new_rcrd()`, which has a wide selection of default methods. ### Rational class A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short). As usual we start with low-level and user-friendly constructors. The low-level constructor calls `new_rcrd()`, which needs a named list of equal-length vectors. ```{r} new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer vector.") } if (!is_integer(d)) { abort("`d` must be an integer vector.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ``` Our user friendly constructor casts `n` and `d` to integers and recycles them to the same length. ```{r} rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ``` Behind the scenes, `x` is a named list with two elements. But those details are hidden so that it behaves like a vector: ```{r} names(x) length(x) ``` To access the underlying fields we need to use `field()` and `fields()`: ```{r} fields(x) field(x, "n") ``` Notice that we can't `print()` or `str()` the new rational vector `x` yet. Printing causes an error: ```{r, error = TRUE} x str(x) ``` This is because we haven't defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call `vec_data(x)`. ```{r} vec_data(x) str(vec_data(x)) ``` It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way: ```{r} format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ``` vctrs uses the `format()` method in `str()`, hiding the underlying implementation details from the user: ```{r} str(x) ``` For `rational`, `vec_ptype2()` and `vec_cast()` follow the same pattern as `percent()`. We allow coercion from integer and to doubles. ```{r} vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ``` ### Decimal2 class The previous implementation of `decimal` was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems. A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a `scale`). The following code is a very quick sketch of how you might start creating such a class: ```{r} new_decimal2 <- function(l, r, scale = 2L) { if (!is_integer(l)) { abort("`l` must be an integer vector.") } if (!is_integer(r)) { abort("`r` must be an integer vector.") } if (!is_integer(scale)) { abort("`scale` must be an integer vector.") } vec_check_size(scale, size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ``` ## Equality and comparison vctrs provides four "proxy" generics. Two of these let you control how your class determines equality and comparison: - `vec_proxy_equal()` returns a data vector suitable for comparison. It underpins `==`, `!=`, `unique()`, `anyDuplicated()`, and `is.na()`. - `vec_proxy_compare()` specifies how to compare the elements of your vector. This proxy is used in `<`, `<=`, `>=`, `>`, `min()`, `max()`, `median()`, and `quantile()`. Two other proxy generic are used for sorting for unordered data types and for accessing the raw data for exotic storage formats: - `vec_proxy_order()` specifies how to sort the elements of your vector. It is used in `xtfrm()`, which in turn is called by the `order()` and `sort()` functions. This proxy was added to implement the behaviour of lists, which are sortable (their order proxy sorts by first occurrence) but not comparable (comparison operators cause an error). Its default implementation for other classes calls `vec_proxy_compare()` and you normally don't need to implement this proxy. - `vec_proxy()` returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn't need to implement `vec_proxy()`. The default behavior is as follows: - `vec_proxy_equal()` calls `vec_proxy()` - `vec_proxy_compare()` calls `vec_proxy_equal()` - `vec_proxy_order()` calls `vec_proxy_compare()` You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work. These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C. ### Rational class Let's explore these ideas by with the rational class we started on above. By default, `vec_proxy()` converts a record to a data frame, and the default comparison works column by column: ```{r} x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ``` This makes sense as a default but isn't correct here because `rational(1, 1)` represents the same number as `rational(2, 2)`, so they should be equal. We can fix that by implementing a `vec_proxy_equal()` method that divides `n` and `d` by their greatest common divisor: ```{r} # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ``` `vec_proxy_equal()` is also used by `unique()`: ```{r} unique(x) ``` We now need to fix the comparison operations similarly, since comparison currently happens lexicographically by `n`, then by `d`: ```{r} rational(1, 2) < rational(2, 3) rational(2, 4) < rational(2, 3) ``` The easiest fix is to convert the fraction to a floating point number and use this as a proxy: ```{r} vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational(2, 4) < rational(2, 3) ``` This also fixes `sort()`, because the default implementation of `vec_proxy_order()` calls `vec_proxy_compare()`. ```{r} sort(x) ``` (We could have used the same approach in `vec_proxy_equal()`, but when working with floating point numbers it's not necessarily true that `x == y` implies that `d * x == d * y`.) ### Polynomial class A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like `1 + 3x - 2x^2`) using a list of integer vectors (like `c(1, 3, -2)`). Note the use of `new_list_of()` in the constructor. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) new_poly(x) } new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly_list") } vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly" format.vctrs_poly_list <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly_list <- function(x, ...) { if (length(x) != 0) { print(format(x), quote = FALSE) } } p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` The resulting objects will inherit from the `vctrs_list_of` class, which provides tailored methods for `$`, `[[`, the corresponding assignment operators, and other methods. ```{r} class(p) p[2] p[[2]] ``` The class implements the list interface: ```{r} obj_is_list(p) ``` This is fine for the internal implementation of this class but it would be more appropriate if it behaved like an atomic vector rather than a list. #### Make an atomic polynomial vector An atomic vector is a vector like integer or character for which `[[` returns the same type. Unlike lists, you can't reach inside an atomic vector. To make the polynomial class an atomic vector, we'll wrap the internal `list_of()` class within a record vector. Usually records are used because they can store several fields of data for each observation. Here we have only one, but we use the class anyway to inherit its atomicity. ```{r} poly <- function(...) { x <- vec_cast_common(..., .to = integer()) x <- new_poly(x) new_rcrd(list(data = x), class = "vctrs_poly") } format.vctrs_poly <- function(x, ...) { format(field(x, "data")) } ``` The new `format()` method delegates to the one we wrote for the internal list. The vector looks just like before: ```{r} p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ``` Making the class atomic means that `obj_is_list()` now returns `FALSE`. This prevents recursive algorithms that traverse lists from reaching too far inside the polynomial internals. ```{r} obj_is_list(p) ``` Most importantly, it prevents users from reaching into the internals with `[[`: ```{r} p[[2]] ``` #### Implementing equality and comparison Equality works out of the box because we can tell if two integer vectors are equal: ```{r} p == poly(c(1, 0, 1)) ``` We can't compare individual elements, because the data is stored in a list and by default lists are not comparable: ```{r, error = TRUE} p < p[2] ``` To enable comparison, we implement a `vec_proxy_compare()` method: ```{r} vec_proxy_compare.vctrs_poly <- function(x, ...) { # Get the list inside the record vector x_raw <- vec_data(field(x, "data")) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } p < p[2] ``` Often, this is sufficient to also implement `sort()`. However, for lists, there is already a default `vec_proxy_order()` method that sorts by first occurrence: ```{r} sort(p) sort(p[c(1:3, 1:2)]) ``` To ensure consistency between ordering and comparison, we forward `vec_proxy_order()` to `vec_proxy_compare()`: ```{r} vec_proxy_order.vctrs_poly <- function(x, ...) { vec_proxy_compare(x, ...) } sort(p) ``` ## Arithmetic vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once: - `vec_math(fn, x, ...)` specifies the behaviour of mathematical functions like `abs()`, `sum()`, and `mean()`. (Note that `var()` and `sd()` can't be overridden, see `?vec_math()` for the complete list supported by `vec_math()`.) - `vec_arith(op, x, y)` specifies the behaviour of the arithmetic operations like `+`, `-`, and `%%`. (See `?vec_arith()` for the complete list.) Both generics define the behaviour for multiple functions because `sum.vctrs_vctr(x)` calls `vec_math.vctrs_vctr("sum", x)`, and `x + y` calls `vec_math.x_class.y_class("+", x, y)`. They're accompanied by `vec_math_base()` and `vec_arith_base()` which make it easy to call the underlying base R functions. `vec_arith()` uses double dispatch and needs the following standard boilerplate: ```{r} vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Correctly exporting `vec_arith()` methods from a package is currently a little awkward. See the instructions in the Arithmetic section of the "Implementing a vctrs S3 class in a package" section below. ### Cached sum class I showed an example of `vec_math()` to define `sum()` and `mean()` methods for `cached_sum`. Now let's talk about exactly how it works. Most `vec_math()` functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to `vec_math_base()` for those that you don't care about. ```{r} vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ``` ### Meter class To explore the infix arithmetic operators exposed by `vec_arith()` I'll create a new class that represents a measurement in `meter`s: ```{r} new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ``` Because `meter` is built on top of a double vector, basic mathematic operations work: ```{r} sum(x) mean(x) ``` But we can't do arithmetic: ```{r, error = TRUE} x + 1 meter(10) + meter(1) meter(10) * 3 ``` To allow these infix functions to work, we'll need to provide `vec_arith()` generic. But before we do that, let's think about what combinations of inputs we should support: - It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can't multiply meters (because that would yield an area). - For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don't make much sense as we, strictly speaking, are dealing with objects of different nature. `vec_arith()` is another function that uses double dispatch, so as usual we start with a template. ```{r} vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ``` Then write the method for two meter objects. We use a switch statement to cover the cases we care about and `stop_incompatible_op()` to throw an informative error message for everything else. ```{r, error = TRUE} vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) ``` Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while `meter(10) / 2` makes sense, `2 / meter(10)` does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over `numeric` here instead of `double`. ```{r, error = TRUE} vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 ``` For completeness, we also need `vec_arith.vctrs_meter.MISSING` for the unary `+` and `-` operators: ```{r} vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ``` ## Implementing a vctrs S3 class in a package Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things: - Register the S3 methods by listing them in the `NAMESPACE` file. - Create documentation around your methods, for the sake of your user and to satisfy `R CMD check`. Let's assume that the `percent` class is implemented in the pizza package in the file `R/percent.R`. Here we walk through the major sections of this hypothetical file. You've seen all of this code before, but now it's augmented by the roxygen2 directives that produce the correct `NAMESPACE` entries and help topics. ### Getting started First, the pizza package needs to include vctrs in the `Imports` section of its `DESCRIPTION` (perhaps by calling `usethis::use_package("vctrs")`. While vctrs is under very active development, it probably makes sense to state a minimum version. Imports: a_package, another_package, ... vctrs (>= x.y.z), ... Then we make all vctrs functions available within the pizza package by including the directive `#' @import vctrs` somewhere. Usually, it's not good practice to `@import` the entire namespace of a package, but vctrs is deliberately designed with this use case in mind. Where should we put `#' @import vctrs`? There are two natural locations: - With package-level docs in `R/pizza-doc.R`. You can use `usethis::use_package_doc()` to initiate this package-level documentation. - In `R/percent.R`. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package. We also must use one of these locations to dump some internal documentation that's needed to avoid `R CMD check` complaints. We don't expect any human to ever read this documentation. Here's how this dummy documentation should look, combined with the `#' @import vctrs` directive described above. ```{r eval = FALSE} #' Internal vctrs methods #' #' @import vctrs #' @keywords internal #' @name pizza-vctrs NULL ``` This should appear in `R/pizza-doc.R` (package-level docs) or in `R/percent.R` (class-focused file). Remember to call `devtools::document()` regularly, as you develop, to regenerate `NAMESPACE` and the `.Rd` files. From this point on, the code shown is expected to appear in `R/percent.R`. ### Low-level and user-friendly constructors Next we add our constructor: ```{r} new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "pizza_percent") } ``` Note that the name of the package must be included in the class name (`pizza_percent`), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class. We can also add a call to `setOldClass()` for compatibility with S4: ```{r} # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ``` Because we've used a function from the methods package, you'll also need to add methods to `Imports`, with (e.g.) `usethis::use_package("methods")`. This is a "free" dependency because methods is bundled with every R install. Next we implement, export, and document a user-friendly helper: `percent()`. ```{r} #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ``` (Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do `pizza::percent()`; it would be redundant to have `pizza::pizza_percent()`.) ### Other helpers It's a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor `percent()`: ```{r} #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ``` You'll also need to update the `percent()` documentation to reflect that `x` now means two different things: ```{r} #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ``` Next we provide the key methods to make printing work. These are S3 methods, so they don't need to be documented, but they do need to be exported. ```{r eval = FALSE} #' @export format.pizza_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } #' @export vec_ptype_abbr.pizza_percent <- function(x, ...) { "prcnt" } ``` Finally, we implement methods for `vec_ptype2()` and `vec_cast()`. ```{r, eval = FALSE} #' @export vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() #' @export vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() #' @export vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x #' @export vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) #' @export vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ``` ### Arithmetic Writing double dispatch methods for `vec_arith()` is currently more awkward than writing them for `vec_ptype2()` or `vec_cast()`. We plan to improve this in the future. For now, you can use the following instructions. If you define a new type and want to write `vec_arith()` methods for it, you'll need to provide a new single dispatch S3 generic for it of the following form: ```{r, eval=FALSE} #' @export #' @method vec_arith my_type vec_arith.my_type <- function(op, x, y, ...) { UseMethod("vec_arith.my_type", y) } ``` Note that this actually functions as both an S3 method for `vec_arith()` and an S3 generic called `vec_arith.my_type()` that dispatches off `y`. roxygen2 only recognizes it as an S3 generic, so you have to register the S3 method part of this with an explicit `@method` call. After that, you can define double dispatch methods, but you still need an explicit `@method` tag to ensure it is registered with the correct generic: ```{r, eval=FALSE} #' @export #' @method vec_arith.my_type my_type vec_arith.my_type.my_type <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.my_type integer vec_arith.my_type.integer <- function(op, x, y, ...) { # implementation here } #' @export #' @method vec_arith.integer my_type vec_arith.integer.my_type <- function(op, x, y, ...) { # implementation here } ``` vctrs provides the hybrid S3 generics/methods for most of the base R types, like `vec_arith.integer()`. If you don't fully import vctrs with `@import vctrs`, then you will need to explicitly import the generic you are registering double dispatch methods for with `@importFrom vctrs vec_arith.integer`. ### Testing It's good practice to test your new class. Specific recommendations: - `R/percent.R` is the type of file where you really do want 100% test coverage. You can use `devtools::test_coverage_file()` to check this. - Make sure to test behaviour with zero-length inputs and missing values. - Use `testthat::verify_output()` to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about `verify_output()` in the [testthat v2.3.0 blog post](https://tidyverse.org/blog/2019/11/testthat-2-3-0/); it's an example of a so-called [golden test](https://ro-che.info/articles/2017-12-04-golden-tests). - Check for method symmetry; use `expect_s3_class()`, probably with `exact = TRUE`, to ensure that `vec_c(x, y)` and `vec_c(y, x)` return the same type of output for the important `x`s and `y`s in your domain. - Use `testthat::expect_error()` to check that inputs that can't be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include `vctrs_error_assert_ptype`, `vctrs_error_assert_size`, and `vctrs_error_incompatible_type`. ```{r, eval = FALSE} expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") ``` If your tests pass when run by `devtools::test()`, but fail when run in `R CMD check`, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated `NAMESPACE`. ### Existing classes Before you build your own class, you might want to consider using, or subclassing existing classes. You can check [awesome-vctrs](https://github.com/krlmlr/awesome-vctrs) for a curated list of R vector classes, some of which are built with vctrs. If you've built or extended a class, consider adding it to that list so other people can use it. vctrs/inst/doc/type-size.Rmd0000644000176200001440000003155214511320527015540 0ustar liggesusers--- title: "Prototypes and sizes" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Prototypes and sizes} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Rather than using `class()` and `length()`, vctrs has notions of prototype (`vec_ptype_show()`) and size (`vec_size()`). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules. Size and prototype are motivated by thinking about the optimal behaviour for `c()` and `rbind()`, particularly inspired by data frames with columns that are matrices or data frames. ```{r} library(vctrs) ``` ## Prototype The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the `class()` of an object is inadequate for this purpose: * The `class()` doesn't include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a `POSIXct`. You cannot combine two factors or two `POSIXct`s without thinking about the attributes. * The `class()` of a matrix is "matrix" and doesn't include the type of the underlying vector or the dimensionality. Instead, vctrs takes advantage of R's vectorised nature and uses a __prototype__, a 0-observation slice of the vector (this is basically `x[0]` but with some subtleties we'll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data. Conveniently, you can create many prototypes using existing base functions (e.g, `double()` and `factor(levels = c("a", "b"))`). vctrs provides a few helpers (e.g. `new_date()`, `new_datetime()`, and `new_duration()`) where the equivalents in base R are missing. ### Base prototypes `vec_ptype()` creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides `vec_ptype_show()`, which prints the prototype in a friendly way (and returns nothing). Using `vec_ptype_show()` allows us to see the prototypes base R classes: * Atomic vectors have no attributes and just display the underlying `typeof()`: ```{r} vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ``` * The prototype of matrices and arrays include the base type and the dimensions after the first: ```{r} vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ``` * The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it's highly likely that their levels are also equal. ```{r} vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ``` While `vec_ptype_show()` prints only the hash, the prototype object itself does contain all levels: ```{r} vec_ptype(factor("a")) ``` * Base R has three key date time classes: dates, date-times (`POSIXct`), and durations (`difftime)`. Date-times have a timezone, and durations have a unit. ```{r} vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ``` * Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column: ```{r} vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ``` Data frames can have columns that are themselves data frames, making this a "recursive" type: ```{r} df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ``` ### Coercing to common type It's often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via `vec_ptype_common()`. `vec_ptype_common()` possesses the following invariants: * `class(vec_ptype_common(x, y))` equals `class(vec_ptype_common(y, x))`. * `class(vec_ptype_common(x, vec_ptype_common(y, z))` equals `class(vec_ptype_common(vec_ptype_common(x, y), z))`. * `vec_ptype_common(x, NULL) == vec_ptype(x)`. i.e., `vec_ptype_common()` is both commutative and associative (with respect to class) and has an identity element, `NULL`; i.e., it's a __commutative monoid__. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects. Like with `vec_ptype()`, the easiest way to explore `vec_ptype_common()` is with `vec_ptype_show()`: when given multiple inputs, it will print their common prototype. (In other words: program with `vec_ptype_common()` but play with `vec_ptype_show()`.) * The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically: ```{r, error = TRUE} vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) ``` * Matrices and arrays are automatically broadcast to higher dimensions: ```{r} vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ``` Provided that the dimensions follow the vctrs recycling rules: ```{r, error = TRUE} vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) ``` * Factors combine levels in the order in which they appear. ```{r} fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ``` * Combining a date and date-time yields a date-time: ```{r} vec_ptype_show(new_date(), new_datetime()) ``` When combining two date times, the timezone is taken from the first input: ```{r} vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ``` Unless it's the local timezone, in which case any explicit time zone will win: ```{r} vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ``` * The common type of two data frames is the common type of each column that occurs in both data frames: ```{r} vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ``` And the union of the columns that only occur in one: ```{r} vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ``` Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled. ### Casting to specified type `vec_ptype_common()` finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That's the job of `vec_cast_common()`: ```{r} str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ``` Alternatively, you can cast to a specific prototype using `vec_cast()`: ```{r, error = TRUE} # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) ``` If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error. ```{r, error = TRUE} vec_cast(c(1.5, 2), integer()) ``` You can suppress the lossy cast errors with `allow_lossy_cast()`: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ``` This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed: ```{r} allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ``` The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound. ## Size `vec_size()` was motivated by the need to have an invariant that describes the number of "observations" in a data structure. This is particularly important for data frames, as it's useful to have some function such that `f(data.frame(x))` equals `f(x)`. No base function has this property: * `length(data.frame(x))` equals `1` because the length of a data frame is the number of columns. * `nrow(data.frame(x))` does not equal `nrow(x)` because `nrow()` of a vector is `NULL`. * `NROW(data.frame(x))` equals `NROW(x)` for vector `x`, so is almost what we want. But because `NROW()` is defined in terms of `length()`, it returns a value for every object, even types that can't go in a data frame, e.g. `data.frame(mean)` errors even though `NROW(mean)` is `1`. We define `vec_size()` as follows: * It is the length of 1d vectors. * It is the number of rows of data frames, matrices, and arrays. * It throws error for non vectors. Given `vec_size()`, we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns. ### Slicing `vec_slice()` is to `vec_size()` as `[` is to `length()`; i.e., it allows you to select observations regardless of the dimensionality of the underlying object. `vec_slice(x, i)` is equivalent to: * `x[i]` when `x` is a vector. * `x[i, , drop = FALSE]` when `x` is a data frame. * `x[i, , , drop = FALSE]` when `x` is a 3d array. ```{r} x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ``` `vec_slice(data.frame(x), i)` equals `data.frame(vec_slice(x, i))` (modulo variable and row names). Prototypes are generated with `vec_slice(x, 0L)`; given a prototype, you can initialize a vector of given size (filled with `NA`s) with `vec_init()`. ### Common sizes: recycling rules Closely related to the definition of size are the __recycling rules__. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in `vec_size_common()`, which gives the common size of a set of vectors: ```{r} vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ``` vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like `dest == c("IAH", "HOU"))`, at the cost of occasionally requiring an explicit calls to `rep()`. ```{r, echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates an error"} knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ``` You can apply the recycling rules in two ways: * If you have a vector and desired size, use `vec_recycle()`: ```{r} vec_recycle(1:3, 3) vec_recycle(1, 10) ``` * If you have multiple vectors and you want to recycle them to the same size, use `vec_recycle_common()`: ```{r} vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ``` ## Appendix: recycling in base R The recycling rules in base R are described in [The R Language Definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Recycling-rules) but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions. Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer: ```{r} rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ``` If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning: ```{r} invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ``` But some functions recycle silently: ```{r} length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ``` And `data.frame()` throws an error: ```{r, error = TRUE} data.frame(1:2, 1:3) ``` The R language definition states that "any arithmetic operation involving a zero-length vector has a zero-length result". But outside of arithmetic, this rule is not consistently followed: ```{r, error = TRUE} # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) ``` vctrs/inst/doc/pillar.Rmd0000644000176200001440000002126014315060310015056 0ustar liggesusers--- title: "Printing vectors nicely in tibbles" author: "Kirill Müller, Hadley Wickham" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Printing vectors nicely in tibbles} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` You can get basic control over how a vector is printed in a tibble by providing a `format()` method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics: * `vctrs::vec_ptype_abbr()` determines what goes into the column header. * `pillar::pillar_shaft()` determines what goes into the body, or the shaft, of the column. Technically a [*pillar*](https://en.wikipedia.org/wiki/Column#Nomenclature) is composed of a *shaft* (decorated with an *ornament*), with a *capital* above and a *base* below. Multiple pillars form a *colonnade*, which can be stacked in multiple *tiers*. This is the motivation behind the names in our API. This short vignette shows the basics of column styling using a `"latlon"` vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you'll need to create documentation and the `NAMESPACE` file. In this vignette, we'll attach pillar and vctrs: ```{r setup} library(vctrs) library(pillar) ``` You don't need to do this in a package. Instead, you'll need to _import_ the packages by then to the `Imports:` section of your `DESCRIPTION`. The following helper does this for you: ```{r, eval = FALSE} usethis::use_package("vctrs") usethis::use_package("pillar") ``` ## Prerequisites To illustrate the basic ideas we're going to create a `"latlon"` class that encodes geographic coordinates in a record. We'll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using `vctrs_rcrd()`, we already get the infrastructure to make this class fully compatible with data frames for free. See `vignette("s3-vector", package = "vctrs")` for details on the record data type. ```{r} #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ``` ## Using in a tibble Columns of this class can be used in a tibble right away because we've made a class using the vctrs infrastructure and have provided a `format()` method: ```{r} library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ``` This output is ok, but we could improve it by: 1. Using a more description type abbreviation than ``. 1. Using a dash of colour to highlight the most important parts of the value. 1. Providing a narrower view when horizontal space is at a premium. The following sections show how to enhance the rendering. ## Fixing the data type Instead of `` we'd prefer to use ``. We can do that by implementing the `vec_ptype_abbr()` method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that's under 6 characters. ```{r} #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ``` ## Custom rendering The `format()` method is used by default for rendering. For custom formatting you need to implement the `pillar_shaft()` method. This function should always return a pillar shaft object, created by `new_pillar_shaft_simple()` or similar. `new_pillar_shaft_simple()` accepts ANSI escape codes for colouring, and pillar includes some built in styles like `style_subtle()`. We can use subtle style for the degree and minute separators to make the data more obvious. First we define a degree formatter that makes use of `style_subtle()`: ```{r} deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ``` And then we pass that to our `format()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ``` Currently, ANSI escapes are not rendered in vignettes, so this result doesn't look any different, but if you run the code yourself you'll see an improved display. ```{r} data ``` As well as the functions in pillar, the [cli](https://cli.r-lib.org/) package provides a variety of tools for styling text. ## Truncation Tibbles can automatically compacts columns when there's no enough horizontal space to display everything: ```{r} print(data, width = 30) ``` Currently the latlon class isn't ever compacted because we haven't specified a minimum width when constructing the shaft. Let's fix that and re-print the data: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ``` ## Adaptive rendering Truncation may be useful for character data, but for lat-lon data it'd be nicer to show full degrees and remove the minutes. We'll first write a function that does this: ```{r} deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ``` Then use it as part of more sophisticated implementation of the `pillar_shaft()` method: ```{r} #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ``` Now the `pillar_shaft()` method returns an object of class `"pillar_shaft_latlon"` created by `new_pillar_shaft()`. This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (`get_max_extent()` is a helper that computes the maximum display width occupied by the values in a character vector.) All that's left to do is to implement a `format()` method for our new `"pillar_shaft_latlon"` class. This method will be called with a `width` argument, which then determines which of the formats to choose. The formatting of our choice is passed to the `new_ornament()` function: ```{r} #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ``` ## Testing If you want to test the output of your code, you can compare it with a known state recorded in a text file. The `testthat::expect_snapshot()` function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won't make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN. Use this testthat expectation in one of your test files to create a snapshot test: ```{r eval = FALSE} expect_snapshot(pillar_shaft(data$loc)) ``` See for more information. vctrs/inst/doc/s3-vector.R0000644000176200001440000004711415157322645015126 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) set.seed(1014) ## ----setup-------------------------------------------------------------------- library(vctrs) library(rlang) library(zeallot) ## ----------------------------------------------------------------------------- new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "vctrs_percent") } x <- new_percent(c(seq(0, 1, length.out = 4), NA)) x str(x) ## ----------------------------------------------------------------------------- percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ## ----------------------------------------------------------------------------- new_percent() percent() ## ----------------------------------------------------------------------------- is_percent <- function(x) { inherits(x, "vctrs_percent") } ## ----------------------------------------------------------------------------- format.vctrs_percent <- function(x, ...) { out <- formatC(signif(vec_data(x) * 100, 3)) out[is.na(x)] <- NA out[!is.na(x)] <- paste0(out[!is.na(x)], "%") out } ## ----include = FALSE---------------------------------------------------------- # As of R 3.5, print.vctr can not find format.percent since it's not in # its lexical environment. We fix that problem by manually registering. s3_register("base::format", "vctrs_percent") ## ----------------------------------------------------------------------------- x ## ----------------------------------------------------------------------------- data.frame(x) ## ----------------------------------------------------------------------------- vec_ptype_abbr.vctrs_percent <- function(x, ...) { "prcnt" } tibble::tibble(x) str(x) ## ----error = TRUE------------------------------------------------------------- try({ vec_ptype2("bogus", percent()) vec_ptype2(percent(), NA) vec_ptype2(NA, percent()) }) ## ----------------------------------------------------------------------------- vec_ptype2(percent(), percent()) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_percent.double <- function(x, y, ...) double() vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() ## ----------------------------------------------------------------------------- vec_ptype_show(percent(), double(), percent()) ## ----------------------------------------------------------------------------- vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x ## ----------------------------------------------------------------------------- vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x) vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x) ## ----------------------------------------------------------------------------- vec_cast(0.5, percent()) vec_cast(percent(0.5), double()) ## ----error = TRUE------------------------------------------------------------- try({ vec_c(percent(0.5), 1) vec_c(NA, percent(0.5)) # but vec_c(TRUE, percent(0.5)) x <- percent(c(0.5, 1, 2)) x[1:2] <- 2:1 x[[3]] <- 0.5 x }) ## ----error = TRUE------------------------------------------------------------- try({ # Correct c(percent(0.5), 1) c(percent(0.5), factor(1)) # Incorrect c(factor(1), percent(0.5)) }) ## ----------------------------------------------------------------------------- as_percent <- function(x) { vec_cast(x, new_percent()) } ## ----------------------------------------------------------------------------- as_percent <- function(x, ...) { UseMethod("as_percent") } as_percent.default <- function(x, ...) { vec_cast(x, new_percent()) } as_percent.character <- function(x) { value <- as.numeric(gsub(" *% *$", "", x)) / 100 new_percent(value) } ## ----------------------------------------------------------------------------- new_decimal <- function(x = double(), digits = 2L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_integer(digits)) { abort("`digits` must be an integer vector.") } vec_check_size(digits, size = 1L) new_vctr(x, digits = digits, class = "vctrs_decimal") } decimal <- function(x = double(), digits = 2L) { x <- vec_cast(x, double()) digits <- vec_recycle(vec_cast(digits, integer()), 1L) new_decimal(x, digits = digits) } digits <- function(x) attr(x, "digits") format.vctrs_decimal <- function(x, ...) { sprintf(paste0("%-0.", digits(x), "f"), x) } vec_ptype_abbr.vctrs_decimal <- function(x, ...) { "dec" } x <- decimal(runif(10), 1L) x ## ----------------------------------------------------------------------------- x[1:2] x[[1]] ## ----------------------------------------------------------------------------- vec_ptype_full.vctrs_decimal <- function(x, ...) { paste0("decimal<", digits(x), ">") } x ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) { new_decimal(digits = max(digits(x), digits(y))) } vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) { new_decimal(vec_data(x), digits = digits(to)) } vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2)) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y vec_cast.vctrs_decimal.double <- function(x, to, ...) new_decimal(x, digits = digits(to)) vec_cast.double.vctrs_decimal <- function(x, to, ...) vec_data(x) vec_c(decimal(1, digits = 1), pi) vec_c(pi, decimal(1, digits = 1)) ## ----error = TRUE------------------------------------------------------------- try({ vec_cast(c(1, 2, 10), to = integer()) vec_cast(c(1.5, 2, 10.5), to = integer()) }) ## ----------------------------------------------------------------------------- new_cached_sum <- function(x = double(), sum = 0L) { if (!is_double(x)) { abort("`x` must be a double vector.") } if (!is_double(sum)) { abort("`sum` must be a double vector.") } vec_check_size(sum, size = 1L) new_vctr(x, sum = sum, class = "vctrs_cached_sum") } cached_sum <- function(x) { x <- vec_cast(x, double()) new_cached_sum(x, sum(x)) } ## ----------------------------------------------------------------------------- obj_print_footer.vctrs_cached_sum <- function(x, ...) { cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "") } x <- cached_sum(runif(10)) x ## ----------------------------------------------------------------------------- vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { cat("Using cache\n") switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } sum(x) ## ----------------------------------------------------------------------------- x[1:2] ## ----------------------------------------------------------------------------- vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) { new_cached_sum(x, sum(x)) } x[1] ## ----------------------------------------------------------------------------- x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3)) x length(x) length(unclass(x)) x[[1]] # the first date time unclass(x)[[1]] # the first component, the number of seconds ## ----------------------------------------------------------------------------- new_rational <- function(n = integer(), d = integer()) { if (!is_integer(n)) { abort("`n` must be an integer vector.") } if (!is_integer(d)) { abort("`d` must be an integer vector.") } new_rcrd(list(n = n, d = d), class = "vctrs_rational") } ## ----------------------------------------------------------------------------- rational <- function(n = integer(), d = integer()) { c(n, d) %<-% vec_cast_common(n, d, .to = integer()) c(n, d) %<-% vec_recycle_common(n, d) new_rational(n, d) } x <- rational(1, 1:10) ## ----------------------------------------------------------------------------- names(x) length(x) ## ----------------------------------------------------------------------------- fields(x) field(x, "n") ## ----error = TRUE------------------------------------------------------------- try({ x str(x) }) ## ----------------------------------------------------------------------------- vec_data(x) str(vec_data(x)) ## ----------------------------------------------------------------------------- format.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") out <- paste0(n, "/", d) out[is.na(n) | is.na(d)] <- NA out } vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl" vec_ptype_full.vctrs_rational <- function(x, ...) "rational" x ## ----------------------------------------------------------------------------- str(x) ## ----------------------------------------------------------------------------- vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational() vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational() vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational() vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d") vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1) vec_c(rational(1, 2), 1L, NA) ## ----------------------------------------------------------------------------- new_decimal2 <- function(l, r, scale = 2L) { if (!is_integer(l)) { abort("`l` must be an integer vector.") } if (!is_integer(r)) { abort("`r` must be an integer vector.") } if (!is_integer(scale)) { abort("`scale` must be an integer vector.") } vec_check_size(scale, size = 1L) new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2") } decimal2 <- function(l, r, scale = 2L) { l <- vec_cast(l, integer()) r <- vec_cast(r, integer()) c(l, r) %<-% vec_recycle_common(l, r) scale <- vec_cast(scale, integer()) # should check that r < 10^scale new_decimal2(l = l, r = r, scale = scale) } format.vctrs_decimal2 <- function(x, ...) { val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale") sprintf(paste0("%.0", attr(x, "scale"), "f"), val) } decimal2(10, c(0, 5, 99)) ## ----------------------------------------------------------------------------- x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2)) x vec_proxy(x) x == rational(1, 1) ## ----------------------------------------------------------------------------- # Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632 gcd <- function(x, y) { r <- x %% y ifelse(r, gcd(y, r), y) } vec_proxy_equal.vctrs_rational <- function(x, ...) { n <- field(x, "n") d <- field(x, "d") gcd <- gcd(n, d) data.frame(n = n / gcd, d = d / gcd) } vec_proxy_equal(x) x == rational(1, 1) ## ----------------------------------------------------------------------------- unique(x) ## ----------------------------------------------------------------------------- rational(1, 2) < rational(2, 3) rational(2, 4) < rational(2, 3) ## ----------------------------------------------------------------------------- vec_proxy_compare.vctrs_rational <- function(x, ...) { field(x, "n") / field(x, "d") } rational(2, 4) < rational(2, 3) ## ----------------------------------------------------------------------------- sort(x) ## ----------------------------------------------------------------------------- poly <- function(...) { x <- vec_cast_common(..., .to = integer()) new_poly(x) } new_poly <- function(x) { new_list_of(x, ptype = integer(), class = "vctrs_poly_list") } vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial" vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly" format.vctrs_poly_list <- function(x, ...) { format_one <- function(x) { if (length(x) == 0) { return("") } if (length(x) == 1) { format(x) } else { suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "") out <- paste0(x, suffix) out <- out[x != 0L] paste0(out, collapse = " + ") } } vapply(x, format_one, character(1)) } obj_print_data.vctrs_poly_list <- function(x, ...) { if (length(x) != 0) { print(format(x), quote = FALSE) } } p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ## ----------------------------------------------------------------------------- class(p) p[2] p[[2]] ## ----------------------------------------------------------------------------- obj_is_list(p) ## ----------------------------------------------------------------------------- poly <- function(...) { x <- vec_cast_common(..., .to = integer()) x <- new_poly(x) new_rcrd(list(data = x), class = "vctrs_poly") } format.vctrs_poly <- function(x, ...) { format(field(x, "data")) } ## ----------------------------------------------------------------------------- p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1)) p ## ----------------------------------------------------------------------------- obj_is_list(p) ## ----------------------------------------------------------------------------- p[[2]] ## ----------------------------------------------------------------------------- p == poly(c(1, 0, 1)) ## ----error = TRUE------------------------------------------------------------- try({ p < p[2] }) ## ----------------------------------------------------------------------------- vec_proxy_compare.vctrs_poly <- function(x, ...) { # Get the list inside the record vector x_raw <- vec_data(field(x, "data")) # First figure out the maximum length n <- max(vapply(x_raw, length, integer(1))) # Then expand all vectors to this length by filling in with zeros full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x)) # Then turn into a data frame as.data.frame(do.call(rbind, full)) } p < p[2] ## ----------------------------------------------------------------------------- sort(p) sort(p[c(1:3, 1:2)]) ## ----------------------------------------------------------------------------- vec_proxy_order.vctrs_poly <- function(x, ...) { vec_proxy_compare(x, ...) } sort(p) ## ----------------------------------------------------------------------------- vec_arith.MYCLASS <- function(op, x, y, ...) { UseMethod("vec_arith.MYCLASS", y) } vec_arith.MYCLASS.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ## ----------------------------------------------------------------------------- vec_math.vctrs_cached_sum <- function(.fn, .x, ...) { switch(.fn, sum = attr(.x, "sum"), mean = attr(.x, "sum") / length(.x), vec_math_base(.fn, .x, ...) ) } ## ----------------------------------------------------------------------------- new_meter <- function(x) { stopifnot(is.double(x)) new_vctr(x, class = "vctrs_meter") } format.vctrs_meter <- function(x, ...) { paste0(format(vec_data(x)), " m") } meter <- function(x) { x <- vec_cast(x, double()) new_meter(x) } x <- meter(1:10) x ## ----------------------------------------------------------------------------- sum(x) mean(x) ## ----error = TRUE------------------------------------------------------------- try({ x + 1 meter(10) + meter(1) meter(10) * 3 }) ## ----------------------------------------------------------------------------- vec_arith.vctrs_meter <- function(op, x, y, ...) { UseMethod("vec_arith.vctrs_meter", y) } vec_arith.vctrs_meter.default <- function(op, x, y, ...) { stop_incompatible_op(op, x, y) } ## ----error = TRUE------------------------------------------------------------- try({ vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) { switch( op, "+" = , "-" = new_meter(vec_arith_base(op, x, y)), "/" = vec_arith_base(op, x, y), stop_incompatible_op(op, x, y) ) } meter(10) + meter(1) meter(10) - meter(1) meter(10) / meter(1) meter(10) * meter(1) }) ## ----error = TRUE------------------------------------------------------------- try({ vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) { switch( op, "/" = , "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) { switch( op, "*" = new_meter(vec_arith_base(op, x, y)), stop_incompatible_op(op, x, y) ) } meter(2) * 10 meter(2) * as.integer(10) 10 * meter(2) meter(20) / 10 10 / meter(20) meter(20) + 10 }) ## ----------------------------------------------------------------------------- vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) { switch(op, `-` = x * -1, `+` = x, stop_incompatible_op(op, x, y) ) } -meter(1) +meter(1) ## ----eval = FALSE------------------------------------------------------------- # #' Internal vctrs methods # #' # #' @import vctrs # #' @keywords internal # #' @name pizza-vctrs # NULL ## ----------------------------------------------------------------------------- new_percent <- function(x = double()) { if (!is_double(x)) { abort("`x` must be a double vector.") } new_vctr(x, class = "pizza_percent") } ## ----------------------------------------------------------------------------- # for compatibility with the S4 system methods::setOldClass(c("pizza_percent", "vctrs_vctr")) ## ----------------------------------------------------------------------------- #' `percent` vector #' #' This creates a double vector that represents percentages so when it is #' printed, it is multiplied by 100 and suffixed with `%`. #' #' @param x A numeric vector #' @return An S3 vector of class `pizza_percent`. #' @export #' @examples #' percent(c(0.25, 0.5, 0.75)) percent <- function(x = double()) { x <- vec_cast(x, double()) new_percent(x) } ## ----------------------------------------------------------------------------- #' @export #' @rdname percent is_percent <- function(x) { inherits(x, "pizza_percent") } ## ----------------------------------------------------------------------------- #' @param x #' * For `percent()`: A numeric vector #' * For `is_percent()`: An object to test. ## ----eval = FALSE------------------------------------------------------------- # #' @export # format.pizza_percent <- function(x, ...) { # out <- formatC(signif(vec_data(x) * 100, 3)) # out[is.na(x)] <- NA # out[!is.na(x)] <- paste0(out[!is.na(x)], "%") # out # } # # #' @export # vec_ptype_abbr.pizza_percent <- function(x, ...) { # "prcnt" # } ## ----eval = FALSE------------------------------------------------------------- # #' @export # vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent() # #' @export # vec_ptype2.double.vctrs_percent <- function(x, y, ...) double() # # #' @export # vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x # #' @export # vec_cast.pizza_percent.double <- function(x, to, ...) percent(x) # #' @export # vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x) ## ----eval=FALSE--------------------------------------------------------------- # #' @export # #' @method vec_arith my_type # vec_arith.my_type <- function(op, x, y, ...) { # UseMethod("vec_arith.my_type", y) # } ## ----eval=FALSE--------------------------------------------------------------- # #' @export # #' @method vec_arith.my_type my_type # vec_arith.my_type.my_type <- function(op, x, y, ...) { # # implementation here # } # # #' @export # #' @method vec_arith.my_type integer # vec_arith.my_type.integer <- function(op, x, y, ...) { # # implementation here # } # # #' @export # #' @method vec_arith.integer my_type # vec_arith.integer.my_type <- function(op, x, y, ...) { # # implementation here # } ## ----eval = FALSE------------------------------------------------------------- # expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type") vctrs/inst/doc/type-size.R0000644000176200001440000001263115157322653015225 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- library(vctrs) ## ----------------------------------------------------------------------------- vec_ptype_show(FALSE) vec_ptype_show(1L) vec_ptype_show(2.5) vec_ptype_show("three") vec_ptype_show(list(1, 2, 3)) ## ----------------------------------------------------------------------------- vec_ptype_show(array(logical(), c(2, 3))) vec_ptype_show(array(integer(), c(2, 3, 4))) vec_ptype_show(array(character(), c(2, 3, 4, 5))) ## ----------------------------------------------------------------------------- vec_ptype_show(factor("a")) vec_ptype_show(ordered("b")) ## ----------------------------------------------------------------------------- vec_ptype(factor("a")) ## ----------------------------------------------------------------------------- vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(as.difftime(10, units = "mins")) ## ----------------------------------------------------------------------------- vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x")) ## ----------------------------------------------------------------------------- df <- data.frame(x = FALSE) df$y <- data.frame(a = 1L, b = 2.5) vec_ptype_show(df) ## ----error = TRUE------------------------------------------------------------- try({ vec_ptype_show(logical(), integer(), double()) vec_ptype_show(logical(), character()) }) ## ----------------------------------------------------------------------------- vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 2)) ) vec_ptype_show( array(1, c(0, 1)), array(1, c(0, 3)), array(1, c(0, 3, 4)), array(1, c(0, 3, 4, 5)) ) ## ----error = TRUE------------------------------------------------------------- try({ vec_ptype_show( array(1, c(0, 2)), array(1, c(0, 3)) ) }) ## ----------------------------------------------------------------------------- fa <- factor("a") fb <- factor("b") levels(vec_ptype_common(fa, fb)) levels(vec_ptype_common(fb, fa)) ## ----------------------------------------------------------------------------- vec_ptype_show(new_date(), new_datetime()) ## ----------------------------------------------------------------------------- vec_ptype_show( new_datetime(tzone = "US/Central"), new_datetime(tzone = "Pacific/Auckland") ) ## ----------------------------------------------------------------------------- vec_ptype_show( new_datetime(tzone = ""), new_datetime(tzone = ""), new_datetime(tzone = "Pacific/Auckland") ) ## ----------------------------------------------------------------------------- vec_ptype_show( data.frame(x = FALSE), data.frame(x = 1L), data.frame(x = 2.5) ) ## ----------------------------------------------------------------------------- vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1)) ## ----------------------------------------------------------------------------- str(vec_cast_common( FALSE, 1:5, 2.5 )) str(vec_cast_common( factor("x"), factor("y") )) str(vec_cast_common( data.frame(x = 1), data.frame(y = 1:2) )) ## ----error = TRUE------------------------------------------------------------- try({ # Cast succeeds vec_cast(c(1, 2), integer()) # Cast fails vec_cast(c(1.5, 2.5), factor("a")) }) ## ----error = TRUE------------------------------------------------------------- try({ vec_cast(c(1.5, 2), integer()) }) ## ----------------------------------------------------------------------------- allow_lossy_cast( vec_cast(c(1.5, 2), integer()) ) ## ----------------------------------------------------------------------------- allow_lossy_cast( vec_cast(c(1.5, 2), integer()), x_ptype = double(), to_ptype = integer() ) ## ----------------------------------------------------------------------------- x <- sample(1:10) df <- data.frame(x = x) vec_slice(x, 5:6) vec_slice(df, 5:6) ## ----------------------------------------------------------------------------- vec_size_common(1:3, 1:3, 1:3) vec_size_common(1:10, 1) vec_size_common(integer(), 1) ## ----echo = FALSE, fig.cap="Summary of vctrs recycling rules. X indicates an error"---- knitr::include_graphics("../man/figures/sizes-recycling.png", dpi = 300) ## ----------------------------------------------------------------------------- vec_recycle(1:3, 3) vec_recycle(1, 10) ## ----------------------------------------------------------------------------- vec_recycle_common(1:3, 1:3) vec_recycle_common(1:10, 1) ## ----------------------------------------------------------------------------- rep(1, 6) + 1 rep(1, 6) + 1:2 rep(1, 6) + 1:3 ## ----------------------------------------------------------------------------- invisible(pmax(1:2, 1:3)) invisible(1:2 + 1:3) invisible(cbind(1:2, 1:3)) ## ----------------------------------------------------------------------------- length(atan2(1:3, 1:2)) length(paste(1:3, 1:2)) length(ifelse(1:3, 1:2, 1:2)) ## ----error = TRUE------------------------------------------------------------- try({ data.frame(1:2, 1:3) }) ## ----error = TRUE------------------------------------------------------------- try({ # length-0 output 1:2 + integer() atan2(1:2, integer()) pmax(1:2, integer()) # dropped cbind(1:2, integer()) # recycled to length of first ifelse(rep(TRUE, 4), integer(), character()) # preserved-ish paste(1:2, integer()) # Errors data.frame(1:2, integer()) }) vctrs/inst/doc/s3-vector.html0000644000176200001440000044702215157322646015674 0ustar liggesusers S3 vectors

S3 vectors

This vignette shows you how to create your own S3 vector classes. It focuses on the aspects of making a vector class that every class needs to worry about; you’ll also need to provide methods that actually make the vector useful.

I assume that you’re already familiar with the basic machinery of S3, and the vocabulary I use in Advanced R: constructor, helper, and validator. If not, I recommend reading at least the first two sections of the S3 chapter of Advanced R.

This article refers to “vectors of numbers” as double vectors. Here, “double” stands for “double precision floating point number”, see also double().

library(vctrs)
library(rlang)
library(zeallot)

This vignette works through five big topics:

  • The basics of creating a new vector class with vctrs.
  • The coercion and casting system.
  • The record and list-of types.
  • Equality and comparison proxies.
  • Arithmetic operators.

They’re collectively demonstrated with a number of simple S3 classes:

  • Percent: a double vector that prints as a percentage. This illustrates the basic mechanics of class creation, coercion, and casting.

  • Decimal: a double vector that always prints with a fixed number of decimal places. This class has an attribute which needs a little extra care in casts and coercions.

  • Cached sum: a double vector that caches the total sum in an attribute. The attribute depends on the data, so needs extra care.

  • Rational: a pair of integer vectors that defines a rational number like 2 / 3. This introduces you to the record style, and to the equality and comparison operators. It also needs special handling for +, -, and friends.

  • Polynomial: a list of integer vectors that define polynomials like 1 + x - x^3. Sorting such vectors correctly requires a custom equality method.

  • Meter: a numeric vector with meter units. This is the simplest possible class with interesting algebraic properties.

  • Period and frequency: a pair of classes represent a period, or its inverse, frequency. This allows us to explore more arithmetic operators.

Basics

In this section you’ll learn how to create a new vctrs class by calling new_vctr(). This creates an object with class vctrs_vctr which has a number of methods. These are designed to make your life as easy as possible. For example:

  • The print() and str() methods are defined in terms of format() so you get a pleasant, consistent display as soon as you’ve made your format() method.

  • You can immediately put your new vector class in a data frame because as.data.frame.vctrs_vctr() does the right thing.

  • Subsetting ([, [[, and $), length<-, and rep() methods automatically preserve attributes because they use vec_restore(). A default vec_restore() works for all classes where the attributes are data-independent, and can easily be customised when the attributes do depend on the data.

  • Default subset-assignment methods ([<-, [[<-, and $<-) follow the principle that the new values should be coerced to match the existing vector. This gives predictable behaviour and clear error messages.

Percent class

In this section, I’ll show you how to make a percent class, i.e., a double vector that is printed as a percentage. We start by defining a low-level constructor to check types and/or sizes and call new_vctr().

percent is built on a double vector of any length and doesn’t have any attributes.

new_percent <- function(x = double()) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  new_vctr(x, class = "vctrs_percent")
}

x <- new_percent(c(seq(0, 1, length.out = 4), NA))
x
#> <vctrs_percent[5]>
#> [1] 0.0000000 0.3333333 0.6666667 1.0000000        NA

str(x)
#>  vctrs_pr [1:5] 0.0000000, 0.3333333, 0.6666667, 1.0000000,        NA

Note that we prefix the name of the class with the name of the package. This prevents conflicting definitions between packages. For packages that implement only one class (such as blob), it’s fine to use the package name without prefix as the class name.

We then follow up with a user friendly helper. Here we’ll use vec_cast() to allow it to accept anything coercible to a double:

percent <- function(x = double()) {
  x <- vec_cast(x, double())
  new_percent(x)
}

Before you go on, check that user-friendly constructor returns a zero-length vector when called with no arguments. This makes it easy to use as a prototype.

new_percent()
#> <vctrs_percent[0]>
percent()
#> <vctrs_percent[0]>

For the convenience of your users, consider implementing an is_percent() function:

is_percent <- function(x) {
  inherits(x, "vctrs_percent")
}

format() method

The first method for every class should almost always be a format() method. This should return a character vector the same length as x. The easiest way to do this is to rely on one of R’s low-level formatting functions like formatC():

format.vctrs_percent <- function(x, ...) {
  out <- formatC(signif(vec_data(x) * 100, 3))
  out[is.na(x)] <- NA
  out[!is.na(x)] <- paste0(out[!is.na(x)], "%")
  out
}
x
#> <vctrs_percent[5]>
#> [1] 0%    33.3% 66.7% 100%  <NA>

(Note the use of vec_data() so format() doesn’t get stuck in an infinite loop, and that I take a little care to not convert NA to "NA"; this leads to better printing.)

The format method is also used by data frames, tibbles, and str():

data.frame(x)
#>       x
#> 1    0%
#> 2 33.3%
#> 3 66.7%
#> 4  100%
#> 5  <NA>

For optimal display, I recommend also defining an abbreviated type name, which should be 4-5 letters for commonly used vectors. This is used in tibbles and in str():

vec_ptype_abbr.vctrs_percent <- function(x, ...) {
  "prcnt"
}

tibble::tibble(x)
#> # A tibble: 5 × 1
#>         x
#>   <prcnt>
#> 1      0%
#> 2   33.3%
#> 3   66.7%
#> 4    100%
#> 5      NA

str(x)
#>  prcnt [1:5] 0%, 33.3%, 66.7%, 100%, <NA>

If you need more control over printing in tibbles, implement a method for pillar::pillar_shaft(). See vignette("pillar", package = "vctrs") for details.

Casting and coercion

The next set of methods you are likely to need are those related to coercion and casting. Coercion and casting are two sides of the same coin: changing the prototype of an existing object. When the change happens implicitly (e.g in c()) we call it coercion; when the change happens explicitly (e.g. with as.integer(x)), we call it casting.

One of the main goals of vctrs is to put coercion and casting on a robust theoretical footing so it’s possible to make accurate predictions about what (e.g.) c(x, y) should do when x and y have different prototypes. vctrs achieves this goal through two generics:

  • vec_ptype2(x, y) defines possible set of coercions. It returns a prototype if x and y can be safely coerced to the same prototype; otherwise it returns an error. The set of automatic coercions is usually quite small because too many tend to make code harder to reason about and silently propagate mistakes.

  • vec_cast(x, to) defines the possible sets of casts. It returns x translated to have prototype to, or throws an error if the conversion isn’t possible. The set of possible casts is a superset of possible coercions because they’re requested explicitly.

Double dispatch

Both generics use double dispatch which means that the implementation is selected based on the class of two arguments, not just one. S3 does not natively support double dispatch, so we implement our own dispatch mechanism. In practice, this means:

  • You end up with method names with two classes, like vec_ptype2.foo.bar().

  • You don’t need to implement default methods (they would never be called if you do).

  • You can’t call NextMethod().

Percent class

We’ll make our percent class coercible back and forth with double vectors.

vec_ptype2() provides a user friendly error message if the coercion doesn’t exist and makes sure NA is handled in a standard way. NA is technically a logical vector, but we want to stand in for a missing value of any type.

vec_ptype2("bogus", percent())
#> Error:
#> ! Can't combine `"bogus"` <character> and `percent()` <vctrs_percent>.
vec_ptype2(percent(), NA)
#> <vctrs_percent[0]>
vec_ptype2(NA, percent())
#> <vctrs_percent[0]>

By default and in simple cases, an object of the same class is compatible with itself:

vec_ptype2(percent(), percent())
#> <vctrs_percent[0]>

However this only works if the attributes for both objects are the same. Also the default methods are a bit slower. It is always a good idea to provide an explicit coercion method for the case of identical classes. So we’ll start by saying that a vctrs_percent combined with a vctrs_percent yields a vctrs_percent, which we indicate by returning a prototype generated by the constructor.

vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent()

Next we define methods that say that combining a percent and double should yield a double. We avoid returning a percent here because errors in the scale (1 vs. 0.01) are more obvious with raw numbers.

Because double dispatch is a bit of a hack, we need to provide two methods. It’s your responsibility to ensure that each member of the pair returns the same result: if they don’t you will get weird and unpredictable behaviour.

The double dispatch mechanism requires us to refer to the underlying type, double, in the method name. If we implemented vec_ptype2.vctrs_percent.numeric(), it would never be called.

vec_ptype2.vctrs_percent.double <- function(x, y, ...) double()
vec_ptype2.double.vctrs_percent <- function(x, y, ...) double()

We can check that we’ve implemented this correctly with vec_ptype_show():

vec_ptype_show(percent(), double(), percent())
#> Prototype: <double>
#> 0. (                 , <vctrs_percent> ) = <vctrs_percent>
#> 1. ( <vctrs_percent> , <double>        ) = <double>       
#> 2. ( <double>        , <vctrs_percent> ) = <double>

The vec_ptype2() methods define which input is the richer type that vctrs should coerce to. However, they don’t perform any conversion. This is the job of vec_cast(), which we implement next. We’ll provide a method to cast a percent to a percent:

vec_cast.vctrs_percent.vctrs_percent <- function(x, to, ...) x

And then for converting back and forth between doubles. To convert a double to a percent we use the percent() helper (not the constructor; this is unvalidated user input). To convert a percent to a double, we strip the attributes.

Note that for historical reasons the order of argument in the signature is the opposite as for vec_ptype2(). The class for to comes first, and the class for x comes second.

Again, the double dispatch mechanism requires us to refer to the underlying type, double, in the method name. Implementing vec_cast.vctrs_percent.numeric() has no effect.

vec_cast.vctrs_percent.double <- function(x, to, ...) percent(x)
vec_cast.double.vctrs_percent <- function(x, to, ...) vec_data(x)

Then we can check this works with vec_cast():

vec_cast(0.5, percent())
#> <vctrs_percent[1]>
#> [1] 50%
vec_cast(percent(0.5), double())
#> [1] 0.5

Once you’ve implemented vec_ptype2() and vec_cast(), you get vec_c(), [<-, and [[<- implementations for free.

vec_c(percent(0.5), 1)
#> [1] 0.5 1.0
vec_c(NA, percent(0.5))
#> <vctrs_percent[2]>
#> [1] <NA> 50%
# but
vec_c(TRUE, percent(0.5))
#> Error in `vec_c()`:
#> ! Can't combine `..1` <logical> and `..2` <vctrs_percent>.

x <- percent(c(0.5, 1, 2))
x[1:2] <- 2:1
#> Error in `vec_restore_dispatch()`:
#> ! Can't convert <integer> to <vctrs_percent>.
x[[3]] <- 0.5
x
#> <vctrs_percent[3]>
#> [1] 50%  100% 50%

You’ll also get mostly correct behaviour for c(). The exception is when you use c() with a base R class:

# Correct
c(percent(0.5), 1)
#> [1] 0.5 1.0
c(percent(0.5), factor(1))
#> Error in `vec_c()`:
#> ! Can't combine `..1` <vctrs_percent> and `..2` <factor<25c7e>>.

# Incorrect
c(factor(1), percent(0.5))
#> [1] 1.0 0.5

Unfortunately there’s no way to fix this problem with the current design of c().

Again, as a convenience, consider providing an as_percent() function that makes use of the casts defined in your vec_cast.vctrs_percent() methods:

as_percent <- function(x) {
  vec_cast(x, new_percent())
}

Occasionally, it is useful to provide conversions that go beyond what’s allowed in casting. For example, we could offer a parsing method for character vectors. In this case, as_percent() should be generic, the default method should cast, and then additional methods should implement more flexible conversion:

as_percent <- function(x, ...) {
  UseMethod("as_percent")
}

as_percent.default <- function(x, ...) {
  vec_cast(x, new_percent())
}

as_percent.character <- function(x) {
  value <- as.numeric(gsub(" *% *$", "", x)) / 100
  new_percent(value)
}

Decimal class

Now that you’ve seen the basics with a very simple S3 class, we’ll gradually explore more complicated scenarios. This section creates a decimal class that prints with the specified number of decimal places. This is very similar to percent but now the class needs an attribute: the number of decimal places to display (an integer vector of length 1).

We start off as before, defining a low-level constructor, a user-friendly constructor, a format() method, and a vec_ptype_abbr(). Note that additional object attributes are simply passed along to new_vctr():

new_decimal <- function(x = double(), digits = 2L) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  if (!is_integer(digits)) {
    abort("`digits` must be an integer vector.")
  }
  vec_check_size(digits, size = 1L)

  new_vctr(x, digits = digits, class = "vctrs_decimal")
}

decimal <- function(x = double(), digits = 2L) {
  x <- vec_cast(x, double())
  digits <- vec_recycle(vec_cast(digits, integer()), 1L)

  new_decimal(x, digits = digits)
}

digits <- function(x) attr(x, "digits")

format.vctrs_decimal <- function(x, ...) {
  sprintf(paste0("%-0.", digits(x), "f"), x)
}

vec_ptype_abbr.vctrs_decimal <- function(x, ...) {
  "dec"
}

x <- decimal(runif(10), 1L)
x
#> <vctrs_decimal[10]>
#>  [1] 0.1 0.8 0.6 0.2 0.0 0.5 0.5 0.3 0.7 0.8

Note that I provide a little helper to extract the digits attribute. This makes the code a little easier to read and should not be exported.

By default, vctrs assumes that attributes are independent of the data and so are automatically preserved. You’ll see what to do if the attributes are data dependent in the next section.

x[1:2]
#> <vctrs_decimal[2]>
#> [1] 0.1 0.8
x[[1]]
#> <vctrs_decimal[1]>
#> [1] 0.1

For the sake of exposition, we’ll assume that digits is an important attribute of the class and should be included in the full type:

vec_ptype_full.vctrs_decimal <- function(x, ...) {
  paste0("decimal<", digits(x), ">")
}

x
#> <decimal<1>[10]>
#>  [1] 0.1 0.8 0.6 0.2 0.0 0.5 0.5 0.3 0.7 0.8

Now consider vec_cast() and vec_ptype2(). Casting and coercing from one decimal to another requires a little thought as the values of the digits attribute might be different, and we need some way to reconcile them. Here I’ve decided to chose the maximum of the two; other reasonable options are to take the value from the left-hand side or throw an error.

vec_ptype2.vctrs_decimal.vctrs_decimal <- function(x, y, ...) {
  new_decimal(digits = max(digits(x), digits(y)))
}
vec_cast.vctrs_decimal.vctrs_decimal <- function(x, to, ...) {
  new_decimal(vec_data(x), digits = digits(to))
}

vec_c(decimal(1/100, digits = 3), decimal(2/100, digits = 2))
#> <decimal<3>[2]>
#> [1] 0.010 0.020

Finally, I can implement coercion to and from other types, like doubles. When automatically coercing, I choose the richer type (i.e., the decimal).

vec_ptype2.vctrs_decimal.double <- function(x, y, ...) x
vec_ptype2.double.vctrs_decimal <- function(x, y, ...) y

vec_cast.vctrs_decimal.double  <- function(x, to, ...) new_decimal(x, digits = digits(to))
vec_cast.double.vctrs_decimal  <- function(x, to, ...) vec_data(x)

vec_c(decimal(1, digits = 1), pi)
#> <decimal<1>[2]>
#> [1] 1.0 3.1
vec_c(pi, decimal(1, digits = 1))
#> <decimal<1>[2]>
#> [1] 3.1 1.0

If type x has greater resolution than y, there will be some inputs that lose precision. These should generate errors using stop_lossy_cast(). You can see that in action when casting from doubles to integers; only some doubles can become integers without losing resolution.

vec_cast(c(1, 2, 10), to = integer())
#> [1]  1  2 10

vec_cast(c(1.5, 2, 10.5), to = integer())
#> Error:
#> ! Can't convert from `c(1.5, 2, 10.5)` <double> to <integer> due to loss of precision.
#> • Locations: 1, 3

Cached sum class

The next level up in complexity is an object that has data-dependent attributes. To explore this idea we’ll create a vector that caches the sum of its values. As usual, we start with low-level and user-friendly constructors:

new_cached_sum <- function(x = double(), sum = 0L) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  if (!is_double(sum)) {
    abort("`sum` must be a double vector.")
  }
  vec_check_size(sum, size = 1L)

  new_vctr(x, sum = sum, class = "vctrs_cached_sum")
}

cached_sum <- function(x) {
  x <- vec_cast(x, double())
  new_cached_sum(x, sum(x))
}

For this class, we can use the default format() method, and instead, we’ll customise the obj_print_footer() method. This is a good place to display user facing attributes.

obj_print_footer.vctrs_cached_sum <- function(x, ...) {
  cat("# Sum: ", format(attr(x, "sum"), digits = 3), "\n", sep = "")
}

x <- cached_sum(runif(10))
x
#> <vctrs_cached_sum[10]>
#>  [1] 0.87460066 0.17494063 0.03424133 0.32038573 0.40232824 0.19566983
#>  [7] 0.40353812 0.06366146 0.38870131 0.97554784
#> # Sum: 3.83

We’ll also override sum() and mean() to use the attribute. This is easiest to do with vec_math(), which you’ll learn about later.

vec_math.vctrs_cached_sum <- function(.fn, .x, ...) {
  cat("Using cache\n")
  switch(.fn,
    sum = attr(.x, "sum"),
    mean = attr(.x, "sum") / length(.x),
    vec_math_base(.fn, .x, ...)
  )
}

sum(x)
#> Using cache
#> [1] 3.833615

As mentioned above, vctrs assumes that attributes are independent of the data. This means that when we take advantage of the default methods, they’ll work, but return the incorrect result:

x[1:2]
#> <vctrs_cached_sum[2]>
#> [1] 0.8746007 0.1749406
#> # Sum: 3.83

To fix this, you need to provide a vec_restore() method. Note that this method dispatches on the to argument.

vec_restore.vctrs_cached_sum <- function(x, to, ..., i = NULL) {
  new_cached_sum(x, sum(x))
}

x[1]
#> <vctrs_cached_sum[1]>
#> [1] 0.8746007
#> # Sum: 0.875

This works because most of the vctrs methods dispatch to the underlying base function by first stripping off extra attributes with vec_data() and then reapplying them again with vec_restore(). The default vec_restore() method copies over all attributes, which is not appropriate when the attributes depend on the data.

Note that vec_restore.class is subtly different from vec_cast.class.class(). vec_restore() is used when restoring attributes that have been lost; vec_cast() is used for coercions. This is easier to understand with a concrete example. Imagine factors were implemented with new_vctr(). vec_restore.factor() would restore attributes back to an integer vector, but you would not want to allow manually casting an integer to a factor with vec_cast().

Record-style objects

Record-style objects use a list of equal-length vectors to represent individual components of the object. The best example of this is POSIXlt, which underneath the hood is a list of 11 fields like year, month, and day. Record-style classes override length() and subsetting methods to conceal this implementation detail.

x <- as.POSIXlt(ISOdatetime(2020, 1, 1, 0, 0, 1:3))
x
#> [1] "2020-01-01 00:00:01 EST" "2020-01-01 00:00:02 EST"
#> [3] "2020-01-01 00:00:03 EST"

length(x)
#> [1] 3
length(unclass(x))
#> [1] 11

x[[1]] # the first date time
#> [1] "2020-01-01 00:00:01 EST"
unclass(x)[[1]] # the first component, the number of seconds
#> [1] 1 2 3

vctrs makes it easy to create new record-style classes using new_rcrd(), which has a wide selection of default methods.

Rational class

A fraction, or rational number, can be represented by a pair of integer vectors representing the numerator (the number on top) and the denominator (the number on bottom), where the length of each vector must be the same. To represent such a data structure we turn to a new base data type: the record (or rcrd for short).

As usual we start with low-level and user-friendly constructors. The low-level constructor calls new_rcrd(), which needs a named list of equal-length vectors.

new_rational <- function(n = integer(), d = integer()) {
  if (!is_integer(n)) {
    abort("`n` must be an integer vector.")
  }
  if (!is_integer(d)) {
    abort("`d` must be an integer vector.")
  }

  new_rcrd(list(n = n, d = d), class = "vctrs_rational")
}

Our user friendly constructor casts n and d to integers and recycles them to the same length.

rational <- function(n = integer(), d = integer()) {
  c(n, d) %<-% vec_cast_common(n, d, .to = integer())
  c(n, d) %<-% vec_recycle_common(n, d)

  new_rational(n, d)
}

x <- rational(1, 1:10)

Behind the scenes, x is a named list with two elements. But those details are hidden so that it behaves like a vector:

names(x)
#> NULL
length(x)
#> [1] 10

To access the underlying fields we need to use field() and fields():

fields(x)
#> [1] "n" "d"
field(x, "n")
#>  [1] 1 1 1 1 1 1 1 1 1 1

Notice that we can’t print() or str() the new rational vector x yet. Printing causes an error:

x
#> <vctrs_rational[10]>
#> Error in `format()`:
#> ! `format.vctrs_rational()` not implemented.

str(x)
#> Error in `format()`:
#> ! `format.vctrs_rational()` not implemented.

This is because we haven’t defined how our class can be printed from the underlying data. Note that if you want to look under the hood during development, you can always call vec_data(x).

vec_data(x)
#>    n  d
#> 1  1  1
#> 2  1  2
#> 3  1  3
#> 4  1  4
#> 5  1  5
#> 6  1  6
#> 7  1  7
#> 8  1  8
#> 9  1  9
#> 10 1 10

str(vec_data(x))
#> 'data.frame':    10 obs. of  2 variables:
#>  $ n: int  1 1 1 1 1 1 1 1 1 1
#>  $ d: int  1 2 3 4 5 6 7 8 9 10

It is generally best to define a formatting method early in the development of a class. The format method defines how to display the class so that it can be printed in the normal way:

format.vctrs_rational <- function(x, ...) {
  n <- field(x, "n")
  d <- field(x, "d")

  out <- paste0(n, "/", d)
  out[is.na(n) | is.na(d)] <- NA

  out
}

vec_ptype_abbr.vctrs_rational <- function(x, ...) "rtnl"
vec_ptype_full.vctrs_rational <- function(x, ...) "rational"

x
#> <rational[10]>
#>  [1] 1/1  1/2  1/3  1/4  1/5  1/6  1/7  1/8  1/9  1/10

vctrs uses the format() method in str(), hiding the underlying implementation details from the user:

str(x)
#>  rtnl [1:10] 1/1, 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10

For rational, vec_ptype2() and vec_cast() follow the same pattern as percent(). We allow coercion from integer and to doubles.

vec_ptype2.vctrs_rational.vctrs_rational <- function(x, y, ...) new_rational()
vec_ptype2.vctrs_rational.integer <- function(x, y, ...) new_rational()
vec_ptype2.integer.vctrs_rational <- function(x, y, ...) new_rational()

vec_cast.vctrs_rational.vctrs_rational <- function(x, to, ...) x
vec_cast.double.vctrs_rational <- function(x, to, ...) field(x, "n") / field(x, "d")
vec_cast.vctrs_rational.integer <- function(x, to, ...) rational(x, 1)

vec_c(rational(1, 2), 1L, NA)
#> <rational[3]>
#> [1] 1/2  1/1  <NA>

Decimal2 class

The previous implementation of decimal was built on top of doubles. This is a bad idea because decimal vectors are typically used when you care about precise values (i.e., dollars and cents in a bank account), and double values suffer from floating point problems.

A better implementation of a decimal class would be to use pair of integers, one for the value to the left of the decimal point, and the other for the value to the right (divided by a scale). The following code is a very quick sketch of how you might start creating such a class:

new_decimal2 <- function(l, r, scale = 2L) {
  if (!is_integer(l)) {
    abort("`l` must be an integer vector.")
  }
  if (!is_integer(r)) {
    abort("`r` must be an integer vector.")
  }
  if (!is_integer(scale)) {
    abort("`scale` must be an integer vector.")
  }
  vec_check_size(scale, size = 1L)

  new_rcrd(list(l = l, r = r), scale = scale, class = "vctrs_decimal2")
}

decimal2 <- function(l, r, scale = 2L) {
  l <- vec_cast(l, integer())
  r <- vec_cast(r, integer())
  c(l, r) %<-% vec_recycle_common(l, r)
  scale <- vec_cast(scale, integer())

  # should check that r < 10^scale
  new_decimal2(l = l, r = r, scale = scale)
}

format.vctrs_decimal2 <- function(x, ...) {
  val <- field(x, "l") + field(x, "r") / 10^attr(x, "scale")
  sprintf(paste0("%.0", attr(x, "scale"), "f"), val)
}

decimal2(10, c(0, 5, 99))
#> <vctrs_decimal2[3]>
#> [1] 10.00 10.05 10.99

Equality and comparison

vctrs provides four “proxy” generics. Two of these let you control how your class determines equality and comparison:

  • vec_proxy_equal() returns a data vector suitable for comparison. It underpins ==, !=, unique(), anyDuplicated(), and is.na().

  • vec_proxy_compare() specifies how to compare the elements of your vector. This proxy is used in <, <=, >=, >, min(), max(), median(), and quantile().

Two other proxy generic are used for sorting for unordered data types and for accessing the raw data for exotic storage formats:

  • vec_proxy_order() specifies how to sort the elements of your vector. It is used in xtfrm(), which in turn is called by the order() and sort() functions.

    This proxy was added to implement the behaviour of lists, which are sortable (their order proxy sorts by first occurrence) but not comparable (comparison operators cause an error). Its default implementation for other classes calls vec_proxy_compare() and you normally don’t need to implement this proxy.

  • vec_proxy() returns the actual data of a vector. This is useful when you store the data in a field of your class. Most of the time, you shouldn’t need to implement vec_proxy().

The default behavior is as follows:

  • vec_proxy_equal() calls vec_proxy()
  • vec_proxy_compare() calls vec_proxy_equal()
  • vec_proxy_order() calls vec_proxy_compare()

You should only implement these proxies when some preprocessing on the data is needed to make elements comparable. In that case, defining these methods will get you a lot of behaviour for relatively little work.

These proxy functions should always return a simple object (either a bare vector or a data frame) that possesses the same properties as your class. This permits efficient implementation of the vctrs internals because it allows dispatch to happen once in R, and then efficient computations can be written in C.

Rational class

Let’s explore these ideas by with the rational class we started on above. By default, vec_proxy() converts a record to a data frame, and the default comparison works column by column:

x <- rational(c(1, 2, 1, 2), c(1, 1, 2, 2))
x
#> <rational[4]>
#> [1] 1/1 2/1 1/2 2/2

vec_proxy(x)
#>   n d
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 2 2

x == rational(1, 1)
#> [1]  TRUE FALSE FALSE FALSE

This makes sense as a default but isn’t correct here because rational(1, 1) represents the same number as rational(2, 2), so they should be equal. We can fix that by implementing a vec_proxy_equal() method that divides n and d by their greatest common divisor:

# Thanks to Matthew Lundberg: https://stackoverflow.com/a/21504113/16632
gcd <- function(x, y) {
  r <- x %% y
  ifelse(r, gcd(y, r), y)
}

vec_proxy_equal.vctrs_rational <- function(x, ...) {
  n <- field(x, "n")
  d <- field(x, "d")
  gcd <- gcd(n, d)

  data.frame(n = n / gcd, d = d / gcd)
}
vec_proxy_equal(x)
#>   n d
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 1 1

x == rational(1, 1)
#> [1]  TRUE FALSE FALSE  TRUE

vec_proxy_equal() is also used by unique():

unique(x)
#> <rational[3]>
#> [1] 1/1 2/1 1/2

We now need to fix the comparison operations similarly, since comparison currently happens lexicographically by n, then by d:

rational(1, 2) < rational(2, 3)
#> [1] TRUE
rational(2, 4) < rational(2, 3)
#> [1] TRUE

The easiest fix is to convert the fraction to a floating point number and use this as a proxy:

vec_proxy_compare.vctrs_rational <- function(x, ...) {
  field(x, "n") / field(x, "d")
}

rational(2, 4) < rational(2, 3)
#> [1] TRUE

This also fixes sort(), because the default implementation of vec_proxy_order() calls vec_proxy_compare().

sort(x)
#> <rational[4]>
#> [1] 1/2 1/1 2/2 2/1

(We could have used the same approach in vec_proxy_equal(), but when working with floating point numbers it’s not necessarily true that x == y implies that d * x == d * y.)

Polynomial class

A related problem occurs if we build our vector on top of a list. The following code defines a polynomial class that represents polynomials (like 1 + 3x - 2x^2) using a list of integer vectors (like c(1, 3, -2)). Note the use of new_list_of() in the constructor.

poly <- function(...) {
  x <- vec_cast_common(..., .to = integer())
  new_poly(x)
}
new_poly <- function(x) {
  new_list_of(x, ptype = integer(), class = "vctrs_poly_list")
}

vec_ptype_full.vctrs_poly_list <- function(x, ...) "polynomial"
vec_ptype_abbr.vctrs_poly_list <- function(x, ...) "poly"

format.vctrs_poly_list <- function(x, ...) {
  format_one <- function(x) {
    if (length(x) == 0) {
      return("")
    }

    if (length(x) == 1) {
      format(x)
    } else {
      suffix <- c(paste0("\u22C5x^", seq(length(x) - 1, 1)), "")
      out <- paste0(x, suffix)
      out <- out[x != 0L]
      paste0(out, collapse = " + ")
    }
  }

  vapply(x, format_one, character(1))
}

obj_print_data.vctrs_poly_list <- function(x, ...) {
  if (length(x) != 0) {
    print(format(x), quote = FALSE)
  }
}

p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1))
p
#> <polynomial[3]>
#> [1] 1         1⋅x^4 + 2 1⋅x^2 + 1

The resulting objects will inherit from the vctrs_list_of class, which provides tailored methods for $, [[, the corresponding assignment operators, and other methods.

class(p)
#> [1] "vctrs_poly_list" "vctrs_list_of"   "vctrs_vctr"      "list"
p[2]
#> <polynomial[1]>
#> [1] 1⋅x^4 + 2
p[[2]]
#> [1] 1 0 0 0 2

The class implements the list interface:

obj_is_list(p)
#> [1] TRUE

This is fine for the internal implementation of this class but it would be more appropriate if it behaved like an atomic vector rather than a list.

Make an atomic polynomial vector

An atomic vector is a vector like integer or character for which [[ returns the same type. Unlike lists, you can’t reach inside an atomic vector.

To make the polynomial class an atomic vector, we’ll wrap the internal list_of() class within a record vector. Usually records are used because they can store several fields of data for each observation. Here we have only one, but we use the class anyway to inherit its atomicity.

poly <- function(...) {
  x <- vec_cast_common(..., .to = integer())
  x <- new_poly(x)
  new_rcrd(list(data = x), class = "vctrs_poly")
}
format.vctrs_poly <- function(x, ...) {
  format(field(x, "data"))
}

The new format() method delegates to the one we wrote for the internal list. The vector looks just like before:

p <- poly(1, c(1, 0, 0, 0, 2), c(1, 0, 1))
p
#> <vctrs_poly[3]>
#> [1] 1         1⋅x^4 + 2 1⋅x^2 + 1

Making the class atomic means that obj_is_list() now returns FALSE. This prevents recursive algorithms that traverse lists from reaching too far inside the polynomial internals.

obj_is_list(p)
#> [1] FALSE

Most importantly, it prevents users from reaching into the internals with [[:

p[[2]]
#> <vctrs_poly[1]>
#> [1] 1⋅x^4 + 2

Implementing equality and comparison

Equality works out of the box because we can tell if two integer vectors are equal:

p == poly(c(1, 0, 1))
#> [1] FALSE FALSE  TRUE

We can’t compare individual elements, because the data is stored in a list and by default lists are not comparable:

p < p[2]
#> Error in `vec_proxy_compare()`:
#> ! `vec_proxy_compare.vctrs_poly_list()` not supported.

To enable comparison, we implement a vec_proxy_compare() method:

vec_proxy_compare.vctrs_poly <- function(x, ...) {
  # Get the list inside the record vector
  x_raw <- vec_data(field(x, "data"))

  # First figure out the maximum length
  n <- max(vapply(x_raw, length, integer(1)))

  # Then expand all vectors to this length by filling in with zeros
  full <- lapply(x_raw, function(x) c(rep(0L, n - length(x)), x))

  # Then turn into a data frame
  as.data.frame(do.call(rbind, full))
}

p < p[2]
#> [1]  TRUE FALSE  TRUE

Often, this is sufficient to also implement sort(). However, for lists, there is already a default vec_proxy_order() method that sorts by first occurrence:

sort(p)
#> <vctrs_poly[3]>
#> [1] 1         1⋅x^2 + 1 1⋅x^4 + 2
sort(p[c(1:3, 1:2)])
#> <vctrs_poly[5]>
#> [1] 1         1         1⋅x^2 + 1 1⋅x^4 + 2 1⋅x^4 + 2

To ensure consistency between ordering and comparison, we forward vec_proxy_order() to vec_proxy_compare():

vec_proxy_order.vctrs_poly <- function(x, ...) {
  vec_proxy_compare(x, ...)
}

sort(p)
#> <vctrs_poly[3]>
#> [1] 1         1⋅x^2 + 1 1⋅x^4 + 2

Arithmetic

vctrs also provides two mathematical generics that allow you to define a broad swath of mathematical behaviour at once:

  • vec_math(fn, x, ...) specifies the behaviour of mathematical functions like abs(), sum(), and mean(). (Note that var() and sd() can’t be overridden, see ?vec_math() for the complete list supported by vec_math().)

  • vec_arith(op, x, y) specifies the behaviour of the arithmetic operations like +, -, and %%. (See ?vec_arith() for the complete list.)

Both generics define the behaviour for multiple functions because sum.vctrs_vctr(x) calls vec_math.vctrs_vctr("sum", x), and x + y calls vec_math.x_class.y_class("+", x, y). They’re accompanied by vec_math_base() and vec_arith_base() which make it easy to call the underlying base R functions.

vec_arith() uses double dispatch and needs the following standard boilerplate:

vec_arith.MYCLASS <- function(op, x, y, ...) {
  UseMethod("vec_arith.MYCLASS", y)
}
vec_arith.MYCLASS.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}

Correctly exporting vec_arith() methods from a package is currently a little awkward. See the instructions in the Arithmetic section of the “Implementing a vctrs S3 class in a package” section below.

Cached sum class

I showed an example of vec_math() to define sum() and mean() methods for cached_sum. Now let’s talk about exactly how it works. Most vec_math() functions will have a similar form. You use a switch statement to handle the methods that you care about and fall back to vec_math_base() for those that you don’t care about.

vec_math.vctrs_cached_sum <- function(.fn, .x, ...) {
  switch(.fn,
    sum = attr(.x, "sum"),
    mean = attr(.x, "sum") / length(.x),
    vec_math_base(.fn, .x, ...)
  )
}

Meter class

To explore the infix arithmetic operators exposed by vec_arith() I’ll create a new class that represents a measurement in meters:

new_meter <- function(x) {
  stopifnot(is.double(x))
  new_vctr(x, class = "vctrs_meter")
}

format.vctrs_meter <- function(x, ...) {
  paste0(format(vec_data(x)), " m")
}

meter <- function(x) {
  x <- vec_cast(x, double())
  new_meter(x)
}

x <- meter(1:10)
x
#> <vctrs_meter[10]>
#>  [1]  1 m  2 m  3 m  4 m  5 m  6 m  7 m  8 m  9 m 10 m

Because meter is built on top of a double vector, basic mathematic operations work:

sum(x)
#> <vctrs_meter[1]>
#> [1] 55 m
mean(x)
#> <vctrs_meter[1]>
#> [1] 5.5 m

But we can’t do arithmetic:

x + 1
#> Error in `vec_arith()`:
#> ! <vctrs_meter> + <double> is not permitted
meter(10) + meter(1)
#> Error in `vec_arith()`:
#> ! <vctrs_meter> + <vctrs_meter> is not permitted
meter(10) * 3
#> Error in `vec_arith()`:
#> ! <vctrs_meter> * <double> is not permitted

To allow these infix functions to work, we’ll need to provide vec_arith() generic. But before we do that, let’s think about what combinations of inputs we should support:

  • It makes sense to add and subtract meters: that yields another meter. We can divide a meter by another meter (yielding a unitless number), but we can’t multiply meters (because that would yield an area).

  • For a combination of meter and number multiplication and division by a number are acceptable. Addition and subtraction don’t make much sense as we, strictly speaking, are dealing with objects of different nature.

vec_arith() is another function that uses double dispatch, so as usual we start with a template.

vec_arith.vctrs_meter <- function(op, x, y, ...) {
  UseMethod("vec_arith.vctrs_meter", y)
}
vec_arith.vctrs_meter.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}

Then write the method for two meter objects. We use a switch statement to cover the cases we care about and stop_incompatible_op() to throw an informative error message for everything else.

vec_arith.vctrs_meter.vctrs_meter <- function(op, x, y, ...) {
  switch(
    op,
    "+" = ,
    "-" = new_meter(vec_arith_base(op, x, y)),
    "/" = vec_arith_base(op, x, y),
    stop_incompatible_op(op, x, y)
  )
}

meter(10) + meter(1)
#> <vctrs_meter[1]>
#> [1] 11 m
meter(10) - meter(1)
#> <vctrs_meter[1]>
#> [1] 9 m
meter(10) / meter(1)
#> [1] 10
meter(10) * meter(1)
#> Error in `vec_arith()`:
#> ! <vctrs_meter> * <vctrs_meter> is not permitted

Next we write the pair of methods for arithmetic with a meter and a number. These are almost identical, but while meter(10) / 2 makes sense, 2 / meter(10) does not (and neither do addition and subtraction). To support both doubles and integers as operands, we dispatch over numeric here instead of double.

vec_arith.vctrs_meter.numeric <- function(op, x, y, ...) {
  switch(
    op,
    "/" = ,
    "*" = new_meter(vec_arith_base(op, x, y)),
    stop_incompatible_op(op, x, y)
  )
}
vec_arith.numeric.vctrs_meter <- function(op, x, y, ...) {
  switch(
    op,
    "*" = new_meter(vec_arith_base(op, x, y)),
    stop_incompatible_op(op, x, y)
  )
}

meter(2) * 10
#> <vctrs_meter[1]>
#> [1] 20 m
meter(2) * as.integer(10)
#> <vctrs_meter[1]>
#> [1] 20 m
10 * meter(2)
#> <vctrs_meter[1]>
#> [1] 20 m
meter(20) / 10
#> <vctrs_meter[1]>
#> [1] 2 m
10 / meter(20)
#> Error in `vec_arith()`:
#> ! <double> / <vctrs_meter> is not permitted
meter(20) + 10
#> Error in `vec_arith()`:
#> ! <vctrs_meter> + <double> is not permitted

For completeness, we also need vec_arith.vctrs_meter.MISSING for the unary + and - operators:

vec_arith.vctrs_meter.MISSING <- function(op, x, y, ...) {
  switch(op,
    `-` = x * -1,
    `+` = x,
    stop_incompatible_op(op, x, y)
  )
}
-meter(1)
#> <vctrs_meter[1]>
#> [1] -1 m
+meter(1)
#> <vctrs_meter[1]>
#> [1] 1 m

Implementing a vctrs S3 class in a package

Defining S3 methods interactively is fine for iteration and exploration, but if your class lives in a package, you need to do a few more things:

  • Register the S3 methods by listing them in the NAMESPACE file.

  • Create documentation around your methods, for the sake of your user and to satisfy R CMD check.

Let’s assume that the percent class is implemented in the pizza package in the file R/percent.R. Here we walk through the major sections of this hypothetical file. You’ve seen all of this code before, but now it’s augmented by the roxygen2 directives that produce the correct NAMESPACE entries and help topics.

Getting started

First, the pizza package needs to include vctrs in the Imports section of its DESCRIPTION (perhaps by calling usethis::use_package("vctrs"). While vctrs is under very active development, it probably makes sense to state a minimum version.

Imports:
    a_package,
    another_package,
    ...
    vctrs (>= x.y.z),
    ...

Then we make all vctrs functions available within the pizza package by including the directive #' @import vctrs somewhere. Usually, it’s not good practice to @import the entire namespace of a package, but vctrs is deliberately designed with this use case in mind.

Where should we put #' @import vctrs? There are two natural locations:

  • With package-level docs in R/pizza-doc.R. You can use usethis::use_package_doc() to initiate this package-level documentation.

  • In R/percent.R. This makes the most sense when the vctrs S3 class is a modest and self-contained part of the overall package.

We also must use one of these locations to dump some internal documentation that’s needed to avoid R CMD check complaints. We don’t expect any human to ever read this documentation. Here’s how this dummy documentation should look, combined with the #' @import vctrs directive described above.

#' Internal vctrs methods
#'
#' @import vctrs
#' @keywords internal
#' @name pizza-vctrs
NULL

This should appear in R/pizza-doc.R (package-level docs) or in R/percent.R (class-focused file).

Remember to call devtools::document() regularly, as you develop, to regenerate NAMESPACE and the .Rd files.

From this point on, the code shown is expected to appear in R/percent.R.

Low-level and user-friendly constructors

Next we add our constructor:

new_percent <- function(x = double()) {
  if (!is_double(x)) {
    abort("`x` must be a double vector.")
  }
  new_vctr(x, class = "pizza_percent")
}

Note that the name of the package must be included in the class name (pizza_percent), but it does not need to be included in the constructor name. You do not need to export the constructor, unless you want people to extend your class.

We can also add a call to setOldClass() for compatibility with S4:

# for compatibility with the S4 system
methods::setOldClass(c("pizza_percent", "vctrs_vctr"))

Because we’ve used a function from the methods package, you’ll also need to add methods to Imports, with (e.g.) usethis::use_package("methods"). This is a “free” dependency because methods is bundled with every R install.

Next we implement, export, and document a user-friendly helper: percent().

#' `percent` vector
#'
#' This creates a double vector that represents percentages so when it is
#' printed, it is multiplied by 100 and suffixed with `%`.
#'
#' @param x A numeric vector
#' @return An S3 vector of class `pizza_percent`.
#' @export
#' @examples
#' percent(c(0.25, 0.5, 0.75))
percent <- function(x = double()) {
  x <- vec_cast(x, double())
  new_percent(x)
}

(Again note that the package name will appear in the class, but does not need to occur in the function, because we can already do pizza::percent(); it would be redundant to have pizza::pizza_percent().)

Other helpers

It’s a good idea to provide a function that tests if an object is of this class. If you do so, it makes sense to document it with the user-friendly constructor percent():

#' @export
#' @rdname percent
is_percent <- function(x) {
  inherits(x, "pizza_percent")
}

You’ll also need to update the percent() documentation to reflect that x now means two different things:

#' @param x
#'  * For `percent()`: A numeric vector
#'  * For `is_percent()`: An object to test.

Next we provide the key methods to make printing work. These are S3 methods, so they don’t need to be documented, but they do need to be exported.

#' @export
format.pizza_percent <- function(x, ...) {
  out <- formatC(signif(vec_data(x) * 100, 3))
  out[is.na(x)] <- NA
  out[!is.na(x)] <- paste0(out[!is.na(x)], "%")
  out
}

#' @export
vec_ptype_abbr.pizza_percent <- function(x, ...) {
  "prcnt"
}

Finally, we implement methods for vec_ptype2() and vec_cast().

#' @export
vec_ptype2.vctrs_percent.vctrs_percent <- function(x, y, ...) new_percent()
#' @export
vec_ptype2.double.vctrs_percent <- function(x, y, ...) double()

#' @export
vec_cast.pizza_percent.pizza_percent <- function(x, to, ...) x
#' @export
vec_cast.pizza_percent.double <- function(x, to, ...) percent(x)
#' @export
vec_cast.double.pizza_percent <- function(x, to, ...) vec_data(x)

Arithmetic

Writing double dispatch methods for vec_arith() is currently more awkward than writing them for vec_ptype2() or vec_cast(). We plan to improve this in the future. For now, you can use the following instructions.

If you define a new type and want to write vec_arith() methods for it, you’ll need to provide a new single dispatch S3 generic for it of the following form:

#' @export
#' @method vec_arith my_type
vec_arith.my_type <- function(op, x, y, ...) {
  UseMethod("vec_arith.my_type", y)
}

Note that this actually functions as both an S3 method for vec_arith() and an S3 generic called vec_arith.my_type() that dispatches off y. roxygen2 only recognizes it as an S3 generic, so you have to register the S3 method part of this with an explicit @method call.

After that, you can define double dispatch methods, but you still need an explicit @method tag to ensure it is registered with the correct generic:

#' @export
#' @method vec_arith.my_type my_type
vec_arith.my_type.my_type <- function(op, x, y, ...) {
  # implementation here
}

#' @export
#' @method vec_arith.my_type integer
vec_arith.my_type.integer <- function(op, x, y, ...) {
  # implementation here
}

#' @export
#' @method vec_arith.integer my_type
vec_arith.integer.my_type <- function(op, x, y, ...) {
  # implementation here
}

vctrs provides the hybrid S3 generics/methods for most of the base R types, like vec_arith.integer(). If you don’t fully import vctrs with @import vctrs, then you will need to explicitly import the generic you are registering double dispatch methods for with @importFrom vctrs vec_arith.integer.

Testing

It’s good practice to test your new class. Specific recommendations:

  • R/percent.R is the type of file where you really do want 100% test coverage. You can use devtools::test_coverage_file() to check this.

  • Make sure to test behaviour with zero-length inputs and missing values.

  • Use testthat::verify_output() to test your format method. Customised printing is often a primary motivation for creating your own S3 class in the first place, so this will alert you to unexpected changes in your printed output. Read more about verify_output() in the testthat v2.3.0 blog post; it’s an example of a so-called golden test.

  • Check for method symmetry; use expect_s3_class(), probably with exact = TRUE, to ensure that vec_c(x, y) and vec_c(y, x) return the same type of output for the important xs and ys in your domain.

  • Use testthat::expect_error() to check that inputs that can’t be combined fail with an error. Here, you should be generally checking the class of the error, not its message. Relevant classes include vctrs_error_assert_ptype, vctrs_error_assert_size, and vctrs_error_incompatible_type.

    expect_error(vec_c(1, "a"), class = "vctrs_error_incompatible_type")

If your tests pass when run by devtools::test(), but fail when run in R CMD check, it is very likely to reflect a problem with S3 method registration. Carefully check your roxygen2 comments and the generated NAMESPACE.

Existing classes

Before you build your own class, you might want to consider using, or subclassing existing classes. You can check awesome-vctrs for a curated list of R vector classes, some of which are built with vctrs.

If you’ve built or extended a class, consider adding it to that list so other people can use it.

vctrs/inst/doc/pillar.html0000644000176200001440000012442715157322637015333 0ustar liggesusers Printing vectors nicely in tibbles

Printing vectors nicely in tibbles

Kirill Müller, Hadley Wickham

You can get basic control over how a vector is printed in a tibble by providing a format() method. If you want greater control, you need to understand how printing works. The presentation of a column in a tibble is controlled by two S3 generics:

  • vctrs::vec_ptype_abbr() determines what goes into the column header.
  • pillar::pillar_shaft() determines what goes into the body, or the shaft, of the column.

Technically a pillar is composed of a shaft (decorated with an ornament), with a capital above and a base below. Multiple pillars form a colonnade, which can be stacked in multiple tiers. This is the motivation behind the names in our API.

This short vignette shows the basics of column styling using a "latlon" vector. The vignette imagines the code is in a package, so that you can see the roxygen2 commands you’ll need to create documentation and the NAMESPACE file. In this vignette, we’ll attach pillar and vctrs:

library(vctrs)
library(pillar)

You don’t need to do this in a package. Instead, you’ll need to import the packages by then to the Imports: section of your DESCRIPTION. The following helper does this for you:

usethis::use_package("vctrs")
usethis::use_package("pillar")

Prerequisites

To illustrate the basic ideas we’re going to create a "latlon" class that encodes geographic coordinates in a record. We’ll pretend that this code lives in a package called earth. For simplicity, the values are printed as degrees and minutes only. By using vctrs_rcrd(), we already get the infrastructure to make this class fully compatible with data frames for free. See vignette("s3-vector", package = "vctrs") for details on the record data type.

#' @export
latlon <- function(lat, lon) {
  new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon")
}

#' @export
format.earth_latlon <- function(x, ..., formatter = deg_min) {
  x_valid <- which(!is.na(x))

  lat <- field(x, "lat")[x_valid]
  lon <- field(x, "lon")[x_valid]

  ret <- rep(NA_character_, vec_size(x))
  ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon"))
  # It's important to keep NA in the vector!
  ret
}

deg_min <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- trunc(x)
  x <- x - deg
  min <- round(x * 60)

  # Ensure the columns are always the same width so they line up nicely
  ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]]))
  format(ret, justify = "right")
}

latlon(c(32.71, 2.95), c(-117.17, 1.67))
#> <earth_latlon[2]>
#> [1] 32°43'N 117°10'W  2°57'N   1°40'E

Using in a tibble

Columns of this class can be used in a tibble right away because we’ve made a class using the vctrs infrastructure and have provided a format() method:

library(tibble)
#> 
#> Attaching package: 'tibble'
#> The following object is masked from 'package:vctrs':
#> 
#>     data_frame

loc <- latlon(
  c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA),
  c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA)
)

data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc)

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>       <erth_ltl>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

This output is ok, but we could improve it by:

  1. Using a more description type abbreviation than <erth_ltl>.

  2. Using a dash of colour to highlight the most important parts of the value.

  3. Providing a narrower view when horizontal space is at a premium.

The following sections show how to enhance the rendering.

Fixing the data type

Instead of <erth_ltl> we’d prefer to use <latlon>. We can do that by implementing the vec_ptype_abbr() method, which should return a string that can be used in a column header. For your own classes, strive for an evocative abbreviation that’s under 6 characters.

#' @export
vec_ptype_abbr.earth_latlon <- function(x) {
  "latlon"
}

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

Custom rendering

The format() method is used by default for rendering. For custom formatting you need to implement the pillar_shaft() method. This function should always return a pillar shaft object, created by new_pillar_shaft_simple() or similar. new_pillar_shaft_simple() accepts ANSI escape codes for colouring, and pillar includes some built in styles like style_subtle(). We can use subtle style for the degree and minute separators to make the data more obvious.

First we define a degree formatter that makes use of style_subtle():

deg_min_color <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- trunc(x)
  x <- x - deg
  rad <- round(x * 60)
  ret <- sprintf(
    "%d%s%.2d%s%s",
    deg,
    pillar::style_subtle("°"),
    rad,
    pillar::style_subtle("'"),
    pm[ifelse(sign >= 0, 1, 2)]
  )
  format(ret, justify = "right")
}

And then we pass that to our format() method:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  out <- format(x, formatter = deg_min_color)
  pillar::new_pillar_shaft_simple(out, align = "right")
}

Currently, ANSI escapes are not rendered in vignettes, so this result doesn’t look any different, but if you run the code yourself you’ll see an improved display.

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA

As well as the functions in pillar, the cli package provides a variety of tools for styling text.

Truncation

Tibbles can automatically compacts columns when there’s no enough horizontal space to display everything:

print(data, width = 30)
#> # A tibble: 6 × 3
#>   venue  year              loc
#>   <chr> <int>         <latlon>
#> 1 rstu…  2017 28°20'N  81°33'W
#> 2 rstu…  2018 32°43'N 117°10'W
#> 3 rstu…  2019 30°16'N  97°44'W
#> 4 rstu…  2020 37°47'N 122°25'W
#> 5 rstu…  2021 28°30'N  81°24'W
#> 6 rstu…  2022               NA

Currently the latlon class isn’t ever compacted because we haven’t specified a minimum width when constructing the shaft. Let’s fix that and re-print the data:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  out <- format(x)
  pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10)
}

print(data, width = 30)
#> # A tibble: 6 × 3
#>   venue        year        loc
#>   <chr>       <int>   <latlon>
#> 1 rstudio::c…  2017 28°20'N  …
#> 2 rstudio::c…  2018 32°43'N 1…
#> 3 rstudio::c…  2019 30°16'N  …
#> 4 rstudio::c…  2020 37°47'N 1…
#> 5 rstudio::c…  2021 28°30'N  …
#> 6 rstudio::c…  2022         NA

Adaptive rendering

Truncation may be useful for character data, but for lat-lon data it’d be nicer to show full degrees and remove the minutes. We’ll first write a function that does this:

deg <- function(x, direction) {
  pm <- if (direction == "lat") c("N", "S") else c("E", "W")

  sign <- sign(x)
  x <- abs(x)
  deg <- round(x)

  ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)])
  format(ret, justify = "right")
}

Then use it as part of more sophisticated implementation of the pillar_shaft() method:

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.earth_latlon <- function(x, ...) {
  deg <- format(x, formatter = deg)
  deg_min <- format(x)

  pillar::new_pillar_shaft(
    list(deg = deg, deg_min = deg_min),
    width = pillar::get_max_extent(deg_min),
    min_width = pillar::get_max_extent(deg),
    class = "pillar_shaft_latlon"
  )
}

Now the pillar_shaft() method returns an object of class "pillar_shaft_latlon" created by new_pillar_shaft(). This object contains the necessary information to render the values, and also minimum and maximum width values. For simplicity, both formats are pre-rendered, and the minimum and maximum widths are computed from there. (get_max_extent() is a helper that computes the maximum display width occupied by the values in a character vector.)

All that’s left to do is to implement a format() method for our new "pillar_shaft_latlon" class. This method will be called with a width argument, which then determines which of the formats to choose. The formatting of our choice is passed to the new_ornament() function:

#' @export
format.pillar_shaft_latlon <- function(x, width, ...) {
  if (get_max_extent(x$deg_min) <= width) {
    ornament <- x$deg_min
  } else {
    ornament <- x$deg
  }

  pillar::new_ornament(ornament, align = "right")
}

data
#> # A tibble: 6 × 3
#>   venue          year              loc
#>   <chr>         <int>         <latlon>
#> 1 rstudio::conf  2017 28°20'N  81°33'W
#> 2 rstudio::conf  2018 32°43'N 117°10'W
#> 3 rstudio::conf  2019 30°16'N  97°44'W
#> 4 rstudio::conf  2020 37°47'N 122°25'W
#> 5 rstudio::conf  2021 28°30'N  81°24'W
#> 6 rstudio::conf  2022               NA
print(data, width = 30)
#> # A tibble: 6 × 3
#>   venue        year        loc
#>   <chr>       <int>   <latlon>
#> 1 rstudio::c…  2017 28°N  82°W
#> 2 rstudio::c…  2018 33°N 117°W
#> 3 rstudio::c…  2019 30°N  98°W
#> 4 rstudio::c…  2020 38°N 122°W
#> 5 rstudio::c…  2021 28°N  81°W
#> 6 rstudio::c…  2022         NA

Testing

If you want to test the output of your code, you can compare it with a known state recorded in a text file. The testthat::expect_snapshot() function offers an easy way to test output-generating functions. It takes care about details such as Unicode, ANSI escapes, and output width. Furthermore it won’t make the tests fail on CRAN. This is important because your output may rely on details out of your control, which should be fixed eventually but should not lead to your package being removed from CRAN.

Use this testthat expectation in one of your test files to create a snapshot test:

expect_snapshot(pillar_shaft(data$loc))

See https://testthat.r-lib.org/articles/snapshotting.html for more information.

vctrs/inst/doc/pillar.R0000644000176200001440000001026715157322637014564 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----setup-------------------------------------------------------------------- library(vctrs) library(pillar) ## ----eval = FALSE------------------------------------------------------------- # usethis::use_package("vctrs") # usethis::use_package("pillar") ## ----------------------------------------------------------------------------- #' @export latlon <- function(lat, lon) { new_rcrd(list(lat = lat, lon = lon), class = "earth_latlon") } #' @export format.earth_latlon <- function(x, ..., formatter = deg_min) { x_valid <- which(!is.na(x)) lat <- field(x, "lat")[x_valid] lon <- field(x, "lon")[x_valid] ret <- rep(NA_character_, vec_size(x)) ret[x_valid] <- paste0(formatter(lat, "lat"), " ", formatter(lon, "lon")) # It's important to keep NA in the vector! ret } deg_min <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg min <- round(x * 60) # Ensure the columns are always the same width so they line up nicely ret <- sprintf("%d°%.2d'%s", deg, min, ifelse(sign >= 0, pm[[1]], pm[[2]])) format(ret, justify = "right") } latlon(c(32.71, 2.95), c(-117.17, 1.67)) ## ----------------------------------------------------------------------------- library(tibble) loc <- latlon( c(28.3411783, 32.7102978, 30.2622356, 37.7859102, 28.5, NA), c(-81.5480348, -117.1704058, -97.7403327, -122.4131357, -81.4, NA) ) data <- tibble(venue = "rstudio::conf", year = 2017:2022, loc = loc) data ## ----------------------------------------------------------------------------- #' @export vec_ptype_abbr.earth_latlon <- function(x) { "latlon" } data ## ----------------------------------------------------------------------------- deg_min_color <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- trunc(x) x <- x - deg rad <- round(x * 60) ret <- sprintf( "%d%s%.2d%s%s", deg, pillar::style_subtle("°"), rad, pillar::style_subtle("'"), pm[ifelse(sign >= 0, 1, 2)] ) format(ret, justify = "right") } ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x, formatter = deg_min_color) pillar::new_pillar_shaft_simple(out, align = "right") } ## ----------------------------------------------------------------------------- data ## ----------------------------------------------------------------------------- print(data, width = 30) ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { out <- format(x) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 10) } print(data, width = 30) ## ----------------------------------------------------------------------------- deg <- function(x, direction) { pm <- if (direction == "lat") c("N", "S") else c("E", "W") sign <- sign(x) x <- abs(x) deg <- round(x) ret <- sprintf("%d°%s", deg, pm[ifelse(sign >= 0, 1, 2)]) format(ret, justify = "right") } ## ----------------------------------------------------------------------------- #' @importFrom pillar pillar_shaft #' @export pillar_shaft.earth_latlon <- function(x, ...) { deg <- format(x, formatter = deg) deg_min <- format(x) pillar::new_pillar_shaft( list(deg = deg, deg_min = deg_min), width = pillar::get_max_extent(deg_min), min_width = pillar::get_max_extent(deg), class = "pillar_shaft_latlon" ) } ## ----------------------------------------------------------------------------- #' @export format.pillar_shaft_latlon <- function(x, width, ...) { if (get_max_extent(x$deg_min) <= width) { ornament <- x$deg_min } else { ornament <- x$deg } pillar::new_ornament(ornament, align = "right") } data print(data, width = 30) ## ----eval = FALSE------------------------------------------------------------- # expect_snapshot(pillar_shaft(data$loc)) vctrs/inst/doc/stability.R0000644000176200001440000001175515157322650015303 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(vctrs) library(rlang) library(zeallot) ## ----------------------------------------------------------------------------- vec_ptype_show(median(c(1L, 1L))) vec_ptype_show(median(c(1L, 1L, 1L))) ## ----------------------------------------------------------------------------- vec_ptype_show(sapply(1L, function(x) c(x, x))) vec_ptype_show(sapply(integer(), function(x) c(x, x))) ## ----------------------------------------------------------------------------- vec_ptype_show(c(NA, Sys.Date())) vec_ptype_show(c(Sys.Date(), NA)) ## ----------------------------------------------------------------------------- env <- new.env(parent = emptyenv()) length(env) length(mean) length(c(env, mean)) ## ----------------------------------------------------------------------------- vec_ptype_show(ifelse(NA, 1L, 1L)) vec_ptype_show(ifelse(FALSE, 1L, 1L)) ## ----------------------------------------------------------------------------- c(FALSE, 1L, 2.5) ## ----------------------------------------------------------------------------- vec_c(FALSE, 1L, 2.5) ## ----error = TRUE------------------------------------------------------------- try({ c(FALSE, "x") vec_c(FALSE, "x") c(FALSE, list(1)) vec_c(FALSE, list(1)) }) ## ----------------------------------------------------------------------------- c(10.5, factor("x")) ## ----------------------------------------------------------------------------- c(mean, globalenv()) ## ----error = TRUE------------------------------------------------------------- try({ c(getRversion(), "x") c("x", getRversion()) }) ## ----error = TRUE------------------------------------------------------------- try({ vec_c(mean, globalenv()) vec_c(Sys.Date(), factor("x"), "x") }) ## ----------------------------------------------------------------------------- fa <- factor("a") fb <- factor("b") c(fa, fb) ## ----------------------------------------------------------------------------- vec_c(fa, fb) vec_c(fb, fa) ## ----------------------------------------------------------------------------- datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland") c(datetime_nz) ## ----------------------------------------------------------------------------- vec_c(datetime_nz) ## ----------------------------------------------------------------------------- datetime_local <- as.POSIXct("2020-01-01 09:00") datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central") vec_c(datetime_local, datetime_houston, datetime_nz) vec_c(datetime_houston, datetime_nz) vec_c(datetime_nz, datetime_houston) ## ----------------------------------------------------------------------------- date <- as.Date("2020-01-01") datetime <- as.POSIXct("2020-01-01 09:00") c(date, datetime) c(datetime, date) ## ----------------------------------------------------------------------------- vec_c(date, datetime) vec_c(date, datetime_nz) ## ----------------------------------------------------------------------------- c(NA, fa) c(NA, date) c(NA, datetime) ## ----------------------------------------------------------------------------- vec_c(NA, fa) vec_c(NA, date) vec_c(NA, datetime) ## ----------------------------------------------------------------------------- df1 <- data.frame(x = 1) df2 <- data.frame(x = 2) str(c(df1, df1)) ## ----------------------------------------------------------------------------- vec_c(df1, df2) ## ----------------------------------------------------------------------------- m <- matrix(1:4, nrow = 2) c(m, m) vec_c(m, m) ## ----------------------------------------------------------------------------- c(m, 1) vec_c(m, 1) ## ----eval = FALSE------------------------------------------------------------- # vec_c <- function(...) { # args <- compact(list2(...)) # # ptype <- vec_ptype_common(!!!args) # if (is.null(ptype)) # return(NULL) # # ns <- map_int(args, vec_size) # out <- vec_init(ptype, sum(ns)) # # pos <- 1 # for (i in seq_along(ns)) { # n <- ns[[i]] # # x <- vec_cast(args[[i]], to = ptype) # vec_slice(out, pos:(pos + n - 1)) <- x # pos <- pos + n # } # # out # } ## ----------------------------------------------------------------------------- if_else <- function(test, yes, no) { if (!is_logical(test)) { abort("`test` must be a logical vector.") } c(yes, no) %<-% vec_cast_common(yes, no) c(test, yes, no) %<-% vec_recycle_common(test, yes, no) out <- vec_init(yes, vec_size(yes)) vec_slice(out, test) <- vec_slice(yes, test) vec_slice(out, !test) <- vec_slice(no, !test) out } x <- c(NA, 1:4) if_else(x > 2, "small", "big") if_else(x > 2, factor("small"), factor("big")) if_else(x > 2, Sys.Date(), Sys.Date() + 7) ## ----------------------------------------------------------------------------- if_else(x > 2, data.frame(x = 1), data.frame(y = 2)) if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30)) vctrs/inst/doc/stability.html0000644000176200001440000014462115157322651016046 0ustar liggesusers Type and size stability

Type and size stability

This vignette introduces the ideas of type-stability and size-stability. If a function possesses these properties, it is substantially easier to reason about because to predict the “shape” of the output you only need to know the “shape”s of the inputs.

This work is partly motivated by a common pattern that I noticed when reviewing code: if I read the code (without running it!), and I can’t predict the type of each variable, I feel very uneasy about the code. This sense is important because most unit tests explore typical inputs, rather than exhaustively testing the strange and unusual. Analysing the types (and size) of variables makes it possible to spot unpleasant edge cases.

library(vctrs)
library(rlang)
library(zeallot)

Definitions

We say a function is type-stable iff:

  1. You can predict the output type knowing only the input types.
  2. The order of arguments in … does not affect the output type.

Similarly, a function is size-stable iff:

  1. You can predict the output size knowing only the input sizes, or there is a single numeric input that specifies the output size.

Very few base R functions are size-stable, so I’ll also define a slightly weaker condition. I’ll call a function length-stable iff:

  1. You can predict the output length knowing only the input lengths, or there is a single numeric input that specifies the output length.

(But note that length-stable is not a particularly robust definition because length() returns a value for things that are not vectors.)

We’ll call functions that don’t obey these principles type-unstable and size-unstable respectively.

On top of type- and size-stability it’s also desirable to have a single set of rules that are applied consistently. We want one set of type-coercion and size-recycling rules that apply everywhere, not many sets of rules that apply to different functions.

The goal of these principles is to minimise cognitive overhead. Rather than having to memorise many special cases, you should be able to learn one set of principles and apply them again and again.

Examples

To make these ideas concrete, let’s apply them to a few base functions:

  1. mean() is trivially type-stable and size-stable because it always returns a double vector of length 1 (or it throws an error).

  2. Surprisingly, median() is type-unstable:

    vec_ptype_show(median(c(1L, 1L)))
    #> Prototype: double
    vec_ptype_show(median(c(1L, 1L, 1L)))
    #> Prototype: integer

    It is, however, size-stable, since it always returns a vector of length 1.

  3. sapply() is type-unstable because you can’t predict the output type only knowing the input types:

    vec_ptype_show(sapply(1L, function(x) c(x, x)))
    #> Prototype: integer[,1]
    vec_ptype_show(sapply(integer(), function(x) c(x, x)))
    #> Prototype: list

    It’s not quite size-stable; vec_size(sapply(x, f)) is vec_size(x) for vectors but not for matrices (the output is transposed) or data frames (it iterates over the columns).

  4. vapply() is a type-stable version of sapply() because vec_ptype_show(vapply(x, fn, template)) is always vec_ptype_show(template).
    It is size-unstable for the same reasons as sapply().

  5. c() is type-unstable because c(x, y) doesn’t always output the same type as c(y, x).

    vec_ptype_show(c(NA, Sys.Date()))
    #> Prototype: double
    vec_ptype_show(c(Sys.Date(), NA))
    #> Prototype: date

    c() is almost always length-stable because length(c(x, y)) almost always equals length(x) + length(y). One common source of instability here is dealing with non-vectors (see the later section “Non-vectors”):

    env <- new.env(parent = emptyenv())
    length(env)
    #> [1] 0
    length(mean)
    #> [1] 1
    length(c(env, mean))
    #> [1] 2
  6. paste(x1, x2) is length-stable because length(paste(x1, x2)) equals max(length(x1), length(x2)). However, it doesn’t follow the usual arithmetic recycling rules because paste(1:2, 1:3) doesn’t generate a warning.

  7. ifelse() is length-stable because length(ifelse(cond, true, false)) is always length(cond). ifelse() is type-unstable because the output type depends on the value of cond:

    vec_ptype_show(ifelse(NA, 1L, 1L))
    #> Prototype: logical
    vec_ptype_show(ifelse(FALSE, 1L, 1L))
    #> Prototype: integer
  8. read.csv(file) is type-unstable and size-unstable because, while you know it will return a data frame, you don’t know what columns it will return or how many rows it will have. Similarly, df[[i]] is not type-stable because the result depends on the value of i. There are many important functions that can not be made type-stable or size-stable!

With this understanding of type- and size-stability in hand, we’ll use them to analyse some base R functions in greater depth and then propose alternatives with better properties.

c() and vctrs::vec_c()

In this section we’ll compare and contrast c() and vec_c(). vec_c() is both type- and size-stable because it possesses the following invariants:

  • vec_ptype(vec_c(x, y)) equals vec_ptype_common(x, y).
  • vec_size(vec_c(x, y)) equals vec_size(x) + vec_size(y).

c() has another undesirable property in that it’s not consistent with unlist(); i.e., unlist(list(x, y)) does not always equal c(x, y); i.e., base R has multiple sets of type-coercion rules. I won’t consider this problem further here.

I have two goals here:

  • To fully document the quirks of c(), hence motivating the development of an alternative.

  • To discuss non-obvious consequences of the type- and size-stability above.

Atomic vectors

If we only consider atomic vectors, c() is type-stable because it uses a hierarchy of types: character > complex > double > integer > logical.

c(FALSE, 1L, 2.5)
#> [1] 0.0 1.0 2.5

vec_c() obeys similar rules:

vec_c(FALSE, 1L, 2.5)
#> [1] 0.0 1.0 2.5

But it does not automatically coerce to character vectors or lists:

c(FALSE, "x")
#> [1] "FALSE" "x"
vec_c(FALSE, "x")
#> Error in `vec_c()`:
#> ! Can't combine `..1` <logical> and `..2` <character>.

c(FALSE, list(1))
#> [[1]]
#> [1] FALSE
#> 
#> [[2]]
#> [1] 1
vec_c(FALSE, list(1))
#> Error in `vec_c()`:
#> ! Can't combine `..1` <logical> and `..2` <list>.

Incompatible vectors and non-vectors

In general, most base methods do not throw an error:

c(10.5, factor("x"))
#> [1] 10.5  1.0

If the inputs aren’t vectors, c() automatically puts them in a list:

c(mean, globalenv())
#> [[1]]
#> function (x, ...) 
#> UseMethod("mean")
#> <bytecode: 0x13149a318>
#> <environment: namespace:base>
#> 
#> [[2]]
#> <environment: R_GlobalEnv>

For numeric versions, this depends on the order of inputs. Version first is an error, otherwise the input is wrapped in a list:

c(getRversion(), "x")
#> Error:
#> ! invalid version specification 'x'

c("x", getRversion())
#> [[1]]
#> [1] "x"
#> 
#> [[2]]
#> [1] 4 6 0

vec_c() throws an error if the inputs are not vectors or not automatically coercible:

vec_c(mean, globalenv())
#> Error in `vec_size()`:
#> ! `x` must be a vector, not a function.
#> ℹ Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more.

vec_c(Sys.Date(), factor("x"), "x")
#> Error in `vec_c()`:
#> ! Can't combine `..1` <date> and `..2` <factor<bf275>>.

Factors

Combining two factors returns an integer vector:

fa <- factor("a")
fb <- factor("b")

c(fa, fb)
#> [1] a b
#> Levels: a b

(This is documented in c() but is still undesirable.)

vec_c() returns a factor taking the union of the levels. This behaviour is motivated by pragmatics: there are many places in base R that automatically convert character vectors to factors, so enforcing stricter behaviour would be unnecessarily onerous. (This is backed up by experience with dplyr::bind_rows(), which is stricter and is a common source of user difficulty.)

vec_c(fa, fb)
#> [1] a b
#> Levels: a b
vec_c(fb, fa)
#> [1] b a
#> Levels: b a

Date-times

c() strips the time zone associated with date-times:

datetime_nz <- as.POSIXct("2020-01-01 09:00", tz = "Pacific/Auckland")
c(datetime_nz)
#> [1] "2020-01-01 09:00:00 NZDT"

This behaviour is documented in ?DateTimeClasses but is the source of considerable user pain.

vec_c() preserves time zones:

vec_c(datetime_nz)
#> [1] "2020-01-01 09:00:00 NZDT"

What time zone should the output have if inputs have different time zones? One option would be to be strict and force the user to manually align all the time zones. However, this is onerous (particularly because there’s no easy way to change the time zone in base R), so vctrs chooses to use the first non-local time zone:

datetime_local <- as.POSIXct("2020-01-01 09:00")
datetime_houston <- as.POSIXct("2020-01-01 09:00", tz = "US/Central")

vec_c(datetime_local, datetime_houston, datetime_nz)
#> [1] "2020-01-01 08:00:00 CST" "2020-01-01 09:00:00 CST"
#> [3] "2019-12-31 14:00:00 CST"
vec_c(datetime_houston, datetime_nz)
#> [1] "2020-01-01 09:00:00 CST" "2019-12-31 14:00:00 CST"
vec_c(datetime_nz, datetime_houston)
#> [1] "2020-01-01 09:00:00 NZDT" "2020-01-02 04:00:00 NZDT"

Dates and date-times

Combining dates and date-times with c() gives silently incorrect results:

date <- as.Date("2020-01-01")
datetime <- as.POSIXct("2020-01-01 09:00")

c(date, datetime)
#> [1] "2020-01-01" "2020-01-01"
c(datetime, date)
#> [1] "2020-01-01 09:00:00 EST" "2019-12-31 19:00:00 EST"

This behaviour arises because neither c.Date() nor c.POSIXct() check that all inputs are of the same type.

vec_c() uses a standard set of rules to avoid this problem. When you mix dates and date-times, vctrs returns a date-time and converts dates to date-times at midnight (in the timezone of the date-time).

vec_c(date, datetime)
#> [1] "2020-01-01 00:00:00 EST" "2020-01-01 09:00:00 EST"
vec_c(date, datetime_nz)
#> [1] "2020-01-01 00:00:00 NZDT" "2020-01-01 09:00:00 NZDT"

Missing values

If a missing value comes at the beginning of the inputs, c() falls back to the internal behaviour, which strips all attributes:

c(NA, fa)
#> [1] NA  1
c(NA, date)
#> [1]    NA 18262
c(NA, datetime)
#> [1]         NA 1577887200

vec_c() takes a different approach treating a logical vector consisting only of NA as the unspecified() class which can be converted to any other 1d type:

vec_c(NA, fa)
#> [1] <NA> a   
#> Levels: a
vec_c(NA, date)
#> [1] NA           "2020-01-01"
vec_c(NA, datetime)
#> [1] NA                        "2020-01-01 09:00:00 EST"

Data frames

Because it is almost always length-stable, c() combines data frames column wise (into a list):

df1 <- data.frame(x = 1)
df2 <- data.frame(x = 2)
str(c(df1, df1))
#> List of 2
#>  $ x: num 1
#>  $ x: num 1

vec_c() is size-stable, which implies it will row-bind data frames:

vec_c(df1, df2)
#>   x
#> 1 1
#> 2 2

Matrices and arrays

The same reasoning applies to matrices:

m <- matrix(1:4, nrow = 2)
c(m, m)
#> [1] 1 2 3 4 1 2 3 4
vec_c(m, m)
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> [3,]    1    3
#> [4,]    2    4

One difference is that vec_c() will “broadcast” a vector to match the dimensions of a matrix:

c(m, 1)
#> [1] 1 2 3 4 1

vec_c(m, 1)
#>      [,1] [,2]
#> [1,]    1    3
#> [2,]    2    4
#> [3,]    1    1

Implementation

The basic implementation of vec_c() is reasonably simple. We first figure out the properties of the output, i.e. the common type and total size, and then allocate it with vec_init(), and then insert each input into the correct place in the output.

vec_c <- function(...) {
  args <- compact(list2(...))

  ptype <- vec_ptype_common(!!!args)
  if (is.null(ptype))
    return(NULL)

  ns <- map_int(args, vec_size)
  out <- vec_init(ptype, sum(ns))

  pos <- 1
  for (i in seq_along(ns)) {
    n <- ns[[i]]
    
    x <- vec_cast(args[[i]], to = ptype)
    vec_slice(out, pos:(pos + n - 1)) <- x
    pos <- pos + n
  }

  out
}

(The real vec_c() is a bit more complicated in order to handle inner and outer names).

ifelse()

One of the functions that motivate the development of vctrs is ifelse(). It has the surprising property that the result value is “A vector of the same length and attributes (including dimensions and class) as test”. To me, it seems more reasonable for the type of the output to be controlled by the type of the yes and no arguments.

In dplyr::if_else() I swung too far towards strictness: it throws an error if yes and no are not the same type. This is annoying in practice because it requires typed missing values (NA_character_ etc), and because the checks are only on the class (not the full prototype), it’s easy to create invalid output.

I found it much easier to understand what ifelse() should do once I internalised the ideas of type- and size-stability:

  • The first argument must be logical.

  • vec_ptype(if_else(test, yes, no)) equals vec_ptype_common(yes, no). Unlike ifelse() this implies that if_else() must always evaluate both yes and no in order to figure out the correct type. I think this is consistent with && (scalar operation, short circuits) and & (vectorised, evaluates both sides).

  • vec_size(if_else(test, yes, no)) equals vec_size_common(test, yes, no). I think the output could have the same size as test (i.e., the same behaviour as ifelse), but I think as a general rule that your inputs should either be mutually recycling or not.

This leads to the following implementation:

if_else <- function(test, yes, no) {
  if (!is_logical(test)) {
    abort("`test` must be a logical vector.")
  }
  
  c(yes, no) %<-% vec_cast_common(yes, no)
  c(test, yes, no) %<-% vec_recycle_common(test, yes, no)

  out <- vec_init(yes, vec_size(yes))
  vec_slice(out, test) <- vec_slice(yes, test)
  vec_slice(out, !test) <- vec_slice(no, !test)

  out
}

x <- c(NA, 1:4)
if_else(x > 2, "small", "big")
#> [1] NA      "big"   "big"   "small" "small"
if_else(x > 2, factor("small"), factor("big"))
#> [1] <NA>  big   big   small small
#> Levels: small big
if_else(x > 2, Sys.Date(), Sys.Date() + 7)
#> [1] NA           "2026-03-27" "2026-03-27" "2026-03-20" "2026-03-20"

By using vec_size() and vec_slice(), this definition of if_else() automatically works with data.frames and matrices:

if_else(x > 2, data.frame(x = 1), data.frame(y = 2))
#>    x  y
#> 1 NA NA
#> 2 NA  2
#> 3 NA  2
#> 4  1 NA
#> 5  1 NA

if_else(x > 2, matrix(1:10, ncol = 2), cbind(30, 30))
#>      [,1] [,2]
#> [1,]   NA   NA
#> [2,]   30   30
#> [3,]   30   30
#> [4,]    4    9
#> [5,]    5   10
vctrs/inst/doc/type-size.html0000644000176200001440000021463615157322654016002 0ustar liggesusers Prototypes and sizes

Prototypes and sizes

Rather than using class() and length(), vctrs has notions of prototype (vec_ptype_show()) and size (vec_size()). This vignette discusses the motivation for why these alternatives are necessary and connects their definitions to type coercion and the recycling rules.

Size and prototype are motivated by thinking about the optimal behaviour for c() and rbind(), particularly inspired by data frames with columns that are matrices or data frames.

library(vctrs)

Prototype

The idea of a prototype is to capture the metadata associated with a vector without capturing any data. Unfortunately, the class() of an object is inadequate for this purpose:

  • The class() doesn’t include attributes. Attributes are important because, for example, they store the levels of a factor and the timezone of a POSIXct. You cannot combine two factors or two POSIXcts without thinking about the attributes.

  • The class() of a matrix is “matrix” and doesn’t include the type of the underlying vector or the dimensionality.

Instead, vctrs takes advantage of R’s vectorised nature and uses a prototype, a 0-observation slice of the vector (this is basically x[0] but with some subtleties we’ll come back to later). This is a miniature version of the vector that contains all of the attributes but none of the data.

Conveniently, you can create many prototypes using existing base functions (e.g, double() and factor(levels = c("a", "b"))). vctrs provides a few helpers (e.g. new_date(), new_datetime(), and new_duration()) where the equivalents in base R are missing.

Base prototypes

vec_ptype() creates a prototype from an existing object. However, many base vectors have uninformative printing methods for 0-length subsets, so vctrs also provides vec_ptype_show(), which prints the prototype in a friendly way (and returns nothing).

Using vec_ptype_show() allows us to see the prototypes base R classes:

  • Atomic vectors have no attributes and just display the underlying typeof():

    vec_ptype_show(FALSE)
    #> Prototype: logical
    vec_ptype_show(1L)
    #> Prototype: integer
    vec_ptype_show(2.5)
    #> Prototype: double
    vec_ptype_show("three")
    #> Prototype: character
    vec_ptype_show(list(1, 2, 3))
    #> Prototype: list
  • The prototype of matrices and arrays include the base type and the dimensions after the first:

    vec_ptype_show(array(logical(), c(2, 3)))
    #> Prototype: logical[,3]
    vec_ptype_show(array(integer(), c(2, 3, 4)))
    #> Prototype: integer[,3,4]
    vec_ptype_show(array(character(), c(2, 3, 4, 5)))
    #> Prototype: character[,3,4,5]
  • The prototype of a factor includes its levels. Levels are a character vector, which can be arbitrarily long, so the prototype just shows a hash. If the hash of two factors is equal, it’s highly likely that their levels are also equal.

    vec_ptype_show(factor("a"))
    #> Prototype: factor<4d52a>
    vec_ptype_show(ordered("b"))
    #> Prototype: ordered<9b7e3>

    While vec_ptype_show() prints only the hash, the prototype object itself does contain all levels:

    vec_ptype(factor("a"))
    #> factor()
    #> Levels: a
  • Base R has three key date time classes: dates, date-times (POSIXct), and durations (difftime). Date-times have a timezone, and durations have a unit.

    vec_ptype_show(Sys.Date())
    #> Prototype: date
    vec_ptype_show(Sys.time())
    #> Prototype: datetime<local>
    vec_ptype_show(as.difftime(10, units = "mins"))
    #> Prototype: duration<mins>
  • Data frames have the most complex prototype: the prototype of a data frame is the name and prototype of each column:

    vec_ptype_show(data.frame(a = FALSE, b = 1L, c = 2.5, d = "x"))
    #> Prototype: data.frame<
    #>   a: logical
    #>   b: integer
    #>   c: double
    #>   d: character
    #> >

    Data frames can have columns that are themselves data frames, making this a “recursive” type:

    df <- data.frame(x = FALSE)
    df$y <- data.frame(a = 1L, b = 2.5)
    vec_ptype_show(df)
    #> Prototype: data.frame<
    #>   x: logical
    #>   y: 
    #>     data.frame<
    #>       a: integer
    #>       b: double
    #>     >
    #> >

Coercing to common type

It’s often important to combine vectors with multiple types. vctrs provides a consistent set of rules for coercion, via vec_ptype_common(). vec_ptype_common() possesses the following invariants:

  • class(vec_ptype_common(x, y)) equals class(vec_ptype_common(y, x)).

  • class(vec_ptype_common(x, vec_ptype_common(y, z)) equals class(vec_ptype_common(vec_ptype_common(x, y), z)).

  • vec_ptype_common(x, NULL) == vec_ptype(x).

i.e., vec_ptype_common() is both commutative and associative (with respect to class) and has an identity element, NULL; i.e., it’s a commutative monoid. This means the underlying implementation is quite simple: we can find the common type of any number of objects by progressively finding the common type of pairs of objects.

Like with vec_ptype(), the easiest way to explore vec_ptype_common() is with vec_ptype_show(): when given multiple inputs, it will print their common prototype. (In other words: program with vec_ptype_common() but play with vec_ptype_show().)

  • The common type of atomic vectors is computed very similar to the rules of base R, except that we do not coerce to character automatically:

    vec_ptype_show(logical(), integer(), double())
    #> Prototype: <double>
    #> 0. (           , <logical> ) = <logical>
    #> 1. ( <logical> , <integer> ) = <integer>
    #> 2. ( <integer> , <double>  ) = <double>
    
    vec_ptype_show(logical(), character())
    #> Error in `vec_ptype_show()`:
    #> ! Can't combine `out_types[[i - 1]]` <logical> and `in_types[[i]]` <character>.
  • Matrices and arrays are automatically broadcast to higher dimensions:

    vec_ptype_show(
      array(1, c(0, 1)), 
      array(1, c(0, 2))
    )
    #> Prototype: <double[,2]>
    #> 0. (              , <double[,1]> ) = <double[,1]>
    #> 1. ( <double[,1]> , <double[,2]> ) = <double[,2]>
    
    vec_ptype_show(
      array(1, c(0, 1)), 
      array(1, c(0, 3)),
      array(1, c(0, 3, 4)),
      array(1, c(0, 3, 4, 5))
    )
    #> Prototype: <double[,3,4,5]>
    #> 0. (                , <double[,1]>     ) = <double[,1]>    
    #> 1. ( <double[,1]>   , <double[,3]>     ) = <double[,3]>    
    #> 2. ( <double[,3]>   , <double[,3,4]>   ) = <double[,3,4]>  
    #> 3. ( <double[,3,4]> , <double[,3,4,5]> ) = <double[,3,4,5]>

    Provided that the dimensions follow the vctrs recycling rules:

    vec_ptype_show(
      array(1, c(0, 2)), 
      array(1, c(0, 3))
    )
    #> Error:
    #> ! Can't combine `out_types[[i - 1]]` <double[,2]> and `in_types[[i]]` <double[,3]>.
    #> ✖ Incompatible sizes 2 and 3 along axis 2.
  • Factors combine levels in the order in which they appear.

    fa <- factor("a")
    fb <- factor("b")
    
    levels(vec_ptype_common(fa, fb))
    #> [1] "a" "b"
    levels(vec_ptype_common(fb, fa))
    #> [1] "b" "a"
  • Combining a date and date-time yields a date-time:

    vec_ptype_show(new_date(), new_datetime())
    #> Prototype: <datetime<local>>
    #> 0. (        , <date>            ) = <date>           
    #> 1. ( <date> , <datetime<local>> ) = <datetime<local>>

    When combining two date times, the timezone is taken from the first input:

    vec_ptype_show(
      new_datetime(tzone = "US/Central"), 
      new_datetime(tzone = "Pacific/Auckland")
    )
    #> Prototype: <datetime<US/Central>>
    #> 0. (                        , <datetime<US/Central>>       ) = <datetime<US/Central>>
    #> 1. ( <datetime<US/Central>> , <datetime<Pacific/Auckland>> ) = <datetime<US/Central>>

    Unless it’s the local timezone, in which case any explicit time zone will win:

    vec_ptype_show(
      new_datetime(tzone = ""), 
      new_datetime(tzone = ""), 
      new_datetime(tzone = "Pacific/Auckland")
    )
    #> Prototype: <datetime<Pacific/Auckland>>
    #> 0. (                   , <datetime<local>>            ) = <datetime<local>>           
    #> 1. ( <datetime<local>> , <datetime<local>>            ) = <datetime<local>>           
    #> 2. ( <datetime<local>> , <datetime<Pacific/Auckland>> ) = <datetime<Pacific/Auckland>>
  • The common type of two data frames is the common type of each column that occurs in both data frames:

    vec_ptype_show(
      data.frame(x = FALSE), 
      data.frame(x = 1L),
      data.frame(x = 2.5)
    )
    #> Prototype: <data.frame<x:double>>
    #> 0. (                         , <data.frame<x:logical>> ) = <data.frame<x:logical>>
    #> 1. ( <data.frame<x:logical>> , <data.frame<x:integer>> ) = <data.frame<x:integer>>
    #> 2. ( <data.frame<x:integer>> , <data.frame<x:double>>  ) = <data.frame<x:double>>

    And the union of the columns that only occur in one:

    vec_ptype_show(data.frame(x = 1, y = 1), data.frame(y = 1, z = 1))
    #> Prototype: <data.frame<
    #>   x: double
    #>   y: double
    #>   z: double
    #> >>
    #> 0. ┌              , <data.frame< ┐ = <data.frame<
    #>    │                  x: double  │     x: double 
    #>    │                  y: double  │     y: double 
    #>    └                >>           ┘   >>          
    #> 1. ┌ <data.frame< , <data.frame< ┐ = <data.frame<
    #>    │   x: double      y: double  │     x: double 
    #>    │   y: double      z: double  │     y: double 
    #>    │ >>             >>           │     z: double 
    #>    └                             ┘   >>

    Note that new columns are added on the right-hand side. This is consistent with the way that factor levels and time zones are handled.

Casting to specified type

vec_ptype_common() finds the common type of a set of vector. Typically, however, what you want is a set of vectors coerced to that common type. That’s the job of vec_cast_common():

str(vec_cast_common(
  FALSE, 
  1:5, 
  2.5
))
#> List of 3
#>  $ : num 0
#>  $ : num [1:5] 1 2 3 4 5
#>  $ : num 2.5

str(vec_cast_common(
  factor("x"), 
  factor("y")
))
#> List of 2
#>  $ : Factor w/ 2 levels "x","y": 1
#>  $ : Factor w/ 2 levels "x","y": 2

str(vec_cast_common(
  data.frame(x = 1),
  data.frame(y = 1:2)
))
#> List of 2
#>  $ :'data.frame':    1 obs. of  2 variables:
#>   ..$ x: num 1
#>   ..$ y: int NA
#>  $ :'data.frame':    2 obs. of  2 variables:
#>   ..$ x: num [1:2] NA NA
#>   ..$ y: int [1:2] 1 2

Alternatively, you can cast to a specific prototype using vec_cast():

# Cast succeeds
vec_cast(c(1, 2), integer())
#> [1] 1 2

# Cast fails
vec_cast(c(1.5, 2.5), factor("a"))
#> Error:
#> ! Can't convert `c(1.5, 2.5)` <double> to <factor<4d52a>>.

If a cast is possible in general (i.e., double -> integer), but information is lost for a specific input (e.g. 1.5 -> 1), it will generate an error.

vec_cast(c(1.5, 2), integer())
#> Error:
#> ! Can't convert from `c(1.5, 2)` <double> to <integer> due to loss of precision.
#> • Locations: 1

You can suppress the lossy cast errors with allow_lossy_cast():

allow_lossy_cast(
  vec_cast(c(1.5, 2), integer())
)
#> [1] 1 2

This will suppress all lossy cast errors. Supply prototypes if you want to be specific about the type of lossy cast allowed:

allow_lossy_cast(
  vec_cast(c(1.5, 2), integer()),
  x_ptype = double(),
  to_ptype = integer()
)
#> [1] 1 2

The set of casts should not be more permissive than the set of coercions. This is not enforced but it is expected from classes to follow the rule and keep the coercion ecosystem sound.

Size

vec_size() was motivated by the need to have an invariant that describes the number of “observations” in a data structure. This is particularly important for data frames, as it’s useful to have some function such that f(data.frame(x)) equals f(x). No base function has this property:

  • length(data.frame(x)) equals 1 because the length of a data frame is the number of columns.

  • nrow(data.frame(x)) does not equal nrow(x) because nrow() of a vector is NULL.

  • NROW(data.frame(x)) equals NROW(x) for vector x, so is almost what we want. But because NROW() is defined in terms of length(), it returns a value for every object, even types that can’t go in a data frame, e.g. data.frame(mean) errors even though NROW(mean) is 1.

We define vec_size() as follows:

  • It is the length of 1d vectors.
  • It is the number of rows of data frames, matrices, and arrays.
  • It throws error for non vectors.

Given vec_size(), we can give a precise definition of a data frame: a data frame is a list of vectors where every vector has the same size. This has the desirable property of trivially supporting matrix and data frame columns.

Slicing

vec_slice() is to vec_size() as [ is to length(); i.e., it allows you to select observations regardless of the dimensionality of the underlying object. vec_slice(x, i) is equivalent to:

  • x[i] when x is a vector.
  • x[i, , drop = FALSE] when x is a data frame.
  • x[i, , , drop = FALSE] when x is a 3d array.
x <- sample(1:10)
df <- data.frame(x = x)

vec_slice(x, 5:6)
#> [1] 8 2
vec_slice(df, 5:6)
#>   x
#> 1 8
#> 2 2

vec_slice(data.frame(x), i) equals data.frame(vec_slice(x, i)) (modulo variable and row names).

Prototypes are generated with vec_slice(x, 0L); given a prototype, you can initialize a vector of given size (filled with NAs) with vec_init().

Common sizes: recycling rules

Closely related to the definition of size are the recycling rules. The recycling rules determine the size of the output when two vectors of different sizes are combined. In vctrs, the recycling rules are encoded in vec_size_common(), which gives the common size of a set of vectors:

vec_size_common(1:3, 1:3, 1:3)
#> [1] 3
vec_size_common(1:10, 1)
#> [1] 10
vec_size_common(integer(), 1)
#> [1] 0

vctrs obeys a stricter set of recycling rules than base R. Vectors of size 1 are recycled to any other size. All other size combinations will generate an error. This strictness prevents common mistakes like dest == c("IAH", "HOU")), at the cost of occasionally requiring an explicit calls to rep().

Summary of vctrs recycling rules. X indicates an error
Summary of vctrs recycling rules. X indicates an error

You can apply the recycling rules in two ways:

  • If you have a vector and desired size, use vec_recycle():

    vec_recycle(1:3, 3)
    #> [1] 1 2 3
    vec_recycle(1, 10)
    #>  [1] 1 1 1 1 1 1 1 1 1 1
  • If you have multiple vectors and you want to recycle them to the same size, use vec_recycle_common():

    vec_recycle_common(1:3, 1:3)
    #> [[1]]
    #> [1] 1 2 3
    #> 
    #> [[2]]
    #> [1] 1 2 3
    vec_recycle_common(1:10, 1)
    #> [[1]]
    #>  [1]  1  2  3  4  5  6  7  8  9 10
    #> 
    #> [[2]]
    #>  [1] 1 1 1 1 1 1 1 1 1 1

Appendix: recycling in base R

The recycling rules in base R are described in The R Language Definition but are not implemented in a single function and thus are not applied consistently. Here, I give a brief overview of their most common realisation, as well as showing some of the exceptions.

Generally, in base R, when a pair of vectors is not the same length, the shorter vector is recycled to the same length as the longer:

rep(1, 6) + 1
#> [1] 2 2 2 2 2 2
rep(1, 6) + 1:2
#> [1] 2 3 2 3 2 3
rep(1, 6) + 1:3
#> [1] 2 3 4 2 3 4

If the length of the longer vector is not an integer multiple of the length of the shorter, you usually get a warning:

invisible(pmax(1:2, 1:3))
#> Warning in pmax(1:2, 1:3): an argument will be fractionally recycled
invisible(1:2 + 1:3)
#> Warning in 1:2 + 1:3: longer object length is not a multiple of shorter object
#> length
invisible(cbind(1:2, 1:3))
#> Warning in cbind(1:2, 1:3): number of rows of result is not a multiple of
#> vector length (arg 1)

But some functions recycle silently:

length(atan2(1:3, 1:2))
#> [1] 3
length(paste(1:3, 1:2))
#> [1] 3
length(ifelse(1:3, 1:2, 1:2))
#> [1] 3

And data.frame() throws an error:

data.frame(1:2, 1:3)
#> Error in `data.frame()`:
#> ! arguments imply differing number of rows: 2, 3

The R language definition states that “any arithmetic operation involving a zero-length vector has a zero-length result”. But outside of arithmetic, this rule is not consistently followed:

# length-0 output
1:2 + integer()
#> integer(0)
atan2(1:2, integer())
#> numeric(0)
pmax(1:2, integer())
#> integer(0)

# dropped
cbind(1:2, integer())
#>      [,1]
#> [1,]    1
#> [2,]    2

# recycled to length of first
ifelse(rep(TRUE, 4), integer(), character())
#> [1] NA NA NA NA

# preserved-ish
paste(1:2, integer())
#> [1] "1 " "2 "

# Errors
data.frame(1:2, integer())
#> Error in `data.frame()`:
#> ! arguments imply differing number of rows: 2, 0
vctrs/inst/WORDLIST0000644000176200001440000000001313347722504013563 0ustar liggesusersvectorised vctrs/README.md0000644000176200001440000000507315157305416012706 0ustar liggesusers # vctrs [![Codecov test coverage](https://codecov.io/gh/r-lib/vctrs/graph/badge.svg)](https://app.codecov.io/gh/r-lib/vctrs) ![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg) [![R-CMD-check](https://github.com/r-lib/vctrs/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/vctrs/actions/workflows/R-CMD-check.yaml) There are three main goals to the vctrs package, each described in a vignette: - To propose `vec_size()` and `vec_ptype()` as alternatives to `length()` and `class()`; `vignette("type-size")`. These definitions are paired with a framework for size-recycling and type-coercion. `ptype` should evoke the notion of a prototype, i.e. the original or typical form of something. - To define size- and type-stability as desirable function properties, use them to analyse existing base functions, and to propose better alternatives; `vignette("stability")`. This work has been particularly motivated by thinking about the ideal properties of `c()`, `ifelse()`, and `rbind()`. - To provide a new `vctr` base class that makes it easy to create new S3 vectors; `vignette("s3-vector")`. vctrs provides methods for many base generics in terms of a few new vctrs generics, making implementation considerably simpler and more robust. vctrs is a developer-focussed package. Understanding and extending vctrs requires some effort from developers, but should be invisible to most users. It’s our hope that having an underlying theory will mean that users can build up an accurate mental model without explicitly learning the theory. vctrs will typically be used by other packages, making it easy for them to provide new classes of S3 vectors that are supported throughout the tidyverse (and beyond). For that reason, vctrs has few dependencies. ## Installation Install vctrs from CRAN with: ``` r install.packages("vctrs") ``` Alternatively, if you need the development version, install it with: ``` r # install.packages("pak") pak::pak("r-lib/vctrs") ``` ## Usage ``` r library(vctrs) # Sizes vec_size_common(1, 1:10) #> [1] 10 str(vec_recycle_common(1, 1:10)) #> List of 2 #> $ : num [1:10] 1 1 1 1 1 1 1 1 1 1 #> $ : int [1:10] 1 2 3 4 5 6 7 8 9 10 # Prototypes vec_ptype_common(FALSE, 1L, 2.5) #> numeric(0) str(vec_cast_common(FALSE, 1L, 2.5)) #> List of 3 #> $ : num 0 #> $ : num 1 #> $ : num 2.5 ``` vctrs/build/0000755000176200001440000000000015157322654012524 5ustar liggesusersvctrs/build/vignette.rds0000644000176200001440000000045615157322654015070 0ustar liggesusers=o0@AR*u%!pړAՈNUAl a;x!Sp q=@"dOr KAh ?% NbJȸ)Hj M׮׭˱”ɜ{*Mٚk/om_lH::_9<`\;03/2V7~$3 =`|$=N\YF4vctrs/man/0000755000176200001440000000000015157322654012200 5ustar liggesusersvctrs/man/vec_cbind_frame_ptype.Rd0000644000176200001440000000134414276722575017007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.R \name{vec_cbind_frame_ptype} \alias{vec_cbind_frame_ptype} \title{Frame prototype} \usage{ vec_cbind_frame_ptype(x, ...) } \arguments{ \item{x}{A data frame.} \item{...}{These dots are for future extensions and must be empty.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} This is an experimental generic that returns zero-columns variants of a data frame. It is needed for \code{\link[=vec_cbind]{vec_cbind()}}, to work around the lack of colwise primitives in vctrs. Expect changes. } \keyword{internal} vctrs/man/df_list.Rd0000644000176200001440000000540014511524374014106 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{df_list} \alias{df_list} \title{Collect columns for data frame construction} \usage{ df_list( ..., .size = NULL, .unpack = TRUE, .name_repair = c("check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Vectors of equal-length. When inputs are named, those names are used for names of the resulting list.} \item{.size}{The common size of vectors supplied in \code{...}. If \code{NULL}, this will be computed as the common size of the inputs.} \item{.unpack}{Should unnamed data frame inputs be unpacked? Defaults to \code{TRUE}.} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"}, \code{"minimal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{df_list()} constructs the data structure underlying a data frame, a named list of equal-length vectors. It is often used in combination with \code{\link[=new_data_frame]{new_data_frame()}} to safely and consistently create a helper function for data frame subclasses. } \section{Properties}{ \itemize{ \item Inputs are \link[=theory-faq-recycling]{recycled} to a common size with \code{\link[=vec_recycle_common]{vec_recycle_common()}}. \item With the exception of data frames, inputs are not modified in any way. Character vectors are never converted to factors, and lists are stored as-is for easy creation of list-columns. \item Unnamed data frame inputs are automatically unpacked. Named data frame inputs are stored unmodified as data frame columns. \item \code{NULL} inputs are completely ignored. \item The dots are dynamic, allowing for splicing of lists with \verb{!!!} and unquoting. } } \examples{ # `new_data_frame()` can be used to create custom data frame constructors new_fancy_df <- function(x = list(), n = NULL, ..., class = NULL) { new_data_frame(x, n = n, ..., class = c(class, "fancy_df")) } # Combine this constructor with `df_list()` to create a safe, # consistent helper function for your data frame subclass fancy_df <- function(...) { data <- df_list(...) new_fancy_df(data) } df <- fancy_df(x = 1) class(df) } \seealso{ \code{\link[=new_data_frame]{new_data_frame()}} for constructing data frame subclasses from a validated input. \code{\link[=data_frame]{data_frame()}} for a fast data frame creation helper. } vctrs/man/vec_recycle.Rd0000644000176200001440000000402214511524374014744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recycle.R \name{vec_recycle} \alias{vec_recycle} \alias{vec_recycle_common} \title{Vector recycling} \usage{ vec_recycle(x, size, ..., x_arg = "", call = caller_env()) vec_recycle_common(..., .size = NULL, .arg = "", .call = caller_env()) } \arguments{ \item{x}{A vector to recycle.} \item{size}{Desired output size.} \item{...}{Depending on the function used: \itemize{ \item For \code{vec_recycle_common()}, vectors to recycle. \item For \code{vec_recycle()}, these dots should be empty. }} \item{x_arg}{Argument name for \code{x}. These are used in error messages to inform the user about which argument has an incompatible size.} \item{call, .call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.size}{Desired output size. If omitted, will use the common size from \code{\link[=vec_size_common]{vec_size_common()}}.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} } \description{ \code{vec_recycle(x, size)} recycles a single vector to a given size. \code{vec_recycle_common(...)} recycles multiple vectors to their common size. All functions obey the \link[=theory-faq-recycling]{vctrs recycling rules}, and will throw an error if recycling is not possible. See \code{\link[=vec_size]{vec_size()}} for the precise definition of size. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ # Inputs with 1 observation are recycled vec_recycle_common(1:5, 5) vec_recycle_common(integer(), 5) \dontrun{ vec_recycle_common(1:5, 1:2) } # Data frames and matrices are recycled along their rows vec_recycle_common(data.frame(x = 1), 1:5) vec_recycle_common(array(1:2, c(1, 2)), 1:5) vec_recycle_common(array(1:3, c(1, 3, 1)), 1:5) } vctrs/man/vec_detect_complete.Rd0000644000176200001440000000267214315060307016460 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/complete.R \name{vec_detect_complete} \alias{vec_detect_complete} \title{Complete} \usage{ vec_detect_complete(x) } \arguments{ \item{x}{A vector} } \value{ A logical vector with the same size as \code{x}. } \description{ \code{vec_detect_complete()} detects "complete" observations. An observation is considered complete if it is non-missing. For most vectors, this implies that \code{vec_detect_complete(x) == !vec_detect_missing(x)}. For data frames and matrices, a row is only considered complete if all elements of that row are non-missing. To compare, \code{!vec_detect_missing(x)} detects rows that are partially complete (they have at least one non-missing value). } \details{ A \link[=new_rcrd]{record} type vector is similar to a data frame, and is only considered complete if all fields are non-missing. } \examples{ x <- c(1, 2, NA, 4, NA) # For most vectors, this is identical to `!vec_detect_missing(x)` vec_detect_complete(x) !vec_detect_missing(x) df <- data_frame( x = x, y = c("a", "b", NA, "d", "e") ) # This returns `TRUE` where all elements of the row are non-missing. # Compare that with `!vec_detect_missing()`, which detects rows that have at # least one non-missing value. df2 <- df df2$all_non_missing <- vec_detect_complete(df) df2$any_non_missing <- !vec_detect_missing(df) df2 } \seealso{ \code{\link[stats:complete.cases]{stats::complete.cases()}} } vctrs/man/vec_default_ptype2.Rd0000644000176200001440000000431314315060307016241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast.R, R/type2.R \name{vec_default_cast} \alias{vec_default_cast} \alias{vec_default_ptype2} \title{Default cast and ptype2 methods} \usage{ vec_default_cast(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) vec_default_ptype2(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) } \arguments{ \item{x}{Vectors to cast.} \item{to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} \item{x_arg}{Argument name for \code{x}, used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ These functions are automatically called when no \code{\link[=vec_ptype2]{vec_ptype2()}} or \code{\link[=vec_cast]{vec_cast()}} method is implemented for a pair of types. \itemize{ \item They apply special handling if one of the inputs is of type \code{AsIs} or \code{sfc}. \item They attempt a number of fallbacks in cases where it would be too inconvenient to be strict: \itemize{ \item If the class and attributes are the same they are considered compatible. \code{vec_default_cast()} returns \code{x} in this case. \item In case of incompatible data frame classes, they fall back to \code{data.frame}. If an incompatible subclass of tibble is involved, they fall back to \code{tbl_df}. } \item Otherwise, an error is thrown with \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} or \code{\link[=stop_incompatible_cast]{stop_incompatible_cast()}}. } } \keyword{internal} vctrs/man/df_ptype2.Rd0000644000176200001440000000426714315060307014361 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R, R/type-tibble.R \name{df_ptype2} \alias{df_ptype2} \alias{df_cast} \alias{tib_ptype2} \alias{tib_cast} \title{Coercion between two data frames} \usage{ df_ptype2(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) df_cast(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) tib_ptype2(x, y, ..., x_arg = "", y_arg = "", call = caller_env()) tib_cast(x, to, ..., x_arg = "", to_arg = "", call = caller_env()) } \arguments{ \item{x, y, to}{Subclasses of data frame.} \item{...}{If you call \code{df_ptype2()} or \code{df_cast()} from a \code{vec_ptype2()} or \code{vec_cast()} method, you must forward the dots passed to your method on to \code{df_ptype2()} or \code{df_cast()}.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \value{ \itemize{ \item When \code{x} and \code{y} are not compatible, an error of class \code{vctrs_error_incompatible_type} is thrown. \item When \code{x} and \code{y} are compatible, \code{df_ptype2()} returns the common type as a bare data frame. \code{tib_ptype2()} returns the common type as a bare tibble. } } \description{ \code{df_ptype2()} and \code{df_cast()} are the two functions you need to call from \code{vec_ptype2()} and \code{vec_cast()} methods for data frame subclasses. See \link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}. Their main job is to determine the common type of two data frames, adding and coercing columns as needed, or throwing an incompatible type error when the columns are not compatible. } vctrs/man/vec_bind.Rd0000644000176200001440000001650515056611175014244 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind.R \name{vec_bind} \alias{vec_bind} \alias{vec_rbind} \alias{vec_cbind} \title{Combine many data frames into one data frame} \usage{ vec_rbind( ..., .ptype = NULL, .names_to = rlang::zap(), .name_repair = c("unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), .name_spec = NULL, .error_call = current_env() ) vec_cbind( ..., .ptype = NULL, .size = NULL, .name_repair = c("unique", "universal", "check_unique", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Data frames or vectors. When the inputs are named: \itemize{ \item \code{vec_rbind()} assigns names to row names unless \code{.names_to} is supplied. In that case the names are assigned in the column defined by \code{.names_to}. \item \code{vec_cbind()} creates packed data frame columns with named inputs. } \code{NULL} inputs are silently ignored. Empty (e.g. zero row) inputs will not appear in the output, but will affect the derived \code{.ptype}.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.names_to}{This controls what to do with names on \code{...}: \itemize{ \item By default, names on \code{...} are \link[rlang:zap]{zapped} and do not appear anywhere in the output. \item If a string, specifies a column where the names on \code{...} will be copied. These names are often useful to identify rows with their original input. If a column name is supplied and \code{...} is not named, an integer column is used instead. \item If \code{NULL}, the outer names on \code{...} are instead merged with inner row names on each element of \code{...} and are subject to \code{.name_spec}. }} \item{.name_repair}{One of \code{"unique"}, \code{"universal"}, \code{"check_unique"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options. With \code{vec_rbind()}, the repair function is applied to all inputs separately. This is because \code{vec_rbind()} needs to align their columns before binding the rows, and thus needs all inputs to have unique names. On the other hand, \code{vec_cbind()} applies the repair function after all inputs have been concatenated together in a final data frame. Hence \code{vec_cbind()} allows the more permissive minimal names repair.} \item{.name_spec}{A name specification (as documented in \code{\link[=vec_c]{vec_c()}}) for combining the outer names on \code{...} with the inner row names of each element of \code{...}. An outer name will only ever be provided when \code{.names_to} is set to \code{NULL}, which causes the outer name to be used as part of the row names rather than as a new column, but it can still be useful to hardcode this to either \code{\link[rlang:zap]{rlang::zap()}} to always ignore all names, or \code{"inner"} to always ignore outer names, regardless of \code{.names_to}.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.size}{If, \code{NULL}, the default, will determine the number of rows in \code{vec_cbind()} output by using the tidyverse \link[=theory-faq-recycling]{recycling rules}. Alternatively, specify the desired number of rows, and any inputs of length 1 will be recycled appropriately.} } \value{ A data frame, or subclass of data frame. If \code{...} is a mix of different data frame subclasses, \code{vec_ptype2()} will be used to determine the output type. For \code{vec_rbind()}, this will determine the type of the container and the type of each column; for \code{vec_cbind()} it only determines the type of the output container. If there are no non-\code{NULL} inputs, the result will be \code{data.frame()}. } \description{ This pair of functions binds together data frames (and vectors), either row-wise or column-wise. Row-binding creates a data frame with common type across all arguments. Column-binding creates a data frame with common length across all arguments. } \section{Invariants}{ All inputs are first converted to a data frame. The conversion for 1d vectors depends on the direction of binding: \itemize{ \item For \code{vec_rbind()}, each element of the vector becomes a column in a single row. \item For \code{vec_cbind()}, each element of the vector becomes a row in a single column. } Once the inputs have all become data frames, the following invariants are observed for row-binding: \itemize{ \item \code{vec_size(vec_rbind(x, y)) == vec_size(x) + vec_size(y)} \item \code{vec_ptype(vec_rbind(x, y)) = vec_ptype_common(x, y)} } Note that if an input is an empty vector, it is first converted to a 1-row data frame with 0 columns. Despite being empty, its effective size for the total number of rows is 1. For column-binding, the following invariants apply: \itemize{ \item \code{vec_size(vec_cbind(x, y)) == vec_size_common(x, y)} \item \code{vec_ptype(vec_cbind(x, y)) == vec_cbind(vec_ptype(x), vec_ptype(x))} } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_init]{vec_init()}} \item \code{\link[=vec_assign]{vec_assign()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies of \code{vec_rbind()}}{ \itemize{ \item \code{\link[base:c]{base::c()}} } If columns to combine inherit from a common class, \code{vec_rbind()} falls back to \code{base::c()} if there exists a \code{c()} method implemented for this class hierarchy. } } \examples{ # row binding ----------------------------------------- # common columns are coerced to common class vec_rbind( data.frame(x = 1), data.frame(x = FALSE) ) # unique columns are filled with NAs vec_rbind( data.frame(x = 1), data.frame(y = "x") ) # null inputs are ignored vec_rbind( data.frame(x = 1), NULL, data.frame(x = 2) ) # bare vectors are treated as rows vec_rbind( c(x = 1, y = 2), c(x = 3) ) # default names will be supplied if arguments are not named vec_rbind( 1:2, 1:3, 1:4 ) # column binding -------------------------------------- # each input is recycled to have common length vec_cbind( data.frame(x = 1), data.frame(y = 1:3) ) # bare vectors are treated as columns vec_cbind( data.frame(x = 1), y = letters[1:3] ) # if you supply a named data frame, it is packed in a single column data <- vec_cbind( x = data.frame(a = 1, b = 2), y = 1 ) data # Packed data frames are nested in a single column. This makes it # possible to access it through a single name: data$x # since the base print method is suboptimal with packed data # frames, it is recommended to use tibble to work with these: if (rlang::is_installed("tibble")) { vec_cbind(x = tibble::tibble(a = 1, b = 2), y = 1) } # duplicate names are flagged vec_cbind(x = 1, x = 2) } \seealso{ \code{\link[=vec_c]{vec_c()}} for combining 1d vectors. } vctrs/man/vec_match.Rd0000644000176200001440000000474715154276515014435 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_match} \alias{vec_match} \alias{vec_in} \title{Find matching observations across vectors} \usage{ vec_match( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) vec_in( needles, haystack, ..., na_equal = TRUE, needles_arg = "", haystack_arg = "" ) } \arguments{ \item{needles, haystack}{Vector of \code{needles} to search for in vector haystack. \code{haystack} should usually be unique; if not \code{vec_match()} will only return the location of the first match. \code{needles} and \code{haystack} are coerced to the same type prior to comparison.} \item{...}{These dots are for future extensions and must be empty.} \item{na_equal}{If \code{TRUE}, missing values in \code{needles} can be matched to missing values in \code{haystack}. If \code{FALSE}, they propagate, missing values in \code{needles} are represented as \code{NA} in the return value.} \item{needles_arg, haystack_arg}{Argument tags for \code{needles} and \code{haystack} used in error messages.} } \value{ A vector the same length as \code{needles}. \code{vec_in()} returns a logical vector; \code{vec_match()} returns an integer vector. } \description{ \code{vec_in()} returns a logical vector based on whether \code{needle} is found in haystack. \code{vec_match()} returns an integer vector giving location of \code{needle} in \code{haystack}, or \code{NA} if it's not found. } \details{ \code{vec_in()} is equivalent to \link[base:match]{base::\%in\%}; \code{vec_match()} is equivalent to \code{\link[base:match]{base::match()}}. } \section{Missing values}{ In most cases places in R, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. The exception is in matching functions like \code{\link[base:match]{base::match()}} and \code{\link[=merge]{merge()}}, where an \code{NA} will match another \code{NA}. By \code{vec_match()} and \code{vec_in()} will match \code{NA}s; but you can control this behaviour with the \code{na_equal} argument. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ hadley <- strsplit("hadley", "")[[1]] vec_match(hadley, letters) vowels <- c("a", "e", "i", "o", "u") vec_match(hadley, vowels) vec_in(hadley, vowels) # Only the first index of duplicates is returned vec_match(c("a", "b"), c("a", "b", "a", "b")) } vctrs/man/vec_assert.Rd0000644000176200001440000000764415154276515014641 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{vec_assert} \alias{vec_assert} \alias{vec_is} \title{Assert an argument has known prototype and/or size} \usage{ vec_assert( x, ptype = NULL, size = NULL, arg = caller_arg(x), call = caller_env() ) vec_is(x, ptype = NULL, size = NULL) } \arguments{ \item{x}{A vector argument to check.} \item{ptype}{Prototype to compare against. If the prototype has a class, its \code{\link[=vec_ptype]{vec_ptype()}} is compared to that of \code{x} with \code{identical()}. Otherwise, its \code{\link[=typeof]{typeof()}} is compared to that of \code{x} with \code{==}.} \item{size}{A single integer size against which to compare.} \item{arg}{Name of argument being checked. This is used in error messages. The label of the expression passed as \code{x} is taken as default.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ \code{vec_is()} returns \code{TRUE} or \code{FALSE}. \code{vec_assert()} either throws a typed error (see section on error types) or returns \code{x}, invisibly. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#questioning}{\figure{lifecycle-questioning.svg}{options: alt='[Questioning]'}}}{\strong{[Questioning]}} \itemize{ \item \code{vec_is()} is a predicate that checks if its input is a vector that conforms to a prototype and/or a size. \item \code{vec_assert()} throws an error when the input is not a vector or doesn't conform. } } \section{Error types}{ \code{vec_is()} never throws. \code{vec_assert()} throws the following errors: \itemize{ \item If the input is not a vector, an error of class \code{"vctrs_error_scalar_type"} is raised. \item If the prototype doesn't match, an error of class \code{"vctrs_error_assert_ptype"} is raised. \item If the size doesn't match, an error of class \code{"vctrs_error_assert_size"} is raised. } Both errors inherit from \code{"vctrs_error_assert"}. } \section{Lifecycle}{ Both \code{vec_is()} and \code{vec_assert()} are questioning because their \code{ptype} arguments have semantics that are challenging to define clearly and are rarely useful. \itemize{ \item Use \code{\link[=obj_is_vector]{obj_is_vector()}} or \code{\link[=obj_check_vector]{obj_check_vector()}} for vector checks \item Use \code{\link[=vec_check_size]{vec_check_size()}} for size checks \item Use \code{\link[=vec_cast]{vec_cast()}}, \code{\link[=inherits]{inherits()}}, or simple type predicates like \code{\link[rlang:type-predicates]{rlang::is_logical()}} for specific type checks } } \section{Vectors and scalars}{ Informally, a vector is a collection that makes sense to use as column in a data frame. The following rules define whether or not \code{x} is considered a vector. If no \code{\link[=vec_proxy]{vec_proxy()}} method has been registered, \code{x} is a vector if: \itemize{ \item The \link[=typeof]{base type} of the object is atomic: \code{"logical"}, \code{"integer"}, \code{"double"}, \code{"complex"}, \code{"character"}, or \code{"raw"}. \item \code{x} is a list, as defined by \code{\link[=obj_is_list]{obj_is_list()}}. \item \code{x} is a \link{data.frame}. } If a \code{vec_proxy()} method has been registered, \code{x} is a vector if: \itemize{ \item The proxy satisfies one of the above conditions. \item The base type of the proxy is \code{"list"}, regardless of its class. S3 lists are thus treated as scalars unless they implement a \code{vec_proxy()} method. } Otherwise an object is treated as scalar and cannot be used as a vector. In particular: \itemize{ \item \code{NULL} is not a vector. \item S3 lists like \code{lm} objects are treated as scalars by default. \item Objects of type \link{expression} are not treated as vectors. } } \keyword{internal} vctrs/man/internal-faq-ptype2-identity.Rd0000644000176200001440000001207615113325071020112 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-internal.R \name{internal-faq-ptype2-identity} \alias{internal-faq-ptype2-identity} \title{Internal FAQ - \code{vec_ptype2()}, \code{NULL}, and unspecified vectors} \description{ \subsection{Promotion monoid}{ Promotions (i.e. automatic coercions) should always transform inputs to their richer type to avoid losing values of precision. \code{vec_ptype2()} returns the \emph{richer} type of two vectors, or throws an incompatible type error if none of the two vector types include the other. For example, the richer type of integer and double is the latter because double covers a larger range of values than integer. \code{vec_ptype2()} is a \href{https://en.wikipedia.org/wiki/Monoid}{monoid} over vectors, which in practical terms means that it is a well behaved operation for \href{https://purrr.tidyverse.org/reference/reduce.html}{reduction}. Reduction is an important operation for promotions because that is how the richer type of multiple elements is computed. As a monoid, \code{vec_ptype2()} needs an identity element, i.e. a value that doesn’t change the result of the reduction. vctrs has two identity values, \code{NULL} and \strong{unspecified} vectors. } \subsection{The \code{NULL} identity}{ As an identity element that shouldn’t influence the determination of the common type of a set of vectors, \code{NULL} is promoted to any type: \if{html}{\out{
}}\preformatted{vec_ptype2(NULL, "") #> character(0) vec_ptype2(1L, NULL) #> integer(0) }\if{html}{\out{
}} The common type of \code{NULL} and \code{NULL} is the identity \code{NULL}: \if{html}{\out{
}}\preformatted{vec_ptype2(NULL, NULL) #> NULL }\if{html}{\out{
}} This way the result of \code{vec_ptype2(NULL, NULL)} does not influence subsequent promotions: \if{html}{\out{
}}\preformatted{vec_ptype2( vec_ptype2(NULL, NULL), "" ) #> character(0) }\if{html}{\out{
}} } \subsection{Unspecified vectors}{ In the vctrs coercion system, logical vectors of missing values are also automatically promoted to the type of any other vector, just like \code{NULL}. We call these vectors unspecified. The special coercion semantics of unspecified vectors serve two purposes: \enumerate{ \item It makes it possible to assign vectors of \code{NA} inside any type of vectors, even when they are not coercible with logical: \if{html}{\out{
}}\preformatted{x <- letters[1:5] vec_assign(x, 1:2, c(NA, NA)) #> [1] NA NA "c" "d" "e" }\if{html}{\out{
}} \item We can’t put \code{NULL} in a data frame, so we need an identity element that behaves more like a vector. Logical vectors of \code{NA} seem a natural fit for this. } Unspecified vectors are thus promoted to any other type, just like \code{NULL}: \if{html}{\out{
}}\preformatted{vec_ptype2(NA, "") #> character(0) vec_ptype2(1L, c(NA, NA)) #> integer(0) }\if{html}{\out{
}} } \subsection{Finalising common types}{ vctrs has an internal vector type of class \code{vctrs_unspecified}. Users normally don’t see such vectors in the wild, but they do come up when taking the common type of an unspecified vector with another identity value: \if{html}{\out{
}}\preformatted{vec_ptype2(NA, NA) #> [0] vec_ptype2(NA, NULL) #> [0] vec_ptype2(NULL, NA) #> [0] }\if{html}{\out{
}} We can’t return \code{NA} here because \code{vec_ptype2()} normally returns empty vectors. We also can’t return \code{NULL} because unspecified vectors need to be recognised as logical vectors if they haven’t been promoted at the end of the reduction. \if{html}{\out{
}}\preformatted{vec_ptype_finalise(vec_ptype2(NULL, NA)) #> logical(0) }\if{html}{\out{
}} See the output of \code{vec_ptype_common()} which performs the reduction and finalises the type, ready to be used by the caller: \if{html}{\out{
}}\preformatted{vec_ptype_common(NULL, NULL) #> NULL vec_ptype_common(NA, NULL) #> logical(0) }\if{html}{\out{
}} \code{vec_ptype_finalise()} is an S3 generic, but the only time you should ever need to write an S3 method for it is if your class \emph{wraps} another vector in some way and needs special handling to propagate the default finalisation. For example, the ivs package contains an interval class that wraps \code{start} and \code{end} vectors of the same type and has a \code{vec_ptype_finalize()} method that finalises those wrapped vectors: \if{html}{\out{
}}\preformatted{vec_ptype_finalise.ivs_iv <- function(x, ...) \{ start <- unclass(x)[[1L]] ptype <- vec_ptype_finalise(start, ...) new_bare_iv(ptype, ptype) \} }\if{html}{\out{
}} This ensures that \code{vec_ptype_finalise(vec_ptype(ivs::iv(NA, NA)))} correctly finalises to \verb{>} rather than \verb{>}. Note that data frames are already recursively finalised, so you don’t need a \code{vec_ptype_finalise()} method for a data frame subclass. } } vctrs/man/vec_as_names.Rd0000644000176200001440000001551714362266120015113 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_as_names} \alias{vec_as_names} \title{Retrieve and repair names} \usage{ vec_as_names( names, ..., repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), repair_arg = NULL, quiet = FALSE, call = caller_env() ) } \arguments{ \item{names}{A character vector.} \item{...}{These dots are for future extensions and must be empty.} \item{repair}{Either a string or a function. If a string, it must be one of \code{"check_unique"}, \code{"minimal"}, \code{"unique"}, \code{"universal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. If a function, it is invoked with a vector of minimal names and must return minimal names, otherwise an error is thrown. \itemize{ \item Minimal names are never \code{NULL} or \code{NA}. When an element doesn't have a name, its minimal name is an empty string. \item Unique names are unique. A suffix is appended to duplicate names to make them unique. \item Universal names are unique and syntactic, meaning that you can safely use the names as variables without causing a syntax error. } The \code{"check_unique"} option doesn't perform any name repair. Instead, an error is raised if the names don't suit the \code{"unique"} criteria. The options \code{"unique_quiet"} and \code{"universal_quiet"} are here to help the user who calls this function indirectly, via another function which exposes \code{repair} but not \code{quiet}. Specifying \code{repair = "unique_quiet"} is like specifying \verb{repair = "unique", quiet = TRUE}. When the \code{"*_quiet"} options are used, any setting of \code{quiet} is silently overridden.} \item{repair_arg}{If specified and \code{repair = "check_unique"}, any errors will include a hint to set the \code{repair_arg}.} \item{quiet}{By default, the user is informed of any renaming caused by repairing the names. This only concerns unique and universal repairing. Set \code{quiet} to \code{TRUE} to silence the messages. Users can silence the name repair messages by setting the \code{"rlib_name_repair_verbosity"} global option to \code{"quiet"}.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{vec_as_names()} takes a character vector of names and repairs it according to the \code{repair} argument. It is the r-lib and tidyverse equivalent of \code{\link[base:make.names]{base::make.names()}}. vctrs deals with a few levels of name repair: \itemize{ \item \code{minimal} names exist. The \code{names} attribute is not \code{NULL}. The name of an unnamed element is \code{""} and never \code{NA}. For instance, \code{vec_as_names()} always returns minimal names and data frames created by the tibble package have names that are, at least, \code{minimal}. \item \code{unique} names are \code{minimal}, have no duplicates, and can be used where a variable name is expected. Empty names, \code{...}, and \code{..} followed by a sequence of digits are banned. \itemize{ \item All columns can be accessed by name via \code{df[["name"]]} and \code{df$`name` } and \code{with(df, `name`)}. } \item \code{universal} names are \code{unique} and syntactic (see Details for more). \itemize{ \item Names work everywhere, without quoting: \code{df$name} and \code{with(df, name)} and \code{lm(name1 ~ name2, data = df)} and \code{dplyr::select(df, name)} all work. } } \code{universal} implies \code{unique}, \code{unique} implies \code{minimal}. These levels are nested. } \section{\code{minimal} names}{ \code{minimal} names exist. The \code{names} attribute is not \code{NULL}. The name of an unnamed element is \code{""} and never \code{NA}. Examples: \if{html}{\out{
}}\preformatted{Original names of a vector with length 3: NULL minimal names: "" "" "" Original names: "x" NA minimal names: "x" "" }\if{html}{\out{
}} } \section{\code{unique} names}{ \code{unique} names are \code{minimal}, have no duplicates, and can be used (possibly with backticks) in contexts where a variable is expected. Empty names, \code{...}, and \code{..} followed by a sequence of digits are banned. If a data frame has \code{unique} names, you can index it by name, and also access the columns by name. In particular, \code{df[["name"]]} and \code{df$`name`} and also \code{with(df, `name`)} always work. There are many ways to make names \code{unique}. We append a suffix of the form \code{...j} to any name that is \code{""} or a duplicate, where \code{j} is the position. We also change \code{..#} and \code{...} to \code{...#}. Example: \if{html}{\out{
}}\preformatted{Original names: "" "x" "" "y" "x" "..2" "..." unique names: "...1" "x...2" "...3" "y" "x...5" "...6" "...7" }\if{html}{\out{
}} Pre-existing suffixes of the form \code{...j} are always stripped, prior to making names \code{unique}, i.e. reconstructing the suffixes. If this interacts poorly with your names, you should take control of name repair. } \section{\code{universal} names}{ \code{universal} names are \code{unique} and syntactic, meaning they: \itemize{ \item Are never empty (inherited from \code{unique}). \item Have no duplicates (inherited from \code{unique}). \item Are not \code{...}. Do not have the form \code{..i}, where \code{i} is a number (inherited from \code{unique}). \item Consist of letters, numbers, and the dot \code{.} or underscore \verb{_} characters. \item Start with a letter or start with the dot \code{.} not followed by a number. \item Are not a \link{reserved} word, e.g., \code{if} or \code{function} or \code{TRUE}. } If a vector has \code{universal} names, variable names can be used "as is" in code. They work well with nonstandard evaluation, e.g., \code{df$name} works. vctrs has a different method of making names syntactic than \code{\link[base:make.names]{base::make.names()}}. In general, vctrs prepends one or more dots \code{.} until the name is syntactic. Examples: \if{html}{\out{
}}\preformatted{ Original names: "" "x" NA "x" universal names: "...1" "x...2" "...3" "x...4" Original names: "(y)" "_z" ".2fa" "FALSE" universal names: ".y." "._z" "..2fa" ".FALSE" }\if{html}{\out{
}} } \examples{ # By default, `vec_as_names()` returns minimal names: vec_as_names(c(NA, NA, "foo")) # You can make them unique: vec_as_names(c(NA, NA, "foo"), repair = "unique") # Universal repairing fixes any non-syntactic name: vec_as_names(c("_foo", "+"), repair = "universal") } \seealso{ \code{\link[rlang:names2]{rlang::names2()}} returns the names of an object, after making them \code{minimal}. } vctrs/man/list_drop_empty.Rd0000644000176200001440000000116314315060307015672 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/empty.R \name{list_drop_empty} \alias{list_drop_empty} \title{Drop empty elements from a list} \usage{ list_drop_empty(x) } \arguments{ \item{x}{A list.} } \description{ \code{list_drop_empty()} removes empty elements from a list. This includes \code{NULL} elements along with empty vectors, like \code{integer(0)}. This is equivalent to, but faster than, \code{vec_slice(x, list_sizes(x) != 0L)}. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- list(1, NULL, integer(), 2) list_drop_empty(x) } vctrs/man/vec_group.Rd0000644000176200001440000000442315047425317014461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group.R \name{vec_group} \alias{vec_group} \alias{vec_group_id} \alias{vec_group_loc} \alias{vec_group_rle} \title{Identify groups} \usage{ vec_group_id(x) vec_group_loc(x) vec_group_rle(x) } \arguments{ \item{x}{A vector} } \value{ \itemize{ \item \code{vec_group_id()}: An integer vector with the same size as \code{x}. \item \code{vec_group_loc()}: A two column data frame with size equal to \code{vec_size(vec_unique(x))}. \itemize{ \item A \code{key} column of type \code{vec_ptype(x)} \item A \code{loc} column of type list, with elements of type integer. } \item \code{vec_group_rle()}: A \code{vctrs_group_rle} rcrd object with two integer vector fields: \code{group} and \code{length}. } Note that when using \code{vec_group_loc()} for complex types, the default \code{data.frame} print method will be suboptimal, and you will want to coerce into a tibble to better understand the output. } \description{ \itemize{ \item \code{vec_group_id()} returns an identifier for the group that each element of \code{x} falls in, constructed in the order that they appear. The number of groups is also returned as an attribute, \code{n}. \item \code{vec_group_loc()} returns a data frame containing a \code{key} column with the unique groups, and a \code{loc} column with the locations of each group in \code{x}. \item \code{vec_group_rle()} locates groups in \code{x} and returns them run length encoded in the order that they appear. The return value is a rcrd object with fields for the \code{group} identifiers and the run \code{length} of the corresponding group. The number of groups is also returned as an attribute, \code{n}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ purrr <- c("p", "u", "r", "r", "r") vec_group_id(purrr) vec_group_rle(purrr) groups <- mtcars[c("vs", "am")] vec_group_id(groups) group_rle <- vec_group_rle(groups) group_rle # Access fields with `field()` field(group_rle, "group") field(group_rle, "length") # `vec_group_id()` is equivalent to vec_match(groups, vec_unique(groups)) vec_group_loc(mtcars$vs) vec_group_loc(mtcars[c("vs", "am")]) if (require("tibble")) { as_tibble(vec_group_loc(mtcars[c("vs", "am")])) } } \keyword{internal} vctrs/man/theory-faq-recycling.Rd0000644000176200001440000000521714511524374016524 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{theory-faq-recycling} \alias{theory-faq-recycling} \alias{vector_recycling_rules} \title{FAQ - How does recycling work in vctrs and the tidyverse?} \description{ Recycling describes the concept of repeating elements of one vector to match the size of another. There are two rules that underlie the “tidyverse” recycling rules: \itemize{ \item Vectors of size 1 will be recycled to the size of any other vector \item Otherwise, all vectors must have the same size } } \section{Examples}{ Vectors of size 1 are recycled to the size of any other vector: \if{html}{\out{
}}\preformatted{tibble(x = 1:3, y = 1L) #> # A tibble: 3 x 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 3 1 }\if{html}{\out{
}} This includes vectors of size 0: \if{html}{\out{
}}\preformatted{tibble(x = integer(), y = 1L) #> # A tibble: 0 x 2 #> # i 2 variables: x , y }\if{html}{\out{
}} If vectors aren’t size 1, they must all be the same size. Otherwise, an error is thrown: \if{html}{\out{
}}\preformatted{tibble(x = 1:3, y = 4:7) #> Error in `tibble()`: #> ! Tibble columns must have compatible sizes. #> * Size 3: Existing data. #> * Size 4: Column `y`. #> i Only values of size one are recycled. }\if{html}{\out{
}} } \section{vctrs backend}{ Packages in r-lib and the tidyverse generally use \code{\link[=vec_size_common]{vec_size_common()}} and \code{\link[=vec_recycle_common]{vec_recycle_common()}} as the backends for handling recycling rules. \itemize{ \item \code{vec_size_common()} returns the common size of multiple vectors, after applying the recycling rules \item \code{vec_recycle_common()} goes one step further, and actually recycles the vectors to their common size } \if{html}{\out{
}}\preformatted{vec_size_common(1:3, "x") #> [1] 3 vec_recycle_common(1:3, "x") #> [[1]] #> [1] 1 2 3 #> #> [[2]] #> [1] "x" "x" "x" vec_size_common(1:3, c("x", "y")) #> Error: #> ! Can't recycle `..1` (size 3) to match `..2` (size 2). }\if{html}{\out{
}} } \section{Base R recycling rules}{ The recycling rules described here are stricter than the ones generally used by base R, which are: \itemize{ \item If any vector is length 0, the output will be length 0 \item Otherwise, the output will be length \code{max(length_x, length_y)}, and a warning will be thrown if the length of the longer vector is not an integer multiple of the length of the shorter vector. } We explore the base R rules in detail in \code{vignette("type-size")}. } vctrs/man/vctrs-conditions.Rd0000644000176200001440000000564414512002263015771 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{vctrs-conditions} \alias{vctrs-conditions} \alias{stop_incompatible_type} \alias{stop_incompatible_cast} \alias{stop_incompatible_op} \alias{stop_incompatible_size} \alias{allow_lossy_cast} \title{Custom conditions for vctrs package} \usage{ stop_incompatible_type( x, y, ..., x_arg, y_arg, action = c("combine", "convert"), details = NULL, message = NULL, class = NULL, call = caller_env() ) stop_incompatible_cast( x, to, ..., x_arg, to_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) stop_incompatible_op( op, x, y, details = NULL, ..., message = NULL, class = NULL, call = caller_env() ) stop_incompatible_size( x, y, x_size, y_size, ..., x_arg, y_arg, details = NULL, message = NULL, class = NULL, call = caller_env() ) allow_lossy_cast(expr, x_ptype = NULL, to_ptype = NULL) } \arguments{ \item{x, y, to}{Vectors} \item{..., class}{Only use these fields when creating a subclass.} \item{x_arg, y_arg, to_arg}{Argument names for \code{x}, \code{y}, and \code{to}. Used in error messages to inform the user about the locations of incompatible types.} \item{action}{An option to customize the incompatible type message depending on the context. Errors thrown from \code{\link[=vec_ptype2]{vec_ptype2()}} use \code{"combine"} and those thrown from \code{\link[=vec_cast]{vec_cast()}} use \code{"convert"}.} \item{details}{Any additional human readable details.} \item{message}{An overriding message for the error. \code{details} and \code{message} are mutually exclusive, supplying both is an error.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{x_ptype, to_ptype}{Suppress only the casting errors where \code{x} or \code{to} match these \link[=vec_ptype]{prototypes}.} } \value{ \verb{stop_incompatible_*()} unconditionally raise an error of class \code{"vctrs_error_incompatible_*"} and \code{"vctrs_error_incompatible"}. } \description{ These functions are called for their side effect of raising errors and warnings. These conditions have custom classes and structures to make testing easier. } \examples{ # Most of the time, `maybe_lossy_cast()` returns its input normally: maybe_lossy_cast( c("foo", "bar"), NA, "", lossy = c(FALSE, FALSE), x_arg = "", to_arg = "" ) # If `lossy` has any `TRUE`, an error is thrown: try(maybe_lossy_cast( c("foo", "bar"), NA, "", lossy = c(FALSE, TRUE), x_arg = "", to_arg = "" )) # Unless lossy casts are allowed: allow_lossy_cast( maybe_lossy_cast( c("foo", "bar"), NA, "", lossy = c(FALSE, TRUE), x_arg = "", to_arg = "" ) ) } \keyword{internal} vctrs/man/vec_if_else.Rd0000644000176200001440000000545715065005761014740 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/if-else.R \name{vec_if_else} \alias{vec_if_else} \title{Vectorized if-else} \usage{ vec_if_else( condition, true, false, ..., missing = NULL, ptype = NULL, condition_arg = "condition", true_arg = "true", false_arg = "false", missing_arg = "missing", error_call = current_env() ) } \arguments{ \item{condition}{A logical vector.} \item{true, false}{Vectors to use for \code{TRUE} and \code{FALSE} values of \code{condition}. Both \code{true} and \code{false} will be \link[=theory-faq-recycling]{recycled} to the size of \code{condition}. \code{true}, \code{false}, and \code{missing} (if used) will be cast to their common type.} \item{...}{These dots are for future extensions and must be empty.} \item{missing}{If not \code{NULL}, will be used as the value for \code{NA} values of \code{condition}. Follows the same size and type rules as \code{true} and \code{false}.} \item{ptype}{An optional prototype declaring the desired output type. If supplied, this overrides the common type of \code{true}, \code{false}, and \code{missing}.} \item{condition_arg, true_arg, false_arg, missing_arg}{Argument names used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector with the same size as \code{condition} and the same type as the common type of \code{true}, \code{false}, and \code{missing}. Where \code{condition} is \code{TRUE}, the matching values from \code{true}, where it is \code{FALSE}, the matching values from \code{false}, and where it is \code{NA}, the matching values from \code{missing}, if provided, otherwise a missing value will be used. } \description{ \code{vec_if_else()} is a vectorized \link[=if]{if-else}. Compared to the base R equivalent, \code{\link[=ifelse]{ifelse()}}, this function allows you to handle missing values in the \code{condition} with \code{missing} and always takes \code{true}, \code{false}, and \code{missing} into account when determining what the output type should be. } \examples{ x <- c(-5:5, NA) vec_if_else(x < 0, NA, x) # Explicitly handle `NA` values in the `condition` with `missing` vec_if_else(x < 0, "negative", "positive", missing = "missing") # Unlike `ifelse()`, `vec_if_else()` preserves types x <- factor(sample(letters[1:5], 10, replace = TRUE)) ifelse(x \%in\% c("a", "b", "c"), x, NA) vec_if_else(x \%in\% c("a", "b", "c"), x, NA) # `vec_if_else()` also works with data frames condition <- c(TRUE, FALSE, NA, TRUE) true <- data_frame(x = 1:4, y = 5:8) false <- data_frame(x = 9:12, y = 13:16) vec_if_else(condition, true, false) } vctrs/man/vec_cast.Rd0000644000176200001440000001050214315060307014241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cast.R, R/type-bare.R \name{vec_cast} \alias{vec_cast} \alias{vec_cast_common} \alias{vec_cast.logical} \alias{vec_cast.integer} \alias{vec_cast.double} \alias{vec_cast.complex} \alias{vec_cast.raw} \alias{vec_cast.character} \alias{vec_cast.list} \title{Cast a vector to a specified type} \usage{ vec_cast(x, to, ..., x_arg = caller_arg(x), to_arg = "", call = caller_env()) vec_cast_common(..., .to = NULL, .arg = "", .call = caller_env()) \method{vec_cast}{logical}(x, to, ...) \method{vec_cast}{integer}(x, to, ...) \method{vec_cast}{double}(x, to, ...) \method{vec_cast}{complex}(x, to, ...) \method{vec_cast}{raw}(x, to, ...) \method{vec_cast}{character}(x, to, ...) \method{vec_cast}{list}(x, to, ...) } \arguments{ \item{x}{Vectors to cast.} \item{to, .to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} \item{x_arg}{Argument name for \code{x}, used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call, .call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} } \value{ A vector the same length as \code{x} with the same type as \code{to}, or an error if the cast is not possible. An error is generated if information is lost when casting between compatible types (i.e. when there is no 1-to-1 mapping for a specific value). } \description{ \code{vec_cast()} provides directional conversions from one type of vector to another. Along with \code{\link[=vec_ptype2]{vec_ptype2()}}, this generic forms the foundation of type coercions in vctrs. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } \section{Dependencies of \code{vec_cast_common()}}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{base dependencies}{ Some functions enable a base-class fallback for \code{vec_cast_common()}. In that case the inputs are deemed compatible when they have the same \link[base:typeof]{base type} and inherit from the same base class. } } \examples{ # x is a double, but no information is lost vec_cast(1, integer()) # When information is lost the cast fails try(vec_cast(c(1, 1.5), integer())) try(vec_cast(c(1, 2), logical())) # You can suppress this error and get the partial results allow_lossy_cast(vec_cast(c(1, 1.5), integer())) allow_lossy_cast(vec_cast(c(1, 2), logical())) # By default this suppress all lossy cast errors without # distinction, but you can be specific about what cast is allowed # by supplying prototypes allow_lossy_cast(vec_cast(c(1, 1.5), integer()), to_ptype = integer()) try(allow_lossy_cast(vec_cast(c(1, 2), logical()), to_ptype = integer())) # No sensible coercion is possible so an error is generated try(vec_cast(1.5, factor("a"))) # Cast to common type vec_cast_common(factor("a"), factor(c("a", "b"))) } \seealso{ Call \code{\link[=stop_incompatible_cast]{stop_incompatible_cast()}} when you determine from the attributes that an input can't be cast to the target type. } vctrs/man/vector-checks.Rd0000644000176200001440000001117415075743736015242 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{vector-checks} \alias{vector-checks} \alias{obj_is_vector} \alias{obj_check_vector} \alias{vec_check_size} \alias{vec_check_recyclable} \title{Vector checks} \usage{ obj_is_vector(x) obj_check_vector(x, ..., arg = caller_arg(x), call = caller_env()) vec_check_size(x, size, ..., arg = caller_arg(x), call = caller_env()) vec_check_recyclable(x, size, ..., arg = caller_arg(x), call = caller_env()) } \arguments{ \item{x}{For \verb{obj_*()} functions, an object. For \verb{vec_*()} functions, a vector.} \item{...}{These dots are for future extensions and must be empty.} \item{arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{size}{The size to check for compatibility with.} } \value{ \itemize{ \item \code{obj_is_vector()} returns a single \code{TRUE} or \code{FALSE}. \item \code{obj_check_vector()} returns \code{NULL} invisibly, or errors. \item \code{vec_check_size()} returns \code{NULL} invisibly, or errors. \item \code{vec_check_recyclable()} returns \code{NULL} invisibly, or errors. } } \description{ \itemize{ \item \code{obj_is_vector()} tests if \code{x} is considered a vector in the vctrs sense. See \emph{Vectors and scalars} below for the exact details. \item \code{obj_check_vector()} uses \code{obj_is_vector()} and throws a standardized and informative error if it returns \code{FALSE}. \item \code{vec_check_size()} tests if \code{x} has size \code{size}, and throws an informative error if it doesn't. \item \code{vec_check_recyclable()} tests if \code{x} can recycle to size \code{size}, and throws an informative error if it can't. } } \section{Vectors and scalars}{ Informally, a vector is a collection that makes sense to use as column in a data frame. The following rules define whether or not \code{x} is considered a vector. If no \code{\link[=vec_proxy]{vec_proxy()}} method has been registered, \code{x} is a vector if: \itemize{ \item The \link[=typeof]{base type} of the object is atomic: \code{"logical"}, \code{"integer"}, \code{"double"}, \code{"complex"}, \code{"character"}, or \code{"raw"}. \item \code{x} is a list, as defined by \code{\link[=obj_is_list]{obj_is_list()}}. \item \code{x} is a \link{data.frame}. } If a \code{vec_proxy()} method has been registered, \code{x} is a vector if: \itemize{ \item The proxy satisfies one of the above conditions. \item The base type of the proxy is \code{"list"}, regardless of its class. S3 lists are thus treated as scalars unless they implement a \code{vec_proxy()} method. } Otherwise an object is treated as scalar and cannot be used as a vector. In particular: \itemize{ \item \code{NULL} is not a vector. \item S3 lists like \code{lm} objects are treated as scalars by default. \item Objects of type \link{expression} are not treated as vectors. } } \section{Technical limitations}{ \itemize{ \item Support for S4 vectors is currently limited to objects that inherit from an atomic type. \item Subclasses of \link{data.frame} that \emph{append} their class to the back of the \code{"class"} attribute are not treated as vectors. If you inherit from an S3 class, always prepend your class to the front of the \code{"class"} attribute for correct dispatch. This matches our general principle of allowing subclasses but not mixins. } } \examples{ obj_is_vector(1) # Data frames are vectors obj_is_vector(data_frame()) # Bare lists are vectors obj_is_vector(list()) # S3 lists are vectors if they explicitly inherit from `"list"` x <- structure(list(), class = c("my_list", "list")) obj_is_list(x) obj_is_vector(x) # But if they don't explicitly inherit from `"list"`, they aren't # automatically considered to be vectors. Instead, vctrs considers this # to be a scalar object, like a linear model returned from `lm()`. y <- structure(list(), class = "my_list") obj_is_list(y) obj_is_vector(y) # `obj_check_vector()` throws an informative error if the input # isn't a vector try(obj_check_vector(y)) # `vec_check_size()` throws an informative error if the size of the # input doesn't match `size` vec_check_size(1:5, size = 5) try(vec_check_size(1:5, size = 4)) # `vec_check_recyclable()` throws an informative error if the input can't # recycle to size `size` vec_check_recyclable(1:5, size = 5) vec_check_recyclable(1, size = 5) try(vec_check_recyclable(1:2, size = 5)) } vctrs/man/vec_compare.Rd0000644000176200001440000000254214315060307014742 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare.R \name{vec_compare} \alias{vec_compare} \title{Compare two vectors} \usage{ vec_compare(x, y, na_equal = FALSE, .ptype = NULL) } \arguments{ \item{x, y}{Vectors with compatible types and lengths.} \item{na_equal}{Should \code{NA} values be considered equal?} \item{.ptype}{Override to optionally specify common type} } \value{ An integer vector with values -1 for \code{x < y}, 0 if \code{x == y}, and 1 if \code{x > y}. If \code{na_equal} is \code{FALSE}, the result will be \code{NA} if either \code{x} or \code{y} is \code{NA}. } \description{ Compare two vectors } \section{S3 dispatch}{ \code{vec_compare()} is not generic for performance; instead it uses \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} to create a proxy that is used in the comparison. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_recycle_common]{vec_recycle_common()}} \item \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} } } \examples{ vec_compare(c(TRUE, FALSE, NA), FALSE) vec_compare(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) vec_compare(1:10, 5) vec_compare(runif(10), 0.5) vec_compare(letters[1:10], "d") df <- data.frame(x = c(1, 1, 1, 2), y = c(0, 1, 2, 1)) vec_compare(df, data.frame(x = 1, y = 1)) } vctrs/man/name_spec.Rd0000644000176200001440000000671415056104732014422 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{name_spec} \alias{name_spec} \title{Name specifications} \arguments{ \item{name_spec, .name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item \code{"inner"}, in which case outer names are ignored, and inner names are used if they exist. Note that outer names may still be used to provide informative error messages. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} } \description{ A name specification describes how to combine an inner and outer names. This sort of name combination arises when concatenating vectors or flattening lists. There are two possible cases: \itemize{ \item Named vector: \if{html}{\out{
}}\preformatted{vec_c(outer = c(inner1 = 1, inner2 = 2)) }\if{html}{\out{
}} \item Unnamed vector: \if{html}{\out{
}}\preformatted{vec_c(outer = 1:2) }\if{html}{\out{
}} } In r-lib and tidyverse packages, these cases are errors by default, because there's no behaviour that works well for every case. Instead, you can provide a name specification that describes how to combine the inner and outer names of inputs. Name specifications can refer to: \itemize{ \item \code{outer}: The external name recycled to the size of the input vector. \item \code{inner}: Either the names of the input vector, or a sequence of integer from 1 to the size of the vector if it is unnamed. } } \examples{ # By default, named inputs must be length 1: vec_c(name = 1) # ok try(vec_c(name = 1:3)) # bad # They also can't have internal names, even if scalar: try(vec_c(name = c(internal = 1))) # bad # Pass a name specification to work around this. A specification # can be a glue string referring to `outer` and `inner`: vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}") vec_c(name = 1:3, other = 4:5, .name_spec = "{outer}_{inner}") # They can also be functions: my_spec <- function(outer, inner) paste(outer, inner, sep = "_") vec_c(name = 1:3, other = 4:5, .name_spec = my_spec) # Or purrr-style formulas for anonymous functions: vec_c(name = 1:3, other = 4:5, .name_spec = ~ paste0(.x, .y)) # Or the string `"inner"` to only use inner names vec_c(name = 1:3, outer = 4:5, .name_spec = "inner") vec_c(name = c(a = 1, b = 2, c = 3), outer = 4:5, .name_spec = "inner") # This can be useful when you want outer names mentioned in error messages, # but you don't want them interfering with the result try(vec_c(x = c(a = 1), y = c(b = "2"), .name_spec = "inner")) # Or `rlang::zap()` to ignore both outer and inner names entirely vec_c(name = c(a = 1, b = 2), outer = c(c = 3), .name_spec = rlang::zap()) } vctrs/man/vec_proxy.Rd0000644000176200001440000001357414315060307014504 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxy.R \name{vec_proxy} \alias{vec_proxy} \alias{vec_restore} \title{Proxy and restore} \usage{ vec_proxy(x, ...) vec_restore(x, to, ...) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{to}{The original vector to restore to.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_proxy()} returns the data structure containing the values of a vector. This data structure is usually the vector itself. In this case the proxy is the \link[base:identity]{identity function}, which is the default \code{vec_proxy()} method. Only experts should implement special \code{vec_proxy()} methods, for these cases: \itemize{ \item A vector has vectorised attributes, i.e. metadata for each element of the vector. These \emph{record types} are implemented in vctrs by returning a data frame in the proxy method. If you're starting your class from scratch, consider deriving from the \code{\link[=new_rcrd]{rcrd}} class. It implements the appropriate data frame proxy and is generally the preferred way to create a record class. \item When you're implementing a vector on top of a non-vector type, like an environment or an S4 object. This is currently only partially supported. \item S3 lists are considered scalars by default. This is the safe choice for list objects such as returned by \code{stats::lm()}. To declare that your S3 list class is a vector, you normally add \code{"list"} to the right of your class vector. Explicit inheritance from list is generally the preferred way to declare an S3 list in R, for instance it makes it possible to dispatch on \code{generic.list} S3 methods. If you can't modify your class vector, you can implement an identity proxy (i.e. a proxy method that just returns its input) to let vctrs know this is a vector list and not a scalar. } \code{vec_restore()} is the inverse operation of \code{vec_proxy()}. It should only be called on vector proxies. \itemize{ \item It undoes the transformations of \code{vec_proxy()}. \item It restores attributes and classes. These may be lost when the memory values are manipulated. For example slicing a subset of a vector's proxy causes a new proxy to be allocated. } By default vctrs restores all attributes and classes automatically. You only need to implement a \code{vec_restore()} method if your class has attributes that depend on the data. } \section{Proxying}{ You should only implement \code{vec_proxy()} when your type is designed around a non-vector class. I.e. anything that is not either: \itemize{ \item An atomic vector \item A bare list \item A data frame } In this case, implement \code{vec_proxy()} to return such a vector class. The vctrs operations such as \code{\link[=vec_slice]{vec_slice()}} are applied on the proxy and \code{vec_restore()} is called to restore the original representation of your type. The most common case where you need to implement \code{vec_proxy()} is for S3 lists. In vctrs, S3 lists are treated as scalars by default. This way we don't treat objects like model fits as vectors. To prevent vctrs from treating your S3 list as a scalar, unclass it in the \code{vec_proxy()} method. For instance, here is the definition for \code{list_of}: \if{html}{\out{
}}\preformatted{vec_proxy.vctrs_list_of <- function(x) \{ unclass(x) \} }\if{html}{\out{
}} Another case where you need to implement a proxy is \link[=new_rcrd]{record types}. Record types should return a data frame, as in the \code{POSIXlt} method: \if{html}{\out{
}}\preformatted{vec_proxy.POSIXlt <- function(x) \{ new_data_frame(unclass(x)) \} }\if{html}{\out{
}} Note that you don't need to implement \code{vec_proxy()} when your class inherits from \code{vctrs_vctr} or \code{vctrs_rcrd}. } \section{Restoring}{ A restore is a specialised type of cast, primarily used in conjunction with \code{NextMethod()} or a C-level function that works on the underlying data structure. A \code{vec_restore()} method can make the following assumptions about \code{x}: \itemize{ \item It has the correct type. \item It has the correct names. \item It has the correct \code{dim} and \code{dimnames} attributes. \item It is unclassed. This way you can call vctrs generics with \code{x} without triggering an infinite loop of restoration. } The length may be different (for example after \code{\link[=vec_slice]{vec_slice()}} has been called), and all other attributes may have been lost. The method should restore all attributes so that after restoration, \code{vec_restore(vec_data(x), x)} yields \code{x}. To understand the difference between \code{vec_cast()} and \code{vec_restore()} think about factors: it doesn't make sense to cast an integer to a factor, but if \code{NextMethod()} or another low-level function has stripped attributes, you still need to be able to restore them. The default method copies across all attributes so you only need to provide your own method if your attributes require special care (i.e. they are dependent on the data in some way). When implementing your own method, bear in mind that many R users add attributes to track additional metadata that is important to them, so you should preserve any attributes that don't require special handling for your class. } \section{Dependencies}{ \itemize{ \item \code{x} must be a vector in the vctrs sense (see \code{\link[=vec_is]{vec_is()}}) \item By default the underlying data is returned as is (identity proxy) } All vector classes have a proxy, even those who don't implement any vctrs methods. The exception is S3 lists that don't inherit from \code{"list"} explicitly. These might have to implement an identity proxy for compatibility with vctrs (see discussion above). } \keyword{internal} vctrs/man/vec_names.Rd0000644000176200001440000000675414362266120014433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_names} \alias{vec_names} \alias{vec_names2} \alias{vec_set_names} \title{Get or set the names of a vector} \usage{ vec_names2( x, ..., repair = c("minimal", "unique", "universal", "check_unique", "unique_quiet", "universal_quiet"), quiet = FALSE ) vec_names(x) vec_set_names(x, names) } \arguments{ \item{x}{A vector with names} \item{...}{These dots are for future extensions and must be empty.} \item{repair}{Either a string or a function. If a string, it must be one of \code{"check_unique"}, \code{"minimal"}, \code{"unique"}, \code{"universal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. If a function, it is invoked with a vector of minimal names and must return minimal names, otherwise an error is thrown. \itemize{ \item Minimal names are never \code{NULL} or \code{NA}. When an element doesn't have a name, its minimal name is an empty string. \item Unique names are unique. A suffix is appended to duplicate names to make them unique. \item Universal names are unique and syntactic, meaning that you can safely use the names as variables without causing a syntax error. } The \code{"check_unique"} option doesn't perform any name repair. Instead, an error is raised if the names don't suit the \code{"unique"} criteria. The options \code{"unique_quiet"} and \code{"universal_quiet"} are here to help the user who calls this function indirectly, via another function which exposes \code{repair} but not \code{quiet}. Specifying \code{repair = "unique_quiet"} is like specifying \verb{repair = "unique", quiet = TRUE}. When the \code{"*_quiet"} options are used, any setting of \code{quiet} is silently overridden.} \item{quiet}{By default, the user is informed of any renaming caused by repairing the names. This only concerns unique and universal repairing. Set \code{quiet} to \code{TRUE} to silence the messages. Users can silence the name repair messages by setting the \code{"rlib_name_repair_verbosity"} global option to \code{"quiet"}.} \item{names}{A character vector, or \code{NULL}.} } \value{ \code{vec_names2()} returns the names of \code{x}, repaired. \code{vec_names()} returns the names of \code{x} or \code{NULL} if unnamed. \code{vec_set_names()} returns \code{x} with names updated. } \description{ These functions work like \code{\link[rlang:names2]{rlang::names2()}}, \code{\link[=names]{names()}} and \code{\link[=names<-]{names<-()}}, except that they return or modify the the rowwise names of the vector. These are: \itemize{ \item The usual \code{names()} for atomic vectors and lists \item The row names for data frames and matrices \item The names of the first dimension for arrays Rowwise names are size consistent: the length of the names always equals \code{\link[=vec_size]{vec_size()}}. } \code{vec_names2()} returns the repaired names from a vector, even if it is unnamed. See \code{\link[=vec_as_names]{vec_as_names()}} for details on name repair. \code{vec_names()} is a bare-bones version that returns \code{NULL} if the vector is unnamed. \code{vec_set_names()} sets the names or removes them. } \examples{ vec_names2(1:3) vec_names2(1:3, repair = "unique") vec_names2(c(a = 1, b = 2)) # `vec_names()` consistently returns the rowwise names of data frames and arrays: vec_names(data.frame(a = 1, b = 2)) names(data.frame(a = 1, b = 2)) vec_names(mtcars) names(mtcars) vec_names(Titanic) names(Titanic) vec_set_names(1:3, letters[1:3]) vec_set_names(data.frame(a = 1:3), letters[1:3]) } vctrs/man/vctrs-unspecified.Rd0000644000176200001440000000753515120272011016113 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-unspecified.R \name{vctrs-unspecified} \alias{vctrs-unspecified} \alias{unspecified} \alias{vec_ptype_finalise} \title{Unspecified vectors and prototype finalisation} \usage{ unspecified(n = 0) vec_ptype_finalise(x, ...) } \arguments{ \item{n}{Length of vector} \item{x}{A \code{ptype} to finalize, typically a result of \code{\link[=vec_ptype]{vec_ptype()}}, \code{\link[=vec_ptype2]{vec_ptype2()}}, or \code{\link[=vec_ptype_common]{vec_ptype_common(.finalise = FALSE)}}.} \item{...}{These dots are for future extensions and must be empty.} } \description{ \code{unspecified()} is the underlying type used to represent logical vectors that only contain \code{NA}. These require special handling because we want to allow logical \code{NA} to specify missingness that can be cast to any other type. In vctrs, the \verb{} type is considered \emph{unfinalised} and is not suitable for use in most vctrs functions that take a \code{ptype} argument, like \code{\link[=vec_c]{vec_c()}}. The purpose of \code{vec_ptype_finalise()} is to finalise any \verb{} types into \verb{} after common type determination has been completed. \code{\link[=vec_ptype]{vec_ptype()}} and \code{\link[=vec_ptype2]{vec_ptype2()}} return \emph{unfinalised} types, and will convert a logical vector of \code{NA} into an empty \verb{} type that can combine with any other type. It is unlikely that you will call these yourself, but, if you do, you'll need to manually finalise with \code{vec_ptype_finalise()} to take care of any \verb{} types. \code{\link[=vec_ptype_common]{vec_ptype_common()}} uses both \code{\link[=vec_ptype]{vec_ptype()}} and \code{\link[=vec_ptype2]{vec_ptype2()}} to compute the common type, but typically returns a \emph{finalised} type for immediate usage in other vctrs functions. You can optionally skip finalisation by setting \code{.finalise = FALSE}, in which case \code{vec_ptype_common()} can return \verb{} and you'll need to manually call \code{vec_ptype_finalise()} yourself. \code{vec_ptype_finalise()} is an S3 generic, but it is extremely rare to need to write an S3 method for this. Data frames (and data frame subclasses) are already recursively finalised by the default method. The only time you may need to write an S3 method for \code{vec_ptype_finalise()} is if your class \emph{wraps} an arbitrary vector that has the potential to be a logical vector containing only \code{NA}s. See \code{ivs::iv()} for an example of this, which wraps arbitrary \code{start} and \code{end} vectors of the same type into a single interval vector class. } \examples{ # Returns `unspecified()` vec_ptype(NA) vec_ptype(c(NA, NA)) # We've chosen to make this return `logical()`, but this is admittedly # ambiguous, as it could be seen as "an empty vector of `NA`s" that could # also be treated as unspecified. vec_ptype(logical()) # These return `unspecified()` vec_ptype2(NA, NA) vec_ptype2(NA, NULL) vec_ptype2(NULL, NA) # An unspecified vector can combine with any other type vec_ptype2(NA, "x") vec_ptype2("x", NA) # Same as using `unspecified()` directly vec_ptype2(unspecified(1), "x") vec_ptype2("x", unspecified(1)) # Finalising a ptype turns unspecified back to logical vec_ptype(NA) vec_ptype_finalise(vec_ptype(NA)) # This works recursively over data frames df <- data_frame(x = NA, y = data_frame(z = NA)) vec_ptype_show(vec_ptype(df)) vec_ptype_show(vec_ptype_finalise(vec_ptype(df))) # `vec_ptype_common()` finalises automatically rather than returning an # unspecified type vec_ptype_common(NA) vec_ptype_common(NA, NA) vec_ptype_show(vec_ptype_common(df)) # `vec_ptype_common()` lets you opt out of finalisation using `.finalise` vec_ptype_common(NA, .finalise = FALSE) vec_ptype_show(vec_ptype_common(df, .finalise = FALSE)) } \keyword{internal} vctrs/man/vec_ptype_full.Rd0000644000176200001440000000257414276722575015526 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ptype-abbr-full.R \name{vec_ptype_full} \alias{vec_ptype_full} \alias{vec_ptype_abbr} \title{Vector type as a string} \usage{ vec_ptype_full(x, ...) vec_ptype_abbr(x, ..., prefix_named = FALSE, suffix_shape = TRUE) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{prefix_named}{If \code{TRUE}, add a prefix for named vectors.} \item{suffix_shape}{If \code{TRUE} (the default), append the shape of the vector.} } \value{ A string. } \description{ \code{vec_ptype_full()} displays the full type of the vector. \code{vec_ptype_abbr()} provides an abbreviated summary suitable for use in a column heading. } \section{S3 dispatch}{ The default method for \code{vec_ptype_full()} uses the first element of the class vector. Override this method if your class has parameters that should be prominently displayed. The default method for \code{vec_ptype_abbr()} \code{\link[=abbreviate]{abbreviate()}}s \code{vec_ptype_full()} to 8 characters. You should almost always override, aiming for 4-6 characters where possible. These arguments are handled by the generic and not passed to methods: \itemize{ \item \code{prefix_named} \item \code{suffix_shape} } } \examples{ cat(vec_ptype_full(1:10)) cat(vec_ptype_full(iris)) cat(vec_ptype_abbr(1:10)) } \keyword{internal} vctrs/man/vec_ptype2.Rd0000644000176200001440000000565414315060307014546 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-bare.R, R/type2.R \name{vec_ptype2.logical} \alias{vec_ptype2.logical} \alias{vec_ptype2.integer} \alias{vec_ptype2.double} \alias{vec_ptype2.complex} \alias{vec_ptype2.character} \alias{vec_ptype2.raw} \alias{vec_ptype2.list} \alias{vec_ptype2} \title{Find the common type for a pair of vectors} \usage{ \method{vec_ptype2}{logical}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{integer}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{double}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{complex}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{character}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{raw}(x, y, ..., x_arg = "", y_arg = "") \method{vec_ptype2}{list}(x, y, ..., x_arg = "", y_arg = "") vec_ptype2( x, y, ..., x_arg = caller_arg(x), y_arg = caller_arg(y), call = caller_env() ) } \arguments{ \item{x, y}{Vector types.} \item{...}{These dots are for future extensions and must be empty.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{vec_ptype2()} defines the coercion hierarchy for a set of related vector types. Along with \code{\link[=vec_cast]{vec_cast()}}, this generic forms the foundation of type coercions in vctrs. \code{vec_ptype2()} is relevant when you are implementing vctrs methods for your class, but it should not usually be called directly. If you need to find the common type of a set of inputs, call \code{\link[=vec_ptype_common]{vec_ptype_common()}} instead. This function supports multiple inputs and \link[=vec_ptype_finalise]{finalises} the common type. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_ptype]{vec_ptype()}} is applied to \code{x} and \code{y} } } \seealso{ \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} when you determine from the attributes that an input can't be cast to the target type. } vctrs/man/vec_seq_along.Rd0000644000176200001440000000135513505165544015276 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{vec_seq_along} \alias{vec_seq_along} \alias{vec_init_along} \title{Useful sequences} \usage{ vec_seq_along(x) vec_init_along(x, y = x) } \arguments{ \item{x, y}{Vectors} } \value{ \itemize{ \item \code{vec_seq_along()} an integer vector with the same size as \code{x}. \item \code{vec_init_along()} a vector with the same type as \code{x} and the same size as \code{y}. } } \description{ \code{vec_seq_along()} is equivalent to \code{\link[=seq_along]{seq_along()}} but uses size, not length. \code{vec_init_along()} creates a vector of missing values with size matching an existing object. } \examples{ vec_seq_along(mtcars) vec_init_along(head(mtcars)) } vctrs/man/vec_data.Rd0000644000176200001440000000231214276722575014242 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxy.R \name{vec_data} \alias{vec_data} \title{Extract underlying data} \usage{ vec_data(x) } \arguments{ \item{x}{A vector or object implementing \code{vec_proxy()}.} } \value{ The data underlying \code{x}, free from any attributes except the names. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Extract the data underlying an S3 vector object, i.e. the underlying (named) atomic vector, data frame, or list. } \section{Difference with \code{vec_proxy()}}{ \itemize{ \item \code{vec_data()} returns unstructured data. The only attributes preserved are names, dims, and dimnames. Currently, due to the underlying memory architecture of R, this creates a full copy of the data for atomic vectors. \item \code{vec_proxy()} may return structured data. This generic is the main customisation point for accessing memory values in vctrs, along with \code{\link[=vec_restore]{vec_restore()}}. Methods must return a vector type. Records and data frames will be processed rowwise. } } \keyword{internal} vctrs/man/new_vctr.Rd0000644000176200001440000000704214401377400014307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-vctr.R \name{new_vctr} \alias{new_vctr} \alias{vctr} \title{vctr (vector) S3 class} \usage{ new_vctr(.data, ..., class = character(), inherit_base_type = NULL) } \arguments{ \item{.data}{Foundation of class. Must be a vector} \item{...}{Name-value pairs defining attributes} \item{class}{Name of subclass.} \item{inherit_base_type}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A single logical, or \code{NULL}. Does this class extend the base type of \code{.data}? i.e. does the resulting object extend the behaviour of the underlying type? Defaults to \code{FALSE} for all types except lists, which are required to inherit from the base type.} } \description{ This abstract class provides a set of useful default methods that makes it considerably easier to get started with a new S3 vector class. See \code{vignette("s3-vector")} to learn how to use it to create your own S3 vector classes. } \details{ List vctrs are special cases. When created through \code{new_vctr()}, the resulting list vctr should always be recognized as a list by \code{obj_is_list()}. Because of this, if \code{inherit_base_type} is \code{FALSE} an error is thrown. } \section{Base methods}{ The vctr class provides methods for many base generics using a smaller set of generics defined by this package. Generally, you should think carefully before overriding any of the methods that vctrs implements for you as they've been carefully planned to be internally consistent. \itemize{ \item \code{[[} and \code{[} use \code{NextMethod()} dispatch to the underlying base function, then restore attributes with \code{vec_restore()}. \code{rep()} and \verb{length<-} work similarly. \item \verb{[[<-} and \verb{[<-} cast \code{value} to same type as \code{x}, then call \code{NextMethod()}. \item \code{as.logical()}, \code{as.integer()}, \code{as.numeric()}, \code{as.character()}, \code{as.Date()} and \code{as.POSIXct()} methods call \code{vec_cast()}. The \code{as.list()} method calls \code{[[} repeatedly, and the \code{as.data.frame()} method uses a standard technique to wrap a vector in a data frame. \item \code{as.factor()}, \code{as.ordered()} and \code{as.difftime()} are not generic functions in base R, but have been reimplemented as generics in the \code{generics} package. \code{vctrs} extends these and calls \code{vec_cast()}. To inherit this behaviour in a package, import and re-export the generic of interest from \code{generics}. \item \code{==}, \code{!=}, \code{unique()}, \code{anyDuplicated()}, and \code{is.na()} use \code{\link[=vec_proxy]{vec_proxy()}}. \item \code{<}, \code{<=}, \code{>=}, \code{>}, \code{min()}, \code{max()}, \code{range()}, \code{median()}, \code{quantile()}, and \code{xtfrm()} methods use \code{\link[=vec_proxy_compare]{vec_proxy_compare()}}. \item \code{+}, \code{-}, \code{/}, \code{*}, \code{^}, \code{\%\%}, \code{\%/\%}, \code{!}, \code{&}, and \code{|} operators use \code{\link[=vec_arith]{vec_arith()}}. \item Mathematical operations including the Summary group generics (\code{prod()}, \code{sum()}, \code{any()}, \code{all()}), the Math group generics (\code{abs()}, \code{sign()}, etc), \code{mean()}, \code{is.nan()}, \code{is.finite()}, and \code{is.infinite()} use \code{\link[=vec_math]{vec_math()}}. \item \code{dims()}, \verb{dims<-}, \code{dimnames()}, \verb{dimnames<-}, \code{levels()}, and \verb{levels<-} methods throw errors. } } \keyword{internal} vctrs/man/runs.Rd0000644000176200001440000000346714363556517013475 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/runs.R \name{runs} \alias{runs} \alias{vec_identify_runs} \alias{vec_run_sizes} \title{Runs} \usage{ vec_identify_runs(x) vec_run_sizes(x) } \arguments{ \item{x}{A vector.} } \value{ \itemize{ \item For \code{vec_identify_runs()}, an integer vector with the same size as \code{x}. A scalar integer attribute, \code{n}, is attached. \item For \code{vec_run_sizes()}, an integer vector with size equal to the number of runs in \code{x}. } } \description{ \itemize{ \item \code{vec_identify_runs()} returns a vector of identifiers for the elements of \code{x} that indicate which run of repeated values they fall in. The number of runs is also returned as an attribute, \code{n}. \item \code{vec_run_sizes()} returns an integer vector corresponding to the size of each run. This is identical to the \code{times} column from \code{vec_unrep()}, but is faster if you don't need the run keys. \item \code{\link[=vec_unrep]{vec_unrep()}} is a generalized \code{\link[base:rle]{base::rle()}}. It is documented alongside the "repeat" functions of \code{\link[=vec_rep]{vec_rep()}} and \code{\link[=vec_rep_each]{vec_rep_each()}}; look there for more information. } } \details{ Unlike \code{\link[base:rle]{base::rle()}}, adjacent missing values are considered identical when constructing runs. For example, \code{vec_identify_runs(c(NA, NA))} will return \code{c(1, 1)}, not \code{c(1, 2)}. } \examples{ x <- c("a", "z", "z", "c", "a", "a") vec_identify_runs(x) vec_run_sizes(x) vec_unrep(x) y <- c(1, 1, 1, 2, 2, 3) # With multiple columns, the runs are constructed rowwise df <- data_frame( x = x, y = y ) vec_identify_runs(df) vec_run_sizes(df) vec_unrep(df) } \seealso{ \code{\link[=vec_unrep]{vec_unrep()}} for a generalized \code{\link[base:rle]{base::rle()}}. } vctrs/man/faq-error-incompatible-attributes.Rd0000644000176200001440000000233714276722575021231 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-error-incompatible-attributes} \alias{faq-error-incompatible-attributes} \title{FAQ - Error/Warning: Some attributes are incompatible} \description{ This error occurs when \code{\link[=vec_ptype2]{vec_ptype2()}} or \code{\link[=vec_cast]{vec_cast()}} are supplied vectors of the same classes with different attributes. In this case, vctrs doesn't know how to combine the inputs. To fix this error, the maintainer of the class should implement self-to-self coercion methods for \code{\link[=vec_ptype2]{vec_ptype2()}} and \code{\link[=vec_cast]{vec_cast()}}. } \section{Implementing coercion methods}{ \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } } vctrs/man/howto-faq-coercion.Rd0000644000176200001440000002271514511323761016173 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-coercion} \alias{howto-faq-coercion} \title{FAQ - How to implement ptype2 and cast methods?} \description{ This guide illustrates how to implement \code{vec_ptype2()} and \code{vec_cast()} methods for existing classes. Related topics: \itemize{ \item For an overview of how these generics work and their roles in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")} } \subsection{The natural number class}{ We’ll illustrate how to implement coercion methods with a simple class that represents natural numbers. In this scenario we have an existing class that already features a constructor and methods for \code{print()} and subset. \if{html}{\out{
}}\preformatted{#' @export new_natural <- function(x) \{ if (is.numeric(x) || is.logical(x)) \{ stopifnot(is_whole(x)) x <- as.integer(x) \} else \{ stop("Can't construct natural from unknown type.") \} structure(x, class = "my_natural") \} is_whole <- function(x) \{ all(x \%\% 1 == 0 | is.na(x)) \} #' @export print.my_natural <- function(x, ...) \{ cat("\n") x <- unclass(x) NextMethod() \} #' @export `[.my_natural` <- function(x, i, ...) \{ new_natural(NextMethod()) \} }\if{html}{\out{
}} \if{html}{\out{
}}\preformatted{new_natural(1:3) #> #> [1] 1 2 3 new_natural(c(1, NA)) #> #> [1] 1 NA }\if{html}{\out{
}} } \subsection{Roxygen workflow}{ To implement methods for generics, first import the generics in your namespace and redocument: \if{html}{\out{
}}\preformatted{#' @importFrom vctrs vec_ptype2 vec_cast NULL }\if{html}{\out{
}} Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won’t be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with \code{s3_register()}. } \subsection{Implementing \code{vec_ptype2()}}{ \subsection{The self-self method}{ The first method to implement is the one that signals that your class is compatible with itself: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.my_natural <- function(x, y, ...) \{ x \} vec_ptype2(new_natural(1), new_natural(2:3)) #> #> integer(0) }\if{html}{\out{
}} \code{vec_ptype2()} implements a fallback to try and be compatible with simple classes, so it may seem that you don’t need to implement the self-self coercion method. However, you must implement it explicitly because this is how vctrs knows that a class that is implementing vctrs methods (for instance this disable fallbacks to \code{base::c()}). Also, it makes your class a bit more efficient. } \subsection{The parent and children methods}{ Our natural number class is conceptually a parent of \verb{} and a child of \verb{}, but the class is not compatible with logical, integer, or double vectors yet: \if{html}{\out{
}}\preformatted{vec_ptype2(TRUE, new_natural(2:3)) #> Error: #> ! Can't combine `TRUE` and `new_natural(2:3)` . vec_ptype2(new_natural(1), 2:3) #> Error: #> ! Can't combine `new_natural(1)` and `2:3` . }\if{html}{\out{
}} We’ll specify the twin methods for each of these classes, returning the richer class in each case. \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.logical <- function(x, y, ...) \{ # The order of the classes in the method name follows the order of # the arguments in the function signature, so `x` is the natural # number and `y` is the logical x \} #' @export vec_ptype2.logical.my_natural <- function(x, y, ...) \{ # In this case `y` is the richer natural number y \} }\if{html}{\out{
}} Between a natural number and an integer, the latter is the richer class: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.integer <- function(x, y, ...) \{ y \} #' @export vec_ptype2.integer.my_natural <- function(x, y, ...) \{ x \} }\if{html}{\out{
}} We no longer get common type errors for logical and integer: \if{html}{\out{
}}\preformatted{vec_ptype2(TRUE, new_natural(2:3)) #> #> integer(0) vec_ptype2(new_natural(1), 2:3) #> integer(0) }\if{html}{\out{
}} We are not done yet. Pairwise coercion methods must be implemented for all the connected nodes in the coercion hierarchy, which include double vectors further up. The coercion methods for grand-parent types must be implemented separately: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_natural.double <- function(x, y, ...) \{ y \} #' @export vec_ptype2.double.my_natural <- function(x, y, ...) \{ x \} }\if{html}{\out{
}} } \subsection{Incompatible attributes}{ Most of the time, inputs are incompatible because they have different classes for which no \code{vec_ptype2()} method is implemented. More rarely, inputs could be incompatible because of their attributes. In that case incompatibility is signalled by calling \code{stop_incompatible_type()}. In the following example, we implement a self-self ptype2 method for a hypothetical subclass of \verb{} that has stricter combination semantics. The method throws an error when the levels of the two factors are not compatible. \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_strict_factor.my_strict_factor <- function(x, y, ..., x_arg = "", y_arg = "") \{ if (!setequal(levels(x), levels(y))) \{ stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) \} x \} }\if{html}{\out{
}} Note how the methods need to take \code{x_arg} and \code{y_arg} parameters and pass them on to \code{stop_incompatible_type()}. These argument tags help create more informative error messages when the common type determination is for a column of a data frame. They are part of the generic signature but can usually be left out if not used. } } \subsection{Implementing \code{vec_cast()}}{ Corresponding \code{vec_cast()} methods must be implemented for all \code{vec_ptype2()} methods. The general pattern is to convert the argument \code{x} to the type of \code{to}. The methods should validate the values in \code{x} and make sure they conform to the values of \code{to}. Please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents \code{to}, whereas the second class represents \code{x}. The self-self method is easy in this case, it just returns the target input: \if{html}{\out{
}}\preformatted{#' @export vec_cast.my_natural.my_natural <- function(x, to, ...) \{ x \} }\if{html}{\out{
}} The other types need to be validated. We perform input validation in the \code{new_natural()} constructor, so that’s a good fit for our \code{vec_cast()} implementations. \if{html}{\out{
}}\preformatted{#' @export vec_cast.my_natural.logical <- function(x, to, ...) \{ # The order of the classes in the method name is in reverse order # of the arguments in the function signature, so `to` is the natural # number and `x` is the logical new_natural(x) \} vec_cast.my_natural.integer <- function(x, to, ...) \{ new_natural(x) \} vec_cast.my_natural.double <- function(x, to, ...) \{ new_natural(x) \} }\if{html}{\out{
}} With these methods, vctrs is now able to combine logical and natural vectors. It properly returns the richer type of the two, a natural vector: \if{html}{\out{
}}\preformatted{vec_c(TRUE, new_natural(1), FALSE) #> #> [1] 1 1 0 }\if{html}{\out{
}} Because we haven’t implemented conversions \emph{from} natural, it still doesn’t know how to combine natural with the richer integer and double types: \if{html}{\out{
}}\preformatted{vec_c(new_natural(1), 10L) #> Error in `vec_c()`: #> ! Can't convert `..1` to . vec_c(1.5, new_natural(1)) #> Error in `vec_c()`: #> ! Can't convert `..2` to . }\if{html}{\out{
}} This is quick work which completes the implementation of coercion methods for vctrs: \if{html}{\out{
}}\preformatted{#' @export vec_cast.logical.my_natural <- function(x, to, ...) \{ # In this case `to` is the logical and `x` is the natural number attributes(x) <- NULL as.logical(x) \} #' @export vec_cast.integer.my_natural <- function(x, to, ...) \{ attributes(x) <- NULL as.integer(x) \} #' @export vec_cast.double.my_natural <- function(x, to, ...) \{ attributes(x) <- NULL as.double(x) \} }\if{html}{\out{
}} And we now get the expected combinations. \if{html}{\out{
}}\preformatted{vec_c(new_natural(1), 10L) #> [1] 1 10 vec_c(1.5, new_natural(1)) #> [1] 1.5 1.0 }\if{html}{\out{
}} } } vctrs/man/vec_chop.Rd0000644000176200001440000000465115072256373014263 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice-chop.R \name{vec_chop} \alias{vec_chop} \title{Chopping} \usage{ vec_chop(x, ..., indices = NULL, sizes = NULL) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{indices}{A list of positive integer vectors to slice \code{x} with, or \code{NULL}. Can't be used if \code{sizes} is already specified. If both \code{indices} and \code{sizes} are \code{NULL}, \code{x} is split into its individual elements, equivalent to using an \code{indices} of \code{as.list(vec_seq_along(x))}.} \item{sizes}{An integer vector of non-negative sizes representing sequential indices to slice \code{x} with, or \code{NULL}. Can't be used if \code{indices} is already specified. For example, \code{sizes = c(2, 4)} is equivalent to \code{indices = list(1:2, 3:6)}, but is typically faster. \code{sum(sizes)} must be equal to \code{vec_size(x)}, i.e. \code{sizes} must completely partition \code{x}, but an individual size is allowed to be \code{0}.} } \value{ A list where each element has the same type as \code{x}. The size of the list is equal to \code{vec_size(indices)}, \code{vec_size(sizes)}, or \code{vec_size(x)} depending on whether or not \code{indices} or \code{sizes} is provided. } \description{ \code{vec_chop()} provides an efficient method to repeatedly slice a vector. It captures the pattern of \code{map(indices, vec_slice, x = x)}. When no indices are supplied, it is generally equivalent to \code{\link[=as.list]{as.list()}}. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ vec_chop(1:5) # These two are equivalent vec_chop(1:5, indices = list(1:2, 3:5)) vec_chop(1:5, sizes = c(2, 3)) # Can also be used on data frames vec_chop(mtcars, indices = list(1:3, 4:6)) # If you know your input is sorted and you'd like to split on the groups, # `vec_run_sizes()` can be efficiently combined with `sizes` df <- data_frame( g = c(2, 5, 5, 6, 6, 6, 6, 8, 9, 9), x = 1:10 ) vec_chop(df, sizes = vec_run_sizes(df$g)) # If you have a list of homogeneous vectors, sometimes it can be useful to # combine, apply a function to the flattened vector, and chop according # to the original indices. This can be done efficiently with `list_sizes()`. x <- list(c(1, 2, 1), c(3, 1), 5, double()) x_flat <- vec_c(!!!x) x_flat <- x_flat + max(x_flat) vec_chop(x_flat, sizes = list_sizes(x)) } vctrs/man/order-radix.Rd0000644000176200001440000001350515154276515014714 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{order-radix} \alias{order-radix} \alias{vec_order_radix} \alias{vec_sort_radix} \title{Order and sort vectors} \usage{ vec_order_radix( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) vec_sort_radix( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{direction}{Direction to sort in. \itemize{ \item A single \code{"asc"} or \code{"desc"} for ascending or descending order respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"asc"} or \code{"desc"}, specifying the direction for each column. }} \item{na_value}{Ordering of missing values. \itemize{ \item A single \code{"largest"} or \code{"smallest"} for ordering missing values as the largest or smallest values respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"largest"} or \code{"smallest"}, specifying how missing values should be ordered within each column. }} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} } \value{ \itemize{ \item \code{vec_order_radix()} an integer vector the same size as \code{x}. \item \code{vec_sort_radix()} a vector with the same size and type as \code{x}. } } \description{ \code{vec_order_radix()} computes the order of \code{x}. For data frames, the order is computed along the rows by computing the order of the first column and using subsequent columns to break ties. \code{vec_sort_radix()} sorts \code{x}. It is equivalent to \code{vec_slice(x, vec_order_radix(x))}. } \section{Differences with \code{order()}}{ Unlike the \code{na.last} argument of \code{order()} which decides the positions of missing values irrespective of the \code{decreasing} argument, the \code{na_value} argument of \code{vec_order_radix()} interacts with \code{direction}. If missing values are considered the largest value, they will appear last in ascending order, and first in descending order. Character vectors are ordered in the C-locale. This is different from \code{base::order()}, which respects \code{base::Sys.setlocale()}. Sorting in a consistent locale can produce more reproducible results between different sessions and platforms, however, the results of sorting in the C-locale can be surprising. For example, capital letters sort before lower case letters. Sorting \code{c("b", "C", "a")} with \code{vec_sort_radix()} will return \code{c("C", "a", "b")}, but with \code{base::order()} will return \code{c("a", "b", "C")} unless \code{base::order(method = "radix")} is explicitly set, which also uses the C-locale. While sorting with the C-locale can be useful for algorithmic efficiency, in many real world uses it can be the cause of data analysis mistakes. To balance these trade-offs, you can supply a \code{chr_proxy_collate} function to transform character vectors into an alternative representation that orders in the C-locale in a less surprising way. For example, providing \code{\link[base:chartr]{base::tolower()}} as a transform will order the original vector in a case-insensitive manner. Locale-aware ordering can be achieved by providing \code{stringi::stri_sort_key()} as a transform, setting the collation options as appropriate for your locale. Character vectors are always translated to UTF-8 before ordering, and before any transform is applied by \code{chr_proxy_collate}. For complex vectors, if either the real or imaginary component is \code{NA} or \code{NaN}, then the entire observation is considered missing. } \section{Dependencies of \code{vec_order_radix()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \section{Dependencies of \code{vec_sort_radix()}}{ \itemize{ \item \code{\link[=vec_order_radix]{vec_order_radix()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ if (FALSE) { x <- round(sample(runif(5), 9, replace = TRUE), 3) x <- c(x, NA) vec_order_radix(x) vec_sort_radix(x) vec_sort_radix(x, direction = "desc") # Can also handle data frames df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) vec_order_radix(df) vec_sort_radix(df) vec_sort_radix(df, direction = "desc") # For data frames, `direction` and `na_value` are allowed to be vectors # with length equal to the number of columns in the data frame vec_sort_radix( df, direction = c("desc", "asc"), na_value = c("largest", "smallest") ) # Character vectors are ordered in the C locale, which orders capital letters # below lowercase ones y <- c("B", "A", "a") vec_sort_radix(y) # To order in a case-insensitive manner, provide a `chr_proxy_collate` # function that transforms the strings to all lowercase vec_sort_radix(y, chr_proxy_collate = tolower) } } \keyword{internal} vctrs/man/new_data_frame.Rd0000644000176200001440000000266314511320527015420 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{new_data_frame} \alias{new_data_frame} \title{Assemble attributes for data frame construction} \usage{ new_data_frame(x = list(), n = NULL, ..., class = NULL) } \arguments{ \item{x}{A named list of equal-length vectors. The lengths are not checked; it is responsibility of the caller to make sure they are equal.} \item{n}{Number of rows. If \code{NULL}, will be computed from the length of the first element of \code{x}.} \item{..., class}{Additional arguments for creating subclasses. The following attributes have special behavior: \itemize{ \item \code{"names"} is preferred if provided, overriding existing names in \code{x}. \item \code{"row.names"} is preferred if provided, overriding both \code{n} and the size implied by \code{x}. }} } \description{ \code{new_data_frame()} constructs a new data frame from an existing list. It is meant to be performant, and does not check the inputs for correctness in any way. It is only safe to use after a call to \code{\link[=df_list]{df_list()}}, which collects and validates the columns used to construct the data frame. } \examples{ new_data_frame(list(x = 1:10, y = 10:1)) } \seealso{ \code{\link[=df_list]{df_list()}} for a way to safely construct a data frame's underlying data structure from individual columns. This can be used to create a named list for further use by \code{new_data_frame()}. } vctrs/man/vec_order.Rd0000644000176200001440000000373614315060307014435 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{vec_order} \alias{vec_order} \alias{vec_sort} \title{Order and sort vectors} \usage{ vec_order( x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest") ) vec_sort( x, ..., direction = c("asc", "desc"), na_value = c("largest", "smallest") ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{direction}{Direction to sort in. Defaults to \code{asc}ending.} \item{na_value}{Should \code{NA}s be treated as the largest or smallest values?} } \value{ \itemize{ \item \code{vec_order()} an integer vector the same size as \code{x}. \item \code{vec_sort()} a vector with the same size and type as \code{x}. } } \description{ Order and sort vectors } \section{Differences with \code{order()}}{ Unlike the \code{na.last} argument of \code{order()} which decides the positions of missing values irrespective of the \code{decreasing} argument, the \code{na_value} argument of \code{vec_order()} interacts with \code{direction}. If missing values are considered the largest value, they will appear last in ascending order, and first in descending order. } \section{Dependencies of \code{vec_order()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \section{Dependencies of \code{vec_sort()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} \item \code{\link[=vec_order]{vec_order()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- round(c(runif(9), NA), 3) vec_order(x) vec_sort(x) vec_sort(x, direction = "desc") # Can also handle data frames df <- data.frame(g = sample(2, 10, replace = TRUE), x = x) vec_order(df) vec_sort(df) vec_sort(df, direction = "desc") # Missing values interpreted as largest values are last when # in increasing order: vec_order(c(1, NA), na_value = "largest", direction = "asc") vec_order(c(1, NA), na_value = "largest", direction = "desc") } vctrs/man/vec_empty.Rd0000644000176200001440000000075314276722575014476 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_empty} \alias{vec_empty} \title{Is a vector empty} \usage{ vec_empty(x) } \arguments{ \item{x}{An object.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} This function is defunct, please use \code{\link[=vec_is_empty]{vec_is_empty()}}. } \keyword{internal} vctrs/man/parallel-operators.Rd0000644000176200001440000000575315113325071016275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parallel.R \name{parallel-operators} \alias{parallel-operators} \alias{vec_pany} \alias{vec_pall} \title{Parallel \code{any()} and \code{all()}} \usage{ vec_pany( ..., .missing = NA, .size = NULL, .arg = "", .error_call = current_env() ) vec_pall( ..., .missing = NA, .size = NULL, .arg = "", .error_call = current_env() ) } \arguments{ \item{...}{Logical vectors of equal size.} \item{.missing}{Value to use when a missing value is encountered. One of: \itemize{ \item \code{NA} to propagate missing values. With this, missings are treated the same way as \code{|} or \code{&}. \item \code{FALSE} to treat missing values as \code{FALSE}. \item \code{TRUE} to treat missing values as \code{TRUE}. }} \item{.size}{An optional output size. Only useful to specify if it is possible for no inputs to be provided.} \item{.arg}{Argument name used in error messages.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A logical vector the same size as the vectors in \code{...}. } \description{ These functions are variants of \code{\link[=any]{any()}} and \code{\link[=all]{all()}} that work in parallel on multiple inputs at once. They work similarly to how \code{\link[=pmin]{pmin()}} and \code{\link[=pmax]{pmax()}} are parallel variants of \code{\link[=min]{min()}} and \code{\link[=max]{max()}}. } \details{ \code{vec_pany()} and \code{vec_pall()} are consistent with \code{\link[=any]{any()}} and \code{\link[=all]{all()}} when there are no inputs to process in parallel: \itemize{ \item \code{any()} returns \code{FALSE} with no inputs. Similarly, \code{vec_pany(.size = 1)} returns \code{FALSE}. \item \code{all()} returns \code{TRUE} with no inputs. Similarly, \code{vec_pall(.size = 1)} returns \code{TRUE}. } } \examples{ a <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, NA, NA, NA) b <- c(TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA) # Default behavior treats missings like `|` does vec_pany(a, b) a | b # Default behavior treats missings like `&` does vec_pall(a, b) a & b # Remove missings from the computation, like `na_rm = TRUE` vec_pany(a, b, .missing = FALSE) (a & !is.na(a)) | (b & !is.na(b)) vec_pall(a, b, .missing = TRUE) (a | is.na(a)) & (b | is.na(b)) # `vec_pall()` can be used to implement a `dplyr::filter()` style API df <- data_frame(id = seq_along(a), a = a, b = b) keep_rows <- function(x, ...) { vec_slice(x, vec_pall(..., .missing = FALSE)) } drop_rows <- function(x, ...) { vec_slice(x, !vec_pall(..., .missing = FALSE)) } # "Keep / Drop the rows when both a and b are TRUE" # These form complements of one another, even with `NA`s. keep_rows(df, a, b) drop_rows(df, a, b) # Same empty behavior as `any()` and `all()` vec_pany(.size = 1) any() vec_pall(.size = 1) all() } vctrs/man/faq-error-scalar-type.Rd0000644000176200001440000000566715113325071016611 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-error-scalar-type} \alias{faq-error-scalar-type} \alias{faq_error_scalar_type} \title{FAQ - Error: Input must be a vector} \description{ This error occurs when a function expects a vector and gets a scalar object instead. This commonly happens when some code attempts to assign a scalar object as column in a data frame: \if{html}{\out{
}}\preformatted{fn <- function() NULL tibble::tibble(x = fn) #> Error in `tibble::tibble()`: #> ! All columns in a tibble must be vectors. #> x Column `x` is a function. fit <- lm(1:3 ~ 1) tibble::tibble(x = fit) #> Error in `tibble::tibble()`: #> ! All columns in a tibble must be vectors. #> x Column `x` is a `lm` object. }\if{html}{\out{
}} } \section{Vectorness in base R and in the tidyverse}{ In base R, almost everything is a vector or behaves like a vector. In the tidyverse we have chosen to be a bit stricter about what is considered a vector. The main question we ask ourselves to decide on the vectorness of a type is whether it makes sense to include that object as a column in a data frame. The main difference is that S3 lists are considered vectors by base R but in the tidyverse that’s not the case by default: \if{html}{\out{
}}\preformatted{fit <- lm(1:3 ~ 1) typeof(fit) #> [1] "list" class(fit) #> [1] "lm" # S3 lists can be subset like a vector using base R: fit[c(1, 4)] #> $coefficients #> (Intercept) #> 2 #> #> $rank #> [1] 1 # But not in vctrs vctrs::vec_slice(fit, c(1, 4)) #> Error in `vctrs::vec_slice()`: #> ! `x` must be a vector, not a object. #> x Detected incompatible scalar S3 list. To be treated as a vector, the object must explicitly inherit from or should implement a `vec_proxy()` method. Class: . #> i If this object comes from a package, please report this error to the package author. #> i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. }\if{html}{\out{
}} Defused function calls are another (more esoteric) example: \if{html}{\out{
}}\preformatted{call <- quote(foo(bar = TRUE, baz = FALSE)) call #> foo(bar = TRUE, baz = FALSE) # They can be subset like a vector using base R: call[1:2] #> foo(bar = TRUE) lapply(call, function(x) x) #> [[1]] #> foo #> #> $bar #> [1] TRUE #> #> $baz #> [1] FALSE # But not with vctrs: vctrs::vec_slice(call, 1:2) #> Error in `vctrs::vec_slice()`: #> ! `x` must be a vector, not a call. #> i Read our FAQ about scalar types (`?vctrs::faq_error_scalar_type`) to learn more. }\if{html}{\out{
}} } \section{I get a scalar type error but I think this is a bug}{ It’s possible the author of the class needs to do some work to declare their class a vector. Consider reaching out to the author. We have written a \link[=howto-faq-fix-scalar-type-error]{developer FAQ page} to help them fix the issue. } vctrs/man/vec_size.Rd0000644000176200001440000001014414511524374014272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{vec_size} \alias{vec_size} \alias{vec_size_common} \alias{list_sizes} \alias{vec_is_empty} \title{Number of observations} \usage{ vec_size(x) vec_size_common( ..., .size = NULL, .absent = 0L, .arg = "", .call = caller_env() ) list_sizes(x) vec_is_empty(x) } \arguments{ \item{x, ...}{Vector inputs or \code{NULL}.} \item{.size}{If \code{NULL}, the default, the output size is determined by recycling the lengths of all elements of \code{...}. Alternatively, you can supply \code{.size} to force a known size; in this case, \code{x} and \code{...} are ignored.} \item{.absent}{The size used when no input is provided, or when all input is \code{NULL}. If left as \code{NULL} when no input is supplied, an error is thrown.} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{.call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ An integer (or double for long vectors). \code{vec_size_common()} returns \code{.absent} if all inputs are \code{NULL} or absent, \code{0L} by default. } \description{ \code{vec_size(x)} returns the size of a vector. \code{vec_is_empty()} returns \code{TRUE} if the size is zero, \code{FALSE} otherwise. The size is distinct from the \code{\link[=length]{length()}} of a vector because it generalises to the "number of observations" for 2d structures, i.e. it's the number of rows in matrix or a data frame. This definition has the important property that every column of a data frame (even data frame and matrix columns) have the same size. \code{vec_size_common(...)} returns the common size of multiple vectors. \code{list_sizes()} returns an integer vector containing the size of each element of a list. It is nearly equivalent to, but faster than, \code{map_int(x, vec_size)}, with the exception that \code{list_sizes()} will error on non-list inputs, as defined by \code{\link[=obj_is_list]{obj_is_list()}}. \code{list_sizes()} is to \code{vec_size()} as \code{\link[=lengths]{lengths()}} is to \code{\link[=length]{length()}}. } \details{ There is no vctrs helper that retrieves the number of columns: as this is a property of the \link[=vec_ptype_show]{type}. \code{vec_size()} is equivalent to \code{NROW()} but has a name that is easier to pronounce, and throws an error when passed non-vector inputs. } \section{Invariants}{ \itemize{ \item \code{vec_size(dataframe)} == \code{vec_size(dataframe[[i]])} \item \code{vec_size(matrix)} == \code{vec_size(matrix[, i, drop = FALSE])} \item \code{vec_size(vec_c(x, y))} == \code{vec_size(x)} + \code{vec_size(y)} } } \section{The size of NULL}{ The size of \code{NULL} is hard-coded to \code{0L} in \code{vec_size()}. \code{vec_size_common()} returns \code{.absent} when all inputs are \code{NULL} (if only some inputs are \code{NULL}, they are simply ignored). A default size of 0 makes sense because sizes are most often queried in order to compute a total size while assembling a collection of vectors. Since we treat \code{NULL} as an absent input by principle, we return the identity of sizes under addition to reflect that an absent input doesn't take up any size. Note that other defaults might make sense under different circumstances. For instance, a default size of 1 makes sense for finding the common size because 1 is the identity of the recycling rules. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} } } \examples{ vec_size(1:100) vec_size(mtcars) vec_size(array(dim = c(3, 5, 10))) vec_size_common(1:10, 1:10) vec_size_common(1:10, 1) vec_size_common(integer(), 1) list_sizes(list("a", 1:5, letters)) } \seealso{ \code{\link[=vec_slice]{vec_slice()}} for a variation of \code{[} compatible with \code{vec_size()}, and \code{\link[=vec_recycle]{vec_recycle()}} to \link[=theory-faq-recycling]{recycle} vectors to common length. } vctrs/man/vec_locate_matches.Rd0000644000176200001440000003370315047425317016303 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{vec_locate_matches} \alias{vec_locate_matches} \title{Locate observations matching specified conditions} \usage{ vec_locate_matches( needles, haystack, ..., condition = "==", filter = "none", incomplete = "compare", no_match = NA_integer_, remaining = "drop", multiple = "all", relationship = "none", nan_distinct = FALSE, chr_proxy_collate = NULL, needles_arg = "needles", haystack_arg = "haystack", error_call = current_env() ) } \arguments{ \item{needles, haystack}{Vectors used for matching. \itemize{ \item \code{needles} represents the vector to search for. \item \code{haystack} represents the vector to search in. } Prior to comparison, \code{needles} and \code{haystack} are coerced to the same type.} \item{...}{These dots are for future extensions and must be empty.} \item{condition}{Condition controlling how \code{needles} should be compared against \code{haystack} to identify a successful match. \itemize{ \item One of: \code{"=="}, \code{">"}, \code{">="}, \code{"<"}, or \code{"<="}. \item For data frames, a length \code{1} or \code{ncol(needles)} character vector containing only the above options, specifying how matching is determined for each column. }} \item{filter}{Filter to be applied to the matched results. \itemize{ \item \code{"none"} doesn't apply any filter. \item \code{"min"} returns only the minimum haystack value matching the current needle. \item \code{"max"} returns only the maximum haystack value matching the current needle. \item For data frames, a length \code{1} or \code{ncol(needles)} character vector containing only the above options, specifying a filter to apply to each column. } Filters don't have any effect on \code{"=="} conditions, but are useful for computing "rolling" matches with other conditions. A filter can return multiple haystack matches for a particular needle if the maximum or minimum haystack value is duplicated in \code{haystack}. These can be further controlled with \code{multiple}.} \item{incomplete}{Handling of missing and \link[=vec_detect_complete]{incomplete} values in \code{needles}. \itemize{ \item \code{"compare"} uses \code{condition} to determine whether or not a missing value in \code{needles} matches a missing value in \code{haystack}. If \code{condition} is \code{==}, \code{>=}, or \code{<=}, then missing values will match. \item \code{"match"} always allows missing values in \code{needles} to match missing values in \code{haystack}, regardless of the \code{condition}. \item \code{"drop"} drops incomplete values in \code{needles} from the result. \item \code{"error"} throws an error if any \code{needles} are incomplete. \item If a single integer is provided, this represents the value returned in the \code{haystack} column for values of \code{needles} that are incomplete. If \code{no_match = NA}, setting \code{incomplete = NA} forces incomplete values in \code{needles} to be treated like unmatched values. } \code{nan_distinct} determines whether a \code{NA} is allowed to match a \code{NaN}.} \item{no_match}{Handling of \code{needles} without a match. \itemize{ \item \code{"drop"} drops \code{needles} with zero matches from the result. \item \code{"error"} throws an error if any \code{needles} have zero matches. \item If a single integer is provided, this represents the value returned in the \code{haystack} column for values of \code{needles} that have zero matches. The default represents an unmatched needle with \code{NA}. }} \item{remaining}{Handling of \code{haystack} values that \code{needles} never matched. \itemize{ \item \code{"drop"} drops remaining \code{haystack} values from the result. Typically, this is the desired behavior if you only care when \code{needles} has a match. \item \code{"error"} throws an error if there are any remaining \code{haystack} values. \item If a single integer is provided (often \code{NA}), this represents the value returned in the \code{needles} column for the remaining \code{haystack} values that \code{needles} never matched. Remaining \code{haystack} values are always returned at the end of the result. }} \item{multiple}{Handling of \code{needles} with multiple matches. For each needle: \itemize{ \item \code{"all"} returns all matches detected in \code{haystack}. \item \code{"any"} returns any match detected in \code{haystack} with no guarantees on which match will be returned. It is often faster than \code{"first"} and \code{"last"} if you just need to detect if there is at least one match. \item \code{"first"} returns the first match detected in \code{haystack}. \item \code{"last"} returns the last match detected in \code{haystack}. }} \item{relationship}{Handling of the expected relationship between \code{needles} and \code{haystack}. If the expectations chosen from the list below are invalidated, an error is thrown. \itemize{ \item \code{"none"} doesn't perform any relationship checks. \item \code{"one-to-one"} expects: \itemize{ \item Each value in \code{needles} matches at most 1 value in \code{haystack}. \item Each value in \code{haystack} matches at most 1 value in \code{needles}. } \item \code{"one-to-many"} expects: \itemize{ \item Each value in \code{needles} matches any number of values in \code{haystack}. \item Each value in \code{haystack} matches at most 1 value in \code{needles}. } \item \code{"many-to-one"} expects: \itemize{ \item Each value in \code{needles} matches at most 1 value in \code{haystack}. \item Each value in \code{haystack} matches any number of values in \code{needles}. } \item \code{"many-to-many"} expects: \itemize{ \item Each value in \code{needles} matches any number of values in \code{haystack}. \item Each value in \code{haystack} matches any number of values in \code{needles}. } This performs no checks, and is identical to \code{"none"}, but is provided to allow you to be explicit about this relationship if you know it exists. \item \code{"warn-many-to-many"} doesn't assume there is any known relationship, but will warn if \code{needles} and \code{haystack} have a many-to-many relationship (which is typically unexpected), encouraging you to either take a closer look at your inputs or make this relationship explicit by specifying \code{"many-to-many"}. } \code{relationship} is applied after \code{filter} and \code{multiple} to allow potential multiple matches to be filtered out first. \code{relationship} doesn't handle cases where there are zero matches. For that, see \code{no_match} and \code{remaining}.} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} \item{needles_arg, haystack_arg}{Argument tags for \code{needles} and \code{haystack} used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A two column data frame containing the locations of the matches. \itemize{ \item \code{needles} is an integer vector containing the location of the needle currently being matched. \item \code{haystack} is an integer vector containing the location of the corresponding match in the haystack for the current needle. } } \description{ \code{vec_locate_matches()} is a more flexible version of \code{\link[=vec_match]{vec_match()}} used to identify locations where each value of \code{needles} matches one or multiple values in \code{haystack}. Unlike \code{vec_match()}, \code{vec_locate_matches()} returns all matches by default, and can match on binary conditions other than equality, such as \code{>}, \code{>=}, \code{<}, and \code{<=}. } \details{ \code{\link[=vec_match]{vec_match()}} is identical to (but often slightly faster than): \if{html}{\out{
}}\preformatted{vec_locate_matches( needles, haystack, condition = "==", multiple = "first", nan_distinct = TRUE ) }\if{html}{\out{
}} \code{vec_locate_matches()} is extremely similar to a SQL join between \code{needles} and \code{haystack}, with the default being most similar to a left join. Be very careful when specifying match \code{condition}s. If a condition is misspecified, it is very easy to accidentally generate an exponentially large number of matches. } \section{Dependencies of \code{vec_locate_matches()}}{ \itemize{ \item \code{\link[=vec_order_radix]{vec_order_radix()}} \item \code{\link[=vec_detect_complete]{vec_detect_complete()}} } } \examples{ x <- c(1, 2, NA, 3, NaN) y <- c(2, 1, 4, NA, 1, 2, NaN) # By default, for each value of `x`, all matching locations in `y` are # returned matches <- vec_locate_matches(x, y) matches # The result can be used to slice the inputs to align them data_frame( x = vec_slice(x, matches$needles), y = vec_slice(y, matches$haystack) ) # If multiple matches are present, control which is returned with `multiple` vec_locate_matches(x, y, multiple = "first") vec_locate_matches(x, y, multiple = "last") vec_locate_matches(x, y, multiple = "any") # Use `relationship` to add constraints and error on multiple matches if # they aren't expected try(vec_locate_matches(x, y, relationship = "one-to-one")) # In this case, the `NA` in `y` matches two rows in `x` try(vec_locate_matches(x, y, relationship = "one-to-many")) # By default, `NA` is treated as being identical to `NaN`. # Using `nan_distinct = TRUE` treats `NA` and `NaN` as different values, so # `NA` can only match `NA`, and `NaN` can only match `NaN`. vec_locate_matches(x, y, nan_distinct = TRUE) # If you never want missing values to match, set `incomplete = NA` to return # `NA` in the `haystack` column anytime there was an incomplete value # in `needles`. vec_locate_matches(x, y, incomplete = NA) # Using `incomplete = NA` allows us to enforce the one-to-many relationship # that we couldn't before vec_locate_matches(x, y, relationship = "one-to-many", incomplete = NA) # `no_match` allows you to specify the returned value for a needle with # zero matches. Note that this is different from an incomplete value, # so specifying `no_match` allows you to differentiate between incomplete # values and unmatched values. vec_locate_matches(x, y, incomplete = NA, no_match = 0L) # If you want to require that every `needle` has at least 1 match, set # `no_match` to `"error"`: try(vec_locate_matches(x, y, incomplete = NA, no_match = "error")) # By default, `vec_locate_matches()` detects equality between `needles` and # `haystack`. Using `condition`, you can detect where an inequality holds # true instead. For example, to find every location where `x[[i]] >= y`: matches <- vec_locate_matches(x, y, condition = ">=") data_frame( x = vec_slice(x, matches$needles), y = vec_slice(y, matches$haystack) ) # You can limit which matches are returned with a `filter`. For example, # with the above example you can filter the matches returned by `x[[i]] >= y` # down to only the ones containing the maximum `y` value of those matches. matches <- vec_locate_matches(x, y, condition = ">=", filter = "max") # Here, the matches for the `3` needle value have been filtered down to # only include the maximum haystack value of those matches, `2`. This is # often referred to as a rolling join. data_frame( x = vec_slice(x, matches$needles), y = vec_slice(y, matches$haystack) ) # In the very rare case that you need to generate locations for a # cross match, where every value of `x` is forced to match every # value of `y` regardless of what the actual values are, you can # replace `x` and `y` with integer vectors of the same size that contain # a single value and match on those instead. x_proxy <- vec_rep(1L, vec_size(x)) y_proxy <- vec_rep(1L, vec_size(y)) nrow(vec_locate_matches(x_proxy, y_proxy)) vec_size(x) * vec_size(y) # By default, missing values will match other missing values when using # `==`, `>=`, or `<=` conditions, but not when using `>` or `<` conditions. # This is similar to how `vec_compare(x, y, na_equal = TRUE)` works. x <- c(1, NA) y <- c(NA, 2) vec_locate_matches(x, y, condition = "<=") vec_locate_matches(x, y, condition = "<") # You can force missing values to match regardless of the `condition` # by using `incomplete = "match"` vec_locate_matches(x, y, condition = "<", incomplete = "match") # You can also use data frames for `needles` and `haystack`. The # `condition` will be recycled to the number of columns in `needles`, or # you can specify varying conditions per column. In this example, we take # a vector of date `values` and find all locations where each value is # between lower and upper bounds specified by the `haystack`. values <- as.Date("2019-01-01") + 0:9 needles <- data_frame(lower = values, upper = values) set.seed(123) lower <- as.Date("2019-01-01") + sample(10, 10, replace = TRUE) upper <- lower + sample(3, 10, replace = TRUE) haystack <- data_frame(lower = lower, upper = upper) # (values >= lower) & (values <= upper) matches <- vec_locate_matches(needles, haystack, condition = c(">=", "<=")) data_frame( lower = vec_slice(lower, matches$haystack), value = vec_slice(values, matches$needle), upper = vec_slice(upper, matches$haystack) ) } vctrs/man/list_unchop.Rd0000644000176200001440000001113615072256373015020 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/list-unchop.R \name{list_unchop} \alias{list_unchop} \title{Combine a list of vectors} \usage{ list_unchop( x, ..., indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), error_arg = "x", error_call = current_env() ) } \arguments{ \item{x}{A list} \item{...}{These dots are for future extensions and must be empty.} \item{indices}{A list of positive integer vectors specifying the locations to place elements of \code{x} in. Each element of \code{x} is recycled to the size of the corresponding index vector. The size of \code{indices} must match the size of \code{x}. If \code{NULL}, \code{x} is combined in the order it is provided in, which is equivalent to using \code{\link[=vec_c]{vec_c()}}.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{x}. Alternatively, you can supply \code{ptype} to give the output a known type.} \item{name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item \code{"inner"}, in which case outer names are ignored, and inner names are used if they exist. Note that outer names may still be used to provide informative error messages. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} \item{error_arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector of type \code{vec_ptype_common(!!!x)}, or \code{ptype}, if specified. The size is computed as \code{vec_size_common(!!!indices)} unless the indices are \code{NULL}, in which case the size is \code{vec_size_common(!!!x)}. } \description{ While \code{list_unchop()} is not deprecated, we now recommend that you use either: \itemize{ \item \code{list_combine(x, indices = indices, size = size)} over \code{list_unchop(x, indices = indices)} \item \code{vec_c(!!!x)} over \code{list_unchop(x)} } \code{list_unchop()} combines a list of vectors into a single vector, placing elements in the output according to the locations specified by \code{indices}. It is similar to \code{\link[=vec_c]{vec_c()}}, but gives greater control over how the elements are combined. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_c]{vec_c()}} } } \examples{ # If `indices` selects every value in `x` exactly once, # in any order, then `list_unchop()` inverts `vec_chop()` x <- c("a", "b", "c", "d") indices <- list(2, c(3, 1), 4) vec_chop(x, indices = indices) list_unchop(vec_chop(x, indices = indices), indices = indices) # When unchopping, size 1 elements of `x` are recycled # to the size of the corresponding index list_unchop(list(1, 2:3), indices = list(c(1, 3, 5), c(2, 4))) # Names are retained, and outer names can be combined with inner # names through the use of a `name_spec` lst <- list(x = c(a = 1, b = 2), y = 1) list_unchop(lst, indices = list(c(3, 2), c(1, 4)), name_spec = "{outer}_{inner}") # If you have a list of homogeneous vectors, sometimes it can be useful to # unchop, apply a function to the flattened vector, and then rechop according # to the original indices. This can be done efficiently with `list_sizes()`. x <- list(c(1, 2, 1), c(3, 1), 5, double()) x_flat <- list_unchop(x) x_flat <- x_flat + max(x_flat) vec_chop(x_flat, sizes = list_sizes(x)) } \keyword{internal} vctrs/man/vec_equal_na.Rd0000644000176200001440000000114614315060307015100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_equal_na} \alias{vec_equal_na} \title{Missing values} \usage{ vec_equal_na(x) } \arguments{ \item{x}{A vector} } \value{ A logical vector the same size as \code{x}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_equal_na()} has been renamed to \code{\link[=vec_detect_missing]{vec_detect_missing()}} and is deprecated as of vctrs 0.5.0. } \keyword{internal} vctrs/man/vec_init.Rd0000644000176200001440000000110114663361445014262 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{vec_init} \alias{vec_init} \title{Initialize a vector} \usage{ vec_init(x, n = 1L) } \arguments{ \item{x}{Template of vector to initialize.} \item{n}{Desired size of result.} } \description{ Initialize a vector } \section{Dependencies}{ \itemize{ \item vec_slice() } } \examples{ vec_init(1:10, 3) vec_init(Sys.Date(), 5) # The "missing" value for a data frame is a row that is entirely missing vec_init(mtcars, 2) # The "missing" value for a list is `NULL` vec_init(list(), 3) } vctrs/man/vec_as_index.Rd0000644000176200001440000000172115056611175015114 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_as_index} \alias{vec_as_index} \title{Convert to an index vector} \usage{ vec_as_index(i, n, names = NULL) } \arguments{ \item{i}{An index vector to convert.} \item{n}{A single integer representing the total size of the object that \code{i} is meant to index into.} \item{names}{If \code{i} is a character vector, \code{names} should be a character vector that \code{i} will be matched against to construct the index. Otherwise, not used. The default value of \code{NULL} will result in an error if \code{i} is a character vector.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_as_index()} has been renamed to \code{\link[=vec_as_location]{vec_as_location()}} and is deprecated as of vctrs 0.2.2. } \keyword{internal} vctrs/man/vec_locate_sorted_groups.Rd0000644000176200001440000000675414341667017017565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order.R \name{vec_locate_sorted_groups} \alias{vec_locate_sorted_groups} \title{Locate sorted groups} \usage{ vec_locate_sorted_groups( x, ..., direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{direction}{Direction to sort in. \itemize{ \item A single \code{"asc"} or \code{"desc"} for ascending or descending order respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"asc"} or \code{"desc"}, specifying the direction for each column. }} \item{na_value}{Ordering of missing values. \itemize{ \item A single \code{"largest"} or \code{"smallest"} for ordering missing values as the largest or smallest values respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"largest"} or \code{"smallest"}, specifying how missing values should be ordered within each column. }} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} } \value{ A two column data frame with size equal to \code{vec_size(vec_unique(x))}. \itemize{ \item A \code{key} column of type \code{vec_ptype(x)}. \item A \code{loc} column of type list, with elements of type integer. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{vec_locate_sorted_groups()} returns a data frame containing a \code{key} column with sorted unique groups, and a \code{loc} column with the locations of each group in \code{x}. It is similar to \code{\link[=vec_group_loc]{vec_group_loc()}}, except the groups are returned sorted rather than by first appearance. } \details{ \code{vec_locate_sorted_groups(x)} is equivalent to, but faster than: \if{html}{\out{
}}\preformatted{info <- vec_group_loc(x) vec_slice(info, vec_order(info$key)) }\if{html}{\out{
}} } \section{Dependencies of \code{vec_locate_sorted_groups()}}{ \itemize{ \item \code{\link[=vec_proxy_order]{vec_proxy_order()}} } } \examples{ df <- data.frame( g = sample(2, 10, replace = TRUE), x = c(NA, sample(5, 9, replace = TRUE)) ) # `vec_locate_sorted_groups()` is similar to `vec_group_loc()`, except keys # are returned ordered rather than by first appearance. vec_locate_sorted_groups(df) vec_group_loc(df) } \keyword{internal} vctrs/man/new_rcrd.Rd0000644000176200001440000000252015035470715014266 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-rcrd.R \name{new_rcrd} \alias{new_rcrd} \alias{ses} \alias{rcrd} \title{rcrd (record) S3 class} \usage{ new_rcrd(fields, ..., class = character()) } \arguments{ \item{fields}{A list or a data frame. Lists must be rectangular (same sizes), and contain uniquely named vectors (at least one). \code{fields} is validated with \code{\link[=df_list]{df_list()}} to ensure uniquely named vectors.} \item{...}{Additional attributes} \item{class}{Name of subclass.} } \description{ The rcrd class extends \link{vctr}. A rcrd is composed of 1 or more \link{field}s, which must be vectors of the same length. Is designed specifically for classes that can naturally be decomposed into multiple vectors of the same length, like \link{POSIXlt}, but where the organisation should be considered an implementation detail invisible to the user (unlike a \link{data.frame}). } \details{ Record-style objects created with \code{\link[=new_rcrd]{new_rcrd()}} do not do much on their own. For instance they do not have a default \code{\link[=format]{format()}} method, which means printing the object causes an error. See \href{https://vctrs.r-lib.org/articles/s3-vector.html#record-style-objects}{Record-style objects} for details on implementing methods for record vectors. } \keyword{internal} vctrs/man/vec_ptype.Rd0000644000176200001440000001530515120272011014446 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \name{vec_ptype} \alias{vec_ptype} \alias{vec_ptype_common} \alias{vec_ptype_show} \title{Find the prototype of a set of vectors} \usage{ vec_ptype(x, ..., x_arg = "", call = caller_env()) vec_ptype_common( ..., .ptype = NULL, .finalise = TRUE, .arg = "", .call = caller_env() ) vec_ptype_show(...) } \arguments{ \item{x}{A vector} \item{...}{For \code{vec_ptype()}, these dots are for future extensions and must be empty. For \code{vec_ptype_common()} and \code{vec_ptype_show()}, vector inputs.} \item{x_arg}{Argument name for \code{x}. This is used in error messages to inform the user about the locations of incompatible types.} \item{call, .call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.finalise}{Should \code{vec_ptype_common()} \link[=vec_ptype_finalise]{finalise} its output? \itemize{ \item If \code{TRUE}, \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} is called on the final \code{ptype} before it is returned. Practically this has the effect of converting any types from \link{unspecified} to logical. \item If \code{FALSE}, \link{unspecified} types are left unfinalised, which can be useful for advanced cases where you combine one common type result with another type via \code{\link[=vec_ptype2]{vec_ptype2()}}. Note that you must manually call \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} on the final \code{ptype} before supplying it to any other vctrs functions. }} \item{.arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} } \value{ \code{vec_ptype()} and \code{vec_ptype_common()} return a prototype (a size-0 vector). } \description{ \itemize{ \item \code{vec_ptype()} returns the \link[=vec_ptype_finalise]{unfinalised} prototype of a single vector. \item \code{vec_ptype_common()} returns the common type of multiple vectors. By default, this is \link[=vec_ptype_finalise]{finalised} for immediate usage, but can optionally be left unfinalised for advanced common type determination. \item \code{vec_ptype_show()} nicely prints the common type of any number of inputs, and is designed for interactive exploration. } } \section{\code{vec_ptype()}}{ \code{vec_ptype()} returns \link[=vec_size]{size} 0 vectors potentially containing attributes but no data. Generally, this is just \code{vec_slice(x, 0L)}, but some inputs require special handling. \itemize{ \item While you can't slice \code{NULL}, the prototype of \code{NULL} is itself. This is because we treat \code{NULL} as an identity value in the \code{vec_ptype2()} monoid. \item The prototype of logical vectors that only contain missing values is the special \link{unspecified} type, which can be coerced to any other 1d type. This allows bare \code{NA}s to represent missing values for any 1d vector type. \link[=vec_ptype_finalise]{Finalising} this type converts it from unspecified back to logical. } See \link{internal-faq-ptype2-identity} for more information about identity values. \code{vec_ptype()} is a \emph{performance} generic. It is not necessary to implement it because the default method will work for any vctrs type. However the default method builds around other vctrs primitives like \code{vec_slice()} which incurs performance costs. If your class has a static prototype, you might consider implementing a custom \code{vec_ptype()} method that returns a constant. This will improve the performance of your class in many cases (\link[=vec_ptype2]{common type} imputation in particular). Because it may contain unspecified vectors, the prototype returned by \code{vec_ptype()} is said to be \strong{unfinalised}. Call \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} to finalise it. } \section{\code{vec_ptype_common()}}{ \code{vec_ptype_common()} first finds the prototype of each input, then successively calls \code{\link[=vec_ptype2]{vec_ptype2()}} to find a common type. It returns a \link[=vec_ptype_finalise]{finalised} prototype by default, but can optionally be left unfinalised for advanced common type determination. } \section{Dependencies of \code{vec_ptype()}}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} for returning an empty slice } } \section{Dependencies of \code{vec_ptype_common()}}{ \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} } } \examples{ # Unknown types ------------------------------------------ vec_ptype_show() vec_ptype_show(NULL) # Vectors ------------------------------------------------ vec_ptype_show(1:10) vec_ptype_show(letters) vec_ptype_show(TRUE) vec_ptype_show(Sys.Date()) vec_ptype_show(Sys.time()) vec_ptype_show(factor("a")) vec_ptype_show(ordered("a")) # Matrices ----------------------------------------------- # The prototype of a matrix includes the number of columns vec_ptype_show(array(1, dim = c(1, 2))) vec_ptype_show(array("x", dim = c(1, 2))) # Data frames -------------------------------------------- # The prototype of a data frame includes the prototype of # every column vec_ptype_show(iris) # The prototype of multiple data frames includes the prototype # of every column that in any data frame vec_ptype_show( data.frame(x = TRUE), data.frame(y = 2), data.frame(z = "a") ) # Finalisation ------------------------------------------- # `vec_ptype()` and `vec_ptype2()` return unfinalised ptypes so that they # can be coerced to any other type vec_ptype(NA) vec_ptype2(NA, NA) # By default `vec_ptype_common()` finalises so that you can use its result # directly in other vctrs functions vec_ptype_common(NA, NA) # You can opt out of finalisation to make it work like `vec_ptype()` and # `vec_ptype2()` with `.finalise = FALSE`, but don't forget that you must # call `vec_ptype_finalise()` manually if you do so! vec_ptype_common(NA, NA, .finalise = FALSE) vec_ptype_finalise(vec_ptype_common(NA, NA, .finalise = FALSE)) # This can be useful in rare scenarios, like including a separate `default` # argument in the ptype computation xs <- list(NA, NA) default <- "a" try(vec_ptype2(vec_ptype_common(!!!xs), default)) vec_ptype2(vec_ptype_common(!!!xs, .finalise = FALSE), default) } vctrs/man/internal-faq-matches-algorithm.Rd0000644000176200001440000002751214315060307020452 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{internal-faq-matches-algorithm} \alias{internal-faq-matches-algorithm} \title{Internal FAQ - Implementation of \code{vec_locate_matches()}} \description{ \code{vec_locate_matches()} is similar to \code{vec_match()}, but detects \emph{all} matches by default, and can match on conditions other than equality (like \code{>=} and \code{<}). There are also various other arguments to limit or adjust exactly which kinds of matches are returned. Here is an example: \if{html}{\out{
}}\preformatted{x <- c("a", "b", "a", "c", "d") y <- c("d", "b", "a", "d", "a", "e") # For each value of `x`, find all matches in `y` # - The "c" in `x` doesn't have a match, so it gets an NA location by default # - The "e" in `y` isn't matched by anything in `x`, so it is dropped by default vec_locate_matches(x, y) #> needles haystack #> 1 1 3 #> 2 1 5 #> 3 2 2 #> 4 3 3 #> 5 3 5 #> 6 4 NA #> 7 5 1 #> 8 5 4 }\if{html}{\out{
}} } \section{Algorithm description}{ \subsection{Overview and \code{==}}{ The simplest (approximate) way to think about the algorithm that \code{df_locate_matches_recurse()} uses is that it sorts both inputs, and then starts at the midpoint in \code{needles} and uses a binary search to find each needle in \code{haystack}. Since there might be multiple of the same needle, we find the location of the lower and upper duplicate of that needle to handle all duplicates of that needle at once. Similarly, if there are duplicates of a matching \code{haystack} value, we find the lower and upper duplicates of the match. If the condition is \code{==}, that is pretty much all we have to do. For each needle, we then record 3 things: the location of the needle, the location of the lower match in the haystack, and the match size (i.e. \code{loc_upper_match - loc_lower_match + 1}). This later gets expanded in \code{expand_compact_indices()} into the actual output. After recording the matches for a single needle, we perform the same procedure on the LHS and RHS of that needle (remember we started on the midpoint needle). i.e. from \verb{[1, loc_needle-1]} and \verb{[loc_needle+1, size_needles]}, again taking the midpoint of those two ranges, finding their respective needle in the haystack, recording matches, and continuing on to the next needle. This iteration proceeds until we run out of needles. When we have a data frame with multiple columns, we add a layer of recursion to this. For the first column, we find the locations of the lower/upper duplicate of the current needle, and we find the locations of the lower/upper matches in the haystack. If we are on the final column in the data frame, we record the matches, otherwise we pass this information on to another call to \code{df_locate_matches_recurse()}, bumping the column index and using these refined lower/upper bounds as the starting bounds for the next column. I think an example would be useful here, so below I step through this process for a few iterations: \if{html}{\out{
}}\preformatted{# these are sorted already for simplicity needles <- data_frame(x = c(1, 1, 2, 2, 2, 3), y = c(1, 2, 3, 4, 5, 3)) haystack <- data_frame(x = c(1, 1, 2, 2, 3), y = c(2, 3, 4, 4, 1)) needles #> x y #> 1 1 1 #> 2 1 2 #> 3 2 3 #> 4 2 4 #> 5 2 5 #> 6 3 3 haystack #> x y #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 #> 5 3 1 ## Column 1, iteration 1 # start at midpoint in needles # this corresponds to x==2 loc_mid_needles <- 3L # finding all x==2 values in needles gives us: loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # compute LHS/RHS bounds for next needle lhs_loc_lower_bound_needles <- 1L # original lower bound lhs_loc_upper_bound_needles <- 2L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 6L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 6L # original upper bound # We still have a 2nd column to check. So recurse and pass on the current # duplicate and match bounds to start the 2nd column with. ## Column 2, iteration 1 # midpoint of [3, 5] # value y==4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # last column, so record matches # - this was location 4 in needles # - lower match in haystack is at loc 3 # - match size is 2 # Now handle LHS and RHS of needle midpoint lhs_loc_lower_bound_needles <- 3L # original lower bound lhs_loc_upper_bound_needles <- 3L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 5L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 5L # original upper bound ## Column 2, iteration 2 (using LHS bounds) # midpoint of [3,3] # value of y==3 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 3L # no match! no y==3 in haystack for x==2 # lower-match will always end up > upper-match in this case loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 2L # no LHS or RHS needle values to do, so we are done here ## Column 2, iteration 3 (using RHS bounds) # same as above, range of [5,5], value of y==5, which has no match in haystack ## Column 1, iteration 2 (LHS of first x needle) # Now we are done with the x needles from [3,5], so move on to the LHS and RHS # of that. Here we would do the LHS: # midpoint of [1,2] loc_mid_needles <- 1L # ... ## Column 1, iteration 3 (RHS of first x needle) # midpoint of [6,6] loc_mid_needles <- 6L # ... }\if{html}{\out{
}} In the real code, rather than comparing the double values of the columns directly, we replace each column with pseudo "joint ranks" computed between the i-th column of \code{needles} and the i-th column of \code{haystack}. It is approximately like doing \code{vec_rank(vec_c(needles$x, haystack$x), type = "dense")}, then splitting the resulting ranks back up into their corresponding needle/haystack columns. This keeps the recursion code simpler, because we only have to worry about comparing integers. } \subsection{Non-equi conditions and containers}{ At this point we can talk about non-equi conditions like \code{<} or \code{>=}. The general idea is pretty simple, and just builds on the above algorithm. For example, start with the \code{x} column from needles/haystack above: \if{html}{\out{
}}\preformatted{needles$x #> [1] 1 1 2 2 2 3 haystack$x #> [1] 1 1 2 2 3 }\if{html}{\out{
}} If we used a condition of \code{<=}, then we'd do everything the same as before: \itemize{ \item Midpoint in needles is location 3, value \code{x==2} \item Find lower/upper duplicates in needles, giving locations \verb{[3, 5]} \item Find lower/upper \emph{exact} match in haystack, giving locations \verb{[3, 4]} } At this point, we need to "adjust" the \code{haystack} match bounds to account for the condition. Since \code{haystack} is ordered, our "rule" for \code{<=} is to keep the lower match location the same, but extend the upper match location to the upper bound, so we end up with \verb{[3, 5]}. We know we can extend the upper match location because every haystack value after the exact match should be less than the needle. Then we just record the matches and continue on normally. This approach is really nice, because we only have to exactly match the \code{needle} in \code{haystack}. We don't have to compare each needle against every value in \code{haystack}, which would take a massive amount of time. However, it gets slightly more complex with data frames with multiple columns. Let's go back to our original \code{needles} and \code{haystack} data frames and apply the condition \code{<=} to each column. Here is another worked example, which shows a case where our "rule" falls apart on the second column. \if{html}{\out{
}}\preformatted{needles #> x y #> 1 1 1 #> 2 1 2 #> 3 2 3 #> 4 2 4 #> 5 2 5 #> 6 3 3 haystack #> x y #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 #> 5 3 1 # `condition = c("<=", "<=")` ## Column 1, iteration 1 # x == 2 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # because haystack is ordered we know we can expand the upper bound automatically # to include everything past the match. i.e. needle of x==2 must be less than # the haystack value at loc 5, which we can check by seeing that it is x==3. loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L ## Column 2, iteration 1 # needles range of [3, 5] # y == 4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # lets try using our rule, which tells us we should be able to extend the upper # bound: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L # but the haystack value of y at location 5 is y==1, which is not less than y==4 # in the needles! looks like our rule failed us. }\if{html}{\out{
}} If you read through the above example, you'll see that the rule didn't work here. The problem is that while \code{haystack} is ordered (by \code{vec_order()}s standards), each column isn't ordered \emph{independently} of the others. Instead, each column is ordered within the "group" created by previous columns. Concretely, \code{haystack} here has an ordered \code{x} column, but if you look at \code{haystack$y} by itself, it isn't ordered (because of that 1 at the end). That is what causes the rule to fail. \if{html}{\out{
}}\preformatted{haystack #> x y #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 #> 5 3 1 }\if{html}{\out{
}} To fix this, we need to create haystack "containers" where the values within each container are all \emph{totally} ordered. For \code{haystack} that would create 2 containers and look like: \if{html}{\out{
}}\preformatted{haystack[1:4,] #> # A tibble: 4 × 2 #> x y #> #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 haystack[5,] #> # A tibble: 1 × 2 #> x y #> #> 1 3 1 }\if{html}{\out{
}} This is essentially what \code{computing_nesting_container_ids()} does. You can actually see these ids with the helper, \code{compute_nesting_container_info()}: \if{html}{\out{
}}\preformatted{haystack2 <- haystack # we really pass along the integer ranks, but in this case that is equivalent # to converting our double columns to integers haystack2$x <- as.integer(haystack2$x) haystack2$y <- as.integer(haystack2$y) info <- compute_nesting_container_info(haystack2, condition = c("<=", "<=")) # the ids are in the second slot. # container ids break haystack into [1, 4] and [5, 5]. info[[2]] #> [1] 0 0 0 0 1 }\if{html}{\out{
}} So the idea is that for each needle, we look in each haystack container and find all the matches, then we aggregate all of the matches once at the end. \code{df_locate_matches_with_containers()} has the job of iterating over the containers. Computing totally ordered containers can be expensive, but luckily it doesn't happen very often in normal usage. \itemize{ \item If there are all \code{==} conditions, we don't need containers (i.e. any equi join) \item If there is only 1 non-equi condition and no conditions after it, we don't need containers (i.e. most rolling joins) \item Otherwise the typical case where we need containers is if we have something like \verb{date >= lower, date <= upper}. Even so, the computation cost generally scales with the number of columns in \code{haystack} you compute containers with (here 2), and it only really slows down around 4 columns or so, which I haven't ever seen a real life example of. } } } vctrs/man/howto-faq-coercion-data-frame.Rd0000644000176200001440000003512514663361150020173 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-coercion-data-frame} \alias{howto-faq-coercion-data-frame} \title{FAQ - How to implement ptype2 and cast methods? (Data frames)} \description{ This guide provides a practical recipe for implementing \code{vec_ptype2()} and \code{vec_cast()} methods for coercions of data frame subclasses. Related topics: \itemize{ \item For an overview of the coercion mechanism in vctrs, see \code{\link[=theory-faq-coercion]{?theory-faq-coercion}}. \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. } Coercion of data frames occurs when different data frame classes are combined in some way. The two main methods of combination are currently row-binding with \code{\link[=vec_rbind]{vec_rbind()}} and col-binding with \code{\link[=vec_cbind]{vec_cbind()}} (which are in turn used by a number of dplyr and tidyr functions). These functions take multiple data frame inputs and automatically coerce them to their common type. vctrs is generally strict about the kind of automatic coercions that are performed when combining inputs. In the case of data frames we have decided to be a bit less strict for convenience. Instead of throwing an incompatible type error, we fall back to a base data frame or a tibble if we don’t know how to combine two data frame subclasses. It is still a good idea to specify the proper coercion behaviour for your data frame subclasses as soon as possible. We will see two examples in this guide. The first example is about a data frame subclass that has no particular attributes to manage. In the second example, we implement coercion methods for a tibble subclass that includes potentially incompatible attributes. \subsection{Roxygen workflow}{ To implement methods for generics, first import the generics in your namespace and redocument: \if{html}{\out{
}}\preformatted{#' @importFrom vctrs vec_ptype2 vec_cast NULL }\if{html}{\out{
}} Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won’t be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with \code{s3_register()}. } \subsection{Parent methods}{ Most of the common type determination should be performed by the parent class. In vctrs, double dispatch is implemented in such a way that you need to call the methods for the parent class manually. For \code{vec_ptype2()} this means you need to call \code{df_ptype2()} (for data frame subclasses) or \code{tib_ptype2()} (for tibble subclasses). Similarly, \code{df_cast()} and \code{tib_cast()} are the workhorses for \code{vec_cast()} methods of subtypes of \code{data.frame} and \code{tbl_df}. These functions take the union of the columns in \code{x} and \code{y}, and ensure shared columns have the same type. These functions are much less strict than \code{vec_ptype2()} and \code{vec_cast()} as they accept any subclass of data frame as input. They always return a \code{data.frame} or a \code{tbl_df}. You will probably want to write similar functions for your subclass to avoid repetition in your code. You may want to export them as well if you are expecting other people to derive from your class. } \subsection{A \code{data.table} example}{ This example is the actual implementation of vctrs coercion methods for \code{data.table}. This is a simple example because we don’t have to keep track of attributes for this class or manage incompatibilities. See the tibble section for a more complicated example. We first create the \code{dt_ptype2()} and \code{dt_cast()} helpers. They wrap around the parent methods \code{df_ptype2()} and \code{df_cast()}, and transform the common type or converted input to a data table. You may want to export these helpers if you expect other packages to derive from your data frame class. These helpers should always return data tables. To this end we use the conversion generic \code{as.data.table()}. Depending on the tools available for the particular class at hand, a constructor might be appropriate as well. \if{html}{\out{
}}\preformatted{dt_ptype2 <- function(x, y, ...) \{ as.data.table(df_ptype2(x, y, ...)) \} dt_cast <- function(x, to, ...) \{ as.data.table(df_cast(x, to, ...)) \} }\if{html}{\out{
}} We start with the self-self method: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.data.table.data.table <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} }\if{html}{\out{
}} Between a data frame and a data table, we consider the richer type to be data table. This decision is not based on the value coverage of each data structures, but on the idea that data tables have richer behaviour. Since data tables are the richer type, we call \code{dt_type2()} from the \code{vec_ptype2()} method. It always returns a data table, no matter the order of arguments: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) \{ dt_ptype2(x, y, ...) \} }\if{html}{\out{
}} The \code{vec_cast()} methods follow the same pattern, but note how the method for coercing to data frame uses \code{df_cast()} rather than \code{dt_cast()}. Also, please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents \code{to}, whereas the second class represents \code{x}. \if{html}{\out{
}}\preformatted{#' @export vec_cast.data.table.data.table <- function(x, to, ...) \{ dt_cast(x, to, ...) \} #' @export vec_cast.data.table.data.frame <- function(x, to, ...) \{ # `x` is a data.frame to be converted to a data.table dt_cast(x, to, ...) \} #' @export vec_cast.data.frame.data.table <- function(x, to, ...) \{ # `x` is a data.table to be converted to a data.frame df_cast(x, to, ...) \} }\if{html}{\out{
}} With these methods vctrs is now able to combine data tables with data frames: \if{html}{\out{
}}\preformatted{vec_cbind(data.frame(x = 1:3), data.table(y = "foo")) #> x y #> #> 1: 1 foo #> 2: 2 foo #> 3: 3 foo }\if{html}{\out{
}} } \subsection{A tibble example}{ In this example we implement coercion methods for a tibble subclass that carries a colour as a scalar metadata: \if{html}{\out{
}}\preformatted{# User constructor my_tibble <- function(colour = NULL, ...) \{ new_my_tibble(tibble::tibble(...), colour = colour) \} # Developer constructor new_my_tibble <- function(x, colour = NULL) \{ stopifnot(is.data.frame(x)) tibble::new_tibble( x, colour = colour, class = "my_tibble", nrow = nrow(x) ) \} df_colour <- function(x) \{ if (inherits(x, "my_tibble")) \{ attr(x, "colour") \} else \{ NULL \} \} #'@export print.my_tibble <- function(x, ...) \{ cat(sprintf("<\%s: \%s>\n", class(x)[[1]], df_colour(x))) cli::cat_line(format(x)[-1]) \} }\if{html}{\out{
}} This subclass is very simple. All it does is modify the header. \if{html}{\out{
}}\preformatted{red <- my_tibble("red", x = 1, y = 1:2) red #> #> x y #> #> 1 1 1 #> 2 1 2 red[2] #> #> y #> #> 1 1 #> 2 2 green <- my_tibble("green", z = TRUE) green #> #> z #> #> 1 TRUE }\if{html}{\out{
}} Combinations do not work properly out of the box, instead vctrs falls back to a bare tibble: \if{html}{\out{
}}\preformatted{vec_rbind(red, tibble::tibble(x = 10:12)) #> # A tibble: 5 x 2 #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} Instead of falling back to a data frame, we would like to return a \verb{} when combined with a data frame or a tibble. Because this subclass has more metadata than normal data frames (it has a colour), it is a \emph{supertype} of tibble and data frame, i.e. it is the richer type. This is similar to how a grouped tibble is a more general type than a tibble or a data frame. Conceptually, the latter are pinned to a single constant group. The coercion methods for data frames operate in two steps: \itemize{ \item They check for compatible subclass attributes. In our case the tibble colour has to be the same, or be undefined. \item They call their parent methods, in this case \code{\link[=tib_ptype2]{tib_ptype2()}} and \code{\link[=tib_cast]{tib_cast()}} because we have a subclass of tibble. This eventually calls the data frame methods \code{\link[=df_ptype2]{df_ptype2()}} and \code{\link[=tib_ptype2]{tib_ptype2()}} which match the columns and their types. } This process should usually be wrapped in two functions to avoid repetition. Consider exporting these if you expect your class to be derived by other subclasses. We first implement a helper to determine if two data frames have compatible colours. We use the \code{df_colour()} accessor which returns \code{NULL} when the data frame colour is undefined. \if{html}{\out{
}}\preformatted{has_compatible_colours <- function(x, y) \{ x_colour <- df_colour(x) \%||\% df_colour(y) y_colour <- df_colour(y) \%||\% x_colour identical(x_colour, y_colour) \} }\if{html}{\out{
}} Next we implement the coercion helpers. If the colours are not compatible, we call \code{stop_incompatible_cast()} or \code{stop_incompatible_type()}. These strict coercion semantics are justified because in this class colour is a \emph{data} attribute. If it were a non essential \emph{detail} attribute, like the timezone in a datetime, we would just standardise it to the value of the left-hand side. In simpler cases (like the data.table example), these methods do not need to take the arguments suffixed in \verb{_arg}. Here we do need to take these arguments so we can pass them to the \code{stop_} functions when we detect an incompatibility. They also should be passed to the parent methods. \if{html}{\out{
}}\preformatted{#' @export my_tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") \{ out <- tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) if (!has_compatible_colours(x, to)) \{ stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, details = "Can't combine colours." ) \} colour <- df_colour(x) \%||\% df_colour(to) new_my_tibble(out, colour = colour) \} #' @export my_tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") \{ out <- tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg) if (!has_compatible_colours(x, y)) \{ stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = "Can't combine colours." ) \} colour <- df_colour(x) \%||\% df_colour(y) new_my_tibble(out, colour = colour) \} }\if{html}{\out{
}} Let’s now implement the coercion methods, starting with the self-self methods. \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_cast.my_tibble.my_tibble <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} }\if{html}{\out{
}} We can now combine compatible instances of our class! \if{html}{\out{
}}\preformatted{vec_rbind(red, red) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 1 1 #> 4 1 2 vec_rbind(green, green) #> #> z #> #> 1 TRUE #> 2 TRUE vec_rbind(green, red) #> Error in `my_tib_ptype2()`: #> ! Can't combine `..1` and `..2` . #> Can't combine colours. }\if{html}{\out{
}} The methods for combining our class with tibbles follow the same pattern. For ptype2 we return our class in both cases because it is the richer type: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.tbl_df <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_ptype2.tbl_df.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} }\if{html}{\out{
}} For cast are careful about returning a tibble when casting to a tibble. Note the call to \code{vctrs::tib_cast()}: \if{html}{\out{
}}\preformatted{#' @export vec_cast.my_tibble.tbl_df <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} #' @export vec_cast.tbl_df.my_tibble <- function(x, to, ...) \{ tib_cast(x, to, ...) \} }\if{html}{\out{
}} From this point, we get correct combinations with tibbles: \if{html}{\out{
}}\preformatted{vec_rbind(red, tibble::tibble(x = 10:12)) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} However we are not done yet. Because the coercion hierarchy is different from the class hierarchy, there is no inheritance of coercion methods. We’re not getting correct behaviour for data frames yet because we haven’t explicitly specified the methods for this class: \if{html}{\out{
}}\preformatted{vec_rbind(red, data.frame(x = 10:12)) #> # A tibble: 5 x 2 #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} Let’s finish up the boiler plate: \if{html}{\out{
}}\preformatted{#' @export vec_ptype2.my_tibble.data.frame <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_ptype2.data.frame.my_tibble <- function(x, y, ...) \{ my_tib_ptype2(x, y, ...) \} #' @export vec_cast.my_tibble.data.frame <- function(x, to, ...) \{ my_tib_cast(x, to, ...) \} #' @export vec_cast.data.frame.my_tibble <- function(x, to, ...) \{ df_cast(x, to, ...) \} }\if{html}{\out{
}} This completes the implementation: \if{html}{\out{
}}\preformatted{vec_rbind(red, data.frame(x = 10:12)) #> #> x y #> #> 1 1 1 #> 2 1 2 #> 3 10 NA #> 4 11 NA #> 5 12 NA }\if{html}{\out{
}} } } vctrs/man/vec_as_subscript.Rd0000644000176200001440000000400515056611175016021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subscript.R \name{vec_as_subscript} \alias{vec_as_subscript} \alias{vec_as_subscript2} \title{Convert to a base subscript type} \usage{ vec_as_subscript( i, ..., logical = c("cast", "error"), numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env() ) vec_as_subscript2( i, ..., numeric = c("cast", "error"), character = c("cast", "error"), arg = NULL, call = caller_env() ) } \arguments{ \item{i}{An index vector to convert.} \item{...}{These dots are for future extensions and must be empty.} \item{logical, numeric, character}{How to handle logical, numeric, and character subscripts. If \code{"cast"} and the subscript is not one of the three base types (logical, integer or character), the subscript is \link[=vec_cast]{cast} to the relevant base type, e.g. factors are coerced to character. \code{NULL} is treated as an empty integer vector, and is thus coercible depending on the setting of \code{numeric}. Symbols are treated as character vectors and thus coercible depending on the setting of \code{character}. If \code{"error"}, the subscript type is disallowed and triggers an informative error.} \item{arg}{The argument name to be displayed in error messages.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Convert \code{i} to the base type expected by \code{\link[=vec_as_location]{vec_as_location()}} or \code{\link[=vec_as_location2]{vec_as_location2()}}. The values of the subscript type are not checked in any way (length, missingness, negative elements). } \keyword{internal} vctrs/man/vec_interleave.Rd0000644000176200001440000000666215075743736015503 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice-interleave.R \name{vec_interleave} \alias{vec_interleave} \title{Interleave many vectors into one vector} \usage{ vec_interleave( ..., .size = NULL, .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Vectors to interleave.} \item{.size}{The expected size of each vector. If not provided, computed automatically by \code{\link[=vec_size_common]{vec_size_common()}}. Each vector will be \link[=theory-faq-recycling]{recycled} to this size.} \item{.ptype}{The expected type of each vector. If not provided, computed automatically by \code{\link[=vec_ptype_common]{vec_ptype_common()}}. Each vector will be \link[=theory-faq-coercion]{cast} to this type.} \item{.name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item \code{"inner"}, in which case outer names are ignored, and inner names are used if they exist. Note that outer names may still be used to provide informative error messages. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{.name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{vec_interleave()} combines multiple vectors together, much like \code{\link[=vec_c]{vec_c()}}, but does so in such a way that the elements of each vector are interleaved together. It is a more efficient equivalent to the following usage of \code{vec_c()}: \if{html}{\out{
}}\preformatted{vec_interleave(x, y) == vec_c(x[1], y[1], x[2], y[2], ..., x[n], y[n]) }\if{html}{\out{
}} } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=list_combine]{list_combine()}} } } } \examples{ # The most common case is to interleave two vectors vec_interleave(1:3, 4:6) # But you aren't restricted to just two vec_interleave(1:3, 4:6, 7:9, 10:12) # You can also interleave data frames x <- data_frame(x = 1:2, y = c("a", "b")) y <- data_frame(x = 3:4, y = c("c", "d")) vec_interleave(x, y) # `.size` can be used to recycle size 1 elements before interleaving vec_interleave(1, 2, .size = 3) # `.ptype` can be used to enforce a particular type typeof(vec_interleave(1, 2, .ptype = integer())) } vctrs/man/missing.Rd0000644000176200001440000000362014315060307014126 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/missing.R \name{missing} \alias{missing} \alias{vec_detect_missing} \alias{vec_any_missing} \title{Missing values} \usage{ vec_detect_missing(x) vec_any_missing(x) } \arguments{ \item{x}{A vector} } \value{ \itemize{ \item \code{vec_detect_missing()} returns a logical vector the same size as \code{x}. \item \code{vec_any_missing()} returns a single \code{TRUE} or \code{FALSE}. } } \description{ \itemize{ \item \code{vec_detect_missing()} returns a logical vector the same size as \code{x}. For each element of \code{x}, it returns \code{TRUE} if the element is missing, and \code{FALSE} otherwise. \item \code{vec_any_missing()} returns a single \code{TRUE} or \code{FALSE} depending on whether or not \code{x} has \emph{any} missing values. } \subsection{Differences with \code{\link[=is.na]{is.na()}}}{ Data frame rows are only considered missing if every element in the row is missing. Similarly, \link[=new_rcrd]{record vector} elements are only considered missing if every field in the record is missing. Put another way, rows with \emph{any} missing values are considered \link[=vec_detect_complete]{incomplete}, but only rows with \emph{all} missing values are considered missing. List elements are only considered missing if they are \code{NULL}. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ x <- c(1, 2, NA, 4, NA) vec_detect_missing(x) vec_any_missing(x) # Data frames are iterated over rowwise, and only report a row as missing # if every element of that row is missing. If a row is only partially # missing, it is said to be incomplete, but not missing. y <- c("a", "b", NA, "d", "e") df <- data_frame(x = x, y = y) df$missing <- vec_detect_missing(df) df$incomplete <- !vec_detect_complete(df) df } \seealso{ \code{\link[=vec_detect_complete]{vec_detect_complete()}} } vctrs/man/vec_math.Rd0000644000176200001440000000403314276722575014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/numeric.R \name{vec_math} \alias{vec_math} \alias{vec_math_base} \title{Mathematical operations} \usage{ vec_math(.fn, .x, ...) vec_math_base(.fn, .x, ...) } \arguments{ \item{.fn}{A mathematical function from the base package, as a string.} \item{.x}{A vector.} \item{...}{Additional arguments passed to \code{.fn}.} } \description{ This generic provides a common dispatch mechanism for all regular unary mathematical functions. It is used as a common wrapper around many of the Summary group generics, the Math group generics, and a handful of other mathematical functions like \code{mean()} (but not \code{var()} or \code{sd()}). } \details{ \code{vec_math_base()} is provided as a convenience for writing methods. It calls the base \code{.fn} on the underlying \code{\link[=vec_data]{vec_data()}}. } \section{Included functions}{ \itemize{ \item From the \link{Summary} group generic: \code{prod()}, \code{sum()}, \code{any()}, \code{all()}. \item From the \link{Math} group generic: \code{abs()}, \code{sign()}, \code{sqrt()}, \code{ceiling()}, \code{floor()}, \code{trunc()}, \code{cummax()}, \code{cummin()}, \code{cumprod()}, \code{cumsum()}, \code{log()}, \code{log10()}, \code{log2()}, \code{log1p()}, \code{acos()}, \code{acosh()}, \code{asin()}, \code{asinh()}, \code{atan()}, \code{atanh()}, \code{exp()}, \code{expm1()}, \code{cos()}, \code{cosh()}, \code{cospi()}, \code{sin()}, \code{sinh()}, \code{sinpi()}, \code{tan()}, \code{tanh()}, \code{tanpi()}, \code{gamma()}, \code{lgamma()}, \code{digamma()}, \code{trigamma()}. \item Additional generics: \code{mean()}, \code{is.nan()}, \code{is.finite()}, \code{is.infinite()}. } Note that \code{median()} is currently not implemented, and \code{sd()} and \code{var()} are currently not generic and so do not support custom classes. } \examples{ x <- new_vctr(c(1, 2.5, 10)) x abs(x) sum(x) cumsum(x) } \seealso{ \code{\link[=vec_arith]{vec_arith()}} for the equivalent for the arithmetic infix operators. } \keyword{internal} vctrs/man/maybe_lossy_cast.Rd0000644000176200001440000000574314315060307016025 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{maybe_lossy_cast} \alias{maybe_lossy_cast} \title{Lossy cast error} \usage{ maybe_lossy_cast( result, x, to, lossy = NULL, locations = NULL, ..., loss_type = c("precision", "generality"), x_arg, to_arg, call = caller_env(), details = NULL, message = NULL, class = NULL, .deprecation = FALSE ) } \arguments{ \item{result}{The result of a potentially lossy cast.} \item{x}{Vectors to cast.} \item{to}{Type to cast to.} \item{lossy}{A logical vector indicating which elements of \code{result} were lossy. Can also be a single \code{TRUE}, but note that \code{locations} picks up locations from this vector by default. In this case, supply your own location vector, possibly empty.} \item{locations}{An optional integer vector giving the locations where \code{x} lost information.} \item{..., class}{Only use these fields when creating a subclass.} \item{loss_type}{The kind of lossy cast to be mentioned in error messages. Can be loss of precision (for instance from double to integer) or loss of generality (from character to factor).} \item{x_arg}{Argument name for \code{x}, used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{to_arg}{Argument name \code{to} used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{details}{Any additional human readable details.} \item{message}{An overriding message for the error. \code{details} and \code{message} are mutually exclusive, supplying both is an error.} \item{.deprecation}{If \code{TRUE}, the error is downgraded to a deprecation warning. This is useful for transitioning your class to a stricter conversion scheme. The warning advises your users to wrap their code with \code{allow_lossy_cast()}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} By default, lossy casts are an error. Use \code{allow_lossy_cast()} to silence these errors and continue with the partial results. In this case the lost values are typically set to \code{NA} or to a lower value resolution, depending on the type of cast. Lossy cast errors are thrown by \code{maybe_lossy_cast()}. Unlike functions prefixed with \code{stop_}, \code{maybe_lossy_cast()} usually returns a result. If a lossy cast is detected, it throws an error, unless it's been wrapped in \code{allow_lossy_cast()}. In that case, it returns the result silently. } \keyword{internal} vctrs/man/as-is.Rd0000644000176200001440000000060214276722575013510 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-asis.R \name{as-is} \alias{as-is} \alias{vec_ptype2.AsIs} \title{AsIs S3 class} \usage{ \method{vec_ptype2}{AsIs}(x, y, ..., x_arg = "", y_arg = "") } \description{ These functions help the base AsIs class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/vec_proxy_compare.Rd0000644000176200001440000000610614315060307016203 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare.R \name{vec_proxy_compare} \alias{vec_proxy_compare} \alias{vec_proxy_order} \title{Comparison and order proxy} \usage{ vec_proxy_compare(x, ...) vec_proxy_order(x, ...) } \arguments{ \item{x}{A vector x.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A 1d atomic vector or a data frame. } \description{ \code{vec_proxy_compare()} and \code{vec_proxy_order()} return proxy objects, i.e. an atomic vector or data frame of atomic vectors. For \code{\link[=vctr]{vctrs_vctr}} objects: \itemize{ \item \code{vec_proxy_compare()} determines the behavior of \code{<}, \code{>}, \code{>=} and \code{<=} (via \code{\link[=vec_compare]{vec_compare()}}); and \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}}, and \code{\link[=quantile]{quantile()}}. \item \code{vec_proxy_order()} determines the behavior of \code{order()} and \code{sort()} (via \code{xtfrm()}). } } \details{ The default method of \code{vec_proxy_compare()} assumes that all classes built on top of atomic vectors or records are comparable. Internally the default calls \code{\link[=vec_proxy_equal]{vec_proxy_equal()}}. If your class is not comparable, you will need to provide a \code{vec_proxy_compare()} method that throws an error. The behavior of \code{vec_proxy_order()} is identical to \code{vec_proxy_compare()}, with the exception of lists. Lists are not comparable, as comparing elements of different types is undefined. However, to allow ordering of data frames containing list-columns, the ordering proxy of a list is generated as an integer vector that can be used to order list elements by first appearance. If a class implements a \code{vec_proxy_compare()} method, it usually doesn't need to provide a \code{vec_proxy_order()} method, because the latter is implemented by forwarding to \code{vec_proxy_compare()} by default. Classes inheriting from list are an exception: due to the default \code{vec_proxy_order()} implementation, \code{vec_proxy_compare()} and \code{vec_proxy_order()} should be provided for such classes (with identical implementations) to avoid mismatches between comparison and sorting. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} called by default in \code{vec_proxy_compare()} \item \code{\link[=vec_proxy_compare]{vec_proxy_compare()}} called by default in \code{vec_proxy_order()} } } \section{Data frames}{ If the proxy for \code{x} is a data frame, the proxy function is automatically recursively applied on all columns as well. After applying the proxy recursively, if there are any data frame columns present in the proxy, then they are unpacked. Finally, if the resulting data frame only has a single column, then it is unwrapped and a vector is returned as the proxy. } \examples{ # Lists are not comparable x <- list(1:2, 1, 1:2, 3) try(vec_compare(x, x)) # But lists are orderable by first appearance to allow for # ordering data frames with list-cols df <- new_data_frame(list(x = x)) vec_sort(df) } \keyword{internal} vctrs/man/figures/0000755000176200001440000000000015157322654013644 5ustar liggesusersvctrs/man/figures/cast.png0000644000176200001440000003567513652627531015324 0ustar liggesusersPNG  IHDRTTSsRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$9KIDATxyM.%".[I"RT(6$Z e -(囥URٲ!!m,Bз|{?9{q95y{f޳! ?WdX" W;E@ъW_m@D1D+^b~!Qx[@DGUl! WE@ъW_m@D1D+^b~!Qx[@DGUl! W~z…7gΜ &r?*=zۤ=q~%τޡykAط Wdm֮]q׮];o޼a  fΝ;wyK,9֭[Z5ΝrJL>c\ ,[Hw IÆ ر#㏦M6i0`@z!t2nܸ-[nz̘1i$)# O*>_:u*e|>}o~!%Jl޼w$eWm ,YǏڵg5GYr 4~ҥVzwرc}~< ЩSM6͚5M6oFڵQ% On! w I nΝׯ}͟?׮]ӦM$-I1)r!Fboo_fM2)T֭[gΜɂy*! WS&rСCY+^x…CGpBG}tƍMÛ޺73%c_nML2_НㄊsE"xB x|!)bLScꪫg1{X*馛q7bd S4(_5kH\rWv+Y/HE[o붮};vmNJ}O4|G`ӦM%K$~x ѭ%ߟ?8$EmB-=z:aOs0֪$R@@b )Z*Uq>J~{!$GUR!b=ЋbԵJ*@@̿zA@VIC/B :S*1p%-!Cܹ3X $cɒ%:ur-?0娃=ae…ո@E*2s3,[_K.}r!G}?A*U:,viXgҥ7onذ!qCtbݶmۂ lN>SO-R!SҴiS%\ҡCvj*Iȸ2(V=>d5kb PBק?&/C~ b~۷k׮X"Ae WKc=6֭[GW__hıݻW_}4`~|^a~wOIB&ʗNg2B+V0 3p}AL^6 9?FO233_wu29G7ָ[֭l28Hq' $ߘLuA1X}.D^yD? 3AwF O:?7WqhyAƍQ `bge?ߘI a8brqe?qM' f̘a=?CFxdxϢ{c g}QG[x& z'x" "CW5uT8r _m v:q{^K.]t$1aE!ugA(F7JIEݾtC۷ogbč7os;&yG!/q{s6Cʕ+e~l 5j`+fM>l@Oc.QL/qY>O;w.zn裏"f*Ȇ]^#'fҼm@(Z|C/w}Bˆ@DМ?b 1ڀ(bV-jB #Z* " h{QիW3Ƌ9hРD/b|!R[qO?tڽ ?3PJvl9;ѐ;%{Üf|8BÁiӦ@ 0"~ #zpK/弐11#RG>`Ǿ`<!1(<,q~f4waGNmP:={b>|8lرcU_/G b@իW?~5Chb3h^~RЕW^i)# B{ M rao f9τ@@1c:&|,| Ӓ3jϙ &3jt~@B~9d0ztAtFXg \BPqCGAFx0s5 ]{cZ)ゐw}jٲ%6,D"Xv#Uݙ4hyӧon=Ps e{L3sL. U)7k N1Ӝ?ĕBьMx9#ưJ QbUEyG4h5k9s d9)pyJ:^}Xf'8ƷQFr̉,t]WNTDI n5o/3cߣG.v^fiK܁[Gy$m[:|݇q ¹LJ삜 ׎ߴߟM;>Pr>ϝ/* q6JK1|&M*!gΝbA0^y@b`"{0?ٱr=B៵ &py ?v}N]6Rl[JO6,s. s`H VU s/XZ]%Qb:di\M8c=5#E$9WvmU^@7QkY& }6Ӝ?g5xmUq¡`7|!q)֭6XbqBD @l8%h0hz_~2a! LYg1Ig26iҤjժ1[V[b@+ENl;>q~[L1VnV}M-8ioF9T|9s7's2$/ܹ37_{ v4~Ls]C : _q8J>RY8BU^xVZl#|b3X#B>gW/?qԩlFoɃpt$gX9́r [.Bޓ|[ntڻw7xÓ h PbS:sG?|$RTo&wCRiӦ8xWȟ?F]k*YP_tޝN( 1G,qg&*}8_|p1YƍIwOSc9 Q4{5^{-]Oe05ȟMR 7j]_Z^:FLG/";[T?E JڶmyguV58GE'x"fP7xcab)dM ;Egg3fmh.ڪ96ZJ+ǏpTQTtj87ie/З_~AVVL?^zI8?hɿ+ G#/^b!v%KШRfX`gÅp¨ ?ZO>=*0s JK" ZǎID_~1Q#=իWF)Rvꫯ|a.;^_b;;8l߾I>c|ߩP ҝ\MEs` eX.ƃ:IY&eTaYQn5`1wd*" _*c{̵i.bCE0 kB}Du7 TlE~=L3 %oxNٲeZwq!H1 j'xwއ[l@P&"‹~ٯ[vq2%?;""h ?DL`'C}` R(J L/?<&bW1\X)GX֭e˖ 3תU`뮳7s<OI > }{xvȐ!#z{v>rGV1g7fAwsμ#?&^C ΐ<2(w¸ݵkW4tZSN[lXI!FgJUF>Y;WLaVBeGGoίN8]ilH&,~!" 0n?~4tץKE/ .>c7nv\P=&^fΜ8b!W2q 0C ,]@Iep=EZ&MB+>oذ᭷ڵk W|^IjC%KZSLRm0&iT? LZO9SO۰Ƿp{(i'F?b4 @xy=>%1N@DvLH]vYiX1s1%f‡_|nqOccO/"{o 80]ʰGq^t琁cSp(|80'xۣrEa9x8uL̐yؓntu1;An X:xJ^̂6visA<DǤ[戲 mgqC@CSNEVbۑk49kmn͌Ƈu], ڙ &pDs%)NbR'*d_=jNN%cQL]|'(0pa~R2H׷o_:ܪr#s[ċ"2s=}9FȨ*_۞DX! ǜ9s69T712[xfUB ]B>g s4zmڴ)coVn+096׳fN*?餓VK/GU -#cs7d訣P 0R9@8…+WMcr9W=fO?0Q0 $Qq%J>/IsE裏.ж<|G=DDs=G _VRG TK}m#ur=?;?\/ 0?'9K;zk7Q.]Xhٲ%?iҤP%2@HkÃ܀#y7o^A̱+;<0swx9s&JriX"W\2NJ#ؚUm槉Ϣ^*y(3 D{X(Q w1v$ԧ"EH2ebgʮ¦@2~Z|y8>7U ` z˫f@N2?&.ex̕XX%o߾=PD cƌa1,t"r K2 }yUȱ|5`Gb5sVL!(XDȥ o TC\UEG+ v[.{ɋӛ]L;\bꫯ+Iq*^'LJ.&j*BK/r?Spo-]Gcn]6Wٱ]]ImӦMk|ڸq+)`.`wsQjA9??eU1jp5hs'{l/4:Xbl6 HjdI9..+y٣@tڻ 2uqL2izz"xZ֜8ulcz?C5]C! /0bGB}#ל lM'ra% x0Y 䏅:\RAis AԴKx cuhY|' E2RɚU2kgd+vOWaA:AVjjKgT?cTS+gi?'xg۷TQHGs'K1QGy+ GsFsښHŊYi{>4`(G @bV@:D1X+'!(#C@I 1CrB ϙ3+T b9*RuЦuk|'so38?qTp1??hРoذaÆJ,ט)7)=7ons,W9\G XLvBo׮]L 7&~m֬YXˠP1qSD5=N;\H^d4@߹s' +K,iܸ1EjTaqOΙ0c.Ŧ7FRqBt> H36fL8>L/Ias֦d>#CI磏>ӯZJ r֝?9B0 ۠AAseP͋ŋ&;)Y$ͩggB;w\,)\ZT\l׮]oD*TeZ85M"b9.M֩SN.ꔦtR[c^y3Qkܺu+ի#1OΝ'蠸O>ĨօRUW]EFQ|T]joJMk,|]R7n ݻwǐ.7ܱcG,K+4d U \$$R!jzyR(Cs2MaiE@G?;ZKLf֪U$qDƏ?F?B Xm|!Ʊl '`?F3biMMvGi^_y̔>G.Xh`3s߆aD sC0iS$5x`fٸ}s1nu|B7?p,_y5З*U0j^bƠz嗓 @M41 ЙVvy (O)MP/Ttz.bsGn&>EA4P|~~̛ho 4$1SM$Q(;%w 5UR%3d~pԨQÄ10x'db;shH"痻@< H!>  m62&n ^ڤ@v/3 yj]^eg Α%R0Z 1DB 7ߴE^njv|؋M~5IkLyI<{=gRpO^3'ƃ}ex7s?c˗/X؀^~#Q׳>*2əv:ANݺu.[bǵtL'bŭj_:J(a qXy'Iv͚5oo`d1pFcPot7lK<50H֘LYl"IO;[;bW\T̟JN1pc"C1f:?yM|Et*4¦c_hт7B *tZp5tHtfNd 2s'.E͒ѣf4غC0VgIgxhm٩$19S8I Bm޸k8 3nwf9^ \,J!C k0IAZ$|fttdǃ,LYL13w2KXL>TM+&Iyp Y?Byl{NiEqYoI< L@lje_#sG”~@;]y9L~<ӻwo> f ̢_E5'esoFϞ=Am۲b"#+ RF~Ybdke$<32K|yb|M5RI00h̙Dz!Zn >,"Ebbzeۮ +`*7mv`pMSgD̓U˖-NDZ[uĨ|Ex 6f9xm'y03&8vX4&{Aج%e$,y\q5LV5QFFCK̆3G4(v猛Udm1|ɦ]"22_f Q2wK}PkDѳS̃P$Zɀ`Fa2yX3HRQ]veNF$b$jW_}J0y~.ʙX r+, SO\WMv&f2q6#SK"#>4Ob}.O{ C+J̉7&:`&t9nk4RgD̓ˀHm2--1C͞&"! \G mL[V2|׌!Bx>^*P3(8JA"JEBu2_s%(D[y ! Pe!'b~FZrJHxW϶9sc}YJ_2EߘŒIA ̑9#TH# s4MDFBT<{y9 JeDiڴ) Myq\p|[r,"]t9蠃L7x0;ލ)@ &e~Wsk5~1T%PbŊmڴ a*2<K/ 7SFX}FC2& kqlق ~3<3r^J*X|_!Xb~ \VŋsemDF;R3 le0!DM>&[ouҤI|| 70w+22ѢE Eڴ"J<}>PY̚5 z"&cCB j/QB #P*?*@@JV@<PbT(! 揇@Hs̙0aBA@x.\q qI"~7C I ҧ\A O8_~)I=8՗q qI"~g:utW0|l޼y0*{){ʔ)f%kΛ7/+Y+S!"i0?@ްaC͚5O;4۷z3_[.0kk.K ,1|[֨Q~;o_~A Vj?IsbI&1ĥ+W΂ӧs1+W&P_|q's1IU@^֧sεk6cǎ?+VVd ?ZyF >FXE{\`,ѷo_cdɒݺu;}ƽ={/&'OFM6(9fYu@ իW0(E-ZDg$.J{wM|lRrHiϸkCD YjBpdzj$gHhHx%KR1gCfсC6"'Eʗ-[F:uЯh *T`SCnnL1 iR0f>? ?k&;Dtk:u*3o#$B(|NucɟFm65d C@Jb''01sڴi/lց82>p>| srm@ZHC+&&휆ؒN919rsy(>]L_Š!TĸEuDaÆaߪZlɌ_\ti޽Y7Y\tE<̜i0l$~ 0+6ì+PjP 0%a}f?9@ ƌe*V4X#AP*|F %m?41Gemz!Z)@÷1c#|ur/+V@Dꩧ̔A-'LYq6ԗ5k 4ӗ]FwGcט]E9 '۱c5W:> 30K[fnT̢a׭[ǀo֢qml9 ve.(D1Ud?/0?r>܎ ?0eϔ)SAOI^{m2u`Y ٞc-w\p )Jȋ OHA_\99R>B@w?% B@@JiB@b&)G@>JQb&)G[3:QàIENDB`vctrs/man/figures/vec-count-deps.png0000644000176200001440000004331614276722575017224 0ustar liggesusersPNG  IHDRbKGD pHYs B(xtIME lj IDATxwXT (E4 "("6ƄDKMQ؍MĨ` **X@ĀB& 3?+Ig̙s^5}81B!4 B@!P%BB!T@ !*BH&I! ]ͫWp P0kE޽1m4(((P@i.c!]IJJ )) sss H%$$@QQo߆*:u*q9HKKS@ #FСCg !cKOų0c DEEQ0J R ށ,***(P%BB!T@ !*ݻ(//=t*SN!&&ߏ?^\5w\SJHuVzذa.^333"##qyl۶GPB= ŋE---,[7xO> "00BpM̞=[۷c݈;-[uuu3HLL9#GG:_^`mm7obҤIu.Z(%%eee@KK ֆ]]]hkkJJJt (!]!q!44wFHHqǥK[nbŊr3gNm[~pttѣ>>>|2PlXd `ذaHOOÁ4ˡ^c֥gϞRRRb?b hѯ_?DEEEEE(..FQQ QXX|˗}sssE={W^OEEP%Ś5kwww(++1SSSxzz<==addJ2]AB<JJJD)l@ z\^^ 991p8%%k.򐗗^ן˗tٳgz*PRRӃ%,,,DׯMbOyzzb…@XX 0331"###b޽Xp!á5j6mڄDѣppp )) )))ƕ+W0sEFFqqqӧЧOXZZ }c-Aƨx1c\|)))@~`eec%0JH%$|||pM m6@UU\..^iӦaʕO?$8[_5 %%Z `ff3f PSSCuu58NQۏWZooo0ưk.ZZZ0aك~!!!HKKɓۭաAx IIIdffBQQ666ӧOR;v`ɒ%Յ Ə:P%wD{ fff&deeaee[[[\+WD߾})`@bUg]1{lфiiiGhh(,X `ĉ?~<)A;]éƒ%999ڊؠ_~56(_g؅w' XZZbԩ6m%4%Wff&"""p-DEE᯿Buu5!C>= Фl 8::v½{?ׯ|||C[[FǸu"""TƍƍaggUU&\I HUTT@^^KÁ7 )) AAAؽ{7V\c?G9.\ʄB!puܼyn˗/ahh''' 6 ÇG~Z3f //ϟC#F#k׮noƑ#Gp1`ʔ)Xp!)1P6>}WիG^^,,,GGG899W^vzz:jXZZv |>}O<2dɒTRRĉ!''G ѣG駟/@^^ .Ă X)JG ݻp.^8puumbA߿?rssiN ك |/`jjJJHqe?x1c֖Q B9s7oFll,OUV_~*O||<Ο? . ** ***pwwMMMM nʕ+q L<6l!!66W9ǏcGnn.~7T}0h 9sf“'OoNNNt?FB+WHJJB>}l2Pp8څMݽ{N: ٳ'.ܮ̙3/K[ ,I8r>OQT4 (@T@I ޽{a`޽9r$]]\EEN8///য়~¤IƍXt)OBZ-[pssLuN(ڵk ???"""iiiؼy3LLL(P2 @TT,--qa Pc;233w^ddd~СC)H_X|9ϟOOOS`#,\X~=O >w 3_|X:t h' qxyy 4BWWDHdii7ooŗ_~ 777deeQ`V\\-[Ǐ6߿|tθt hرc8z(^~Q`!bرcXt)&L` +WFGaԩBHp8|Wظq#O_~,6p5̝;\.W\3,+V2|}}Q\\  @>իW^^^Iyg ,Çcv-ϟ?ǬYӧOcܸqBH5k0sLHIIav~!!!0`deeOœ*&M?k׮Ν;) T@;/֭[S>Chh((0V3~x:t8x  .\14=˗q L4IigFQQLgSm?k׮E^7jLxxxQQQ BԢEPTT9s@UUnnn*m/^zR,]哒0f <<DBHXj [nʊҊ远:u %%%˗{xtt4`eeP*vyf7^^^ɡPm;;wP(Blܸ/=ݻwq!88< p8HKKS0Hp88x tuu1n8SPZ+֌*z^RRSNÇ!))X5 &MO?.~#GDYYe455q!PHΆq&IF@߲w^HKKxɓڵk ooo߿'QRR߿~r\ <'i8{,cF[W\\Ç* !!@O>?ORy扽:c sΥ 6cmmǏ *_uSBB\.6668}41e['ĉ!'''vD0|p i3111Err2 hDOIIIp\Ӹw(HAҘ={vdddGǡH ~ $"ȑ#!!!P|W1beih 2OOjjͥKh 6lnܸH*`nn?2.T&v҄ pxzzN()c[^^3g`ƌؿ?ἣ>?~||ӧ}6x<B!`lleZUUsa i H^`_a~*vcP^]Ԃ:zԩ`C\\Ҿ2v؁Dx.....PQQ=ĶcܸqPVV/D{zzb˖-B=D<}4,,,@BbȐ!(**P(ĸqL|^Zl'Odrrr,&&edd0e떖LGG%$$v٥KXrr2bc1fϞͶlƎ+CII07o۱cG.n2{ gϏ]p 6L_?~|x<̎?.zmlƌ5OHH` Ă5ƫbΙ=3|ήQˊ]qkr2Ƙvl֭,66~e1@~6qDsڮ6#Mٳ';tM'}Owo17n@FFx<?~\Ϟ= I&&nHHH[n59wǏYfa̙֚[\ۿPPP3gn/ھy?z̜9b^^^b?iŚ5kwwwdٌ1S4|722Bzzzs8ɉ+**2{X Ըϯ1Ղ A k.]T,2>guBEEakk ___XZZgϞd0;dL+w<{ip1 :'OGӏiPPP6k IUUU\}Kgu>٨FPP]~ԨQr QQQGf?fϞ-xaѢEipŭߨQp5S㟵5 f...L(9oԶߖcllٲe5^j#??x;yC9yB (itpjkw^dgg7xPs,X.0n6ݻp8ܿtԙ#IyB(O[X,5k-Q<8`Q$  {g}uuu|I  '':::mHVV;fEEEO?YfV6~~~PPPMڑ@~~>VX!vжGGV;[;Bdd$^zաgqq1n:u甆ѣۮU@*l޼̓fKII%? &4۷/~F֐]]]$&&‚" @FFͥ`wb 2Yx}+I'Ptttwr`۶m\RP 9 h~~>4 c 9fΜ{>Mܶmo .!:4>O!T@;& Ф D4)vލ[n Pxadd M555Œ%Kg:*G}^zQ H-5iBc;w (!GkMXJJJذakdffRB'Mh?Xz5u !T@;j\~BA r{n9rQQQ1PXB!n߾$ iIkС>}:/^L?ڱȠ>٤ Me֯_OIH+pرc 3g sqygc\rcƌ`eeEJjرc3r ЖQPPSLA߾}) ]c 066yyy%8::Ν;u9?~Lj$ix*]̛I:zB3nnnԩNsEtt4j&##___ @[wp(]Dg4>Xp!JEԩ u^Ur?@WW::'ݘ7o)څnΝ;@'R0:e˖ŋHHHT}8qK.EAAx<^mmmNB=z?#zNYYt-@ƠA uIח#@ 'Oy扊4>C*4m[wŵkKw2SL˗/qƍN/;uB322```@Mꔖѡ֖C#ж3dĉunHt`۶mv[[:O㠤>z=ܜ'vs88q"B!( c 9fΜigzCKK ׯ_nj3 !!!z^JJ (iS8Z,͛WcF:֜4'Ad &ZqN0p!2 ƣmHJMt4LP^]{xUQܽ]uq::m"P9xciI2r1=hҡ(-.Wܣ1`<tDR8_5Fl9BIv,ף)~eus V]]bhhhP0:m۶oӧO[FiU9UYv[Ǥ$xxx 77%%%]~s1HFы^lh?BV]/?@x󖎁xiILX8]'qqq9rd*emMF&EQd`GT! h+p84iLMM믿ҎVX!C`„  B;XZ I&!**یz:7&ܽ{AhgCA4iBwp].]D!-S-WT@ہP(ЛI+ F3:u 111"33;w.H+* //?#"""~a͚5GϞ=) ʹuV?lذ/^wIwNhѼ1uTDDD 88ΛڒwߡV= ŋE---,[LL\\\\\{{Fͷ-[ =z޽{mprrƎ4@bb" O?}4,,,@f xdÆ 8rLLLDիXr%;e5CCC1~x7EYYY2d  1n8!$$'O… 1cF/n=j{ڵ2dTUU駟˖-w@GG6mݠy%.Z4i7YYYlΝdEEEVPPٞ={ڤJ>sf/. cÇ>|󙝝;{,+..fzzzoa^bfWnXZZ2RRRؙ3gĶcFFFڵkL [XcBUUU1CCCϪc%&&2yyyv%̬ؾ}6c%$$0yyy\Az0f}JaSΙ(np٦c}xI&''bbbXFFspp`[lۆϏ͞=mٲ;1XQQSRRb)))1͛vQﺋ[qmϟ?~ϟ3--z׏1Ǝ?^|x<svvfǏoV^5Sͫsa7ScER/ÇS0Z?377 (cyzzݻwJϊءC.g֖-Z]x0@ФbiiY+ĵc| e˗/gqqq>O]]edd7]xlҤIj1Μ9ŮϪU۽6cMÓ'O2;;;㠠 *6 GYYݻ7SSSчgWf<ikkz١CةSؔ)SXii)STTwc,;;CC{3i;CXf eee0`jj OOO*###78y޽6`Ϟ=6m;Cɓ222G544׬@EE`9od;K/ekJr8ɉ+**Xll Ȁ磲R̂ ASc/;;%z @Ьj(Z"hpjt"cFOaoozȓ&xzz"??Xt) III033䠣aÆ{GXXXφFnn.6m۷nnnHHH^!##III5j\DTTTѣppphroXZZVoc4㑝jnz-N}f?fϞ-:FFFXh|||\oq\o`aa )))TxW_}%:Eo߾0æLRSSk-smrYttt~}hذavG={2cccc7n`:::L^^'O6VVVΝ;tԘ);v,+,,͛73~gc;v` LUUӔ|}}k/88.ܦ_SԩSԔ2MMMrrrꍭdZZZ,;; ֮]+zFmh=jϏ>|:uM:U +//gVVVlݝ 66;TS򊎁v2Bݽ{mܸqe&--$%%ْ%KثWD888ٳgkJuVVV:ߓɄB'|tttjAFii)+..x=o6c>Lܱcǘyf1mڜw`„Babۘc1zjlٲFT}ضA˗/ozוSM+*Tvv6ۿ?ۼy3Fc?iiivN>deevJꖓ˗/9KJJj{۲75-Zۗ ::˩7Ź)yU_:0̛7QQQ믿`ccӭ㑔SDj^AAApzE ikk޽{PPPحb WWVmc޽nġκ~r 455[,v<;HNNLK.u]VeTy*]̵k>}`]f$z-XXX@]]:t[YYYt?޽{cС066*]Hyy9v >. kkkxyyQ`"=zTCSSBPDGGnp: /8< `Hr%OFdLaС].t 5'dcȐ!޳~xx`MAlhLZk? j<|111B޽aooO (iMu>˗KK}p;}\(jⷸK侈Bɝ;wp )S1*!&&4woSNAVV&&GJEU(,kl ygrrr;v,,--]MܼyBbw0#;6-xÇq…Y[[cTEM8ԍO%RW8,, /l22qqqpqq Uo?~<ƍeee -[ =z޽{OTUU(:866C AQQB!ƍ0`ɢ6/\3fx=ˆ pQ^uU\͎aUU=z`l޼ nݺjJիW8uoߎ}A~(8F~G?l555aȑHMMɓqΝ3<<-Nχc֫W/X[[͛4iR6)))x1RSSEf}r"999JB  33eee۷y>iiiTtB!T@I #Gؽ{7BBB64.]\.VX.9s4+++ z2O>D@@`֭_ؼy31zh._ ???;`mm?>(++c̙8p֭[gbƍgϞRRRu~7wwwDDD_@ WAhh(?~BHJJoRSSϟ@ @=jP---Jk֬ݡ LMM V###7hLQQŢq@ qvw Ȁ磲R A^_^,%㐔L+p@ZZ pjϺRrBb0CIDATXԿ<!!!PUUEϞ=1l0уP%- .D@@nnn {… 555 <6㑝 ^첣F¦Mccc=z׬Y333={6"##!)) aѢEc`ii$$$ʪV{)Sf?m>pm$$$劝X^^<{ )))BVV*++!++ ???駟RH_WA߾}acc000m`cc^z>@ZZpE|WPQQӑ`xjYF4!C믿!j*[믿b߾}6m$zk„ gϞZm -- H Ӄ7.]ֹt`󔖖86ի߻w999ǸqdXBT< iitCnߎGĂc*v9@Pk) BzOVVtuuE͖,Y'N*6سg^%njCm[~~>444عsg 777l۶ O?'NSdijLh|Q}RP(DYYj\Kyǃ@UUpss B@#!!Q(JFFFlӫxϞ=bGycn \_k׮AMM u0b7qwVSAA#G}-gggCVVRRR4TUU;D())Aee%x<x<a``P2Xǣ\1www :T\\\233eee(++CII "Fۓn O5 fcgr8055˗/QQQQ Ǐݻ@ =#F#::Ң*)) X[[(d==lmmkL8TWWڭs[n޽{o R$Ɛ< // c8p !*III=(ԁ1*Zg؈vaWVV.244Fo344P($zQc9www8::BZZ>LMMajjJMBk D D~Ѭ +.A -l@+ KVp+mfTt թ$&L(TUU?YRҷuww^,5wgcۋ0~qu 9S'1Lbe~P24&.nfp{-dn8>xHΉE.,~m(m%yf;~*(&"PPPPP@@@@@@@@@@,eDyL7"IENDB`vctrs/man/figures/lifecycle-questioning.svg0000644000176200001440000000171413505146267020671 0ustar liggesuserslifecyclelifecyclequestioningquestioning vctrs/man/figures/lifecycle-stable.svg0000644000176200001440000000167413505146267017603 0ustar liggesuserslifecyclelifecyclestablestable vctrs/man/figures/lifecycle-experimental.svg0000644000176200001440000000171613505146267021023 0ustar liggesuserslifecyclelifecycleexperimentalexperimental vctrs/man/figures/vec-count-deps.svg0000644000176200001440000002460114276722575017233 0ustar liggesusers Produced by OmniGraffle 6.6.2 2020-05-22 13:14:22 +0000Canvas 1Layer 1vec_count()vec_proxy_equal()vec_slice()vec_order()vec_proxy()vec_restore()vec_proxy_compare() vctrs/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171213505146267020422 0ustar liggesuserslifecyclelifecycledeprecateddeprecated vctrs/man/figures/lifecycle-superseded.svg0000644000176200001440000000171314276722575020476 0ustar liggesusers lifecyclelifecyclesupersededsuperseded vctrs/man/figures/logo.png0000644000176200001440000020176414402367166015323 0ustar liggesusersPNG  IHDRޫhgAMA a cHRMz&u0`:pQ<bKGDtIME%*IDATxgdv ε#Y{G")H4"E(C2=Ù^3gzj25"&ER|GP(+ ;ܸ*P*ʪȈks[{*4_EJ$!?;}[vLزOn$Hu~)GA{]Oo'4N}2 %ULKOhAUGpA{[mQ'/-+H/K) :#Z=3 <| oI)! '۝>- 3f D >R$'P[ EA3z2f '@,SB 8ezKHd ſ`JBpS޲M3`7% "e>jY_,pӿ;}[vbk+ x?B<uB\.^OJɥ3~/Ŗm`[߅6qO@D ZKFR"o@⟃wRͷOo|w$C7]oϰh7+!G2Z 0,1n޶}RxU+;6IxR4m]߀ȥs߻ӷsk[ EaЅA=$ x >ǵ푒 &׃Wmǁ! ꫗Nߚϝm|m?C!G  qx[>Q|3ׁm|}xeg˸1x_ElA|m [d \[} >B~KP{|sF۞[,bCxM+g 6mw?co om mC?( ɶv8oh7kF6{orUw@;n~-?m ?: I=b;X?ʪ ƻ ^@u_Q.g7;}+?߄{gj.Bʧ<^ gqxx7,Ees7 ~ x/|NϜm|B@_G {]$,ow oS\}ĩ7@޼m ϩJ!mܶ{z?7y-x+6 o= -g[_ǚJH)DoGB2/u3rm&6'ܵBB &R/_;jxkuu8! 3 o踚;Y o[ o$5ϑ6BJ*6k'O-_D([DormھA؊z.RoMo \+ 5r]Y킷ymxӄE`ʶ=^SHCnxo@~L«W!ގux#P@ǁT> 79)ȿ}F Vock; ?eO /$ \vW6 o;8^/k鄷""Wo~AB\h@ܳ-^o!k?vngǁKo~@9PVuwwcgPt7bxn3MsQA( {zxrc~Msm'_c7? n/F%Dn[ q5}Bx;e![F@9Ov.0| OB8HDZ \"!a穟w5}"ޛvdžn`nV Pjys~H )!/ oY =޶&@y{mZnۨud>_./6<]?+qxCL<68 *_-4 \[ 7;K5Gm ~Z? fU;$H-FvY l WӻIxΝlρ*|v!/GPq?BXOF}ul-Yn5("LY׳G~)|% _M`F)xZmsew_`xm _9]yE>3H~~۴] [kn3m?~ϐl񮾪Mx1) U}*ސ,p6o;m]޶}nnMofŻxQ7^ I?RZ-^31\hz}7B4Td\ ގ?&m2].[/ 2ȑ7X-U;3&vŵtg4%;ܥŻ2Á'VfA ෥-^6{=x۠W5Z[5bxWv۴6.[͗R~T5?ZmB~ y׍bX3[ގl񷀗Kdw* p/R=Af;8׷;]<~>ϒ-jV#;?;r0JxWW`%U`{o/QA8ƻVXf཮I 7禮2v+Ν%N0o^V+7+6j7[B,=@3ch`x =պ?Ǯ d´|7_Gs>Vm>D{$OH&6O /\OO-^vۯ؂MA G&3Зq}4.):T';6CڦF_~A"z=]x56w5lۺk/rx /_`x^hDv4tʦ+ %BRީ)(;Ov-o>ԯq_k:g?ajmg8zvs;ᕷ^6 &M{ (;nKkE݄L ݎY14c&h8>~tǏ7gG⿮iU;o7}7p6 Zl޶FiA>l܆§$oH~[~ֺBo:h!zSZJ$ ?0=[ia<}#/#!Nx@"?~ߚvXIx۶e_oLz[u]gzixmmj cChih ga{׽@ %]{\,9O|P_ ,;m O _Fѭo~;C$Wl ov.ʢ57gEE&p]x;;j k0ɄEowRN"aQkH)ф@Jm=Ju.\ͱ} CWNoA_\18u._ytM}5[@OHkusrMo⚊ ?)xDmﭳBuLt:R }*Pcr !x,V44n$3='^suO/=_.#!<Hg V[W@5uWZ6k7 x;A-Ag4ͰBC`7,,W,\q `l0`)WhM|[4Ĉ}GXf/>T7']4H7DwMu86ږ:- c=Y5}~mok927`BpXMH>+:.^^o!4z~4-`u `ecF{>R{4/wZn{#VZ;r^G^nNؾݍpP|-ݮ$ojkOB3a/CXR d-vre&Jyy>:{f!TpHKGt96rjGf&_hu{vx'@n^X^ ûfHr+m^ u܂M HНMT%1۠xxĎ^b1<\J)/t$Oaf0 f}l"FQ܆ʢ氚.m%ސ߼:׀-xo5 H`se CDz j5b`s}LRXBChB7$0=ik[7qg/uZA5[:NE[Fl xa '`H*c:J)\\\i =#] Tz]G3Lt+bucfc)R)'Nևx$JshyxY޶ \V5`V\OWxko^+a CiJe|mu]uٿ{s2jߩCJZhZ4t}X.4C# fy 5zb;ѧᕭ6mFkte\޶suzۯ7 5\ s[EWkCC\j#3BlT*ҫUx"Sy>Ra49t4+!#CigYPǁD/1:ferYjxuF▦c  @6ڍe:8*^}4B 0b5D3h'ضn5`j M-*Ԯ-A c;MTmu7-xr[-`A+ .I2=WbrW-M6tz_D%ɡvo\΃B 'Zb8iz6$C ;wt۱-6WF[WY_ONv[3eA d\V u@0JrjQūEt+@N,MC6BPq<|O;SEzrq %?xݥ\k)Y ognB5S{-xoڤ0X+&B@p sjerZFhz ZBj GMHot:_x|_/W<#2 c-aKd]{<1ЄWz߹7oͫ,uM:<iYr+n+ Ud,r(h !۲7̙ DqK+4Vf iajr=xW-]޶aúʛn[-(# CRD* q\Xs=z\:`@?&B7z:?Z8~[̓a$RHSq7 otׂ )m׆r +ཫCic)W ɘAw6Jr^*x"~B9HCFHtCk5灃 gxi*Na '?[ZWf\WzuH ۬Qv=x6 6'H mw z)`ϐ/6(U;ƺJP*;8]FU:HC>9$xW|tz6yNaV!pHT:Mn eoXxSJ7˴+[5]PkxA<WPlaiː<64MR%N~ZwK9 2POoA^C|uz lBeoήeE~օwc> ގku  =- "%.As !j df@Ǯm=;9<ҭqr xSS ֶ5齉m/|jcZ'VMy]-E!`b &R(WRwQ@hu<8+U;>:x<Na ^ 2Psrbܞ:]3*uzk\Z4GӭPQBl`h]TA8 KئNڼUA:*Eۈ:Y_hl6=]{r=x#l{5 a+}{[7|?j'~uh{ADq&vs"g/-s)_P^gzQ).2Юωׄ7:q1Cfuz]}~t z4w5zl,05Ɂ=5f ̾M |=5oiky%` d2lkCގׁu(-M]jmY? B0Pdd `p=p!_4ՋSPP1M;>r(U#`Vm[y6ˍ[ Fˍvr#^ >eȢ0lKP g//Yx^@tZ(4= R326Crj n)W7jbS)Oo[#ߴ:k#;7]^X OZP޶d Io\Qŕך*sW?V=$nmM:&/er]x׈ۏMؚG+cIǙ[,}4K*isq\Qv,l}l^wUu]EbarN5WkWAy5^;L>t7?vA~ǶX6xm hW!.q-x;G+o=4 Bߓ5 vK1 -L'H#t#U)r}hV$="gϡkZT^믆tl3=uHai'mQu-x[].mͷ޵C̈́Hn{{M` 21Kg?ùK˔.c[& c l/Vz"iy`dk(B>`mׂ+owsn b[O`+ JCY<_r Ri꘦(}uaò(6;{901ȩ󋜻#QykR9ׄ- и&1_ ̺kVύFZ90E6P2:ՙ<QjKF"K T a= /#XBՏ<ˣ$7YG'l {:_x i CNŘ_*su%k惮7\ikz8Q.MRLmIR*bHB"R"/H^ Gfo7_Svg :A~k0U3a V(`0LLC Wr i %q[%=z6#Np4M@V30*}wuux[:\Ok<]Ѹ uaWZ^-M֚hkLv'(W]]5?1['pwUU7nejbXج ZAFqKIz5t­w]u_O,ڪCX;#z,x*zkλ]T#]α-U<> ?v]x?Ure3Z?-x;otk:ǿe7d+0'EH^ZVwUA7\?*XMΥN6x"nq9j vU!L[Ufh _e:ΎU= o6ێ'b߼V»viQ2K5"O5fͻ v"@ٕd篬 eMM ϗQz|, "N~1DHzs)tRaϛHcuPu"O_5 `3YxDds9KUn0t40- Ӳ W45B7- =TA<]_C^ցu7 /W$iv=>=xߚFK1k:;uەةyu:TjJ m A&iqx s e%paYW/^UQ{]?:&@㘙^b}c03=薂#0LPI!7}Fl{qCs%] ͌a$3,Ƕ D[*+w͛&ػ Ka=pr;ؿ3DӶ!;7C3ܳ'IPð~^pM{cvx%tWDcTy;>c<&уiAsd= ֚̅vՍ^0Z:YsmǿmǮ[ck91c d#AC)F; e +-M0T2fh 39zb WW²(+8RNHD6{`Ckk{"b 1l?c>{b7Bo,j[ |׳>vίUE)jfqbD_ 4 ܶRж+߈i膎%R' 2 o.c&izltM ]7kyAJ4 ]p]t-:vH||#(r.(0B-F =D5'3X\/U>f?+a! f뷴RcSW)ke՝HjpJj4墮~gX_}ju;cfaĒL~_NiCVhz># فѰك\+fcYVOsw=FC%v}z܎i[v{vvrB閁ejeMnxpp]Wŷjf&Mcba/T0M$.LӘz;nZt=_&Ĵ,D˶ )%nATZ.9ntx;ǴmuT؝zFF^u|σ;!n9tt[}54&ٽ\UU[ ѥǒB7tzS~M4q_ĭr;&l>`s`m3MD3{ф[/ej&t´0ilJi٨ڣbvBix6~}A]+ 4 ;f&`ضIyBJZPmӫTLO7tòVQp$M`c$)4D+#ǰ-C Fݡ^Ө7ݰ✦zX2I,Dziۤ{zMф@M=lL3r4@%iҩRy6M2!O%I$1LjjZJRR,Q.+ %֚_%xJٽl&2/yMXtYTq4Gz~[X-4jBC#Lz'apJ@lL/Z,IoK2aUU-,a [1R] zEM>Bz: Q!~qpѰwx »JLTxCئ ̅I}x puzCGa$3zx{66BqhTTX"NTw]=l6́=':>4\ϣT3y.\PbZN:GhCt*γv2Pg_xiB7T@rf l&ţû__z|D&C=={سclJ+ufV8u~g.3=9a~ՁwQ]_M%]Od0CU6U_Mz@&Ē!&m'7)V*YʸV7ˢDm5, КʡZ {xa&x 8}~q K41~xx١y^z^ V7] +~_LeTVCίmwOƻ'45o%dz{]|gvĆ }Y6uǾ(=u?ߓM_}xO-z{h?ODר4?CR$IH tށ>#|a;yL-owyR,(t a0-F fVrzDuCt#q|\YN(0A{YxQrNj%Tq>,sc'g:Nq4icY?8,AVV{YYTׄF9NN.Wlnf 24"V̦^$:V,F,`0cY9ѓi@O&HfC=ן}h隆ixRnvިiф8l-bG6٬Du$ekxg=A0A}ֹtdc[ `Y Y))W0Bԑ2`'˗ Y,;~C oȁt4MGJm &v·sl j'/M0Vr<8ȕS3FVƯUz~ GZZ.sE;rD+d߮QΟe0R"jL<>~@[pef3g(*lnv0ЛӳK&t͎c$U+LlzTl -bh#cYs#^w#*ݹdipE7_zhwU]r )U^xYJth@Qsuk5{ѨըW\:ETwx6^~D4q /reK$phXW H? H⇿?F R[2=DRt]'3<}w2T\(P}|2tC{A~�Jj\caaq1m:܄}Y3f\^+,5iNy;2NP1zy= <1l-Z#dJTx݃xy G.G{F9{quzW)P_4SXį)c?ɳwhC|𱋑buN]T87 /޶pUP zJ>W=g=,/blp g;g|lRkp%v<჻u;U>,|z4~5Lfzr.`FFd lс1;B ״e 1mTWGRNIq=:~K !dD:C fsY͗cvz r˶-.s4B@) w"r^ T+#Eb$& W7<\g)WydSGFX睏fQ{B"B(O<9y*~@cyP,]j<N!=LY=t/^WD)@D8ׅ5] /Eۊx-u|| gJ{wqU56j9)\kmMl#k]grz0LL6O?+?|p2ŕ\jTٱ8.Z huoޠce~ZIue%|^utH)yY`qfJziNŢT!/R+W/-S) |_-u%q4MVRTifKG&z,x R7J3ؗ f)+N^=tg|Rn,`>I &^yl' Key ~Hcy$NnZ"ȹlٷg\.;Ԑ~+ӫШz.2pC> 3S>NNZejfWx.a+o#Th@s+7 ɇ'/Q.W |òسsCѽ ɋo(,-27zpx؄Fd`s30 itpdR{.jJHX朦mabƷroKs,QXY7 =m͈2]%\Шש Up':mw^'z"̪JC 3Pa.NWpρa_Y Xsk uUjfذ:wGR8|s,Tj%ܜyssxBvo)ffs vwg/v }JJRıS"T&3C,NJh#]7s-?ʜ8{@N1 r@airS*PPQWRV.Spb6%i c'w$~.qurr RzE8b h}y vd觫d&e[ %N]Vu*nǣ+av+3],+瞔$l'u::vfEs!PQEfR+ 1ty,EdL#7 ^'Q|W2;G}#tw%0#Ӌ ,VׄDY^6 oQ~c^&/R>/`qUV+=Fqx25X"q/N3;D"ƞC ԅ)斨KeF yUU 5Y7=I2RFz}09ɤ܏>ӏ(c ;4H +ChZqǭ5 ¥9"#f[>H_o ?,.WUxu3-U4SH,@p|k VHcfzy|ЂZtQ8EceViگI"NX'Ç1l#cfzUH¸W6rmxk»kqAs]0;u]IRē0~YfiMl.=qJ<$mAA peV NQ]yCy9[`~1BcQJc9~~}{w1mQzIfҘV;nWѬf2RWɸɾ݃,*P8K<{ǘ/rB m8^5ޠH b jIoO{4䑞R,R_Qj&»vP!iP|S*!8<αnLwauabOM VhziY «,\ǡVP)%\l>@dB$q5|>>.8saFz` JosprA *DZ^Y-U7\*:ynRO %náR,R\N\`жL8_?SES], ⻁g@+=`h)N_X$}=)V%?0e$՚]RM8GTp[#JD7=ON<Ƈ(THlpK+JG\7jjn\Wݥ5*i3WNS,Ո ?B:@03}X]7*xYZ o~6 PC;WkԫU\b~)+щtD:E,g1RXg^:JY*`hSO0$d`t!jK*E@,yXjY71V%3]$1l%@b*+pp!ML>&$Bn\WP4JA4Hvau `uD+!4X'^k摾7\-%nFA\aiq&O={ʤIueIe3d kD\^}՜ۇ ޠ˓+.箮Y4~tNV))`(OfEoc\ʱ1ԟ!],ӕ69vr~4Ğ]dkmpT>hdz2훉b.3GvoW:h0YݧF*aR:ѨT7,^.`z=6hktuFBZCu¤FBCowVK&J5r9/J;qx}w`;Tk+'_Q+W# g<+= EBٷ9}a < 7JW6η~pV\U%@-ұm\Qq0RFIl?1b}#a"'}9qvޟ?k"e>Rcsmɵf3[ͪywpK3LN_ܓyqy&ǕAj# Y𶴽J|ߨhT+\NiYORq=lK=L` ӶB'PVsM4 )éթJT EEƏ<0NܗWw3ڝxu88F"ySIXX09[$M+Ü K*]\4;]WVTneXvr6;ʣq2/u+Cu{}1>ӶZ; |Az.B39zr]apַ پo2i EVXfWS}>8y(}}X1KiEDl0Hfcޮg..Q{t,ҩrGq}A nRw/Pw\N#ۈ%,v:bcnխw] lJ˓s\Z֓M1ԗaPO}|[.TC XJr>s?v,#Z*tIJ#Zk3xKTbnjogb(Ƈ˒J0t)L.s$g_eyq e #~{NU ˥LgzRAG%wJHP =HЬ+4 C'f*)op/}FWΫ\VK8EzM‡i%0cBt`fIVJ0d 9w1|d'+_{osuCMsMp|߽O w[x]K>3sVWW"9|ApޛekP_Xx RR/U^ަm ^8|tiTkTKeJSWgxvƲ-tM#hՍzFU'K*Q]_-@6s{Utyd2m P^S-WKԫU Ӡ^rܥ *4\V/ iP|ZBay`mX 2u i4:ZzBXR,Q\GnYÌr)Nx) R ]/`~LxRg:g c[|y Gƥ+:~%%k+=jQZ(]g5@͑iXNn3M{#k{_f7y%vh “Mp2?a^xj|G;VC:)UfVO/7\wj|P-эV`ԩUlfX0;zbϣ_V,iQfU4 AvGUn.HpGA8TJ%b t@3 j] OepÄyЙ3)g'B *]/GE( Bx!uAXR`i& |j&[{r7-1C)^j:f)TUR4Wɷ_>O3~&W+6Yׁ[: pFo ZW*^8m/I7?߶fO'oc;{n:Դ;:+m\|uv+=QY7I\JHN]X'cX\p"{wqqr^U aj;4Gh:1B5UY$kWZ3Vj$!t:Cx6Ws};6k^9[XTǧgH%msu/>d%WGӻG^M04SÏ'ٳFy9zƻx]$RIDgT%t ErRiMkS}⫔I tcX %nΓ̹nP@Wq)ּݭr)Y5%-RGIcU,4 *m(!Ocj`uceԊfu9&g Nn$噎DtZYA3lNU='ju܎;5JWUI\\Pq1 >eveD_+tw?bicۛw3nWSJzZW9@Xu;ײ5˽_@m0]+/Vz8tnk3rξ5j~X.Eo+L؜87pC{8saW>+U檋TOSY$L?).L摞zfk]`HlXDNJU!>4{*n9W#Ųɳ ؽgWs]^xj'_9LS^$0yQ4++o_Sܷ/= +LUgD0Np-~ 6HI2uukjr) 𥇶S*3؟PS,Q*="?3HӕS{z +5+E% [R˥ ex&v047/2\7cҝMpjFEZ: |Q#JDȒL{R* Ce <p9W-@{"  #5]) SM` E4DFd7xa:Y/\o&tLP(ְM_RR^

E_O ]ф')Vh;#[*2Zigspgdaw_HMePo*H:*^pKӑT}=tV=+ Ov2X⏿1-7jʉ Qëq9U%͕|trT`P{*FLcyQTޫcy ?8`Z%2-'1 x݌ u)AiE3|oս^ ܲϿE!8i: D9N_\ܕ|[Y`5T vjF%|=* yV Zih|?*vpCH>y!Z|bXd N;\3k!B7A\f/\-"dū/9tʎnNFnUS*ē||v?QN&39֥B0 E/<,zog(YQ@w$qWb /uKQqI&WαiGJV *uFs4=]1lM=a85+.SR7>Po|yG%38.y3uxWA< K9Z_ڂ kWʥr)]}$&is5,r%†!28/<=e|\.*/ ڶdN o:n|OElF %O=4 OO4<{xiUB}q%W0o բV{8g/.}Os"7O6niVRq*Sxˌt3:͗cd0kO1,aYx||sĖ=ZDnKEuRy'T'\Ű(ՏId1.M/JD]i5̤c|s 4;IRj5W9t?z 'VۚZ5_MtMГ1ye^|"S-X<_0t&&%jƅ>8Ǯ`Z3zUHwT|o /%koJlJHcZu>:IXC6*x:5C|Oaqj w P_ĉ2^bl'g~b).JPu_):։@[+d2\Mwcu `{fm#Y=8WWj*1FPu dܠ; VDt K%zY82W)P~fa`Y:\oL*!f=ng|$sOb?ͱ3ʅ%Uƪb }4PĎU^ZWx~Д[\ .MTx~'60Sj.\-*OTKn_uҼy~GGY\R{MyLx^3_}rsy4F,Dʛ7~7l۪\WF"QH-xh;ɸEZ~3]+*.:s gȤbu9Jf"IZa"0_4S"L 05Vq#4$V dò9wgۉahy @H8|Lˈ3R-] {kK.-r /ՙ] cr"JV #X1 =G]ˀ*P-t\KWY) 4sKT CBY@F9hµ-iMOn`$*+8V #h]x axKYKN0Л2vZcӬ-,ߩ9c |03hV$;ie7P$Vn Ll矚`x0CLDtu { ;1DӴ0Ke1)*f3zRR+RWoN \\ _^J*=8{wq`O_тٳ*Je!T10:bA5ưR\rR 9Y~"LW`fH7CiV ʍ͌qO%?c*ʶg6_~bFRRS8^ЊWd`6N[,9ZspcJ? Xg%(.WKQ@U\w%5L/VwkZ˾]K5RP](`ɮm\ȶ^˪fZ(2 j XNi+ L;0ē Y>>;8Ւzh,VԊi|ZY#8,B+?F$rOc$3҈1]qJ%&voeho+[ǯW ZP95Z+BGR7RJ|q;8R_ՙҭ<SHd[a{:Cڧ:na SC+١'/95ƒ,`9?מ=Ğ=M.-Ta̴R+ P庎f*{ڽm]:7o_&W"5%h Kx5U$PBVgMMcx Fnx*E*xz)K Lua$8Zؽc#<;CW&# !fĕ:̅2g.Еm=3Çze(UTEwM7X*6x:5dzGvz>.lN&p\z:{.C})C JSN63 he)ʗ2З3J:Aۖ5)./hm̈́xmVgЕL%kg^o񛵆ǥɜJRՊYufYz_+cbg325Je'ũ*Uf _cfvm݌ wGW_( ;rEJj [hk|am{b7ifkHcvs+ǮxgrϾ!2q JP[q4Ì:?$FXsx NC;8~fpAvm'_З{ɦc| W1q^_ͩ"~~h!,buSQ) #s[/+j9 `i&,=[ZVpY)|Xh܋K3؟bbg?g/\RK+*?{~}s~f,jxrGZFK;fx =RuOoWVpN4q+ad|)ȕ\.\Yyrٔ݃t4AOW 0,XBq4eB}Lԅe?v8w]=T>Y1f Ztͳ?Lccezr,8/<Ǖ| H]Tjo|0Ig047/p"D !8%._UqNnFnAf Hd&=Ɖ+7O#h3=5[\8]-:k{`+ۇKכdA.\YaaQ:bo7}̓C8^|0 EXhL-UX90)?{'g~w'Wꮮ9!`0`r ) N=g[t֞omﮤ^[^rNkrڳEr33swuTu)JP^& С\pHfT[[*~c6^V2#I4-*gXZq]!D\R/a:4GYQ)Vlg a|$z+7V)* wr`pHchb \&&⻈k{|v+5Z$ 3zO2>[.29.Xgc‹OL06ɻ,u| }G;N5r94c7KYdP= eВ]mH=3Mo&FX4ĝk%z3Jss 3!8ĭ~?KjCp8NgG}S=/*.+`|0}`:[SD"6TC1l3M|> 1qe!~WBBܼCff>GT߱J9ê;HB:t̯,I9GrY6 +oǢ4Q4_}&!>4_|.W^ɓ<|8o_Xdi . =d(VqS0A5<OMgݘȁ-h'&9٥_ƭĎ-྾5GJ]8782+k ocW_y ^orYX- %fMLl#$`<.b"cziq`MS].{>cQ3xk55FUu&]H(I4 'hKob;W&ՙM*`oMhbE>:"lV[>+%VrcG3JbQ!dh/fWo)[9-d0ҽ2zsx/pk6DZ=l 3 9^93K.o`s{(8r(B&-OKEfJ]JH""DK uObx3|,ق JLIrRV*n7~[FBY39*K+ysp5`#[ev~^z3 zeCLEsp۬w#%TwsܸC̭V:maD7Sc|qU٥wllh tMbT/3V;s9f򜻼)|xᵳ 4]7O''e yCڶ*mTK?-y@Y% SF0tf5@($Qwf(xu!DgfW+yd=$l9U Yc.Bor';W<6Lans' IHx m~/.Ary9Mq98yt'V3kndk3&4@Ow.,VO-?kwttمSTp m7W8qdd=Cl=ϞDVd<7J j`~sn]瞚f7zw?\n6q9٥ ,F1|KzSFj#K6;6ēR]n1;UrELjF "[hpw~Y.nZO7wiK%I29r'd5p4uFSFtLJy8}[6kN6 5;pEfP⇐+<Q돼v+yd6'rp[3Y*V5Bp-Z²n]bLN[~cciq%8VFCKklxϞe7 {XXl|O~`Sb..`gd?ܙ R5col Di28ElI' ~tNfyR4y,~v w3?FhS[,WFWDv񜶵ao|Z"&&{ f '.fY;8^ڙرhprFW%tXk Z .حWpՠW=TM@zɲ̖7aX]]16ܼf!1ȕL,O)5]P"xA Av, =)F%g'OL{<{nDtnr X Ur~([$8zh۳۬l8v<>,Kvfb6MhVۖ@xFO;QBQzǝ%!ZbrDċLCQD؃GS(=H{Y|c/3FyA|܇qvn/~`(fFTcl !KhJ$\'u]]W؋V 8"Z4I$q{f0؟bfS1nҶ-e !⃫ˌd8q"kN#+ByQm֑ejVls5n=^zC7ov WVJo=ciLfVyw~7 K/v ˜FTT mi1޵1^毻s%Lyh7n!IJŸ3k #Wpl jS$CBZkܩQfkΰ^nD :Vq+ӑ4@X|hn+Qt1$Uu=tC'T{>9ޜ!_lwY^ძ 2'Ι m(HޖmQœ-9d;_IѢ-J وN9ylB+#my,)0]\4EeWm(EfRk4:R\_AK((8+P[ABak z_8.J~Jb7FҜ<:FmAKt+Y%n2ڝ,q [jRwD \Zm8N[N68^`%$ Vr]Sǒ]M"atCglK"KKoͰ] _DpPV}!$I<,˥ְTJO$ġ=ldͦe7,[}'* ujlXZ1=5,Sܦn|V@p;vnZ/S+c(8!#L{ Gx /w7;l||I9'XS-ë̓Q#벎{l]ֳK|pe${$0xqz✿J6WEDZruA,kej #=w'![Y\^hN$`l(=ܙb6/>5Mշg7םkk+ * ꄘ8}BΈJEx1Mvp2fsbE[^,fq%| Wz-iZ`WVqw-;Ѓߍn l`IᙔBOtv1z<|.Ls$Hf82fn-}f#AD/xdEU$nkkld+|)zeXy%E 0:{pU9'C%>wSHJ*S-b6~w}+쫨HFӴwS: .rIUPb%3aLb!Nws`?rWVXި!Z9w0nvj6Dyp=z}\cPGS(#5HCaRxCITUlۣi:TkڅB"`z,ͅk&H1T-T]=78d>3O~<ď0R=8x#w5YERTl)gg cY.g>\q`ĩq{-S#̥nx$ s`?KkEnXY/QC1!\߬u9Ko mzJ%֖iۢ8vpJդ;I2wg3_Daʁ|.y4k{6!t£:g.,rv {',>lDЃUI a2]1>9ÇY\-r{vz9@VZbmY]}* $w:AU%޿‡7Q^p]]q޿BwZ,㤒a޿Wh> =[صxQB#+ 20q$ `D:sȪFwM (4N"TDZ<ض)E_صTKmڲ#j$I&glVV2mrNHኮ YѐTMШ&lTVWRO ju iddPmؔ ,x?vFi9A5I0IT]`y5Bo/cX-Dݫ[E #$-I݌VE6~noE޽qa~Z6kEr+BYmWrW#C;#ϭ-޾$lrڀ%P"bcgUJ,FͱY J\}DB*]1u %2KLYQ۟E4LQHz(-$EV4">4@#zRrfaઈ EųzRi?GoYo `R֚ zk/p,]&vRQ_e"]$&gk8Oh>lTq=tq]V=З, \CWT1LDLBQdb<;MaSo:],7(WM5zƴ[ۭLkUy yq,evGOfP0t)(-HF{KK< 2Л wy7Kw.:.4zNkr#~bl`b{z7O){׷7+m8Y^C11I6TV=USPH=  &^i*$|x1aߟXDn{#ѓ"ۭܥz~Of N>!V$aID FӴyyڠXnJ$v%/њ|1PTY5* T@gMh@&hsYN1؛`+[j|#ۨ໾lPܞ, 0$,$() ]mǽ۹V++;ͽ.Q@(\a %VBk` *a(g>X< ŗ^v qX˂rK5k%MU'|ɫa Yoˑ%ju7A}m> W~KJ'f;4J$FSci.Xgs xVS+K2YhE3CaIP* ?ه8~x3Gఞ Kb`ģ}%y<# R1-E&5X1Ť<ڶ+.L_[Ez&y?i]Y-LK|.טYG:X=::WfiD򷒕[;^Fg?Dh)XjV0@g@dMeh3y|'x0{'3,x96qv9Y©{BwW|Af*thyqk3ZYWUQp`6Au!U@dMxP_1./VƱĀ]Ήc&. ~@|{ >xcm*Aؔ*2$a ǐ5Mj>mW5pEwb*WnCNtݮI3ҵ)ojBCKv67nr` +EnD%X4,z ŝ~[B>4UBF"SDdGnl0gT]L]qo>G6W#(fw.eI- ;h[%^}wb6 kV#@'3 u{c<{jrEU>l%T7)D x{ kN/ 4[#lJuKOOuct .;Sa6yyLSFz eA8M-|ܝ]w0-$[_4?4%IMπ'}+nǩW\ә[fz۹r o[t_{o^i @7KG_.Yꕷ6(Mh5V[/'^Ү+-PIQX(]c)~(O>4fRY 8\[ .Uĥjx-+kV/4My@O܃xܾȤaR!hΤنZAmS|:' rݝ{x r8(TE|ԙ]Ggb4{'zyH?^6E7][0h?e-9w >Pt: OMI8wq ϗإms-jQtu^Ij; lEetMƫ 7yC5bdkۂFY4B;ՎG5ǻ)WL 8B0,jYĩ_t so~7α]*Bṷc"*0FRut]'3)ed.DZ#I%#!Cee-OoB$W謙̯c%z 0AVF"wm}iOdkE֦j_R±N-]ɇ Nϻ0*{O\|cn1hߓ +EaQ~Ⱦ4rKQ۫\ OЎFsιـR:H"2*4vc;Sa{N6R>'ȴjjN :[5N02flhBb{ y6+X"xԳͶx"C25$y{&6wk5]Y5P UW7كʘÅ++Laux_ 5@@Y%LFkK7$Ib dF1"q$ 17. ?,v[Fspы ~<$S-L̯3wTι}d%24]زfnYP{xF մ6]cm(϶Dl-ĭ{;<oQX 5®m$k e~z)xj^Y8i}`GɌPhM ^P$x;Ç0m`󃟷Q DTV6QTu$f5<\Sӌdh4-ZVx̠;BT)|C* tL:J&Ⱦ^V7ܙͱZ\m(kZ9 >$IXnp*{H D?`rߟ,eʘM1 -:PK[80C8vRh5_ C} x.ԛ]!TMѰDmքl v=q ,˝w<YplO7_n8+:ީ>p...2TbD YƇ3 $E %}A$Q}_ lIyxU3~{C9G8/aAsTI1,pD?u| <4S Qu)ƙg#?cClj] -쁖yVjUC&]T(/~a|dvz{S/+4C *N;h~/^S{L u(lYd]SX Rq bѰ#y@^Jk V պvp03\lU- LwgN32zŵln[[L5A&i|I] u#uR[eh:;#/156suhrN. Xk(¹AQ.ck$+罝 hnkcaK_ޫWRCO;| o>[OOHAQ+1:7Foy,\-Ԋ5f7裯;R)YA}#/ %bkX\+s"'.\Pc=Į] $+]_~?#PqT#1XšjI ,Sf]|בv||%L#F|NƆ:YZ+ 4)(&l۟M[t8 o5Kbţ!j6lnM35!WlRb$KbU={WaY@2 v튪az2K9 C&dd Lkì[@C+6]13IPEQd!]0x> ҕa6퀩0ЛT}]Lt2ԛ ejAe9wd" hZnۑR &x-Atc=8 @L ShNpP4¾n|{z(yvȕmqfEdJ1ЗM5+xF 7RBSFOe2u߇XVBC1"K[ o-f_,:8R[k`|A`5$Q\!aiZkw'dnH.Y#lPSO$3R ?E"M!- !HP$J$%HLՕ;I_NƆN*yZw~ʓ_3?GYۿwneF'>c_,ǀڃ8v]/cc,,+r{B2oB;2^ .Z1:8.a29Z-a KxҭKo7<{b=ͽ/y=w䋯!1:zD0\b!˵_L_ `'3ؑ8_^cuӧya~n{8'L[WQײ @@:!=>$o&48}bif ܛߦ#aȒrWb3Wwi$b:"h+m)㪅lқIP(7my==ʽ,\]e}ά$fKB[@$Y 0 2 tU8qH MFUt]Aćh(P"2_^-$!/^\}s;Ez/*1px񙃼-f~b?xP= Z2rw.U90͍{[T(j>x2}dT1^nbbdCӽ/}.V7sޓg$IyY|E3^?PVw +nֆhN"}DRaz|׿rzLmͭ%Qr RKm 5$5HPGܓyo?ҍEbVd]zE}"hwJ[O FȞ38~dx9uq͖~= %u=S]<$|pmFYP2RAU$NgxA&\]TKkEJePHa7A4zP Ћ" WyEi+gB>BqRthZ6)vhek|}dqM۳.,RƱA< `D܆;5)bĶL`v^{َ (D$љ hw5_./)m+w$c:4c=p k$&9~dxwg6phlclRoil;ert?J8zgo|ŬॖB Ա pj;wr;E$|4'9oE,1ukMw!|ZRK_ *J;[4UB\FLM"IUjuKdD]ASQ*B\nj'vtr+>F9wiȈIg4t11 Qj(j[wY; bP 6g}Hd7E2feMMDTԉI*7r0lnK5kA csH<6_ 3Lt39M2R[*pw>z&V5 ]arf WV<'P APT\F(w>ZF|,j qIo'ܦ,K_%~=WSedBiϻ FbelDQ1I6(l˖ z2-c3I[Z (f x)ܥ@_A(K0&fjX/ w] 쓌ɣۃiDT`x8p˳ӴTZeiLk**JQpHrH<}T6%;ت[SsOM"IP*9~xֹ9>&̧[ZJ\X`/6XިK:˛MxÝTCֳU[)Q*7$5ޥ3A:} !j,*jgϐkOMՈGu:vE銒 KP4z<ߑPo{T[لeglⅎO>ԫc?FI$5ho?c\3B>@'`yf;%Tw,VVdTUAUd'(*^\q UUE(rܭo]W8"lӴI%dlmW=E3-ƴueض88Km:4$K} + 5|NaTUްWn^`Xv/,^Rgu/} E TR4H!]ZKB4?p@*$ӓgVVY/QdKzlpr?~ o]\ r^ P4!h&d:$+Fg*L" rIH%"]e _mnKomk23Kx}㗾&k JK,mT_fz4ͥl嫨21J^\.~z@jP O'Exqx:㴲KiR*;*neAVLaf8+KgB/"]45IAFz;2@G 5 vBtj,(R7M*gNBzj~9ͱ[MQ.Eh'1-6ҳB>ECxjfek/.r@?'!\~g+(T\aL:FPaIڥ$+;}VB!MXvFӛёa R*7[̲YfcJ._#&9`_\Ϋgqk6Gi,1:dT/'ӨZS&H%B#LˡPqk[e*&M lk#R`"_d>OS^^+Ow+xgݲe;N;υ_8qt|ʭu_^ a+eV)62TŚ{fp/;kbJd=6)*wwD̂) ۖfqW^po̰Y{_ITZ2|+ɊMЀRu"aϜg7zwIQ1tx<,I w^g}i VNY .eob'(8q4@MuY5L[yz:C~z|]7y^OK}}=[#xa@i1NK ۑGݝ># >#t}O "K>M7q{v1+۫\+ 124#n־X%8T"n$YaT=wb⋯e+[­dc3&zpj$K{P ZP$g^޿̙-t<(#iZD#&8vx`~)DžBˠhX' ӛ#B,*2՚V2|tn:RQ:8D*B* ӹ?Ej6uǩt/ؕk='WxgdX^u/+qku޻|s%ߵǜzo`fW0^x^-'KA?W df@Y7PC,m 1FL[;Z֮URzksc ^t3FÝ p=!$DKtB:6̣G3kyp`/=6H㍳sgȊ*7{zij.ⵑ0(*=gvջY|GdwcP{{q\FTq88qG pdoC} 4E"]^7I&¼qv7ͱ*wf6n4&;90@`.y.Rϻ9xӞe^5Õ\ o~ҏO+{$|*}]kW xfS8--v[[N,8'buana~̼-_s]eG/6kXo5Ʌ+'y/3CuVFͺ] >?:r0 +^}wEy0Vds#\b1Waeȓ'gMU5Y<=TU63ڟC}(W=.d.ebSa"a ToXL|ɤZ3y(=)ֲu\O|*5kwܚݦ;-^=ZiF2wu^L]n>G[>2o0ڿe?87xmTn6M}ggj i<ŠXot"+poBA% h<MK[ J~YHJz2&qxY٨"`IE ·(Ս9g$7 k|)F\9vx׃WQV-+ b+`aixT=}I5R$y8RSy00Kyo?8ɣ+﹬ovk/-eͲQTs\a.CC8χְv bLR[[E ]a|8]Ky?9dU j4_l-|wN:[g_fUe2ݏ ٪%!W ;b{;.+J_[mzȒǡ}XfI]f&@ %![sھDh!V6J*i*+[5K"4d#ݏэlŧ,^z]ig.,K=0x/0\f]`}YUJ3>3[4 F2t&tO{'2v 'Gtp]Wnr./sk%i l8I&Uy0nl6jXu[˘ Z)7^[wo~mY#v?~lODC/Pug5޺˂|m&lr;#VIو+TM21auB&JMh oF}ALn{$I2:˫9:!Gb-[YC1BO%:0)>Gd x83\P_gOOR⽋+`wm.IVpQX^+-){@S!pX|M\Z7Wy"p^2riʥYŭ Q֊/ 8kwXY+*fvqYlToUTﻲw?Hؙ'ߗ=ǝ~~ $;0cCd5n=-".,Iv SVA:hq^~6r rʪ;w!*)f4$Loog+$R9}|9wyvu@E-l;Cq7tIr}oC uJU r _4úfY^7%H$E'`k*VAVQiTqU(RTK(ܶUYOޭooy~4-{.[go-E7A5';NBήŪ$ \#'88W-xo $xv ݮj~FBU7n/<0q:N|ib[68e{OKmڕ/ɠhF8ё ӛ'!k<_0uXLEn5(TlEAR@‘rfm IQJ |PeS[M\L6.wݦX/y݋g79o'(~[T QXX'1K׍n^#G.TdkwD޴@4^˖Q`h}ݔ[gd/EX;IPy_]vi'y"#ѰF_&4C?`HC:,hX,ECm헯 E} wRo:K$c4«*U*"p$5HePc)]sֆ0/f1 8c_gs τ2W~{>['b| @G݁Ԟ\r{7lz2zC0| ٦`8-t`Q~$ˢ\v9ƇLuQ79~.| 1(|nA}PegNқs ,qA@!S(Ը5XMWgE];hc\rmJ*[)d uAGk#dm=sGZBB6"ՠ%;уCܞfa9'ڊ&VaQ=?Kg%Iq\fo'}Ļ ^CMeuw[;V (I2nO/V)sE;e[!;P{lD%ψPi8lmbttoߝ+oͲ-5U&80CHDo&Α} %T\[68y5- 29MTssBfԣȲ™ K@^*%nqUr1% jk.'EAd#žp`Fv[HZS);j?|_ǣ*0:'}Glqy@oJ$&l>pA! Q" 7pw0|0L. 9 UFHDB=$*Zm}!e.D5Ntїt|ߧXUf#[%_jRkEOrc)&omjM!0c7ŕ[l߂JVo}:8yb^޿¥4$CՑCBuvptEIh:uwW%IeW6k#?:^Ep0"js/{.aPqBF\;??̉#z)LSݸͱg XyNWd)p7ýb %$iJ("|G3=ۆYC(&&aC:7Ǎ{[4-k!$$; e.ܠP!_Q]|nm:-rQBQL/ًKnx4O=:JO3,7mh1۬#k*dt$mn]~AC{Nsln~>zgWYm/eDY0ݰ&(R0{'{JfPJ  ,DUmVms-V:{1:0:{Rh4JdǤG/[IPMﳰ5~v E`Ӗ(ItNcuǙYwqN$B9Sy8U.\Yiְ Y*V)o5EO3 [xer4e{+b!=ESNd#JWgcVLOǬ;M}}ҏwwegu#I% ^ v1# rw1ɩcCoU(Ө 2TEnZHĩz 0D({Q@ʢXjUs]\m΅eHHS؎ڎi(Osl]^+r򒀊n)_2531& ΅%Lm԰K4We/iT1z*^{w"Oŧ&Mp2 Ix3jE5Uُ_O%#gp> ή@_FI$Jg5E06lqgDZ XHۨ #,6-cEU;؞}1zb$!4Mh8K6g?6E5Ltph|zVȯ*_QoLFCܠU]I2&oBJAq*f^ QcIn]G&8g-bn)A‘$-$ ^}׳>_*]E%12-bIjjX _ 5LeX۪R9 ai=@2hNh%C G'bu3-vG8~hɑN1JZMUD J_}.onRY8#вFMCgv|->Q~tdžPU,ڻ,,pLsG̾mTA*# yhw3+U@إ߬4mm:]FT HW#!gyS] 1灲:"UIQ?^IQBrc: gG~8 ioIJD":0]ԝ(Di+Ԙ]̲Ya3WE:RjuWpg>׬9xΙ0 mWά` 6,K\·VʉL5j-tl 50ۖ$ֹo*ܙMozs]< `u @$Y,{7N_4~YO)>7qvyОmW;A-ЋtV|xu'&8WVQ5hX#H@aq{f ۅպ BHgOߓi:yn yp@adUG ,Wf}Oġ==;"$Ky.\]aic;VV)+ͺP&5dΤpKk%MvwzJ)0m (%uكOW_|?'oZW.;OoiܫLF➤E(}Z'H4&{> \ΞN#LtF5Z3Wvk : (ϡ.;>B"f`.g>X,fM=pzgCmK=]T/q;9,'Ym`6+\.`/,35&ҹ=eZks7Z}B. l=|~HuB';pf^)Z_ M-)ʟFMLebtEIÄBXUd}zB(OjۼHyD SGxh// kd% v1$@r*JDP5 Wda*Xub׬>j8.+Q8 !WZZ%n#(Zb_]WcU?si?S7='*|_?cğ*]*c#\kx~;E?,Ow90/T"O2> 79w9;@U5$U U+V7qgאpg̩Qlݭc ~ْ hJ*CI✽Lij8v(OKqg[;/F'X2[\IϧXq[/eE#+!^ɗ^J sSXݹ-P =P̖k$j$ؖ(.e[hS73UH_}]I~~'Χkߣί~?WUy3_ ;5d5KR -OIBS${ubQ<;GEl뀼g`z,>ĵ;ٶf*05hlAj!lX}R0zG GtxqW\ۨc6ꨜ Ԗm$I^߮is?v"QBrMMLt u (H$:ϜClnYY3h 8 X3ޝOp*o?]k?™k+׺q%3&!}j4na*]b |d {b<•ey (;F;ֱ 8ͪ`0aR^SeLYfej%'5xպ1uRv)g6%IM^Xߖi;/qV+hXue QYF7OOx+E>=z=6rMAٙ48ut3M^=3c]lViڠ6IR4:yo3ۨ`70sص"c>,D~؀O;q>b}^Rk?|`_CZuzCe^z5$Y s/iO,uZ[?o;?"O$&oƽ,,(L%8ܞ9,n q`  zTBܥe CX(7OagKإfq, YV}cp%އ>,4?sl/?3oMۿKՕvn'ӑ'SNt\] ) dN4]]Z4-z9(8 ^B\;` EE!Kyt]ÇW˰Qrݷ}6ݗ>4?4/o\aN΅>NGݹ WVX۪ K חe@\맯'[ت2}P:;" QqF :$IƙC JDsyfn IVyy|!4 Nm3,DW^=3MGqu{Uy81d>jae0AiU!-""hY-%hIm(E YV%54nj3yϽ3gAq[^p>=6IDAT}*PDF.%.bptrDD嶺I 1k5gfɫV! !چH 5`7#,F)~m{<.Gfټַ3fě80@6 ua)F%'69{DŽ04]؅:)oC+pHy%tEXtdate:create2023-03-08T14:00:34+00:00zD%tEXtdate:modify2023-03-08T14:00:34+00:00tEXtSoftwareAdobe ImageReadyqe<IENDB`vctrs/man/figures/combined.png0000644000176200001440000013327413347722504016141 0ustar liggesusersPNG  IHDRN^ sRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$@IDATx ~՜8_a #!)d _ Z6[ڣˤm$UeS٩f̘ cf,3>>w>z:Ϲgs>糟.w}7@`Eݿ/Oxi\ Xtou]a=G_M|`~lX\?|Bvo@ÿ,~v@ 4 ;qr=A}WZii2 袋2>pF>Ͻ59gaR2+J ,,3- 67r-?Oq=9yWzچ:%;d<9]߷?^xa!{A +$~Ї.&я'?mOCzAK_Q<p@T n@ӰBg|y!A'gv^BcpF/rmiaM6.+_fmo?g=YD@/zыN:$/}K)gk =p!\r%CЇ={I~w11Ur(^Շb.OxŔLj%Gy+ Wyٯ_%N̍7xQG/xwBÏ3J$/3^39Oh` !4Jq@|tCb @Ayy=)O!~#Ŋ~vX(zPn׿5O(=oڭK _(CvyjG\Ұ%$J;?o9JZv{页o~Be:B Ud.e;12Mo|~-^}տv:l& Ho2=\yH7D|e]bwşCbLC-)pX{J{Ygu`CS׽6Iovg0씜DiS<0i[}Fob-fka6!ƌNa.ͤv7A{ ZH 1gAm?ZCvp /.pDv'\oNźERwd [s=ӟMb%~;8AqP> /ӴV^QE{-k%W/&*KvmQ_qkEE\>|o2>SOŧ1lLm5iv7ߜPIF8Zk:sĬ)(jqcJF@5v^WOx4~ 07t~=IObZh1tʳR~hdѭݺV[3շm.=VOݝ_2Y^וc4[m_#skL*aChq@6\?S%O,dNqKh Ә')iqvװ Ao^2En?tʭZ;ޛL]srnQG ɤOq/n:ec|4vO~buKc̲G}- wxџY-6lc+Nx XzR0Ӟ4Nb/X~T7S>|k_ФzG /d8yBַ̻ %[x}bZR83ԧ~#=q84I[z/I4ib]T7)|'?I]! w7`3=ѰY3o!!훈FL^WN}U>oKlan֝CHlMGs[K `}g|饗TROu42.?A c7#F{y{`ҬL~ń]ON%:k}Xv%3ECK0c=hm0PBQ_o}&b7Y//Eߟ9@?+68<  m%r7{YOI|KA3b=1˾|TADv lܽaPkNC-.9UHM>"j\mj[4Yn4 W?C0V)3h!@i 0eMr$kY]P@I##j2>7EUxU?J<5Oz)\vŷv$ =~$qp++ܿ'M;BRq&`#Jb/Mj}sPHsT+' b"%=@GKxÀ=yD?xyh}"`7onO#8¹n=>__Q `Y;Ӱ1>}% 1fV{7 YDX~u]׮10(V% ъ"ZGbozj!`Mg䐓BSoo\\[o5k*h%chF`Xq}1My˱~dD1F83\X,-y+Ex: Ue:}%" "hXބfYV@,^z>#9CL~f1b"dЇ>+ Oxc쉰B82Vy|~!aN(hyRъ}c^uodDψg+L[ &)9rsk]**%Պӯ5 MM$SLs]ggg]҃G67hQHYV`TFD?묠?d+%&:O~rNЋ]?S`ҟ4Ԝ?OY59m_EXFKA3-9KL?:]> ǪsbTnKldkcWs}'X,<h$'`H9hL^Fo}x0BF(q5X^xE?XXVµ(|rAo:ޫdN 3)GaG\ƮȾad[(ʺNCy`~beІmx =ӏκɘcO9v}b LӖ8榛nb_͔;O!5ꚅgf:렧Nmlu ؔp[]T? \? u) _FGf |3"oAvVQd%03KQkZ C)a2#$Zi</B]tIx3ԙ*ҭtkeIzEÖwfvoVD@x%f[nȑ b˄}C11y( ʮy [FgJӃ#Dr\ph_#W;}oxh6%Bc~=Z8v^Vow"T?=fl̼3t8A[ijtQ3X°wgZvT)}1=ϵY#/E┻N2=tN[,|$i/%-^xLůd= dY@zm[kzqyNׁV4ǣjmFiR+F,ov%b\p1b*Zv9do2Ϙ~4"[EaqNlN3;{^1BcA!ybN!AVKm2ܺQX-V/Ń[ 8&|d~5icz[ J.쏠pT`Pz*I4nsd0JxduE+)E&L#NzTz vV"ğM=B/ ptW^f{yM0Z_>w H|a!rXS(E:$׸`kRoTi}3N;C9-ж(b b/K)xxߢP5X?/6N&RIڢ~U ҫ׿ Jsj% ?^cϳrwOx&Ní!b6XodҡmVxjx5LW@'"谹q&mظ-H!G% b Oʡ0R:`Nԧ>5zM%Q}Զ&Տgvǥ-𒣿~ "HXȽ]thA;ǎ^DdG.[)h(i~t?qxa-miy#m`~, KBiEZ VP+91)yq鷮,`E/"xC bD?<$atiդu}^pZlM%08;=CG=–h ڦ@iS+VH6It( vdnGi@ؤL~za .z(`UW]Ey,(BkM [N{s MEfF#3QDMӤ\C]L$-VhR`_ve@&&l#Ϧ"܋#hQH ~맗>}f<[]wyD@;k ,l rh}/!g'Bկ~u~i\B@)jNl8ۖthֵ.Gؑ :cHdd=`ooQBʏ 7u1~m?{HނȈ'(CJQt8+9?>ܵn`{{' rf|?%]n bmMLh▛I/$ 2 򝎮t#Z:ؐw+lf.] o~4Rz&j_L?Kn{{%Djlb?;k}Aj1 ͩTv$ cb-HDw$}AOz:uf(wyˇ?☮qoD~USydUeUʐ ڌoFB,LqOCN׭RCBn.J\8~YA-zhYtڋ\x߶PӖlZ8D+ )##j%ZHc>32[ nT\a^v[[S@>Uօx!qug.;9I^dfM5~2H PWWD%/ނ1J6K@}D|@sM: fۤJ+c@sG+T׏n V& 6z~n5 5aP]D_^K?@ҫX {mx%-0ݏ H/YG<ҁ4)F+@_&fƧmLFjNBGj;D(i,S/tnrQC/ٜwicy'~/m̯ Y0} BL,g0^.$ax6NcIB<Ǚ]!@Vg.ҏ\%SW\An?wGt}H3'J.NUSFvD-x!{`̥^:0]avm -rĜWM[jIS 7Kr KnI\ ZAiu"9x,TTy#Dbf3%]^W4cIɏl2-fL1,"orƲO\TS S0fCbq9[MozΆ㤉|>m 5kN[襷bx Jac`Ϥ2'JgKc{`]%NS^ͺćmLd=ct)L5s$Hǁn#c49o<-_\9K;{`aQK;9?Ci{a.,^`Cgb 3[e8X'heUz  0sg 7q!0@ 0Xgx)z3eWwEXgx+C>+Jβ|@ i(۸ΜƛnǏ~ܒv=~Ox/뺣B <Xt_(컝Bo勽tv_ҧ{{X9-;"?R.wq%ns}w)|M0QIHš(Ȯ{3}&D9l^֤^b ȿ ]8*?#6ZocDqu WhO|Fo} 7%,4\ފufi/ c HXhg1G%.kAnH|!iwaT7^yLي:4[g~]CxH,%"{&eq@蝿o{F;&~lO3>mo{- n֋?Frq^Gz2^Լ}i{L,ʃ< 11^zyjPQ u^*pPZ*gxh~E[oԧ>[{*P[oPpwk&DL$^ x~뮻|G No2R$4?!yHpz]in _ƙ^pN6s`[p?W U;iQy뭷[4mf5(_ѯb~v8{F^~*^]x!g@.Ds[CmBq3x^`>%2?k -S-f{ H {}[2dޭۢ} ꫻ DP55n-kx3/'Cin r!Zn#}?߳l9v @PҎ;XiMƒ{ Mʫn-੡<OSbfyc{5d66ɡcEK;$%\ !1_waJV[ bg=Wf&=9^Dzp_2]?/B GL3xU3`h:il<Bro|#fHΘ?$d ϥ{ţ|*ICytwI{PBn/K,#ScfP ?կ~5%9Z+S01ĄRK8r6X`H My%9͌()>9/.cñ%G 0p 䴌n&ܞ*fz)}ةbXaNGdkKb7$h*c>91V.s!pN13&p܄L^b"@E \IUi#믿]ZD>\e97ewWtI#AHp A$~x &NQ7mCu&Tz *=!KZ&fCy_ m2p,0$!)дLK&s,J^Z} +0tAOg`+@yDr^(!%gW|U&Z Mz7ZY]k @kONv/ZȥMuRhR,:Dњ3% ,㳘"aHhy8e283PPO&lC; 20B Vz%g<A^ysBXW@ B/dV8[(BXjz[ڼL#8-+OcAx{m#h]'l׽Y tK^(1<xo۩87l|0хUq2kg[ԟ@Z$-9 +fZDN[hם`Af!-A9jF[k.L]Py$t e AF|)bJDH%򶷽M_+NbEj(n.N?LNW2CXc\6w9Sq^LѽoR{ ܲ%;ƈ]֊~)gJiQia); O`G`rc_,0;  .,X*mD@ kQzQt!!`'S*O' >Z@Mk2Z$pDdaXdO:뮻 rb1z@|{,^;N@<Rχ #leǤ&,k:0Ze4'c(3Z`Yd(4 IG>'p*d ~, B YR~S ɍC_N:Ci-8]|_yH~p=B1g9s4~[*49e]A"!Qг۵Gf3P kby_ew2gwhojupkFoYqꩧ> ,86uG!Ӕ$qh(9pwu i}CzD-8U1(П Q1b/Ozғp:TJ,1(PEbN+*SD @{{kT#_΋_%bWt_'GB>k_ZX;85Sn.BD|p: >9N9ig1ĞEB1~D_h&l*hRMi>t  vX͘Ik$]DaVQB Jʃ*wƿvbO1aL0eq&^J0ƒ3`jqPLuY FVC +M=gk69@ ;;>&/IŐ^WIUD[յ+t$Jf I2[2*Ճ'ќV*][4%&ڽɒQ~ ѕ"`c;Cb @@?P/;r!@.XldR{5;X hiK#ݢ@TڌvɒԞ0^ DHPk\^Idotn!GEg\b ?siXƄԘ~c+?@qdGiI"*W^0I6!Ԁ|p1:{p_EW*Vi/:ݓR~PF *}чxk 3 ¶7sEL"eE.AnɢLW1(j^\r<^}h"4];#1GȳBRb侇>߭j br=t[D,iGzP],?:!g@ k֑y ^R i4BiE^*-PcDb>6Ӿ2vEm.AҢLC琼:뤈- Gd_;>r}i=A,"iHdH^c_}(cӈ2$@@̐ P &fH*/4Bd{)j6/fE,/V$"l*ff %W22._'w Z >?}}֦5blLo'Cb((g2u\{̧џ;;i/TNh Jm+fUNAP46Jۛ]?; *ة]̉ S1AC6Cb(}r-S1a2:]v*};H{Ï$#' e* }(dr1;و?GNJQ3L:;2,2}dA,R1p^L:\*LO[$|?u_hi4s*/fNI2 ]݃rvLo@%⧀Bgvoc\wode,J/,8Zç=?hbGf *hV4+j' Lv< [n99;2QHHD^)B|] u/rDHڱ#'$ T|+ t՞2حaI*-;Z[]8s9#Wg&a0a( vygĒ7/d Bz}z;ށGZy2(Ga<5:Ւv ;ny"Eh@@=]ʌA)d2 LC8%#.{071,{gy):10 ˄YAmȠ*0O)"Yiv}w/vaa;Suܢ°!"~04?>43$L%.qhIOzy+dVØUʗ-/9 },02b#AYa -_gcrQµ vQ ҄u诞RahលlAhLjX[;Y;&䠪x[Nz_}'cƇ b%_B>;γÌ5LmM~?UG!ϔ oqe HtL؟pyYeUx}ӋSCzKhI;F˴0WRvջcQ"t QmeTLli _y;,o솉 _Q 4:v"7[\! 'k8ng$ªR^X?w0,E{rLJf&`@F;#)^5R9"fkdM i:8``@赨M2w7Ͻ^jUϯA1'ʰ wͪPW}vCb!..]r/LI|&rT.=#gGv/B ""uu09a̰Q:5 Oh8 L!))5I-)jyȟK =M<ϔ.j.#"Jhl_S$y36B&hXcl6Wq%C] yr 6Z=Ղ*3d/J>]2L@ EAJnM 4*8舛fٙ.ZK/%Ցz/U9k8/e;&P[^6A w'/DŽS 7pԘ^®XbA龍6膛~oV}`_QHf5ePc |j72J2a>{Yus  VXW[u?vlJĤ'*?O8Gþ}N;>ݳ*p-^RO%In:J@:|ZZ,4[Zзۻ1iG XZ oif4Ӄ2&X8!ٹzS^#BR{`SXѺm@ _- ?zx';\A2ྲ j LQP'!D'98`4/ދy.9Q$f`1C C& iUW]5N9W2uϩ f_q\~Gs}Ap.|=]BV< ?\ں+p]^l0` ~?:4W!> ;PVzh[׭ev~I.|'Ӌ6ԉ/`Z=R?>6a|PO&d40 秉z P->"~ RM`)6њւbWp7ch=t]n|ukN/Tַ 6mi^/ųT@2etZPi1bv?tmeog.1DM u@v-f/]ώv?V bql\/84ƀڟ3*Ŏsq3#jyX+Mx>*oh._-,Tں*_pX4S3 ve@V׋0pqc-21~_pD|w3ŷ_tQ|W^yZa g4^ MKRKq*s嬷N ]E[bŠTD:M!6 ~%-PVd2GɊZ-29n nfG <{Pt659嬳=/ iu=MVJ_2zByTE-Cwy:Gq@IDATJ`~y`,}G # #%:vS(IbC@|L]lvs]}v[k~ПN w@u֏U1A >( e%S, Q (U57 *s±7Fnw{z{u3T8h 䈘{".3KN;`,ɝYwn֫ ?INm}OnLh";fʇ)ui9s f"^ ?ȋ8uT;)@Wм:s=c wז*HO~q#-(يsmaF3  <'b]<:WUwacKf [uW?{;!DJ,-'6kA яV"#찴p+ ) XSGzMUOPzI(X$RM5r-EzjhJKhRo]0v#S42V:ŗ6*:Y]>9DigqF1AL¶>C[y <&dt!)U|U ׎Rװ2UiOJEnhB.՛͔1knĵgV ˲z.,p?T4%- -FLJʔ2Tźصh.nd^L1@UZ[ 1&% {,O?Xl4A;f%Q[ou$"^D@bx4M"" 1sh\y#QJ(?SQy>ZuWKQDDxtluj>h{@1{5KWt_) YBk]M\7v"V` HKBG0şUS.m8J%et))k3q-s뢎 i;g1u)Y}lusEg8>d?CPd, ,xĐ餡!jXizȾ1;: ;C(8IN &IU(bjH1;'ƅ+ql>v*1DnnSſ7 D;tsX[YWQK9aoNy0y_c‼8hAOn)$5$N-[) l蒰$v$}{n~'u:c Pf Y >=(} sWK{ի^JTp[ll]1c[N hĭisyqit\&jj fk.=~;邏Nb@i͖ za fNr:R_KVcᢀF Mᅺav)hOVE4Đb;B9TwLWNPKZ,7 e[qD]6fÃag&\*&;gui"SֈL{;ށ^lc+^C9eArf;.%<+bGe/ f "RLɞ UsH0ʿ2@{6)ӯx S3fnnF O6\N[~RrvK ϒdeCL{$aA]؈$e~ -g \d(sG|gZ @xNl|I/E??\cQ}%/\$ G`gsZ\Ҡ,B1s)e0]+K l K|]V]@ M3~inZ'wZq3e*8'8 b=b-F1jw(|1!^bŘ9 O\`}ԍe{ꩧUc@6JG$!%Σ]{-QŹMTQ#cŲ-6@bǺບLG %<͍VB'w!D旌+ˆ^'O^*rчp* ADkULC ];se[,ai!wi~*0SLM?c(1P(7~B\AOl?$ LÉ]Qǚg~8|}Iꐇ;a`|g+cvioxum .B ^{A}6ۤ{OozӛHɰKTIAC0f?G?\3mꖶcU&ZbiuBE.dm9:\%4lm49(>~JԱpn&84 \p@$BgF3C= _`9U_I3ĦFI_x "/ڏ'Xq4@8# ҜL$ccHYЩ8ɑ\6䞙bW/+$QVr`%|bɱbJ)D,fG;"9v#Zmpsm3։h1(jkyzV -؊ty+MoV,9O ]~?N<%`DVx@0[@5D(=r V8,.zZS*!2g1A '.DffҌ]vK {KAGlJĈ*B aR 4Y]eHH?Kc.ڻ +ΥHfLXILtIW:S`xb6H%Lfq_̥wŠY2=H3(zF\BNH1!/ \6h9=&Lf Gꇧ EeZ+Fk(Exdr`vկ )YOY}`2vuq^"L@Iĉօ( ɨ Շ,h&RW +䃿 1>rADq~.kV_ed܁4VЋI5ПtjۈL(-b`캗 /Gs!M5̿S/I}^Wx]ɮjΆ>^11]>p(z';xC>0@ *PAqA E,(7K|EVZe(WB;,*Rd7xq݇ԧ6X4;mlI ^ҝ }ĸLdð`6UXf@Ɩ,/8k|Wb Bmz[zS=a5ax8)GԚ/P9sR rzY2ϣD{d0UNfH}%]bV/˒ᾱ -T̖bw8)>{^,=CkXMjk)X4Ґ~ gܙӉK#:>GWo$=BIZ뛻B/"VT1+5h@mQl͒0Ԋ?0}#qQ"Mo4ۧuBX$qwZ[c{1K3 EbP@/%ЋFO/h5ޙp0;ϴ|f]RTq?%M ^ Hc7ia">2i#GI8 RC2!!WRq2i1 nnz\r GCMco,JE IX!i3{+kr6x@7e yVĕ=* 嗴@ᭇB^ kq< ?#5ǟOKt c -r4:Q<.KwB[K"F5Ejԝ*. xMZ b9ibq_:&i2T/ܤ:Y-= >1+B?V5QDH?6b\;M(IX- +2"A窸*/"O* [/Eྊ@_b"ŐعFA,[vDb7WwM~k %(6~;?>>]*fϰۭdIq ȼOKڋI_4cY:EdQӉ(=ǣR|9b*OUsSGerϫri w}?z\ReZRLXRm4+va2)$N%àgl` GBmfqݡ_Z"8vFWQ0t&bS[G1nVJp̓F(8^^ge=-miRWB *X ,FT+ l Py[UP 11| 8 #DqqqU\ ;RNS4Uwk"] TFXڸ@ hQ Ze!74Q~sԷ*t!Ue80ؠj2:}z`sXz ߆nh vqG}R[VkZ5?_u|{=pKh]OAeu@C!_F6Pb@]Tn[oŊs !Rii^nJl54n}^a. ڄݝCleN ϺxQG,4Ǟu6Ey%/i\X3lښ7i9Ϻt>K{4tMa3,.>HfVB /{?YF{!Ok} ~blV MS}d,NYvr?{̖msb5 &pnL=[=]Xی >G/#ė{@]w]^Bq8mzFN)A8uUԙeq/}.a*\; &#Y |'rp͓zׄFQtCcbP],(lb+Y.Bvtzg$u=$L(>b М 6eLv=lg(mݶ#G/΄.i~yZ/@_Ҷ<RO;,d0m+;kiD? DiC\E%!! 7MjOnhJ%UWP`HZqv2񴻹 O[0fvsQ9i~p_ 9I#U\z;Ӥj\[ni, QrwarG8`Ebۯ)ZoI_(-]00ZW8m\6.w\8 /,IE UMPepԟ믿~~#$P'2~"15c"$\zQe \(ͬte 6+EL nvyKJXB[ s!CkOpGL|S?Glυ1@|$үzMWDfY̰baQT>w] 6W@`ߟvia1q"&"$`:as[8Ƒ D^.h<`$ cJ ֚Lܮmd:(4֥Od>V3Eo4B;եۜb gi>뭷bԟ1_.*HsKWa뼦Qo45qXYIue)ql>Ca7.\pA76;F&'|2܋.dA|D"ȡj#T ?c# pa@Ymի[Пc3 /K!X}pe \5@>Z-`ՋNgp^*8 GC'tTmKѾAve;G(j`G-"9uL:T#;]1?'maʻbOSEX&blQ/KPyx?< erd!Tv'r{.T*z%~{lg_7) /LWrÔ.j̽SN&^Nk45U@dJjXg1h$~\2^:.=]ܪL_yx(eԐ 4˝bsOg~tgDjM7t؀r79BA"u'_uUiv3@'pBiA_:vQlӚDiu45AirjAZ=iQ}14D3!NDLvܗtnL%yEg&di0X!{Q˱x :)Y: {K֘'pdm1"˟Ͻޛ/})Sqf JoCr'D]51h55)ĔwDЏ5b>lH(*R2}+pK|Σ!UY2'<Ƞ-F*a(j?  =.J/)F*x4 ٽ8'.܌~kV'< jvz]kʼtk>0na*,71&?!S!F2ffPZ2RfIqd 5`h>ф/t4W\qx (}UQRЈ!U4b82Yy^_T@Rc^nc׌m]T>׾Vc5gCW~ N1 \Ux w*ы:t>-% A>R)c{͟#-*?v~R&y{^C8^$[.4agfϸɭ;ә6 T$ZdWS/" 㾢 M @H,??svjoؓBTa0-+J얭HaDQPņң"뮻N}'@x}OI\|]\4K|-Ӗժp!D13JI\q۷w/*OmFGF-\%LT: O?֥tWcobkwk1 bLFŀr} s!ˀr..1b"{`6 ne_ʰY* `ݪ5\uc]yKUʔC_H>}bj=6ྉ`>HJa{a|nS+N,qlza2='F7D'4Mt *"^̋'W+ BVbaEȬqdkǬ~oJB/b>nH> X/ {'.Gզˠ$]1Zgu8]ZQ=ep%BJqu`BcI- S1 %;o3аhE!l"2w3A)ԕ!C,&%ЈpU^4G;ܐWqүc" }}8ڑ@ _#Pg9sU؀¿,U(G"r 1O^QQR1JO?6;:Fa2'9$BP Z2/3C-7}|Ӧ?s`RJ_lm<7fM0H2nN(ėimV[6\3_s7>rSiYJBIV?TT6 K3J~=ִӋЊ{l}%0OT>LAESPpD*"Д'9 ^B>_r&dW!Qe&crd2؟, &VBh c!(ĄG~( =#s=2WT r'}vTD瞓BVf^'`*Aغwt&Xݴ/r4s4 [~ dg[`ȌIS1Ჴh"U\$$[2S @l~.צnZc2Qx~pl]m;?m?^4s4 / -̢JcQL>ϑ_2φ%֚z#')LA (r^{̻H3p._6aҮռrõUyjh$\%W3jyMy駏fK='#%\|yyJR\{ C5 h ͷ^xaS'2=>vҺL1tW mj ``E$La\OFXH%kg5-!m.+iާ+oNNIGVcNHїK+S IYEJtZr4LX&d*j.w6B}5vf4f =A(] -Y}7})% 4:,`c#XZmP|UDp"h. dk CQMDlh/槷oUeY)pJbUR*bP҈OvQ8W@ +zd=8e`~sNr'hSkh~!wQB~Q/w}o}[_p$^9v(˯]vldCLDlHf,8- sMw;b;1f2_pq '@H4˜O<5{GjbwN9lJӴ`F|mދ5J+'#av3Թ }(0K+oqKV=y򕯼UPqX5q"""pqrڨAbs ; tv;S ߱k8∴h)$qxTyR8rXTzw5&Kad[7L.*_S/,酲*zX+AeoXvʖ~[;q f3!} CF 6 6:s=&T>mY 92F UT*:ɘ8~_9paa|]?b58%8DGHVBAd}bډ&ӀR[*д|Q:E_W+;y^AXzRBaA(/ȑ(:%Q ʘ^p$ φZHr[_bڨD`r..]IױXbVGS ?asf7r^d qg/K]Q i"xޫ,h=v'eIҚ'H&~r AҺ;O!؍$4'BGȐ ڨtmw9}`9Gg.~Do|c h&9%5iv6}AT4LK-(H/)C)G(;Zt9ty4֊r L>_}c|m喣 zO D;@Gmo ufaXPJ=-Ƀq% Yc8-1HnlΤݍmp ؔ`*To:$}ZNyu S]6 5[tR([z[p_ha*)* ߪ5cK ܉V_}u:\l!N>0,73|yV[Sx 4%_VunZa΀}#.b9!8zj1SEI814L44#q& P#+~sb p2AHU>w2YQU^94|^y۱ +J<^ %SW iV#qBG_W^yDۤbj%gtAR]p_B!-<+îvђ]+c*@ޛ1jmp+K̬j`JaeC1';tIJ#\pF8b h{@y]§(mYQ4ؤPu%OPྴ!=D4ڪv435Z1rPEK-V11N ȗ󡎙^35 i&) Rb@٪We]6>:V 5ƝZ^ ZWWۋ UZ@ؐ hl)lD**Һ)3fmb5# 8|o|_a^~Ҿ&J/8r ,(`R+s@Jdw001W\qjD=暴؝6.% f0D.ϴr8\hH7SH|Жi'U.tyH2kh MTETpʦyY*`Tޟ#1fg#.KW'-U$Lj8\l<-=zF0) OK1i ,u Zh]خ+d|, xaOzgbң K~2ի4 NxI"3IpvXrџwՇܦVߝ^eTh%؀[2 b;i-Dp;o׫a?Qt]p&D*4W.+fW3_ Hog2Hst> &m>VbN^k3Ҋ+9d:1;ʖW2WÙ jȘazuUW-|%'^JzZָ,^k%zcz9ꨣ`y֖lOa_vͷG/࿒4*ްacC/7RXnn 㥷J_{?i&{_Jw3 `?xu/+Oh'.Bunz9kOß@odY@~zYv-VZ{yba-J}u6uq]_sȝww1>p=`:lq}CluGV!;|_?uu 7)9&7yݓ~#Glޚsr'p7}p_{Z #g~ccn<9:fɺS~ZLٖ诡|:Z.[0Vly.U΀R@`i|$Eμ_`=n8&(tSu #n6BC|i SgQys@x!Q'Zlj>[o2+/&UܗC!TF <"҉qC'Z1-wha=s bLD>] `!wohUXLg񄩅uD?DB /t?6G B ?/n;[ Ҏ& b >PI0  -~\1b!>-@R"@xBy6-#9'zLD"!1i h^"plPMC ^{5W\DK/pacL"nTZ㉰t磰YQ煍fBa`ԥtTE: xҿU|QAf AWHzE";16bQɐibƟ!&%iHep%ʻ`ؖ<\3to-r~ՑE' NwR9ؙ9`vX8.21IS&M5?'v/pXуK]Ġ?»cXo3{(W+}JIHwtiPxPO[!t)2htjq CS@#s>\2BtQH[&A48D$L$a >8mƶAٺfMA b 1jM.(I+ՈјFAv[*j!Iֶ#Ü9|_o~!Ι33s< #S X..$}Ɖ:eK[{B .`Z sq42IDATT tR`'ƝVrCqEӟT,]j(X@ؠ]`HGi7acz햫ƾ jMvba{/C(b~fŌ%0kf7cΡgLm\hDǗ s b[{`mdo<'GlRJ_Xw93K=q!,;[TcgB(ܭJޜӧρƠDr ǫM8Հ^'* EbK8xM>fc >ȱ$ـ;'nJk0x?|I8?'*!_~_j`b%"l Y̤gMjs&bP)Y((7,M':@t Θ!X7 hnY:G˼.)gЅ@0 dƦA{}ݢ m3KW]u-,^ dq]2ZQ꿕ˣ'?9G}?C Hւkǩ;+%OUtFyt9\:g:?,lX6G&Ʊ[ՕW^C!bZtGKe E>OjSiğ8Xj\ '{X?쓝Gc_HCJO)Y~ 9w@-n$wډ~v%/mT;t:kR`P`q4s5#:;9٠5&1Psڔq+ S78*kTl 1u&}lII5z5z:;qza&HQ3ؿxW-T r|"=dL%cS~_ge@C5ȕL^xER'2I\o(޽3S7hr)(t/a hN>O\wu=nd}7 7'|nJFs~jM&da&k.(ek0XgW})|5[ n-2Lq)pGwǢ5 6Yj gPv\ fb&GsW\}M7N`t]^}{_5WV 3ɧKh@S@['vh5LKf٠^J0#jJ^Ζ}z1!~'85$+G咜6a$hF:/7!$7;y_-\WVZL<㮐]jOؙ6XEWNmZv#֫2 %þECh%'Y}a@##Ox#X.xb|'B& p )|d|E-)S^]d,~i4NQUr\2}7_p)"q˯k)n}}=JJR`Ig UxdĆq nqUwqGRbk r R6 ^o1pT+!fmS)8S`1Gf d&XBʁ/t(IʰI4qf^7:6IETgFa;cʹ]V/U T FR%I"AN|8 _ѿOSQabʨ24WPu eSQQ4gsO3n+ԨJcx%0G~ ">b, Q HbqF<`'xi;$/Mg=^<ςdS]YMZS)"Ttce<]p1f3R93W t!xXK}5IV l@?q.AYl5y/C*>0;Qʉ(vߓv<'޿g/yS5ql|nY(W =);v1o9×\G.5"=)0o"~%ap谞hz6[cj}Ϟ=twg9RNŭDko97thr Lh)m0T /~'^rk^p6gë&Y}TR`-( ZLhLb&yQu8#+yQk+VNvau;κrH Ag,\% ܹԈ֗dbQD6 3Ĩ5: [++N6 VZ =)Dh\ʉWLP3Jf,=\JAD?+SV ,8>L%ѹ4/ o_$L!} S+A}I#>.3ڔ1+7wb_8R)ȓ:%kX2H\KGԍ ȟVfʼe<3HKQP3xq)IQ K]5v$RweßsHRg!`ڞ3iڱ%g%P.Ufgyf;"cH!mW:*ERBr)@c|s;mpШؖXpK!ɋ쭝O'%~5pZ9ǗvUocѺ,/jR`R@vF9Ǩ2V>}K?YIe2%g8=F0?X.@K5yhrϳy¹61uzhVV ?|'PܡRV},<˳8"ggm2jat8Tʧ/~% #/qm\.9SU~l]R4%-kJur98}$߬qu$||%ܨҠcTӟ4ERyY0~GHV/}<ofsY2H'g/gidrꒈ*^ИoBGVjͤRw3(t\) 0RشQ=+] q-OhK0?f8DE+_qnD&|GE#vzA]}~p QSNC3xE1icw޴;jқf*HPT{lè~ZS)b ŜT'c㠋?`Fo:8h%Dw BQ4[Ql>v6fzSĦ"s<1.Ww #e]Fz}q׃bhymsυꅼW <. x2M'Qe6ظ`!E7X}5822y.k¾;ubУqu^Eo OApٿ?\ hU2_0(wg<ZsiK\ɵoPD?ß 9\tP_YK6ή.&F /n)JYaD;GawBPyc ͱ Z6^Jʧ(ohoZ+ 8ho߾FMygr(Hǒy.N~nq㨟ڹ[s:e~;<0|_PsBpEڸR`e傠3gPG^ +C=+ς%kS)-}(:h믿wF,&|)3l!=_ v HlVcd lJǑ0kY1[LO)bso`n8>杣H  ״~_ .m{.B#KUjN-6SO=Ԭ1יN;Mh/־U -B|UtO$߆1eZ']d{E{!ϛB:FY> |GOLFþ̿gu6ГbȚCwUW1Z%Q@mE5yüo:D՚JuG3 d,>> z}.ǩv,B6:Cyo˘7HD\Ԑ}!:ڼ9'uŹ8*h^Sxo1@\s'#5aKڕBT)S̙p13Jp|xJڽ@䬃b}6 6V_r`0ξrhų76j艧Y T ,l lQߗ% ATm5SjT >mgPt%4p NiR+tmV));˂*jIy2PpK_z1 6`TWqTn[&;gB%r>Hj߈w^䜰v Pcb%wtž6jMr)?Վtzrdue'u_ fȭz㗥yd; Aw \osfRv8V;plf $oBF;;@dneBK͑iƣŁ 5W}*3AXp$\TCmנpY)" 9ZN/uhuQ>&fIv%YY0_OO@d2ȔQ5 8!Q ꅱ+MyJEQO?¬IF d]o!sGkPX_}*"9_iK>yž-ZCU ,gRpS.`UujUC?'x饗ҧ,u{(DRI;|Qc[Jrtl O̒M\7(. `[ϕlB%QI(v_{p{w̴֒ (h'q";v#1Z}ٳ'&^WG)Vص:x/,ؖ6,{ bQއ9裏޳Ly.ھR`;RsmGž3W L@4}l ۾2oɻTwH(E|5ycJ{Qf~GMks > UQ33Ku<}WWCt\VG!(ԅP6Ԍ%Ÿt~np>ی9m쫩M֢gv0HpO$3Mo'L> OCMC@2d0W?>:AuHj('u[|P˂$1B;d~[HhE+5+ YxX8[MRnVLމ|ܛMńEK2]l)WCt".ٌ'LNx[!.;_kiaA@)Jm uBBfX],M:*ey ]JۅpW:>NW2ZذU5盃KK^')JS I:I;LIж^mIlh*\6;?m kh:qXQIhANc39(Vw Qګqnuc%?-frГ7.+ CAY?p6%1jt`A=- Cp-0˯|+O8h>b_BZʠB7\ؿDbrO Ֆ_rnh716wz_uXxax1oP|RqiF@-,)]2w\6b>ϥ'm&W؋~絯}*FK"CL}'$VCXmA餓NFKlkgYC@I$- `+QPf )α3dL7Rn8s0\- (Dh)|>~_ZKbQw>5?FhL3e}x(3%q'8~}j*,I*ttҫҋq/6z[:EGiD5D4\Yy}n)/.&F:2QzjZIEvaA]%_=q6 K,6f'9b(Hr-SQQAgbNF]•}Ӟָuu>uC1s`!o3-s:Lٽ/0ӆ!űNeJ/9 W  '/W3U[E፱XP•M޼2u] ϠCO% ':J2<3+=NEh6ܷo_g}tg 7l'!lNqjj,fɄ-ư(Xce(}V(z=t`!"g>±8jy[}5]-:JkB10e V/}ט؟qZ}Eq4}3ϴ-NE|GQg7R3zPAAx`?PoQ:At qJϢQg>@H9V`XvR$j׫1H٠25)3+51PnLh?/SoRTqgi%Ӛrܟ=sмMcfѪ*CE_oQp &^jK.uvN:eꪫ\|IS>v%a#_GYO’PQ} :GiLka1wB|:`ov :d#TgqFw Ѥ^|7tSpܻw/5*?ѱ8s=8A\_˾q/8º>믿 : Gt|d|Xq%_Sɽi33H"3el9=}ZGis(fVdkC=iOrl:oF!-d9tZo Ѵ e^4݀}m[z`;bUE,;+ y26ڎ48PDiáJ}-Ʈhg_;j:Vz`\?5uSAz[D+<}a%y([LIOL1e&k!ѷi̇?Z=Xd6`n 63U:]bw'ܹ[+{k7N¥R+h1i&+UR3ߢBܸA|2mCҠޚP qoLrO+0wyW_L |^a}GA~1Q쓳y晇z(PtNkk7M?n4p>CtxWa3ߛ2b`w!^GipM(F0ycdS?3>FsI@(wn~nW-oyKv NmgJCk Y=yF[T 2ڽ{9眃#,3}ī Q:˴,v9 XT'X ˨3ȳ>CPN,m{Y%Ba}(fqbg'm.*%n~jOט5Lnt8: 3d#QviLߏ` ="0l]"qDB(YfZf 11̀݌ VyC<"s8L'TՃx5HBB v(13kQ"ևb&ףzz*u}NF}Hl>fZ;N᳥"c(smΧ)C# 8SXk SdO?\{ |ːӜXk"< l\G0t.Fs6˷W.鐪Xv a詘MiH!=RKt*vŃH1:=u@umfMRY˔I3ohN ]1O)sz}攴ey9(J0!%dɽ5~K&nGjgc-E yƻ]8v'FDZ}$EEi"s]TXqCjԦ37m+c?uc9F4{!Eʟ"^eeMP:h١;rHlXO<1Ze60L|@yIt><y/b[->K8΃$[J@(OM3 ݙlFŇQ֓b2|5Rb, Nm@D7(X{ㄬX`tE'0J qIGl'ʒ[Or O?;F[G_mU5LpsX9:c:bc!}`'a棼3t26WJd$iY 4+,IKFw"O2W&coؘg+,<&2!K=/.,_)X|V cH!B7L[35jr q9*;=oZΨM T Ѱuze9 ~bV^FVt,*C,1keF4 .j>;zy ,upy( ^i ǃI,=$dئx*NW}T륏U d;%&l]t&9=av2+(yb֒WEhA( ѭ=m숓7\*Pc_E4@VQHe3a> q1߅|/HAƲx = F'B:SN`_o=#Ӎv.7(4bs>fadl ~, D/ *4 Zr$OLCv1H(21|/*(MYo]@vbeX?ޞ~6Yd^z)R-Eur"o)O>Y3ЉXe\ L~?~ov_uo>g]J=`[hF34^ֲQ^Q׊(ߣO_ƶnkFo5?῍wB}Ϟ=H6yz=w]eb߾eO|O/~i&t=,-e5.o}[}/Ң*Aߋ.=_ß5Fyow ۣx1QePY)P)0o)}TXVv(AB(8؜h|/O{B z[Ww}ɞ&6%ƃ+]r/t"p=^E,۞եy K[bmeA:{yՄE!W%0 $Ʌ b_Y${l;$PլE/ LTJc!Z"⮻$2(i~Q:ɒJte5muV-Z \l[-\8A ]$EK~֣-|/_: LN h~^|/y/%7L-N*f٤# a,:;PdإZc/1w:G@IPF : Αsft(LmdQR⬌bV^Gw*cJCx/zig~/pV~9Zc_|x.H|_cM@w@Y[®Ŭ1uX5 MkҲ/P 2/WQ49uhepˏU~r';ﵿj0'&0-os`Z3q 9r˾}΂u3$& XevK- AO_ZXobBzk:J  jFJ3[8# R䅹D&vkowT3IuY(%%C8 Qu>M}=zuFYGiSoP hS`T wh\pȨ>;翗[b|XA38?S8qa,K~WyRھL^H'6߸ol;P3.d k-y_h YT= ̹>{كFtNYm 9cM46o!/ҼENMԦLnF$vB.|@W⹞+4|:(AVuI[bpd![8Md5^"? &=o>z<.UW|_ʴ @iǻ0.nYG# 2#D ȕ ū,Udʴsc?6]v1}-:Ӆ8)!9}JCQ%~2mv6ue3--:|GdGnCʓ{+DTRWsdnwI80w;fXo@Q"Ba[QڐYA?A ^|}s  <|yu~#zqN>d0>d %h6K¶=oFN|FC/_E*?^H^[:@]3t\fل\2v;/;E':J$vGh_1amfx_6}5\_k}T~8!8Pw;[B>AtpK+_D;2TV1:XxC?Uen2u(E|״UfžǞY-a{R\2:W$+1nۯzի #umK1 D?LU2mX<ʥ%AR/I[*XIǂg4 rގrZK@Hr݃o>L>LeR2Nzk!G7!-<4"d1X}/BmU"VeԐ[1ԧ1TI o`ɩiE+M+^AhÔ9 }g\TE.((P)ѕWULo,?I-CLBh{ J@%mwo|/,0T kB|h]OD'WQXx@D ]ˏh"XnV)?0TC<>̝8\%e(lV<Ψb$In)=}/4{a[Ë^ؗ#7/AV'uV=E::Jw]26_I|!,0v\7geعs[vDo2 lifecyclelifecyclearchivedarchived vctrs/man/figures/lifecycle-defunct.svg0000644000176200001440000000170413505146267017753 0ustar liggesuserslifecyclelifecycledefunctdefunct vctrs/man/figures/vec-count-deps.graffle0000644000176200001440000000545514276722575020050 0ustar liggesusers\[WH~~u:0Ȓ`Ι# K$H}囮`X2LXRu}_WK6~^Mj {^0eu{͕mt?#ߋ^X]oڣZmp{l@Uc"IFZ+R`:‘}l^[a&́=M6W~ڸT7m7}FE{AO]oYhD T6ZÙ N]Lz+9Q胟6$K px(8z9Fk*\DpIFd5zb8 ۽?q2jR`х?mnǷoh@k7*I$S^3@&L q{[wrT3'bW LrњioG¬a总n-&է0..'72stJ=x}o,eSq|Bܦ3Q$%c "huB L_!ƿ|JN¨|ucz3,*m{wnFp/kW7*ʦ! "$%ONC=ּTACfnϾ L/" G]Q X)2ϥn:Y|L4vӫ7! q.}fJ*(iNe=7p(&!6, knb eS$)-cM(Rf̖XH) ̀9# Verqn3%Tm٦m[r!% Z\E_8*@5Ց_v&Y7PEyyTdKA3Q6jrbQQe3DTh~*=uQ 9lv&d0f8MNq/앇V$38Iǎ^ "u7fZ?ʟT>?1- IbCaQfbjF' 7DmYǀ6as@:Df_l N!l'O{<>ɣO,9` %}dZ1 8ۄܟXW3W6/?iiwwW<͟17ŨAc=tzS.L"0XܙS\dvZfs,g?0EGf6w!=7mrk?&3BA6Z6GHF /?e~"L[A!p17u)CHQ#TKlҴ- YJ.m"5ӽpFW='v ^^aSSG?a*cp<}rhC0*!lJn"Ŷ9/XbI{@^z?FQ#ӗ{nyIEI9AF h +_ jhrf;I?Yr?룳~{DJa | 4aAHcgH@cߩ? z7g#'%׌#1I`2/42TTǩ IƑ}v\$*;he% #rEn8ir^~Y7!u(,HğIC 7PKHĤ M,jIդNm3} X `|I3Ka YuܐzC 7't")=*fLX8NL>ɃK2tHm| S tnI&˔.7Fņ~DF 6IFf%֟h!B{9nbS,M 9:B$Xl-+X6 5칛=zF>T KHTud5DYX7S9μ.^oxIT_LV(&zٷ2tk, Yln;0T7EAxkR ;Rnp |7cнTb:3'^읗K&I_0_}Y}V<wWvv{A!aOy@[ko#-nض~msѾ8>?}뻃|.wO6wv1y{;iw?knU?_ܣ)+B7*Qna0zUiuƯ1mH>-A6vƼ mv[g#\G:1|G(@iɜ;y(Z(.U 4]LᓊTk1Aj.9IP'yOSe8!>W!V;AEwdT `w\uQH=YiTl4v'.daOiQ{,. wt5Cm#ƪk?$3t|IR꺾s4 o]A~XLjGP+[09q`WA?U ^ERJ̱NkaoZ4[vctrs/man/figures/sizes.graffle0000644000176200001440000000653014211412552016320 0ustar liggesusers]ks ]kFoN8& )" c3B'.[B:=}Q0{c?%h?'_ޞީ ZPa]Neu-X.u{(TFܸ: pߕ岤`TIu藻B/X]N@I #Tͺ:foNTݿYN:xOޜ+9s=g:)rIA/2䴼):TmtB1;m qYpELPYiժ,'EMu.2|#05_8Բ\ur7VNqŷk)sawJ+s6 }" vwN;^s^WMQy{L[ܮ+'[㙺i "7A۞9[hʑRL"E,G AÁT,cXpxWV,i"| *W ?U#sjo<-UZgnٍ%?0_nGW5i>XZkpy|ȿ^YZOJ}JZ3&[;ip 4DWNDYdQ S;@uPǔ1OT#оzʅM>h!luw-eWhe^:S|(:/QHLy/ˆJQT8VDAlX ]gLвX΢S1^gкZ~ )ۮL+8%EpY `p7-µJ]3vJ.m .;7A'g;1LIE A) /W?Q gВ$ "ا4Ṅ {5HTOiS>l[Y3Ӳ {,HKj E\@]QWjfr(1& 5 R((R+ 3<<1"2/& ߣO#6k<> lA/Ix=> ;pˆ5c)F,DܗT_KPgqL`&3QBd~/=OoMf5ѓmj(JŸbOF?'6>Z !*-]M^_>H4>la+!BO$! \ӳ`)䋊/xW3E ~6lDєXه B9%X8SoJzkwK0B^=}xU \^4Pom|*ΐnvt=ӹ*o ^{`]us=ٸ؈ "H$Qڽesl82r$VF9/A5UYnX@ý׆,{9qq:7] K5!D7mTW4=XQEPEW}6kj#m5Tu5?m v!jq˞j@^m]uK}n2 jOF.˗|WTn+-C>D)nŝІ"zhUHaFؗ4oaO/zޒyRɌJ-f<XA=PU[Yc!L|!>B|vK"sL /-*EW«dlQ-*EE_e-Ȓk^2 v6]Jx,a|ͤ#F>GH"=Loߦakm#2xz8k&>%GoR_$Qh !+^1#Gq,b1@+~h+=GxlڋtJ(i7 Ǒ5mABi8_{F L-٤(KZd(EԪ$/lR}vEl?/Yb8KKLI'dJ؊e;pB0!tN6Dٸ8"d44a)ÃFVJ%@хR2Hzc.>C:`ɶ9Od]!\5=չ4z*dž>DCa:F.9+Ta]8"~S71T|eC2eIh<úW7ѺXH2y|w?":3^HӸr<F|vctrs/man/figures/sizes-recycling.png0000644000176200001440000002012313532250523017450 0ustar liggesusersPNG  IHDRyyajsRGB pHYs.#.#x?v iTXtXML:com.adobe.xmp 2 5 1 2 Ү$IDATx pT M0X"B[S*L”2m(С 86mymZ[*@I0E*K20aBi)`,"CbxIxK]͒=g#gcfwIjjjRn(***//wݸ{ [=/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/@7c/.aT[lihh;vlvvC zdM,}>dʕ.]~׬Y"Cd!h_etXgqkG6F nȚjիWW^d}_g Y_ްa#<5))ɻ+w3fHMMxu:ujܹu ]yMbN#G 2G2!@P[sСaK\m6zӦMׯߺunzɜe˖eee566]vGL4I敞.k;wtfZPP!JΝ7nܜ9sNbD@aa=n]Jp)7sL#SL6l=߿ᄏ`z^HuOs*۫wխNHiV:_'563I 5[ Bf&uz 8D5"$.@Xo'FD$-!@ֈh3DYc"mfX k@@Y#Ldp kDI"`]Nd63I 5[ Bf&uz 8D5"$.@Xo'FD$-!@ֈh3DYc"mfX k@@Y#Ldp kDI"`]NI"&go=l   D$5!x& L )#$EEEkeee 8IY~m֛jH kvy#`V1M5 5R;ϼ0+@֘Rg kzS d3o 5fTFj7fTC@Y#dYo! Uy捀YƬ7*@H+))YrK|~5kD>82 D .+VxgwB 2< ON\zuK,gɌD}YNlذG\H~bf̘꟮SN͝;7;9;d: fN2G2dHv.nLܶPN:th#Ӿ۶m9ӦMׯߺunzɜe˖eee566]vGL4IPd;wtUPP;JΝ7nܜ9sNbqֲ)6nꫯ~뭷˗/gϞ=zT&… =.\8qC\ַ#F۷or.a S."7x @/^?~4 T?q8p@;4bp &7cǎ͚5K?5)Zϟ~x] d$tYe -G'I%$vΎk(Nݻs۷,Ns\C9XR_ޙeYȏD_"EEE΂²1ƳL'&GRQ:0麀k^-xL何5Ñ+>P)PFh6C@Y#Ldap! Txa08*@m4zO~z/%kb TngRum Ro.pYm}&  ߩw~|R-m{TS$'5:YӶ/G =Rb:{:0"]3՝̎TC ds|Z+wبԴuVbwlYm۶-ɓK,3fm6}C!ظf͚}kzwQXXX[[kCtMdˆ]Pe_{֍w_qbE̙3L2eذa߿ᄏ`zz5Ȅq' w7^1567+[X˚O ܪ[i{eۅ5)@'J8vνq]#Ɔ:5򚣭/ܣTpҟWz#kSh \V, lAJ~e^Q\1NNAb!UT{oc־݅kԺgݱ?D jZ|;" _էނOD /OyHx#~7~Nt^=mk5䩋@>H{ީ!*Kؑ(wƟ[#w3hYSLTC =dzFoPݳLgM޽ `K߾}Woܼ/233L.!ה~~\i_RR`lj5o~Gh3 STTT^^WVVfVd%nKœUȄ>8apl2} ekEd]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#d]# Ei所]Ʈ?"@H4DYcןH kty"`WOu5R:<+@:R)f kS)dN3O 5vFJ'vTG@Y#얧:[jvw˼jW;fSߝf0.@'`zR/ޮzzJ~k[ޝGtA:vjXqd&0G-B'ݠѫ۰ԒYc ~_ztwz)U_ԛ >-uV=oB1L(~?='Ҷ\sZ&GyRӖ}z 8N =R[XΞ_ٻ˽;ysT` kEgnT=nxZ+wبԴuVbw$`P߹sPPPJΝ7nܜ9sNOV1?dnNPm^>y^TsUgՑӿF|I-æ}񚚚,o71bٳg]sB49Ŀ1p^*or+VӕM s~9p&| U]]=a„CZ|| ;6k,R!ag/}Pp>}uw"6&77ѣ10f̘kرcѢEΣݯO|eժU}{?;xq<\CŧNUy`lJʹc16(aÆoʤI篥};ciG&\VrھU7,z'n6Fٳ?*͛,v~ x͟n_2yac9k<"uuލK27m_/ M&XY3|o~j IwSc*Yx]燍IOOٳSv!W XL&WnU^v5 L7 (xkťYm/9L=jNuwn k-TGW.+jaWOB$`Ym:*u]m=yU5jݳ?Inp..Ԫ?~:AwD'~?˒|L#ּYcޜ]xD~M»YaJB/WeO{kĉY'4n&GjݍzB/+ϫcGnvD krSwm5 jO݅ v; kS՟?qwIc?|BenVY_}ܯ_WUrtة)TŹIo7Wԣ8qGaZ"qř;;)++ Z,$@j6SEYc k5"`QOi 5T(@Xħ4AfX k,SAdf3U, 5) FP*F@Y#LdE|J# HlE">$@j6SEYc k5"`QOi 5T(@Xħ4AfX k,SAdf3U, 5) FP*F@Y#LdE|J# HlE">$`3V5FIENDB`vctrs/man/figures/coerce.png0000644000176200001440000002226213347722504015613 0ustar liggesusersPNG  IHDRm3sRGB pHYs+ iTXtXML:com.adobe.xmp 2 5 1 2 Ү$"@IDATxyUǕMҠ9BI" 2D3=J%JHIA(*Q%Ji4@za$s[ny-Z0b~&Md̘RJŋg(DoFd٫Wp 8w\IM ^x$e˖_|1Nz'% h ϟFTAd/C̙suwȑCf}Ν{ΐ T!C (SOs:0JH*\0U,}'H@%\B6ܹC=$bjժUuAu"ɓtM._<=n 9(0ł\s5v̙Č[z &$ |.]-eCLPjXb^gy:1Ǡ~@y1n:ն1d9Q vIٳ0wBjcGQ@I#Hl!p0Ukn߾aWBd<&`+ke7o22ۆ =z[]Ug%/ch"z/q+iX՛oYf͚I_~ɖ-۰aøQLt9YPr١^+#F]tEC_E (/ŪW_-o~Ie"̨mۚR p#l&LB0E`ѢEH6|F FAH5jILTv']vfB"҅8Xƨ~AO?[F$ӧO6Tu3/RDás{m都|gba~tY߾} Dr١^k8n'N7: yf~b_|IbpI !0ӚsӦM4(*X'Nʔ)c6T"F1&&Z.kuv0'78Åd̢SKr[re/M*[7x+5Ʉ XK1IX3 \.f-aX$[]"sz(-~m6B WX1A2X@Goɴ/ͨ2B"t.r!ra0oƖEAZ l`UV?&L{N]H5kpk𹕝 R#.n#YkF\l!Ot#0B<vڕ&L41p=pvc";DoH. ;AH@*QVQǜS>S[2ͭOMwj&=/RKf ÛA^<ް Wqgȑ# UL:l F^yf>i8'Iu9[Y}*$^Qps@|bN I ?nɁXxa;n}6IY3 ȇ1eﺳ69 Ɂ=;A\^xygGqPo%JA͛7K0&BGyL`GEt:|G*&s~8d9xW}լY ~ʝ;7͆Wei*~gML&#a_~x6Abɇnkw?>h4|H$ Qipl('1ŕQ9L3 I9T9kW2=^A8/%ܔ2ʫNW9"'GoT!?fW̒+@_|ӧts=tP!?P2ʟZql>UF#҂|Cqm a8ߺ)eT\{?} ǦyPqSSF*ک}ڵ|6NTt[oRpC/ ›x7gFwEn+" ijbȧǃ'ϓr->tSʨtx::jԨ͛:?,v馔Qlz{#Fy e˖mԨIIQF%4R5ITPC%q"EU2*2) 4Ta4h+7 &q>|le<͊MDۑ[N2^ۆZ+yæXnQB>-Իo߾/Onc;h )eTrCj߽bmFr$W ]^HIN:Ż<'qSTr;5uT u|#5 :GlٲÞإK^H"OqW!]({cƌ ;ٳdɒgիWwIl{֭v-_qp(HEs )J~/[s5ok9sT\ܹs0'^c/^nj6{%Jp'܉hB"+V Q L2@hr:u9(׃ EL` xhǎ  H?3s+`|sɼhѢ;Ңj֭͛ǁ^3fFš5k6oތͥd0>ӦM9r$ >+hDܹwE+߬YLH*v7}t Ġ':r̝;R4loׯX̙3S:˗<X  U@9Qq7K) d IEyozj:uDT0I3f(= ;,I}I[UhQ\Ӻui! 6B 5kLXl8Dnk֬CIءۿP2W\XeT:vX/_" ^^eիc&pMM81G ƬgΜ1p*9c>s:c#z''p(sfT޽M>}Zj*32L0{ŋKC,FիbŊF8Nnժ XMYSƸ0S/($ls2ʡF荒/ ڹKİ2t♏;FNom# Fr >q6v)(B]&bAqY>bQƇ ^(YȲΌ.\00İ:$PTO 8S 0OLFyNV("!<[!Z V(fZ%{2ʁ >[(3+k~V~'baAGA]`j$ؑ3V퀊(,$9xgF2;m[۠a3ƍC1k6%3>T-eI 'm'EY5J+%g2z:7301جڵk? ,L 56aO&.'XMT°!GF-J  0[)o4dfq2rpGlQbM2q~a ȔbOt/]r$6DL6 ,"{U_rUp42b,_|a#Gf!lv1XV̠Ef۰KjɹcċcZ6'Ico RJIq@ w8ni<]ulْ "vqA6ڃס\ӦM?[ZLcXֳJ1Md|Ν̞C 1( u`4Yu$|6'I~a>L{ؒA'6]yloЏƔrJLyOCۇL3:`?@1aPN19X V} ό].v'OL%Qx26uoj3H9c䷮إ{dWo^ [ٖr~k"_f l%p봐mwc{pr!]~r*XwX$8gN#eW#܆jff]Gcќ0# cЙ^`eǜ~d$1FъdwnzL1\ YqhYG1MW8+^ b 7=6GdHÒ O;d he}1rD[iA)RLXs 3„K#!̤U|eצΔ+}dcnL+tR7ul\҉% 3[ oELX0acc9C;&;Gd&>QY2Tc_5,.A @ N5v!>l2TPNʲ-ɒ/@O`8' 'oЛ |̗V D3J̘8ͱ85O* F 9Jp#:E ax{0"E(;[!J>S2Ͻʨ3$Qg T7E{Ώ )xآ'zE$G0q?QPHxȈBxQ@g}9YC@/ͭ8#rGSw(᥹gQh"e;4"2MU!rVPF9㣩;Q܊3(g|4Up2^[pF@匏*PFKs+(TE(wxinEe3>C@/ͭ8#rGSw(᥹gQh"e;4"2MU!rVPF9㣩;Q܊3(g|4Up2^[pF@匏*.{ׯmQ- a*A*1 Ӣ@ʨH4BeT iQE eT$Ā2*"2*Pb@@xZT@ RF?~]D"aÇ?~4" "eO?=gΜX4EŽ;vzXв@HSHdڴiEM@ E Dɨ,Y_~TR͚53fݻww߾}jբN:si*sUVeΜnݺJ2LfϞ}С 6n8o޼&I@ wdW"E  y'N#G… WZJԫWIҘ1c2et饗VP!_|mڴ! IFɓ'[l!CC /pРAgʔ)Ydɟ?:us%$AQd'"[.P3Çm۶"V58n޼PB[&i&xb7k,|W ^~eٳg{A]vqf͚f˖mذaQD"x!ǫoHrz=yG7Xb0DD5jԨAbGy}TwرΝ;}ѣ*EO`(Ac"zu֊+f͚4z꘎۷㾮*뚊y*<URDʕ]vr[vm6HZ]&?#Qٳgb *kk;f% ߸YN#G4`ݺueʔL95׳zݺuș^b]TlY/_1$3gl`̛7įXqƹsٳ͉͛3L6 ()kFu}ׯ<,X%IM6eڶl2: D':u! ޽{&M؅ϙ3'!![liժI/@5>^똼Ѱܹs׏YaK]tgV$ ^c{CpQê 4Q$ϤItO0ԿG s&$lĕ(Q T'N(P\-χ~hXw~XM 򤅮iưEaMv`曏?;4ElŋDbJ,i(v ۊ"CQ(z7٣c ~zǎyf\)ƤUd`ă|k׮eO:}46mW(gT4^HDJh("'QNh"e[4"2 MS"rWPF9i[Qn('t4Mp2-b_pB࿶iR IENDB`vctrs/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172613505146267021400 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated vctrs/man/figures/types.graffle0000644000176200001440000003254013370406002016325 0ustar liggesusers}isG_}ૃGG{>tG^|YvG>|aT4G?̦g,Nv@Dl]fqЧt}?|:\.ŰǏx6xEIz2EWqb?R-{# C?tkˋN9Mf^^oI]en*[ѣ {k}ge }/N\7͒gI:z*6b,}OC/>>fxFwp9SqVtO(y壕;/r:K5ctu" pO^}t뫗'/?y翏'?_i~{'?O^i>?9z71dM/ikF39s:xjJPN>Sw6;p Ázؖt]m27;wӳ8G M?mȐ.Ra?%WQqyvy'ϳxn‘Vy:JwǬ2[^1.oZNMo@dw>r_ovO+s=e0ى(4?B#Ϗ{߇1ORw>&ʑ`KΓ|x:9%#xx2KIh|п?&blGm|d%<8LҭIS˩ֆ<+~[-oao^&.͚ lԻ8I?Oi;X=ͽT{?uS[OZ{ۙSoɁ0r[p utfv^G>trPgN{;s긪⾪*}lfYG#kA>9)(悫B7y)4c [F^˷ {!e(#%V+p[8 (A РE\O`l`p aڰ- `Q681>`l`l-lXOnFv/sӶW%Bf$]gEwޯx0PpKF+pCCCGnlJja@ 8c{R6Ko$(\\#.28ܴ~5s08p2{L908p]bA^OZ28pBEL[nn^p{AЖ;ڤW7NrIܮ/ e1(ܷ}+5Щ&vq]Gmu]no ])v (Uن 5{,or݅65GrƅlrD}meƚ7fD`DMymre^Smc}vLҏO|g19`rХiyօkk=##BoGmzm=V.}إA@]xzޤ7Myzޤ{J[٢^tczqz kpP[mQNFz5P ߇|$7@_P>  BhʂC mӶTA5Ua`"yUJÎC^Rұ

:7>k;Ce9.|vrTxTP?R&Jl%{7X,Pլ1LLA@ic9pZsc#'He"SD5@H#K9=uZ"MTW*큘2iE6Jsb"ҒǂB sY...2eLsvU忙V5BNaz l!JGd͂qo~LEKEpNqpRƬC=a|*Be-u[<-sl؜t6XpxB/ƠAg-r(В~L}kz4W AIBein(Ra1$ p W^vR4P,)/3g 'ijϦ_|Mu8G,1u'99'{p;ovL6_g)7 L™}G`f]D9,y!jJ}spח{V)A(AVڒ'/; ge~P:%5D2(RF{Jh2,_+P Jh@u7Z-HusmepBt7ZY^# LQ ޥ퀣G=v¢18SE't߽ Zˈl)b<x5(2zGkºŽ-6C芞Q̍-ɸ@Փj o>j;5HvݕF W:Q(+(K K,i( p p.&P;Eҧj ҃}F"\lv\lv=xQ_aD RZ1e040VWi4S/УxZ!̥0E="exYMs,0m6z!<k(EqA)",z^Q( 0% b6¨^R*'r*g*,=DFKYV9#xJ9d"'mh'(w^edi Ę3g P*.)_}#;1gt>n}M1_}x=PK_=Ĵ젾eJո{߲V(h:SS~;Ut* y7s*OGs>`Yf\`#% 81(P.5b_.92G"Cӕ^''S֛d>~Y oi21 ZS4x[nFySW [ &4yk62"R^a[KJYxS VZkuqnU`DBX DFOI aKuJMҔi6>h}XiZDKB5kZdV zTCxO `jZv{I @ +@pnuPQK"!ԀX Arb "EQDHi@)e<@H` `^6$:b (w^[J-@h)X` XdK 4Ew( R>w W+QЗ@lP g\=+[qIr.i")iJͽW '@ ^VK[Iv#Ei ,'r PQ (sECa[ևNW{X^RV ,d}$`O?5\lT~~Kl#gMi^ڈPQK R".L\l:ɎBH,xm/#EQ`` \$HrcQVAɯ! 冏,@% z!@X,bB[n ,@ɮ NE ڄ%>>AF9A冏nXϨ _V$D*H,v +@"AI-ڔˀ*cD0V Y`V Xdi<OR]O irД oAabJ}HBK:r@x]P,X~ @0.Ҕ+۠@kqXN`9:EDȴ|ε"'p=٫~ uVKފ3o.~F (=W{+2g\>ss}#%†2}{GsF HCXd_2gd^^n|N%z&PgOd>}&L3ZF *(V"-o{6MTFDBZ @BqE ?Mɮ}'n\=|ΆZSJJ:T0.%)5ürッ][㚶_a?8ᄶDN\w`G"hpD_@*a:-zF-E)O!tdh҇V#\wEkW>- Z:Z"q 1@y㱻bUl Bs +ٺ-hbQas]-(>ha|- DZ"Dqrh1RQkDe ӳ`)DIᳰSqE(/DI+-pP= ̂qBYT@3T @ ދ Я`BxAg9XYg"cv"X`Ϫg{,v)Xp,1] =+~ϒE@iCIQCtx1 QAoFSzh!-'؝h-G =;-t{j˥ˍrF/Uw/hѣajkl]V[in E?VZH[gj%[;+X#b4XBw7XV[-N ^'̎VBJMo"KP{)'I{~O%^(:h ZP ^J{Q@=xO2ޓ$c$s$<r_zp1x]7%=[)+{@Dd z}fa]O֏F`waai_Y@O_H<0Ac<,tceqg=ԝlT5ىB|tW8g|035[X";IwǬ_^m͓ /I/i0MY2"r{x2KIh|п?&(-燣8glu}ϒq<̦3x 1 H3p?kY{?6N\O$D=PMYS(o$~ZxSiBu{FT-8 ogt[hl4$Mejc٘|nz6&[Iڔhf+nМ`?۳ApYÎg ٧Dv5m0pҜ4?ڋA 44O1֗ϫTd2"JH3K/"εqOI\,Ʊu8 HDBk= D̖ؒ-[^DV{Prz^Zc=Mю|m󉴞;9 ;9;y'\-qoݓqz2)[Wmcγ) ;yֈr.LSu{P]OY`J2oXLuL ))Z|܅lCG$لQFcccT FMi0] RF|ASH R8K4Lb*T\Jߓaaaj LuaSD*tؔH齛ړ:C u= (;2PPPm9h SoKB((ƨx)KL(Dg JE; R R RmUq@Jc9sBqʰRso랣>۝ATX6t*$@"Z<:dG*G@ v~}؄]£%lZuhV_o=("N;aɱq`8RF"e#rHJ͢THc#!#ql$:, '5*G/(HH,IHbWFbHJ]IHh$ 62FR0еRb"-* ]&Y2q?[J7+ݽR;ՆKRL.wi4>]r)u`k,RUL6HUU2F/og#adDHUU2HjF"UU,sl$d$VCau}FBF"HiP`Emi2pPd*.k}y[rݝig);C_;KƝ^NwZTД<-^!{٣{^ON rH)Kȣ^􎑇wӉ:5ξAAWӕ搦 w׼aaWә~!ב20 vQzL-'e Gww3]C҄ (;J}$ ^/žzwG۸\6b9b^vqXmWn\+{EFximXu /9}e#굍po"֚{3#lFXGؤ#IG`aa6:&muM5+2, (؈}6Ik꽍pF}6H#{ca{aa{aa{aaFXGب#FXG8wюu{ca{aa{aa{aa Bb͊雍IG`aaS=BmDzMZؤX#YbZkM&֚6iMl#5mԚPFXkڨ5FXkڨ5F$kMkMlGzo#H#G}s7)bMFsL7ľÍ&phM2;bH+uiMm&~{a~Dn⇽7懛"{o"!-x6g˯,-p{6tF}K>-?Zһ6G\)R/F!>I ϮHwӷø dU\7 FnxO2k Óqz2hx (0IKd]~'L-Xv(z0KHr~5% }rMy^4ڳ};lqyW-\p!aD JR c;U?&2]lε[srVZ: [=hǾvt˷,Q6xu]UΙl:N⛋#vOB3BjNqq" 8:X~] 㙿.ڼDSH%3:K+Qy Ep hCyFC`_t$,E#[EODP8/W5nG֐fR#l-7Ѷ,2ebtE~g@h<hz,8B0Ļ2HO}HGܠ۩ڃZqڰV[cWp.[>kp -A{X-ghnEԿo-#m׫Z X2"WMAY'T/'Z:4:BRзVx+ -`kW̧ɿ ')~V]K9PP!JBYAS#WgA@+8EWg 'ijϦ_|Mu8G,'99'{p;ovL6_g).787Nnrfd&W_ղջ #L$|J}Rg Rin0/1_b12ﵹ֤5>~7ԑ5 xdAWaAXP DHt XPhL˂ ,(&.(h(E+ A߱IQW,#0Rzg3F^Q((VXQ`E"0<:%\ŦV㺛e^ 4>}-pg BށdaS{,U}r' ,TІP0ty t<44;$<`2d0.9OC9%#⋇'$/ a,r<}~8JJxpxF_> g,l:+98`giR/_ׯDP%OYAAH| r75"ޫ_ۛM^1 tG0pX]^Y!I膝*Z㚾WN{{rMJ:34 cG<^gT$-R;ƫu*)mck)ÁjT MlTn[i+wϻ (-QE"2GAZ[\^ս/& s^QFFJ }}ksI6dsI6dsrS+MƁ7bQ$@ʀ#>e-/ ]X}K:MTm\,4O4Y2K;'/)`yP]ƲSSvMy.NƭE$S(+m>+׆~y7s7 ݘt-缷usI;L1Jb& yM -ۖUwyțdneT7vvDL@Yڏg؎zR/PC4<ǟݯa"CJ!:c@u \u \@I ZΩzR\ǀu \u \u zHj Luch6({{S*2v:0N>3R;AO9NyXd[&){{Fa)R NaB' eSeDȀ"duNz4,Zhɢ%]--[n/!\S C;)*A|N {Vb;Ktl\_4A9ߙ3g{ǚ舢a±_`: bIeDEXm(%y3ge[8+%nk X 3;-CHe[;"KG.#RswꝲJmoNg-Xn[YE40h^1[S|*aⓤ`+H)]>IrnvQq<'V^ǟY&$9W>4i_r >Ț/8&o4#<[oO&i% zc!%ot,X&zՈqBq~/&'FӬ&IM_IiB2څs\fӳtc~,M&`lexO`kCמ eqo%IK^1~:*I1X1;i;Y='d$0Lp't1>B̩ܫBx8K+0&]acgՔ1[&|qڜӽy[ iN=g;joilifecyclelifecyclematuringmaturing vctrs/man/vec_unique.Rd0000644000176200001440000000343414276722575014645 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_unique} \alias{vec_unique} \alias{vec_unique_loc} \alias{vec_unique_count} \title{Find and count unique values} \usage{ vec_unique(x) vec_unique_loc(x) vec_unique_count(x) } \arguments{ \item{x}{A vector (including a data frame).} } \value{ \itemize{ \item \code{vec_unique()}: a vector the same type as \code{x} containing only unique values. \item \code{vec_unique_loc()}: an integer vector, giving locations of unique values. \item \code{vec_unique_count()}: an integer vector of length 1, giving the number of unique values. } } \description{ \itemize{ \item \code{vec_unique()}: the unique values. Equivalent to \code{\link[=unique]{unique()}}. \item \code{vec_unique_loc()}: the locations of the unique values. \item \code{vec_unique_count()}: the number of unique values. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \section{Missing values}{ In most cases, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. This behaviour would be unappealing here, so these functions consider all \code{NAs} to be equal. (Similarly, all \code{NaN} are also considered to be equal.) } \examples{ x <- rpois(100, 8) vec_unique(x) vec_unique_loc(x) vec_unique_count(x) # `vec_unique()` returns values in the order that encounters them # use sort = "location" to match to the result of `vec_count()` head(vec_unique(x)) head(vec_count(x, sort = "location")) # Normally missing values are not considered to be equal NA == NA # But they are for the purposes of considering uniqueness vec_unique(c(NA, NA, NA, NA, 1, 2, 1)) } \seealso{ \link{vec_duplicate} for functions that work with the dual of unique values: duplicated values. } vctrs/man/vec_is_list.Rd0000644000176200001440000000244014401377400014760 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_is_list} \alias{vec_is_list} \alias{vec_check_list} \title{List checks} \usage{ vec_is_list(x) vec_check_list(x, ..., arg = caller_arg(x), call = caller_env()) } \arguments{ \item{x}{For \verb{vec_*()} functions, an object. For \verb{list_*()} functions, a list.} \item{...}{These dots are for future extensions and must be empty.} \item{arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been deprecated as of vctrs 0.6.0. \itemize{ \item \code{vec_is_list()} has been renamed to \code{\link[=obj_is_list]{obj_is_list()}}. \item \code{vec_check_list()} has been renamed to \code{\link[=obj_check_list]{obj_check_list()}}. } } \keyword{internal} vctrs/man/vec-case-and-replace.Rd0000644000176200001440000001335015072256373016330 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/case-when.R \name{vec-case-and-replace} \alias{vec-case-and-replace} \alias{vec_case_when} \alias{vec_replace_when} \title{Recode and replace using logical conditions} \usage{ vec_case_when( conditions, values, ..., default = NULL, unmatched = "default", ptype = NULL, size = NULL, conditions_arg = "conditions", values_arg = "values", default_arg = "default", error_call = current_env() ) vec_replace_when( x, conditions, values, ..., x_arg = "x", conditions_arg = "conditions", values_arg = "values", error_call = current_env() ) } \arguments{ \item{conditions}{A list of logical condition vectors. For \code{vec_case_when()}, each vector should be the same size. For \code{vec_replace_when()}, each vector should be the same size as \code{x}. Where a value in \code{conditions} is \code{TRUE}, the corresponding value in \code{values} will be assigned to the result.} \item{values}{A list of vectors. For \code{vec_case_when()}, each vector should be size 1 or the size implied by \code{conditions}. The common type of \code{values} and \code{default} determine the output type, unless overridden by \code{ptype}. For \code{vec_replace_when()}, each vector should be size 1 or the same size as \code{x}. Each vector will be cast to the type of \code{x}.} \item{...}{These dots are for future extensions and must be empty.} \item{default}{Default value to use when \code{conditions} does not match every location in the output. By default, a missing value is used as the default value. If supplied, \code{default} must be size 1 or the size implied by \code{conditions}. Can only be set when \code{unmatched = "default"}.} \item{unmatched}{Handling of unmatched locations. One of: \itemize{ \item \code{"default"} to use \code{default} in unmatched locations. \item \code{"error"} to error when there are unmatched locations. }} \item{ptype}{An optional override for the output type, which is usually computed as the common type of \code{values} and \code{default}.} \item{size}{An optional override for the output size, which is usually computed as the size of the first element of \code{conditions}. Only useful for requiring a fixed size when \code{conditions} is an empty list.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{x}{A vector.} \item{x_arg, conditions_arg, values_arg, default_arg}{Argument names used in error messages.} } \value{ A vector. \itemize{ \item For \code{vec_case_when()}, the type of the output is computed as the common type of \code{values} and \code{default}, unless overridden by \code{ptype}. The names of the output come from the names of \code{values} and \code{default}. The size of the output comes from the implied size from \code{conditions}, unless overridden by \code{size}. \item For \code{vec_replace_when()}, the type of the output will have the same type as \code{x}. The names of the output will be the same as the names of \code{x}. The size of the output will be the same size as \code{x}. } } \description{ \itemize{ \item \code{vec_case_when()} constructs an entirely new vector by recoding the \code{TRUE} \code{conditions} to their corresponding \code{values}. If there are locations not matched by \code{conditions}, then they are recoded to the \code{default} value. \item \code{vec_replace_when()} updates an existing vector by replacing the values from \code{x} matched by the \code{TRUE} \code{conditions} with their corresponding \code{values}. In this case, each element of \code{values} must have the same type as \code{x} and locations not matched by \code{conditions} retain their original \code{x} value. } \code{vec_case_when()} is often thought of as a way to vectorize multiple if-else statements, and is an R equivalent of the SQL "searched" \verb{CASE WHEN} statement. } \examples{ # Note how the first `TRUE` is used in the output. # Also note how the `NA` falls through to `default`. x <- seq(-2L, 2L, by = 1L) x <- c(x, NA) conditions <- list( x < 0, x < 1 ) values <- list( "<0", "<1" ) vec_case_when( conditions, values, default = "other" ) # Missing values need to be handled with their own case # if you want them to have a special value conditions <- list( x < 0, x < 1, is.na(x) ) values <- list( "<0", "<1", NA ) vec_case_when( conditions, values, default = "other" ) # Both `values` and `default` are vectorized values <- list( x * 5, x * 10, NA ) vec_case_when( conditions, values, default = x * 100 ) # Use `vec_replace_when()` if you need to update `x`, retaining # all previous values in locations that you don't match conditions <- list( x < 0, x < 1 ) values <- list( 0, 1 ) out <- vec_replace_when( x, conditions, values ) out # Note how `vec_replace_when()` is type stable on `x`, we retain the # integer type here even though `values` contained doubles typeof(out) # `vec_case_when()` creates a new vector, so names come from `values` # and `default`. `vec_replace_when()` modifies an existing vector, so # names come from `x` no matter what, just like `[<-` and `base::replace()` x <- c(a = 1, b = 2, c = 3) conditions <- list(x == 1, x == 2) values <- list(c(x = 0), c(y = -1)) vec_case_when(conditions, values) vec_replace_when(x, conditions, values) # If you want to enforce that you've covered all of the locations in your # `conditions`, use `unmatched = "error"` rather than providing a `default` x <- c(0, 1, 2) conditions <- list(x == 1, x == 2) values <- list("a", "b") try(vec_case_when(conditions, values, unmatched = "error")) } vctrs/man/vec_duplicate.Rd0000644000176200001440000000413114276722575015304 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_duplicate} \alias{vec_duplicate} \alias{vec_duplicate_any} \alias{vec_duplicate_detect} \alias{vec_duplicate_id} \title{Find duplicated values} \usage{ vec_duplicate_any(x) vec_duplicate_detect(x) vec_duplicate_id(x) } \arguments{ \item{x}{A vector (including a data frame).} } \value{ \itemize{ \item \code{vec_duplicate_any()}: a logical vector of length 1. \item \code{vec_duplicate_detect()}: a logical vector the same length as \code{x}. \item \code{vec_duplicate_id()}: an integer vector the same length as \code{x}. } } \description{ \itemize{ \item \code{vec_duplicate_any()}: detects the presence of duplicated values, similar to \code{\link[=anyDuplicated]{anyDuplicated()}}. \item \code{vec_duplicate_detect()}: returns a logical vector describing if each element of the vector is duplicated elsewhere. Unlike \code{\link[=duplicated]{duplicated()}}, it reports all duplicated values, not just the second and subsequent repetitions. \item \code{vec_duplicate_id()}: returns an integer vector giving the location of the first occurrence of the value. } } \section{Missing values}{ In most cases, missing values are not considered to be equal, i.e. \code{NA == NA} is not \code{TRUE}. This behaviour would be unappealing here, so these functions consider all \code{NAs} to be equal. (Similarly, all \code{NaN} are also considered to be equal.) } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ vec_duplicate_any(1:10) vec_duplicate_any(c(1, 1:10)) x <- c(10, 10, 20, 30, 30, 40) vec_duplicate_detect(x) # Note that `duplicated()` doesn't consider the first instance to # be a duplicate duplicated(x) # Identify elements of a vector by the location of the first element that # they're equal to: vec_duplicate_id(x) # Location of the unique values: vec_unique_loc(x) # Equivalent to `duplicated()`: vec_duplicate_id(x) == seq_along(x) } \seealso{ \code{\link[=vec_unique]{vec_unique()}} for functions that work with the dual of duplicated values: unique values. } vctrs/man/theory-faq-coercion.Rd0000644000176200001440000002713114511320527016337 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{theory-faq-coercion} \alias{theory-faq-coercion} \title{FAQ - How does coercion work in vctrs?} \description{ This is an overview of the usage of \code{vec_ptype2()} and \code{vec_cast()} and their role in the vctrs coercion mechanism. Related topics: \itemize{ \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. \item For a tutorial about implementing vctrs classes from scratch, see \code{vignette("s3-vector")}. } \subsection{Combination mechanism in vctrs}{ The coercion system in vctrs is designed to make combination of multiple inputs consistent and extensible. Combinations occur in many places, such as row-binding, joins, subset-assignment, or grouped summary functions that use the split-apply-combine strategy. For example: \if{html}{\out{
}}\preformatted{vec_c(TRUE, 1) #> [1] 1 1 vec_c("a", 1) #> Error in `vec_c()`: #> ! Can't combine `..1` and `..2` . vec_rbind( data.frame(x = TRUE), data.frame(x = 1, y = 2) ) #> x y #> 1 1 NA #> 2 1 2 vec_rbind( data.frame(x = "a"), data.frame(x = 1, y = 2) ) #> Error in `vec_rbind()`: #> ! Can't combine `..1$x` and `..2$x` . }\if{html}{\out{
}} One major goal of vctrs is to provide a central place for implementing the coercion methods that make generic combinations possible. The two relevant generics are \code{vec_ptype2()} and \code{vec_cast()}. They both take two arguments and perform \strong{double dispatch}, meaning that a method is selected based on the classes of both inputs. The general mechanism for combining multiple inputs is: \enumerate{ \item Find the common type of a set of inputs by reducing (as in \code{base::Reduce()} or \code{purrr::reduce()}) the \code{vec_ptype2()} binary function over the set. \item Convert all inputs to the common type with \code{vec_cast()}. \item Initialise the output vector as an instance of this common type with \code{vec_init()}. \item Fill the output vector with the elements of the inputs using \code{vec_assign()}. } The last two steps may require \code{vec_proxy()} and \code{vec_restore()} implementations, unless the attributes of your class are constant and do not depend on the contents of the vector. We focus here on the first two steps, which require \code{vec_ptype2()} and \code{vec_cast()} implementations. } \subsection{\code{vec_ptype2()}}{ Methods for \code{vec_ptype2()} are passed two \emph{prototypes}, i.e. two inputs emptied of their elements. They implement two behaviours: \itemize{ \item If the types of their inputs are compatible, indicate which of them is the richer type by returning it. If the types are of equal resolution, return any of the two. \item Throw an error with \code{stop_incompatible_type()} when it can be determined from the attributes that the types of the inputs are not compatible. } \subsection{Type compatibility}{ A type is \strong{compatible} with another type if the values it represents are a subset or a superset of the values of the other type. The notion of “value” is to be interpreted at a high level, in particular it is not the same as the memory representation. For example, factors are represented in memory with integers but their values are more related to character vectors than to round numbers: \if{html}{\out{
}}\preformatted{# Two factors are compatible vec_ptype2(factor("a"), factor("b")) #> factor() #> Levels: a b # Factors are compatible with a character vec_ptype2(factor("a"), "b") #> character(0) # But they are incompatible with integers vec_ptype2(factor("a"), 1L) #> Error: #> ! Can't combine `factor("a")` > and `1L` . }\if{html}{\out{
}} } \subsection{Richness of type}{ Richness of type is not a very precise notion. It can be about richer data (for instance a \code{double} vector covers more values than an integer vector), richer behaviour (a \code{data.table} has richer behaviour than a \code{data.frame}), or both. If you have trouble determining which one of the two types is richer, it probably means they shouldn’t be automatically coercible. Let’s look again at what happens when we combine a factor and a character: \if{html}{\out{
}}\preformatted{vec_ptype2(factor("a"), "b") #> character(0) }\if{html}{\out{
}} The ptype2 method for \verb{} and \verb{>} returns \verb{} because the former is a richer type. The factor can only contain \code{"a"} strings, whereas the character can contain any strings. In this sense, factors are a \emph{subset} of character. Note that another valid behaviour would be to throw an incompatible type error. This is what a strict factor implementation would do. We have decided to be laxer in vctrs because it is easy to inadvertently create factors instead of character vectors, especially with older versions of R where \code{stringsAsFactors} is still true by default. } \subsection{Consistency and symmetry on permutation}{ Each ptype2 method should strive to have exactly the same behaviour when the inputs are permuted. This is not always possible, for example factor levels are aggregated in order: \if{html}{\out{
}}\preformatted{vec_ptype2(factor(c("a", "c")), factor("b")) #> factor() #> Levels: a c b vec_ptype2(factor("b"), factor(c("a", "c"))) #> factor() #> Levels: b a c }\if{html}{\out{
}} In any case, permuting the input should not return a fundamentally different type or introduce an incompatible type error. } \subsection{Coercion hierarchy}{ The classes that you can coerce together form a coercion (or subtyping) hierarchy. Below is a schema of the hierarchy for the base types like integer and factor. In this diagram the directions of the arrows express which type is richer. They flow from the bottom (more constrained types) to the top (richer types). \figure{coerce.png} A coercion hierarchy is distinct from the structural hierarchy implied by memory types and classes. For instance, in a structural hierarchy, factors are built on top of integers. But in the coercion hierarchy they are more related to character vectors. Similarly, subclasses are not necessarily coercible with their superclasses because the coercion and structural hierarchies are separate. } \subsection{Implementing a coercion hierarchy}{ As a class implementor, you have two options. The simplest is to create an entirely separate hierarchy. The date and date-time classes are an example of an S3-based hierarchy that is completely separate. Alternatively, you can integrate your class in an existing hierarchy, typically by adding parent nodes on top of the hierarchy (your class is richer), by adding children node at the root of the hierarchy (your class is more constrained), or by inserting a node in the tree. These coercion hierarchies are \emph{implicit}, in the sense that they are implied by the \code{vec_ptype2()} implementations. There is no structured way to create or modify a hierarchy, instead you need to implement the appropriate coercion methods for all the types in your hierarchy, and diligently return the richer type in each case. The \code{vec_ptype2()} implementations are not transitive nor inherited, so all pairwise methods between classes lying on a given path must be implemented manually. This is something we might make easier in the future. } } \subsection{\code{vec_cast()}}{ The second generic, \code{vec_cast()}, is the one that looks at the data and actually performs the conversion. Because it has access to more information than \code{vec_ptype2()}, it may be stricter and cause an error in more cases. \code{vec_cast()} has three possible behaviours: \itemize{ \item Determine that the prototypes of the two inputs are not compatible. This must be decided in exactly the same way as for \code{vec_ptype2()}. Call \code{stop_incompatible_cast()} if you can determine from the attributes that the types are not compatible. \item Detect incompatible values. Usually this is because the target type is too restricted for the values supported by the input type. For example, a fractional number can’t be converted to an integer. The method should throw an error in that case. \item Return the input vector converted to the target type if all values are compatible. Whereas \code{vec_ptype2()} must return the same type when the inputs are permuted, \code{vec_cast()} is \emph{directional}. It always returns the type of the right-hand side, or dies trying. } } \subsection{Double dispatch}{ The dispatch mechanism for \code{vec_ptype2()} and \code{vec_cast()} looks like S3 but is actually a custom mechanism. Compared to S3, it has the following differences: \itemize{ \item It dispatches on the classes of the first two inputs. \item There is no inheritance of ptype2 and cast methods. This is because the S3 class hierarchy is not necessarily the same as the coercion hierarchy. \item \code{NextMethod()} does not work. Parent methods must be called explicitly if necessary. \item The default method is hard-coded. } } \subsection{Data frames}{ The determination of the common type of data frames with \code{vec_ptype2()} happens in three steps: \enumerate{ \item Match the columns of the two input data frames. If some columns don’t exist, they are created and filled with adequately typed \code{NA} values. \item Find the common type for each column by calling \code{vec_ptype2()} on each pair of matched columns. \item Find the common data frame type. For example the common type of a grouped tibble and a tibble is a grouped tibble because the latter is the richer type. The common type of a data table and a data frame is a data table. } \code{vec_cast()} operates similarly. If a data frame is cast to a target type that has fewer columns, this is an error. If you are implementing coercion methods for data frames, you will need to explicitly call the parent methods that perform the common type determination or the type conversion described above. These are exported as \code{\link[=df_ptype2]{df_ptype2()}} and \code{\link[=df_cast]{df_cast()}}. \subsection{Data frame fallbacks}{ Being too strict with data frame combinations would cause too much pain because there are many data frame subclasses in the wild that don’t implement vctrs methods. We have decided to implement a special fallback behaviour for foreign data frames. Incompatible data frames fall back to a base data frame: \if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1) df2 <- structure(df1, class = c("foreign_df", "data.frame")) vec_rbind(df1, df2) #> x #> 1 1 #> 2 1 }\if{html}{\out{
}} When a tibble is involved, we fall back to tibble: \if{html}{\out{
}}\preformatted{df3 <- tibble::as_tibble(df1) vec_rbind(df1, df3) #> # A tibble: 2 x 1 #> x #> #> 1 1 #> 2 1 }\if{html}{\out{
}} These fallbacks are not ideal but they make sense because all data frames share a common data structure. This is not generally the case for vectors. For example factors and characters have different representations, and it is not possible to find a fallback time mechanically. However this fallback has a big downside: implementing vctrs methods for your data frame subclass is a breaking behaviour change. The proper coercion behaviour for your data frame class should be specified as soon as possible to limit the consequences of changing the behaviour of your class in R scripts. } } } vctrs/man/obj_is_list.Rd0000644000176200001440000000770315120272011014753 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/assert.R \name{obj_is_list} \alias{obj_is_list} \alias{obj_check_list} \alias{list_all_vectors} \alias{list_check_all_vectors} \alias{list_all_size} \alias{list_check_all_size} \alias{list_all_recyclable} \alias{list_check_all_recyclable} \title{List checks} \usage{ obj_is_list(x) obj_check_list(x, ..., arg = caller_arg(x), call = caller_env()) list_all_vectors(x, ..., allow_null = FALSE) list_check_all_vectors( x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) list_all_size(x, size, ..., allow_null = FALSE) list_check_all_size( x, size, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) list_all_recyclable(x, size, ..., allow_null = FALSE) list_check_all_recyclable( x, size, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env() ) } \arguments{ \item{x}{For \verb{vec_*()} functions, an object. For \verb{list_*()} functions, a list.} \item{...}{These dots are for future extensions and must be empty.} \item{arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{allow_null}{Whether \code{NULL} elements should be skipped over automatically or not.} \item{size}{The size to check each element for compatibility with.} } \description{ \itemize{ \item \code{obj_is_list()} tests if \code{x} is considered a list in the vctrs sense. It returns \code{TRUE} if all of the following hold: \itemize{ \item \code{x} must have list storage, i.e. \code{typeof(x)} returns \code{"list"} \item \code{x} must not have a \code{dim} attribute \item \code{x} must not have a \code{class} attribute, or must explicitly inherit from \code{"list"} as the last class } \item \code{list_all_vectors()} takes a list and returns \code{TRUE} if all elements of that list are vectors. \item \code{list_all_size()} takes a list and returns \code{TRUE} if all elements of that list have the same \code{size}. \item \code{list_all_recyclable()} takes a list and returns \code{TRUE} if all elements of that list can recycle to \code{size}. \item \code{obj_check_list()}, \code{list_check_all_vectors()}, \code{list_check_all_size()}, and \code{list_check_all_recyclable()} use the above functions, but throw a standardized and informative error if they return \code{FALSE}. } } \details{ Notably, data frames and S3 record style classes like POSIXlt are not considered lists. } \examples{ obj_is_list(list()) obj_is_list(list_of(1)) obj_is_list(data.frame()) list_all_vectors(list(1, mtcars)) list_all_vectors(list(1, environment())) list_all_size(list(1:2, 2:3), 2) list_all_size(list(1:2, 2:4), 2) list_all_recyclable(list(1, 2:3), 2) list_all_recyclable(list(1, 2:4), 2) # `list_`-prefixed functions assume a list: try(list_all_vectors(environment())) # `NULL` elements are not considered vectors and generally have a size of 0 try(list_check_all_vectors(list(1, NULL, 2))) try(list_check_all_size(list(1, NULL, 2), size = 1)) # However, it is often useful to perform upfront vector/size checks on a # list, excluding `NULL`s, and then filter them out later on list_check_all_vectors(list(1, NULL, 2), allow_null = TRUE) list_check_all_size(list(1, NULL, 2), size = 1, allow_null = TRUE) # Performing the checks before removing `NULL`s from the list ensures that # any errors report the correct index. Note how the index is incorrect from a # user's point of view if we filter out `NULL` too soon. xs <- list(1, NULL, 2:3) try(list_check_all_size(xs, size = 1, allow_null = TRUE)) xs <- vec_slice(xs, !vec_detect_missing(xs)) try(list_check_all_size(xs, size = 1)) } \seealso{ \code{\link[=list_sizes]{list_sizes()}} } vctrs/man/vec_rank.Rd0000644000176200001440000001236314315060307014251 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rank.R \name{vec_rank} \alias{vec_rank} \title{Compute ranks} \usage{ vec_rank( x, ..., ties = c("min", "max", "sequential", "dense"), incomplete = c("rank", "na"), direction = "asc", na_value = "largest", nan_distinct = FALSE, chr_proxy_collate = NULL ) } \arguments{ \item{x}{A vector} \item{...}{These dots are for future extensions and must be empty.} \item{ties}{Ranking of duplicate values. \itemize{ \item \code{"min"}: Use the current rank for all duplicates. The next non-duplicate value will have a rank incremented by the number of duplicates present. \item \code{"max"}: Use the current rank \code{+ n_duplicates - 1} for all duplicates. The next non-duplicate value will have a rank incremented by the number of duplicates present. \item \code{"sequential"}: Use an increasing sequence of ranks starting at the current rank, applied to duplicates in order of appearance. \item \code{"dense"}: Use the current rank for all duplicates. The next non-duplicate value will have a rank incremented by \code{1}, effectively removing any gaps in the ranking. }} \item{incomplete}{Ranking of missing and \link[=vec_detect_complete]{incomplete} observations. \itemize{ \item \code{"rank"}: Rank incomplete observations normally. Missing values within incomplete observations will be affected by \code{na_value} and \code{nan_distinct}. \item \code{"na"}: Don't rank incomplete observations at all. Instead, they are given a rank of \code{NA}. In this case, \code{na_value} and \code{nan_distinct} have no effect. }} \item{direction}{Direction to sort in. \itemize{ \item A single \code{"asc"} or \code{"desc"} for ascending or descending order respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"asc"} or \code{"desc"}, specifying the direction for each column. }} \item{na_value}{Ordering of missing values. \itemize{ \item A single \code{"largest"} or \code{"smallest"} for ordering missing values as the largest or smallest values respectively. \item For data frames, a length \code{1} or \code{ncol(x)} character vector containing only \code{"largest"} or \code{"smallest"}, specifying how missing values should be ordered within each column. }} \item{nan_distinct}{A single logical specifying whether or not \code{NaN} should be considered distinct from \code{NA} for double and complex vectors. If \code{TRUE}, \code{NaN} will always be ordered between \code{NA} and non-missing numbers.} \item{chr_proxy_collate}{A function generating an alternate representation of character vectors to use for collation, often used for locale-aware ordering. \itemize{ \item If \code{NULL}, no transformation is done. \item Otherwise, this must be a function of one argument. If the input contains a character vector, it will be passed to this function after it has been translated to UTF-8. This function should return a character vector with the same length as the input. The result should sort as expected in the C-locale, regardless of encoding. } For data frames, \code{chr_proxy_collate} will be applied to all character columns. Common transformation functions include: \code{tolower()} for case-insensitive ordering and \code{stringi::stri_sort_key()} for locale-aware ordering.} } \description{ \code{vec_rank()} computes the sample ranks of a vector. For data frames, ranks are computed along the rows, using all columns after the first to break ties. } \details{ Unlike \code{\link[base:rank]{base::rank()}}, when \code{incomplete = "rank"} all missing values are given the same rank, rather than an increasing sequence of ranks. When \code{nan_distinct = FALSE}, \code{NaN} values are given the same rank as \code{NA}, otherwise they are given a rank that differentiates them from \code{NA}. Like \code{\link[=vec_order_radix]{vec_order_radix()}}, ordering is done in the C-locale. This can affect the ranks of character vectors, especially regarding how uppercase and lowercase letters are ranked. See the documentation of \code{\link[=vec_order_radix]{vec_order_radix()}} for more information. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_order_radix]{vec_order_radix()}} \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ x <- c(5L, 6L, 3L, 3L, 5L, 3L) vec_rank(x, ties = "min") vec_rank(x, ties = "max") # Sequential ranks use an increasing sequence for duplicates vec_rank(x, ties = "sequential") # Dense ranks remove gaps between distinct values, # even if there are duplicates vec_rank(x, ties = "dense") y <- c(NA, x, NA, NaN) # Incomplete values match other incomplete values by default, and their # overall position can be adjusted with `na_value` vec_rank(y, na_value = "largest") vec_rank(y, na_value = "smallest") # NaN can be ranked separately from NA if required vec_rank(y, nan_distinct = TRUE) # Rank in descending order. Since missing values are the largest value, # they are given a rank of `1` when ranking in descending order. vec_rank(y, direction = "desc", na_value = "largest") # Give incomplete values a rank of `NA` by setting `incomplete = "na"` vec_rank(y, incomplete = "na") # Can also rank data frames, using columns after the first to break ties z <- c(2L, 3L, 4L, 4L, 5L, 2L) df <- data_frame(x = x, z = z) df vec_rank(df) } vctrs/man/new_list_of.Rd0000644000176200001440000000124015120272011014751 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{new_list_of} \alias{new_list_of} \title{Create list_of subclass} \usage{ new_list_of( x = list(), ptype = logical(), size = NULL, ..., class = character() ) } \arguments{ \item{x}{A list} \item{ptype}{The prototype which every element of \code{x} belongs to. If \code{NULL}, the prototype is not specified.} \item{size}{The size which every element of \code{x} has. If \code{NULL}, the size is not specified.} \item{...}{Additional attributes used by subclass} \item{class}{Optional subclass name} } \description{ Create list_of subclass } \keyword{internal} vctrs/man/faq/0000755000176200001440000000000015157552632012750 5ustar liggesusersvctrs/man/faq/user/0000755000176200001440000000000014315060307013712 5ustar liggesusersvctrs/man/faq/user/faq-error-scalar-type.Rmd0000644000176200001440000000305314315060307020477 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` This error occurs when a function expects a vector and gets a scalar object instead. This commonly happens when some code attempts to assign a scalar object as column in a data frame: ```{r, error = TRUE} fn <- function() NULL tibble::tibble(x = fn) fit <- lm(1:3 ~ 1) tibble::tibble(x = fit) ``` # Vectorness in base R and in the tidyverse In base R, almost everything is a vector or behaves like a vector. In the tidyverse we have chosen to be a bit stricter about what is considered a vector. The main question we ask ourselves to decide on the vectorness of a type is whether it makes sense to include that object as a column in a data frame. The main difference is that S3 lists are considered vectors by base R but in the tidyverse that's not the case by default: ```{r, error = TRUE} fit <- lm(1:3 ~ 1) typeof(fit) class(fit) # S3 lists can be subset like a vector using base R: fit[c(1, 4)] # But not in vctrs vctrs::vec_slice(fit, c(1, 4)) ``` Defused function calls are another (more esoteric) example: ```{r, error = TRUE} call <- quote(foo(bar = TRUE, baz = FALSE)) call # They can be subset like a vector using base R: call[1:2] lapply(call, function(x) x) # But not with vctrs: vctrs::vec_slice(call, 1:2) ``` # I get a scalar type error but I think this is a bug It's possible the author of the class needs to do some work to declare their class a vector. Consider reaching out to the author. We have written a [developer FAQ page][howto-faq-fix-scalar-type-error] to help them fix the issue. vctrs/man/faq/user/faq-compatibility-types.Rmd0000644000176200001440000000517614276722575021171 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} stopifnot(rlang::is_installed("dplyr")) ``` Two vectors are __compatible__ when you can safely: - Combine them into one larger vector. - Assign values from one of the vectors into the other vector. Examples of compatible types are integer and double vectors. On the other hand, integer and character vectors are not compatible. # Common type of multiple vectors There are two possible outcomes when multiple vectors of different types are combined into a larger vector: - An incompatible type error is thrown because some of the types are not compatible: ```{r, error = TRUE} df1 <- data.frame(x = 1:3) df2 <- data.frame(x = "foo") dplyr::bind_rows(df1, df2) ``` - The vectors are combined into a vector that has the common type of all inputs. In this example, the common type of integer and logical is integer: ```{r} df1 <- data.frame(x = 1:3) df2 <- data.frame(x = FALSE) dplyr::bind_rows(df1, df2) ``` In general, the common type is the _richer_ type, in other words the type that can represent the most values. Logical vectors are at the bottom of the hierarchy of numeric types because they can only represent two values (not counting missing values). Then come integer vectors, and then doubles. Here is the vctrs type hierarchy for the fundamental vectors: \\figure{coerce.png} # Type conversion and lossy cast errors Type compatibility does not necessarily mean that you can __convert__ one type to the other type. That's because one of the types might support a larger set of possible values. For instance, integer and double vectors are compatible, but double vectors can't be converted to integer if they contain fractional values. When vctrs can't convert a vector because the target type is not as rich as the source type, it throws a lossy cast error. Assigning a fractional number to an integer vector is a typical example of a lossy cast error: ```{r, error = TRUE} int_vector <- 1:3 vec_assign(int_vector, 2, 0.001) ``` # How to make two vector classes compatible? If you encounter two vector types that you think should be compatible, they might need to implement coercion methods. Reach out to the author(s) of the classes and ask them if it makes sense for their classes to be compatible. These developer FAQ items provide guides for implementing coercion methods: - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. vctrs/man/faq/internal/0000755000176200001440000000000015113325071014547 5ustar liggesusersvctrs/man/faq/internal/matches-algorithm.Rmd0000644000176200001440000002456514315060307020640 0ustar liggesusers--- output: html_document editor_options: chunk_output_type: console --- ```{r, child = "../setup.Rmd", include = FALSE} ``` `vec_locate_matches()` is similar to `vec_match()`, but detects _all_ matches by default, and can match on conditions other than equality (like `>=` and `<`). There are also various other arguments to limit or adjust exactly which kinds of matches are returned. Here is an example: ```{r} x <- c("a", "b", "a", "c", "d") y <- c("d", "b", "a", "d", "a", "e") # For each value of `x`, find all matches in `y` # - The "c" in `x` doesn't have a match, so it gets an NA location by default # - The "e" in `y` isn't matched by anything in `x`, so it is dropped by default vec_locate_matches(x, y) ``` # Algorithm description ## Overview and `==` The simplest (approximate) way to think about the algorithm that `df_locate_matches_recurse()` uses is that it sorts both inputs, and then starts at the midpoint in `needles` and uses a binary search to find each needle in `haystack`. Since there might be multiple of the same needle, we find the location of the lower and upper duplicate of that needle to handle all duplicates of that needle at once. Similarly, if there are duplicates of a matching `haystack` value, we find the lower and upper duplicates of the match. If the condition is `==`, that is pretty much all we have to do. For each needle, we then record 3 things: the location of the needle, the location of the lower match in the haystack, and the match size (i.e. `loc_upper_match - loc_lower_match + 1`). This later gets expanded in `expand_compact_indices()` into the actual output. After recording the matches for a single needle, we perform the same procedure on the LHS and RHS of that needle (remember we started on the midpoint needle). i.e. from `[1, loc_needle-1]` and `[loc_needle+1, size_needles]`, again taking the midpoint of those two ranges, finding their respective needle in the haystack, recording matches, and continuing on to the next needle. This iteration proceeds until we run out of needles. When we have a data frame with multiple columns, we add a layer of recursion to this. For the first column, we find the locations of the lower/upper duplicate of the current needle, and we find the locations of the lower/upper matches in the haystack. If we are on the final column in the data frame, we record the matches, otherwise we pass this information on to another call to `df_locate_matches_recurse()`, bumping the column index and using these refined lower/upper bounds as the starting bounds for the next column. I think an example would be useful here, so below I step through this process for a few iterations: ```{r} # these are sorted already for simplicity needles <- data_frame(x = c(1, 1, 2, 2, 2, 3), y = c(1, 2, 3, 4, 5, 3)) haystack <- data_frame(x = c(1, 1, 2, 2, 3), y = c(2, 3, 4, 4, 1)) needles haystack ## Column 1, iteration 1 # start at midpoint in needles # this corresponds to x==2 loc_mid_needles <- 3L # finding all x==2 values in needles gives us: loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # compute LHS/RHS bounds for next needle lhs_loc_lower_bound_needles <- 1L # original lower bound lhs_loc_upper_bound_needles <- 2L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 6L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 6L # original upper bound # We still have a 2nd column to check. So recurse and pass on the current # duplicate and match bounds to start the 2nd column with. ## Column 2, iteration 1 # midpoint of [3, 5] # value y==4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # last column, so record matches # - this was location 4 in needles # - lower match in haystack is at loc 3 # - match size is 2 # Now handle LHS and RHS of needle midpoint lhs_loc_lower_bound_needles <- 3L # original lower bound lhs_loc_upper_bound_needles <- 3L # lower_duplicate-1 rhs_loc_lower_bound_needles <- 5L # upper_duplicate+1 rhs_loc_upper_bound_needles <- 5L # original upper bound ## Column 2, iteration 2 (using LHS bounds) # midpoint of [3,3] # value of y==3 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 3L # no match! no y==3 in haystack for x==2 # lower-match will always end up > upper-match in this case loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 2L # no LHS or RHS needle values to do, so we are done here ## Column 2, iteration 3 (using RHS bounds) # same as above, range of [5,5], value of y==5, which has no match in haystack ## Column 1, iteration 2 (LHS of first x needle) # Now we are done with the x needles from [3,5], so move on to the LHS and RHS # of that. Here we would do the LHS: # midpoint of [1,2] loc_mid_needles <- 1L # ... ## Column 1, iteration 3 (RHS of first x needle) # midpoint of [6,6] loc_mid_needles <- 6L # ... ``` In the real code, rather than comparing the double values of the columns directly, we replace each column with pseudo "joint ranks" computed between the i-th column of `needles` and the i-th column of `haystack`. It is approximately like doing `vec_rank(vec_c(needles$x, haystack$x), type = "dense")`, then splitting the resulting ranks back up into their corresponding needle/haystack columns. This keeps the recursion code simpler, because we only have to worry about comparing integers. ## Non-equi conditions and containers At this point we can talk about non-equi conditions like `<` or `>=`. The general idea is pretty simple, and just builds on the above algorithm. For example, start with the `x` column from needles/haystack above: ```{r} needles$x haystack$x ``` If we used a condition of `<=`, then we'd do everything the same as before: - Midpoint in needles is location 3, value `x==2` - Find lower/upper duplicates in needles, giving locations `[3, 5]` - Find lower/upper _exact_ match in haystack, giving locations `[3, 4]` At this point, we need to "adjust" the `haystack` match bounds to account for the condition. Since `haystack` is ordered, our "rule" for `<=` is to keep the lower match location the same, but extend the upper match location to the upper bound, so we end up with `[3, 5]`. We know we can extend the upper match location because every haystack value after the exact match should be less than the needle. Then we just record the matches and continue on normally. This approach is really nice, because we only have to exactly match the `needle` in `haystack`. We don't have to compare each needle against every value in `haystack`, which would take a massive amount of time. However, it gets slightly more complex with data frames with multiple columns. Let's go back to our original `needles` and `haystack` data frames and apply the condition `<=` to each column. Here is another worked example, which shows a case where our "rule" falls apart on the second column. ```{r} needles haystack # `condition = c("<=", "<=")` ## Column 1, iteration 1 # x == 2 loc_mid_needles <- 3L loc_lower_duplicate_needles <- 3L loc_upper_duplicate_needles <- 5L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # because haystack is ordered we know we can expand the upper bound automatically # to include everything past the match. i.e. needle of x==2 must be less than # the haystack value at loc 5, which we can check by seeing that it is x==3. loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L ## Column 2, iteration 1 # needles range of [3, 5] # y == 4 loc_mid_needles <- 4L loc_lower_duplicate_needles <- 4L loc_upper_duplicate_needles <- 4L # finding exact matches in haystack give us: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 4L # lets try using our rule, which tells us we should be able to extend the upper # bound: loc_lower_match_haystack <- 3L loc_upper_match_haystack <- 5L # but the haystack value of y at location 5 is y==1, which is not less than y==4 # in the needles! looks like our rule failed us. ``` If you read through the above example, you'll see that the rule didn't work here. The problem is that while `haystack` is ordered (by `vec_order()`s standards), each column isn't ordered _independently_ of the others. Instead, each column is ordered within the "group" created by previous columns. Concretely, `haystack` here has an ordered `x` column, but if you look at `haystack$y` by itself, it isn't ordered (because of that 1 at the end). That is what causes the rule to fail. ```{r} haystack ``` To fix this, we need to create haystack "containers" where the values within each container are all _totally_ ordered. For `haystack` that would create 2 containers and look like: ``` r haystack[1:4,] #> # A tibble: 4 × 2 #> x y #> #> 1 1 2 #> 2 1 3 #> 3 2 4 #> 4 2 4 haystack[5,] #> # A tibble: 1 × 2 #> x y #> #> 1 3 1 ``` This is essentially what `computing_nesting_container_ids()` does. You can actually see these ids with the helper, `compute_nesting_container_info()`: ```{r} haystack2 <- haystack # we really pass along the integer ranks, but in this case that is equivalent # to converting our double columns to integers haystack2$x <- as.integer(haystack2$x) haystack2$y <- as.integer(haystack2$y) info <- compute_nesting_container_info(haystack2, condition = c("<=", "<=")) # the ids are in the second slot. # container ids break haystack into [1, 4] and [5, 5]. info[[2]] ``` So the idea is that for each needle, we look in each haystack container and find all the matches, then we aggregate all of the matches once at the end. `df_locate_matches_with_containers()` has the job of iterating over the containers. Computing totally ordered containers can be expensive, but luckily it doesn't happen very often in normal usage. - If there are all `==` conditions, we don't need containers (i.e. any equi join) - If there is only 1 non-equi condition and no conditions after it, we don't need containers (i.e. most rolling joins) - Otherwise the typical case where we need containers is if we have something like `date >= lower, date <= upper`. Even so, the computation cost generally scales with the number of columns in `haystack` you compute containers with (here 2), and it only really slows down around 4 columns or so, which I haven't ever seen a real life example of. vctrs/man/faq/internal/ptype2-identity.Rmd0000644000176200001440000000747615113325071020303 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ## Promotion monoid Promotions (i.e. automatic coercions) should always transform inputs to their richer type to avoid losing values of precision. `vec_ptype2()` returns the _richer_ type of two vectors, or throws an incompatible type error if none of the two vector types include the other. For example, the richer type of integer and double is the latter because double covers a larger range of values than integer. `vec_ptype2()` is a [monoid](https://en.wikipedia.org/wiki/Monoid) over vectors, which in practical terms means that it is a well behaved operation for [reduction](https://purrr.tidyverse.org/reference/reduce.html). Reduction is an important operation for promotions because that is how the richer type of multiple elements is computed. As a monoid, `vec_ptype2()` needs an identity element, i.e. a value that doesn't change the result of the reduction. vctrs has two identity values, `NULL` and __unspecified__ vectors. ## The `NULL` identity As an identity element that shouldn't influence the determination of the common type of a set of vectors, `NULL` is promoted to any type: ```{r} vec_ptype2(NULL, "") vec_ptype2(1L, NULL) ``` The common type of `NULL` and `NULL` is the identity `NULL`: ```{r} vec_ptype2(NULL, NULL) ``` This way the result of `vec_ptype2(NULL, NULL)` does not influence subsequent promotions: ```{r} vec_ptype2( vec_ptype2(NULL, NULL), "" ) ``` ## Unspecified vectors In the vctrs coercion system, logical vectors of missing values are also automatically promoted to the type of any other vector, just like `NULL`. We call these vectors unspecified. The special coercion semantics of unspecified vectors serve two purposes: 1. It makes it possible to assign vectors of `NA` inside any type of vectors, even when they are not coercible with logical: ```{r} x <- letters[1:5] vec_assign(x, 1:2, c(NA, NA)) ``` 2. We can't put `NULL` in a data frame, so we need an identity element that behaves more like a vector. Logical vectors of `NA` seem a natural fit for this. Unspecified vectors are thus promoted to any other type, just like `NULL`: ```{r} vec_ptype2(NA, "") vec_ptype2(1L, c(NA, NA)) ``` ## Finalising common types vctrs has an internal vector type of class `vctrs_unspecified`. Users normally don't see such vectors in the wild, but they do come up when taking the common type of an unspecified vector with another identity value: ```{r} vec_ptype2(NA, NA) vec_ptype2(NA, NULL) vec_ptype2(NULL, NA) ``` We can't return `NA` here because `vec_ptype2()` normally returns empty vectors. We also can't return `NULL` because unspecified vectors need to be recognised as logical vectors if they haven't been promoted at the end of the reduction. ```{r} vec_ptype_finalise(vec_ptype2(NULL, NA)) ``` See the output of `vec_ptype_common()` which performs the reduction and finalises the type, ready to be used by the caller: ```{r} vec_ptype_common(NULL, NULL) vec_ptype_common(NA, NULL) ``` `vec_ptype_finalise()` is an S3 generic, but the only time you should ever need to write an S3 method for it is if your class _wraps_ another vector in some way and needs special handling to propagate the default finalisation. For example, the ivs package contains an interval class that wraps `start` and `end` vectors of the same type and has a `vec_ptype_finalize()` method that finalises those wrapped vectors: ```{r, eval = FALSE} vec_ptype_finalise.ivs_iv <- function(x, ...) { start <- unclass(x)[[1L]] ptype <- vec_ptype_finalise(start, ...) new_bare_iv(ptype, ptype) } ``` This ensures that `vec_ptype_finalise(vec_ptype(ivs::iv(NA, NA)))` correctly finalises to `>` rather than `>`. Note that data frames are already recursively finalised, so you don't need a `vec_ptype_finalise()` method for a data frame subclass. vctrs/man/faq/setup.Rmd0000644000176200001440000000026114315060307014537 0ustar liggesusers ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options( cli.unicode = FALSE, rlang_call_format_srcrefs = FALSE ) library(vctrs) ``` vctrs/man/faq/developer/0000755000176200001440000000000015132161317014722 5ustar liggesusersvctrs/man/faq/developer/links-coercion.Rmd0000644000176200001440000000100714276722575020324 0ustar liggesusers # Implementing coercion methods - For an overview of how these generics work and their roles in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")`. vctrs/man/faq/developer/snippet-roxy-workflow.Rmd0000644000176200001440000000122414276722575021737 0ustar liggesusers To implement methods for generics, first import the generics in your namespace and redocument: ```{r, eval = FALSE} #' @importFrom vctrs vec_ptype2 vec_cast NULL ``` Note that for each batches of methods that you add to your package, you need to export the methods and redocument immediately, even during development. Otherwise they won't be in scope when you run unit tests e.g. with testthat. Implementing double dispatch methods is very similar to implementing regular S3 methods. In these examples we are using roxygen2 tags to register the methods, but you can also register the methods manually in your NAMESPACE file or lazily with `s3_register()`. vctrs/man/faq/developer/howto-faq-fix-scalar-type-error.Rmd0000644000176200001440000000445114376223321023437 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} stopifnot(rlang::is_installed("dplyr")) ``` The tidyverse is a bit stricter than base R regarding what kind of objects are considered as vectors (see the [user FAQ][faq-error-scalar-type] about this topic). Sometimes vctrs won't treat your class as a vector when it should. ## Why isn't my list class considered a vector? By default, S3 lists are not considered to be vectors by vctrs: ```{r} my_list <- structure(list(), class = "my_class") vctrs::vec_is(my_list) ``` To be treated as a vector, the class must either inherit from `"list"` explicitly: ```{r} my_explicit_list <- structure(list(), class = c("my_class", "list")) vctrs::vec_is(my_explicit_list) ``` Or it should implement a `vec_proxy()` method that returns its input if explicit inheritance is not possible or troublesome: ```{r} #' @export vec_proxy.my_class <- function(x, ...) x vctrs::vec_is(my_list) ``` Note that explicit inheritance is the preferred way because this makes it possible for your class to dispatch on `list` methods of S3 generics: ```{r, error = TRUE} my_generic <- function(x) UseMethod("my_generic") my_generic.list <- function(x) "dispatched!" my_generic(my_list) my_generic(my_explicit_list) ``` ## Why isn't my data frame class considered a vector? The most likely explanation is that the data frame has not been properly constructed. However, if you get an "Input must be a vector" error with a data frame subclass, it probably means that the data frame has not been properly constructed. The main cause of these errors are data frames whose _base class_ is not `"data.frame"`: ```{r, error = TRUE} my_df <- data.frame(x = 1) class(my_df) <- c("data.frame", "my_class") vctrs::obj_check_vector(my_df) ``` This is problematic as many tidyverse functions won't work properly: ```{r, error = TRUE} dplyr::slice(my_df, 1) ``` It is generally not appropriate to declare your class to be a superclass of another class. We generally consider this undefined behaviour (UB). To fix these errors, you can simply change the construction of your data frame class so that `"data.frame"` is a base class, i.e. it should come last in the class vector: ```{r} class(my_df) <- c("my_class", "data.frame") vctrs::obj_check_vector(my_df) dplyr::slice(my_df, 1) ``` vctrs/man/faq/developer/theory-coercion.Rmd0000644000176200001440000002400014315060307020472 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` This is an overview of the usage of `vec_ptype2()` and `vec_cast()` and their role in the vctrs coercion mechanism. Related topics: - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")`. ## Combination mechanism in vctrs The coercion system in vctrs is designed to make combination of multiple inputs consistent and extensible. Combinations occur in many places, such as row-binding, joins, subset-assignment, or grouped summary functions that use the split-apply-combine strategy. For example: ```{r, error = TRUE} vec_c(TRUE, 1) vec_c("a", 1) vec_rbind( data.frame(x = TRUE), data.frame(x = 1, y = 2) ) vec_rbind( data.frame(x = "a"), data.frame(x = 1, y = 2) ) ``` One major goal of vctrs is to provide a central place for implementing the coercion methods that make generic combinations possible. The two relevant generics are `vec_ptype2()` and `vec_cast()`. They both take two arguments and perform __double dispatch__, meaning that a method is selected based on the classes of both inputs. The general mechanism for combining multiple inputs is: 1. Find the common type of a set of inputs by reducing (as in `base::Reduce()` or `purrr::reduce()`) the `vec_ptype2()` binary function over the set. 2. Convert all inputs to the common type with `vec_cast()`. 3. Initialise the output vector as an instance of this common type with `vec_init()`. 4. Fill the output vector with the elements of the inputs using `vec_assign()`. The last two steps may require `vec_proxy()` and `vec_restore()` implementations, unless the attributes of your class are constant and do not depend on the contents of the vector. We focus here on the first two steps, which require `vec_ptype2()` and `vec_cast()` implementations. ## `vec_ptype2()` Methods for `vec_ptype2()` are passed two _prototypes_, i.e. two inputs emptied of their elements. They implement two behaviours: * If the types of their inputs are compatible, indicate which of them is the richer type by returning it. If the types are of equal resolution, return any of the two. * Throw an error with `stop_incompatible_type()` when it can be determined from the attributes that the types of the inputs are not compatible. ### Type compatibility A type is __compatible__ with another type if the values it represents are a subset or a superset of the values of the other type. The notion of "value" is to be interpreted at a high level, in particular it is not the same as the memory representation. For example, factors are represented in memory with integers but their values are more related to character vectors than to round numbers: ```{r, error = TRUE} # Two factors are compatible vec_ptype2(factor("a"), factor("b")) # Factors are compatible with a character vec_ptype2(factor("a"), "b") # But they are incompatible with integers vec_ptype2(factor("a"), 1L) ``` ### Richness of type Richness of type is not a very precise notion. It can be about richer data (for instance a `double` vector covers more values than an integer vector), richer behaviour (a `data.table` has richer behaviour than a `data.frame`), or both. If you have trouble determining which one of the two types is richer, it probably means they shouldn't be automatically coercible. Let's look again at what happens when we combine a factor and a character: ```{r} vec_ptype2(factor("a"), "b") ``` The ptype2 method for `` and `>` returns `` because the former is a richer type. The factor can only contain `"a"` strings, whereas the character can contain any strings. In this sense, factors are a _subset_ of character. Note that another valid behaviour would be to throw an incompatible type error. This is what a strict factor implementation would do. We have decided to be laxer in vctrs because it is easy to inadvertently create factors instead of character vectors, especially with older versions of R where `stringsAsFactors` is still true by default. ### Consistency and symmetry on permutation Each ptype2 method should strive to have exactly the same behaviour when the inputs are permuted. This is not always possible, for example factor levels are aggregated in order: ```{r} vec_ptype2(factor(c("a", "c")), factor("b")) vec_ptype2(factor("b"), factor(c("a", "c"))) ``` In any case, permuting the input should not return a fundamentally different type or introduce an incompatible type error. ### Coercion hierarchy The classes that you can coerce together form a coercion (or subtyping) hierarchy. Below is a schema of the hierarchy for the base types like integer and factor. In this diagram the directions of the arrows express which type is richer. They flow from the bottom (more constrained types) to the top (richer types). \\figure{coerce.png} A coercion hierarchy is distinct from the structural hierarchy implied by memory types and classes. For instance, in a structural hierarchy, factors are built on top of integers. But in the coercion hierarchy they are more related to character vectors. Similarly, subclasses are not necessarily coercible with their superclasses because the coercion and structural hierarchies are separate. ### Implementing a coercion hierarchy As a class implementor, you have two options. The simplest is to create an entirely separate hierarchy. The date and date-time classes are an example of an S3-based hierarchy that is completely separate. Alternatively, you can integrate your class in an existing hierarchy, typically by adding parent nodes on top of the hierarchy (your class is richer), by adding children node at the root of the hierarchy (your class is more constrained), or by inserting a node in the tree. These coercion hierarchies are _implicit_, in the sense that they are implied by the `vec_ptype2()` implementations. There is no structured way to create or modify a hierarchy, instead you need to implement the appropriate coercion methods for all the types in your hierarchy, and diligently return the richer type in each case. The `vec_ptype2()` implementations are not transitive nor inherited, so all pairwise methods between classes lying on a given path must be implemented manually. This is something we might make easier in the future. ## `vec_cast()` The second generic, `vec_cast()`, is the one that looks at the data and actually performs the conversion. Because it has access to more information than `vec_ptype2()`, it may be stricter and cause an error in more cases. `vec_cast()` has three possible behaviours: - Determine that the prototypes of the two inputs are not compatible. This must be decided in exactly the same way as for `vec_ptype2()`. Call `stop_incompatible_cast()` if you can determine from the attributes that the types are not compatible. - Detect incompatible values. Usually this is because the target type is too restricted for the values supported by the input type. For example, a fractional number can't be converted to an integer. The method should throw an error in that case. - Return the input vector converted to the target type if all values are compatible. Whereas `vec_ptype2()` must return the same type when the inputs are permuted, `vec_cast()` is _directional_. It always returns the type of the right-hand side, or dies trying. ## Double dispatch The dispatch mechanism for `vec_ptype2()` and `vec_cast()` looks like S3 but is actually a custom mechanism. Compared to S3, it has the following differences: * It dispatches on the classes of the first two inputs. * There is no inheritance of ptype2 and cast methods. This is because the S3 class hierarchy is not necessarily the same as the coercion hierarchy. * `NextMethod()` does not work. Parent methods must be called explicitly if necessary. * The default method is hard-coded. ## Data frames The determination of the common type of data frames with `vec_ptype2()` happens in three steps: 1. Match the columns of the two input data frames. If some columns don't exist, they are created and filled with adequately typed `NA` values. 2. Find the common type for each column by calling `vec_ptype2()` on each pair of matched columns. 3. Find the common data frame type. For example the common type of a grouped tibble and a tibble is a grouped tibble because the latter is the richer type. The common type of a data table and a data frame is a data table. `vec_cast()` operates similarly. If a data frame is cast to a target type that has fewer columns, this is an error. If you are implementing coercion methods for data frames, you will need to explicitly call the parent methods that perform the common type determination or the type conversion described above. These are exported as [df_ptype2()] and [df_cast()]. ### Data frame fallbacks Being too strict with data frame combinations would cause too much pain because there are many data frame subclasses in the wild that don't implement vctrs methods. We have decided to implement a special fallback behaviour for foreign data frames. Incompatible data frames fall back to a base data frame: ```{r} df1 <- data.frame(x = 1) df2 <- structure(df1, class = c("foreign_df", "data.frame")) vec_rbind(df1, df2) ``` When a tibble is involved, we fall back to tibble: ```{r} df3 <- tibble::as_tibble(df1) vec_rbind(df1, df3) ``` These fallbacks are not ideal but they make sense because all data frames share a common data structure. This is not generally the case for vectors. For example factors and characters have different representations, and it is not possible to find a fallback time mechanically. However this fallback has a big downside: implementing vctrs methods for your data frame subclass is a breaking behaviour change. The proper coercion behaviour for your data frame class should be specified as soon as possible to limit the consequences of changing the behaviour of your class in R scripts. vctrs/man/faq/developer/howto-coercion-data-frame.Rmd0000644000176200001440000003051714276722575022353 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} old_warn_on_fallback <- options(`vctrs:::warn_on_fallback` = FALSE) knitr_defer(options(old_warn_on_fallback)) ``` This guide provides a practical recipe for implementing `vec_ptype2()` and `vec_cast()` methods for coercions of data frame subclasses. Related topics: - For an overview of the coercion mechanism in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for simple vectors, see [`?howto-faq-coercion`][howto-faq-coercion]. Coercion of data frames occurs when different data frame classes are combined in some way. The two main methods of combination are currently row-binding with [vec_rbind()] and col-binding with [vec_cbind()] (which are in turn used by a number of dplyr and tidyr functions). These functions take multiple data frame inputs and automatically coerce them to their common type. vctrs is generally strict about the kind of automatic coercions that are performed when combining inputs. In the case of data frames we have decided to be a bit less strict for convenience. Instead of throwing an incompatible type error, we fall back to a base data frame or a tibble if we don't know how to combine two data frame subclasses. It is still a good idea to specify the proper coercion behaviour for your data frame subclasses as soon as possible. We will see two examples in this guide. The first example is about a data frame subclass that has no particular attributes to manage. In the second example, we implement coercion methods for a tibble subclass that includes potentially incompatible attributes. ## Roxygen workflow ```{r, child = "snippet-roxy-workflow.Rmd"} ``` ## Parent methods Most of the common type determination should be performed by the parent class. In vctrs, double dispatch is implemented in such a way that you need to call the methods for the parent class manually. For `vec_ptype2()` this means you need to call `df_ptype2()` (for data frame subclasses) or `tib_ptype2()` (for tibble subclasses). Similarly, `df_cast()` and `tib_cast()` are the workhorses for `vec_cast()` methods of subtypes of `data.frame` and `tbl_df`. These functions take the union of the columns in `x` and `y`, and ensure shared columns have the same type. These functions are much less strict than `vec_ptype2()` and `vec_cast()` as they accept any subclass of data frame as input. They always return a `data.frame` or a `tbl_df`. You will probably want to write similar functions for your subclass to avoid repetition in your code. You may want to export them as well if you are expecting other people to derive from your class. ## A `data.table` example ```{r, include = FALSE} delayedAssign("as.data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "as.data.table") } else { function(...) abort("`data.table` must be installed.") } }) delayedAssign("data.table", { if (is_installed("data.table")) { env_get(ns_env("data.table"), "data.table") } else { function(...) abort("`data.table` must be installed.") } }) ``` This example is the actual implementation of vctrs coercion methods for `data.table`. This is a simple example because we don't have to keep track of attributes for this class or manage incompatibilities. See the tibble section for a more complicated example. We first create the `dt_ptype2()` and `dt_cast()` helpers. They wrap around the parent methods `df_ptype2()` and `df_cast()`, and transform the common type or converted input to a data table. You may want to export these helpers if you expect other packages to derive from your data frame class. These helpers should always return data tables. To this end we use the conversion generic `as.data.table()`. Depending on the tools available for the particular class at hand, a constructor might be appropriate as well. ```{r} dt_ptype2 <- function(x, y, ...) { as.data.table(df_ptype2(x, y, ...)) } dt_cast <- function(x, to, ...) { as.data.table(df_cast(x, to, ...)) } ``` We start with the self-self method: ```{r} #' @export vec_ptype2.data.table.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } ``` Between a data frame and a data table, we consider the richer type to be data table. This decision is not based on the value coverage of each data structures, but on the idea that data tables have richer behaviour. Since data tables are the richer type, we call `dt_type2()` from the `vec_ptype2()` method. It always returns a data table, no matter the order of arguments: ```{r} #' @export vec_ptype2.data.table.data.frame <- function(x, y, ...) { dt_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.data.table <- function(x, y, ...) { dt_ptype2(x, y, ...) } ``` The `vec_cast()` methods follow the same pattern, but note how the method for coercing to data frame uses `df_cast()` rather than `dt_cast()`. Also, please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents `to`, whereas the second class represents `x`. ```{r} #' @export vec_cast.data.table.data.table <- function(x, to, ...) { dt_cast(x, to, ...) } #' @export vec_cast.data.table.data.frame <- function(x, to, ...) { # `x` is a data.frame to be converted to a data.table dt_cast(x, to, ...) } #' @export vec_cast.data.frame.data.table <- function(x, to, ...) { # `x` is a data.table to be converted to a data.frame df_cast(x, to, ...) } ``` With these methods vctrs is now able to combine data tables with data frames: ```{r} vec_cbind(data.frame(x = 1:3), data.table(y = "foo")) ``` ## A tibble example In this example we implement coercion methods for a tibble subclass that carries a colour as a scalar metadata: ```{r} # User constructor my_tibble <- function(colour = NULL, ...) { new_my_tibble(tibble::tibble(...), colour = colour) } # Developer constructor new_my_tibble <- function(x, colour = NULL) { stopifnot(is.data.frame(x)) tibble::new_tibble( x, colour = colour, class = "my_tibble", nrow = nrow(x) ) } df_colour <- function(x) { if (inherits(x, "my_tibble")) { attr(x, "colour") } else { NULL } } #'@export print.my_tibble <- function(x, ...) { cat(sprintf("<%s: %s>\n", class(x)[[1]], df_colour(x))) cli::cat_line(format(x)[-1]) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("base::print", "my_tibble") ``` This subclass is very simple. All it does is modify the header. ```{r} red <- my_tibble("red", x = 1, y = 1:2) red red[2] green <- my_tibble("green", z = TRUE) green ``` Combinations do not work properly out of the box, instead vctrs falls back to a bare tibble: ```{r} vec_rbind(red, tibble::tibble(x = 10:12)) ``` Instead of falling back to a data frame, we would like to return a `` when combined with a data frame or a tibble. Because this subclass has more metadata than normal data frames (it has a colour), it is a _supertype_ of tibble and data frame, i.e. it is the richer type. This is similar to how a grouped tibble is a more general type than a tibble or a data frame. Conceptually, the latter are pinned to a single constant group. The coercion methods for data frames operate in two steps: - They check for compatible subclass attributes. In our case the tibble colour has to be the same, or be undefined. - They call their parent methods, in this case [tib_ptype2()] and [tib_cast()] because we have a subclass of tibble. This eventually calls the data frame methods [df_ptype2()] and [tib_ptype2()] which match the columns and their types. This process should usually be wrapped in two functions to avoid repetition. Consider exporting these if you expect your class to be derived by other subclasses. We first implement a helper to determine if two data frames have compatible colours. We use the `df_colour()` accessor which returns `NULL` when the data frame colour is undefined. ```{r} has_compatible_colours <- function(x, y) { x_colour <- df_colour(x) %||% df_colour(y) y_colour <- df_colour(y) %||% x_colour identical(x_colour, y_colour) } ``` Next we implement the coercion helpers. If the colours are not compatible, we call `stop_incompatible_cast()` or `stop_incompatible_type()`. These strict coercion semantics are justified because in this class colour is a _data_ attribute. If it were a non essential _detail_ attribute, like the timezone in a datetime, we would just standardise it to the value of the left-hand side. In simpler cases (like the data.table example), these methods do not need to take the arguments suffixed in `_arg`. Here we do need to take these arguments so we can pass them to the `stop_` functions when we detect an incompatibility. They also should be passed to the parent methods. ```{r} #' @export my_tib_cast <- function(x, to, ..., x_arg = "", to_arg = "") { out <- tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) if (!has_compatible_colours(x, to)) { stop_incompatible_cast( x, to, x_arg = x_arg, to_arg = to_arg, details = "Can't combine colours." ) } colour <- df_colour(x) %||% df_colour(to) new_my_tibble(out, colour = colour) } #' @export my_tib_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { out <- tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg) if (!has_compatible_colours(x, y)) { stop_incompatible_type( x, y, x_arg = x_arg, y_arg = y_arg, details = "Can't combine colours." ) } colour <- df_colour(x) %||% df_colour(y) new_my_tibble(out, colour = colour) } ``` Let's now implement the coercion methods, starting with the self-self methods. ```{r} #' @export vec_ptype2.my_tibble.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_cast.my_tibble.my_tibble <- function(x, to, ...) { my_tib_cast(x, to, ...) } ``` ```{r, include = FALSE} knitr_local_registration("vctrs::vec_ptype2", "my_tibble.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.my_tibble") ``` We can now combine compatible instances of our class! ```{r, error = TRUE} vec_rbind(red, red) vec_rbind(green, green) vec_rbind(green, red) ``` The methods for combining our class with tibbles follow the same pattern. For ptype2 we return our class in both cases because it is the richer type: ```{r} #' @export vec_ptype2.my_tibble.tbl_df <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_ptype2.tbl_df.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } ``` For cast are careful about returning a tibble when casting to a tibble. Note the call to `vctrs::tib_cast()`: ```{r} #' @export vec_cast.my_tibble.tbl_df <- function(x, to, ...) { my_tib_cast(x, to, ...) } #' @export vec_cast.tbl_df.my_tibble <- function(x, to, ...) { tib_cast(x, to, ...) } ``` ```{r, include = FALSE} knitr_local_registration("vctrs::vec_ptype2", "my_tibble.tbl_df") knitr_local_registration("vctrs::vec_ptype2", "tbl_df.my_tibble") knitr_local_registration("vctrs::vec_cast", "tbl_df.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.tbl_df") ``` From this point, we get correct combinations with tibbles: ```{r} vec_rbind(red, tibble::tibble(x = 10:12)) ``` However we are not done yet. Because the coercion hierarchy is different from the class hierarchy, there is no inheritance of coercion methods. We're not getting correct behaviour for data frames yet because we haven't explicitly specified the methods for this class: ```{r} vec_rbind(red, data.frame(x = 10:12)) ``` Let's finish up the boiler plate: ```{r} #' @export vec_ptype2.my_tibble.data.frame <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_ptype2.data.frame.my_tibble <- function(x, y, ...) { my_tib_ptype2(x, y, ...) } #' @export vec_cast.my_tibble.data.frame <- function(x, to, ...) { my_tib_cast(x, to, ...) } #' @export vec_cast.data.frame.my_tibble <- function(x, to, ...) { df_cast(x, to, ...) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_tibble.data.frame") knitr_local_registration("vctrs::vec_ptype2", "data.frame.my_tibble") knitr_local_registration("vctrs::vec_cast", "my_tibble.data.frame") knitr_local_registration("vctrs::vec_cast", "data.frame.my_tibble") ``` This completes the implementation: ```{r} vec_rbind(red, data.frame(x = 10:12)) ``` vctrs/man/faq/developer/howto-coercion.Rmd0000644000176200001440000002106014511320530020316 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` ```{r, include = FALSE} old_warn_on_fallback <- options(`vctrs:::warn_on_fallback` = FALSE) knitr_defer(options(old_warn_on_fallback)) ``` This guide illustrates how to implement `vec_ptype2()` and `vec_cast()` methods for existing classes. Related topics: - For an overview of how these generics work and their roles in vctrs, see [`?theory-faq-coercion`][theory-faq-coercion]. - For an example of implementing coercion methods for data frame subclasses, see [`?howto-faq-coercion-data-frame`][howto-faq-coercion-data-frame]. - For a tutorial about implementing vctrs classes from scratch, see `vignette("s3-vector")` ## The natural number class We'll illustrate how to implement coercion methods with a simple class that represents natural numbers. In this scenario we have an existing class that already features a constructor and methods for `print()` and subset. ```{r} #' @export new_natural <- function(x) { if (is.numeric(x) || is.logical(x)) { stopifnot(is_whole(x)) x <- as.integer(x) } else { stop("Can't construct natural from unknown type.") } structure(x, class = "my_natural") } is_whole <- function(x) { all(x %% 1 == 0 | is.na(x)) } #' @export print.my_natural <- function(x, ...) { cat("\n") x <- unclass(x) NextMethod() } #' @export `[.my_natural` <- function(x, i, ...) { new_natural(NextMethod()) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("base::print", "my_natural") knitr_local_registration("base::[", "my_natural") ``` ```{r} new_natural(1:3) new_natural(c(1, NA)) ``` ## Roxygen workflow ```{r, child = "snippet-roxy-workflow.Rmd"} ``` ## Implementing `vec_ptype2()` ### The self-self method The first method to implement is the one that signals that your class is compatible with itself: ```{r} #' @export vec_ptype2.my_natural.my_natural <- function(x, y, ...) { x } vec_ptype2(new_natural(1), new_natural(2:3)) ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.my_natural") ``` `vec_ptype2()` implements a fallback to try and be compatible with simple classes, so it may seem that you don't need to implement the self-self coercion method. However, you must implement it explicitly because this is how vctrs knows that a class that is implementing vctrs methods (for instance this disable fallbacks to `base::c()`). Also, it makes your class a bit more efficient. ### The parent and children methods Our natural number class is conceptually a parent of `` and a child of ``, but the class is not compatible with logical, integer, or double vectors yet: ```{r, error = TRUE} vec_ptype2(TRUE, new_natural(2:3)) vec_ptype2(new_natural(1), 2:3) ``` We'll specify the twin methods for each of these classes, returning the richer class in each case. ```{r} #' @export vec_ptype2.my_natural.logical <- function(x, y, ...) { # The order of the classes in the method name follows the order of # the arguments in the function signature, so `x` is the natural # number and `y` is the logical x } #' @export vec_ptype2.logical.my_natural <- function(x, y, ...) { # In this case `y` is the richer natural number y } ``` Between a natural number and an integer, the latter is the richer class: ```{r} #' @export vec_ptype2.my_natural.integer <- function(x, y, ...) { y } #' @export vec_ptype2.integer.my_natural <- function(x, y, ...) { x } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.logical") knitr_local_registration("vctrs::vec_ptype2", "my_natural.integer") knitr_local_registration("vctrs::vec_ptype2", "integer.my_natural") knitr_local_registration("vctrs::vec_ptype2", "logical.my_natural") ``` We no longer get common type errors for logical and integer: ```{r} vec_ptype2(TRUE, new_natural(2:3)) vec_ptype2(new_natural(1), 2:3) ``` We are not done yet. Pairwise coercion methods must be implemented for all the connected nodes in the coercion hierarchy, which include double vectors further up. The coercion methods for grand-parent types must be implemented separately: ```{r} #' @export vec_ptype2.my_natural.double <- function(x, y, ...) { y } #' @export vec_ptype2.double.my_natural <- function(x, y, ...) { x } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_ptype2", "my_natural.double") knitr_local_registration("vctrs::vec_ptype2", "double.my_natural") ``` ### Incompatible attributes Most of the time, inputs are incompatible because they have different classes for which no `vec_ptype2()` method is implemented. More rarely, inputs could be incompatible because of their attributes. In that case incompatibility is signalled by calling `stop_incompatible_type()`. In the following example, we implement a self-self ptype2 method for a hypothetical subclass of `` that has stricter combination semantics. The method throws an error when the levels of the two factors are not compatible. ```{r, eval = FALSE} #' @export vec_ptype2.my_strict_factor.my_strict_factor <- function(x, y, ..., x_arg = "", y_arg = "") { if (!setequal(levels(x), levels(y))) { stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg) } x } ``` Note how the methods need to take `x_arg` and `y_arg` parameters and pass them on to `stop_incompatible_type()`. These argument tags help create more informative error messages when the common type determination is for a column of a data frame. They are part of the generic signature but can usually be left out if not used. ## Implementing `vec_cast()` Corresponding `vec_cast()` methods must be implemented for all `vec_ptype2()` methods. The general pattern is to convert the argument `x` to the type of `to`. The methods should validate the values in `x` and make sure they conform to the values of `to`. Please note that for historical reasons, the order of the classes in the method name is in reverse order of the arguments in the function signature. The first class represents `to`, whereas the second class represents `x`. The self-self method is easy in this case, it just returns the target input: ```{r} #' @export vec_cast.my_natural.my_natural <- function(x, to, ...) { x } ``` The other types need to be validated. We perform input validation in the `new_natural()` constructor, so that's a good fit for our `vec_cast()` implementations. ```{r} #' @export vec_cast.my_natural.logical <- function(x, to, ...) { # The order of the classes in the method name is in reverse order # of the arguments in the function signature, so `to` is the natural # number and `x` is the logical new_natural(x) } vec_cast.my_natural.integer <- function(x, to, ...) { new_natural(x) } vec_cast.my_natural.double <- function(x, to, ...) { new_natural(x) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_cast", "my_natural.my_natural") knitr_local_registration("vctrs::vec_cast", "my_natural.logical") knitr_local_registration("vctrs::vec_cast", "my_natural.integer") knitr_local_registration("vctrs::vec_cast", "my_natural.double") ``` With these methods, vctrs is now able to combine logical and natural vectors. It properly returns the richer type of the two, a natural vector: ```{r} vec_c(TRUE, new_natural(1), FALSE) ``` Because we haven't implemented conversions _from_ natural, it still doesn't know how to combine natural with the richer integer and double types: ```{r, error = TRUE} vec_c(new_natural(1), 10L) vec_c(1.5, new_natural(1)) ``` This is quick work which completes the implementation of coercion methods for vctrs: ```{r} #' @export vec_cast.logical.my_natural <- function(x, to, ...) { # In this case `to` is the logical and `x` is the natural number attributes(x) <- NULL as.logical(x) } #' @export vec_cast.integer.my_natural <- function(x, to, ...) { attributes(x) <- NULL as.integer(x) } #' @export vec_cast.double.my_natural <- function(x, to, ...) { attributes(x) <- NULL as.double(x) } ``` ```{r, include = FALSE} # Necessary because includeRmd() evaluated in a child of global knitr_local_registration("vctrs::vec_cast", "logical.my_natural") knitr_local_registration("vctrs::vec_cast", "integer.my_natural") knitr_local_registration("vctrs::vec_cast", "double.my_natural") ``` And we now get the expected combinations. ```{r} vec_c(new_natural(1), 10L) vec_c(1.5, new_natural(1)) ``` vctrs/man/faq/developer/theory-recycling.Rmd0000644000176200001440000000321714511524374020666 0ustar liggesusers ```{r, child = "../setup.Rmd", include = FALSE} ``` Recycling describes the concept of repeating elements of one vector to match the size of another. There are two rules that underlie the "tidyverse" recycling rules: - Vectors of size 1 will be recycled to the size of any other vector - Otherwise, all vectors must have the same size # Examples ```{r, warning = FALSE, message = FALSE, include = FALSE} library(tibble) ``` Vectors of size 1 are recycled to the size of any other vector: ```{r} tibble(x = 1:3, y = 1L) ``` This includes vectors of size 0: ```{r} tibble(x = integer(), y = 1L) ``` If vectors aren't size 1, they must all be the same size. Otherwise, an error is thrown: ```{r, error = TRUE} tibble(x = 1:3, y = 4:7) ``` # vctrs backend Packages in r-lib and the tidyverse generally use [vec_size_common()] and [vec_recycle_common()] as the backends for handling recycling rules. - `vec_size_common()` returns the common size of multiple vectors, after applying the recycling rules - `vec_recycle_common()` goes one step further, and actually recycles the vectors to their common size ```{r, error = TRUE} vec_size_common(1:3, "x") vec_recycle_common(1:3, "x") vec_size_common(1:3, c("x", "y")) ``` # Base R recycling rules The recycling rules described here are stricter than the ones generally used by base R, which are: - If any vector is length 0, the output will be length 0 - Otherwise, the output will be length `max(length_x, length_y)`, and a warning will be thrown if the length of the longer vector is not an integer multiple of the length of the shorter vector. We explore the base R rules in detail in `vignette("type-size")`. vctrs/man/faq/developer/reference-compatibility.Rmd0000644000176200001440000000576214276722575022226 0ustar liggesusers vctrs provides a framework for working with vector classes in a generic way. However, it implements several compatibility fallbacks to base R methods. In this reference you will find how vctrs tries to be compatible with your vector class, and what base methods you need to implement for compatibility. If you're starting from scratch, we think you'll find it easier to start using [new_vctr()] as documented in `vignette("s3-vector")`. This guide is aimed for developers with existing vector classes. ## Aggregate operations with fallbacks All vctrs operations are based on four primitive generics described in the next section. However there are many higher level operations. The most important ones implement fallbacks to base generics for maximum compatibility with existing classes. - [vec_slice()] falls back to the base `[` generic if no [vec_proxy()] method is implemented. This way foreign classes that do not implement [vec_restore()] can restore attributes based on the new subsetted contents. - [vec_c()] and [vec_rbind()] now fall back to [base::c()] if the inputs have a common parent class with a `c()` method (only if they have no self-to-self `vec_ptype2()` method). vctrs works hard to make your `c()` method success in various situations (with `NULL` and `NA` inputs, even as first input which would normally prevent dispatch to your method). The main downside compared to using vctrs primitives is that you can't combine vectors of different classes since there is no extensible mechanism of coercion in `c()`, and it is less efficient in some cases. ## The vctrs primitives Most functions in vctrs are aggregate operations: they call other vctrs functions which themselves call other vctrs functions. The dependencies of a vctrs functions are listed in the Dependencies section of its documentation page. Take a look at [vec_count()] for an example. These dependencies form a tree whose leaves are the four vctrs primitives. Here is the diagram for `vec_count()`: \\figure{vec-count-deps.png} ### The coercion generics The coercion mechanism in vctrs is based on two generics: - [vec_ptype2()] - [vec_cast()] See the [theory overview][theory-faq-coercion]. Two objects with the same class and the same attributes are always considered compatible by ptype2 and cast. If the attributes or classes differ, they throw an incompatible type error. Coercion errors are the main source of incompatibility with vctrs. See the [howto guide][howto-faq-coercion] if you need to implement methods for these generics. ### The proxy and restoration generics - [vec_proxy()] - [vec_restore()] These generics are essential for vctrs but mostly optional. `vec_proxy()` defaults to an [identity][identity] function and you normally don't need to implement it. The proxy a vector must be one of the atomic vector types, a list, or a data frame. By default, S3 lists that do not inherit from `"list"` do not have an identity proxy. In that case, you need to explicitly implement `vec_proxy()` or make your class inherit from list. vctrs/man/list_of_transpose.Rd0000644000176200001440000000750015120272011016203 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/list-of-transpose.R \name{list_of_transpose} \alias{list_of_transpose} \title{Transpose a list of homogenous vectors} \usage{ list_of_transpose(x, ..., x_arg = caller_arg(x), error_call = current_env()) } \arguments{ \item{x}{A \link[=list_of]{list_of} with both \code{size} and \code{ptype} specified.} \item{...}{These dots are for future extensions and must be empty.} \item{x_arg}{Argument name used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A \code{list_of} of size \code{list_of_size(x)}, with an element size of \code{vec_size(x)} and an element type of \code{list_of_ptype(x)}. } \description{ \code{list_of_transpose()} takes a list of homogenous vectors, transposes it, and returns a new list of homogenous vectors. To perform a transpose, three pieces of information are required: \itemize{ \item The \emph{list size}, from \code{\link[=vec_size]{vec_size(x)}}. \item The \emph{element size}, from \code{\link[=list_of_size]{list_of_size(x)}}. \item The \emph{element type}, from \code{\link[=list_of_ptype]{list_of_ptype(x)}}. } Because all three of these are required, this function only works on fully specified \code{\link[=list_of]{list_of()}}s, with both \code{size} and \code{ptype} specified. To predict the output from \code{list_of_transpose()}, swap the list size with the element size. For example: \itemize{ \item Input: \verb{list_of[2]} \item Output: \verb{list_of[3]} } } \examples{ # A form of `list_of()` that infers both ptype and size list_of2 <- function(...) { list_of(..., .ptype = NULL, .size = NULL) } # I: list_of[3] # O: list_of[2] list_of_transpose(list_of2(1:2, 3:4, 5:6)) # With data frames x <- data_frame(a = 1:2, b = letters[1:2]) y <- data_frame(a = 3:4, b = letters[3:4]) list_of_transpose(list_of2(x, y)) # Size 1 elements are recycled list_of_transpose(list_of2(1, 2:3, 4)) # --------------------------------------------------------------------------- # `NULL` handling # `NULL` values aren't allowed in `list_of_transpose()` x <- list_of2(1:3, NULL, 5:7, NULL) try(list_of_transpose(x)) # Either drop them entirely or replace them up front before transposing x_dropped <- vec_slice(x, !vec_detect_missing(x)) x_dropped list_of_transpose(x_dropped) x_replaced <- vec_assign(x, vec_detect_missing(x), list(NA)) x_replaced list_of_transpose(x_replaced) # --------------------------------------------------------------------------- # Reversibility # Because `list_of_transpose()` takes and returns fully specified list-ofs, # it is fully reversible, even in the edge cases. x <- list_of2(integer(), integer()) # This returns a list of size 0 # I: list_of[2] # O: list_of[0] out <- list_of_transpose(x) out # Even though there are no elements, we know the element size and type, # so we can transpose a second time to recover `x`. This would not be # possible if this function returned a bare `list()`, which would result # in lost information. # I: list_of[0] # O: list_of[2] list_of_transpose(out) # --------------------------------------------------------------------------- # Padding # If you'd like to pad with a missing value rather than erroring, # you might do something like this, which left-pads before conversion # to list-of. x <- list(1, 2:5, 6:7) sizes <- list_sizes(x) size <- max(sizes) index <- which(sizes != size) x[index] <- lapply( index, function(i) vec_c(rep(NA, times = size - sizes[[i]]), x[[i]]) ) x x <- as_list_of(x, .ptype = NULL, .size = NULL) list_of_transpose(x) } vctrs/man/list_of.Rd0000644000176200001440000001004115120272011014077 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{list_of} \alias{list_of} \alias{as_list_of} \alias{is_list_of} \alias{vec_ptype2.vctrs_list_of} \alias{vec_cast.vctrs_list_of} \title{Construct a list of homogenous vectors} \usage{ list_of(..., .ptype = NULL, .size = zap()) as_list_of(x, ...) is_list_of(x) \method{vec_ptype2}{vctrs_list_of}(x, y, ..., x_arg = "", y_arg = "") \method{vec_cast}{vctrs_list_of}(x, to, ...) } \arguments{ \item{...}{For \code{list_of()}, vectors to include in the list. For other methods, these dots must be empty.} \item{.ptype}{The type to restrict each list element to. One of: \itemize{ \item A prototype like \code{integer()} or \code{double()}. \item \code{NULL}, to infer the type from \code{...}. If no vector inputs are provided, an error is thrown. \item \code{\link[rlang:zap]{rlang::zap()}} to avoid placing any restrictions on the type. }} \item{.size}{The size to restrict each list element to. One of: \itemize{ \item A scalar integer size. \item \code{NULL}, to infer the size from \code{...}. If no vector inputs are provided, an error is thrown. \item \code{\link[rlang:zap]{rlang::zap()}} to avoid placing any restrictions on the size. }} \item{x}{For \code{as_list_of()}, a vector to be coerced to list_of. For \code{is_list_of()}, an object to test.} \item{y, to}{Arguments to \code{vec_ptype2()} and \code{vec_cast()}.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages to inform the user about the locations of incompatible types (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}}).} } \description{ A \code{list_of} is a restricted version of a list that adds constraints on the list elements. \itemize{ \item \code{list_of(.ptype = )} restricts the \emph{type} of each element. \itemize{ \item \verb{.ptype = } asserts that each element has type \verb{}. \item \code{.ptype = NULL} infers the type from the original set of elements, or errors if no vector inputs were provided. \item \code{.ptype = rlang::zap()} doesn't restrict the type. } \item \code{list_of(.size = )} restricts the \emph{size} of each element. \itemize{ \item \verb{.size = } asserts that each element has size \verb{}. \item \code{.size = NULL} infers the size from the original set of elements, or errors if no vector inputs were provided. \item \code{.size = rlang::zap()} doesn't restrict the size. } } The default behavior infers the element type and doesn't restrict the size. Both \code{.ptype} and \code{.size} may be specified to restrict both the size and type of the list elements. You cannot set both of these to \code{rlang::zap()}, as that would be the same as a bare \code{list()} with no restrictions. Modifying a \code{list_of} with \verb{$<-}, \verb{[<-}, and \verb{[[<-} preserves the constraints by coercing and recycling all input items. } \examples{ # Restrict the type, but not the size x <- list_of(1:3, 5:6, 10:15) x if (requireNamespace("tibble", quietly = TRUE)) { # As a column in a tibble tibble::tibble(x = x) } # Coercion happens during assignment x[1] <- list(4) typeof(x[[1]]) try(x[1] <- list(4.5)) # Restrict the size, but not the type x <- list_of(1, 2:3, .ptype = rlang::zap(), .size = 2) x # Recycling happens during assignment x[1] <- list(4) x try(x[1] <- list(3:6)) # Restricting both size and type x <- list_of(1L, 2:3, .ptype = integer(), .size = 2) x # Setting an element to `NULL` x[2] <- list(NULL) x # Note that using `NULL` shortens the list, like a base R list x[2] <- NULL x # Combining a list_of with a list results in a list vec_c(list_of(1), list(2, "x")) # Combining a list_of with another list_of tries to find a common element # type and common element size, but will remove the constraint if that # fails x <- list_of(1, .ptype = double()) y <- list_of(c("a", "b"), .ptype = character(), .size = 2) z <- list_of(c("c", "d", "e"), .ptype = character(), .size = 3) # Falls back to a list vec_c(x, y) # Falls back to a `list_of` with no size restriction vec_c(y, z) } vctrs/man/vctrs-package.Rd0000644000176200001440000000231315154276515015221 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-package.R \docType{package} \name{vctrs-package} \alias{vctrs} \alias{vctrs-package} \title{vctrs: Vector Helpers} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} Defines new notions of prototype and size that are used to provide tools for consistent and well-founded type-coercion and size-recycling, and are in turn connected to ideas of type- and size-stability useful for analysing function interfaces. } \seealso{ Useful links: \itemize{ \item \url{https://vctrs.r-lib.org/} \item \url{https://github.com/r-lib/vctrs} \item Report bugs at \url{https://github.com/r-lib/vctrs/issues} } } \author{ \strong{Maintainer}: Davis Vaughan \email{davis@posit.co} Authors: \itemize{ \item Hadley Wickham \email{hadley@posit.co} \item Lionel Henry \email{lionel@posit.co} } Other contributors: \itemize{ \item data.table team (Radix sort based on data.table's forder() and their contribution to R's order()) [copyright holder] \item Posit Software, PBC [copyright holder, funder] } } \keyword{internal} vctrs/man/vec_proxy_equal.Rd0000644000176200001440000000313514315060307015663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equal.R \name{vec_proxy_equal} \alias{vec_proxy_equal} \title{Equality proxy} \usage{ vec_proxy_equal(x, ...) } \arguments{ \item{x}{A vector x.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A 1d atomic vector or a data frame. } \description{ Returns a proxy object (i.e. an atomic vector or data frame of atomic vectors). For \link{vctr}s, this determines the behaviour of \code{==} and \code{!=} (via \code{\link[=vec_equal]{vec_equal()}}); \code{\link[=unique]{unique()}}, \code{\link[=duplicated]{duplicated()}} (via \code{\link[=vec_unique]{vec_unique()}} and \code{\link[=vec_duplicate_detect]{vec_duplicate_detect()}}); \code{\link[=is.na]{is.na()}} and \code{\link[=anyNA]{anyNA()}} (via \code{\link[=vec_detect_missing]{vec_detect_missing()}}). } \details{ The default method calls \code{\link[=vec_proxy]{vec_proxy()}}, as the default underlying vector data should be equal-able in most cases. If your class is not equal-able, provide a \code{vec_proxy_equal()} method that throws an error. } \section{Data frames}{ If the proxy for \code{x} is a data frame, the proxy function is automatically recursively applied on all columns as well. After applying the proxy recursively, if there are any data frame columns present in the proxy, then they are unpacked. Finally, if the resulting data frame only has a single column, then it is unwrapped and a vector is returned as the proxy. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} called by default } } \keyword{internal} vctrs/man/obj_print.Rd0000644000176200001440000000166514202760666014464 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print-str.R \name{obj_print} \alias{obj_print} \alias{obj_print_header} \alias{obj_print_data} \alias{obj_print_footer} \alias{obj_str} \alias{obj_str_header} \alias{obj_str_data} \alias{obj_str_footer} \title{\code{print()} and \code{str()} generics.} \usage{ obj_print(x, ...) obj_print_header(x, ...) obj_print_data(x, ...) obj_print_footer(x, ...) obj_str(x, ...) obj_str_header(x, ...) obj_str_data(x, ...) obj_str_footer(x, ...) } \arguments{ \item{x}{A vector} \item{...}{Additional arguments passed on to methods. See \code{\link[=print]{print()}} and \code{\link[=str]{str()}} for commonly used options} } \description{ These are constructed to be more easily extensible since you can override the \verb{_header()}, \verb{_data()} or \verb{_footer()} components individually. The default methods are built on top of \code{format()}. } \keyword{internal} vctrs/man/new_factor.Rd0000644000176200001440000000257314276722575014634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-factor.R \name{new_factor} \alias{new_factor} \alias{new_ordered} \alias{vec_ptype2.factor} \alias{vec_ptype2.ordered} \alias{vec_cast.factor} \alias{vec_cast.ordered} \title{Factor/ordered factor S3 class} \usage{ new_factor(x = integer(), levels = character(), ..., class = character()) new_ordered(x = integer(), levels = character()) \method{vec_ptype2}{factor}(x, y, ...) \method{vec_ptype2}{ordered}(x, y, ...) \method{vec_cast}{factor}(x, to, ...) \method{vec_cast}{ordered}(x, to, ...) } \arguments{ \item{x}{Integer values which index in to \code{levels}.} \item{levels}{Character vector of labels.} \item{..., class}{Used to for subclasses.} } \description{ A \link{factor} is an integer with attribute \code{levels}, a character vector. There should be one level for each integer between 1 and \code{max(x)}. An \link{ordered} factor has the same properties as a factor, but possesses an extra class that marks levels as having a total ordering. } \details{ These functions help the base factor and ordered factor classes fit in to the vctrs type system by providing constructors, coercion functions, and casting functions. \code{new_factor()} and \code{new_ordered()} are low-level constructors - they only check that types, but not values, are valid, so are for expert use only. } \keyword{internal} vctrs/man/op-empty-default.Rd0000644000176200001440000000101114276722575015663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/size.R \name{\%0\%} \alias{\%0\%} \title{Default value for empty vectors} \usage{ x \%0\% y } \arguments{ \item{x}{A vector} \item{y}{Value to use if \code{x} is empty. To preserve type-stability, should be the same type as \code{x}.} } \description{ Use this inline operator when you need to provide a default value for empty (as defined by \code{\link[=vec_is_empty]{vec_is_empty()}}) vectors. } \examples{ 1:10 \%0\% 5 integer() \%0\% 5 } vctrs/man/vec_slice.Rd0000644000176200001440000001365715056611175014434 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{vec_slice} \alias{vec_slice} \alias{vec_slice<-} \alias{vec_assign} \title{Get or set observations in a vector} \usage{ vec_slice(x, i, ..., error_call = current_env()) vec_slice(x, i) <- value vec_assign(x, i, value, ..., slice_value = FALSE, x_arg = "", value_arg = "") } \arguments{ \item{x}{A vector} \item{i}{An integer, character or logical vector specifying the locations or names of the observations to get/set. Specify \code{TRUE} to index all elements (as in \code{x[]}), or \code{NULL}, \code{FALSE} or \code{integer()} to index none (as in \code{x[NULL]}).} \item{...}{These dots are for future extensions and must be empty.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{value}{A vector of replacement values \code{value} is cast to the type of \code{x}. If \code{slice_value = FALSE}, \code{value} must be size 1 or the same size as \code{i} after \code{i} has been converted to a positive integer location vector with \code{\link[=vec_as_location]{vec_as_location()}} (which may not be the same size as \code{i} originally). If \code{slice_value = TRUE}, \code{value} must be size 1 or the same size as \code{x}.} \item{slice_value}{A boolean. If \code{TRUE}, the assignment proceeds as if you had provided \code{vec_slice(x, i) <- vec_slice(value, i)}, but is optimized to avoid materializing the slice of \code{value}.} \item{x_arg, value_arg}{Argument names for \code{x} and \code{value}. These are used in error messages to inform the user about the locations of incompatible types and sizes (see \code{\link[=stop_incompatible_type]{stop_incompatible_type()}} and \code{\link[=stop_incompatible_size]{stop_incompatible_size()}}).} } \value{ A vector of the same type as \code{x}. } \description{ This provides a common interface to extracting and modifying observations for all vector types, regardless of dimensionality. They are analogs to \code{[} and \verb{[<-} that match \code{\link[=vec_size]{vec_size()}} instead of \code{length()}. } \section{Genericity}{ Support for S3 objects depends on whether the object implements a \code{\link[=vec_proxy]{vec_proxy()}} method. \itemize{ \item When a \code{vec_proxy()} method exists, the proxy is sliced or assigned to and \code{vec_restore()} is called on the result. \item Otherwise, \code{vec_slice()} falls back to the base generic \code{[} and \verb{vec_slice<-()} falls back to the base generic \verb{[<-}. } When \verb{vec_slice<-()} falls back to \verb{[<-}, it is expected that the subclass's \verb{[<-} method can handle the following subset of cases that base R's \verb{[<-} can also handle: \itemize{ \item An \code{i} vector of positive integer positions (notably excluding \code{NA}). \item A \code{value} vector of length 1 or length \code{length(i)}. If length 1, it should be recycled by the \verb{[<-} method to the length of \code{i}. } If your \verb{[<-} method eventually calls base R's native \verb{[<-} code, then these cases will be handled for you. Note that S3 lists are treated as scalars by default, and will cause an error if they don't implement a \code{\link[=vec_proxy]{vec_proxy()}} method. } \section{Differences with base R subsetting}{ \itemize{ \item \code{vec_slice()} only slices along one dimension. For two-dimensional types, the first dimension is subsetted. \item \code{vec_slice()} preserves attributes by default. \item \verb{vec_slice<-()} is type-stable and always returns the same type as the LHS. } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies}{ \itemize{ \item \code{base::`[`} \item \code{base::`[<-`} } } } \examples{ x <- sample(10) x vec_slice(x, 1:3) # You can assign with the infix variant: vec_slice(x, 2) <- 100 x # Or with the regular variant that doesn't modify the original input: y <- vec_assign(x, 3, 500) y x # Slicing objects of higher dimension: vec_slice(mtcars, 1:3) # Type stability -------------------------------------------------- # The assign variant is type stable. It always returns the same # type as the input. x <- 1:5 vec_slice(x, 2) <- 20.0 # `x` is still an integer vector because the RHS was cast to the # type of the LHS: vec_ptype(x) # Compare to `[<-`: x[2] <- 20.0 vec_ptype(x) # Note that the types must be coercible for the cast to happen. # For instance, you can cast a double vector of whole numbers to an # integer vector: vec_cast(1, integer()) # But not fractional doubles: try(vec_cast(1.5, integer())) # For this reason you can't assign fractional values in an integer # vector: x <- 1:3 try(vec_slice(x, 2) <- 1.5) # Slicing `value` ------------------------------------------------- # Sometimes both `x` and `value` start from objects that are the same length, # and you need to slice `value` by `i` before assigning it to `x`. This comes # up when thinking about how `base::ifelse()` and `dplyr::case_when()` work. condition <- c(TRUE, FALSE, TRUE, FALSE) yes <- 1:4 no <- 5:8 # Create an output container and fill it out <- vec_init(integer(), 4) out <- vec_assign(out, condition, vec_slice(yes, condition)) out <- vec_assign(out, !condition, vec_slice(no, !condition)) out # This is wasteful because you have to materialize the slices of `yes` and # `no` before they can be assigned, and you also have to validate `condition` # multiple times. Using `slice_value` internally performs # `vec_slice(yes, condition)` and `vec_slice(no, !condition)` for you, # but does so in a way that avoids the materialization. out <- vec_init(integer(), 4) out <- vec_assign(out, condition, yes, slice_value = TRUE) out <- vec_assign(out, !condition, no, slice_value = TRUE) out } \keyword{internal} vctrs/man/vec-recode-and-replace.Rd0000644000176200001440000001507515065005761016657 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R \name{vec-recode-and-replace} \alias{vec-recode-and-replace} \alias{vec_recode_values} \alias{vec_replace_values} \title{Recode and replace values} \usage{ vec_recode_values( x, ..., from, to, default = NULL, unmatched = "default", from_as_list_of_vectors = FALSE, to_as_list_of_vectors = FALSE, ptype = NULL, x_arg = "x", from_arg = "from", to_arg = "to", default_arg = "default", error_call = current_env() ) vec_replace_values( x, ..., from, to, from_as_list_of_vectors = FALSE, to_as_list_of_vectors = FALSE, x_arg = "x", from_arg = "from", to_arg = "to", error_call = current_env() ) } \arguments{ \item{x}{A vector.} \item{...}{These dots are for future extensions and must be empty.} \item{from}{Values to locate in \code{x} and map to values in \code{to}. Extra values present in \code{from} but not in \code{x} are ignored. \itemize{ \item If \code{from_as_list_of_vectors = FALSE}, \code{from} must be a single vector of any size, which will be \link[=theory-faq-coercion]{cast} to the type of \code{x}. \item If \code{from_as_list_of_vectors = TRUE}, \code{from} must be a list of vectors of any size, which will individually be \link[=theory-faq-coercion]{cast} to the type of \code{x}. }} \item{to}{Values to map \code{from} to. The common type of \code{to} and \code{default} will determine the type of the output, unless \code{ptype} is provided. \itemize{ \item If \code{to_as_list_of_vectors = FALSE}, \code{to} must be a single vector of size 1 or the same size as \code{from}. \item If \code{to_as_list_of_vectors = TRUE}, \code{to} must be a list of vectors. The list itself must be size 1 or the same size as \code{from}. Each individual vector in the list must be size 1 or the same size as \code{x}. }} \item{default}{Default value to use when there is a value present in \code{x} that is unmatched by a value in \code{from}. By default, a missing value is used as the default value. If supplied, \code{default} must be size 1 or the same size as \code{x}. Can only be set when \code{unmatched = "default"}.} \item{unmatched}{Handling of unmatched locations. One of: \itemize{ \item \code{"default"} to use \code{default} in unmatched locations. \item \code{"error"} to error when there are unmatched locations. }} \item{from_as_list_of_vectors, to_as_list_of_vectors}{Boolean values determining whether to treat \code{from} and \code{to} as vectors or as lists of vectors. See their parameter descriptions for more details.} \item{ptype}{An optional override for the output type, which is usually computed as the common type of \code{to} and \code{default}.} \item{x_arg, from_arg, to_arg, default_arg}{Argument names used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector the same size as \code{x}. \itemize{ \item For \code{vec_recode_values()}, the type of the output is computed as the common type of \code{to} and \code{default}, unless overridden by \code{ptype}. The names of the output come from the names of \code{to} and \code{default}. \item For \code{vec_replace_values()}, the type of the output will have the same type as \code{x}. The names of the output will be the same as the names of \code{x}. } } \description{ \itemize{ \item \code{vec_recode_values()} constructs an entirely new vector by recoding the values from \code{x} specified in \code{from} to the corresponding values in \code{to}. If there are values in \code{x} not matched by \code{from}, then they are recoded to the \code{default} value. \item \code{vec_replace_values()} updates an existing vector by replacing the values from \code{x} specified in \code{from} with the corresponding values in \code{to}. In this case, \code{to} must have the same type as \code{x} and values in \code{x} not matched by \code{from} pass through untouched. } } \examples{ x <- c(1, 2, 3, 1, 2, 4, NA, 5) # Imagine you have a pre-existing lookup table likert <- data.frame( from = c(1, 2, 3, 4, 5), to = c( "Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree" ) ) vec_recode_values(x, from = likert$from, to = likert$to) # If you don't map all of the values, a `default` is used x <- c(1, 2, 3, 1, 2, 4, NA, 5, 6, 7) vec_recode_values(x, from = likert$from, to = likert$to) vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown") # If you want existing `NA`s to pass through, include a mapping for `NA` in # your lookup table likert <- data.frame( from = c(1, 2, 3, 4, 5, NA), to = c( "Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree", NA ) ) vec_recode_values(x, from = likert$from, to = likert$to, default = "Unknown") # If you believe you've captured all of the cases, you can assert this with # `unmatched = "error"`, which will error if you've missed any cases # (including `NA`, which must be explicitly handled) try(vec_recode_values( x, from = likert$from, to = likert$to, unmatched = "error" )) if (require("tibble")) { # If you want to partially update `x`, retaining the type of `x` and # leaving values not covered by `from` alone, use `vec_replace_values()` universities <- c( "Duke", "Fake U", "Duke U", NA, "Chapel Hill", "UNC", NA, "Duke" ) standardize <- tibble::tribble( ~from, ~to, "Duke", "Duke University", "Duke U", "Duke University", "UNC", "UNC Chapel Hill", "Chapel Hill", "UNC Chapel Hill", ) vec_replace_values( universities, from = standardize$from, to = standardize$to ) # In this case, you can use a more powerful feature of # `vec_replace_values()`, `from_as_list_of_vectors`, which allows you to # provide a list of `from` vectors that each match multiple `from` values # to a single `to` value. `tribble()` can help you create these! standardize <- tibble::tribble( ~from, ~to, c("Duke", "Duke U"), "Duke University", c("UNC", "Chapel Hill"), "UNC Chapel Hill", ) # Note how `from` is a list column standardize vec_replace_values( universities, from = standardize$from, to = standardize$to, from_as_list_of_vectors = TRUE ) # `vec_replace_values()` is also a useful way to map from or to `NA` vec_replace_values(universities, from = NA, to = "Unknown") vec_replace_values(universities, from = "Fake U", to = NA) } } vctrs/man/faq-compatibility-types.Rd0000644000176200001440000000636415053353777017265 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq.R \name{faq-compatibility-types} \alias{faq-compatibility-types} \title{FAQ - How is the compatibility of vector types decided?} \description{ Two vectors are \strong{compatible} when you can safely: \itemize{ \item Combine them into one larger vector. \item Assign values from one of the vectors into the other vector. } Examples of compatible types are integer and double vectors. On the other hand, integer and character vectors are not compatible. } \section{Common type of multiple vectors}{ There are two possible outcomes when multiple vectors of different types are combined into a larger vector: \itemize{ \item An incompatible type error is thrown because some of the types are not compatible: \if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1:3) df2 <- data.frame(x = "foo") dplyr::bind_rows(df1, df2) #> Error in `dplyr::bind_rows()`: #> ! Can't combine `..1$x` and `..2$x` . }\if{html}{\out{
}} \item The vectors are combined into a vector that has the common type of all inputs. In this example, the common type of integer and logical is integer: \if{html}{\out{
}}\preformatted{df1 <- data.frame(x = 1:3) df2 <- data.frame(x = FALSE) dplyr::bind_rows(df1, df2) #> x #> 1 1 #> 2 2 #> 3 3 #> 4 0 }\if{html}{\out{
}} } In general, the common type is the \emph{richer} type, in other words the type that can represent the most values. Logical vectors are at the bottom of the hierarchy of numeric types because they can only represent two values (not counting missing values). Then come integer vectors, and then doubles. Here is the vctrs type hierarchy for the fundamental vectors: \figure{coerce.png} } \section{Type conversion and lossy cast errors}{ Type compatibility does not necessarily mean that you can \strong{convert} one type to the other type. That’s because one of the types might support a larger set of possible values. For instance, integer and double vectors are compatible, but double vectors can’t be converted to integer if they contain fractional values. When vctrs can’t convert a vector because the target type is not as rich as the source type, it throws a lossy cast error. Assigning a fractional number to an integer vector is a typical example of a lossy cast error: \if{html}{\out{
}}\preformatted{int_vector <- 1:3 vec_assign(int_vector, 2, 0.001) #> Error in `vec_assign()`: #> ! Can't convert from to due to loss of precision. #> * Locations: 1 }\if{html}{\out{
}} } \section{How to make two vector classes compatible?}{ If you encounter two vector types that you think should be compatible, they might need to implement coercion methods. Reach out to the author(s) of the classes and ask them if it makes sense for their classes to be compatible. These developer FAQ items provide guides for implementing coercion methods: \itemize{ \item For an example of implementing coercion methods for simple vectors, see \code{\link[=howto-faq-coercion]{?howto-faq-coercion}}. \item For an example of implementing coercion methods for data frame subclasses, see \code{\link[=howto-faq-coercion-data-frame]{?howto-faq-coercion-data-frame}}. } } vctrs/man/vec-set.Rd0000644000176200001440000001143615154276515014043 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/set.R \name{vec-set} \alias{vec-set} \alias{vec_set_intersect} \alias{vec_set_difference} \alias{vec_set_union} \alias{vec_set_symmetric_difference} \title{Set operations} \usage{ vec_set_intersect( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) vec_set_difference( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) vec_set_union( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) vec_set_symmetric_difference( x, y, ..., ptype = NULL, x_arg = "x", y_arg = "y", error_call = current_env() ) } \arguments{ \item{x, y}{A pair of vectors.} \item{...}{These dots are for future extensions and must be empty.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type between \code{x} and \code{y}. If supplied, both \code{x} and \code{y} will be cast to this type.} \item{x_arg, y_arg}{Argument names for \code{x} and \code{y}. These are used in error messages.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector of the common type of \code{x} and \code{y} (or \code{ptype}, if supplied) containing the result of the corresponding set function. } \description{ \itemize{ \item \code{vec_set_intersect()} returns all values in both \code{x} and \code{y}. \item \code{vec_set_difference()} returns all values in \code{x} but not \code{y}. Note that this is an asymmetric set difference, meaning it is not commutative. \item \code{vec_set_union()} returns all values in either \code{x} or \code{y}. \item \code{vec_set_symmetric_difference()} returns all values in either \code{x} or \code{y} but not both. This is a commutative difference. } Because these are \emph{set} operations, these functions only return unique values from \code{x} and \code{y}, returned in the order they first appeared in the original input. Names of \code{x} and \code{y} are retained on the result, but names are always taken from \code{x} if the value appears in both inputs. These functions work similarly to \code{\link[base:sets]{base::intersect()}}, \code{\link[base:sets]{base::setdiff()}}, and \code{\link[base:sets]{base::union()}}, but don't strip attributes and can be used with data frames. } \details{ Missing values are treated as equal to other missing values. For doubles and complexes, \code{NaN} are equal to other \code{NaN}, but not to \code{NA}. } \section{Dependencies}{ \subsection{\code{vec_set_intersect()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{\code{vec_set_difference()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } } \subsection{\code{vec_set_union()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} \item \code{\link[=vec_c]{vec_c()}} } } \subsection{\code{vec_set_symmetric_difference()}}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} \item \code{\link[=vec_c]{vec_c()}} } } } \examples{ x <- c(1, 2, 1, 4, 3) y <- c(2, 5, 5, 1) # All unique values in both `x` and `y`. # Duplicates in `x` and `y` are always removed. vec_set_intersect(x, y) # All unique values in `x` but not `y` vec_set_difference(x, y) # All unique values in either `x` or `y` vec_set_union(x, y) # All unique values in either `x` or `y` but not both vec_set_symmetric_difference(x, y) # These functions can also be used with data frames x <- data_frame( a = c(2, 3, 2, 2), b = c("j", "k", "j", "l") ) y <- data_frame( a = c(1, 2, 2, 2, 3), b = c("j", "l", "j", "l", "j") ) vec_set_intersect(x, y) vec_set_difference(x, y) vec_set_union(x, y) vec_set_symmetric_difference(x, y) # Vector names don't affect set membership, but if you'd like to force # them to, you can transform the vector into a two column data frame x <- c(a = 1, b = 2, c = 2, d = 3) y <- c(c = 2, b = 1, a = 3, d = 3) vec_set_intersect(x, y) x <- data_frame(name = names(x), value = unname(x)) y <- data_frame(name = names(y), value = unname(y)) vec_set_intersect(x, y) } vctrs/man/vec_as_names_legacy.Rd0000644000176200001440000000211413505165544016432 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/names.R \name{vec_as_names_legacy} \alias{vec_as_names_legacy} \title{Repair names with legacy method} \usage{ vec_as_names_legacy(names, prefix = "V", sep = "") } \arguments{ \item{names}{A character vector.} \item{prefix, sep}{Prefix and separator for repaired names.} } \description{ This standardises names with the legacy approach that was used in tidyverse packages (such as tibble, tidyr, and readxl) before \code{\link[=vec_as_names]{vec_as_names()}} was implemented. This tool is meant to help transitioning to the new name repairing standard and will be deprecated and removed from the package some time in the future. } \examples{ if (rlang::is_installed("tibble")) { library(tibble) # Names repair is turned off by default in tibble: try(tibble(a = 1, a = 2)) # You can turn it on by supplying a repair method: tibble(a = 1, a = 2, .name_repair = "universal") # If you prefer the legacy method, use `vec_as_names_legacy()`: tibble(a = 1, a = 2, .name_repair = vec_as_names_legacy) } } \keyword{internal} vctrs/man/reference-faq-compatibility.Rd0000644000176200001440000000723614315612253020041 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{reference-faq-compatibility} \alias{reference-faq-compatibility} \title{FAQ - Is my class compatible with vctrs?} \description{ vctrs provides a framework for working with vector classes in a generic way. However, it implements several compatibility fallbacks to base R methods. In this reference you will find how vctrs tries to be compatible with your vector class, and what base methods you need to implement for compatibility. If you’re starting from scratch, we think you’ll find it easier to start using \code{\link[=new_vctr]{new_vctr()}} as documented in \code{vignette("s3-vector")}. This guide is aimed for developers with existing vector classes. \subsection{Aggregate operations with fallbacks}{ All vctrs operations are based on four primitive generics described in the next section. However there are many higher level operations. The most important ones implement fallbacks to base generics for maximum compatibility with existing classes. \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} falls back to the base \code{[} generic if no \code{\link[=vec_proxy]{vec_proxy()}} method is implemented. This way foreign classes that do not implement \code{\link[=vec_restore]{vec_restore()}} can restore attributes based on the new subsetted contents. \item \code{\link[=vec_c]{vec_c()}} and \code{\link[=vec_rbind]{vec_rbind()}} now fall back to \code{\link[base:c]{base::c()}} if the inputs have a common parent class with a \code{c()} method (only if they have no self-to-self \code{vec_ptype2()} method). vctrs works hard to make your \code{c()} method success in various situations (with \code{NULL} and \code{NA} inputs, even as first input which would normally prevent dispatch to your method). The main downside compared to using vctrs primitives is that you can’t combine vectors of different classes since there is no extensible mechanism of coercion in \code{c()}, and it is less efficient in some cases. } } \subsection{The vctrs primitives}{ Most functions in vctrs are aggregate operations: they call other vctrs functions which themselves call other vctrs functions. The dependencies of a vctrs functions are listed in the Dependencies section of its documentation page. Take a look at \code{\link[=vec_count]{vec_count()}} for an example. These dependencies form a tree whose leaves are the four vctrs primitives. Here is the diagram for \code{vec_count()}: \figure{vec-count-deps.png} \subsection{The coercion generics}{ The coercion mechanism in vctrs is based on two generics: \itemize{ \item \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{\link[=vec_cast]{vec_cast()}} } See the \link[=theory-faq-coercion]{theory overview}. Two objects with the same class and the same attributes are always considered compatible by ptype2 and cast. If the attributes or classes differ, they throw an incompatible type error. Coercion errors are the main source of incompatibility with vctrs. See the \link[=howto-faq-coercion]{howto guide} if you need to implement methods for these generics. } \subsection{The proxy and restoration generics}{ \itemize{ \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } These generics are essential for vctrs but mostly optional. \code{vec_proxy()} defaults to an \link{identity} function and you normally don’t need to implement it. The proxy a vector must be one of the atomic vector types, a list, or a data frame. By default, S3 lists that do not inherit from \code{"list"} do not have an identity proxy. In that case, you need to explicitly implement \code{vec_proxy()} or make your class inherit from list. } } } vctrs/man/vec_c.Rd0000644000176200001440000001022215056104732013534 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/c.R \name{vec_c} \alias{vec_c} \title{Combine many vectors into one vector} \usage{ vec_c( ..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), .error_arg = "", .error_call = current_env() ) } \arguments{ \item{...}{Vectors to coerce.} \item{.ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{...}. Alternatively, you can supply \code{.ptype} to give the output known type. If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: this is a convenient way to make production code demand fixed types.} \item{.name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item \code{"inner"}, in which case outer names are ignored, and inner names are used if they exist. Note that outer names may still be used to provide informative error messages. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{.name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} \item{.error_arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector with class given by \code{.ptype}, and length equal to the sum of the \code{vec_size()} of the contents of \code{...}. The vector will have names if the individual components have names (inner names) or if the arguments are named (outer names). If both inner and outer names are present, an error is thrown unless a \code{.name_spec} is provided. } \description{ Combine all arguments into a new vector of common type. } \section{Invariants}{ \itemize{ \item \code{vec_size(vec_c(x, y)) == vec_size(x) + vec_size(y)} \item \code{vec_ptype(vec_c(x, y)) == vec_ptype_common(x, y)}. } } \section{Dependencies}{ \subsection{vctrs dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_proxy]{vec_proxy()}} \item \code{\link[=vec_restore]{vec_restore()}} } } \subsection{base dependencies}{ \itemize{ \item \code{\link[base:c]{base::c()}} } If inputs inherit from a common class hierarchy, \code{vec_c()} falls back to \code{base::c()} if there exists a \code{c()} method implemented for this class hierarchy. } } \examples{ vec_c(FALSE, 1L, 1.5) # Date/times -------------------------- c(Sys.Date(), Sys.time()) c(Sys.time(), Sys.Date()) vec_c(Sys.Date(), Sys.time()) vec_c(Sys.time(), Sys.Date()) # Factors ----------------------------- c(factor("a"), factor("b")) vec_c(factor("a"), factor("b")) # By default, named inputs must be length 1: vec_c(name = 1) try(vec_c(name = 1:3)) # Pass a name specification to work around this: vec_c(name = 1:3, .name_spec = "{outer}_{inner}") # See `?name_spec` for more examples of name specifications. } \seealso{ \code{\link[=vec_cbind]{vec_cbind()}}/\code{\link[=vec_rbind]{vec_rbind()}} for combining data frames by rows or columns. } vctrs/man/howto-faq-fix-scalar-type-error.Rd0000644000176200001440000001025715132202571020522 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/faq-developer.R \name{howto-faq-fix-scalar-type-error} \alias{howto-faq-fix-scalar-type-error} \alias{howto_faq_fix_scalar_type_error} \title{FAQ - Why isn't my class treated as a vector?} \description{ The tidyverse is a bit stricter than base R regarding what kind of objects are considered as vectors (see the \link[=faq-error-scalar-type]{user FAQ} about this topic). Sometimes vctrs won’t treat your class as a vector when it should. \subsection{Why isn’t my list class considered a vector?}{ By default, S3 lists are not considered to be vectors by vctrs: \if{html}{\out{
}}\preformatted{my_list <- structure(list(), class = "my_class") vctrs::vec_is(my_list) #> [1] FALSE }\if{html}{\out{
}} To be treated as a vector, the class must either inherit from \code{"list"} explicitly: \if{html}{\out{
}}\preformatted{my_explicit_list <- structure(list(), class = c("my_class", "list")) vctrs::vec_is(my_explicit_list) #> [1] TRUE }\if{html}{\out{
}} Or it should implement a \code{vec_proxy()} method that returns its input if explicit inheritance is not possible or troublesome: \if{html}{\out{
}}\preformatted{#' @export vec_proxy.my_class <- function(x, ...) x vctrs::vec_is(my_list) #> [1] FALSE }\if{html}{\out{
}} Note that explicit inheritance is the preferred way because this makes it possible for your class to dispatch on \code{list} methods of S3 generics: \if{html}{\out{
}}\preformatted{my_generic <- function(x) UseMethod("my_generic") my_generic.list <- function(x) "dispatched!" my_generic(my_list) #> Error in `UseMethod()`: #> ! no applicable method for 'my_generic' applied to an object of class "my_class" my_generic(my_explicit_list) #> [1] "dispatched!" }\if{html}{\out{
}} } \subsection{Why isn’t my data frame class considered a vector?}{ The most likely explanation is that the data frame has not been properly constructed. However, if you get an “Input must be a vector” error with a data frame subclass, it probably means that the data frame has not been properly constructed. The main cause of these errors are data frames whose \emph{base class} is not \code{"data.frame"}: \if{html}{\out{
}}\preformatted{my_df <- data.frame(x = 1) class(my_df) <- c("data.frame", "my_class") vctrs::obj_check_vector(my_df) #> Error: #> ! `my_df` must be a vector, not a object. #> x Detected incompatible data frame structure. A data frame is normally treated as a vector, but an incompatible class ordering was detected. To be compatible, the subclass must come before , not after. Class: . #> i If this object comes from a package, please report this error to the package author. #> i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. }\if{html}{\out{
}} This is problematic as many tidyverse functions won’t work properly: \if{html}{\out{
}}\preformatted{dplyr::slice(my_df, 1) #> Error in `vec_slice()`: #> ! `x` must be a vector, not a object. #> x Detected incompatible data frame structure. A data frame is normally treated as a vector, but an incompatible class ordering was detected. To be compatible, the subclass must come before , not after. Class: . #> i If this object comes from a package, please report this error to the package author. #> i Read our FAQ about creating vector types (`?vctrs::howto_faq_fix_scalar_type_error`) to learn more. }\if{html}{\out{
}} It is generally not appropriate to declare your class to be a superclass of another class. We generally consider this undefined behaviour (UB). To fix these errors, you can simply change the construction of your data frame class so that \code{"data.frame"} is a base class, i.e. it should come last in the class vector: \if{html}{\out{
}}\preformatted{class(my_df) <- c("my_class", "data.frame") vctrs::obj_check_vector(my_df) dplyr::slice(my_df, 1) #> x #> 1 1 }\if{html}{\out{
}} } } vctrs/man/int64.Rd0000644000176200001440000000134413532250523013423 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-integer64.R \name{vec_ptype_full.integer64} \alias{vec_ptype_full.integer64} \alias{vec_ptype_abbr.integer64} \alias{vec_ptype2.integer64} \alias{vec_cast.integer64} \title{64 bit integers} \usage{ \method{vec_ptype_full}{integer64}(x, ...) \method{vec_ptype_abbr}{integer64}(x, ...) \method{vec_ptype2}{integer64}(x, y, ...) \method{vec_cast}{integer64}(x, to, ...) } \description{ A \code{integer64} is a 64 bits integer vector, implemented in the \code{bit64} package. } \details{ These functions help the \code{integer64} class from \code{bit64} in to the vctrs type system by providing coercion functions and casting functions. } \keyword{internal} vctrs/man/table.Rd0000644000176200001440000000044614276722575013571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-table.R \name{table} \alias{table} \title{Table S3 class} \description{ These functions help the base table class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/vec_arith.Rd0000644000176200001440000000373513566016500014433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/arith.R \name{vec_arith} \alias{vec_arith} \alias{vec_arith.default} \alias{vec_arith.logical} \alias{vec_arith.numeric} \alias{vec_arith_base} \alias{MISSING} \title{Arithmetic operations} \usage{ vec_arith(op, x, y, ...) \method{vec_arith}{default}(op, x, y, ...) \method{vec_arith}{logical}(op, x, y, ...) \method{vec_arith}{numeric}(op, x, y, ...) vec_arith_base(op, x, y) MISSING() } \arguments{ \item{op}{An arithmetic operator as a string} \item{x, y}{A pair of vectors. For \code{!}, unary \code{+} and unary \code{-}, \code{y} will be a sentinel object of class \code{MISSING}, as created by \code{MISSING()}.} \item{...}{These dots are for future extensions and must be empty.} } \description{ This generic provides a common double dispatch mechanism for all infix operators (\code{+}, \code{-}, \code{/}, \code{*}, \code{^}, \code{\%\%}, \code{\%/\%}, \code{!}, \code{&}, \code{|}). It is used to power the default arithmetic and boolean operators for \link{vctr}s objects, overcoming the limitations of the base \link{Ops} generic. } \details{ \code{vec_arith_base()} is provided as a convenience for writing methods. It recycles \code{x} and \code{y} to common length then calls the base operator with the underlying \code{\link[=vec_data]{vec_data()}}. \code{vec_arith()} is also used in \code{diff.vctrs_vctr()} method via \code{-}. } \examples{ d <- as.Date("2018-01-01") dt <- as.POSIXct("2018-01-02 12:00") t <- as.difftime(12, unit = "hours") vec_arith("-", dt, 1) vec_arith("-", dt, t) vec_arith("-", dt, d) vec_arith("+", dt, 86400) vec_arith("+", dt, t) vec_arith("+", t, t) vec_arith("/", t, t) vec_arith("/", t, 2) vec_arith("*", t, 2) } \seealso{ \code{\link[=stop_incompatible_op]{stop_incompatible_op()}} for signalling that an arithmetic operation is not permitted/supported. See \code{\link[=vec_math]{vec_math()}} for the equivalent for the unary mathematical functions. } \keyword{internal} vctrs/man/fields.Rd0000644000176200001440000000154213566016500013727 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fields.R \name{fields} \alias{fields} \alias{n_fields} \alias{field} \alias{field<-} \title{Tools for accessing the fields of a record.} \usage{ fields(x) n_fields(x) field(x, i) field(x, i) <- value } \arguments{ \item{x}{A \link{rcrd}, i.e. a list of equal length vectors with unique names.} } \description{ A \link{rcrd} behaves like a vector, so \code{length()}, \code{names()}, and \code{$} can not provide access to the fields of the underlying list. These helpers do: \code{fields()} is equivalent to \code{names()}; \code{n_fields()} is equivalent to \code{length()}; \code{field()} is equivalent to \code{$}. } \examples{ x <- new_rcrd(list(x = 1:3, y = 3:1, z = letters[1:3])) n_fields(x) fields(x) field(x, "y") field(x, "y") <- runif(3) field(x, "y") } \keyword{internal} vctrs/man/vec_equal.Rd0000644000176200001440000000256515157004241014430 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/equal.R \name{vec_equal} \alias{vec_equal} \title{Equality} \usage{ vec_equal(x, y, na_equal = FALSE, .ptype = NULL) } \arguments{ \item{x, y}{Vectors with compatible types and lengths.} \item{na_equal}{Should \code{NA} values be considered equal?} \item{.ptype}{Override to optionally specify common type} } \value{ A logical vector the same size as the common size of \code{x} and \code{y}. Will only contain \code{NA}s if \code{na_equal} is \code{FALSE}. } \description{ \code{vec_equal()} tests if two vectors are equal. } \details{ Attributes of \code{x} and \code{y} are considered equal if they have the same names and values, even if the attribute ordering is different. This reflects the idea that attributes are treated as a map rather than an ordered list. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_cast_common]{vec_cast_common()}} with fallback \item \code{\link[=vec_recycle_common]{vec_recycle_common()}} \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} } } \examples{ vec_equal(c(TRUE, FALSE, NA), FALSE) vec_equal(c(TRUE, FALSE, NA), FALSE, na_equal = TRUE) vec_equal(5, 1:10) vec_equal("d", letters[1:10]) df <- data.frame(x = c(1, 1, 2, 1), y = c(1, 2, 1, NA)) vec_equal(df, data.frame(x = 1, y = 2)) } \seealso{ \code{\link[=vec_detect_missing]{vec_detect_missing()}} } vctrs/man/vctrs-data-frame.Rd0000644000176200001440000000075714276722575015647 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{vctrs-data-frame} \alias{vctrs-data-frame} \alias{vec_ptype2.data.frame} \alias{vec_cast.data.frame} \title{vctrs methods for data frames} \usage{ \method{vec_ptype2}{data.frame}(x, y, ...) \method{vec_cast}{data.frame}(x, to, ...) } \description{ These functions help the base data.frame class fit into the vctrs type system by providing coercion and casting functions. } \keyword{internal} vctrs/man/list_combine.Rd0000644000176200001440000001523715072256373015146 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/list-combine.R \name{list_combine} \alias{list_combine} \title{Combine a list of vectors} \usage{ list_combine( x, ..., indices, size, default = NULL, unmatched = "default", multiple = "last", slice_x = FALSE, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", "universal_quiet"), x_arg = "x", indices_arg = "indices", default_arg = "default", error_call = current_env() ) } \arguments{ \item{x}{A list of vectors. If \code{slice_x = FALSE}, each element must be size 1 or the same size as its corresponding index in \code{indices} after that index has been converted to a positive integer location vector with \code{\link[=vec_as_location]{vec_as_location()}}. If \code{slice_x = TRUE}, each element must be size 1 or size \code{size}.} \item{...}{These dots are for future extensions and must be empty.} \item{indices}{A list of indices. Indices can be provided in one of two forms: \itemize{ \item Positive integer vectors of locations less than or equal to \code{size}. Each vector can be any size. \item Logical vectors of size \code{size} where \code{TRUE} denotes the location in the output to assign to, and the location from the \code{x} element to pull from. Both \code{NA} and \code{FALSE} are considered unmatched. } The size of \code{indices} must match the size of \code{x}.} \item{size}{The output size.} \item{default}{If \code{NULL}, a missing value is used for locations unmatched by \code{indices}, otherwise the provided \code{default} is used. If provided, \code{default} must be size 1 or size \code{size}. Can only be set when \code{unmatched = "default"}.} \item{unmatched}{Handling of locations in the output unmatched by \code{indices}. One of: \itemize{ \item \code{"default"} to use \code{default} in unmatched locations. \item \code{"error"} to error when there are unmatched locations. }} \item{multiple}{Handling of locations in the output matched by multiple \code{indices}. \itemize{ \item \code{"last"} uses the value from the last matched index. \item \code{"first"} uses the value from the first matched index. } Note that \code{multiple} only applies across \code{indices}. Within a single index if there are overlapping locations, then the last will always win. This can only occur with integer \code{indices}, as you can't overlap within an index when using logical \code{indices}.} \item{slice_x}{A boolean. If \code{TRUE}, each element of \code{x} is sliced by its corresponding index from \code{indices} before being assigned into the output, which is effectively the same as \code{map2(list(x, indices), function(x, index) vec_slice(x, index))}, but is optimized to avoid materializing the slices. See the \code{slice_value} argument of \code{\link[=vec_assign]{vec_assign()}} for more examples.} \item{ptype}{If \code{NULL}, the output type is determined by computing the common type across all elements of \code{x} and \code{default}. Alternatively, you can supply \code{ptype} to give the output a known type.} \item{name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item \code{"inner"}, in which case outer names are ignored, and inner names are used if they exist. Note that outer names may still be used to provide informative error messages. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} \item{x_arg, indices_arg, default_arg}{An argument name as a string. This argument will be mentioned in error messages as the input that is at the origin of a problem.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A vector of type \code{vec_ptype_common(!!!x)}, or \code{ptype}, if specified. The size of the output is determined by \code{size}. } \description{ \code{list_combine()} is a more powerful version of \code{\link[=vec_c]{vec_c()}}. While \code{vec_c()} is used for sequential combination, \code{list_combine()} takes a list of \code{indices} that specify where to place each element in the output. If you have a list of vectors and just need to combine them sequentially, you'll still want to use \code{vec_c(!!!x)}. } \examples{ # Combine a list of vectors using # a list of `indices` x <- list( 1:3, 4:6, 7:8 ) indices <- list( c(1, 3, 7), c(8, 6, 5), c(2, 4) ) list_combine(x, indices = indices, size = 8) # Overlapping `indices` are allowed. # The last match "wins" by default. x <- list( 1:3, 4:6 ) indices <- list( c(1, 2, 3), c(1, 2, 6) ) list_combine(x, indices = indices, size = 6) # Use `multiple` to force the first match to win. # This is similar to how `dplyr::case_when()` works. list_combine(x, indices = indices, size = 6, multiple = "first") # Works with data frames as well. # Now how index 2 is not assigned to. x <- list( data.frame(x = 1:2, y = c("a", "b")), data.frame(x = 3:4, y = c("c", "d")) ) indices <- list( c(4, 1), c(3, NA) ) list_combine(x, indices = indices, size = 4) # You can use `size` to combine into a larger object than you have values for list_combine(list(1:2, 4:5), indices = list(1:2, 4:5), size = 8) # Additionally specifying `default` allows you to control the value used in # unfilled locations list_combine( list(1:2, 4:5), indices = list(1:2, 4:5), size = 8, default = 0L ) # Alternatively, if you'd like to assert that you've covered all output # locations through `indices`, set `unmatched = "error"`. # Here, we've set the size to 5 but missed location 3: try(list_combine( list(1:2, 4:5), indices = list(1:2, 4:5), size = 5, unmatched = "error" )) } vctrs/man/vec_repeat.Rd0000644000176200001440000000151714276722575014617 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_repeat} \alias{vec_repeat} \title{Expand the length of a vector} \usage{ vec_repeat(x, each = 1L, times = 1L) } \arguments{ \item{x}{A vector.} \item{each}{Number of times to repeat each element of \code{x}.} \item{times}{Number of times to repeat the whole vector of \code{x}.} } \value{ A vector the same type as \code{x} with size \code{vec_size(x) * times * each}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_repeat()} has been replaced with \code{\link[=vec_rep]{vec_rep()}} and \code{\link[=vec_rep_each]{vec_rep_each()}} and is deprecated as of vctrs 0.3.0. } \keyword{internal} vctrs/man/data_frame.Rd0000644000176200001440000000716114511524374014553 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-data-frame.R \name{data_frame} \alias{data_frame} \title{Construct a data frame} \usage{ data_frame( ..., .size = NULL, .name_repair = c("check_unique", "unique", "universal", "minimal", "unique_quiet", "universal_quiet"), .error_call = current_env() ) } \arguments{ \item{...}{Vectors to become columns in the data frame. When inputs are named, those names are used for column names.} \item{.size}{The number of rows in the data frame. If \code{NULL}, this will be computed as the common size of the inputs.} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"}, \code{"minimal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{data_frame()} constructs a data frame. It is similar to \code{\link[base:data.frame]{base::data.frame()}}, but there are a few notable differences that make it more in line with vctrs principles. The Properties section outlines these. } \details{ If no column names are supplied, \code{""} will be used as a default name for all columns. This is applied before name repair occurs, so the default name repair of \code{"check_unique"} will error if any unnamed inputs are supplied and \code{"unique"} (or \code{"unique_quiet"}) will repair the empty string column names appropriately. If the column names don't matter, use a \code{"minimal"} name repair for convenience and performance. } \section{Properties}{ \itemize{ \item Inputs are \link[=theory-faq-recycling]{recycled} to a common size with \code{\link[=vec_recycle_common]{vec_recycle_common()}}. \item With the exception of data frames, inputs are not modified in any way. Character vectors are never converted to factors, and lists are stored as-is for easy creation of list-columns. \item Unnamed data frame inputs are automatically unpacked. Named data frame inputs are stored unmodified as data frame columns. \item \code{NULL} inputs are completely ignored. \item The dots are dynamic, allowing for splicing of lists with \verb{!!!} and unquoting. } } \examples{ data_frame(x = 1, y = 2) # Inputs are recycled using tidyverse recycling rules data_frame(x = 1, y = 1:3) # Strings are never converted to factors class(data_frame(x = "foo")$x) # List columns can be easily created df <- data_frame(x = list(1:2, 2, 3:4), y = 3:1) # However, the base print method is suboptimal for displaying them, # so it is recommended to convert them to tibble if (rlang::is_installed("tibble")) { tibble::as_tibble(df) } # Named data frame inputs create data frame columns df <- data_frame(x = data_frame(y = 1:2, z = "a")) # The `x` column itself is another data frame df$x # Again, it is recommended to convert these to tibbles for a better # print method if (rlang::is_installed("tibble")) { tibble::as_tibble(df) } # Unnamed data frame input is automatically unpacked data_frame(x = 1, data_frame(y = 1:2, z = "a")) } \seealso{ \code{\link[=df_list]{df_list()}} for safely creating a data frame's underlying data structure from individual columns. \code{\link[=new_data_frame]{new_data_frame()}} for constructing the actual data frame from that underlying data structure. Together, these can be useful for developers when creating new data frame subclasses supporting standard evaluation. } vctrs/man/vec_as_location.Rd0000644000176200001440000001136415056611175015621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/subscript-loc.R \name{vec_as_location} \alias{vec_as_location} \alias{num_as_location} \alias{vec_as_location2} \alias{num_as_location2} \title{Create a vector of locations} \usage{ vec_as_location( i, n, names = NULL, ..., missing = c("propagate", "remove", "error"), arg = caller_arg(i), call = caller_env() ) num_as_location( i, n, ..., missing = c("propagate", "remove", "error"), negative = c("invert", "error", "ignore"), oob = c("error", "remove", "extend"), zero = c("remove", "error", "ignore"), arg = caller_arg(i), call = caller_env() ) vec_as_location2( i, n, names = NULL, ..., missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env() ) num_as_location2( i, n, ..., negative = c("error", "ignore"), missing = c("error", "propagate"), arg = caller_arg(i), call = caller_env() ) } \arguments{ \item{i}{An index vector to convert.} \item{n}{A single integer representing the total size of the object that \code{i} is meant to index into.} \item{names}{If \code{i} is a character vector, \code{names} should be a character vector that \code{i} will be matched against to construct the index. Otherwise, not used. The default value of \code{NULL} will result in an error if \code{i} is a character vector.} \item{...}{These dots are for future extensions and must be empty.} \item{missing}{How should missing \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"propagate"} returns them as is. \item \code{"remove"} removes them. } By default, vector subscripts propagate missing values but scalar subscripts error on them. Propagated missing values can't be combined with negative indices when \code{negative = "invert"}, because they can't be meaningfully inverted.} \item{arg}{The argument name to be displayed in error messages.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{negative}{How should negative \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"ignore"} returns them as is. \item \code{"invert"} returns the positive location generated by inverting the negative location. When inverting, positive and negative locations can't be mixed. This option is only applicable for \code{num_as_location()}. }} \item{oob}{How should out-of-bounds \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"remove"} removes both positive and negative out-of-bounds locations. \item \code{"extend"} allows positive out-of-bounds locations if they directly follow the end of a vector. This can be used to implement extendable vectors, like \code{letters[1:30]}. }} \item{zero}{How should zero \code{i} values be handled? \itemize{ \item \code{"error"} throws an error. \item \code{"remove"} removes them. \item \code{"ignore"} returns them as is. }} } \value{ \itemize{ \item \code{vec_as_location()} and \code{num_as_location()} return an integer vector that can be used as an index in a subsetting operation. \item \code{vec_as_location2()} and \code{num_as_location2()} return an integer of size 1 that can be used a scalar index for extracting an element. } } \description{ These helpers provide a means of standardizing common indexing methods such as integer, character or logical indexing. \itemize{ \item \code{vec_as_location()} accepts integer, character, or logical vectors of any size. The output is always an integer vector that is suitable for subsetting with \code{[} or \code{\link[=vec_slice]{vec_slice()}}. It might be a different size than the input because negative selections are transformed to positive ones and logical vectors are transformed to a vector of indices for the \code{TRUE} locations. \item \code{vec_as_location2()} accepts a single number or string. It returns a single location as a integer vector of size 1. This is suitable for extracting with \code{[[}. \item \code{num_as_location()} and \code{num_as_location2()} are specialized variants that have extra options for numeric indices. } } \examples{ x <- array(1:6, c(2, 3)) dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3")) # The most common use case validates row indices vec_as_location(1, vec_size(x)) # Negative indices can be used to index from the back vec_as_location(-1, vec_size(x)) # Character vectors can be used if `names` are provided vec_as_location("r2", vec_size(x), rownames(x)) # You can also construct an index for dimensions other than the first vec_as_location(c("c2", "c1"), ncol(x), colnames(x)) } \keyword{internal} vctrs/man/vec_count.Rd0000644000176200001440000000340614315060307014444 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \name{vec_count} \alias{vec_count} \title{Count unique values in a vector} \usage{ vec_count(x, sort = c("count", "key", "location", "none")) } \arguments{ \item{x}{A vector (including a data frame).} \item{sort}{One of "count", "key", "location", or "none". \itemize{ \item "count", the default, puts most frequent values at top \item "key", orders by the output key column (i.e. unique values of \code{x}) \item "location", orders by location where key first seen. This is useful if you want to match the counts up to other unique/duplicated functions. \item "none", leaves unordered. This is not guaranteed to produce the same ordering across R sessions, but is the fastest method. }} } \value{ A data frame with columns \code{key} (same type as \code{x}) and \code{count} (an integer vector). } \description{ Count the number of unique values in a vector. \code{vec_count()} has two important differences to \code{table()}: it returns a data frame, and when given multiple inputs (as a data frame), it only counts combinations that appear in the input. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_proxy_equal]{vec_proxy_equal()}} \item \code{\link[=vec_slice]{vec_slice()}} \item \code{\link[=vec_order]{vec_order()}} } } \examples{ vec_count(mtcars$vs) vec_count(iris$Species) # If you count a data frame you'll get a data frame # column in the output str(vec_count(mtcars[c("vs", "am")])) # Sorting --------------------------------------- x <- letters[rpois(100, 6)] # default is to sort by frequency vec_count(x) # by can sort by key vec_count(x, sort = "key") # or location of first value vec_count(x, sort = "location") head(x) # or not at all vec_count(x, sort = "none") } vctrs/man/vec_expand_grid.Rd0000644000176200001440000000462514362266120015607 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/expand.R \name{vec_expand_grid} \alias{vec_expand_grid} \title{Create a data frame from all combinations of the inputs} \usage{ vec_expand_grid( ..., .vary = "slowest", .name_repair = "check_unique", .error_call = current_env() ) } \arguments{ \item{...}{Name-value pairs. The name will become the column name in the resulting data frame.} \item{.vary}{One of: \itemize{ \item \code{"slowest"} to vary the first column slowest. This produces sorted output and is generally the most useful. \item \code{"fastest"} to vary the first column fastest. This matches the behavior of \code{\link[=expand.grid]{expand.grid()}}. }} \item{.name_repair}{One of \code{"check_unique"}, \code{"unique"}, \code{"universal"}, \code{"minimal"}, \code{"unique_quiet"}, or \code{"universal_quiet"}. See \code{\link[=vec_as_names]{vec_as_names()}} for the meaning of these options.} \item{.error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ A data frame with as many columns as there are inputs in \code{...} and as many rows as the \code{\link[=prod]{prod()}} of the sizes of the inputs. } \description{ \code{vec_expand_grid()} creates a new data frame by creating a grid of all possible combinations of the input vectors. It is inspired by \code{\link[=expand.grid]{expand.grid()}}. Compared with \code{expand.grid()}, it: \itemize{ \item Produces sorted output by default by varying the first column the slowest, rather than the fastest. Control this with \code{.vary}. \item Never converts strings to factors. \item Does not add additional attributes. \item Drops \code{NULL} inputs. \item Can expand any vector type, including data frames and \link[=new_rcrd]{records}. } } \details{ If any input is empty (i.e. size 0), then the result will have 0 rows. If no inputs are provided, the result is a 1 row data frame with 0 columns. This is consistent with the fact that \code{prod()} with no inputs returns \code{1}. } \examples{ vec_expand_grid(x = 1:2, y = 1:3) # Use `.vary` to match `expand.grid()`: vec_expand_grid(x = 1:2, y = 1:3, .vary = "fastest") # Can also expand data frames vec_expand_grid( x = data_frame(a = 1:2, b = 3:4), y = 1:4 ) } vctrs/man/vec-rep.Rd0000644000176200001440000000657315154276515014044 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rep.R \name{vec-rep} \alias{vec-rep} \alias{vec_rep} \alias{vec_rep_each} \alias{vec_unrep} \title{Repeat a vector} \usage{ vec_rep( x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times" ) vec_rep_each( x, times, ..., error_call = current_env(), x_arg = "x", times_arg = "times" ) vec_unrep(x) } \arguments{ \item{x}{A vector.} \item{times}{For \code{vec_rep()}, a single integer for the number of times to repeat the entire vector. For \code{vec_rep_each()}, an integer vector of the number of times to repeat each element of \code{x}. \code{times} will be \link[=theory-faq-recycling]{recycled} to the size of \code{x}.} \item{...}{These dots are for future extensions and must be empty.} \item{error_call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} \item{x_arg, times_arg}{Argument names for errors.} } \value{ For \code{vec_rep()}, a vector the same type as \code{x} with size \code{vec_size(x) * times}. For \code{vec_rep_each()}, a vector the same type as \code{x} with size \code{sum(vec_recycle(times, vec_size(x)))}. For \code{vec_unrep()}, a data frame with two columns, \code{key} and \code{times}. \code{key} is a vector with the same type as \code{x}, and \code{times} is an integer vector. } \description{ \itemize{ \item \code{vec_rep()} repeats an entire vector a set number of \code{times}. \item \code{vec_rep_each()} repeats each element of a vector a set number of \code{times}. \item \code{vec_unrep()} compresses a vector with repeated values. The repeated values are returned as a \code{key} alongside the number of \code{times} each key is repeated. } } \details{ Using \code{vec_unrep()} and \code{vec_rep_each()} together is similar to using \code{\link[base:rle]{base::rle()}} and \code{\link[base:rle]{base::inverse.rle()}}. The following invariant shows the relationship between the two functions: \if{html}{\out{
}}\preformatted{compressed <- vec_unrep(x) identical(x, vec_rep_each(compressed$key, compressed$times)) }\if{html}{\out{
}} There are two main differences between \code{vec_unrep()} and \code{\link[base:rle]{base::rle()}}: \itemize{ \item \code{vec_unrep()} treats adjacent missing values as equivalent, while \code{rle()} treats them as different values. \item \code{vec_unrep()} works along the size of \code{x}, while \code{rle()} works along its length. This means that \code{vec_unrep()} works on data frames by compressing repeated rows. } } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_slice]{vec_slice()}} } } \examples{ # Repeat the entire vector vec_rep(1:2, 3) # Repeat within each vector vec_rep_each(1:2, 3) x <- vec_rep_each(1:2, c(3, 4)) x # After using `vec_rep_each()`, you can recover the original vector # with `vec_unrep()` vec_unrep(x) df <- data.frame(x = 1:2, y = 3:4) # `rep()` repeats columns of data frames, and returns lists rep(df, each = 2) # `vec_rep()` and `vec_rep_each()` repeat rows, and return data frames vec_rep(df, 2) vec_rep_each(df, 2) # `rle()` treats adjacent missing values as different y <- c(1, NA, NA, 2) rle(y) # `vec_unrep()` treats them as equivalent vec_unrep(y) } vctrs/man/list-of-attributes.Rd0000644000176200001440000000141715120272011016210 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-list-of.R \name{list-of-attributes} \alias{list-of-attributes} \alias{list_of_ptype} \alias{list_of_size} \title{\code{list_of} attributes} \usage{ list_of_ptype(x) list_of_size(x) } \arguments{ \item{x}{A \link[=list_of]{list_of}.} } \description{ \itemize{ \item \code{list_of_ptype()} returns the \code{ptype} required by the \code{list_of}. If no \code{ptype} is required, then \code{NULL} is returned. \item \code{list_of_size()} returns the \code{size} required by the \code{list_of}. If no \code{size} is required, then \code{NULL} is returned. } } \examples{ x <- list_of(1, 2) list_of_ptype(x) list_of_size(x) x <- list_of(.ptype = integer(), .size = 5) list_of_ptype(x) list_of_size(x) } vctrs/man/s3_register.Rd0000644000176200001440000000507414315060307014713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/register-s3.R \name{s3_register} \alias{s3_register} \title{Register a method for a suggested dependency} \arguments{ \item{generic}{Name of the generic in the form \code{pkg::generic}.} \item{class}{Name of the class} \item{method}{Optionally, the implementation of the method. By default, this will be found by looking for a function called \code{generic.class} in the package environment. Note that providing \code{method} can be dangerous if you use devtools. When the namespace of the method is reloaded by \code{devtools::load_all()}, the function will keep inheriting from the old namespace. This might cause crashes because of dangling \code{.Call()} pointers.} } \description{ Generally, the recommend way to register an S3 method is to use the \code{S3Method()} namespace directive (often generated automatically by the \verb{@export} roxygen2 tag). However, this technique requires that the generic be in an imported package, and sometimes you want to suggest a package, and only provide a method when that package is loaded. \code{s3_register()} can be called from your package's \code{.onLoad()} to dynamically register a method only if the generic's package is loaded. } \details{ For R 3.5.0 and later, \code{s3_register()} is also useful when demonstrating class creation in a vignette, since method lookup no longer always involves the lexical scope. For R 3.6.0 and later, you can achieve a similar effect by using "delayed method registration", i.e. placing the following in your \code{NAMESPACE} file: \if{html}{\out{
}}\preformatted{if (getRversion() >= "3.6.0") \{ S3method(package::generic, class) \} }\if{html}{\out{
}} } \section{Usage in other packages}{ To avoid taking a dependency on vctrs, you copy the source of \href{https://github.com/r-lib/vctrs/blob/main/R/register-s3.R}{\code{s3_register()}} into your own package. It is licensed under the permissive \href{https://choosealicense.com/licenses/unlicense/}{unlicense} to make it crystal clear that we're happy for you to do this. There's no need to include the license or even credit us when using this function. } \examples{ # A typical use case is to dynamically register tibble/pillar methods # for your class. That way you avoid creating a hard dependency on packages # that are not essential, while still providing finer control over # printing when they are used. .onLoad <- function(...) { s3_register("pillar::pillar_shaft", "vctrs_vctr") s3_register("tibble::type_sum", "vctrs_vctr") } } \keyword{internal} vctrs/man/vec_unchop.Rd0000644000176200001440000000530315072256373014621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_unchop} \alias{vec_unchop} \title{Chopping} \usage{ vec_unchop( x, indices = NULL, ptype = NULL, name_spec = NULL, name_repair = c("minimal", "unique", "check_unique", "universal") ) } \arguments{ \item{x}{A list} \item{indices}{A list of positive integer vectors specifying the locations to place elements of \code{x} in. Each element of \code{x} is recycled to the size of the corresponding index vector. The size of \code{indices} must match the size of \code{x}. If \code{NULL}, \code{x} is combined in the order it is provided in, which is equivalent to using \code{\link[=vec_c]{vec_c()}}.} \item{ptype}{If \code{NULL}, the default, the output type is determined by computing the common type across all elements of \code{x}. Alternatively, you can supply \code{ptype} to give the output a known type.} \item{name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a name, when these inputs are themselves named, like \code{outer = c(inner = 1)}, or when they have length greater than 1: \code{outer = 1:2}. By default, these cases trigger an error. You can resolve the error by providing a specification that describes how to combine the names or the indices of the inner vector with the name of the input. This specification can be: \itemize{ \item A function of two arguments. The outer name is passed as a string to the first argument, and the inner names or positions are passed as second argument. \item An anonymous function as a purrr-style formula. \item A glue specification of the form \code{"{outer}_{inner}"}. \item \code{"inner"}, in which case outer names are ignored, and inner names are used if they exist. Note that outer names may still be used to provide informative error messages. \item An \code{\link[rlang:zap]{rlang::zap()}} object, in which case both outer and inner names are ignored and the result is unnamed. } See the \link[=name_spec]{name specification topic}.} \item{name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} } \value{ A vector of type \code{vec_ptype_common(!!!x)}, or \code{ptype}, if specified. The size is computed as \code{vec_size_common(!!!indices)} unless the indices are \code{NULL}, in which case the size is \code{vec_size_common(!!!x)}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{vec_unchop()} has been renamed to \code{\link[=list_unchop]{list_unchop()}} and is deprecated as of vctrs 0.5.0. } \keyword{internal} vctrs/man/new_date.Rd0000644000176200001440000000440214276722575014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-date-time.R \name{new_date} \alias{new_date} \alias{new_datetime} \alias{new_duration} \alias{vec_ptype2.Date} \alias{vec_ptype2.POSIXct} \alias{vec_ptype2.POSIXlt} \alias{vec_ptype2.difftime} \alias{vec_cast.Date} \alias{vec_cast.POSIXct} \alias{vec_cast.POSIXlt} \alias{vec_cast.difftime} \alias{vec_arith.Date} \alias{vec_arith.POSIXct} \alias{vec_arith.POSIXlt} \alias{vec_arith.difftime} \title{Date, date-time, and duration S3 classes} \usage{ new_date(x = double()) new_datetime(x = double(), tzone = "") new_duration(x = double(), units = c("secs", "mins", "hours", "days", "weeks")) \method{vec_ptype2}{Date}(x, y, ...) \method{vec_ptype2}{POSIXct}(x, y, ...) \method{vec_ptype2}{POSIXlt}(x, y, ...) \method{vec_ptype2}{difftime}(x, y, ...) \method{vec_cast}{Date}(x, to, ...) \method{vec_cast}{POSIXct}(x, to, ...) \method{vec_cast}{POSIXlt}(x, to, ...) \method{vec_cast}{difftime}(x, to, ...) \method{vec_arith}{Date}(op, x, y, ...) \method{vec_arith}{POSIXct}(op, x, y, ...) \method{vec_arith}{POSIXlt}(op, x, y, ...) \method{vec_arith}{difftime}(op, x, y, ...) } \arguments{ \item{x}{A double vector representing the number of days since UNIX epoch for \code{new_date()}, number of seconds since UNIX epoch for \code{new_datetime()}, and number of \code{units} for \code{new_duration()}.} \item{tzone}{Time zone. A character vector of length 1. Either \code{""} for the local time zone, or a value from \code{\link[=OlsonNames]{OlsonNames()}}} \item{units}{Units of duration.} } \description{ \itemize{ \item A \code{date} (\link{Date}) is a double vector. Its value represent the number of days since the Unix "epoch", 1970-01-01. It has no attributes. \item A \code{datetime} (\link{POSIXct} is a double vector. Its value represents the number of seconds since the Unix "Epoch", 1970-01-01. It has a single attribute: the timezone (\code{tzone})) \item A \code{duration} (\link{difftime}) } } \details{ These function help the base \code{Date}, \code{POSIXct}, and \code{difftime} classes fit into the vctrs type system by providing constructors, coercion functions, and casting functions. } \examples{ new_date(0) new_datetime(0, tzone = "UTC") new_duration(1, "hours") } \keyword{internal} vctrs/man/vec_fill_missing.Rd0000644000176200001440000000247415047425317016010 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fill.R \name{vec_fill_missing} \alias{vec_fill_missing} \title{Fill in missing values with the previous or following value} \usage{ vec_fill_missing( x, direction = c("down", "up", "downup", "updown"), max_fill = NULL ) } \arguments{ \item{x}{A vector} \item{direction}{Direction in which to fill missing values. Must be either \code{"down"}, \code{"up"}, \code{"downup"}, or \code{"updown"}.} \item{max_fill}{A single positive integer specifying the maximum number of sequential missing values that will be filled. If \code{NULL}, there is no limit.} } \description{ \code{vec_fill_missing()} fills gaps of missing values with the previous or following non-missing value. } \examples{ x <- c(NA, NA, 1, NA, NA, NA, 3, NA, NA) # Filling down replaces missing values with the previous non-missing value vec_fill_missing(x, direction = "down") # To also fill leading missing values, use `"downup"` vec_fill_missing(x, direction = "downup") # Limit the number of sequential missing values to fill with `max_fill` vec_fill_missing(x, max_fill = 1) # Data frames are filled rowwise. Rows are only considered missing # if all elements of that row are missing. y <- c(1, NA, 2, NA, NA, 3, 4, NA, 5) df <- data_frame(x = x, y = y) df vec_fill_missing(df) } vctrs/man/vec_split.Rd0000644000176200001440000000240614276722575014470 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/split.R \name{vec_split} \alias{vec_split} \title{Split a vector into groups} \usage{ vec_split(x, by) } \arguments{ \item{x}{Vector to divide into groups.} \item{by}{Vector whose unique values defines the groups.} } \value{ A data frame with two columns and size equal to \code{vec_size(vec_unique(by))}. The \code{key} column has the same type as \code{by}, and the \code{val} column is a list containing elements of type \code{vec_ptype(x)}. Note for complex types, the default \code{data.frame} print method will be suboptimal, and you will want to coerce into a tibble to better understand the output. } \description{ This is a generalisation of \code{\link[=split]{split()}} that can split by any type of vector, not just factors. Instead of returning the keys in the character names, the are returned in a separate parallel vector. } \section{Dependencies}{ \itemize{ \item \code{\link[=vec_group_loc]{vec_group_loc()}} \item \code{\link[=vec_chop]{vec_chop()}} } } \examples{ vec_split(mtcars$cyl, mtcars$vs) vec_split(mtcars$cyl, mtcars[c("vs", "am")]) if (require("tibble")) { as_tibble(vec_split(mtcars$cyl, mtcars[c("vs", "am")])) as_tibble(vec_split(mtcars, mtcars[c("vs", "am")])) } } vctrs/man/vec_type.Rd0000644000176200001440000000150214276722575014312 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vctrs-deprecated.R \name{vec_type} \alias{vec_type} \alias{vec_type_common} \alias{vec_type2} \title{Deprecated type functions} \usage{ vec_type(x) vec_type_common(..., .ptype = NULL) vec_type2(x, y, ...) } \arguments{ \item{x, y, ..., .ptype}{Arguments for deprecated functions.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been renamed: \itemize{ \item \code{vec_type()} => \code{\link[=vec_ptype]{vec_ptype()}} \item \code{vec_type2()} => \code{\link[=vec_ptype2]{vec_ptype2()}} \item \code{vec_type_common()} => \code{\link[=vec_ptype_common]{vec_ptype_common()}} } } \keyword{internal} vctrs/DESCRIPTION0000644000176200001440000000342515157552632013140 0ustar liggesusersPackage: vctrs Title: Vector Helpers Version: 0.7.2 Authors@R: c( person("Hadley", "Wickham", , "hadley@posit.co", role = "aut"), person("Lionel", "Henry", , "lionel@posit.co", role = "aut"), person("Davis", "Vaughan", , "davis@posit.co", role = c("aut", "cre")), person("data.table team", role = "cph", comment = "Radix sort based on data.table's forder() and their contribution to R's order()"), person("Posit Software, PBC", role = c("cph", "fnd")) ) Description: Defines new notions of prototype and size that are used to provide tools for consistent and well-founded type-coercion and size-recycling, and are in turn connected to ideas of type- and size-stability useful for analysing function interfaces. License: MIT + file LICENSE URL: https://vctrs.r-lib.org/, https://github.com/r-lib/vctrs BugReports: https://github.com/r-lib/vctrs/issues Depends: R (>= 4.0.0) Imports: cli (>= 3.4.0), glue, lifecycle (>= 1.0.3), rlang (>= 1.1.7) Suggests: bit64, covr, crayon, dplyr (>= 0.8.5), generics, knitr, pillar (>= 1.4.4), pkgdown (>= 2.0.1), rmarkdown, testthat (>= 3.0.0), tibble (>= 3.1.3), waldo (>= 0.2.0), withr, xml2, zeallot VignetteBuilder: knitr Config/build/compilation-database: true Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Config/testthat/parallel: true Encoding: UTF-8 Language: en-GB RoxygenNote: 7.3.3 NeedsCompilation: yes Packaged: 2026-03-20 19:53:17 UTC; davis Author: Hadley Wickham [aut], Lionel Henry [aut], Davis Vaughan [aut, cre], data.table team [cph] (Radix sort based on data.table's forder() and their contribution to R's order()), Posit Software, PBC [cph, fnd] Maintainer: Davis Vaughan Repository: CRAN Date/Publication: 2026-03-21 17:30:02 UTC