bit64/0000755000176200001440000000000014742226407011212 5ustar liggesusersbit64/tests/0000755000176200001440000000000014705122715012350 5ustar liggesusersbit64/tests/testthat/0000755000176200001440000000000014742226407014214 5ustar liggesusersbit64/tests/testthat/test-matrix64.R0000644000176200001440000000505414705122715016772 0ustar liggesuserstest_that("colSums and rowSums work on simple integer64 input", { A = array(seq_len(120L), dim = 2:5) A64 = array64(A, dim=dim(A)) # matches the behavior of sum.integer64 to not become numeric expect_s3_class(rowSums(A64), "integer64") expect_s3_class(colSums(A64), "integer64") expect_int_32_64_equivalent(rowSums(A)) expect_int_32_64_equivalent(rowSums(A, dims=2L)) expect_int_32_64_equivalent(rowSums(A, dims=3L)) expect_int_32_64_equivalent(colSums(A)) expect_int_32_64_equivalent(colSums(A, dims=2L)) expect_int_32_64_equivalent(colSums(A, dims=3L)) skip_if_not_r_version("4.0.0") # named args in stopifnot() unsupported -> different error expect_error( rowSums(A64, dims=4L), "dims= should be a length-1 integer", fixed = TRUE ) expect_error( colSums(A64, dims=4L), "dims= should be a length-1 integer", fixed = TRUE ) }) test_that("colSums and rowSums work in presence of missing", { A = array(seq_len(120L), dim = 2:5) A[1L, 1L, 1L, 1L] = NA_integer_ expect_int_32_64_equivalent(rowSums(A)) expect_int_32_64_equivalent(rowSums(A, dims=2L)) expect_int_32_64_equivalent(rowSums(A, dims=3L)) expect_int_32_64_equivalent(colSums(A)) expect_int_32_64_equivalent(colSums(A, dims=2L)) expect_int_32_64_equivalent(colSums(A, dims=3L)) expect_int_32_64_equivalent(rowSums(A, na.rm=TRUE)) expect_int_32_64_equivalent(rowSums(A, na.rm=TRUE, dims=2L)) expect_int_32_64_equivalent(rowSums(A, na.rm=TRUE, dims=3L)) expect_int_32_64_equivalent(colSums(A, na.rm=TRUE)) expect_int_32_64_equivalent(colSums(A, na.rm=TRUE, dims=2L)) expect_int_32_64_equivalent(colSums(A, na.rm=TRUE, dims=3L)) }) test_that("All-missing inputs are handled correctly by colSums and rowSums", { A64 = matrix64(rep(NA_integer64_, 6L), nrow=3L, ncol=2L) expect_identical(rowSums(A64), rep(NA_integer64_, 3L)) expect_identical(colSums(A64), rep(NA_integer64_, 2L)) }) test_that("out-of-integer-range inputs are handled correctly", { A64 = matrix64(2.0^(30:35), nrow=3L, ncol=2L) expect_identical(rowSums(A64), as.integer64(2L^30L*c(1L+8L, 2L+16L, 4L+32L))) expect_identical(colSums(A64), as.integer64(2L^30L*c(1L+2L+4L, 8L+16L+32L))) }) test_that("aperm works in simple cases", { # example from ?aperm A = array64(1:24, 2:4) B = aperm(A, c(2L, 1L, 3L)) # ignore class: t() gives 'array', not easy to delete it/add it to A[...] expect_identical(t(B[,, 2L]), A[,, 2L], ignore_attr="class") expect_identical(t(B[,, 3L]), A[,, 3L], ignore_attr="class") expect_identical(t(B[,, 4L]), A[,, 4L], ignore_attr="class") }) bit64/tests/testthat/test-hash64.R0000644000176200001440000000127414742210225016404 0ustar liggesuserstest_that("runif64 behaves as expected", { withr::local_seed(3478) expect_identical( runif64(10L), as.integer64(c( "6312937654860439830", "5047107428523623805", "7829389831893364707", "-3641910282010306573", "4600438248413496767", "4871064969903669683", "2693636032523872093", "4503042760826424596", "-8860474785465525016", "-4614238549190155011" )) ) expect_identical( runif64(5L, 10L, 20L), as.integer64(c(16L, 19L, 16L, 15L, 20L)) ) # large enough number to be confident the test isn't "randomly" succeeding, # but not so large as to noticeably slow down the suite. 
x = runif64(100000L, -5L, 5L) expect_true(all(x >= -5L & x <= 5L)) }) bit64/tests/testthat/test-bit64-package.R0000644000176200001440000003677414705122715017652 0ustar liggesusers# These tests were previously kept as tests under \examples{\dontshow{...}}. # Converted to "proper" unit tests for clarity, after making them more # canonical within {testthat}, e.g. better capturing expected warnings, # changing stopifnot(identical(...)) to expect_identical(...). test_that("identical.integer64", { i64 = NA_real_ class(i64) = "integer64" expect_identical(unclass(i64 - 1.0), unclass(i64 + 1.0)) expect_identical(i64 - 1.0, i64 + 1.0) expect_false(identical.integer64(i64 - 1.0, i64 + 1.0)) }) test_that("dispatch of 'c' method", { expect_true(identical.integer64(c(integer64(0L), NA), as.integer64(NA))) }) test_that("Dispatch on the second argument fails and we want to be notified once that changes", { expect_false(identical.integer64(c(NA, integer64(0L)), as.integer64(NA))) }) test_that("Minus and plus", { d64 = c( -.Machine$double.base^.Machine$double.digits, -.Machine$integer.max, -1.0, 0.0, 1.0, .Machine$integer.max, .Machine$double.base^.Machine$double.digits ) i64 = as.integer64(d64) expect_true(identical.integer64(i64 - 1.0 + 1.0, i64)) expect_true(identical.integer64(i64 + 1.0 - 1.0, i64)) }) test_that("Minus and plus edge cases and 'rev'", { # UBSAN signed integer overflow expected for type 'long long int' # This is a false UBSAN alarm because overflow is detected and NA returned expect_warning( expect_true( identical.integer64(lim.integer64() + 1.0 - 1.0, c(lim.integer64()[1L], NA)) ), "NAs produced by integer64 overflow", fixed = TRUE ) expect_warning( expect_true( identical.integer64(rev(lim.integer64()) - 1.0 + 1.0, c(lim.integer64()[2L], NA)) ), "NAs produced by integer64 overflow", fixed = TRUE ) }) test_that("'range.integer64', multiplication, integer division, sqrt, power, and log", { i64 = integer64(63L) i64[1L] = 1.0 for (i in 2:63) i64[i] = 2.0 * i64[i-1L] expect_true(identical.integer64(i64 * rev(i64), rep(i64[63L], 63L))) for (i in 63:2) i64[i-1L] = i64[i] %/% 2.0 expect_true(identical.integer64(i64 * rev(i64), rep(i64[63L], 63L))) for (i in 63:2) i64[i-1L] = i64[i] / 2.0 expect_true(identical.integer64(i64 * rev(i64), rep(i64[63L], 63L))) expect_true(identical.integer64( c( -i64[63L] - (i64[63L] - 1.0), i64[63L] + (i64[63L] - 1.0) ), lim.integer64() )) expect_true(identical.integer64(i64[-1L] %/%2.0 * as.integer64(2L), i64[-1L])) expect_true(identical.integer64(i64[-1L] %/%2L * as.integer64(2L), i64[-1L])) expect_true(identical.integer64(i64[-1L] / 2.0 * as.integer64(2L), i64[-1L])) expect_true(identical.integer64(i64[-1L] / 2.0 * as.integer64(2L), i64[-1L])) expect_true(identical.integer64(i64[-63L] * 2.0 %/% 2.0, i64[-63L])) expect_true(identical.integer64(i64[-63L] * 2L %/% 2L, i64[-63L])) expect_true(identical.integer64(as.integer64(i64[-63L] * 2.0 / 2.0), i64[-63L])) expect_true(identical.integer64(as.integer64(i64[-63L] * 2L / 2L), i64[-63L])) expect_true(identical.integer64( as.integer64(sqrt( i64[-1L][c(FALSE, TRUE)]) * sqrt(i64[-1L][c(FALSE, TRUE)] )), i64[-1L][c(FALSE, TRUE)] )) expect_true(identical.integer64(as.integer64(2L) ^ (0:62), i64)) expect_true(identical.integer64(as.integer64(0:62), as.integer64(round(log2(i64))))) expect_true(identical.integer64( as.integer64(round(log(as.integer64(2L)^(0:62), 2.0))), as.integer64(0:62) )) expect_true(identical.integer64( as.integer64(round(log(as.integer64(3L)^(0:39), 3.0))), as.integer64(0:39) )) 
expect_true(identical.integer64( as.integer64(round(log(as.integer64(10L)^(0:18), 10.0))), as.integer64(0:18) )) expect_true(identical.integer64( as.integer64(round(log10(as.integer64(10L)^(0:18)))), as.integer64(0:18) )) expect_true(identical.integer64( (as.integer64(2L)^(1:62))^(1.0/1:62), as.integer64(rep(2.0, 62L)) )) expect_true(identical.integer64( (as.integer64(3L)^(1:39))^(1.0/1:39), as.integer64(rep(3.0, 39L)) )) expect_true(identical.integer64( (as.integer64(10L)^(1:18))^(1.0/1:18), as.integer64(rep(10.0, 18L)) )) }) test_that("c and rep", { expect_true(identical.integer64( as.integer64(rep(1:3, 1:3)), rep(as.integer64(1:3), 1:3) )) expect_true(identical.integer64( as.integer64(rep(1:3, 3L)), rep(as.integer64(1:3), 3L) )) x = rep(NA_real_, 3L) class(x) = "integer64" x = x + -1:1 expect_true(identical.integer64(rep(x, 3L), c(x, x, x))) expect_true(identical.integer64( c.integer64(list(x, x, x), recursive=TRUE), c(x, x, x) )) }) test_that("seq", { expect_true(identical.integer64( seq(as.integer64(1L), 10.0, 2.0), as.integer64(seq(1.0, 10.0, 2.0)) )) expect_true(identical.integer64( seq(as.integer64(1L), by=2.0, length.out=5.0), as.integer64(seq(1.0, by=2.0, length.out=5.0)) )) expect_true(identical.integer64( seq(as.integer64(1L), by=2.0, length.out=6.0), as.integer64(seq(1.0, by=2.0, length.out=6.0)) )) expect_true(identical.integer64( seq.integer64(along.with=3:5), as.integer64(seq(along.with=3:5)) )) expect_true(identical.integer64( seq(as.integer64(1L), to=-9.0), as.integer64(seq(1.0, to=-9.0)) )) }) test_that("cbind and rbind", { x = rep(as.integer64(1:3), 2L) dim(x) = c(3L, 2L) expect_true(identical.integer64(cbind(as.integer64(1:3), 1:3), x)) expect_true(identical.integer64(rbind(as.integer64(1:3), 1:3), t(x))) }) test_that("Coercion", { expect_identical( as.double(as.integer64(c(NA, seq(0.0, 9.0, 0.25)))), as.double(as.integer(c(NA, seq(0.0, 9.0, 0.25)))) ) expect_identical( as.character(as.integer64(c(NA, seq(0.0, 9.0, 0.25)))), as.character(as.integer(c(NA, seq(0.0, 9.0, 0.25)))) ) expect_identical( as.integer(as.integer64(c(NA, seq(0.0, 9.0, 0.25)))), as.integer(c(NA, seq(0.0, 9.0, 0.25))) ) expect_identical( as.logical(as.integer64(c(NA, seq(0.0, 9.0, 0.25)))), as.logical(as.integer(c(NA, seq(0.0, 9.0, 0.25)))) ) expect_identical( as.integer(as.integer64(c(NA, FALSE, TRUE))), as.integer(c(NA, FALSE, TRUE)) ) expect_identical( as.integer64(as.integer(as.integer64(-9:9))), as.integer64(-9:9) ) expect_identical( as.integer64(as.double(as.integer64(-9:9))), as.integer64(-9:9) ) expect_identical( as.integer64(as.character(as.integer64(-9:9))), as.integer64(-9:9) ) expect_identical( as.integer64(as.character(lim.integer64())), lim.integer64() ) }) test_that("Logical operators", { expect_true(identical.integer64( !c(NA, -1:1), !c(as.integer64(NA), -1:1) )) xi = rep(c(NA, -1:1), 4L) xi64 = as.integer64(xi) yi = rep(c(NA, -1:1), each=4L) yi64 = as.integer64(yi) expect_true(identical.integer64(xi64 & yi64, xi & yi)) expect_true(identical.integer64(xi64 | yi64, xi | yi)) expect_true(identical.integer64(xor(xi64, yi64), xor(xi, yi))) }) test_that("Comparison operators", { xi = rep(c(NA, -1:1), 4L) xi64 = as.integer64(xi) yi = rep(c(NA, -1:1), each=4L) yi64 = as.integer64(yi) expect_true(identical.integer64(xi64 == yi64, xi == yi)) expect_true(identical.integer64(xi64 != yi64, xi != yi)) expect_true(identical.integer64(xi64 > yi64, xi > yi)) expect_true(identical.integer64(xi64 >= yi64, xi >= yi)) expect_true(identical.integer64(xi64 < yi64, xi < yi)) 
expect_true(identical.integer64(xi64 <= yi64, xi <= yi)) }) test_that("Vector functions", { xi = c(NA, -1:1) xi64 = as.integer64(xi) expect_true(identical.integer64(is.na(xi64), is.na(xi))) expect_true(identical.integer64(format(xi64), format(xi))) expect_true(identical.integer64(abs(xi64), as.integer64(abs(xi)))) expect_true(identical.integer64(sign(xi64), as.integer64(sign(xi)))) expect_true(identical.integer64(ceiling(xi64), as.integer64(ceiling(xi)))) expect_true(identical.integer64(floor(xi64), as.integer64(floor(xi)))) expect_true(identical.integer64(trunc(xi64), as.integer64(trunc(xi)))) expect_true(identical.integer64(signif(xi64), xi64)) }) test_that("Summary functions", { expect_identical(all(as.integer64(1L)), all(1L)) expect_identical(all(as.integer64(0L)), all(0L)) expect_identical(all(NA_integer64_), all(NA_integer_)) expect_identical(all(NA_integer64_, na.rm=TRUE), all(NA_integer_, na.rm=TRUE)) expect_identical(all(as.integer64(1L), NA), all(1L, NA)) expect_identical(all(as.integer64(0L), NA), all(0L, NA)) expect_identical(all(as.integer64(1L), NA, na.rm=TRUE), all(1L, NA, na.rm=TRUE)) expect_identical(all(as.integer64(0L), NA, na.rm=TRUE), all(0L, NA, na.rm=TRUE)) expect_identical(all(as.integer64(c(1L, NA))), all(c(1L, NA_integer_))) expect_identical(all(as.integer64(c(0L, NA))), all(c(0L, NA_integer_))) expect_identical(all(as.integer64(c(1L, NA)), na.rm=TRUE), all(c(1L, NA_integer_), na.rm=TRUE)) expect_identical(all(as.integer64(c(0L, NA)), na.rm=TRUE), all(c(0L, NA_integer_), na.rm=TRUE)) expect_identical(any(as.integer64(1L)), any(1L)) expect_identical(any(as.integer64(0L)), any(0L)) expect_identical(any(NA_integer64_), any(NA_integer_)) expect_identical(any(NA_integer64_, na.rm=TRUE), any(NA_integer_, na.rm=TRUE)) expect_identical(any(as.integer64(1L), NA), any(1L, NA)) expect_identical(any(as.integer64(0L), NA), any(0L, NA)) expect_identical(any(as.integer64(1L), NA, na.rm=TRUE), any(1L, NA, na.rm=TRUE)) expect_identical(any(as.integer64(0L), NA, na.rm=TRUE), any(0L, NA, na.rm=TRUE)) expect_identical(any(as.integer64(c(1L, NA))), any(c(1L, NA_integer_))) expect_identical(any(as.integer64(c(0L, NA))), any(c(0L, NA_integer_))) expect_identical(any(as.integer64(c(1L, NA)), na.rm=TRUE), any(c(1L, NA_integer_), na.rm=TRUE)) expect_identical(any(as.integer64(c(0L, NA)), na.rm=TRUE), any(c(0L, NA_integer_), na.rm=TRUE)) xd = c(2.0, 3.0, NA) xi64 = as.integer64(xd) expect_true(identical.integer64( as.integer64(sum(xd)), sum(xi64) )) expect_true(identical.integer64( as.integer64(sum(xd, na.rm=TRUE)), sum(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(sum(xd)), sum(xi64) )) expect_true(identical.integer64( as.integer64(sum(xd, na.rm=TRUE)), sum(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(sum(2.0, 3.0, NA)), sum(as.integer64(2L), 3.0, NA) )) expect_true(identical.integer64( as.integer64(sum(2.0, 3.0, NA, na.rm=TRUE)), sum(as.integer64(2L), 3.0, NA, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(sum(2.0, 3.0, NA)), sum(as.integer64(2L), 3.0, NA) )) expect_true(identical.integer64( as.integer64(sum(2.0, 3.0, NA, na.rm=TRUE)), sum(as.integer64(2L), 3.0, NA, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(prod(xd)), prod(xi64) )) expect_true(identical.integer64( as.integer64(prod(xd, na.rm=TRUE)), prod(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(prod(xd)), prod(xi64) )) expect_true(identical.integer64( as.integer64(prod(xd, na.rm=TRUE)), prod(xi64, na.rm=TRUE) )) 
expect_true(identical.integer64( as.integer64(prod(2.0, 3.0, NA)), prod(as.integer64(2L), 3.0, NA) )) expect_true(identical.integer64( as.integer64(prod(2.0, 3.0, NA, na.rm=TRUE)), prod(as.integer64(2L), 3.0, NA, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(prod(2.0, 3.0, NA)), prod(as.integer64(2L), 3.0, NA) )) expect_true(identical.integer64( as.integer64(prod(2.0, 3.0, NA, na.rm=TRUE)), prod(as.integer64(2L), 3.0, NA, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(min(xd)), min(xi64) )) expect_true(identical.integer64( as.integer64(min(xd, na.rm=TRUE)), min(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(min(xd)), min(xi64) )) expect_true(identical.integer64( as.integer64(min(xd, na.rm=TRUE)), min(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(min(2.0, 3.0, NA)), min(as.integer64(2L), 3.0, NA) )) expect_warning( expect_warning( expect_true(identical.integer64( as.integer64(min(2.0, 3.0, NA, na.rm=TRUE)), min(as.integer64(2L), 3.0, NA, na.rm=TRUE) )), "no non-missing arguments to min; returning Inf", fixed = TRUE ), "NAs produced by integer64 overflow", fixed = TRUE ) expect_true(identical.integer64( as.integer64(min(2.0, 3.0, NA)), min(as.integer64(2L), 3.0, NA) )) expect_warning( expect_warning( expect_true(identical.integer64( as.integer64(min(2.0, 3.0, NA, na.rm=TRUE)), min(as.integer64(2L), 3.0, NA, na.rm=TRUE) )), "no non-missing arguments to min; returning Inf", fixed = TRUE ), "NAs produced by integer64 overflow", fixed = TRUE ) expect_true(identical.integer64( as.integer64(max(xd)), max(xi64) )) expect_true(identical.integer64( as.integer64(max(xd, na.rm=TRUE)), max(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(max(xd)), max(xi64) )) expect_true(identical.integer64( as.integer64(max(xd, na.rm=TRUE)), max(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(max(2.0, 3.0, NA)), max(as.integer64(2L), 3.0, NA) )) expect_warning( expect_warning( expect_true(identical.integer64( as.integer64(max(2.0, 3.0, NA, na.rm=TRUE)), max(as.integer64(2L), 3.0, NA, na.rm=TRUE) )), "no non-missing arguments to max; returning -Inf", fixed = TRUE ), "NAs produced by integer64 overflow", fixed = TRUE ) expect_true(identical.integer64( as.integer64(max(2.0, 3.0, NA)), max(as.integer64(2L), 3.0, NA) )) expect_warning( expect_warning( expect_true(identical.integer64( as.integer64(max(2.0, 3.0, NA, na.rm=TRUE)), max(as.integer64(2L), 3.0, NA, na.rm=TRUE) )), "no non-missing arguments to max; returning -Inf", fixed = TRUE ), "NAs produced by integer64 overflow", fixed = TRUE ) expect_true(identical.integer64( as.integer64(range(xd)), range(xi64) )) expect_true(identical.integer64( as.integer64(range(xd, na.rm=TRUE)), range(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(range(xd)), range(xi64) )) expect_true(identical.integer64( as.integer64(range(xd, na.rm=TRUE)), range(xi64, na.rm=TRUE) )) expect_true(identical.integer64( as.integer64(range(2.0, 3.0, NA)), range(as.integer64(2L), 3.0, NA) )) expect_warning( expect_warning( expect_warning( expect_true(identical.integer64( as.integer64(range(2.0, 3.0, NA, na.rm=TRUE)), range(as.integer64(2L), 3.0, NA, na.rm=TRUE) )), "no non-missing arguments to max; returning -Inf", fixed = TRUE ), "no non-missing arguments to min; returning Inf", fixed = TRUE ), "NAs produced by integer64 overflow", fixed = TRUE ) }) test_that("Cumulative functions", { xd = c(2.0, 3.0, NA, 1.0, 4.0) xi64 = as.integer64(xd) 
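  # NB: xd has an NA at position 3, so the base cumulative functions return NA
  # from that position onward; the integer64 methods are expected to propagate
  # NA the same way, so the checks below also exercise NA handling.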
expect_true(identical.integer64(cumsum(xi64), as.integer64(cumsum(xd)))) expect_true(identical.integer64(cumprod(xi64), as.integer64(cumprod(xd)))) expect_true(identical.integer64(cummin(xi64), as.integer64(cummin(xd)))) expect_true(identical.integer64(cummax(xi64), as.integer64(cummax(xd)))) }) test_that("diff", { d64 = diffinv(rep(.Machine$integer.max, 100L), lag=2.0, differences=2L) i64 = as.integer64(d64) expect_identical( diff(d64, lag=2L, differences=2L), as.double(diff(i64, lag=2L, differences=2L)) ) }) bit64/tests/testthat/test-highlevel64.R0000644000176200001440000001731014705642333017436 0ustar liggesuserstest_that("match & %in% basics work", { x = as.integer64(2:5) y = as.integer64(3:6) expect_identical(match(x, y), c(NA, 1:3)) expect_identical(match(y, x), c(2:4, NA)) expect_identical(match(2:5, y), c(NA, 1:3)) expect_identical(match(as.numeric(2:5), y), c(NA, 1:3)) expect_identical(match(y, 2:5), c(2:4, NA)) expect_identical(match(y, as.numeric(2:5)), c(2:4, NA)) expect_identical(match(x, y, nomatch=0L), 0:3) expect_identical(x %in% y, c(FALSE, TRUE, TRUE, TRUE)) expect_identical(y %in% x, c(TRUE, TRUE, TRUE, FALSE)) expect_identical(x %in% 3:6, c(FALSE, TRUE, TRUE, TRUE)) expect_identical(x %in% c(3.0, 4.0, 5.0, 6.0), c(FALSE, TRUE, TRUE, TRUE)) }) test_that("Different method= for match() and %in% work", { x = as.integer64(2:5) y = as.integer64(3:6) expected = c(NA_integer_, 1:3) expect_identical(match(x, y, method="hashpos"), expected) expect_identical(match(x, y, method="hashrev"), expected) expect_identical(match(x, y, method="sortorderpos"), expected) expect_error(match(x, y, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) # TODO(#58): Fix this, currently fails. # expect_identical(match(x, y, method="orderpos"), expected) # NB: %in% is quite a bit different; while there's a public API to # `%in%.integer64`, likely, there shouldn't be (it's strange to export # an S3 method like is currently done). The tests are designed to tickle # the different methods through the public API only; this makes them # prone to winding up testing something totally different later. I think # that's fine; now that we have coverage tests up, any refactor that bumps # around what exactly the following tests are covering, will show up in the PR. # method="hashrin" used when x is "short" but table is "long" x = as.integer64(seq_len(10L)) table = as.integer64(seq_len(2.0**16.0 * 2.0/3.0 + 10.0)) # invert condition for bx>=16, 10.0 arbitrary buffer expect_identical(x %in% table, rep(TRUE, 10L)) }) # TODO(#59): Don't call table.integer64() directly. 
test_that("duplicated, unique, table methods work", { x = as.integer64(1:3) expect_identical(duplicated(x), rep(FALSE, 3L)) expect_identical(unique(x), x) expect_identical(table.integer64(x), table(x = 1:3)) x = as.integer64(rep(1L, 3L)) expect_identical(duplicated(x), c(FALSE, TRUE, TRUE)) expect_identical(unique(x), x[1L]) expect_identical(table.integer64(x), table(x = rep(1L, 3L))) x = as.integer64(c(1L, 2L, 1L)) expect_identical(duplicated(x), c(FALSE, FALSE, TRUE)) expect_identical(unique(x), x[1:2]) expect_identical(table.integer64(x), table(x = c(1L, 2L, 1L))) x = as.integer64(c(1L, 1L, 2L)) expect_identical(duplicated(x), c(FALSE, TRUE, FALSE)) expect_identical(unique(x), x[c(1L, 3L)]) expect_identical(table.integer64(x), table(x = c(1L, 1L, 2L))) expect_error(duplicated(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) expect_error(unique(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) }) test_that("different method= for duplicated, unique work", { x = as.integer64(c(1L, 2L, 1L)) exp_dup = c(FALSE, FALSE, TRUE) exp_unq = x[1:2] expect_identical(duplicated(x, method="hashdup"), exp_dup) expect_identical(unique(x, method="hashmapuni"), exp_unq) expect_identical(unique(x, method="hashuni"), exp_unq) expect_identical(duplicated(x, method="sortorderdup"), exp_dup) expect_identical(unique(x, method="sortorderuni"), exp_unq) expect_identical(unique(x, method="sortuni"), exp_unq) # TODO(#58): Fix this, currently fails. # expect_identical(duplicated(x, method="orderdup"), exp_dup) expect_identical(unique(x, method="orderuni"), exp_unq) }) test_that("more coercion works", { expect_identical(as.factor(as.integer64(2:4)), factor(2:4)) expect_identical(as.ordered(as.integer64(2:4)), as.ordered(2:4)) expect_identical(as.integer64(factor(2:11)), as.integer64(1:10)) # NB: _not_ 2:11! }) test_that("sorting methods work", { x = as.integer64(c(10L, 4L, 8L)) x_rank = c(3.0, 1.0, 2.0) expect_identical(rank(x), x_rank) expect_identical(rank(x, method="orderrnk"), x_rank) x = as.integer64(1:100) q = as.integer64(c(1L, 26L, 50L, 75L, 100L)) expect_identical(quantile(x, names=FALSE), q) expect_identical(median(x), q[3L]) names(q) = c('0%', '25%', '50%', '75%', '100%') expect_identical(quantile(x), q) expect_identical(quantile(x, 0.2, names=FALSE), as.integer64(21L)) expect_error(quantile(x, type=7L), "only.*qtile.*supported") expect_error(median(NA_integer64_), "missing values not allowed") expect_error(quantile(NA_integer64_), "missing values not allowed") x = as.integer64(1:100) q = as.integer64(c(1L, 26L, 50L, 75L, 100L)) names(q) = c('0%', '25%', '50%', '75%', '100%') expect_identical(qtile(x, method="sortqtl"), q) expect_identical(qtile(x, method="orderqtl"), q) x = as.integer64(c(1L, 1L, 2L, 3L, 2L, 4L)) x_tiepos = c(1L, 2L, 3L, 5L) expect_identical(tiepos(x), x_tiepos) expect_identical(tiepos(x, method="ordertie"), x_tiepos) expect_error(rank(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) expect_error(qtile(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) expect_error(tiepos(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) }) # These tests were previously kept as tests under \examples{\dontshow{...}}. # Converted to "proper" unit tests for clarity, after making them more # canonical within {testthat}, e.g. better capturing expected warnings, # changing stopifnot(identical(...)) to expect_identical(...). 
test_that("Old \\dontshow{} tests continue working", { xi = c(1L, 1L, 2L) xi64 = as.integer64(xi) yi = c(3L, 4L, 4L) yi64 = as.integer64(yi) t_xi = table(x=xi) t_xi_yi = table(x=xi, y=yi) expect_identical(table.integer64(x=xi64), t_xi) expect_identical(table.integer64(x=xi64, y=yi64), t_xi_yi) expect_warning( expect_identical(table.integer64(x=xi), t_xi), "coercing first argument to integer64", fixed = TRUE ) expect_warning( expect_identical(table.integer64(x=xi64, y=yi), t_xi_yi), "coercing argument 2 to integer64", fixed = TRUE ) expect_warning( expect_identical(table.integer64(x=xi, y=yi64), t_xi_yi), "coercing argument 1 to integer64", fixed = TRUE ) expect_identical(table(x=xi64), t_xi) expect_identical(table(x=xi64, y=yi64), t_xi_yi) expect_identical(table(x=xi64, y=yi), t_xi_yi) expect_identical(table(x=xi, y=yi64), t_xi_yi) }) test_that("unipos() works as intended", { x = as.integer64(c(1L, 2L, 1L, 3L, 2L, 4L)) x_unipos = c(1L, 2L, 4L, 6L) expect_identical(unipos(x), x_unipos) expect_identical(unipos(x, method="hashupo"), x_unipos) expect_identical(unipos(x, method="sortorderupo"), x_unipos) expect_identical(unipos(x, method="orderupo"), x_unipos) expect_error(unipos(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) }) test_that("keypos() works as intended", { x = as.integer64(c(5L, 2L, 5L, 3L, 2L, 4L)) x_keypos = c(4L, 1L, 4L, 2L, 1L, 3L) expect_identical(keypos(x), x_keypos) expect_identical(keypos(x, method="orderkey"), x_keypos) expect_error(keypos(x, method="_unknown_"), "unknown method _unknown_", fixed=TRUE) }) test_that("summary() works as intended", { x = as.integer64(c(1L, 2L, 10L, 20L, NA, 30L)) # NB: as.integer64() strips names, so as.integer64(c(Min. = ...)) won't work x_summary = as.integer64(c(1L, 2L, 10L, 12L, 20L, 30L, 1L)) names(x_summary) = c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.", "NA's") expect_identical(summary(x), x_summary) expect_identical(summary(x[-5L]), x_summary[-7L]) }) test_that("prank() works as intended", { x = as.integer64(1:100) expect_identical(prank(x), (x-1.0)/99.0) expect_identical(prank(x[1L]), NA_integer64_) })bit64/tests/testthat/helper.R0000644000176200001440000000337314705122715015620 0ustar liggesusers# TODO(#45): use matrix() directly matrix64 = function(x, nrow=1L, ncol=1L, byrow=FALSE) { x = as.integer64(x) if (byrow) { dim(x) = c(ncol, nrow) t(x) } else { dim(x) = c(nrow, ncol) x } } array64 = function(x, dim) { x = as.integer64(x) dim(x) = dim x } # Test that 'expr' gives the same result whether # the input is integer or integer64, in the sense # of equivalence after casting between the types. # expr gets integer names converted to integer64, # retaining attributes (esp. for arrays), and # we test that the result of evaluating expr # is equivalent (after converting back to integer) # Starting with integer and casting to integer64 # guarantees representation, where casting integer64 # to integer might have to stipulate inputs must be # representable as integer. 
expect_int_32_64_equivalent <- function(expr) { # Capture the unevaluated expression esub = substitute(expr) evar = all.vars(esub) # replace all integer values in expr with integer64 equivalents # in a tailored environment parent_ = parent.frame() int64_env = new.env(parent = parent_) for (key in evar) { val = get(key, parent_) if (!is.integer(val)) next assign(key, as.integer64(val), envir=int64_env) attributes(int64_env[[key]]) = attributes(val) } int_result = eval(expr, parent_) int64_result = eval(expr, int64_env) int64_result_as_int = as(int64_result, typeof(int_result)) # ignore class (which includes integer64) a64 = attributes(int64_result) for (a in setdiff(names(a64), "class")) attr(int64_result_as_int, a) = a64[[a]] expect_identical(int64_result_as_int, int_result) } skip_if_not_r_version = function(ver) { skip_if(getRversion() < ver, paste("R version >=", ver, "required.")) } bit64/tests/testthat/test-patch64.R0000644000176200001440000000235114705122715016562 0ustar liggesuserstest_that("base generic overwrites work", { x = c(2L, 4L, 3L) expect_identical(rank(x), c(1.0, 3.0, 2.0)) expect_identical(order(x), c(1L, 3L, 2L)) }) # These tests were previously kept as tests under \examples{\dontshow{...}}. # Converted to "proper" unit tests for clarity, after making them more # canonical within {testthat}, e.g. better capturing expected warnings, # changing stopifnot(identical(...)) to expect_identical(...). test_that("Old \\dontshow{} tests continue working", { expect_identical(match(as.integer64(2L), as.integer64(0:3)), match(2L, 0:3)) expect_identical(as.integer64(2L) %in% as.integer64(0:3), 2L %in% 0:3) xi = c(1L, 1L, 2L) xi64 = as.integer64(xi) yi = c(3L, 4L, 4L) yi64 = as.integer64(yi) zi = c(1L, NA_integer_, 2L) zi64 = as.integer64(zi) expect_identical(unique(xi64), as.integer64(unique(xi))) expect_identical(rank(xi64), rank(xi)) expect_identical(table(x=xi64), table(x=xi)) expect_identical(table(x=xi64, y=yi64), table(x=xi, y=yi)) expect_identical(table(x=xi64, y=yi), table(x=xi, y=yi)) expect_identical(table(x=xi, y=yi64), table(x=xi, y=yi)) expect_identical(order(zi64), order(zi)) expect_identical(order(zi64, decreasing=TRUE), order(zi, decreasing=TRUE)) }) bit64/tests/testthat/test-sort64.R0000644000176200001440000000071214705122715016451 0ustar liggesuserstest_that("order basics work", { x = as.integer64(c(2L, 4L, 3L)) expect_identical(order(x), c(1L, 3L, 2L)) expect_identical(order(x, decreasing=TRUE), c(2L, 3L, 1L)) x = c(x, NA_integer64_) expect_identical(order(x), c(1L, 3L, 2L, 4L)) expect_identical(order(x, decreasing=TRUE), c(2L, 3L, 1L, 4L)) expect_identical(order(x, na.last=FALSE), c(4L, 1L, 3L, 2L)) expect_identical(order(x, na.last=FALSE, decreasing=TRUE), c(4L, 2L, 3L, 1L)) }) bit64/tests/testthat/test-integer64.R0000644000176200001440000002636214705642333017133 0ustar liggesuserstest_that("integer64 coercion to/from other types work", { # from integer64 expect_identical(as.logical(as.integer64(0:1)), c(FALSE, TRUE)) expect_identical(as.integer(as.integer64(1:10)), 1:10) expect_identical(as.character(as.integer64(1:10)), as.character(1:10)) expect_identical(as.double(as.integer64(1:10)), as.double(1:10)) expect_identical(as.numeric(as.integer64(1:10)), as.numeric(1:10)) # to integer64 expect_identical(as.integer64(TRUE), as.integer64(1L)) expect_identical(as.integer64(as.character(1:10)), as.integer64(1:10)) expect_identical(as.integer64(as.double(1:10)), as.integer64(1:10)) expect_identical(as.integer64(NULL), as.integer64()) x = as.integer64(1:10) 
expect_identical(as.integer64(x), x) # S4 version expect_identical(methods::as(as.character(1:10), "integer64"), as.integer64(1:10)) expect_identical(methods::as(as.integer64(1:10), "character"), as.character(1:10)) # now for NA expect_identical(as.logical(NA_integer64_), NA) expect_identical(as.integer(NA_integer64_), NA_integer_) expect_identical(as.double(NA_integer64_), NA_real_) expect_identical(as.character(NA_integer64_), NA_character_) expect_identical(as.integer64(NA), NA_integer64_) expect_identical(as.integer64(NA_integer_), NA_integer64_) expect_identical(as.integer64(NA_real_), NA_integer64_) expect_identical(as.integer64(NA_character_), NA_integer64_) }) test_that("S3 class basics work", { x = as.integer64(1:10) expect_s3_class(x, "integer64") expect_true(is.integer64(x)) length(x) = 11L expect_length(x, 11L) expect_identical(x[11L], as.integer64(0L)) }) test_that("indexing works", { x = as.integer64(1:10) x[1.0] = 2.0 x[2L] = 3L expect_identical(x, as.integer64(c(2:3, 3:10))) x[[1.0]] = 3.0 x[[2L]] = 4L expect_identical(x, as.integer64(c(3:4, 3:10))) expect_identical(x[3L], as.integer64(3L)) expect_identical(x[[4L]], as.integer64(4L)) names(x) = letters[1:10] expect_identical(x[c("b", "c")], x[2:3]) expect_identical(x[["d"]], x[[4L]]) }) test_that("arithmetic & basic math works", { x = as.integer64(1:10) y = as.integer64(10:1) expect_identical(x + y, as.integer64(rep(11L, 10L))) expect_identical(y - x, as.integer64(seq(9L, -9L, by=-2L))) expect_identical(x * y, as.integer64(c(10L, 18L, 24L, 28L, 30L, 30L, 28L, 24L, 18L, 10L))) # output is double even though it fits in integer [and integer64] expect_identical(x[seq(2L, 10L, by=2L)] / 2L, as.double(1:5)) expect_identical(x ^ 2L, as.integer64((1:10)^2L)) expect_identical(-x, as.integer64(-(1:10))) expect_identical(x %/% 2L, as.integer64(c(0L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L))) expect_identical(x %% 2L, as.integer64(rep_len(c(1L, 0L), 10L))) expect_identical(sign(x - 6L), as.integer64(rep(c(-1L, 0L, 1L), c(5L, 1L, 4L)))) expect_identical(abs(x - 6.0), as.integer64(c(5:0, 1:4))) expect_identical(sqrt(as.integer64(c(0L, 1L, 4L, 9L))), as.numeric(0:3)) expect_identical(log(x), log(as.numeric(x))) expect_identical(log(as.integer64(c(1L, 2L, 4L, 8L)), base=2L), as.numeric(0:3)) expect_identical(log2(as.integer64(c(1L, 2L, 4L, 8L))), as.numeric(0:3)) # TODO(#48): Improve the numerical precision here. 
expect_identical(log10(as.integer64(c(1L, 10L, 100L, 1000L))), as.numeric(0:3), tolerance=1e-7) expect_identical(trunc(x), x) expect_identical(floor(x), x) expect_identical(ceiling(x), x) expect_identical(signif(x), x) expect_identical(round(x), x) expect_identical(round(x, -1L), as.integer64(rep(c(0L, 10L), each=5L))) }) test_that("basic statistics work", { x = as.integer64(1:10) expect_identical(sum(x), as.integer64(55L)) expect_identical(sum(x, x), as.integer64(110L)) expect_identical(prod(x), as.integer64(factorial(10L))) expect_identical(prod(x[1:5], x[6:10]), as.integer64(factorial(10L))) expect_identical(min(x), x[1L]) expect_identical(min(x, as.integer64(0L)), as.integer64(0L)) expect_identical(max(x), x[10L]) expect_identical(max(x, as.integer64(11L)), as.integer64(11L)) expect_identical(range(x), x[c(1L, 10L)]) expect_identical(range(x, x+1L), c(x[1L], x[10L]+1L)) expect_identical(diff(x), as.integer64(rep(1L, 9L))) expect_identical(cummin(x), as.integer64(rep(1L, 10L))) expect_identical(cummax(x), x) expect_identical(cumsum(x), as.integer64(choose(2:11, 2L))) expect_identical(cumprod(x), as.integer64(factorial(1:10))) }) test_that("display methods work", { x = as.integer64(1:3) expect_identical(format(x), as.character(1:3)) expect_output(print(x), "integer64.*\\s*1\\s*2\\s*3") expect_output(print(x[0L]), "integer64(0)", fixed=TRUE) expect_output(str(x), "integer64 [1:3] 1 2 3", fixed=TRUE) }) test_that("vector builders of integer64 work", { x = as.integer64(1:3) expect_identical(c(x, FALSE), as.integer64(c(1:3, 0L))) expect_identical(c(x, 4:6), as.integer64(1:6)) expect_identical(c(x, 4.0, 5.0, 6.0), as.integer64(1:6)) expect_identical(c(x, as.integer64(4:6)), as.integer64(1:6)) expect_identical(cbind(x, FALSE), matrix64(c(1:3, 0L, 0L, 0L), nrow=3L, ncol=2L)) expect_identical(cbind(x, 4:6), matrix64(1:6, nrow=3L, ncol=2L)) expect_identical(cbind(x, 0.0), matrix64(c(1:3, 0L, 0L, 0L), nrow=3L, ncol=2L)) expect_identical(cbind(x, as.integer64(4:6)), matrix64(1:6, nrow=3L, ncol=2L)) expect_identical(rbind(x, FALSE), matrix64(c(1:3, 0L, 0L, 0L), nrow=2L, ncol=3L, byrow=TRUE)) expect_identical(rbind(x, 4:6), matrix64(1:6, nrow=2L, ncol=3L, byrow=TRUE)) expect_identical(rbind(x, 0.0), matrix64(c(1:3, 0L, 0L, 0L), nrow=2L, ncol=3L, byrow=TRUE)) expect_identical(rbind(x, as.integer64(4:6)), matrix64(1:6, nrow=2L, ncol=3L, byrow=TRUE)) expect_identical(rep(x, 2L), c(x, x)) expect_identical(rep(x, each=2L), as.integer64(c(1L, 1L, 2L, 2L, 3L, 3L))) expect_identical(x[1L]:x[3L], x) expect_identical(x[3L]:x[1L], x[3:1]) # rev() a separate method expect_identical(seq(x[1L], x[3L], by=1L), x) expect_identical(seq(x[1L], x[3L], by=x[1L]), x) expect_identical(seq(x[1L], to=10L, by=1L), as.integer64(1:10)) expect_identical(seq(x[1L], to=11L, by=2L), as.integer64(c(1L, 3L, 5L, 7L, 9L, 11L))) # TODO(#47): More tests when the behavior is corrected. }) # These tests were previously kept as tests under \examples{\dontshow{...}}. # Converted to "proper" unit tests for clarity, after making them more # canonical within {testthat}, e.g. better capturing expected warnings, # changing stopifnot(identical(...)) to expect_identical(...). 
test_that("Old \\dontshow{} tests in ?format.integer64 continue working", { i <- -999:999 for (s in -3:3){ r <- as.integer64(round(as.integer(i), s)) r64 <- round(as.integer64(i), s) expect_identical(r, r64) } }) test_that("Old \\dontshow{} tests in ?extract.replace.integer64 continue working", { r <- c(runif64(1000L, lim.integer64()[1L], lim.integer64()[2L]), NA, -2:2) expect_identical(r, as.integer64(as.bitstring(r))) }) test_that("empty inputs give empty outputs for arithmetic", { x = integer64(1L) y = integer64(0L) expect_identical(x+y, integer64()) expect_identical(y+x, integer64()) expect_identical(x-y, integer64()) expect_identical(y-x, integer64()) expect_identical(+y, integer64()) expect_identical(-y, integer64()) expect_identical(x*y, integer64()) expect_identical(y*x, integer64()) expect_identical(x/y, double()) expect_identical(y/x, double()) expect_identical(x^y, integer64()) expect_identical(y^x, integer64()) expect_identical(x %/% y, integer64()) expect_identical(y %/% x, integer64()) expect_identical(x%%y, integer64()) expect_identical(y%%x, integer64()) expect_identical(log(x, base=y), double()) # TODO(#93): don't suppress this warning which is inconsistent with integer() expect_identical(suppressWarnings(log(y, base=x)), double()) expect_identical(x==y, logical()) expect_identical(y==x, logical()) expect_identical(x!=y, logical()) expect_identical(y!=x, logical()) expect_identical(x>=y, logical()) expect_identical(y>=x, logical()) expect_identical(x<=y, logical()) expect_identical(y<=x, logical()) expect_identical(x>y, logical()) expect_identical(y>x, logical()) expect_identical(x 0L } s <- clone(x) na.count <- ramsort(s, has.na = has.na, na.last = FALSE, decreasing = FALSE, stable = FALSE, optimize = "time") nut <- .Call(C_r_ram_integer64_sortnut, x = s, PACKAGE = "bit64") setcache(x, "sort", s) setcache(x, "na.count", na.count) setcache(x, "nunique", nut[[1L]]) setcache(x, "nties", nut[[2L]]) invisible(x) } #' @rdname hashcache #' @param stable boolean scalar defining whether stable sorting is needed. Allowing #' non-stable may speed-up. #' @export sortordercache <- function(x, has.na = NULL, stable = NULL){ if (is.null(has.na)){ na.count <- getcache(x, "na.count") if (is.null(na.count)) has.na <- TRUE else has.na <- na.count > 0L } if (is.null(stable)){ nunique <- getcache(x, "nunique") if (is.null(nunique)) stable <- TRUE else stable <- nunique < length(x) } s <- clone(x) o <- seq_along(x) na.count <- ramsortorder(s, o, has.na = has.na, na.last = FALSE, decreasing = FALSE, stable = stable, optimize = "time") nut <- .Call(C_r_ram_integer64_sortnut, x = s, PACKAGE = "bit64") setcache(x, "sort", s) setcache(x, "order", o) setcache(x, "na.count", na.count) setcache(x, "nunique", nut[[1L]]) setcache(x, "nties", nut[[2L]]) invisible(x) } #' @rdname hashcache #' @param optimize by default ramsort optimizes for 'time' which requires more RAM, #' set to 'memory' to minimize RAM requirements and sacrifice speed. 
#' @export ordercache <- function(x, has.na = NULL, stable = NULL, optimize = "time"){ if (is.null(has.na)){ na.count <- getcache(x, "na.count") if (is.null(na.count)) has.na <- TRUE else has.na <- na.count > 0L } if (is.null(stable)){ nunique <- getcache(x, "nunique") if (is.null(nunique)) stable <- TRUE else stable <- nunique < length(x) } o <- seq_along(x) na.count <- ramorder(x, o, has.na = has.na, na.last = FALSE, decreasing = FALSE, stable = stable, optimize = optimize) nut <- .Call(C_r_ram_integer64_ordernut, table = x, order = o, PACKAGE = "bit64") setcache(x, "order", o) setcache(x, "na.count", na.count) setcache(x, "nunique", nut[[1L]]) setcache(x, "nties", nut[[2L]]) invisible(x) } #' Small cache access methods #' #' These methods are packaged here for methods in packages `bit64` and `ff`. #' #' @param x some object #' @param ... ignored #' #' @details #' All these functions benefit from a [sortcache()], [ordercache()] or #' [sortordercache()]. `na.count()`, `nvalid()` and `nunique()` also #' benefit from a [hashcache()]. #' #' @note #' If a [cache()] exists but the desired value is not cached, then these #' functions will store their result in the cache. We do not consider this #' a relevant side-effect, since these small cache results do not have a #' relevant memory footprint. #' #' @return #' `is.sorted` returns a logical scalar, the other methods return an integer scalar. #' #' @seealso #' [cache()] for caching functions and [sortordercache()] for functions creating big caches #' #' @examples #' x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) #' length(x) #' na.count(x) #' nvalid(x) #' nunique(x) #' nties(x) #' table.integer64(x) #' x #' #' @keywords environment methods #' @name is.sorted.integer64 NULL #' @describeIn is.sorted.integer64 returns the number of `NA`s #' @export na.count.integer64 <- function(x, ...){ env <- cache(x) if (is.null(env)) return(.Call(C_r_ram_integer64_nacount, x = x, PACKAGE = "bit64")) if (exists("na.count", envir=env, inherits=FALSE)) return(get("na.count", envir=env, inherits=FALSE)) ret <- .Call(C_r_ram_integer64_nacount, x = x, PACKAGE = "bit64") assign("na.count", ret, envir=env) ret } #' @describeIn is.sorted.integer64 returns the number of valid data points, #' usually [length()] minus `na.count`. #' @export nvalid.integer64 <- function(x, ...){ length(x) - na.count(x) } #' @describeIn is.sorted.integer64 checks for sortedness of `x` (NAs sorted first) #' @export is.sorted.integer64 <- function(x, ...){ env <- cache(x) if (is.null(env)) return(.Call(C_r_ram_integer64_issorted_asc, x = x, PACKAGE = "bit64")) if (exists("is.sorted", envir=env, inherits=FALSE)) return(get("is.sorted", envir=env, inherits=FALSE)) ret <- .Call(C_r_ram_integer64_issorted_asc, x = x, PACKAGE = "bit64") assign("is.sorted", ret, envir=env) ret } #' @describeIn is.sorted.integer64 returns the number of unique values #' @export nunique.integer64 <- function(x, ...){ env <- cache(x) if(is.null(env)) has.cache <- FALSE else if (exists("nunique", envir=env, inherits=FALSE)) return(get("nunique", envir=env, inherits=FALSE)) else has.cache <- TRUE if (is.sorted(x)){ ret <- .Call(C_r_ram_integer64_sortnut , x = x , PACKAGE = "bit64" ) if (has.cache){ assign("nunique", ret[1L], envir=env) assign("nties", ret[2L], envir=env) } ret[1L] }else{ h <- hashmap(x) if (has.cache) assign("nunique", h$nunique, envir=env) h$nunique } } #' @describeIn is.sorted.integer64 returns the number of tied values. 
#' @export nties.integer64 <- function(x, ...){ cv <- getcache(x, "nties") if (is.null(cv)){ if (is.sorted(x)){ cv <- .Call(C_r_ram_integer64_sortnut , x = x , PACKAGE = "bit64" )[2L] }else{ s <- clone(x) # nolint next: object_usage_linter. Keep the output of in-place ramsort for debugging. na.count <- ramsort(s, has.na = TRUE, na.last = FALSE, decreasing = FALSE, stable = FALSE, optimize = "time") cv <- .Call(C_r_ram_integer64_sortnut, x = s, PACKAGE = "bit64")[[2L]] } } cv } bit64/R/bit64-package.R0000644000176200001440000011722614705122715014064 0ustar liggesusers#' A S3 class for vectors of 64bit integers #' #' @description #' Package 'bit64' provides fast serializable S3 atomic 64bit (signed) integers #' that can be used in vectors, matrices, arrays and data.frames. Methods are #' available for coercion from and to logicals, integers, doubles, characters #' and factors as well as many elementwise and summary functions. #' #' ### Version 0.8 #' With 'integer64' vectors you can store very large integers at the expense #' of 64 bits, which is by factor 7 better than 'int64' from package 'int64'. #' Due to the smaller memory footprint, the atomic vector architecture and #' using only S3 instead of S4 classes, most operations are one to three orders #' of magnitude faster: Example speedups are 4x for serialization, 250x for #' adding, 900x for coercion and 2000x for object creation. Also 'integer64' #' avoids an ongoing (potentially infinite) penalty for garbage collection #' observed during existence of 'int64' objects (see code in example section). #' #' ### Version 0.9 #' Package 'bit64' - which extends R with fast 64-bit integers - now has fast #' (single-threaded) implementations the most important univariate algorithmic #' operations (those based on hashing and sorting). We now have methods for #' 'match', '%in%', 'duplicated', 'unique', 'table', 'sort', 'order', 'rank', #' 'quantile', 'median' and 'summary'. Regarding data management we also have #' novel generics 'unipos' (positions of the unique values), 'tiepos' ( #' positions of ties), 'keypos' (positions of foreign keys in a sorted #' dimension table) and derived methods 'as.factor' and 'as.ordered'. This 64- #' bit functionality is implemented carefully to be not slower than the #' respective 32-bit operations in Base R and also to avoid outlying waiting #' times observed with 'order', 'rank' and 'table' (speedup factors 20/16/200 #' respective). This increases the dataset size with wich we can work truly #' interactive. The speed is achieved by simple heuristic optimizers in high- #' level functions choosing the best from multiple low-level algorithms and #' further taking advantage of a novel caching if activated. In an example R #' session using a couple of these operations the 64-bit integers performed 22x #' faster than base 32-bit integers, hash-caching improved this to 24x, #' sortorder-caching was most efficient with 38x (caching hashing and sorting #' is not worth it with 32x at duplicated RAM consumption). #' #' # Design considerations #' #' 64 bit integers are related to big data: we need them to overcome address space #' limitations. Therefore performance of the 64 bit integer type is critical. In the #' S language -- designed in 1975 -- atomic objects were defined to be vectors for a #' couple of good reasons: simplicity, option for implicit parallelization, good #' cache locality. 
In recent years many analytical databases have learnt that lesson: #' column based data bases provide superior performance for many applications, the #' result are products such as MonetDB, Sybase IQ, Vertica, Exasol, Ingres Vectorwise. #' If we introduce 64 bit integers not natively in Base R but as an external package, #' we should at least strive to make them as 'basic' as possible. Therefore the design #' choice of bit64 not only differs from package int64, it is obvious: Like the other #' atomic types in Base R, we model data type 'integer64' as a contiguous [`atomic`] #' vector in memory, and we use the more basic [S3] class system, not [S4]. Like #' package int64 we want our 'integer64' to be [`serialize`]able, therefore we also #' use an existing data type as the basis. Again the choice is obvious: R has only one #' 64 bit data type: doubles. By using [`double`]s, `integer64` [`inherits`] some #' functionality such as [is.atomic()], [length()], [`length<-`], [names()], #' [`names<-`], [dim()], [`dim<-`], [dimnames()], [`dimnames<-`]. #' #' Our R level functions strictly follow the functional programming paradigm: #' no modification of arguments or other side-effects. Before version 0.93 we #' internally deviated from the strict paradigm in order to boost performance. Our C #' functions do not create new return values, instead we pass-in the memory to be #' returned as an argument. This gives us the freedom to apply the C-function to new #' or old vectors, which helps to avoid unnecessary memory allocation, unnecessary #' copying and unnecessary garbage collection. Prior to 0.93 _within_ our R functions #' we also deviated from conventional R programming by not using [`attr<-`] and #' [`attributes<-`] because they always did new memory allocation and copying in older #' R versions. If we wanted to set attributes of return values that we have freshly #' created, we instead used functions [bit::setattr()] and [bit::setattributes()]. #' From version 0.93 `bit::setattr()` is only used for manipulating [`cache`] objects, #' in [ramsort.integer64()], [sort.integer64()], and [as.data.frame.integer64()]. #' #' # Arithmetic precision and coercion #' #' The fact that we introduce 64 bit long long integers -- without introducing 128-bit #' long doubles -- creates some subtle challenges: Unlike 32 bit [`integer`]s, the #' `integer64` are no longer a proper subset of [`double`]. If a binary arithmetic #' operation does involve a `double` and a `integer`, it is a no-brainer to return #' `double` without loss of information. If an `integer64` meets a `double`, it is not #' trivial what type to return. Switching to `integer64` limits our ability to #' represent very large numbers, switching to `double` limits our ability to #' distinguish `x` from `x+1`. Since the latter is the purpose of introducing 64 bit #' integers, we usually return `integer64` from functions involving `integer64`, for #' example in [`c()`][c.integer64], [`cbind()`][cbind.integer64], and #' [`rbind()`][rbind.integer64] #' #' Different from Base R, our operators [`+`][+.integer64], [`-`][-.integer64], #' [`%/%`][%/%.integer64], and [`%%`][%%.integer64] coerce their arguments to #' `integer64` and always return `integer64`. #' #' The multiplication operator [`*`][*.integer64] coerces its first argument to #' `integer64` but allows its second argument to be also `double`: the second #' argument is internaly coerced to 'long double' and the result of the #' multiplication is returned as `integer64`. 
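#'
#' A minimal sketch of the rules described so far (values chosen arbitrarily;
#' `class()` is shown only to make the result type visible):
#'
#' ```r
#' x <- as.integer64(7)
#' class(x + 2)    # "integer64"
#' class(2 - x)    # "integer64"
#' class(x * 2.0)  # "integer64"
#' class(x %/% 2)  # "integer64"
#' class(x %% 2)   # "integer64"
#' ```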
#' #' The division [`/`][/.integer64] and power [`^`][^.integer64] operators also #' coerce their first argument to `integer64` and coerce internally their second #' argument to 'long double', they return as `double`, like #' [`sqrt()`][sqrt.integer64], [`log()`][log.integer64], #' [`log2()`][log2.integer64], and [`log10()`][log10.integer64] do. #' #' | **argument1** | **op** | **argument2** | **->** | **coerced1** | **op** | **coerced2** | **->** | **result** | #' |:-------------:|:------:|:-------------:|:------:|:------------:|:------:|:------------:|:------:|:----------:| #' | integer64 | + | double | -> | integer64 | + | integer64 | -> | integer64 | #' | double | + | integer64 | -> | integer64 | + | integer64 | -> | integer64 | #' | integer64 | - | double | -> | integer64 | - | integer64 | -> | integer64 | #' | double | - | integer64 | -> | integer64 | - | integer64 | -> | integer64 | #' | integer64 | %/% | double | -> | integer64 | %/% | integer64 | -> | integer64 | #' | double | %/% | integer64 | -> | integer64 | %/% | integer64 | -> | integer64 | #' | integer64 | %% | double | -> | integer64 | %% | integer64 | -> | integer64 | #' | double | %% | integer64 | -> | integer64 | %% | integer64 | -> | integer64 | #' | integer64 | * | double | -> | integer64 | * | long double | -> | integer64 | #' | double | * | integer64 | -> | integer64 | * | integer64 | -> | integer64 | #' | integer64 | / | double | -> | integer64 | / | long double | -> | double | #' | double | / | integer64 | -> | integer64 | / | long double | -> | double | #' | integer64 | ^ | double | -> | integer64 | / | long double | -> | double | #' | double | ^ | integer64 | -> | integer64 | / | long double | -> | double | #' #' # Creating and testing S3 class 'integer64' #' #' Our creator function `integer64` takes an argument `length`, creates an atomic #' double vector of this length, attaches an S3 class attribute 'integer64' to it, #' and that's it. We simply rely on S3 method dispatch and interpret those 64-bit #' elements as 'long long int'. #' #' [is.double()] currently returns TRUE for `integer64` and might return `FALSE` in #' a later release. Consider `is.double()` to have undefined behavior and do query #' [is.integer64()] _before_ querying `is.double()`. #' # As a second line of defense against misinterpretation we make `is.double()` return # FALSE by making it S3 generic and adding a method `as.double.integer64()`. # #' The methods [is.integer64()] and [is.vector()] both return `TRUE` for `integer64`. #' Note that we did not patch [storage.mode()] and [typeof()], which both continue #' returning 'double'. Like for 32 bit [`integer`], [mode()] returns 'numeric' and #' [as.double()] tries coercing to [`double`]. It is possible that 'integer64' becomes #' a `vmode` in package ff. #' #' Further methods for creating `integer64` are [`range()`][range.integer64] which #' returns the range of the data type if calles without arguments, #' [`rep()`][rep.integer64], [`seq()`][seq.integer64]. #' #' For all available methods on `integer64` vectors see the index below and the examples. 
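#'
#' For illustration, a minimal sketch of creating and querying such a vector
#' (the value used in the assignment is arbitrary):
#'
#' ```r
#' x <- integer64(3)           # zero atomic vector of length 3
#' is.integer64(x)             # TRUE
#' typeof(x)                   # "double" -- the underlying storage
#' x[2] <- as.integer64("12345678901234")
#' x
#' ```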
#' #' # Index of implemented methods #' #' | **creating, testing, printing** | **see also** | **description** | #' |--------------------------------:|----------------:|:--------------------------| #' | `NA_integer64_` | [`NA_integer_`] | NA constant | #' | `integer64` | [`integer`] | create zero atomic vector | #' | [runif64()] | [runif()] | create random vector | #' | [rep.integer64()] | [rep()] | | #' | [seq.integer64()] | [seq()] | | #' | [is.integer64()] | [is()] | | #' | | [is.integer()] | inherited from Base R | # | [is.double.integer64()] | [is.double()] | | #' | [is.vector.integer64()] | [is.vector()] | | #' | [identical.integer64()] | [identical()] | | #' | [`length<-.integer64`] | [`length<-`] | | #' | | [length()] | inherited from Base R | #' | [`names<-`] | inherited from Base R | #' | | [names()] | inherited from Base R | #' | | [`dim<-`] | inherited from Base R | #' | | [dim()] | inherited from Base R | #' | | [`dimnames<-`] | inherited from Base R | #' | | [dimnames()] | inherited from Base R | #' | | [str()] | inherited from Base R, does not print values correctly | #' | [print.integer64()] | [print()] | | #' | [str.integer64()] | [str()] | | #' #' | **coercing to integer64** | **see also** | **description** | #' |---------------------------:|-----------------:|:----------------| #' | [as.integer64()] | | generic | #' | [as.integer64.bitstring()] | [as.bitstring()] | | #' | [as.integer64.character()] | [character()] | | #' | [as.integer64.double()] | [double()] | | #' | [as.integer64.integer()] | [integer()] | | #' | [as.integer64.integer64()] | `integer64` | | #' | [as.integer64.logical()] | [logical()] | | #' | [as.integer64.NULL()] | [NULL()] | | #' #' | **coercing from integer64** | **see also** | **description** | #' |----------------------------:|-----------------:|:----------------| #' | [as.list.integer64()] | [as.list()] | generic | #' | [as.bitstring()] | [as.bitstring()] | generic | #' | [as.bitstring.integer64()] | | | #' | [as.character.integer64()] | [as.character()] | | #' | [as.double.integer64()] | [as.double()] | | #' | [as.integer.integer64()] | [as.integer()] | | #' | [as.logical.integer64()] | [as.logical()] | | # removed as requested by the CRAN maintainer # | [as.vector.integer64()] | [as.vector()] | | #' #' | **data structures** | **see also** | **description** | #' |----------------------------:|------------------:|:-----------------------------------| #' | [c.integer64()] | [c()] | vector concatenate | #' | [cbind.integer64()] | [cbind()] | column bind | #' | [rbind.integer64()] | [rbind()] | row bind | #' | [as.data.frame.integer64()] | [as.data.frame()] | coerce atomic object to data.frame | #' | | [data.frame()] | inherited from Base R since we have coercion | #' #' | **subscripting** | **see also** | **description** | #' |----------------------------------------------:|------------------------:|:-------------------------| #' | [`[.integer64`][extract.replace.integer64] | [`[`][base::Extract] | vector and array extract | #' | [`[<-.integer64`][extract.replace.integer64] | [`[<-`][base::Extract] | vector and array assign | #' | [`[[.integer64`][extract.replace.integer64] | [`[[`][base::Extract] | scalar extract | #' | [`[[<-.integer64`][extract.replace.integer64] | [`[[<-`][base::Extract] | scalar assign | #' #' | **binary operators** | **see also** | **description** | #' |---------------------:|-------------:|:------------------| #' | [`+.integer64`] | [`+`] | returns integer64 | #' | [`-.integer64`] | [`-`] | returns integer64 | #' | 
[`*.integer64`] | [`*`] | returns integer64 | #' | [`^.integer64`] | [`^`] | returns double | #' | [`/.integer64`] | [`/`] | returns double | #' | [`%/%.integer64`] | [`%/%`] | returns integer64 | #' | [`%%.integer64`] | [`%%`] | returns integer64 | #' #' | **comparison operators** | **see also** | **description** | #' |-------------------------:|-------------:|:----------------| #' | [`==.integer64`] | [`==`] | | #' | [`!=.integer64`] | [`!=`] | | #' | [`<.integer64`] | [`<`] | | #' | [`<=.integer64`] | [`<=`] | | #' | [`>.integer64`] | [`>`] | | #' | [`>=.integer64`] | [`>=`] | | #' #' \tabular{rrl}{ #' \strong{logical operators} \tab \strong{see also} \tab \strong{description} \cr #' \code{\link{!.integer64}} \tab \code{\link{!}} \tab \cr #' \code{\link{&.integer64}} \tab \code{\link{&}} \tab \cr #' \code{\link[=xor.integer64]{|.integer64}} \tab \code{\link[base:Logic]{|}} \tab \cr #' \code{\link{xor.integer64}} \tab \code{\link[=xor]{xor()}} \tab \cr #' } # TODO(r-lib/roxygen2#1668): Restore the markdown representation of the table. # | **logical operators** | **see also** | **description** | # |----------------------:|-------------:|:----------------| # | [`!.integer64`] | [`!`] | | # | [`&.integer64`] | [`&`] | | # | [`\|.integer64`][xor.integer64] | [`\|`][base::Logic] | | # | [`xor.integer64`] | [xor()] | | #' #' | **math functions** | **see also** | **description** | #' |----------------------:|-------------:|:-----------------------------| #' | [is.na.integer64()] | [is.na()] | returns logical | #' | [format.integer64()] | [format()] | returns character | #' | [abs.integer64()] | [abs()] | returns integer64 | #' | [sign.integer64()] | [sign()] | returns integer64 | #' | [log.integer64()] | [log()] | returns double | #' | [log10.integer64()] | [log10()] | returns double | #' | [log2.integer64()] | [log2()] | returns double | #' | [sqrt.integer64()] | [sqrt()] | returns double | #' | [ceiling.integer64()] | [ceiling()] | dummy returning its argument | #' | [floor.integer64()] | [floor()] | dummy returning its argument | #' | [trunc.integer64()] | [trunc()] | dummy returning its argument | #' | [round.integer64()] | [round()] | dummy returning its argument | #' | [signif.integer64()] | [signif()] | dummy returning its argument | #' #' | **cumulative functions** | **see also** | **description** | #' |-------------------------:|-------------:|:----------------| #' | [cummin.integer64()] | [cummin()] | | #' | [cummax.integer64()] | [cummax()] | | #' | [cumsum.integer64()] | [cumsum()] | | #' | [cumprod.integer64()] | [cumprod()] | | #' | [diff.integer64()] | [diff()] | | #' #' | **summary functions** | **see also** | **description** | #' |----------------------:|-------------:|:----------------| #' | [range.integer64()] | [range()] | | #' | [min.integer64()] | [min()] | | #' | [max.integer64()] | [max()] | | #' | [sum.integer64()] | [sum()] | | #' | [mean.integer64()] | [mean()] | | #' | [prod.integer64()] | [prod()] | | #' | [all.integer64()] | [all()] | | #' | [any.integer64()] | [any()] | | #' #' | **algorithmically complex functions** | **see also** | **description (caching)** | #' |--------------------------------------:|-------------:|:---------------------------| #' | [match.integer64()] | [match()] | position of x in table (h//o/so) | #' | [`%in%.integer64`] | [`%in%`] | is x in table? (h//o/so) | #' | [duplicated.integer64()] | [duplicated()] | is current element duplicate of previous one? 
(h//o/so) | #' | [unique.integer64()] | [unique()] | (shorter) vector of unique values only (h/s/o/so) | #' | [unipos.integer64()] | [unipos()] | positions corresponding to unique values (h/s/o/so) | #' | [tiepos.integer64()] | [tiepos()] | positions of values that are tied (//o/so) | #' | [keypos.integer64()] | [keypos()] | position of current value in sorted list of unique values (//o/so) | #' | [table.integer64()] | [table()] | unique values and their frequencies (h/s/o/so) | #' | [sort.integer64()] | [sort()] | sorted vector (/s/o/so) | #' | [order.integer64()] | [order()] | positions of elements that would create sorted vector (//o/so) | #' | [rank.integer64()] | [rank()] | (average) ranks of non-NAs, NAs kept in place (/s/o/so) | #' | [quantile.integer64()] | [quantile()] | (existing) values at specified percentiles (/s/o/so) | #' | [median.integer64()] | [median()] | (existing) value at percentile 0.5 (/s/o/so) | #' | [summary.integer64()] | [summary()] | (/s/o/so) | #' | [all.equal.integer64()] | [all.equal()] | test if two objects are (nearly) equal (/s/o/so) | #' #' | **helper functions** | **see also** | **description** | #' |-----------------:|---------------:|:---------------------------| #' | [minusclass()] | [minusclass()] | removing class attritbute | #' | [plusclass()] | [plusclass()] | inserting class attribute | #' | [binattr()] | [binattr()] | define binary op behaviour | #' #' | **tested I/O functions** | **see also** | **description** | #' |-------------------------:|----------------:|:----------------------| #' | | [read.table()] | inherited from Base R | #' | | [write.table()] | inherited from Base R | #' | | [serialize()] | inherited from Base R | #' | | [unserialize()] | inherited from Base R | #' | | [save()] | inherited from Base R | #' | | [load()] | inherited from Base R | #' | | [dput()] | inherited from Base R | #' | | [dget()] | inherited from Base R | #' #' # Limitations inherited from implementing 64 bit integers via an external package #' #' - **vector size** of atomic vectors is still limited to #' [`.Machine$integer.max`][.Machine]. However, external memory extending packages #' such as ff or bigmemory can extend their address space now with `integer64`. #' Having 64 bit integers also help with those not so obvious address issues that #' arise once we exchange data with SQL databases and datawarehouses, which use big #' integers as surrogate keys, e.g. on indexed primary key columns. This puts R into #' a relatively strong position compared to certain commercial statistical softwares, #' which sell database connectivity but neither have the range of 64 bit integers, #' nor have integers at all, nor have a single numeric data type in their #' macro-glue-language. #' - **literals** such as `123LL` would require changes to Base R, up to then we need #' to write (and call) `as.integer64(123L)` or `as.integer64(123)` or #' `as.integer64('123')`. Only the latter allows to specify numbers beyond Base R's #' numeric data types and therefore is the recommended way to use -- using only one #' way may facilitate migrating code to literals at a later stage. #' #' # Limitations inherited from Base R, Core team, can you change this? #' #' - **[identical()]** with default parameters does not distinguish all bit-patterns of #' doubles. For testing purposes we provide a wrapper [identical.integer64()] that #' will distinguish all bit-patterns. It would be desireable to have a single call #' of `identical()` handle both, [`double`] and `integer64`. 
#' #' - the **colon** operator [:] officially does not dispatch S3 methods, however, we #' have made it generic: #' #' ```r #' from <- lim.integer64()[1] #' to <- from+99 #' from:to #' ``` #' #' As a limitation remains: it will only dispatch at its first argument `from` but #' not at its second `to`. #' #' - **[is.double()]** does not dispatch S3 methods, However, we have made it generic #' and it will return `FALSE` on `integer64`. #' #' - **[c()]** only dispatches [c.integer64()] if the first argument is `integer64` #' and it does not recursively dispatch the proper method when called with argument #' `recursive=TRUE`. Therefore `c(list(integer64, integer64))` does not work and #' for now you can only call `c.integer64(list(x, x))`. #' #' - **generic binary operators** fail to dispatch *any* user-defined S3 method #' if the two arguments have two different S3 classes. For example we have two #' classes [`bit::bit`] and [`bit::bitwhich`] sparsely representing boolean vectors #' and we have methods [`&.bit`][bit::xor.default] and #' [`&.bitwhich`][bit::xor.default]. For an expression involving both as in #' `bit & bitwhich`, none of the two methods is dispatched. Instead a standard #' method is dispatched, which neither handles `bit` nor `bitwhich`. Although #' it lacks symmetry, the better choice would be to dispatch simply the method #' of the class of the first argument in case of class conflict. This choice would #' allow authors of extension packages providing coherent behaviour at least within #' their contributed classes. But as long as none of the package author's methods is #' dispatched, they cannot handle the conflicting classes at all. #' #' - **[unlist()]** is not generic and if it were, we would face similar problems as #' with [c()] #' - **[vector()]** with argument `mode='integer64'` cannot work without adjustment #' of Base R #' - **[as.vector()]** with argument `mode='integer64'` cannot work without adjustment #' of Base R #' - **[is.vector()]** does not dispatch its method [is.vector.integer64()] #' - **[mode<-()]** drops the class 'integer64' which is returned from #' `as.integer64()`. Also it does not remove an existing class 'integer64' when #' assigning mode 'integer'. #' - **[storage.mode<-()]** does not support external data types such as `integer64` #' - **[matrix()]** does drop the 'integer64' class attribute. #' - **[array()]** does drop the 'integer64' class attribute. #' + In current R versions (1.15.1) this can be circumvented by activating the #' function `as.vector.integer64()`. However, the CRAN maintainer has requested #' to remove `as.vector.integer64()`, even at the price of breaking previously #' working functionality of the package. #' #' - **[str()]** does not print the values of `integer64` correctly #' #' # Further limitations #' #' - **subscripting** non-existing elements and subscripting with `NA`s is currently #' not supported. Such subscripting currently returns `9218868437227407266` instead #' of `NA` (the `NA` value of the underlying double code). Following the full R #' behaviour here would either destroy performance or require extensive C-coding. #' #' @note `integer64` are useful for handling database keys and exact counting in +-2^63. #' Do not use them as replacement for 32bit integers, integer64 are not supported for #' subscripting by R-core and they have different semantics when combined with double. #' Do understand that `integer64` can only be useful over `double` if we do not coerce #' it to `double`. 
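#'
#' A minimal sketch of why this matters: not every integer beyond `2^53` is
#' representable as `double`, while all of them are exact as `integer64`, so a
#' round trip through `double` may silently round such values.
#'
#' ```r
#' x <- as.integer64("9007199254740993")  # 2^53 + 1, not representable as double
#' x + 1L                                 # exact integer64 arithmetic
#' as.integer64(as.double(x))             # the round trip through double rounds the value
#' ```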
#' #' While #' #' integer + double -> double + double -> double #' #' or #' #' 1L + 0.5 -> 1.5 #' #' for additive operations we coerce to `integer64` #' #' integer64 + double -> integer64 + integer64 -> integer64 #' #' hence #' #' as.integer64(1) + 0.5 -> 1LL + 0LL -> 1LL #' #' see section "Arithmetic precision and coercion" above #' #' @seealso [integer()] in base R #' @examples #' message("Using integer64 in vector") #' x <- integer64(8) # create 64 bit vector #' x #' is.atomic(x) # TRUE #' is.integer64(x) # TRUE #' is.numeric(x) # TRUE #' is.integer(x) # FALSE - debatable #' is.double(x) # FALSE - might change #' x[] <- 1:2 # assigned value is recycled as usual #' x[1:6] # subscripting as usual #' length(x) <- 13 # changing length as usual #' x #' rep(x, 2) # replicate as usual #' seq(as.integer64(1), 10) # seq.integer64 is dispatched on first given argument #' seq(to=as.integer64(10), 1) # seq.integer64 is dispatched on first given argument #' seq.integer64(along.with=x) # or call seq.integer64 directly #' # c.integer64 is dispatched only if *first* argument is integer64 ... #' x <- c(x,runif(length(x), max=100)) #' # ... and coerces everything to integer64 - including double #' x #' names(x) <- letters # use names as usual #' x #' #' message("Using integer64 in array - note that 'matrix' currently does not work") #' message("as.vector.integer64 removed as requested by the CRAN maintainer") #' message("as consequence 'array' also does not work anymore") # y <- array(as.integer64(NA), dim=c(3,4), dimnames=list(letters[1:3], LETTERS[1:4])) #' message("we still can create a matrix or array by assigning 'dim'") #' y <- rep(as.integer64(NA), 12) #' dim(y) <- c(3,4) #' dimnames(y) <- list(letters[1:3], LETTERS[1:4]) #' y["a",] <- 1:2 # assigning as usual #' y #' y[1:2,-4] # subscripting as usual #' # cbind.integer64 dispatched on any argument and coerces everything to integer64 #' cbind(E=1:3, F=runif(3, 0, 100), G=c("-1","0","1"), y) #' #' message("Using integer64 in data.frame") #' str(as.data.frame(x)) #' str(as.data.frame(y)) #' str(data.frame(y)) #' str(data.frame(I(y))) #' d <- data.frame(x=x, y=runif(length(x), 0, 100)) #' d #' d$x #' #' message("Using integer64 with csv files") #' fi64 <- tempfile() #' write.csv(d, file=fi64, row.names=FALSE) #' e <- read.csv(fi64, colClasses=c("integer64", NA)) #' unlink(fi64) #' str(e) #' identical.integer64(d$x,e$x) #' #' message("Serializing and unserializing integer64") #' dput(d, fi64) #' e <- dget(fi64) #' identical.integer64(d$x,e$x) #' e <- d[,] #' save(e, file=fi64) #' rm(e) #' load(file=fi64) #' identical.integer64(d,e) #' #' \dontrun{ #' message("== Differences between integer64 and int64 ==") #' require(bit64) #' require(int64) #' #' message("-- integer64 is atomic --") #' is.atomic(integer64()) #' #is.atomic(int64()) #' str(integer64(3)) #' #str(int64(3)) #' #' message("-- The following performance numbers are measured under RWin64 --") #' message("-- under RWin32 the advantage of integer64 over int64 is smaller --") #' #' message("-- integer64 needs 7x/5x less RAM than int64 under 64/32 bit OS #' (and twice the RAM of integer as it should be) --") #' #as.vector(object.size(int64(1e6))/object.size(integer64(1e6))) #' as.vector(object.size(integer64(1e6))/object.size(integer(1e6))) #' #' message("-- integer64 creates 2000x/1300x faster than int64 under 64/32 bit OS #' (and 3x the time of integer) --") #' t32 <- system.time(integer(1e8)) #' t64 <- system.time(integer64(1e8)) #' #T64 <- system.time(int64(1e7))*10 # using 1e8 as above 
stalls our R on an i7 8 GB RAM Thinkpad #' #T64/t64 #' t64/t32 #' #' i32 <- sample(1e6) #' d64 <- as.double(i32) #' #' message("-- the following timings are rather conservative since timings #' of integer64 include garbage collection -- due to looped calls") #' message("-- integer64 coerces 900x/100x faster than int64 #' under 64/32 bit OS (and 2x the time of coercing to integer) --") #' t32 <- system.time(for(i in 1:1000)as.integer(d64)) #' t64 <- system.time(for(i in 1:1000)as.integer64(d64)) #' #T64 <- system.time(as.int64(d64))*1000 #' #T64/t64 #' t64/t32 #' td64 <- system.time(for(i in 1:1000)as.double(i32)) #' t64 <- system.time(for(i in 1:1000)as.integer64(i32)) #' #T64 <- system.time(for(i in 1:10)as.int64(i32))*100 #' #T64/t64 #' t64/td64 #' #' message("-- integer64 serializes 4x/0.8x faster than int64 #' under 64/32 bit OS (and less than 2x/6x the time of integer or double) --") #' t32 <- system.time(for(i in 1:10)serialize(i32, NULL)) #' td64 <- system.time(for(i in 1:10)serialize(d64, NULL)) #' i64 <- as.integer64(i32); #' t64 <- system.time(for(i in 1:10)serialize(i64, NULL)) #' rm(i64); gc() #' #I64 <- as.int64(i32); #' #T64 <- system.time(for(i in 1:10)serialize(I64, NULL)) #' #rm(I64); gc() #' #T64/t64 #' t64/t32 #' t64/td64 #' #' #' message("-- integer64 adds 250x/60x faster than int64 #' under 64/32 bit OS (and less than 6x the time of integer or double) --") #' td64 <- system.time(for(i in 1:100)d64+d64) #' t32 <- system.time(for(i in 1:100)i32+i32) #' i64 <- as.integer64(i32); #' t64 <- system.time(for(i in 1:100)i64+i64) #' rm(i64); gc() #' #I64 <- as.int64(i32); #' #T64 <- system.time(for(i in 1:10)I64+I64)*10 #' #rm(I64); gc() #' #T64/t64 #' t64/t32 #' t64/td64 #' #' message("-- integer64 sums 3x/0.2x faster than int64 #' (and at about 5x/60X the time of integer and double) --") #' td64 <- system.time(for(i in 1:100)sum(d64)) #' t32 <- system.time(for(i in 1:100)sum(i32)) #' i64 <- as.integer64(i32); #' t64 <- system.time(for(i in 1:100)sum(i64)) #' rm(i64); gc() #' #I64 <- as.int64(i32); #' #T64 <- system.time(for(i in 1:100)sum(I64)) #' #rm(I64); gc() #' #T64/t64 #' t64/t32 #' t64/td64 #' #' message("-- integer64 diffs 5x/0.85x faster than integer and double #' (int64 version 1.0 does not support diff) --") #' td64 <- system.time(for(i in 1:10)diff(d64, lag=2L, differences=2L)) #' t32 <- system.time(for(i in 1:10)diff(i32, lag=2L, differences=2L)) #' i64 <- as.integer64(i32); #' t64 <- system.time(for(i in 1:10)diff(i64, lag=2L, differences=2L)) #' rm(i64); gc() #' t64/t32 #' t64/td64 #' #' #' message("-- integer64 subscripts 1000x/340x faster than int64 #' (and at the same speed / 10x slower as integer) --") #' ts32 <- system.time(for(i in 1:1000)sample(1e6, 1e3)) #' t32<- system.time(for(i in 1:1000)i32[sample(1e6, 1e3)]) #' i64 <- as.integer64(i32); #' t64 <- system.time(for(i in 1:1000)i64[sample(1e6, 1e3)]) #' rm(i64); gc() #' #I64 <- as.int64(i32); #' #T64 <- system.time(for(i in 1:100)I64[sample(1e6, 1e3)])*10 #' #rm(I64); gc() #' #(T64-ts32)/(t64-ts32) #' (t64-ts32)/(t32-ts32) #' #' message("-- integer64 assigns 200x/90x faster than int64 #' (and 50x/160x slower than integer) --") #' ts32 <- system.time(for(i in 1:100)sample(1e6, 1e3)) #' t32 <- system.time(for(i in 1:100)i32[sample(1e6, 1e3)] <- 1:1e3) #' i64 <- as.integer64(i32); #' i64 <- system.time(for(i in 1:100)i64[sample(1e6, 1e3)] <- 1:1e3) #' rm(i64); gc() #' #I64 <- as.int64(i32); #' #I64 <- system.time(for(i in 1:10)I64[sample(1e6, 1e3)] <- 1:1e3)*10 #' #rm(I64); gc() #' #(T64-ts32)/(t64-ts32) 
#' (t64-ts32)/(t32-ts32) #' #' #' tdfi32 <- system.time(dfi32 <- data.frame(a=i32, b=i32, c=i32)) #' tdfsi32 <- system.time(dfi32[1e6:1,]) #' fi32 <- tempfile() #' tdfwi32 <- system.time(write.csv(dfi32, file=fi32, row.names=FALSE)) #' tdfri32 <- system.time(read.csv(fi32, colClasses=rep("integer", 3))) #' unlink(fi32) #' rm(dfi32); gc() #' #' i64 <- as.integer64(i32); #' tdfi64 <- system.time(dfi64 <- data.frame(a=i64, b=i64, c=i64)) #' tdfsi64 <- system.time(dfi64[1e6:1,]) #' fi64 <- tempfile() #' tdfwi64 <- system.time(write.csv(dfi64, file=fi64, row.names=FALSE)) #' tdfri64 <- system.time(read.csv(fi64, colClasses=rep("integer64", 3))) #' unlink(fi64) #' rm(i64, dfi64); gc() #' #' #I64 <- as.int64(i32); #' #tdfI64 <- system.time(dfI64<-data.frame(a=I64, b=I64, c=I64)) #' #tdfsI64 <- system.time(dfI64[1e6:1,]) #' #fI64 <- tempfile() #' #tdfwI64 <- system.time(write.csv(dfI64, file=fI64, row.names=FALSE)) #' #tdfrI64 <- system.time(read.csv(fI64, colClasses=rep("int64", 3))) #' #unlink(fI64) #' #rm(I64, dfI64); gc() #' #' message("-- integer64 coerces 40x/6x faster to data.frame than int64 #' (and factor 1/9 slower than integer) --") #' #tdfI64/tdfi64 #' tdfi64/tdfi32 #' message("-- integer64 subscripts from data.frame 20x/2.5x faster than int64 #' (and 3x/13x slower than integer) --") #' #tdfsI64/tdfsi64 #' tdfsi64/tdfsi32 #' message("-- integer64 csv writes about 2x/0.5x faster than int64 #' (and about 1.5x/5x slower than integer) --") #' #tdfwI64/tdfwi64 #' tdfwi64/tdfwi32 #' message("-- integer64 csv reads about 3x/1.5 faster than int64 #' (and about 2x slower than integer) --") #' #tdfrI64/tdfri64 #' tdfri64/tdfri32 #' #' rm(i32, d64); gc() #' #' #' message("-- investigating the impact on garbage collection: --") #' message("-- the fragmented structure of int64 messes up R's RAM --") #' message("-- and slows down R's gargbage collection just by existing --") #' #' td32 <- double(21) #' td32[1] <- system.time(d64 <- double(1e7))[3] #' for (i in 2:11)td32[i] <- system.time(gc(), gcFirst=FALSE)[3] #' rm(d64) #' for (i in 12:21)td32[i] <- system.time(gc(), gcFirst=FALSE)[3] #' #' t64 <- double(21) #' t64[1] <- system.time(i64 <- integer64(1e7))[3] #' for (i in 2:11)t64[i] <- system.time(gc(), gcFirst=FALSE)[3] #' rm(i64) #' for (i in 12:21)t64[i] <- system.time(gc(), gcFirst=FALSE)[3] #' #' #T64 <- double(21) #' #T64[1] <- system.time(I64 <- int64(1e7))[3] #' #for (i in 2:11)T64[i] <- system.time(gc(), gcFirst=FALSE)[3] #' #rm(I64) #' #for (i in 12:21)T64[i] <- system.time(gc(), gcFirst=FALSE)[3] #' #' #matplot(1:21, cbind(td32, t64, T64), pch=c("d","i","I"), log="y") #' matplot(1:21, cbind(td32, t64), pch=c("d","i"), log="y") #' } #' @aliases bit64 is.integer.integer64 is.vector.integer64 #' @keywords internal package classes manip #' @useDynLib bit64, .registration = TRUE, .fixes = "C_" "_PACKAGE" ## usethis namespace: start #' @importFrom bit clone is.sorted keyorder keysort keysortorder mergeorder #' mergesort mergesortorder na.count nties nunique nvalid quickorder #' quicksort quicksortorder radixorder radixsort radixsortorder ramorder #' ramsort ramsortorder repeat.time setattr shellorder shellsort #' shellsortorder xor #' @importFrom graphics barplot par title #' @importFrom methods as is #' @importFrom stats cor median quantile #' @importFrom utils packageDescription strOptions tail #' @export -.integer64 : :.default :.integer64 !.integer64 !=.integer64 #' @export [.integer64 [[.integer64 [[<-.integer64 [<-.integer64 *.integer64 #' @export /.integer64 &.integer64 %/%.integer64 
%%.integer64 %in% %in%.default #' @export %in%.integer64 ^.integer64 +.integer64 <.integer64 <=.integer64 #' @export ==.integer64 >.integer64 >=.integer64 |.integer64 abs.integer64 #' @export all.equal.integer64 all.integer64 any.integer64 as.bitstring.integer64 #' @export as.character.integer64 as.data.frame.integer64 as.double.integer64 #' @export as.integer.integer64 as.integer64.bitstring as.integer64.character #' @export as.integer64.double as.integer64.factor as.integer64.integer #' @export as.integer64.integer64 as.integer64.logical as.integer64.NULL #' @export as.list.integer64 as.logical.integer64 c.integer64 cbind.integer64 #' @export ceiling.integer64 cummax.integer64 cummin.integer64 cumprod.integer64 #' @export cumsum.integer64 diff.integer64 duplicated.integer64 floor.integer64 #' @export format.integer64 hashdup.cache_integer64 hashfin.cache_integer64 #' @export hashfun.integer64 hashmap.integer64 hashmaptab.integer64 #' @export hashmapuni.integer64 hashmapupo.integer64 hashpos.cache_integer64 #' @export hashrev.cache_integer64 hashrin.cache_integer64 #' @export hashtab.cache_integer64 hashuni.cache_integer64 #' @export hashupo.cache_integer64 is.double is.double.default #' @export is.double.integer64 is.finite.integer64 is.infinite.integer64 #' @export is.na.integer64 is.nan.integer64 is.sorted.integer64 #' @export is.vector.integer64 keypos.integer64 length<-.integer64 log.integer64 #' @export log10.integer64 log2.integer64 match match.default match.integer64 #' @export max.integer64 mean.integer64 median.integer64 mergeorder.integer64 #' @export mergesort.integer64 mergesortorder.integer64 min.integer64 #' @export na.count.integer64 nties.integer64 nunique.integer64 nvalid.integer64 #' @export order order.default order.integer64 orderdup.integer64 #' @export orderfin.integer64 orderkey.integer64 ordernut.integer64 #' @export orderpos.integer64 orderqtl.integer64 orderrnk.integer64 #' @export ordertab.integer64 ordertie.integer64 orderuni.integer64 #' @export orderupo.integer64 prank.integer64 print.bitstring print.cache #' @export print.integer64 prod.integer64 qtile.integer64 quantile.integer64 #' @export quickorder.integer64 quicksort.integer64 quicksortorder.integer64 #' @export radixorder.integer64 radixsort.integer64 radixsortorder.integer64 #' @export ramorder.integer64 ramsort.integer64 ramsortorder.integer64 #' @export range.integer64 rank rank.default rank.integer64 rbind.integer64 #' @export rep.integer64 round.integer64 scale.integer64 seq.integer64 #' @export shellorder.integer64 shellsort.integer64 shellsortorder.integer64 #' @export sign.integer64 signif.integer64 sort.integer64 sortfin.integer64 #' @export sortnut.integer64 sortorderdup.integer64 sortorderkey.integer64 #' @export sortorderpos.integer64 sortorderrnk.integer64 sortordertab.integer64 #' @export sortordertie.integer64 sortorderuni.integer64 sortorderupo.integer64 #' @export sortqtl.integer64 sorttab.integer64 sortuni.integer64 sqrt.integer64 #' @export str.integer64 sum.integer64 summary.integer64 tiepos.integer64 #' @export trunc.integer64 unipos.integer64 unique.integer64 xor.integer64 ## usethis namespace: end NULL bit64/R/matrix64.R0000644000176200001440000000510214705122715013206 0ustar liggesusers#' Working with integer64 arrays and matrices #' #' These functions and methods facilitate working with integer64 #' objects stored in matrices. 
As ever, the primary motivation #' for having tailor-made functions here is that R's methods #' often receive input from bit64 and treat the vectors as doubles, #' leading to unexpected and/or incorrect results. #' #' As of now, the `colSums()` and `rowSums()` methods are implemented #' as wrappers around equivalent `apply()` approaches, because #' re-using the default routine (and then applying integer64 to the #' result) does not work for objects with missing elements. Ideally #' this would eventually get its own dedicated C routine mimicking #' that of `colSums()` for integers; feature requests and PRs welcome. #' #' `aperm()` is required for `apply()` to work, in general, otherwise #' `FUN` gets applied to a class-stripped version of the input. #' #' @param x An array of integer64 numbers. #' @param na.rm,dims Same interpretation as in [colSums()]. #' @param ... Passed on to subsequent methods. #' @examples #' A = as.integer64(1:6) #' dim(A) = 3:2 #' #' colSums(A) #' rowSums(A) #' aperm(A, 2:1) #' @name matrix64 NULL #' @rdname matrix64 #' @export colSums <- function(x, na.rm=FALSE, dims=1L) UseMethod("colSums") #' @rdname matrix64 #' @export colSums.default <- function(x, na.rm=FALSE, dims=1L) base::colSums(x, na.rm, dims) #' @rdname matrix64 #' @export colSums.integer64 <- function(x, na.rm=FALSE, dims=1L) { n_dim <- length(dim(x)) stopifnot( `dims= should be a length-1 integer between 1 and length(dim(x))-1L` = length(dims) == 1L && dims > 0L && dims < n_dim ) MARGIN = tail(seq_len(n_dim), -dims) ret = apply(x, MARGIN, sum, na.rm = na.rm) class(ret) = "integer64" ret } #' @rdname matrix64 #' @export rowSums <- function(x, na.rm=FALSE, dims=1L) UseMethod("rowSums") #' @rdname matrix64 #' @export rowSums.default <- function(x, na.rm=FALSE, dims=1L) base::rowSums(x, na.rm, dims) #' @rdname matrix64 #' @export rowSums.integer64 <- function(x, na.rm=FALSE, dims=1L) { n_dim <- length(dim(x)) stopifnot( `dims= should be a length-1 integer between 1 and length(dim(x))-1L` = length(dims) == 1L && dims > 0L && dims < n_dim ) MARGIN = seq_len(dims) ret = apply(x, MARGIN, sum, na.rm = na.rm) class(ret) = "integer64" ret } #' @rdname matrix64 #' @param a,perm Passed on to [aperm()]. #' @export aperm.integer64 <- function(a, perm, ...) { class(a) = minusclass(class(a), "integer64") ret <- aperm(a, perm, ...) class(ret) = plusclass(class(a), "integer64") ret } bit64/R/sort64.R0000644000176200001440000005071314705122715012701 0ustar liggesusers# /* # R-Code for sorting and ordering # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 # */ #' Low-level intger64 methods for in-RAM sorting and ordering #' #' Fast low-level methods for sorting and ordering. The `..sortorder` #' methods do sorting and ordering at once, which requires more RAM #' than ordering but is (almost) as fast as as sorting. #' #' @note #' Note that these methods purposely violate the functional programming #' paradigm: they are called for the side-effect of changing some of #' their arguments. The `sort`-methods change `x`, the `order`-methods #' change `i`, and the `sortoder`-methods change both `x` and `i` #' #' @param x a vector to be sorted by [ramsort.integer64()] and #' [ramsortorder.integer64()], i.e. 
the output of [sort.integer64()] #' @param i integer positions to be modified by [ramorder.integer64()] and #' [ramsortorder.integer64()], default is 1:n, in this case the output is #' similar to [order.integer64()] #' @param has.na boolean scalar defining whether the input vector might contain #' `NA`s. If we know we don't have NAs, this may speed-up. _Note_ that you #' risk a crash if there are unexpected `NA`s with `has.na=FALSE` #' @param na.last boolean scalar telling ramsort whether to sort `NA`s last #' or first. _Note_ that 'boolean' means that there is no third option `NA` #' as in [sort()] #' @param decreasing boolean scalar telling ramsort whether to sort increasing #' or decreasing #' @param stable boolean scalar defining whether stable sorting is needed. #' Allowing non-stable may speed-up. #' @param optimize by default ramsort optimizes for 'time' which requires more #' RAM, set to 'memory' to minimize RAM requirements and sacrifice speed #' @param restlevel number of remaining recursionlevels before `quicksort` #' switches from recursing to `shellsort` #' @param radixbits size of radix in bits #' @param VERBOSE cat some info about chosen method #' @param ... further arguments, passed from generics, ignored in methods #' #' @details See [bit::ramsort()] #' @return These functions return the number of `NAs` found or assumed #' during sorting #' @keywords programming manip #' @seealso [bit::ramsort()] for the generic, `ramsort.default` for the methods #' provided by package ff, [sort.integer64()] for the sort interface and #' [sortcache()] for caching the work of sorting #' @examples #' x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) #' x #' message("ramsort example") #' s <- clone(x) #' ramsort(s) #' message("s has been changed in-place - whether or not ramsort uses an in-place algorithm") #' s #' message("ramorder example") #' s <- clone(x) #' o <- seq_along(s) #' ramorder(s, o) #' message("o has been changed in-place - s remains unchanged") #' s #' o #' s[o] #' message("ramsortorder example") #' o <- seq_along(s) #' ramsortorder(s, o) #' message("s and o have both been changed in-place - this is much faster") #' s #' o #' @name ramsort.integer64 NULL #' @rdname ramsort.integer64 #' @export shellsort.integer64 <- function(x, has.na=TRUE, na.last=FALSE, decreasing=FALSE, ...) { force(x) .Call(C_r_ram_integer64_shellsort , x = x , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export shellsortorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE, ...) { force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") .Call(C_r_ram_integer64_shellsortorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export shellorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE, ...) 
{ force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") .Call(C_r_ram_integer64_shellorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export mergesort.integer64 <- function(x, has.na=TRUE, na.last=FALSE, decreasing=FALSE, ...){ force(x) .Call(C_r_ram_integer64_mergesort , x = x , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export mergeorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE, ...){ force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") .Call(C_r_ram_integer64_mergeorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export mergesortorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE, ...){ force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") .Call(C_r_ram_integer64_mergesortorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export quicksort.integer64 <- function(x, has.na=TRUE, na.last=FALSE, decreasing=FALSE , restlevel=floor(1.5*log2(length(x))) , ...){ force(x) if (restlevel<0L) restlevel = 0L .Call(C_r_ram_integer64_quicksort , x = x , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , restlevel = as.integer(restlevel) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export quicksortorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE , restlevel=floor(1.5*log2(length(x))) , ...){ force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") if (restlevel<0L) restlevel = 0L .Call(C_r_ram_integer64_quicksortorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , restlevel = as.integer(restlevel) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export quickorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE , restlevel=floor(1.5*log2(length(x))) , ...){ force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") if (restlevel<0L) restlevel = 0L .Call(C_r_ram_integer64_quickorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , restlevel = as.integer(restlevel) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export radixsort.integer64 <- function(x, has.na=TRUE, na.last=FALSE, decreasing=FALSE , radixbits=8L , ...) 
{ stopifnot(radixbits %in% c(1L, 2L, 4L, 8L, 16L)) force(x) .Call(C_r_ram_integer64_radixsort , x = x , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , radixbits = as.integer(radixbits) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export radixsortorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE , radixbits=8L , ...) { stopifnot(radixbits %in% c(1L, 2L, 4L, 8L, 16L)) force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") .Call(C_r_ram_integer64_radixsortorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , radixbits = as.integer(radixbits) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export radixorder.integer64 <- function(x, i, has.na=TRUE, na.last=FALSE, decreasing=FALSE , radixbits=8L , ...) { stopifnot(radixbits %in% c(1L, 2L, 4L, 8L, 16L)) force(x) force(i) if (!is.integer(i)) stop("i must be integer") if (length(i) != length(x)) stop("lengths of x and i don't match") .Call(C_r_ram_integer64_radixorder , x = x , i = i , has_na = as.logical(has.na) , na_last = as.logical(na.last) , decreasing = as.logical(decreasing) , radixbits = as.integer(radixbits) , PACKAGE = "bit64" ) } #' @rdname ramsort.integer64 #' @export ramsort.integer64 <- function (x , has.na = TRUE , na.last=FALSE , decreasing = FALSE , stable = TRUE , optimize = c("time", "memory") , VERBOSE = FALSE , ... ) { optimize <- match.arg(optimize) if (is.null(names(x))){ if (optimize == "time"){ if (length(x)<2048L){ if (VERBOSE) cat("ramsort selected mergesort\n") mergesort(x, has.na = has.na, na.last = na.last, decreasing = decreasing) }else if (length(x)<16777216L){ if (VERBOSE) cat("ramsort selected radix8sort\n") radixsort(x, radixbits=8L, has.na = has.na, na.last = na.last, decreasing = decreasing) }else{ if (VERBOSE) cat("ramsort selected radix4sort\n") radixsort(x, radixbits=4L, has.na = has.na, na.last = na.last, decreasing = decreasing) } }else{ if (VERBOSE) cat("ramsort selected quicksort\n") quicksort(x, has.na = has.na, na.last = na.last, decreasing = decreasing) } }else{ if (stable || optimize == "time"){ i <- seq_along(x) if (length(x)<2048L){ if (VERBOSE) cat("ramsortorder selected mergesortorder\n") ret <- mergesortorder(x, i, has.na = has.na, na.last = na.last, decreasing = decreasing) }else if (length(x)<2097152L){ if (VERBOSE) cat("ramsortorder selected radix8sortorder\n") ret <- radixsortorder(x, i, radixbits=8L, has.na = has.na, na.last = na.last, decreasing = decreasing) }else{ if (VERBOSE) cat("ramsortorder selected radix4sortorder\n") ret <- radixsortorder(x, i, radixbits=4L, has.na = has.na, na.last = na.last, decreasing = decreasing) } }else{ if (VERBOSE) cat("ramsort selected quicksortorder\n") i <- seq_along(x) ret <- quicksortorder(x, i, has.na = has.na, na.last = na.last, decreasing = decreasing) } setattr(x, "names", names(x)[i]) ret } } #' @rdname ramsort.integer64 #' @export ramsortorder.integer64 <- function (x , i , has.na = TRUE , na.last=FALSE , decreasing = FALSE , stable = TRUE , optimize = c("time", "memory") , VERBOSE = FALSE , ... 
) { optimize <- match.arg(optimize) if (is.null(names(x)) && is.null(names(i))){ if (stable || optimize == "time") { if (length(x)<2048L){ if (VERBOSE) cat("ramsortorder selected mergesortorder\n") mergesortorder(x, i, has.na = has.na, na.last = na.last, decreasing = decreasing) }else if (length(x)<16777216L){ if (VERBOSE) cat("ramsortorder selected radix8sortorder\n") radixsortorder(x, i, radixbits=8L, has.na = has.na, na.last = na.last, decreasing = decreasing) }else{ if (VERBOSE) cat("ramsortorder selected radix4sortorder\n") radixsortorder(x, i, radixbits=4L, has.na = has.na, na.last = na.last, decreasing = decreasing) } }else{ if (VERBOSE) cat("ramsortorder selected quicksortorder\n") quicksortorder(x, i, has.na = has.na, na.last = na.last, decreasing = decreasing) } }else stop("names not supported") } #' @rdname ramsort.integer64 #' @export ramorder.integer64 <- function (x , i , has.na = TRUE , na.last=FALSE , decreasing = FALSE , stable = TRUE , optimize = c("time", "memory") , VERBOSE = FALSE , ... ) { optimize <- match.arg(optimize) if (is.null(names(x)) && is.null(names(i))){ if (stable) { if (VERBOSE) cat("ramorder selected mergeorder\n") mergeorder(x, i, has.na = has.na, na.last = na.last, decreasing = decreasing) }else{ if (VERBOSE) cat("ramorder selected quickorder\n") quickorder(x, i, has.na = has.na, na.last = na.last, decreasing = decreasing) } }else stop("names not supported") } #' High-level intger64 methods for sorting and ordering #' #' Fast high-level methods for sorting and ordering. These are wrappers to #' [ramsort.integer64()] and friends and do not modify their arguments. #' #' @param x a vector to be sorted by [ramsort.integer64()] and #' [ramsortorder.integer64()], i.e. the output of [sort.integer64()] #' @param has.na boolean scalar defining whether the input vector might #' contain `NA`s. If we know we don't have NAs, this may speed-up. _Note_ #' that you risk a crash if there are unexpected `NA`s with `has.na=FALSE` #' @param na.last boolean scalar telling ramsort whether to sort `NA`s last #' or first. _Note_ that 'boolean' means that there is no third option #' `NA` as in [sort()] #' @param decreasing boolean scalar telling ramsort whether to sort #' increasing or decreasing #' @param stable boolean scalar defining whether stable sorting is needed. #' Allowing non-stable may speed-up. #' @param optimize by default ramsort optimizes for 'time' which requires #' more RAM, set to 'memory' to minimize RAM requirements and sacrifice speed #' @param VERBOSE cat some info about chosen method #' @param ... further arguments, passed from generics, ignored in methods #' #' @details see [sort()] and [order()] #' @return `sort` returns the sorted vector and `vector` returns the order positions. 
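#'
#' The two are related in the usual way (a small sketch, assuming a vector
#' without names):
#'
#' ```r
#' x <- as.integer64(c(3, 1, 2))
#' o <- order.integer64(x)
#' identical.integer64(sort(x), x[o])  # subscripting by the order positions gives the sorted vector
#' ```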
#' @keywords programming manip #' @seealso [`sort()`][sort.integer64], [sortcache()] #' @examples #' x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) #' x #' sort(x) #' message("the following has default optimize='time' which is faster but requires more RAM #' , this calls 'ramorder'") #' order.integer64(x) #' message("slower with less RAM, this calls 'ramsortorder'") #' order.integer64(x, optimize="memory") #' @name sort.integer64 NULL # nocov start if (FALSE){ library(bit64) x <- as.integer64(c(sample.int(10000000L),NA)) #system.time(sortcache(x))[3] # system.time(ordercache(x))[3] system.time(sortordercache(x))[3L] # system.time(s <- sort(x, na.last=FALSE, decreasing=FALSE))[3] # stopifnot(identical(s, {xs<-clone(x);ramsort(xs, na.last=FALSE, decreasing=FALSE);xs})) # system.time(s <- sort(x, na.last=TRUE, decreasing=FALSE))[3] # stopifnot(identical(s, {xs<-clone(x);ramsort(xs, na.last=TRUE, decreasing=FALSE);xs})) # system.time(s <- sort(x, na.last=FALSE, decreasing=TRUE))[3] # stopifnot(identical(s, {xs<-clone(x);ramsort(xs, na.last=FALSE, decreasing=TRUE);xs})) # system.time(s <- sort(x, na.last=TRUE, decreasing=TRUE))[3] # stopifnot(identical(s, {xs<-clone(x);ramsort(xs, na.last=TRUE, decreasing=TRUE);xs})) system.time(o <- order.integer64(x, na.last=FALSE, decreasing=FALSE))[3L] stopifnot(identical(o, {xo<-seq_along(x);ramorder(x, xo, na.last=FALSE, decreasing=FALSE);xo})) system.time(o <- order.integer64(x, na.last=TRUE, decreasing=FALSE))[3L] stopifnot(identical(o, {xo<-seq_along(x);ramorder(x, xo, na.last=TRUE, decreasing=FALSE);xo})) system.time(o <- order.integer64(x, na.last=FALSE, decreasing=TRUE))[3L] stopifnot(identical(o, {xo<-seq_along(x);ramorder(x, xo, na.last=FALSE, decreasing=TRUE);xo})) system.time(o <- order.integer64(x, na.last=TRUE, decreasing=TRUE))[3L] stopifnot(identical(o, {xo<-seq_along(x);ramorder(x, xo, na.last=TRUE, decreasing=TRUE);xo})) } # nocov end #' @rdname sort.integer64 #' @export sort.integer64 <- function(x , decreasing = FALSE , has.na = TRUE , na.last = TRUE , stable = TRUE , optimize = c("time", "memory") , VERBOSE = FALSE , ... ){ do.na.last <- is.na(na.last) || na.last cache_env <- cache(x) if (!is.null(cache_env$sort)){ if (do.na.last || decreasing){ s <- double(length(x)) .Call(C_r_ram_integer64_sortsrt , x = cache_env$sort , na_count = as.integer(na.count <- cache_env$na.count) , na_last = as.logical(do.na.last) , decreasing = as.logical(decreasing) , s = s , PACKAGE = "bit64" ) setattr(s, "class", "integer64") }else s <- cache_env$sort # here we save copying at all }else if (!is.null(cache_env$order)){ if (do.na.last || decreasing){ s <- double(length(x)) .Call(C_r_ram_integer64_sortsrt , x = x[cache_env$order] , na_count = as.integer(na.count <- cache_env$na.count) , na_last = as.logical(do.na.last) , decreasing = as.logical(decreasing) , s = s , PACKAGE = "bit64" ) setattr(s, "class", "integer64") }else s <- x[cache_env$order] }else{ if (identical(cache_env$na.count, 0L)) has.na <- FALSE s <- clone(x) na.count <- ramsort( s , has.na=has.na , na.last=do.na.last , decreasing=decreasing , stable=stable , optimize = optimize , VERBOSE = FALSE ) } if (is.na(na.last) && na.count) length(s) <- length(s) - na.count s } #' @rdname sort.integer64 #' @export order.integer64 <- function( ... , na.last = TRUE , decreasing = FALSE , has.na = TRUE , stable = TRUE , optimize = c("time", "memory") , VERBOSE = FALSE ){ do.na.last <- is.na(na.last) || na.last # COPY ON MODIFY is broken for reading from list(...) # because list(...) 
creates a copy of all ... and this invalidates our caches # therefore we go this sick workaround argsymbols <- as.list(substitute(list(...)))[-1L] argframe <- parent.frame() A <- function(i)eval(argsymbols[[i]], argframe) N <- length(argsymbols) if (N!=1L) stop("can only order one vector at the moment") x <- A(1L) cache_env <- cache(x) if (!is.null(cache_env$order)){ if (do.na.last || decreasing){ o <- integer(length(x)) if (is.null(cache_env$sort)){ .Call(C_r_ram_integer64_orderord , x = x , i = cache_env$order , na_count = as.integer(na.count <- cache_env$na.count) , na_last = as.logical(do.na.last) , decreasing = as.logical(decreasing) , o = o , PACKAGE = "bit64" ) }else{ .Call(C_r_ram_integer64_sortorderord , x = cache_env$sort , i = cache_env$order , na_count = as.integer(na.count <- cache_env$na.count) , na_last = as.logical(do.na.last) , decreasing = as.logical(decreasing) , o = o , PACKAGE = "bit64" ) } }else o <- cache_env$order # here we save copying at all }else{ if (identical(cache_env$na.count, 0L)) has.na <- FALSE optimize <- match.arg(optimize) o <- seq_along(x) if (optimize=="time"){ s <- clone(x) na.count <- ramsortorder(s, o , has.na=has.na , na.last=do.na.last , decreasing=decreasing , stable=stable , optimize = optimize , VERBOSE = FALSE ) }else{ na.count <- ramorder(x, o , has.na=has.na , na.last=do.na.last , decreasing=decreasing , stable=stable , optimize = optimize , VERBOSE = FALSE ) } } if (is.na(na.last) && na.count) length(o) <- length(o) - na.count o } bit64/R/sortuse64.R0000644000176200001440000004124014705122715013411 0ustar liggesusers# /* # R-Code for searching and merging # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 # */ #' Searching and other uses of sorting for 64bit integers #' #' This is roughly an implementation of hash functionality but based on sorting #' instead on a hashmap. Since sorting is more informative than hashing we #' can do some more interesting things. #' #' @param sorted a sorted [`integer64`] vector #' @param ... 
further arguments, passed from generics, ignored in methods #' @param method see Details #' #' @details #' #' | **sortfun** | **orderfun** | **sortorderfun** | **see also** | **description** | #' |------------:|-------------:|-----------------:|-------------------:|:----------------| #' | `sortnut` | `ordernut` | | | return number of tied and of unique values | #' | `sortfin` | `orderfin` | | [`%in%.integer64`] | return logical whether `x` is in `table` | #' | | `orderpos` | `sortorderpos` | [`match()`][match.integer64] | return positions of `x` in `table` | #' | | `orderdup` | `sortorderdup` | [`duplicated()`][duplicated.integer64] | return logical whether values are duplicated | #' | `sortuni` | `orderuni` | `sortorderuni` | [`unique()`][unique.integer64] | return unique values (=dimensiontable) | #' | | `orderupo` | `sortorderupo` | [`unique()`][unique.integer64] | return positions of unique values | #' | | `ordertie` | `sortordertie` | | return positions of tied values | #' | | `orderkey` | `sortorderkey` | | positions of values in vector of unique values (match in dimensiontable) | #' | `sorttab` | `ordertab` | `sortordertab` | [`table()`][table.integer64] | tabulate frequency of values | #' | | `orderrnk` | `sortorderrnk` | | rank averaging ties | #' | `sortqtl` | `orderqtl` | | | return quantiles given probabilities | #' #' The functions `sortfin`, `orderfin`, `orderpos` and `sortorderpos` each #' offer three algorithms for finding `x` in `table`. #' #' With `method=1L` each value of `x` is searched independently using #' _binary search_, this is fastest for small `table`s. #' #' With `method=2L` the values of `x` are first sorted and then searched using #' _doubly exponential search_, this is the best allround method. #' #' With `method=3L` the values of `x` are first sorted and then searched using #' simple merging, this is the fastest method if `table` is huge and `x` has #' similar size and distribution of values. #' #' With `method=NULL` the functions use a heuristic to determine the fastest #' algorithm. #' #' The functions `orderdup` and `sortorderdup` each offer two algorithms for #' setting the truth values in the return vector. #' #' With `method=1L` the return values are set directly which causes random #' write access on a possibly large return vector. #' #' With `method=2L` the return values are first set in a smaller bit-vector -- #' random access limited to a smaller memory region -- and finally written #' sequentially to the logical output vector. #' #' With `method=NULL` the functions use a heuristic to determine the fastest #' algorithm. #' #' @return see details #' @keywords programming manip #' @seealso [`match()`][match.integer64] #' @examples #' message("check the code of 'optimizer64' for examples:") #' print(optimizer64) #' @export sortnut <- function(sorted, ...) UseMethod("sortnut") #' @rdname sortnut #' @export sortnut.integer64 <- function(sorted, ...) { ret <- .Call(C_r_ram_integer64_sortnut, x = sorted, PACKAGE = "bit64") names(ret) <- c("nunique","nties") ret } #' @rdname sortnut #' @param table the original data with original order under the sorted vector #' @param order an [`integer`] order vector that turns 'table' into 'sorted' #' @export ordernut <- function(table, order, ...) UseMethod("ordernut") #' @rdname sortnut #' @export ordernut.integer64 <- function(table, order, ...) 
{ ret <- .Call(C_r_ram_integer64_ordernut, table = as.integer64(table), order = as.integer(order), PACKAGE = "bit64") names(ret) <- c("nunique","nties") ret } #' @rdname sortnut #' @param x an [`integer64`] vector #' @export sortfin <- function(sorted, x, ...) UseMethod("sortfin") #' @rdname sortnut #' @export sortfin.integer64 <- function(sorted, x, method=NULL, ...) { n <- length(x) if (is.null(method)){ if (n<2048L){ method <- 1L }else if (n require(rhub) # > rhub_setup() # > rhub_doctor() # > rhub_check(platforms = c("linux", "macos", "windows", "ubuntu-clang", "ubuntu-gcc12")) # ✔ Found git repository at /home/jo/SIK/truecluster/bit64. # ✔ Found GitHub PAT. # ✔ Check started: linux, macos, windows, ubuntu-clang, ubuntu-gcc12 (aspherical-sphinx). # See for live output! .onLoad <- function(lib, pkg) { ##library.dynam("bit64", pkg, lib) use useDynLib(bit) in NAMESPACE instead ##packageStartupMessage("Loading package bit64 ", packageDescription("bit64", fields="Version")) } .onAttach <- function(libname, pkgname){ packageStartupMessage("Attaching package bit64") packageStartupMessage("package:bit64 (c) 2011-2017 Jens Oehlschlaegel") packageStartupMessage("creators: integer64 runif64 seq :") packageStartupMessage("coercion: as.integer64 as.vector as.logical as.integer as.double as.character as.bitstring") packageStartupMessage("logical operator: ! & | xor != == < <= >= >") packageStartupMessage("arithmetic operator: + - * / %/% %% ^") packageStartupMessage("math: sign abs sqrt log log2 log10") packageStartupMessage("math: floor ceiling trunc round") packageStartupMessage("querying: is.integer64 is.vector [is.atomic} [length] format print str") packageStartupMessage("values: is.na is.nan is.finite is.infinite") packageStartupMessage("aggregation: any all min max range sum prod") packageStartupMessage("cumulation: diff cummin cummax cumsum cumprod") packageStartupMessage("access: length<- [ [<- [[ [[<-") packageStartupMessage("combine: c rep cbind rbind as.data.frame") packageStartupMessage("WARNING don't use as subscripts") packageStartupMessage("WARNING semantics differ from integer") packageStartupMessage("for more help type ?bit64") } # nocov start .onUnload <- function(libpath){ packageStartupMessage("Unloading package bit64") library.dynam.unload("bit64", libpath) } # nocov end bit64/R/patch64.R0000644000176200001440000001017514706047365013017 0ustar liggesusers# /* # R-Code for patching S3 generics # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 # */ #' Turning base R functions into S3 generics for bit64 #' #' Turn those base functions S3 generic which are used in bit64 #' #' @usage #' from:to #' is.double(x) #' match(x, table, ...) #' x \%in\% table #' rank(x, ...) #' order(...) #' @aliases bit64S3 : :.default :.integer64 is.double is.double.default #' is.double.integer64 match match.default %in% %in%.default rank #' rank.default order order.default # @aliases table table.default #' @param x integer64 vector: the values to be matched, optionally carrying a #' cache created with [hashcache()] #' @param table integer64 vector: the values to be matched against, optionally #' carrying a cache created with [hashcache()] or [sortordercache()] #' @param from scalar denoting first element of sequence #' @param to scalar denoting last element of sequence #' @param ... 
ignored #' #' @details #' The following functions are turned into S3 generics in order to dispatch #' methods for [integer64()]: #' #' - [`:`] #' - [is.double()] #' - [match()] #' - [`%in%`] # - [table()] #' - [rank()] #' - [order()] #' #' @return [invisible()] #' @note #' - [is.double()] returns `FALSE` for [`integer64`] #' - [`:`] currently only dispatches at its first argument, thus #' `as.integer64(1):9` works but `1:as.integer64(9)` doesn't #' - [match()] currently only dispatches at its first argument and expects #' its second argument also to be integer64, otherwise throws an error. #' Beware of something like `match(2, as.integer64(0:3))` #' - [`%in%`] currently only dispatches at its first argument and expects #' its second argument also to be integer64, otherwise throws an error. #' Beware of something like `2 %in% as.integer64(0:3)` #' - [order()] currently only orders a single argument, trying more than #' one raises an error #' #' @seealso [bit64()], [S3] #' #' @examples #' is.double(as.integer64(1)) #' as.integer64(1):9 #' match(as.integer64(2), as.integer64(0:3)) #' as.integer64(2) %in% as.integer64(0:3) #' #' unique(as.integer64(c(1,1,2))) #' rank(as.integer64(c(1,1,2))) #' # %table(as.integer64(c(1,1,2))) # %table(as.integer64(c(1,1,2)),as.integer64(c(3,4,4))) # %table(as.integer64(c(1,1,2)),c(3,4,4)) # %table(c(1,1,2),as.integer64(c(3,4,4))) #' #' order(as.integer64(c(1,NA,2))) #' @keywords methods #' @name bit64S3 NULL # OCT 2013: bit64S3() at wish of CRAN maintainers replaced by direct conversion to S3 generics # in order to avoid assigning to globalenv `:` <- function(from,to) UseMethod(":") #' @export `:.default` <- function(from,to) base::`:`(from,to) #' @export `:.integer64` <- function(from, to)seq.integer64(from=from, to=to) is.double <- function(x) UseMethod("is.double") #' @rdname bit64S3 #' @export is.double.default <- function(x) base::is.double(x) #' @rdname bit64S3 #' @export is.double.integer64 <- function(x)FALSE # TODO(R>=4.2.0): Remove workarounds for match(). Needed for #85 and #111. #' @rdname bit64S3 #' @rawNamespace if (getRversion() >= "4.2.0") S3method(mtfrm,integer64) mtfrm.integer64 = function(x) as.character(x) match <- function(x, table, ...) UseMethod("match") #' @rdname bit64S3 #' @export match.default <- function(x, table, ...) { if (!exists("mtfrm", baseenv()) && is.integer64(table)) base::match(as.character(x), as.character(table), ...) # nocov else base::match(x, table, ...) } `%in%` <- function(x, table) UseMethod("%in%") #' @rdname bit64S3 #' @export `%in%.default` <- function(x, table) base::`%in%`(x, table) rank <- function(x, ...) UseMethod("rank") #' @rdname bit64S3 #' @export rank.default <- function(x, ...) base::rank(x, ...) # not yet able to combinewith other column types - better leave table() as is and hope for as.factor.integer64 #if (!exists("table.default")){ # "table" <- function(...) UseMethod("table") # "table.default" <- function(...) base::"table"(...) #} order <- function(...) UseMethod("order") #' @rdname bit64S3 #' @export order.default <- function(...) base::order(...) bit64/R/integer64.R0000644000176200001440000014613614705642333013357 0ustar liggesusers# /* # R-Code # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 #*/ #' Identity function for class 'integer64' #' #' This will discover any deviation between objects containing integer64 vectors. 
#' #' This is simply a wrapper to [identical()] with default arguments #' `num.eq = FALSE, single.NA = FALSE`. #' #' @param x,y Atomic vector of class 'integer64' #' @param num.eq,single.NA,attrib.as.set,ignore.bytecode,ignore.environment,ignore.srcref #' See [identical()]. #' @param ... Passed on to `identical()`. Only `extptr.as.ref=` is available as of R 4.4.1, #' and then only for versions of R >= 4.2.0. #' #' @return A single logical value, `TRUE` or `FALSE`, never `NA` and never #' anything other than a single value. #' @keywords classes manip #' @seealso [`==.integer64`] [identical()] [integer64()] #' @examples #' i64 <- as.double(NA); class(i64) <- "integer64" #' identical(i64-1, i64+1) #' identical.integer64(i64-1, i64+1) #' @name identical.integer64 NULL #' Coerce from integer64 #' #' Methods to coerce integer64 to other atomic types. 'as.bitstring' coerces #' to a human-readable bit representation (strings of zeroes and ones). #' The methods [format()], [as.character()], [as.double()], #' [as.logical()], [as.integer()] do what you would expect. #' #' @param x an integer64 vector #' @param keep.names FALSE, set to TRUE to keep a names vector #' @param ... further arguments to the [NextMethod()] #' #' @return `as.bitstring` returns a string of class 'bitstring'. #' #' The other methods return atomic vectors of the expected types #' #' @keywords classes manip #' @seealso [as.integer64.character()] [integer64()] #' @examples #' as.character(lim.integer64()) #' as.bitstring(lim.integer64()) #' as.bitstring(as.integer64(c( #' -2,-1,NA,0:2 #' ))) #' @name as.character.integer64 NULL #' Coerce to integer64 #' #' Methods to coerce from other atomic types to integer64. #' #' @param x an atomic vector #' @param keep.names FALSE, set to TRUE to keep a names vector #' @param ... further arguments to the [NextMethod()] #' #' @details #' `as.integer64.character` is realized using C function `strtoll` which #' does not support scientific notation. Instead of '1e6' use '1000000'. #' `as.integer64.bitstring` evaluates characters '0' and ' ' as zero-bit, #' all other one byte characters as one-bit, multi-byte characters are not allowed, #' strings shorter than 64 characters are treated as if they were left-padded with '0', #' strings longer than 64 bytes are mapped to `NA_INTEGER64` and a warning is emitted. #' #' @return The other methods return atomic vectors of the expected types #' #' @keywords classes manip #' @seealso [as.character.integer64()] [integer64()] #' @examples #' as.integer64(as.character(lim.integer64())) #' as.integer64( #' structure(c("1111111111111111111111111111111111111111111111111111111111111110", #' "1111111111111111111111111111111111111111111111111111111111111111", #' "1000000000000000000000000000000000000000000000000000000000000000", #' "0000000000000000000000000000000000000000000000000000000000000000", #' "0000000000000000000000000000000000000000000000000000000000000001", #' "0000000000000000000000000000000000000000000000000000000000000010" #' ), class = "bitstring") #' ) #' as.integer64( #' structure(c("............................................................... ", #' "................................................................", #' ". ", #' "", #' ".", #' "10" #' ), class = "bitstring") #' ) #' @name as.integer64.character NULL #' Extract or Replace Parts of an integer64 vector #' #' Methods to extract and replace parts of an integer64 vector. 
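#'
#' A short sketch of typical usage (here with an integer right-hand side,
#' which is coerced so that the 'integer64' class is kept):
#'
#' ```r
#' x <- as.integer64(1:6)
#' x[2] <- 30L        # assignment keeps the integer64 class
#' x[x > 3L]          # logical subscripting as usual
#' x[[1]]             # scalar extraction
#' ```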
#' #' @param x an atomic vector #' @param i indices specifying elements to extract #' @param value an atomic vector with values to be assigned #' @param ... further arguments to the [NextMethod()] #' #' @note #' You should not subscript non-existing elements and not use `NA`s as subscripts. #' The current implementation returns `9218868437227407266` instead of `NA`. #' @returns A vector or scalar of class 'integer64' #' @keywords classes manip #' @seealso [`[`][base::Extract] [integer64()] #' @examples #' as.integer64(1:12)[1:3] #' x <- as.integer64(1:12) #' dim(x) <- c(3,4) #' x #' x[] #' x[,2:3] #' @name extract.replace.integer64 NULL #' Unary operators and functions for integer64 vectors #' #' Unary operators and functions for integer64 vectors. #' #' @param x an atomic vector of class 'integer64' #' @param base an atomic scalar (we save 50% log-calls by not allowing #' a vector base) #' @param digits integer indicating the number of decimal places (round) #' or significant digits (signif) to be used. Negative values are allowed #' (see [round()]) #' @param justify should it be right-justified (the default), left-justified, #' centred or left alone. #' @param center see [scale()] #' @param scale see [scale()] #' @param ... further arguments to the [NextMethod()] #' #' @returns #' [format()] returns a character vector #' #' [is.na()] and [`!`] return a logical vector #' #' [sqrt()], [log()], [log2()] and [log10()] return a double vector #' #' [sign()], [abs()], [floor()], [ceiling()], [trunc()] and #' [round()] return a vector of class 'integer64' #' #' [signif()] is not implemented #' #' @keywords classes manip #' @seealso [xor.integer64()] [integer64()] #' @examples #' sqrt(as.integer64(1:12)) #' @name format.integer64 NULL #' Binary operators for integer64 vectors #' #' Binary operators for integer64 vectors. #' #' @param e1 an atomic vector of class 'integer64' #' @param e2 an atomic vector of class 'integer64' #' @param x an atomic vector of class 'integer64' #' @param y an atomic vector of class 'integer64' #' #' @returns #' [`&`], [`|`], [xor()], [`!=`], [`==`], #' [`<`], [`<=`], [`>`], [`>=`] return a logical vector #' #' [`^`] and [`/`] return a double vector #' #' [`+`], [`-`], [`*`], [`%/%`], [`%%`] #' return a vector of class 'integer64' #' #' @keywords classes manip #' @seealso [format.integer64()] [integer64()] #' @examples #' as.integer64(1:12) - 1 #' options(integer64_semantics="new") #' d <- 2.5 #' i <- as.integer64(5) #' d/i # new 0.5 #' d*i # new 13 #' i*d # new 13 #' options(integer64_semantics="old") #' d/i # old: 0.4 #' d*i # old: 10 #' i*d # old: 13 #' @name xor.integer64 NULL #' Summary functions for integer64 vectors #' #' Summary functions for integer64 vectors. Function 'range' without arguments #' returns the smallest and largest value of the 'integer64' class. #' #' @param ... atomic vectors of class 'integer64' #' @param na.rm logical scalar indicating whether to ignore NAs #' @param finite logical scalar indicating whether to ignore NAs (just for #' compatibility with [range.default()]) #' #' @details #' The numerical summary methods always return `integer64`. Therefore the #' methods for `min`,`max` and `range` do not return `+Inf,-Inf` on empty #' arguments, but `+9223372036854775807, -9223372036854775807` (in this sequence). #' The same is true if only `NA`s are submitted with argument `na.rm=TRUE`. #' #' `lim.integer64` returns these limits in proper order #' `-9223372036854775807, +9223372036854775807` and without a [warning()]. 
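#'
#' A brief illustration of these edge cases (a warning may still be emitted
#' for empty or all-`NA` input, in contrast to `lim.integer64()`):
#'
#' ```r
#' lim.integer64()                           # limits in proper order
#' suppressWarnings(range(integer64(0)))     # limits, but in reversed order
#' suppressWarnings(min(NA_integer64_, na.rm=TRUE))
#' ```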
#' #' @returns #' [all()] and [any()] return a logical scalar #' #' [range()] returns a integer64 vector with two elements #' #' [min()], [max()], [sum()] and [prod()] return a integer64 scalar #' #' @keywords classes manip #' @seealso [mean.integer64()] [cumsum.integer64()] [integer64()] #' @examples #' lim.integer64() #' range(as.integer64(1:12)) #' @name sum.integer64 NULL #' Cumulative Sums, Products, Extremes and lagged differences #' #' Cumulative Sums, Products, Extremes and lagged differences #' #' @param x an atomic vector of class 'integer64' #' @param lag see [diff()] #' @param differences see [diff()] #' @param ... ignored #' #' @returns #' [cummin()], [cummax()] , [cumsum()] and [cumprod()] #' return a integer64 vector of the same length as their input #' #' [diff()] returns a integer64 vector shorter by `lag*differences` elements #' #' @keywords classes manip #' @seealso [sum.integer64()] [integer64()] #' @examples #' cumsum(rep(as.integer64(1), 12)) #' diff(as.integer64(c(0,1:12))) #' cumsum(as.integer64(c(0, 1:12))) #' diff(cumsum(as.integer64(c(0,0,1:12))), differences=2) #' @name cumsum.integer64 NULL #' Concatenating integer64 vectors #' #' The ususal functions 'c', 'cbind' and 'rbind' #' #' @param ... two or more arguments coerced to 'integer64' and #' passed to [NextMethod()] #' @param recursive logical. If `recursive = TRUE`, the function #' recursively descends through lists (and pairlists) combining all #' their elements into a vector. #' #' @returns #' [c()] returns a integer64 vector of the total length of the input #' #' [cbind()] and [rbind()] return a integer64 matrix #' #' @note #' R currently only dispatches generic 'c' to method 'c.integer64' if the #' first argument is 'integer64' #' #' @keywords classes manip #' @seealso [rep.integer64()] [seq.integer64()] [as.data.frame.integer64()] #' [integer64()] #' #' @examples #' c(as.integer64(1), 2:6) #' cbind(1:6, as.integer(1:6)) #' rbind(1:6, as.integer(1:6)) #' @name c.integer64 NULL #' Replicate elements of integer64 vectors #' #' Replicate elements of integer64 vectors #' #' @param x a vector of 'integer64' to be replicated #' @param ... further arguments passed to [NextMethod()] #' #' @returns [rep()] returns a integer64 vector #' @keywords classes manip #' @seealso [c.integer64()] [rep.integer64()] #' [as.data.frame.integer64()] [integer64()] #' #' @examples #' rep(as.integer64(1:2), 6) #' rep(as.integer64(1:2), c(6,6)) #' rep(as.integer64(1:2), length.out=6) #' @name rep.integer64 NULL #' integer64: Sequence Generation #' #' Generating sequence of integer64 values #' #' @param from integer64 scalar (in order to dispatch the integer64 method of [seq()] #' @param to scalar #' @param by scalar #' @param length.out scalar #' @param along.with scalar #' @param ... ignored #' @details #' `seq.integer64` does coerce its arguments 'from', 'to' and 'by' to `integer64`. #' If not provided, the argument 'by' is automatically determined as `+1` or `-1`, #' but the size of 'by' is not calculated as in [seq()] (because this might result #' in a non-integer value). 
#' #' @returns an integer64 vector with the generated sequence #' @note #' In base R [`:`] currently is not generic and does not dispatch, see section #' "Limitations inherited from Base R" in [integer64()] #' #' @keywords classes manip #' @seealso [c.integer64()] [rep.integer64()] #' [as.data.frame.integer64()] [integer64()] #' @examples #' # colon not activated: as.integer64(1):12 #' seq(as.integer64(1), 12, 2) #' seq(as.integer64(1), by=2, length.out=6) #' @name seq.integer64 NULL #' integer64: Coercing to data.frame column #' #' Coercing integer64 vector to data.frame. #' #' @param x an integer64 vector #' @param ... passed to NextMethod [as.data.frame()] after removing the #' 'integer64' class attribute #' #' @returns a one-column data.frame containing an integer64 vector #' @details #' 'as.data.frame.integer64' is rather not intended to be called directly, #' but it is required to allow integer64 as data.frame columns. #' @note This is currently very slow -- any ideas for improvement? #' @keywords classes manip #' @seealso #' [cbind.integer64()] [integer64()] # as.vector.integer64 removed as requested by the CRAN maintainer [as.vector.integer64()] #' @examples #' as.data.frame.integer64(as.integer64(1:12)) #' data.frame(a=1:12, b=as.integer64(1:12)) #' @name as.data.frame.integer64 NULL #' integer64: Maintaining S3 class attribute #' #' Maintaining integer64 S3 class attribute. #' #' @param class NULL or a character vector of class attributes #' @param whichclass the (single) class name to add or remove from the class vector #' #' @returns NULL or a character vector of class attributes #' #' @keywords classes manip internal #' @seealso [oldClass()] [integer64()] #' @examples #' plusclass("inheritingclass","integer64") #' minusclass(c("inheritingclass","integer64"), "integer64") #' @name plusclass NULL #' Test if two integer64 vectors are all.equal #' #' A utility to compare integer64 objects 'x' and 'y' testing for #' ‘near equality’, see [all.equal()]. #' #' @param target a vector of 'integer64' or an object that can be coerced #' with [as.integer64()] #' @param current a vector of 'integer64' or an object that can be coerced #' with [as.integer64()] #' @param tolerance numeric > 0. Differences smaller than `tolerance` are #' not reported. The default value is close to `1.5e-8`. #' @param scale `NULL` or numeric > 0, typically of length 1 or #' `length(target)`. See Details. #' @param countEQ logical indicating if the `target == current` cases should #' be counted when computing the mean (absolute or relative) differences. #' The default, `FALSE` may seem misleading in cases where `target` and #' `current` only differ in a few places; see the extensive example. #' @param formatFUN a [function()] of two arguments, `err`, the relative, #' absolute or scaled error, and `what`, a character string indicating the #' _kind_ of error; maybe used, e.g., to format relative and absolute errors #' differently. #' @param ... further arguments are ignored #' @param check.attributes logical indicating if the [attributes()] of `target` #' and `current` (other than the names) should be compared. #' #' @returns #' Either ‘TRUE’ (‘NULL’ for ‘attr.all.equal’) or a vector of ‘mode’ #' ‘"character"’ describing the differences between ‘target’ and #' ‘current’. #' #' @details #' In [all.equal.numeric()] the type `integer` is treated as a proper subset #' of `double` i.e. does not complain about comparing `integer` with `double`. 
#' Following this logic `all.equal.integer64` treats `integer` as a proper #' subset of `integer64` and does not complain about comparing `integer` with #' `integer64`. `double` also compares without warning as long as the values #' are within [lim.integer64()], if `double` are bigger `all.equal.integer64` #' complains about the `all.equal.integer64 overflow warning`. For further #' details see [all.equal()]. #' #' @note #' [all.equal()] only dispatches to this method if the first argument is `integer64`, #' calling [all.equal()] with a `non-integer64` first and a `integer64` second argument #' gives undefined behavior! #' #' @seealso [all.equal()] #' @examples #' all.equal(as.integer64(1:10), as.integer64(0:9)) #' all.equal(as.integer64(1:10), as.integer(1:10)) #' all.equal(as.integer64(1:10), as.double(1:10)) #' all.equal(as.integer64(1), as.double(1e300)) #' @name all.equal.integer64 NULL # if (!exists(":.default")){ # ":.default" <- get(":") # ":" <- function(from,to)UseMethod(":") # } setOldClass("integer64") # contributed by Leonardo Silvestri with modifications of JO #' @rdname all.equal.integer64 #' @method all.equal integer64 #' @exportS3Method all.equal integer64 all.equal.integer64 <- function ( target , current , tolerance = sqrt(.Machine$double.eps) , scale = NULL , countEQ = FALSE , formatFUN = function(err, what) format(err) , ... , check.attributes = TRUE ) { if (!is.numeric(tolerance)) stop("'tolerance' should be numeric") if (!is.numeric(scale) && !is.null(scale)) stop("'scale' should be numeric or NULL") if (!is.logical(check.attributes)) stop(gettextf("'%s' must be logical", "check.attributes"), domain = NA) # JO: BEGIN respect that integer is a proper subset of integer64 like integer is a proper subset of double oldwarn <- getOption("warn") on.exit(options(warn=oldwarn)) options(warn=2L) if (!is.integer64(target)){ cl <- oldClass(target) oldClass(target) <- NULL target <- try(as.integer64(target)) if (inherits(target, 'try-error')) return(paste("while coercing 'target' to 'integer64':", attr(target, "condition")$message)) oldClass(target) <- c(cl, "integer64") } if (!is.integer64(current)){ cl <- oldClass(current) oldClass(current) <- NULL current <- try(as.integer64(current)) if (inherits(current, 'try-error')) return(paste("while coercing 'current' to 'integer64':", attr(current, "condition")$message)) oldClass(current) <- c(cl, "integer64") } # JO: END respect that integer is a proper subset of integer64 like integer is a proper subset of double msg <- NULL msg <- if (check.attributes) attr.all.equal(target, current, tolerance = tolerance, scale = scale, ...) 
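    # At this point both 'target' and 'current' are integer64 (a failed coercion
    # has already returned its message above). The remaining checks mirror
    # all.equal.numeric(): compare class, length and NA pattern, then the mean
    # relative/absolute difference against 'tolerance'.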
if (data.class(target) != data.class(current)) { msg <- c(msg, paste0("target is ", data.class(target), ", current is ", data.class(current))) return(msg) } lt <- length(target) lc <- length(current) if (lt != lc) { if (!is.null(msg)) msg <- msg[-grep("\\bLengths\\b", msg)] msg <- c(msg, paste0("integer64: lengths (", lt, ", ", lc, ") differ")) return(msg) } out <- is.na(target) if (any(out != is.na(current))) { msg <- c(msg, paste("'is.NA' value mismatch:", sum(is.na(current)), "in current", sum(out), "in target")) return(msg) } out <- out | target == current if (all(out)) return(if (is.null(msg)) TRUE else msg) anyO <- any(out) sabst0 <- if (countEQ && anyO) mean(abs(target[out])) else 0.0 if (anyO) { keep <- which(!out) target <- target [keep] current <- current[keep] if(!is.null(scale) && length(scale) > 1L) { # TODO(R>=4.0.0): Try removing this ocl part when rep() dispatching WAI on all versions (#100) ocl = class(scale) scale = rep_len(scale, length(out))[keep] class(scale) = ocl } } N <- length(target) what <- if (is.null(scale)) { scale <- sabst0 + sum(abs(target)/N) if (is.finite(scale) && scale > tolerance) { "relative" } else { scale <- 1.0 "absolute" } } else { stopifnot(scale > 0.0) if (all(abs(scale - 1.0) < 1e-07)) "absolute" else "scaled" } xy <- sum(abs(target - current)/(N*scale)) if (is.na(xy) || xy > tolerance) msg <- c(msg, paste("Mean", what, "difference:", formatFUN(xy, what))) if (is.null(msg)) { TRUE } else msg } # nocov start if (FALSE){ require(bit64) a <- as.integer64(1L) b <- 10L oldClass(a) <- c("j", oldClass(a)) oldClass(b) <- c("j", oldClass(b)) all.equal(a,b) a <- 1.0 b <- 10L oldClass(a) <- c("j", oldClass(a)) oldClass(b) <- c("j", oldClass(b)) all.equal(a,b) a <- as.integer64(9e17) b <- 9e18 oldClass(a) <- c("j", oldClass(a)) oldClass(b) <- c("j", oldClass(b)) all.equal(a,b) a <- as.integer64(9e18) b <- 9e19 oldClass(a) <- c("j", oldClass(a)) oldClass(b) <- c("j", oldClass(b)) all.equal(a,b) a <- as.integer64(c(1L,NA)) b <- as.integer(c(1L,NA)) all.equal(a,b) a <- as.integer64(c(1L,NA)) b <- as.double(c(1L,NA)) all.equal(a,b) a <- as.integer64(c(1.0,Inf)) b <- as.integer(c(1.0,Inf)) all.equal(a,b) a <- as.integer64(c(1.0,Inf)) b <- as.double(c(1.0,Inf)) all.equal(a,b) } # nocov end # TODO(R>=4.2.0): Consider restoring extptr.as.ref= to the signature. #' @rdname identical.integer64 #' @exportS3Method identical integer64 #' @export identical.integer64 <- function(x, y , num.eq = FALSE , single.NA = FALSE , attrib.as.set = TRUE , ignore.bytecode = TRUE , ignore.environment = FALSE , ignore.srcref = TRUE , ... ) identical(x=x, y=y , num.eq = num.eq , single.NA = single.NA , attrib.as.set = attrib.as.set , ignore.bytecode = ignore.bytecode , ignore.environment = ignore.environment , ignore.srcref = ignore.srcref , ... ) #' @rdname as.integer64.character #' @export as.integer64 <- function (x, ...) UseMethod("as.integer64") #' @rdname as.character.integer64 #' @export as.bitstring <- function(x, ...) 
UseMethod("as.bitstring") #' @rdname plusclass #' @export minusclass <- function(class, whichclass){ if (length(class)){ i <- whichclass==class if (any(i)) class[!i] else class }else class } #' @export plusclass <- function(class, whichclass){ if (length(class)){ i <- whichclass==class if (any(i)) class else c(class, whichclass) }else whichclass } # nocov start if (FALSE){ # version until 0.9-7 binattr <- function(e1, e2) { d1 <- dim(e1) d2 <- dim(e2) n1 <- length(e1) n2 <- length(e2) if (length(d1)) { if (length(d2)) { if (!identical(dim(e1),dim(e2))) stop("non-conformable arrays") } else { if (n2>n1) stop("length(e2) does not match dim(e1)") if (n1%%n2) warning("length(e1) not a multiple length(e2)") } attributes(e1) } else if (length(d2)) { if (n1>n2) stop("length(e1) does not match dim(n2)") if (n2%%n1) warning("length(e2) not a multiple length(e1)") attributes(e2) } else { if (n1 n1 && n1) stop("length(e2) does not match dim(e1)") if (n2 && n1 %% n2) warning("length(e1) not a multiple length(e2)") } } else if (length(d2)) { if (n1 > n2 && n2) stop("length(e1) does not match dim(n2)") if (n1 && n2 %% n1) warning("length(e2) not a multiple length(e1)") } else { # nolint next: unnecessary_nesting_linter. Good parallelism. if (n1 < n2 && n1) { if (n1 && n2 %% n1) warning("length(e2) not a multiple length(e1)") } else { # nolint next: unnecessary_nesting_linter. Good parallelism. if (n2 && n1 %% n2) warning("length(e1) not a multiple length(e2)") } } ## in this part we mimic R's algo for selecting attributes: if (n1 == n2){ ## if same size take attribute from e1 if it exists, else from e2 if (n1==0L){ ae1 <- attributes(e1)[c("class","dim","dimnames")] ae2 <- attributes(e2)[c("class","dim","dimnames")] } ae1 <- attributes(e1) ae2 <- attributes(e2) nae1 <- names(attributes(e1)) nae2 <- names(attributes(e2)) if (n1==0L){ ae1 <- ae1[nae1 %in% c("class","dim","dimnames")] ae2 <- ae1[nae1 %in% c("class","dim","dimnames")] } allattr <- list() for (a in union(nae1, nae2)) if (a %in% nae1) allattr[[a]] <- ae1[[a]] else allattr[[a]] <- ae2[[a]] allattr }else if (n1 == 0L || n1 > n2) { attributes(e1) } else { attributes(e2) } } # as.matrix.integer64 <- function (x, ...) { # if (!is.matrix(x)){ # dim(x) <- c(length(x), 1L) # dimnames(x) <- if (!is.null(names(x))) list(names(x), NULL) else NULL # } # x # } # nocov start if (FALSE){ x <- integer64(0L) y <- integer64(0L) #dim(x) <- c(2L,2L) dim(y) <- c(0L,0L) dimnames(y) <- list(character(0L),character(0L)) #dim(x) <- c(1L,4L) #dim(y) <- c(4L,1L) attr(x,"x") <- "x" attr(y,"y") <- "y" z <- x - y z dim(z) dimnames(z) z <- y - x z dim(z) dimnames(z) ret <- "integer64(0L)" attributes(ret) <- list(dim = c(0L, 0L), class = character(0L), dimnames = list(NULL,NULL)) } # nocov end #' @rdname bit64-package #' @param length length of vector using [integer()] #' @return `integer64` returns a vector of 'integer64', i.e., #' a vector of [double()] decorated with class 'integer64'. #' @export integer64 <- function(length=0L){ ret <- double(length) oldClass(ret) <- "integer64" ret } #' @rdname bit64-package #' @param x an integer64 vector #' @export is.integer64 <- function(x) inherits(x, "integer64") #' @rdname as.integer64.character #' @export as.integer64.NULL <- function (x, ...){ ret <- double() oldClass(ret) <- "integer64" ret } #' @rdname as.integer64.character #' @export as.integer64.integer64 <- function(x, ...) 
x #' @rdname as.integer64.character #' @export as.integer64.double <- function(x, keep.names=FALSE, ...){ ret <- .Call(C_as_integer64_double, x, double(length(x))) if (keep.names) names(ret) <- names(x) oldClass(ret) <- "integer64" ret } #' @rdname as.integer64.character #' @export as.integer64.integer <- function(x, ...){ ret <- .Call(C_as_integer64_integer, x, double(length(x))) oldClass(ret) <- "integer64" ret } #' @rdname as.integer64.character #' @export as.integer64.logical <- as.integer64.integer #' @rdname as.integer64.character #' @export as.integer64.character <- function(x, ...){ n <- length(x) ret <- .Call(C_as_integer64_character, x, rep(NA_real_, n)) oldClass(ret) <- "integer64" ret } #' @rdname as.integer64.character #' @export as.integer64.factor <- function(x, ...) as.integer64(unclass(x), ...) #' @rdname as.character.integer64 #' @export as.double.integer64 <- function(x, keep.names=FALSE, ...) { ret <- .Call(C_as_double_integer64, x, double(length(x))) if (keep.names) names(ret) <- names(x) ret } #' @rdname as.character.integer64 #' @export as.integer.integer64 <- function(x, ...) { .Call(C_as_integer_integer64, x, integer(length(x))) } #' @rdname as.character.integer64 #' @export as.logical.integer64 <- function(x, ...) { .Call(C_as_logical_integer64, x, logical(length(x))) } #' @rdname as.character.integer64 #' @export as.character.integer64 <- function(x, ...) { n <- length(x) .Call(C_as_character_integer64, x, rep(NA_character_, n)) } #' @rdname as.character.integer64 #' @export as.bitstring.integer64 <- function(x, ...) { n <- length(x) ret <- .Call(C_as_bitstring_integer64, x, rep(NA_character_, n)) oldClass(ret) <- 'bitstring' ret } #' @rdname as.character.integer64 #' @export print.bitstring <- function(x, ...) { oldClass(x) <- minusclass(class(x), 'bitstring') NextMethod(x) } #' @rdname as.integer64.character #' @export as.integer64.bitstring <- function(x, ...){ ret <- .Call(C_as_integer64_bitstring, x, double(length(x))) oldClass(ret) <- "integer64" ret } # read.table expects S4 as() methods::setAs("character", "integer64", function(from) as.integer64.character(from)) methods::setAs("integer64", "character", function(from) as.character.integer64(from)) # this is a trick to generate NA_integer64_ for namespace export before # as.integer64() is available because dll is not loaded #' @rdname as.integer64.character #' @export NA_integer64_ <- unserialize(as.raw(c( 0x58, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x03, 0x03, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x03, 0x0e, 0x00, 0x00, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x05, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x09, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x65, 0x72, 0x36, 0x34, 0x00, 0x00, 0x00, 0xfe ))) #' @rdname bit64-package #' @param value an integer64 vector of values to be assigned #' @export `length<-.integer64` <- function(x, value){ cl <- oldClass(x) n <- length(x) x <- NextMethod() oldClass(x) <- cl if (value>n) x[(n+1L):value] <- 0L x } #' @rdname format.integer64 #' @export format.integer64 <- function(x, justify="right", ...) { a <- attributes(x) x <- as.character(x) ret <- format(x, justify=justify, ...) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname bit64-package #' @param quote logical, indicating whether or not strings should be printed with surrounding quotes. #' @param ... 
further arguments to the [NextMethod()] #' @export print.integer64 <- function(x, quote=FALSE, ...) { a <- attributes(x) if (length(x)){ cat("integer64\n") ret <- as.character(x) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a print(ret, quote=quote, ...) }else{ cat("integer64(0)\n") } invisible(x) } #' @rdname bit64-package #' @param object an integer64 vector #' @param vec.len,give.head,give.length see [utils::str()] #' @export str.integer64 <- function(object , vec.len = strO$vec.len , give.head = TRUE , give.length = give.head , ... ){ strO <- strOptions() vec.len <- 2L*vec.len n <- length(object) if (n>vec.len) object <- object[seq_len(vec.len)] cat(if (give.head)paste0("integer64 ", if (give.length && n>1L) paste0("[1:",n,"] ")), paste(as.character(object), collapse=" "),if(n>vec.len)" ...", " \n", sep="") invisible() } # nocov start if (FALSE){ require(microbenchmark) require(bit64) x <- runif64(10000000L) microbenchmark(x[TRUE], times=10L) microbenchmark(x[NA], times=10L) i <- seq_along(x) i[1L] <- NA microbenchmark(x[i], times=10L) i <- rep(TRUE, length(x)) i[1L] <- NA microbenchmark(x[i], times=10L) i <- seq_along(x) microbenchmark(x[i], times=10L) i <- rep(TRUE, length(x)) microbenchmark(x[i], times=10L) } # nocov end #' @rdname extract.replace.integer64 #' @export `[.integer64` <- function(x, i, ...) { cl <- oldClass(x) ret <- NextMethod() # Begin NA-handling from Leonardo Silvestri if (!missing(i)){ if (inherits(i, "character")) { na_idx <- union(which(!(i %in% names(x))), which(is.na(i))) if (length(na_idx)) ret[na_idx] <- NA_integer64_ }else{ ni <- length(i) nx <- length(x) if (inherits(i, "logical")){ if (ni>nx){ na_idx <- is.na(i) | (i & seq_along(i)>nx) na_idx <- na_idx[is.na(i) | i] }else{ i <- i[is.na(i) | i] na_idx <- rep_len(is.na(i), length(ret)) } } else if (ni && min(i, na.rm=TRUE)>=0L) { i <- i[is.na(i) | i>0L] na_idx <- is.na(i) | i>length(x) } else { na_idx <- FALSE } if (any(na_idx)) ret[na_idx] <- NA_integer64_ } } # End NA-handling from Leonardo Silvestri oldClass(ret) <- cl remcache(ret) ret } `[.integer64` <- function(x, i, ...){ cl <- oldClass(x) ret <- NextMethod() # Begin NA-handling from Leonardo Silvestri if (!missing(i)){ if (inherits(i, "character")) { na_idx <- union(which(!(i %in% names(x))), which(is.na(i))) if (length(na_idx)) ret[na_idx] <- NA_integer64_ }else{ na_idx <- is.na(rep(TRUE, length(x))[i]) if (any(na_idx)) ret[na_idx] <- NA_integer64_ } } # End NA-handling from Leonardo Silvestri oldClass(ret) <- cl remcache(ret) ret } #' @rdname extract.replace.integer64 #' @export `[<-.integer64` <- function(x, ..., value) { cl <- oldClass(x) value <- as.integer64(value) ret <- NextMethod() oldClass(ret) <- cl ret } #' @rdname extract.replace.integer64 #' @export `[[.integer64` <- function(x, ...) { cl <- oldClass(x) ret <- NextMethod() oldClass(ret) <- cl ret } #' @rdname extract.replace.integer64 #' @export `[[<-.integer64` <- function(x, ..., value) { cl <- oldClass(x) value <- as.integer64(value) ret <- NextMethod() oldClass(ret) <- cl ret } #' @rdname c.integer64 #' @export c.integer64 <- function (..., recursive = FALSE) { l <- list(...) K <- length(l) for (k in 1:K){ if (recursive && is.list(l[[k]])){ l[[k]] <- do.call(c.integer64, c(l[[k]], list(recursive = TRUE))) }else{ if (!is.integer64(l[[k]])) { nam <- names(l[[k]]) l[[k]] <- as.integer64(l[[k]]) names(l[[k]]) <- nam } oldClass(l[[k]]) <- NULL } } ret <- do.call(c, l) oldClass(ret) <- "integer64" ret } #' @rdname c.integer64 #' @export cbind.integer64 <- function(...) 
{ l <- list(...) K <- length(l) for (k in 1:K){ if (!is.integer64(l[[k]])){ nam <- names(l[[k]]) l[[k]] <- as.integer64(l[[k]]) names(l[[k]]) <- nam } oldClass(l[[k]]) <- NULL } ret <- do.call(cbind, l) oldClass(ret) <- "integer64" ret } #' @rdname c.integer64 #' @export rbind.integer64 <- function(...) { l <- list(...) K <- length(l) for (k in 1:K){ if (!is.integer64(l[[k]])){ nam <- names(l[[k]]) l[[k]] <- as.integer64(l[[k]]) names(l[[k]]) <- nam } oldClass(l[[k]]) <- NULL } ret <- do.call(rbind, l) oldClass(ret) <- "integer64" ret } # tenfold runtime if using attr() here instead of setattr() # as.data.frame.integer64 <- function(x, ...){ # cl <- oldClass(x) # oldClass(x) <- minusclass(cl, "integer64") # ret <- as.data.frame(x, ...) # k <- length(ret) # for (i in 1:k) # oldClass(ret[[i]]) <- cl # ret # } #' @rdname as.data.frame.integer64 #' @export as.data.frame.integer64 <- function(x, ...){ cl <- oldClass(x) on.exit(setattr(x, "class", cl)) setattr(x, "class", minusclass(cl, "integer64")) ret <- as.data.frame(x, ...) k <- length(ret) for (i in 1:k) setattr(ret[[i]], "class", cl) ret } #' @export rep.integer64 <- function(x, ...){ cl <- oldClass(x) ret <- NextMethod() oldClass(ret) <- cl ret } # FIXME no method dispatch for : `:.integer64` <- function(from, to){ from <- as.integer64(from) to <- as.integer64(to) ret <- .Call(C_seq_integer64, from, as.integer64(1L), double(as.integer(to-from+1L))) oldClass(ret) <- "integer64" ret } #' @export seq.integer64 <- function(from=NULL, to=NULL, by=NULL, length.out=NULL, along.with=NULL, ...){ if (is.null(length.out)) length.out <- length(along.with) else length.out <- as.integer(length.out) if (is.null(by)){ if (is.null(from) || is.null(to)) by <- as.integer64(1L) else by <- as.integer64(if (to < from) -1L else 1L) }else{ by <- as.integer64(by) if ((!is.null(from)) && (!is.null(to)) && sign(by)!=(if (to < from) -1L else 1L)) stop("wrong sign of 'by' argument") } if (is.null(from)){ if (length.out && length(to)) from <- to - (length.out-1L)*by else from <- as.integer64(1L) }else from <- as.integer64(from) if (!length(to)){ if (length.out) to <- from + (length.out-1L)*by else stop("not enough informatoin provided") } if (!length.out){ length.out <- (to-from) %/% by + 1L } if (length.out){ if (length.out==1L) return(from) else{ #return(cumsum(c(from, rep(by, length.out-1L)))) ret <- .Call(C_seq_integer64, from, by, double(as.integer(length.out))) oldClass(ret) <- "integer64" return(ret) } }else return(integer64()) } #' @rdname xor.integer64 #' @export `+.integer64` <- function(e1, e2){ if (missing(e2)) return(e1) a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) ret <- .Call(C_plus_integer64, e1, e2, ret) a$class <- plusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `-.integer64` <- function(e1, e2){ if (missing(e2)){ e2 <- e1 e1 <- 0L } a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) .Call(C_minus_integer64, e1, e2, ret) a$class <- plusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `%/%.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) .Call(C_intdiv_integer64, e1, e2, ret) a$class 
<- plusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `%%.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) .Call(C_mod_integer64, e1, e2, ret) a$class <- plusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `*.integer64` <- function(e1, e2){ a <- binattr(e1,e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) if (getOption("integer64_semantics", "old") == "old"){ if (is.double(e2)) # implies !is.integer64(e2) ret <- .Call(C_times_integer64_double, as.integer64(e1), e2, ret) else ret <- .Call(C_times_integer64_integer64, as.integer64(e1), as.integer64(e2), ret) }else{ # nolint next: unnecessary_nesting_linter. Good parallelism, and on a to-be-deprecated code path. if (is.double(e2)) # implies !is.integer64(e2) ret <- .Call(C_times_integer64_double, as.integer64(e1), e2, ret) else if (is.double(e1)) ret <- .Call(C_times_integer64_double, as.integer64(e2), e1, ret) else ret <- .Call(C_times_integer64_integer64, as.integer64(e1), as.integer64(e2), ret) } a$class <- plusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `^.integer64` <- function(e1, e2){ a <- binattr(e1,e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) if (is.double(e2)) # implies !is.integer64(e2) ret <- .Call(C_power_integer64_double, as.integer64(e1), e2, ret) else ret <- .Call(C_power_integer64_integer64, as.integer64(e1), as.integer64(e2), ret) a$class <- plusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `/.integer64` <- function(e1, e2){ a <- binattr(e1,e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- double(l) if (getOption("integer64_semantics", "old") == "old"){ if (is.double(e2)) # implies !is.integer64(e2) ret <- .Call(C_divide_integer64_double, as.integer64(e1), e2, ret) else ret <- .Call(C_divide_integer64_integer64, as.integer64(e1), as.integer64(e2), ret) }else{ # nolint next: unnecessary_nesting_linter. Good parallelism, and on a to-be-deprecated code path. 
if (is.double(e2)) # implies !is.integer64(e2) ret <- .Call(C_divide_integer64_double, as.integer64(e1), e2, ret) else if (is.double(e1)) ret <- .Call(C_divide_double_integer64, e1, e2, ret) else ret <- .Call(C_divide_integer64_integer64, as.integer64(e1), as.integer64(e2), ret) } a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export sign.integer64 <- function(x){ a <- attributes(x) ret <- .Call(C_sign_integer64, x, double(length(x))) attributes(ret) <- a ret } #' @rdname format.integer64 #' @export abs.integer64 <- function(x){ a <- attributes(x) ret <- .Call(C_abs_integer64, x, double(length(x))) attributes(ret) <- a ret } #' @rdname format.integer64 #' @export sqrt.integer64 <- function(x){ a <- attributes(x) ret <- .Call(C_sqrt_integer64, x, double(length(x))) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export log.integer64 <- function(x, base=NULL){ a <- attributes(x) l.x <- length(x) l.base <- length(base) l <- if (l.x==0L || (!is.null(base) && l.base==0L)) 0L else max(l.base,l.x) ret <- double(l) if (is.null(base)){ .Call(C_log_integer64, x, ret) }else if(length(base)==1L){ .Call(C_logbase_integer64, x, as.double(base), ret) }else{ .Call(C_logvect_integer64, x, as.double(base), ret) } a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export log10.integer64 <- function(x){ a <- attributes(x) ret <- .Call(C_log10_integer64, x, double(length(x))) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export log2.integer64 <- function(x){ a <- attributes(x) ret <- .Call(C_log2_integer64, x, double(length(x))) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export trunc.integer64 <- function(x, ...) x #' @rdname format.integer64 #' @export floor.integer64 <- function(x) x #' @rdname format.integer64 #' @export ceiling.integer64 <- function(x) x #' @rdname format.integer64 #' @export signif.integer64 <- function(x, digits=6L) x #' @rdname format.integer64 #' @export scale.integer64 <- function(x, center = TRUE, scale = TRUE) scale(as.double(x, keep.names=TRUE), center=center, scale=scale) #' @rdname format.integer64 #' @export round.integer64 <- function(x, digits=0L){ if (digits >= 0L) return(x) a <- attributes(x) b <- 10L^round(-digits) b2 <- b %/% 2L d <- (x %/% b) db <- d * b r <- abs(x-db) ret <- ifelse((r < b2) | (r == b2 & ((d %% 2L) == 0L)), db, db + sign(x)*b) #a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname sum.integer64 #' @export any.integer64 <- function(..., na.rm = FALSE){ l <- list(...) if (length(l)==1L){ .Call(C_any_integer64, l[[1L]], na.rm, logical(1L)) }else{ any(sapply(l, function(e){ if (is.integer64(e)){ .Call(C_any_integer64, e, na.rm, logical(1L)) }else{ any(e, na.rm = na.rm) } }), na.rm = na.rm) } } #' @rdname sum.integer64 #' @export all.integer64 <- function(..., na.rm = FALSE){ l <- list(...) if (length(l)==1L){ .Call(C_all_integer64, l[[1L]], na.rm, logical(1L)) }else{ all(sapply(l, function(e){ if (is.integer64(e)){ .Call(C_all_integer64, e, na.rm, logical(1L)) }else{ all(e, na.rm = na.rm) } }), na.rm = na.rm) } } #' @rdname sum.integer64 #' @export sum.integer64 <- function(..., na.rm = FALSE){ l <- list(...) 
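    # With a single argument the whole vector is summed in C; with several
    # arguments each one is first reduced to a scalar (non-integer64 inputs via
    # base sum() then as.integer64()), and the partial sums are summed again as
    # integer64, so precision is kept beyond .Machine$integer.max.
    # Illustrative sketch, never executed here (hence if (FALSE)):
    if (FALSE) {
        sum(as.integer64(.Machine$integer.max), 1L)
        # integer64 result 2147483648, where sum() on plain integer vectors
        # would overflow to NA with a warning
    }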
if (length(l)==1L){ ret <- .Call(C_sum_integer64, l[[1L]], na.rm, double(1L)) oldClass(ret) <- "integer64" ret }else{ ret <- sapply(l, function(e){ if (is.integer64(e)){ .Call(C_sum_integer64, e, na.rm, double(1L)) }else{ as.integer64(sum(e, na.rm = na.rm)) } }) oldClass(ret) <- "integer64" sum(ret, na.rm = na.rm) } } #' @rdname sum.integer64 #' @export prod.integer64 <- function(..., na.rm = FALSE){ l <- list(...) if (length(l)==1L){ ret <- .Call(C_prod_integer64, l[[1L]], na.rm, double(1L)) oldClass(ret) <- "integer64" ret }else{ ret <- sapply(l, function(e){ if (is.integer64(e)){ .Call(C_prod_integer64, e, na.rm, double(1L)) }else{ as.integer64(prod(e, na.rm = na.rm)) } }) oldClass(ret) <- "integer64" prod(ret, na.rm = na.rm) } } #' @rdname sum.integer64 #' @export min.integer64 <- function(..., na.rm = FALSE){ l <- list(...) noval <- TRUE if (length(l)==1L){ if (length(l[[1L]])) noval <- FALSE ret <- .Call(C_min_integer64, l[[1L]], na.rm, double(1L)) oldClass(ret) <- "integer64" }else{ ret <- sapply(l, function(e){ if (length(e)) noval <<- FALSE if (is.integer64(e)){ .Call(C_min_integer64, e, na.rm, double(1L)) }else{ as.integer64(min(e, na.rm = na.rm)) } }) oldClass(ret) <- "integer64" ret <- min(ret, na.rm = na.rm) } if (noval) warning("no non-NA value, returning the highest possible integer64 value +9223372036854775807") ret } #' @rdname sum.integer64 #' @export max.integer64 <- function(..., na.rm = FALSE){ l <- list(...) noval <- TRUE if (length(l)==1L){ if (length(l[[1L]])) noval <- FALSE ret <- .Call(C_max_integer64, l[[1L]], na.rm, double(1L)) oldClass(ret) <- "integer64" }else{ ret <- sapply(l, function(e){ if (length(e)) noval <<- FALSE if (is.integer64(e)){ .Call(C_max_integer64, e, na.rm, double(1L)) }else{ as.integer64(max(e, na.rm = na.rm)) } }) oldClass(ret) <- "integer64" ret <- max(ret, na.rm = na.rm) } if (noval) warning("no non-NA value, returning the lowest possible integer64 value -9223372036854775807") ret } #' @rdname sum.integer64 #' @export range.integer64 <- function(..., na.rm = FALSE, finite = FALSE){ if (finite) na.rm = TRUE l <- list(...) 
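    # 'noval' tracks whether any argument contained values at all. With no
    # values the C code returns the documented integer64 limits,
    # c(+9223372036854775807, -9223372036854775807), instead of c(-Inf, Inf),
    # and the warning below is raised.
    # Illustrative sketch, never executed here (hence if (FALSE)):
    if (FALSE) {
        range(integer64(0L))   # the limits in reversed order, with a warning
        lim.integer64()        # the same limits in ascending order, no warning
    }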
noval <- TRUE if (length(l)==1L){ if (length(l[[1L]])) noval <- FALSE ret <- .Call(C_range_integer64, l[[1L]], na.rm, double(2L)) oldClass(ret) <- "integer64" }else{ ret <- unlist(sapply(l, function(e){ if (length(e)) noval <<- FALSE if (is.integer64(e)){ .Call(C_range_integer64, e, na.rm, double(2L)) }else{ as.integer64(range(e, na.rm = na.rm)) } })) oldClass(ret) <- "integer64" ret <- range(ret, na.rm = na.rm) } if (noval) warning("no non-NA value, returning c(+9223372036854775807, -9223372036854775807)") ret } #' @rdname sum.integer64 #' @export lim.integer64 <- function(){ ret <- .Call(C_lim_integer64, double(2L)) oldClass(ret) <- "integer64" ret } #' @rdname cumsum.integer64 #' @export diff.integer64 <- function(x, lag=1L, differences=1L, ...){ lag <- as.integer(lag) n <- length(x) d <- differences <- as.integer(differences) while(d > 0L){ n <- n - lag if (n <= 0L){ ret <- double() break } # not assigning ret<-.Call in the following is intended because faster if (d==differences){ ret <- double(n) .Call(C_diff_integer64, x, as.integer64(lag), as.integer64(n), ret) }else{ .Call(C_diff_integer64, ret, as.integer64(lag), as.integer64(n), ret) } d <- d - 1L } # length of ret is only change once here length(ret) <- n oldClass(ret) <- "integer64" ret } #' @rdname cumsum.integer64 #' @export cummin.integer64 <- function(x){ ret <- .Call(C_cummin_integer64, x, double(length(x))) oldClass(ret) <- "integer64" ret } #' @rdname cumsum.integer64 #' @export cummax.integer64 <- function(x){ ret <- .Call(C_cummax_integer64, x, double(length(x))) oldClass(ret) <- "integer64" ret } #' @rdname cumsum.integer64 #' @export cumsum.integer64 <- function(x){ ret <- .Call(C_cumsum_integer64, x, double(length(x))) oldClass(ret) <- "integer64" ret } #' @rdname cumsum.integer64 #' @export cumprod.integer64 <- function(x){ ret <- .Call(C_cumprod_integer64, x, double(length(x))) oldClass(ret) <- "integer64" ret } #' @rdname format.integer64 #' @export is.na.integer64 <- function(x) { a <- attributes(x) ret <- .Call(C_isna_integer64, x, logical(length(x))) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export is.finite.integer64 <- function(x) !is.na(x) #' @rdname format.integer64 #' @export is.infinite.integer64 <- function(x) rep(FALSE, length(x)) #' @rdname format.integer64 #' @export is.nan.integer64 <- function(x) rep(FALSE, length(x)) #' @rdname xor.integer64 #' @export `==.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- logical(l) .Call(C_EQ_integer64, e1, e2, ret) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `!=.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- logical(l) .Call(C_NE_integer64, e1, e2, ret) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `<.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- logical(l) .Call(C_LT_integer64, e1, e2, ret) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `<=.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- 
as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- logical(l) .Call(C_LE_integer64, e1, e2, ret) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `>.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- logical(l) .Call(C_GT_integer64, e1, e2, ret) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `>=.integer64` <- function(e1, e2){ a <- binattr(e1,e2) e1 <- as.integer64(e1) e2 <- as.integer64(e2) l1 <- length(e1) l2 <- length(e2) l <- if (l1 == 0L || l2 == 0L) 0L else max(l1,l2) ret <- logical(l) .Call(C_GE_integer64, e1, e2, ret) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `&.integer64` <- function(e1, e2){ a <- binattr(e1,e2) ret <- as.logical(e1) & as.logical(e2) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export `|.integer64` <- function(e1, e2){ a <- binattr(e1,e2) ret <- as.logical(e1) | as.logical(e2) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname xor.integer64 #' @export xor.integer64 <- function(x, y){ a <- binattr(x,y) ret <- as.logical(x) != as.logical(y) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } #' @rdname format.integer64 #' @export `!.integer64` <- function(x) { a <- attributes(x) ret <- !as.logical(x) a$class <- minusclass(a$class, "integer64") attributes(ret) <- a ret } # as.vector.integer64 removed as requested by the CRAN maintainer # as.vector.integer64 <- function(x, mode="any"){ # ret <- NextMethod() # if (mode=="any") # oldClass(ret) <- "integer64" # ret # } # bug in R does not dispatch #' @exportS3Method is.vector integer64 is.vector.integer64 <- function(x, mode="any"){ cl <- minusclass(oldClass(x), "integer64") a <- attributes(x) a$class <- NULL a$names <- NULL if (is.na(match(mode, c("any","integer64"))) || length(cl) || length(a) ) FALSE else TRUE } #' @rdname as.character.integer64 #' @export as.list.integer64 <- function (x, ...) { ret <- NextMethod("as.list", x, ...) 
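    # NextMethod() returns a plain list whose elements are unclassed doubles
    # carrying the 64-bit payloads; the C call below presumably just restores
    # the 'integer64' class on each element before the list is returned.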
.Call(C_as_list_integer64, ret) } bit64/R/highlevel64.R0000644000176200001440000026061214705122715013662 0ustar liggesusers# /* # R-Code for matching and other functions based on hashing # S3 atomic 64bit integers for R # (c) 2012 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 # */ #' Function for measuring algorithmic performance of high-level and low-level integer64 functions #' #' @param nsmall size of smaller vector #' @param nbig size of larger bigger vector #' @param timefun a function for timing such as [bit::repeat.time()] or [system.time()] #' @param what a vector of names of high-level functions #' @param uniorder one of the order parameters that are allowed in [unique.integer64()] and [unipos.integer64()] #' @param taborder one of the order parameters that are allowed in [table.integer64()] #' @param plot set to FALSE to suppress plotting #' #' @details #' `benchmark64` compares the following scenarios for the following use cases: #' #' | **scenario name** | **explanation** | #' |------------------:|:------------------------------------------------| #' | 32-bit | applying Base R function to 32-bit integer data | #' | 64-bit | applying bit64 function to 64-bit integer data (with no cache) | #' | hashcache | ditto when cache contains [hashmap()], see [hashcache()] | #' | sortordercache | ditto when cache contains sorting and ordering, see [sortordercache()] | #' | ordercache | ditto when cache contains ordering only, see [ordercache()] | #' | allcache | ditto when cache contains sorting, ordering and hashing | #' #' | **use case name** | **explanation** | #' |------------------:|:----------------------------------------| #' | cache | filling the cache according to scenario | #' | match(s,b) | match small in big vector | #' | s %in% b | small %in% big vector | #' | match(b,s) | match big in small vector | #' | b %in% s | big %in% small vector | #' | match(b,b) | match big in (different) big vector | #' | b %in% b | big %in% (different) big vector | #' | duplicated(b) | duplicated of big vector | #' | unique(b) | unique of big vector | #' | table(b) | table of big vector | #' | sort(b) | sorting of big vector | #' | order(b) | ordering of big vector | #' | rank(b) | ranking of big vector | #' | quantile(b) | quantiles of big vector | #' | summary(b) | summary of of big vector | #' | SESSION | exemplary session involving multiple calls (including cache filling costs) | #' #' Note that the timings for the cached variants do _not_ contain the #' time costs of building the cache, except for the timing of the exemplary #' user session, where the cache costs are included in order to evaluate amortization. #' #' @return #' `benchmark64` returns a matrix with elapsed seconds, different high-level tasks #' in rows and different scenarios to solve the task in columns. The last row #' named 'SESSION' contains the elapsed seconds of the exemplary sesssion. #' #' `optimizer64` returns a dimensioned list with one row for each high-level #' function timed and two columns named after the values of the `nsmall` and #' `nbig` sample sizes. Each list cell contains a matrix with timings, #' low-level-methods in rows and three measurements `c("prep","both","use")` #' in columns. If it can be measured separately, `prep` contains the timing #' of preparatory work such as sorting and hashing, and `use` contains the #' timing of using the prepared work. 
If the function timed does both, #' preparation and use, the timing is in `both`. #' #' @seealso [integer64()] #' @examples #' message("this small example using system.time does not give serious timings\n #' this we do this only to run regression tests") #' benchmark64(nsmall=2^7, nbig=2^13, timefun=function(expr)system.time(expr, gcFirst=FALSE)) #' optimizer64(nsmall=2^7, nbig=2^13, timefun=function(expr)system.time(expr, gcFirst=FALSE) #' , plot=FALSE #' ) #'\dontrun{ #' message("for real measurement of sufficiently large datasets run this on your machine") #' benchmark64() #' optimizer64() #'} #' message("let's look at the performance results on Core i7 Lenovo T410 with 8 GB RAM") #' data(benchmark64.data) #' print(benchmark64.data) #' #' matplot(log2(benchmark64.data[-1,1]/benchmark64.data[-1,]) #' , pch=c("3", "6", "h", "s", "o", "a") #' , xlab="tasks [last=session]" #' , ylab="log2(relative speed) [bigger is better]" #' ) #' matplot(t(log2(benchmark64.data[-1,1]/benchmark64.data[-1,])) #' , type="b", axes=FALSE #' , lwd=c(rep(1, 14), 3) #' , xlab="context" #' , ylab="log2(relative speed) [bigger is better]" #' ) #' axis(1 #' , labels=c("32-bit", "64-bit", "hash", "sortorder", "order", "hash+sortorder") #' , at=1:6 #' ) #' axis(2) #' data(optimizer64.data) #' print(optimizer64.data) #' oldpar <- par(no.readonly = TRUE) #' par(mfrow=c(2,1)) #' par(cex=0.7) #' for (i in 1:nrow(optimizer64.data)){ #' for (j in 1:2){ #' tim <- optimizer64.data[[i,j]] #' barplot(t(tim)) #' if (rownames(optimizer64.data)[i]=="match") #' title(paste("match", colnames(optimizer64.data)[j], "in", colnames(optimizer64.data)[3-j])) #' else if (rownames(optimizer64.data)[i]=="%in%") #' title(paste(colnames(optimizer64.data)[j], "%in%", colnames(optimizer64.data)[3-j])) #' else #' title(paste(rownames(optimizer64.data)[i], colnames(optimizer64.data)[j])) #' } #' } #' par(mfrow=c(1,1)) #' @keywords misc #' @name benchmark64 NULL #' @describeIn benchmark64 compares high-level integer64 functions against the #' integer functions from Base R #' @export # nocov start benchmark64 <- function(nsmall=2L^16L, nbig=2L^25L, timefun=repeat.time) { message('\ncompare performance for a complete sessions of calls') s <- sample(nbig, nsmall, TRUE) b <- sample(nbig, nbig, TRUE) b2 <- sample(nbig, nbig, TRUE) tim1 <- double(6L) names(tim1) <- c("32-bit","64-bit","hashcache","sortordercache","ordercache","allcache") s <- as.integer(s) b <- as.integer(b) b2 <- as.integer(b2) for (i in 1:6){ message("\n=== ", names(tim1)[i], " ===") if (i==2L){ s <- as.integer64(s) b <- as.integer64(b) b2 <- as.integer64(b2) } tim1[i] <- 0L tim1[i] <- tim1[i] + timefun({ switch(as.character(i) , "3" = {hashcache(s); hashcache(b); hashcache(b2)} , "4" = {sortordercache(s); sortordercache(b); sortordercache(b2)} , "5" = {ordercache(s); ordercache(b); ordercache(b2)} , "6" = {hashcache(s); hashcache(b); hashcache(b2);sortordercache(s); sortordercache(b); sortordercache(b2)} ) })[3L] message('check data range, mean etc.') tim1[i] <- tim1[i] + timefun({ summary(b) })[3L] message('get all percentiles for plotting distribution shape') tim1[i] <- tim1[i] + timefun({ quantile(b, probs=seq(0.0, 1.0, 0.01)) })[3L] message('list the upper and lower permille of values') tim1[i] <- tim1[i] + timefun({ quantile(b, probs=c(0.001, 0.999)) sort(b, na.last=NA) })[3L] message('OK, for some of these values I want to see the complete ROW, so I need their positions in the data.frame') tim1[i] <- tim1[i] + timefun({ if(i==1L)order(b) else order.integer64(b) })[3L] 
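    # Note: the 32-bit branch can rely on base order(), table() and rank(),
    # while the integer64 branches call order.integer64(), table.integer64()
    # and rank.integer64() explicitly instead of relying on dispatch.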
message('check if any values are duplicated') tim1[i] <- tim1[i] + timefun({ anyDuplicated(b) })[3L] message('since not unique, then check distribution of frequencies') tim1[i] <- tim1[i] + timefun({ if(i==1L)tabulate(table(b, exclude=NULL)) else tabulate(table.integer64(b, return='list')$counts) })[3L] message("OK, let's plot the percentiles of unique values versus the percentiles allowing for duplicates") tim1[i] <- tim1[i] + timefun({ quantile(b, probs=seq(0.0, 1.0, 0.01)) quantile(unique(b), probs=seq(0.0, 1.0, 0.01)) })[3L] message('check whether we find a match for each fact in the dimension table') tim1[i] <- tim1[i] + timefun({ all(if(i==1L) b %in% s else "%in%.integer64"(b, s)) })[3L] message('check whether there are any dimension table entries not in the fact table') tim1[i] <- tim1[i] + timefun({ all(if(i==1L) s %in% b else "%in%.integer64"(s, b)) })[3L] message('check whether we find a match for each fact in a parallel fact table') tim1[i] <- tim1[i] + timefun({ all(if(i==1L) b %in% b2 else "%in%.integer64"(b, b2)) })[3L] message('find positions of facts in dimension table for joining') tim1[i] <- tim1[i] + timefun({ if(i==1L) match(b, s) else match.integer64(b, s) })[3L] message('find positions of facts in parallel fact table for joining') tim1[i] <- tim1[i] + timefun({ if(i==1L) match(b, b2) else match.integer64(b, b2) })[3L] message('out of curiosity: how well rank-correlated are fact and parallel fact table?') tim1[i] <- tim1[i] + timefun({ if (i==1L){ cor(rank(b, na.last="keep"), rank(b2, na.last="keep"), use="na.or.complete") }else{ cor(rank.integer64(b), rank.integer64(b2), use="na.or.complete") } })[3L] remcache(s) remcache(b) remcache(b2) print(round(rbind(seconds=tim1, factor=tim1[1L]/tim1), 3L)) } # 32-bit 64-bit hashcache sortordercache ordercache allcache # 196.510 8.963 8.242 5.183 12.325 6.043 # 32-bit 64-bit hashcache sortordercache ordercache allcache # 1.000 21.924 23.842 37.913 15.944 32.519 message("\nnow let's look more systematically at the components involved") s <- sample(nbig, nsmall, TRUE) b <- sample(nbig, nbig, TRUE) b2 <- sample(nbig, nbig, TRUE) tim2 <- matrix(0.0, 15L, 6L) dimnames(tim2) <- list(c("cache", "match(s,b)", "s %in% b", "match(b,s)", "b %in% s", "match(b,b)", "b %in% b", "duplicated(b)", "unique(b)", "table(b)", "sort(b)", "order(b)", "rank(b)", "quantile(b)", "summary(b)") , c("32-bit","64-bit","hashcache","sortordercache","ordercache","allcache")) s <- as.integer(s) b <- as.integer(b) b2 <- as.integer(b2) i <- 1L for (i in 1:6){ if (i==2L){ s <- as.integer64(s) b <- as.integer64(b) b2 <- as.integer64(b2) } if (i>2L)message(colnames(tim2)[i], " cache") tim2["cache",i] <- timefun({ switch(as.character(i) , "3" = {hashcache(s); hashcache(b); hashcache(b2)} , "4" = {sortordercache(s); sortordercache(b); sortordercache(b2)} , "5" = {ordercache(s); ordercache(b); ordercache(b2)} , "6" = {hashcache(s); hashcache(b); hashcache(b2);sortordercache(s); sortordercache(b); sortordercache(b2)} ) })[3L] message(colnames(tim2)[i], " match(s,b)") tim2["match(s,b)",i] <- timefun({ if (i==1L) match(s, b) else match.integer64(s, b) })[3L] message(colnames(tim2)[i], " s %in% b") tim2["s %in% b",i] <- timefun({ if (i==1L) s %in% b else "%in%.integer64"(s,b) })[3L] message(colnames(tim2)[i], " match(b,s)") tim2["match(b,s)",i] <- timefun({ if (i==1L) match(b, s) else match.integer64(b, s) })[3L] message(colnames(tim2)[i], " b %in% s") tim2["b %in% s",i] <- timefun({ if (i==1L) b %in% s else "%in%.integer64"(b,s) })[3L] message(colnames(tim2)[i], " 
match(b,b)") tim2["match(b,b)",i] <- timefun({ if (i==1L) match(b, b2) else match.integer64(b, b2) })[3L] message(colnames(tim2)[i], " b %in% b") tim2["b %in% b",i] <- timefun({ if (i==1L) b %in% b2 else "%in%.integer64"(b,b2) })[3L] message(colnames(tim2)[i], " duplicated(b)") tim2["duplicated(b)",i] <- timefun({ duplicated(b) })[3L] message(colnames(tim2)[i], " unique(b)") tim2["unique(b)",i] <- timefun({ unique(b) })[3L] message(colnames(tim2)[i], " table(b)") tim2["table(b)",i] <- timefun({ if(i==1L) table(b) else table.integer64(b, return='list') })[3L] message(colnames(tim2)[i], " sort(b)") tim2["sort(b)",i] <- timefun({ sort(b) })[3L] message(colnames(tim2)[i], " order(b)") tim2["order(b)",i] <- timefun({ if(i==1L) order(b) else order.integer64(b) })[3L] message(colnames(tim2)[i], " rank(b)") tim2["rank(b)",i] <- timefun({ if(i==1L) rank(b) else rank.integer64(b) })[3L] message(colnames(tim2)[i], " quantile(b)") tim2["quantile(b)",i] <- timefun({ quantile(b) })[3L] message(colnames(tim2)[i], " summary(b)") tim2["summary(b)",i] <- timefun({ summary(b) })[3L] remcache(s) remcache(b) remcache(b2) tim3 <- rbind(tim2, SESSION=tim1) #tim2 <- tim2[,1]/tim2 cat("seconds") print(round(tim3, 3L)) cat("factor") print(round(tim3[,1L]/tim3, 3L)) } # 32-bit 64-bit hashcache sortordercache ordercache allcache # cache 0.000 0.000 0.775 1.330 6.500 2.660 # match(s,b) 0.820 0.218 0.004 0.025 0.093 0.004 # s %in% b 0.810 0.234 0.003 0.022 0.093 0.003 # match(b,s) 0.450 0.228 0.232 0.224 0.224 0.226 # b %in% s 0.510 0.226 0.224 0.222 0.218 0.222 # match(b,b) 2.370 0.870 0.505 0.890 0.880 0.505 # b %in% b 2.350 0.850 0.480 0.865 0.870 0.483 # duplicated(b) 0.875 0.510 0.141 0.116 0.383 0.117 # unique(b) 0.930 0.555 0.447 0.156 0.427 0.450 # table(b) 110.340 0.725 0.680 0.234 0.575 0.202 # sort(b) 2.440 0.400 0.433 0.072 0.460 0.069 # order(b) 12.780 0.680 0.615 0.036 0.036 0.035 # rank(b) 13.480 0.860 0.915 0.240 0.545 0.246 # quantile(b) 0.373 0.400 0.410 0.000 0.000 0.000 # summary(b) 0.645 0.423 0.427 0.016 0.016 0.016 # TOTAL 149.173 7.179 6.291 4.448 11.320 5.239 # 32-bit 64-bit hashcache sortordercache ordercache allcache # cache 1 1.062 0.000 0.000 0.000 0.000 # match(s,b) 1 3.761 230.420 32.475 8.843 217.300 # s %in% b 1 3.462 234.090 36.450 8.735 237.386 # match(b,s) 1 1.974 1.940 2.009 2.009 1.991 # b %in% s 1 2.257 2.277 2.297 2.339 2.297 # match(b,b) 1 2.724 4.693 2.663 2.693 4.693 # b %in% b 1 2.765 4.896 2.717 2.701 4.862 # duplicated(b) 1 1.716 6.195 7.572 2.283 7.500 # unique(b) 1 1.676 2.082 5.972 2.180 2.067 # table(b) 1 152.193 162.265 471.538 191.896 546.238 # sort(b) 1 6.100 5.631 33.822 5.304 35.534 # order(b) 1 18.794 20.780 357.840 354.297 366.950 # rank(b) 1 15.674 14.732 56.167 24.734 54.797 # quantile(b) 1 0.933 0.911 804.907 806.027 810.133 # summary(b) 1 1.524 1.512 39.345 39.345 39.345 # TOTAL 1 20.778 23.712 33.534 13.177 28.476 tim3 } #' @describeIn benchmark64 compares for each high-level integer64 function the Base #' R integer function with several low-level integer64 functions with and #' without caching #' @export optimizer64 <- function(nsmall=2L^16L, nbig=2L^25L, timefun=repeat.time , what=c("match","%in%","duplicated","unique","unipos","table","rank","quantile") , uniorder = c("original", "values", "any") , taborder = c("values", "counts") , plot = TRUE ) { uniorder <- match.arg(uniorder) taborder <- match.arg(taborder) ret <- vector("list", 2L*length(what)) dim(ret) <- c(length(what), 2L) dimnames(ret) <- list(what, c(nsmall, nbig)) if (plot){ oldpar <- 
par(no.readonly = TRUE) on.exit(par(oldpar)) par(mfrow=c(2L,1L)) } if ("match" %in% what){ message("match: timings of different methods") N1 <- c(nsmall, nbig) N2 <- c(nbig, nsmall) for (i in seq_along(N1)){ n1 <- N1[i] n2 <- N2[i] x1 <- c(sample(n2, n1-1L, TRUE), NA) x2 <- c(sample(n2, n2-1L, TRUE), NA) tim <- matrix(0.0, 9L, 3L) dimnames(tim) <- list(c("match","match.64","hashpos","hashrev","sortorderpos","orderpos","hashcache","sortorder.cache","order.cache"), c("prep","both","use")) tim["match","both"] <- timefun({ p <- match(x1, x2) })[3L] x1 <- as.integer64(x1) x2 <- as.integer64(x2) tim["match.64","both"] <- timefun({ p2 <- match.integer64(x1, x2) })[3L] stopifnot(identical(p2, p)) tim["hashpos","prep"] <- timefun({ h2 <- hashmap(x2) })[3L] tim["hashpos","use"] <- timefun({ p2 <- hashpos(h2, x1) })[3L] stopifnot(identical(p2, p)) tim["hashrev","prep"] <- timefun({ h1 <- hashmap(x1) })[3L] tim["hashrev","use"] <- timefun({ p1 <- hashrev(h1, x2) })[3L] stopifnot(identical(p1, p)) tim["sortorderpos","prep"] <- system.time({ s2 <- clone(x2) o2 <- seq_along(x2) ramsortorder(s2, o2, na.last=FALSE) })[3L] tim["sortorderpos","use"] <- timefun({ p2 <- sortorderpos(s2, o2, x1) })[3L] stopifnot(identical(p2, p)) tim["orderpos","prep"] <- timefun({ o2 <- seq_along(x2) ramorder(x2, o2, na.last=FALSE) })[3L] tim["orderpos","use"] <- timefun({ p2 <- orderpos(x2, o2, x1, method=2L) })[3L] stopifnot(identical(p2, p)) hashcache(x2) tim["hashcache","use"] <- timefun({ p2 <- match.integer64(x1, x2) })[3L] stopifnot(identical(p2, p)) remcache(x2) sortordercache(x2) tim["sortorder.cache","use"] <- timefun({ p2 <- match.integer64(x1, x2) })[3L] stopifnot(identical(p2, p)) remcache(x2) ordercache(x2) tim["order.cache","use"] <- timefun({ p2 <- match.integer64(x1, x2) })[3L] stopifnot(identical(p2, p)) remcache(x2) if (plot){ barplot(t(tim)) n <- format(c(n1, n2)) title(paste("match", n[1L], "in", n[2L])) } ret[["match",as.character(n1)]] <- tim } } if ("%in%" %in% what){ message("%in%: timings of different methods") N1 <- c(nsmall, nbig) N2 <- c(nbig, nsmall) for (i in seq_along(N1)){ n1 <- N1[i] n2 <- N2[i] x1 <- c(sample(n2, n1-1L, TRUE), NA) x2 <- c(sample(n2, n2-1L, TRUE), NA) tim <- matrix(0.0, 10L, 3L) dimnames(tim) <- list(c("%in%","match.64","%in%.64","hashfin","hashrin","sortfin","orderfin","hash.cache","sortorder.cache","order.cache"), c("prep","both","use")) tim["%in%","both"] <- timefun({ p <- x1 %in% x2 })[3L] x1 <- as.integer64(x1) x2 <- as.integer64(x2) tim["match.64","both"] <- timefun({ p2 <- match.integer64(x1,x2, nomatch = 0L) > 0L })[3L] stopifnot(identical(p2, p)) tim["%in%.64","both"] <- timefun({ p2 <- "%in%.integer64"(x1,x2) # this is using the custom version })[3L] stopifnot(identical(p2, p)) tim["hashfin","prep"] <- timefun({ h2 <- hashmap(x2) })[3L] tim["hashfin","use"] <- timefun({ p2 <- hashfin(h2, x1) })[3L] stopifnot(identical(p2, p)) tim["hashrin","prep"] <- timefun({ h1 <- hashmap(x1) })[3L] tim["hashrin","use"] <- timefun({ p1 <- hashrin(h1, x2) })[3L] stopifnot(identical(p2, p)) tim["sortfin","prep"] <- timefun({ s2 <- clone(x2) ramsort(s2, na.last=FALSE) })[3L] tim["sortfin","use"] <- timefun({ p2 <- sortfin(s2, x1) })[3L] stopifnot(identical(p2, p)) tim["orderfin","prep"] <- timefun({ o2 <- seq_along(x2) ramorder(x2, o2, na.last=FALSE) })[3L] tim["orderfin","use"] <- timefun({ p2 <- orderfin(x2, o2, x1) })[3L] stopifnot(identical(p2, p)) hashcache(x2) tim["hash.cache","use"] <- timefun({ p2 <- "%in%.integer64"(x1, x2) })[3L] stopifnot(identical(p2, p)) remcache(x2) 
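      # The same %in% timing is now repeated with a sort/order cache and with an
      # order-only cache; each cache is removed again afterwards so the
      # scenarios do not contaminate one another.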
sortordercache(x2) tim["sortorder.cache","use"] <- timefun({ p2 <- "%in%.integer64"(x1, x2) })[3L] stopifnot(identical(p2, p)) remcache(x2) ordercache(x2) tim["order.cache","use"] <- timefun({ p2 <- "%in%.integer64"(x1, x2) })[3L] stopifnot(identical(p2, p)) remcache(x2) if (plot){ barplot(t(tim)) n <- format(c(n1, n2)) title(paste(n[1L], "%in%", n[2L])) } ret[["%in%",as.character(n1)]] <- tim } } if ("duplicated" %in% what){ message("duplicated: timings of different methods") N <- c(nsmall, nbig) for (i in seq_along(N)){ n <- N[i] x <- c(sample(n, n-1L, TRUE), NA) tim <- matrix(0.0, 10L, 3L) dimnames(tim) <- list(c("duplicated","duplicated.64","hashdup","sortorderdup1","sortorderdup2","orderdup1","orderdup2" ,"hash.cache","sortorder.cache","order.cache") , c("prep","both","use")) tim["duplicated","both"] <- timefun({ p <- duplicated(x) })[3L] x <- as.integer64(x) tim["duplicated.64","both"] <- timefun({ p2 <- duplicated(x) })[3L] stopifnot(identical(p2, p)) tim["hashdup","prep"] <- timefun({ h <- hashmap(x) })[3L] tim["hashdup","use"] <- timefun({ p2 <- hashdup(h) })[3L] stopifnot(identical(p2, p)) tim["sortorderdup1","prep"] <- timefun({ s <- clone(x) o <- seq_along(x) ramsortorder(s, o, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sortorderdup1","use"] <- timefun({ p2 <- sortorderdup(s, o, method=1L) })[3L] stopifnot(identical(p2, p)) tim["sortorderdup2","prep"] <- tim["sortorderdup1","prep"] tim["sortorderdup2","use"] <- timefun({ p2 <- sortorderdup(s, o, method=2L) })[3L] stopifnot(identical(p2, p)) tim["orderdup1","prep"] <- timefun({ o <- seq_along(x) ramorder(x, o, na.last=FALSE) nunique <- ordernut(x,o)[1L] })[3L] tim["orderdup1","use"] <- timefun({ p2 <- orderdup(x, o, method=1L) })[3L] stopifnot(identical(p2, p)) tim["orderdup2","prep"] <- tim["orderdup1","prep"] tim["orderdup2","use"] <- timefun({ p2 <- orderdup(x, o, method=2L) })[3L] stopifnot(identical(p2, p)) hashcache(x) tim["hash.cache","use"] <- timefun({ p2 <- duplicated(x) })[3L] stopifnot(identical(p2, p)) remcache(x) sortordercache(x) tim["sortorder.cache","use"] <- timefun({ p2 <- duplicated(x) })[3L] stopifnot(identical(p2, p)) remcache(x) ordercache(x) tim["order.cache","use"] <- timefun({ p2 <- duplicated(x) })[3L] stopifnot(identical(p2, p)) remcache(x) if (plot){ barplot(t(tim), cex.names=0.7) title(paste0("duplicated(",n,")")) } ret[["duplicated",as.character(n)]] <- tim } } if ("unique" %in% what){ message("unique: timings of different methods") N <- c(nsmall, nbig) for (i in seq_along(N)){ n <- N[i] x <- c(sample(n, n-1L, TRUE), NA) tim <- matrix(0.0, 15L, 3L) dimnames(tim) <- list( c("unique","unique.64","hashmapuni","hashuni","hashunikeep","sortuni","sortunikeep","orderuni","orderunikeep","hashdup","sortorderdup" ,"hash.cache","sort.cache","sortorder.cache","order.cache") , c("prep","both","use")) tim["unique","both"] <- timefun({ p <- unique(x) })[3L] x <- as.integer64(x) p <- as.integer64(p) if (uniorder=="values") ramsort(p, na.last=FALSE) tim["unique.64","both"] <- timefun({ p2 <- unique(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical.integer64(p2, p)) tim["hashmapuni","both"] <- timefun({ p2 <- hashmapuni(x) })[3L] if (uniorder=="original") stopifnot(identical.integer64(p2, p)) tim["hashuni","prep"] <- timefun({ h <- hashmap(x) # for(r in 1:r)h <- hashmap(x, nunique=h$nunique) })[3L] tim["hashuni","use"] <- timefun({ p2 <- hashuni(h) })[3L] if (uniorder=="values") stopifnot(identical.integer64(sort(p2, na.last=FALSE), p)) tim["hashunikeep","prep"] <- tim["hashuni","prep"] 
tim["hashunikeep","use"] <- timefun({ p2 <- hashuni(h, keep.order=TRUE) })[3L] if (uniorder=="original") stopifnot(identical.integer64(p2, p)) tim["sortuni","prep"] <- timefun({ s <- clone(x) ramsort(s, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sortuni","use"] <- timefun({ p2 <- sortuni(s, nunique) })[3L] if (uniorder=="values") stopifnot(identical.integer64(sort(p2, na.last=FALSE), p)) tim["sortunikeep","prep"] <- timefun({ s <- clone(x) o <- seq_along(x) ramsortorder(s, o, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sortunikeep","use"] <- timefun({ p2 <- sortorderuni(x, s, o, nunique) })[3L] if (uniorder=="original") stopifnot(identical.integer64(p2, p)) tim["orderuni","prep"] <- timefun({ o <- seq_along(x) ramorder(x, o, na.last=FALSE) nunique <- ordernut(x,o)[1L] })[3L] tim["orderuni","use"] <- timefun({ p2 <- orderuni(x, o, nunique) })[3L] if (uniorder=="values") stopifnot(identical.integer64(sort(p2, na.last=FALSE), p)) tim["orderunikeep","prep"] <- tim["orderuni","prep"] tim["orderunikeep","use"] <- timefun({ p2 <- orderuni(x, o, nunique, keep.order=TRUE) nunique <- ordernut(x,o)[1L] })[3L] if (uniorder=="original") stopifnot(identical.integer64(p2, p)) tim["hashdup","prep"] <- tim["hashuni","prep"] tim["hashdup","use"] <- timefun({ p2 <- x[!hashdup(h)] })[3L] if (uniorder=="original") stopifnot(identical.integer64(p2, p)) tim["sortorderdup","prep"] <- tim["sortunikeep","prep"] tim["sortorderdup","use"] <- timefun({ p2 <- x[!sortorderdup(s, o)] })[3L] if (uniorder=="original") stopifnot(identical.integer64(p2, p)) hashcache(x) tim["hash.cache","use"] <- timefun({ p2 <- unique(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical.integer64(p2, p)) remcache(x) sortcache(x) tim["sort.cache","use"] <- timefun({ p2 <- unique(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical.integer64(p2, p)) remcache(x) sortordercache(x) tim["sortorder.cache","use"] <- timefun({ p2 <- unique(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical.integer64(p2, p)) remcache(x) ordercache(x) tim["order.cache","use"] <- timefun({ p2 <- unique(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical.integer64(p2, p)) remcache(x) if (plot){ barplot(t(tim), cex.names=0.7) title(paste0("unique(",n,", order=",uniorder,")")) } ret[["unique",as.character(n)]] <- tim } } if ("unipos" %in% what){ message("unipos: timings of different methods") N <- c(nsmall, nbig) for (i in seq_along(N)){ n <- N[i] x <- c(sample(n, n-1L, TRUE), NA) tim <- matrix(0.0, 14L, 3L) dimnames(tim) <- list( c("unique","unipos.64","hashmapupo","hashupo","hashupokeep","sortorderupo","sortorderupokeep","orderupo","orderupokeep","hashdup","sortorderdup" ,"hash.cache","sortorder.cache","order.cache") , c("prep","both","use")) tim["unique","both"] <- timefun({ unique(x) })[3L] x <- as.integer64(x) tim["unipos.64","both"] <- timefun({ p <- unipos(x, order=uniorder) })[3L] tim["hashmapupo","both"] <- timefun({ p2 <- hashmapupo(x) })[3L] if (uniorder=="original") stopifnot(identical(p2, p)) tim["hashupo","prep"] <- timefun({ h <- hashmap(x) # if nunique is small we could re-build the hashmap at a smaller size # h <- hashmap(x, nunique=h$nunique) })[3L] tim["hashupo","use"] <- timefun({ p2 <- hashupo(h) })[3L] if (uniorder=="values") stopifnot(identical(sort(p2, na.last=FALSE), sort(p, na.last=FALSE))) tim["hashupokeep","prep"] <- tim["hashupo","prep"] tim["hashupokeep","use"] <- timefun({ p2 <- hashupo(h, keep.order=TRUE) })[3L] if (uniorder=="original") stopifnot(identical(p2, 
p)) tim["sortorderupo","prep"] <- timefun({ s <- clone(x) o <- seq_along(x) ramsortorder(s, o, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sortorderupo","use"] <- timefun({ p2 <- sortorderupo(s, o, nunique) })[3L] if (uniorder=="values") stopifnot(identical(p2, p)) tim["sortorderupokeep","prep"] <- timefun({ s <- clone(x) o <- seq_along(x) ramsortorder(s, o, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sortorderupokeep","use"] <- timefun({ p2 <- sortorderupo(s, o, nunique, keep.order=TRUE) })[3L] if (uniorder=="original") stopifnot(identical(p2, p)) tim["orderupo","prep"] <- timefun({ o <- seq_along(x) ramorder(x, o, na.last=FALSE) nunique <- ordernut(x,o)[1L] })[3L] tim["orderupo","use"] <- timefun({ p2 <- orderupo(x, o, nunique) })[3L] if (uniorder=="values") stopifnot(identical(p2, p)) tim["orderupokeep","prep"] <- tim["orderupo","prep"] tim["orderupokeep","use"] <- timefun({ p2 <- orderupo(x, o, nunique, keep.order=TRUE) nunique <- ordernut(x,o)[1L] })[3L] if (uniorder=="original") stopifnot(identical(p2, p)) tim["hashdup","prep"] <- tim["hashupo","prep"] tim["hashdup","use"] <- timefun({ p2 <- (1:n)[!hashdup(h)] })[3L] if (uniorder=="original") stopifnot(identical(p2, p)) tim["sortorderdup","prep"] <- tim["sortorderupokeep","prep"] tim["sortorderdup","use"] <- timefun({ p2 <- (1:n)[!sortorderdup(s, o)] })[3L] if (uniorder=="original") stopifnot(identical(p2, p)) hashcache(x) tim["hash.cache","use"] <- timefun({ p2 <- unipos(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical(p2, p)) remcache(x) sortordercache(x) tim["sortorder.cache","use"] <- timefun({ p2 <- unipos(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical(p2, p)) remcache(x) ordercache(x) tim["order.cache","use"] <- timefun({ p2 <- unipos(x, order=uniorder) })[3L] if (uniorder!="any") stopifnot(identical(p2, p)) remcache(x) if (plot){ barplot(t(tim), cex.names=0.7) title(paste0("unipos(",n,", order=",uniorder,")")) } ret[["unipos",as.character(n)]] <- tim } } if ("table" %in% what){ message("table: timings of different methods") N <- c(nsmall, nbig) for (i in seq_along(N)){ n <- N[i] x <- c(sample.int(1024L, n-1L, replace=TRUE), NA) tim <- matrix(0.0, 13L, 3L) dimnames(tim) <- list(c("tabulate","table","table.64","hashmaptab","hashtab","hashtab2","sorttab","sortordertab","ordertab","ordertabkeep" ,"hash.cache","sort.cache","order.cache") , c("prep","both","use")) tim["tabulate","both"] <- timefun({ tabulate(x) })[3L] tim["table","both"] <- timefun({ p <- table(x, exclude=NULL) })[3L] p <- p[-length(p)] x <- as.integer64(x) tim["table.64","both"] <- timefun({ p2 <- table.integer64(x, order=taborder) })[3L] p2 <- p2[-1L] stopifnot(identical(p2, p)) tim["hashmaptab","both"] <- timefun({ p <- hashmaptab(x) })[3L] tim["hashtab","prep"] <- timefun({ h <- hashmap(x) })[3L] tim["hashtab","use"] <- timefun({ p2 <- hashtab(h) })[3L] stopifnot(identical(p2, p)) tim["hashtab2","prep"] <- tim["hashtab","prep"] + timefun({ h <- hashmap(x, nunique=h$nunique) })[3L] tim["hashtab2","use"] <- timefun({ p2 <- hashtab(h) })[3L] sortp <- function(p){ s <- p$values o <- seq_along(s) ramsortorder(s,o, na.last=FALSE) list(values=s, counts=p$counts[o]) } p <- sortp(p) p2 <- sortp(p2) stopifnot(identical(p2, p)) tim["sorttab","prep"] <- timefun({ s <- clone(x) ramsort(s, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sorttab","use"] <- timefun({ p2 <- list(values=sortuni(s, nunique), counts=sorttab(s, nunique)) })[3L] stopifnot(identical(p2, p)) tim["sortordertab","prep"] <- timefun({ s <- 
clone(x) o <- seq_along(x) ramsortorder(s, o, na.last=FALSE) nunique <- sortnut(s)[1L] })[3L] tim["sortordertab","use"] <- timefun({ p2 <- list(values=sortorderuni(x, s, o, nunique), counts=sortordertab(s, o)) })[3L] p2 <- sortp(p2) stopifnot(identical(p2, p)) tim["ordertab","prep"] <- timefun({ o <- seq_along(x) ramorder(x, o, na.last=FALSE) nunique <- ordernut(x, o)[1L] })[3L] tim["ordertab","use"] <- timefun({ p2 <- list(values=orderuni(x, o, nunique), counts=ordertab(x, o, nunique)) })[3L] stopifnot(identical(p2, p)) tim["ordertabkeep","prep"] <- tim["ordertab","prep"] tim["ordertabkeep","use"] <- timefun({ p2 <- list(values=orderuni(x, o, nunique, keep.order=TRUE), counts=ordertab(x, o, nunique, keep.order=TRUE)) })[3L] p2 <- sortp(p2) stopifnot(identical(p2, p)) hashcache(x) tim["hash.cache","use"] <- timefun({ p <- table.integer64(x, order=taborder) })[3L] remcache(x) sortordercache(x) tim["sort.cache","use"] <- timefun({ p2 <- table.integer64(x, order=taborder) })[3L] stopifnot(identical(p2, p)) remcache(x) ordercache(x) tim["order.cache","use"] <- timefun({ p2 <- table.integer64(x, order=taborder) })[3L] stopifnot(identical(p2, p)) remcache(x) if (plot){ barplot(t(tim), cex.names=0.7) title(paste0("table.integer64(",n,", order=",taborder,")")) } ret[["table",as.character(n)]] <- tim } } if ("rank" %in% what){ message("rank: timings of different methods") N <- c(nsmall, nbig) for (i in seq_along(N)){ n <- N[i] x <- c(sample(n, n-1L, TRUE), NA) tim <- matrix(0.0, 7L, 3L) dimnames(tim) <- list(c("rank","rank.keep","rank.64","sortorderrnk","orderrnk" ,"sort.cache","order.cache") , c("prep","both","use")) tim["rank","both"] <- timefun({ rank(x) })[3L] tim["rank.keep","both"] <- timefun({ p <- rank(x, na.last="keep") })[3L] x <- as.integer64(x) tim["rank.64","both"] <- timefun({ p2 <- rank.integer64(x) })[3L] stopifnot(identical(p2, p)) tim["sortorderrnk","prep"] <- timefun({ s <- clone(x) o <- seq_along(x) na.count <- ramsortorder(s, o, na.last=FALSE) })[3L] tim["sortorderrnk","use"] <- timefun({ p2 <- sortorderrnk(s, o, na.count) })[3L] stopifnot(identical(p2, p)) tim["orderrnk","prep"] <- timefun({ o <- seq_along(x) na.count <- ramorder(x, o, na.last=FALSE) })[3L] tim["orderrnk","use"] <- timefun({ p2 <- orderrnk(x, o, na.count) })[3L] stopifnot(identical(p2, p)) sortordercache(x) tim["sort.cache","use"] <- timefun({ p2 <- rank.integer64(x) })[3L] stopifnot(identical(p2, p)) remcache(x) ordercache(x) tim["order.cache","use"] <- timefun({ p2 <- rank.integer64(x) })[3L] stopifnot(identical(p2, p)) remcache(x) if (plot){ barplot(t(tim), cex.names=0.7) title(paste0("rank.integer64(",n,")")) } ret[["rank",as.character(n)]] <- tim } } if ("quantile" %in% what){ message("quantile: timings of different methods") N <- c(nsmall, nbig) for (i in seq_along(N)){ n <- N[i] x <- c(sample(n, n-1L, TRUE), NA) tim <- matrix(0.0, 6L, 3L) dimnames(tim) <- list(c("quantile","quantile.64","sortqtl","orderqtl" ,"sort.cache","order.cache") , c("prep","both","use")) tim["quantile","both"] <- timefun({ p <- quantile(x, type=1L, na.rm=TRUE) })[3L] p2 <- p p <- as.integer64(p2) names(p) <- names(p2) x <- as.integer64(x) tim["quantile.64","both"] <- timefun({ p2 <- quantile(x, na.rm=TRUE) })[3L] stopifnot(identical(p2, p)) tim["sortqtl","prep"] <- timefun({ s <- clone(x) na.count <- ramsort(s, na.last=FALSE) })[3L] tim["sortqtl","use"] <- timefun({ p2 <- sortqtl(s, na.count, seq(0.0, 1.0, 0.25)) })[3L] stopifnot(identical(unname(p2), unname(p))) tim["orderqtl","prep"] <- timefun({ o <- seq_along(x) na.count <- 
ramorder(x, o, na.last=FALSE) })[3L] tim["orderqtl","use"] <- timefun({ p2 <- orderqtl(x, o, na.count, seq(0.0, 1.0, 0.25)) })[3L] stopifnot(identical(unname(p2), unname(p))) sortordercache(x) tim["sort.cache","use"] <- timefun({ p2 <- quantile(x, na.rm=TRUE) })[3L] stopifnot(identical(p2, p)) remcache(x) ordercache(x) tim["order.cache","use"] <- timefun({ p2 <- quantile(x, na.rm=TRUE) })[3L] stopifnot(identical(p2, p)) remcache(x) if (plot){ barplot(t(tim), cex.names=0.7) title(paste0("quantile(",n,")")) } ret[["quantile",as.character(n)]] <- tim } } ret } # nocov end #' 64-bit integer matching #' #' `match` returns a vector of the positions of (first) matches of its first #' argument in its second. #' `%in%` is a more intuitive interface as a binary operator, which returns a #' logical vector indicating if there is a match or not for its left operand. #' #' @param x integer64 vector: the values to be matched, optionally carrying a #' cache created with [hashcache()] #' @param table integer64 vector: the values to be matched against, optionally #' carrying a cache created with [hashcache()] or [sortordercache()] #' @param nomatch the value to be returned in the case when no match is found. #' Note that it is coerced to integer. #' @param nunique NULL or the number of unique values of table (including NA). #' Providing `nunique` can speed-up matching when `table` has no cache. Note #' that a wrong nunique can cause undefined behaviour up to a crash. #' @param method NULL for automatic method selection or a suitable low-level #' method, see details #' @param ... ignored #' #' @details #' These functions automatically choose from several low-level functions #' considering the size of `x` and `table` and the availability of caches. #' #' Suitable methods for `match.integer64` are #' - [`hashpos`] (hash table lookup) #' - [`hashrev`] (reverse lookup) #' - [`sortorderpos`] (fast ordering) #' - [`orderpos`] (memory saving ordering). #' #' Suitable methods for `%in%.integer64` are #' - [`hashfin`] (hash table lookup) #' - [`hashrin`] (reverse lookup) #' - [`sortfin`] (fast sorting) #' - [`orderfin`] (memory saving ordering). #' #' @return #' A vector of the same length as `x`. #' #' `match`: An integer vector giving the position in `table` of #' the first match if there is a match, otherwise `nomatch`. #' #' If `x[i]` is found to equal `table[j]` then the value #' returned in the `i`-th position of the return value is `j`, #' for the smallest possible `j`. If no match is found, the value #' is `nomatch`. #' #' `%in%`: A logical vector, indicating if a match was located for #' each element of `x`: thus the values are `TRUE` or #' `FALSE` and never `NA`. 
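#'
#' A minimal sketch of these return values (assuming {bit64} is attached; the
#' vectors are illustrative only):
#'
#' ```r
#' x <- as.integer64(c(2, 4, NA))
#' table <- as.integer64(1:3)
#' match(x, table)   # 2 NA NA: positions in 'table', 'nomatch' where not found
#' x %in% table      # TRUE FALSE FALSE: logical, never NA
#' ```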
#' #' @seealso [match()] #' @examples #' x <- as.integer64(c(NA, 0:9), 32) #' table <- as.integer64(c(1:9, NA)) #' match.integer64(x, table) #' "%in%.integer64"(x, table) #' #' x <- as.integer64(sample(c(rep(NA, 9), 0:9), 32, TRUE)) #' table <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) #' stopifnot(identical(match.integer64(x, table), match(as.integer(x), as.integer(table)))) #' stopifnot(identical("%in%.integer64"(x, table), as.integer(x) %in% as.integer(table))) #' #' \dontrun{ #' message("check when reverse hash-lookup beats standard hash-lookup") #' e <- 4:24 #' timx <- timy <- matrix(NA, length(e), length(e), dimnames=list(e,e)) #' for (iy in seq_along(e)) #' for (ix in 1:iy){ #' nx <- 2^e[ix] #' ny <- 2^e[iy] #' x <- as.integer64(sample(ny, nx, FALSE)) #' y <- as.integer64(sample(ny, ny, FALSE)) #' #hashfun(x, bits=as.integer(5)) #' timx[ix,iy] <- repeat.time({ #' hx <- hashmap(x) #' py <- hashrev(hx, y) #' })[3] #' timy[ix,iy] <- repeat.time({ #' hy <- hashmap(y) #' px <- hashpos(hy, x) #' })[3] #' #identical(px, py) #' print(round(timx[1:iy,1:iy]/timy[1:iy,1:iy], 2), na.print="") #' } #' #' message("explore best low-level method given size of x and table") #' B1 <- 1:27 #' B2 <- 1:27 #' tim <- array(NA, dim=c(length(B1), length(B2), 5) #' , dimnames=list(B1, B2, c("hashpos","hashrev","sortpos1","sortpos2","sortpos3"))) #' for (i1 in B1) #' for (i2 in B2) #' { #' b1 <- B1[i1] #' b2 <- B1[i2] #' n1 <- 2^b1 #' n2 <- 2^b2 #' x1 <- as.integer64(c(sample(n2, n1-1, TRUE), NA)) #' x2 <- as.integer64(c(sample(n2, n2-1, TRUE), NA)) #' tim[i1,i2,1] <- repeat.time({h <- hashmap(x2);hashpos(h, x1);rm(h)})[3] #' tim[i1,i2,2] <- repeat.time({h <- hashmap(x1);hashrev(h, x2);rm(h)})[3] #' s <- clone(x2); o <- seq_along(s); ramsortorder(s, o) #' tim[i1,i2,3] <- repeat.time(sortorderpos(s, o, x1, method=1))[3] #' tim[i1,i2,4] <- repeat.time(sortorderpos(s, o, x1, method=2))[3] #' tim[i1,i2,5] <- repeat.time(sortorderpos(s, o, x1, method=3))[3] #' rm(s,o) #' print(apply(tim, 1:2, function(ti)if(any(is.na(ti)))NA else which.min(ti))) #' } #' } #' @keywords manip logic #' @export match.integer64 <- function(x, table, nomatch = NA_integer_, nunique=NULL, method=NULL, ...){ stopifnot(is.integer64(x)) table <- as.integer64(table) cache_env <- cache(table) if (is.null(method)){ if (is.null(cache_env)){ nx <- length(x) if (is.null(nunique)) nunique <- length(table) btable <- as.integer(ceiling(log2(nunique*1.5))) bx <- as.integer(ceiling(log2(nx*1.5))) if (bx<=17L && btable>=16L){ method <- "hashrev" }else{ method <- "hashpos" } } else if (exists("hashmap", envir=cache_env, inherits=FALSE)) { method <- "hashpos" } else if (exists("sort", envir=cache_env, inherits=FALSE) && exists("order", envir=cache_env, inherits=FALSE) && (length(table)>length(x) || length(x)<4096L)) { method <- "sortorderpos" } else if (exists("order", envir=cache_env, inherits=FALSE) && (length(table)>length(x) || length(x)<4096L)) { method <- "orderpos" } else { nx <- length(x) if (is.null(nunique)){ if (exists("nunique", envir=cache_env, inherits=FALSE)) nunique <- cache_env$nunique else nunique <- length(table) } btable <- as.integer(ceiling(log2(nunique*1.5))) bx <- as.integer(ceiling(log2(nx*1.5))) if (bx<=17L && btable>=16L){ method <- "hashrev" } else { method <- "hashpos" } } } switch(method , hashpos={ if (is.null(cache_env) || !exists("hashmap", envir=cache_env, inherits=FALSE)){ if (exists("btable", inherits=FALSE)) h <- hashmap(table, hashbits=btable) else{ if (is.null(nunique)) nunique <- cache_env$nunique h <- 
hashmap(table, nunique=nunique) } }else h <- cache_env p <- hashpos(h, x, nomatch=nomatch) } , hashrev={ cache_env <- cache(x) if (is.null(cache_env) || !exists("hashmap", envir=cache_env, inherits=FALSE)){ if (exists("bx", inherits=FALSE)) h <- hashmap(x, bits=bx) else{ if (is.null(nunique)) nunique <- cache_env$nunique h <- hashmap(x, nunique=nunique) } }else h <- cache_env p <- hashrev(h, table, nomatch=nomatch) } , sortorderpos={ if (is.null(cache_env) || !exists("sort", cache_env) || !exists("order", cache_env)){ s <- clone(table) o <- seq_along(s) ramsortorder(s, o, na.last=FALSE) }else{ s <- get("sort", cache_env) o <- get("order", cache_env) } p <- sortorderpos(s, o, x, nomatch=nomatch) } , orderpos={ if (is.null(cache_env) || !exists("order", cache_env)){ o <- seq_along(s) ramorder(table, o, na.last=FALSE) }else{ o <- get("order", cache_env) } p <- orderpos(table, o, x, nomatch=nomatch) } , stop("unknown method ", method) ) p } #' @rdname match.integer64 #' @export `%in%.integer64` <- function(x, table, ...){ stopifnot(is.integer64(x)) table <- as.integer64(table) nunique <- NULL cache_env <- cache(table) if (is.null(cache_env)) { nx <- length(x) if (is.null(nunique)) nunique <- length(table) btable <- as.integer(ceiling(log2(nunique*1.5))) bx <- as.integer(ceiling(log2(nx*1.5))) if (bx<=17L && btable>=16L) { method <- "hashrin" } else { method <- "hashfin" } } else if (exists("hashmap", envir=cache_env, inherits=FALSE)) { method <- "hashfin" } else if (exists("sort", envir=cache_env, inherits=FALSE) && (length(table)>length(x) || length(x)<4096L)) { method <- "sortfin" } else if (exists("order", envir=cache_env, inherits=FALSE) && (length(table)>length(x) || length(x)<4096L)) { method <- "orderfin" } else { nx <- length(x) if (is.null(nunique)){ if (exists("nunique", envir=cache_env, inherits=FALSE)) nunique <- cache_env$nunique else nunique <- length(table) } btable <- as.integer(ceiling(log2(nunique*1.5))) bx <- as.integer(ceiling(log2(nx*1.5))) if (bx<=17L && btable>=16L) { method <- "hashrin" } else { method <- "hashfin" } } switch(method , hashfin={ if (is.null(cache_env) || !exists("hashmap", envir=cache_env, inherits=FALSE)){ if (exists("btable", inherits=FALSE)) h <- hashmap(table, hashbits=btable) else{ if (is.null(nunique)) nunique <- cache_env$nunique h <- hashmap(table, nunique=nunique) } }else h <- cache_env p <- hashfin(h, x) } , hashrin={ cache_env <- cache(x) if (is.null(cache_env) || !exists("hashmap", envir=cache_env, inherits=FALSE)){ if (exists("bx", inherits=FALSE)) h <- hashmap(x, bits=bx) else{ if (is.null(nunique)) nunique <- cache_env$nunique h <- hashmap(x, nunique=nunique) } }else h <- cache_env p <- hashrin(h, table) } , sortfin={ if (is.null(cache_env) || !exists("sort", cache_env)){ s <- clone(table) ramsort(s, na.last=FALSE) }else{ s <- get("sort", cache_env) } p <- sortfin(s, x) } , orderfin={ if (is.null(cache_env) || !exists("order", cache_env)){ o <- seq_along(s) ramorder(table, o, na.last=FALSE) }else{ o <- get("order", cache_env) } p <- orderfin(table, o, x) } , stop("unknown method ", method) ) p } #' Determine Duplicate Elements of integer64 #' #' `duplicated()` determines which elements of a vector or data frame are duplicates #' of elements with smaller subscripts, and returns a logical vector #' indicating which elements (rows) are duplicates. #' #' @param x a vector or a data frame or an array or `NULL`. #' @param incomparables ignored #' @param nunique NULL or the number of unique values (including NA). 
Providing #' `nunique` can speed-up matching when `x` has no cache. Note that a wrong #' `nunique` can cause undefined behaviour up to a crash. #' @param method NULL for automatic method selection or a suitable low-level #' method, see details #' @param ... ignored #' #' @details #' This function automatically chooses from several low-level functions #' considering the size of `x` and the availability of a cache. #' #' Suitable methods are #' - [`hashdup`] (hashing) #' - [`sortorderdup`] (fast ordering) #' - [`orderdup`] (memory saving ordering). #' #' @return `duplicated()`: a logical vector of the same length as `x`. #' @seealso [duplicated()], [unique.integer64()] #' @examples #' x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) #' duplicated(x) #' #' stopifnot(identical(duplicated(x), duplicated(as.integer(x)))) #' @keywords logic manip #' @export duplicated.integer64 <- function(x , incomparables = FALSE # dummy parameter , nunique = NULL , method = NULL , ... ){ stopifnot(identical(incomparables, FALSE)) cache_env <- cache(x) if (is.null(nunique) && !is.null(cache_env)) nunique <- cache_env$nunique if (is.null(method)){ if (is.null(cache_env)){ if (length(x)>50000000L) method <- "sortorderdup" # nocov. Too large for practical unit tests. else method <- "hashdup" } else if (exists("sort", envir=cache_env, inherits=FALSE) && exists("order", envir=cache_env, inherits=FALSE)) method <- "sortorderdup" else if (exists("hashmap", envir=cache_env, inherits=FALSE)) method <- "hashdup" else if (exists("order", envir=cache_env, inherits=FALSE)) method <- "orderdup" else if (length(x) > 50000000L) method <- "sortorderdup" else method <- "hashdup" } switch(method , hashdup={ if (is.null(cache_env) || !exists("hashmap", envir=cache_env, inherits=FALSE)) h <- hashmap(x, nunique=nunique) else h <- cache_env p <- hashdup(h) } , sortorderdup={ if (is.null(cache_env) || !exists("sort", cache_env, inherits=FALSE) || !exists("order", cache_env, inherits=FALSE)){ s <- clone(x) o <- seq_along(s) ramsortorder(s, o, na.last=FALSE) }else{ s <- get("sort", cache_env, inherits=FALSE) o <- get("order", cache_env, inherits=FALSE) } p <- sortorderdup(s, o) } , orderdup={ if (is.null(cache_env) || !exists("order", cache_env, inherits=FALSE)){ o <- seq_along(s) ramorder(x, o, na.last=FALSE) }else{ o <- get("order", cache_env, inherits=FALSE) } p <- orderdup(x, o) } , stop("unknown method ", method) ) p } #' Extract Unique Elements from integer64 #' #' `unique` returns a vector like `x` but with duplicate elements/rows removed. #' #' @param x a vector or a data frame or an array or `NULL`. #' @param incomparables ignored #' @param order The order in which unique values will be returned, see details #' @param nunique NULL or the number of unique values (including NA). Providing #' `nunique` can speed-up matching when `x` has no cache. Note that a wrong #' `nunique`` can cause undefined behaviour up to a crash. #' @param method NULL for automatic method selection or a suitable low-level #' method, see details #' @param ... ignored #' #' @details #' This function automatically chooses from several low-level functions #' considering the size of `x` and the availability of a cache. #' #' Suitable methods are #' - [`hashmapuni`] (simultaneously creating and using a hashmap) #' - [`hashuni`] (first creating a hashmap then using it) #' - [`sortuni`] (fast sorting for sorted order only) #' - [`sortorderuni`] (fast ordering for original order only) #' - [`orderuni`] (memory saving ordering). 
#' #' The default `order="original"` returns unique values in the order of the #' first appearance in `x` like in [unique()], this costs extra processing. #' `order="values"` returns unique values in sorted order like in [table()], #' this costs extra processing with the hash methods but comes for free. #' `order="any"` returns unique values in undefined order, possibly faster. #' For hash methods this will be a quasi random order, for sort methods this #' will be sorted order. #' #' @return For a vector, an object of the same type of `x`, but with only #' one copy of each duplicated element. No attributes are copied (so #' the result has no names). #' #' @seealso [unique()] for the generic, [unipos()] which gives the indices #' of the unique elements and [table.integer64()] which gives frequencies #' of the unique elements. #' #' @examples #' x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) #' unique(x) #' unique(x, order="values") #' #' stopifnot(identical(unique(x), x[!duplicated(x)])) #' stopifnot(identical(unique(x), as.integer64(unique(as.integer(x))))) #' stopifnot(identical(unique(x, order="values") #' , as.integer64(sort(unique(as.integer(x)), na.last=FALSE)))) #' #' @keywords manip logic #' @export unique.integer64 <- function(x , incomparables = FALSE # dummy parameter , order = c("original","values","any") , nunique = NULL , method = NULL , ... ){ stopifnot(identical(incomparables, FALSE)) order <- match.arg(order) cache_env <- cache(x) keep.order <- order == "original" if (is.null(nunique) && !is.null(cache_env)) nunique <- cache_env$nunique if (is.null(method)){ if (is.null(cache_env)){ if (order=="values") method <- "sortuni" else method <- "hashmapuni" }else{ switch(order , original = { if (exists("hashmap", envir=cache_env, inherits=FALSE)) method <- "hashuni" else if (exists("order", envir=cache_env, inherits=FALSE)){ if (exists("sort", envir=cache_env, inherits=FALSE)) method <- "sortorderuni" else method <- "orderuni" }else method <- "hashmapuni" } , values = { if (exists("sort", envir=cache_env, inherits=FALSE)) method <- "sortuni" else if (exists("order", envir=cache_env, inherits=FALSE)) method <- "orderuni" else if (exists("hashmap", envir=cache_env, inherits=FALSE) && cache_env$nunique= 2^63 hypothetical combinations") dims[[i]] <- sortuni(s, nu[[i]]) if (i==1L) x <- sortorderkey(s,o) - 1L else x <- x + d[[i]] * (sortorderkey(s,o) - 1L) } } cache_env <- cache(x) if (is.null(nunique) && !is.null(cache_env)) nunique <- cache_env$nunique if (is.null(method)){ if (is.null(cache_env)){ if (order=="values" && (is.null(nunique) || nunique>65536L)) method <- "sorttab" else method <- "hashmaptab" }else{ # nolint next: unnecessary_nesting_linter. Good parallelism. 
if (order=="values"){ if (exists("sort", envir=cache_env, inherits=FALSE)) method <- "sorttab" else if (exists("hashmap", envir=cache_env, inherits=FALSE) && cache_env$nunique1.0)) stop("p outside [0,1]") cache_env <- cache(x) if (is.null(method)){ if (is.null(cache_env)) method <- "sortqtl" else if (exists("sort", envir=cache_env, inherits=FALSE)) method <- "sortqtl" else if (exists("order", envir=cache_env, inherits=FALSE)) method <- "orderqtl" else method <- "sortqtl" } switch(method , sortqtl={ if (is.null(cache_env) || !exists("sort", cache_env, inherits=FALSE)){ s <- clone(x) na.count <- ramsort(s, na.last=FALSE) }else{ s <- get("sort", cache_env, inherits=FALSE) na.count <- get("na.count", cache_env, inherits=FALSE) } qs <- sortqtl(s, na.count, probs) } , orderqtl={ if (is.null(cache_env) || !exists("order", cache_env, inherits=FALSE)){ o <- seq_along(x) na.count <- ramorder(x, o, na.last=FALSE) }else{ o <- get("order", cache_env, inherits=FALSE) na.count <- get("na.count", cache_env, inherits=FALSE) } qs <- orderqtl(x, o, na.count, probs) } , stop("unknown method ", method) ) if (names){ np <- length(probs) dig <- max(2L, getOption("digits")) names(qs) <- paste0(if (np < 100L) formatC(100.0 * probs, format = "fg", width = 1L, digits = dig) else format(100.0 * probs, trim = TRUE, digits = dig), "%") } qs } #' @rdname qtile #' @param type an integer selecting the quantile algorithm, currently only #' 0 is supported, see details #' @param na.rm logical; if `TRUE`, any `NA` and `NaN`'s are removed from #' `x` before the quantiles are computed. #' @export quantile.integer64 <- function(x, probs = seq(0.0, 1.0, 0.25), na.rm = FALSE, names = TRUE, type=0L, ...) { if (type[[1L]]!=0L) stop("only type==0 ('qtile') supported") if (!na.rm && na.count(x)>0L) stop("missing values not allowed with 'na.rm='==FALSE") qtile.integer64(x, probs = probs, na.rm = na.rm, names = names, ...) } # TODO(R>=3.4.0): Drop this branch when median always gets '...' # adding ... (wish of Kurt Hornik 23.3.2017) if (is.na(match("...", names(formals(median))))){ # nocov start. Only run on old R. median_i64_impl_ <- function(x, na.rm=FALSE){ if (!na.rm && na.count(x)>0L) stop("missing values not allowed with 'na.rm='==FALSE") qtile.integer64(x, probs = 0.5, na.rm = na.rm, names = FALSE) } # nocov end. }else{ median_i64_impl_ <- function(x, na.rm=FALSE, ...){ if (!na.rm && na.count(x)>0L) stop("missing values not allowed with 'na.rm='==FALSE") qtile.integer64(x, probs = 0.5, na.rm = na.rm, names = FALSE) } } #' @rdname qtile #' @export median.integer64 <- median_i64_impl_ # mean.integer64 <- function(x, na.rm=FALSE){ # s <- sum(x, na.rm=na.rm) # if (!is.na(s)){ # if (na.rm) # s <- s%/%(length(x)-na.count(x)) # else # s <- s%/%length(x) # } # s # } #' @rdname qtile #' @export mean.integer64 <- function(x, na.rm=FALSE, ...) { ret <- .Call(C_mean_integer64, x, as.logical(na.rm), double(1L)) oldClass(ret) <- "integer64" ret } #' @rdname qtile #' @param object a integer64 vector #' @export summary.integer64 <- function (object, ...) 
{ nas <- na.count(object) qq <- quantile(object, na.rm=TRUE) qq <- c(qq[1L:3L], mean(object, na.rm=TRUE), qq[4L:5L]) names(qq) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.") if (any(nas)) c(qq, "NA's" = nas) else qq } bit64/R/hash64.R0000644000176200001440000004255214705122715012637 0ustar liggesusers# /* # R-Code for hashing # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 # */ #' Hashing for 64bit integers #' #' This is an explicit implementation of hash functionality that underlies #' matching and other functions in R. Explicit means that you can create, #' store and use hash functionality directly. One advantage is that you can #' re-use hashmaps, which avoids re-building hashmaps again and again. #' #' @param x an integer64 vector #' @param minfac minimum factor by which the hashmap has more elements compared to the data `x`, ignored if `hashbits` is given directly #' @param hashbits length of hashmap is `2^hashbits` #' @param cache an optional [cache()] object into which to put the hashmap (by default a new cache is created) #' @param nunique giving the _correct_ number of unique elements can help reduce the size of the hashmap #' @param nomatch the value to be returned if an element is not found in the hashmap #' @param keep.order determines order of results and speed: `FALSE` (the default) is faster and returns in the (pseudo)random order of the hash function, `TRUE` returns in the order of first appearance in the original data, but this requires extra work #' @param ... further arguments, passed from generics, ignored in methods #' #' @details #' | **function** | **see also** | **description** | #' |-------------:|----------------------------------------:|:----------------| #' | `hashfun` | `digest` | export of the hash function used in `hashmap` | #' | `hashmap` | [`match()`][match.integer64] | return hashmap | #' | `hashpos` | [`match()`][match.integer64] | return positions of `x` in `hashmap` | #' | `hashrev` | [`match()`][match.integer64] | return positions of `hashmap` in `x` | #' | `hashfin` | [`%in%.integer64`] | return logical whether `x` is in `hashmap` | #' | `hashrin` | [`%in%.integer64`] | return logical whether `hashmap` is in `x` | #' | `hashdup` | [`duplicated()`][duplicated.integer64] | return logical whether hashdat is duplicated using hashmap | #' | `hashuni` | [`unique()`][unique.integer64] | return unique values of hashmap | #' | `hashmapuni` | [`unique()`][unique.integer64] | return unique values of `x` | #' | `hashupo` | [`unique()`][unique.integer64] | return positions of unique values in hashdat | #' | `hashmapupo` | [`unique()`][unique.integer64] | return positions of unique values in `x` | #' | `hashtab` | [`table()`][table.integer64] | tabulate values of hashdat using hashmap in `keep.order=FALSE` | #' | `hashmaptab` | [`table()`][table.integer64] | tabulate values of `x` building the hashmap on the fly in `keep.order=FALSE` | #' #' @return See Details #' @keywords programming manip #' @seealso [`match()`][match.integer64], [runif64()] #' @examples #' x <- as.integer64(sample(c(NA, 0:9))) #' y <- as.integer64(sample(c(NA, 1:9), 10, TRUE)) #' hashfun(y) #' hx <- hashmap(x) #' hy <- hashmap(y) #' ls(hy) #' hashpos(hy, x) #' hashrev(hx, y) #' hashfin(hy, x) #' hashrin(hx, y) #' hashdup(hy) #' hashuni(hy) #' hashuni(hy, keep.order=TRUE) #' hashmapuni(y) #' hashupo(hy) #' hashupo(hy, keep.order=TRUE) #' hashmapupo(y) #' hashtab(hy) #' 
hashmaptab(y) #' #' stopifnot(identical(match(as.integer(x),as.integer(y)),hashpos(hy, x))) #' stopifnot(identical(match(as.integer(x),as.integer(y)),hashrev(hx, y))) #' stopifnot(identical(as.integer(x) %in% as.integer(y), hashfin(hy, x))) #' stopifnot(identical(as.integer(x) %in% as.integer(y), hashrin(hx, y))) #' stopifnot(identical(duplicated(as.integer(y)), hashdup(hy))) #' stopifnot(identical(as.integer64(unique(as.integer(y))), hashuni(hy, keep.order=TRUE))) #' stopifnot(identical(sort(hashuni(hy, keep.order=FALSE)), sort(hashuni(hy, keep.order=TRUE)))) #' stopifnot(identical(y[hashupo(hy, keep.order=FALSE)], hashuni(hy, keep.order=FALSE))) #' stopifnot(identical(y[hashupo(hy, keep.order=TRUE)], hashuni(hy, keep.order=TRUE))) #' stopifnot(identical(hashpos(hy, hashuni(hy, keep.order=TRUE)), hashupo(hy, keep.order=TRUE))) #' stopifnot(identical(hashpos(hy, hashuni(hy, keep.order=FALSE)), hashupo(hy, keep.order=FALSE))) #' stopifnot(identical(hashuni(hy, keep.order=FALSE), hashtab(hy)$values)) #' stopifnot(identical(as.vector(table(as.integer(y), useNA="ifany")) #' , hashtab(hy)$counts[order.integer64(hashtab(hy)$values)])) #' stopifnot(identical(hashuni(hy, keep.order=TRUE), hashmapuni(y))) #' stopifnot(identical(hashupo(hy, keep.order=TRUE), hashmapupo(y))) #' stopifnot(identical(hashtab(hy), hashmaptab(y))) #' #' \dontrun{ #' message("explore speed given size of the hasmap in 2^hashbits and size of the data") #' message("more hashbits means more random access and less collisions") #' message("i.e. more data means less random access and more collisions") #' bits <- 24 #' b <- seq(-1, 0, 0.1) #' tim <- matrix(NA, length(b), 2, dimnames=list(b, c("bits","bits+1"))) #' for (i in 1:length(b)){ #' n <- as.integer(2^(bits+b[i])) #' x <- as.integer64(sample(n)) #' tim[i,1] <- repeat.time(hashmap(x, hashbits=bits))[3] #' tim[i,2] <- repeat.time(hashmap(x, hashbits=bits+1))[3] #' print(tim) #' matplot(b, tim) #' } #' message("we conclude that n*sqrt(2) is enough to avoid collisions") #' } #' @name hashmap NULL #' @rdname hashmap #' @export hashfun <- function(x, ...) UseMethod("hashfun") #' @rdname hashmap #' @export hashfun.integer64 <- function(x, minfac=1.41, hashbits=NULL, ...) { n <- length(x) if (is.null(hashbits)){ minlen <- ceiling(n*minfac) if (minlen > 0L) hashbits <- as.integer(ceiling(log2(minlen))) else hashbits <- 0L }else hashbits <- as.integer(hashbits) .Call(C_hashfun_integer64, x, hashbits, integer(n), PACKAGE = "bit64") } #' @rdname hashmap #' @export hashmap <- function(x, ...) UseMethod("hashmap") #' @rdname hashmap #' @export hashmap.integer64 <- function(x, nunique=NULL, minfac=1.41, hashbits=NULL, cache=NULL, ...) { if (is.null(nunique)){ nunique <- integer(1L) n <- length(x) }else{ nunique <- as.integer(nunique) n <- nunique } if (is.null(hashbits)){ minlen <- ceiling(n*minfac) if (minlen > 0L) hashbits <- as.integer(ceiling(log2(minlen))) else hashbits <- 0L }else hashbits <- as.integer(hashbits) nhash <- as.integer(2L^hashbits) hashmap <- integer(nhash) .Call(C_hashmap_integer64, x, hashbits, hashmap, nunique, PACKAGE = "bit64") if (is.null(cache)) cache <- newcache(x) else if (!bit::still.identical(x, get("x", envir=cache, inherits=FALSE))) stop("vector 'x' dissociated from cache") assign("hashmap", hashmap, envir=cache) assign("hashbits", hashbits, envir=cache) assign("nhash", nhash, envir=cache) assign("nunique", nunique, envir=cache) cache } #' @rdname hashmap #' @export hashpos <- function(cache, ...) 
UseMethod("hashpos") #' @rdname hashmap #' @export hashpos.cache_integer64 <- function(cache, x, nomatch = NA_integer_, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) .Call(C_hashpos_integer64, as.integer64(x), hashdat, hashbits, hashmap, as.integer(nomatch), integer(length(x)), PACKAGE = "bit64") } #' @rdname hashmap #' @export hashrev <- function(cache, ...) UseMethod("hashrev") #' @rdname hashmap #' @export hashrev.cache_integer64 <- function(cache, x, nomatch = NA_integer_, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) nunique <- get("nunique", envir=cache, inherits=FALSE) .Call(C_hashrev_integer64, as.integer64(x), hashdat, hashbits, hashmap, nunique, as.integer(nomatch), integer(length(hashdat)), PACKAGE = "bit64") } #' @rdname hashmap #' @export hashfin <- function(cache, ...) UseMethod("hashfin") #' @rdname hashmap #' @export hashfin.cache_integer64 <- function(cache, x, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) .Call(C_hashfin_integer64, as.integer64(x), hashdat, hashbits, hashmap, logical(length(x)), PACKAGE = "bit64") } #' @rdname hashmap #' @export hashrin <- function(cache, ...) UseMethod("hashrin") #' @rdname hashmap #' @export hashrin.cache_integer64 <- function(cache, x, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) .Call(C_hashrin_integer64, as.integer64(x), hashdat, hashbits, hashmap, nunique, logical(length(hashdat)), PACKAGE = "bit64") } #' @rdname hashmap #' @export hashdup <- function(cache, ...) UseMethod("hashdup") #' @rdname hashmap #' @export hashdup.cache_integer64 <- function(cache, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) nunique <- get("nunique", envir=cache, inherits=FALSE) .Call(C_hashdup_integer64, hashdat, hashbits, hashmap, nunique, logical(length(hashdat)), PACKAGE = "bit64") } #' @rdname hashmap #' @export hashuni <- function(cache, ...) UseMethod("hashuni") #' @rdname hashmap #' @export hashuni.cache_integer64 <- function(cache, keep.order=FALSE, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) nunique <- get("nunique", envir=cache, inherits=FALSE) ret <- .Call(C_hashuni_integer64, hashdat, hashbits, hashmap, as.logical(keep.order), double(nunique), PACKAGE = "bit64") oldClass(ret) <- "integer64" ret } #' @rdname hashmap #' @export hashupo <- function(cache, ...) UseMethod("hashupo") #' @rdname hashmap #' @export hashupo.cache_integer64 <- function(cache, keep.order=FALSE, ...) 
{ hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) nunique <- get("nunique", envir=cache, inherits=FALSE) .Call(C_hashupo_integer64, hashdat, hashbits, hashmap, as.logical(keep.order), integer(nunique), PACKAGE = "bit64") } # just returns a vector of length nunique of counts of the values # at positions hashupo(, keep.order=FALSE) which are those of hashuni(, keep.order=FALSE) #' @rdname hashmap #' @export hashtab <- function(cache, ...) UseMethod("hashtab") #' @rdname hashmap #' @export hashtab.cache_integer64 <- function(cache, ...) { hashbits <- get("hashbits", envir=cache, inherits=FALSE) hashmap <- get("hashmap", envir=cache, inherits=FALSE) hashdat <- get("x", envir=cache, inherits=FALSE) nunique <- get("nunique", envir=cache, inherits=FALSE) ret <- .Call(C_hashtab_integer64, hashdat, hashbits, hashmap, nunique, PACKAGE = "bit64") attr(ret, "names") <- c("values","counts") ret } #' @rdname hashmap #' @export hashmaptab <- function(x, ...) UseMethod("hashmaptab") #' @rdname hashmap #' @export hashmaptab.integer64 <- function(x, nunique=NULL, minfac=1.5, hashbits=NULL, ...) { if (is.null(nunique)){ nunique <- integer(1L) n <- length(x) }else{ nunique <- as.integer(nunique) n <- nunique } if (is.null(hashbits)) hashbits <- as.integer(ceiling(log2(n*minfac))) else hashbits <- as.integer(hashbits) nhash <- as.integer(2L^hashbits) hashmap <- integer(nhash) ret <- .Call(C_hashmaptab_integer64, x, hashbits, hashmap, nunique, PACKAGE = "bit64") # theoretically we could use {hashmap, nunique} at this point the same way like after calling hashmap_integer64 attr(ret, "names") <- c("values","counts") ret } #' @rdname hashmap #' @export hashmapuni <- function(x, ...) UseMethod("hashmapuni") #' @rdname hashmap #' @export hashmapuni.integer64 <- function(x, nunique=NULL, minfac=1.5, hashbits=NULL, ...) { if (is.null(nunique)){ nunique <- integer(1L) n <- length(x) }else{ nunique <- as.integer(nunique) n <- nunique } if (is.null(hashbits)){ minlen <- ceiling(n*minfac) if (minlen > 0L) hashbits <- as.integer(ceiling(log2(minlen))) else hashbits <- 0L }else hashbits <- as.integer(hashbits) nhash <- as.integer(2L^hashbits) hashmap <- integer(nhash) ret <- .Call(C_hashmapuni_integer64, x, hashbits, hashmap, nunique, PACKAGE = "bit64") # theoretically we could use {hashmap, nunique} at this point the same way like after calling hashmap_integer64 oldClass(ret) <- "integer64" ret } #' @rdname hashmap #' @export hashmapupo <- function(x, ...) UseMethod("hashmapupo") #' @rdname hashmap #' @export hashmapupo.integer64 <- function(x, nunique=NULL, minfac=1.5, hashbits=NULL, ...) 
{ if (is.null(nunique)){ nunique <- integer(1L) n <- length(x) }else{ nunique <- as.integer(nunique) n <- nunique } if (is.null(hashbits)){ minlen <- ceiling(n*minfac) if (minlen > 0L) hashbits <- as.integer(ceiling(log2(minlen))) else hashbits <- 0L }else hashbits <- as.integer(hashbits) nhash <- as.integer(2L^hashbits) hashmap <- integer(nhash) # theoretically we could use {hashmap, nunique} at this point the same way like after calling hashmap_integer64 .Call(C_hashmapupo_integer64, x, hashbits, hashmap, nunique, PACKAGE = "bit64") } #' integer64: random numbers #' #' Create uniform random 64-bit integers within a defined range #' #' @param n length of return vector #' @param min lower inclusive bound for random numbers #' @param max upper inclusive bound for random numbers #' @param replace set to FALSE for sampling from a finite pool, see [sample()] #' #' @return an integer64 vector #' #' @details #' For each random integer we call R's internal C interface `unif_rand()` twice. #' Each call is mapped to 2^32 unsigned integers. The two 32-bit patterns are #' concatenated to form the new integer64. This process is repeated until the #' result is not a `NA_INTEGER64_`. #' @keywords classes distribution sysdata #' @seealso [runif()], [hashfun()] #' #' @examples #' runif64(12) #' runif64(12, -16, 16) #' runif64(12, 0, as.integer64(2^60)-1) # not 2^60-1 ! #' var(runif(1e4)) #' var(as.double(runif64(1e4, 0, 2^40))/2^40) # ~ = 1/12 = .08333 #' #' table(sample(16, replace=FALSE)) #' table(runif64(16, 1, 16, replace=FALSE)) #' table(sample(16, replace=TRUE)) #' table(runif64(16, 1, 16, replace=TRUE)) #' #' @export runif64 <- function(n, min=lim.integer64()[1L], max=lim.integer64()[2L], replace = TRUE){ n <- as.integer(n) min <- as.integer64(min) max <- as.integer64(max) if (replace){ ret <- .Call(C_runif_integer64, n, min, max) oldClass(ret) <- "integer64" }else{ N <- n d <- max - min + 1L if (!is.na(d) && N > d) stop("cannot take a sample larger than the population when 'replace = FALSE'") if (!is.na(d) && n > d / (2.0*log(n, 64.0))){ ret <- .Call(C_runif_integer64, as.integer(d), as.integer64(min), as.integer64(max)) oldClass(ret) <- "integer64" ret <- sample(ret, n, FALSE) }else{ ret <- integer64() while (N > 0L){ ret <- unique(c(ret, Recall( if (N*1.05 < .Machine$integer.max) N*1.05 else N , min , max , replace=TRUE ))) N <- n - length(ret) } if (N != 0L) ret <- ret[1:n] } } ret } # nocov start if (FALSE){ require(bit64) require(microbenchmark) n <- 1000000L print(microbenchmark(runif64(n, 1.0, n), times=20L)) for (m in c(1.0, 2.0, 4.0, 8.0, 16.0)){ print(microbenchmark(runif64(n, 1.0, n*m, replace=FALSE), times=20L)) print(microbenchmark(sample(n*m, n, replace=FALSE), times=20L)) } print(microbenchmark(runif64(n, 1.0, replace=FALSE), times=20L)) library(bit64) n <- 10000000L x <- as.integer64(sample(n, n, TRUE)) t1 <- system.time({h <- hashmap(x)})[3L] t2 <- system.time({value <- hashuni(h)})[3L] t3 <- system.time({count <- hashtab(h)})[3L] t4 <- system.time({ret1 <- list(values=value, counts=count)})[3L] t1+t2+t3+t4 system.time({ret2 <- hashmaptab(x)})[3L] identical(ret1,ret2) x <- as.integer64(sample(n, n, TRUE)) system.time({ ret2 <- hashmaptab(x) cv2 <- sum(ret2$counts[ret2$counts > 1.0]) })[3L] system.time({ s <- clone(x) na.count <- ramsort(s, has.na = TRUE, na.last = FALSE, decreasing = FALSE, stable = FALSE, optimize = "time") cv <- .Call(C_r_ram_integer64_sortnut, x = s, PACKAGE = "bit64")[[2L]] }) cv cv2 nunique(x) length(value) length(count) length(t1$value) length(t1$count) value 
t1 count s <- clone(x); o <- seq_along(x); ramsortorder(s, o) t2 <- sortordertab(s,o) length(s) length(t2) library(bit64) n <- 1000000L r <- runif64(n, lim.integer64()[1L], lim.integer64()[2L]) identical(r, as.integer64(as.bitstring(r))) cbind(r,as.integer64(as.bitstring(r))) cbind(as.bitstring(r),as.bitstring(as.integer64(as.bitstring(r)))) #sum(duplicated(r)) #table.integer64(r) #range(r) log2(abs(range(r))) x <- seq(0.0, 1.0, 0.1) y <- quantile.integer64(r, x) z <- diff(y) plot(log2(z), type="b",ylim=c(0.0, max(log2(z)))) n <- 10000000L system.time(runif(n)) system.time(runif64(n)) } # nocov end bit64/R/data.R0000644000176200001440000001254014705122715012445 0ustar liggesusers#' Results of performance measurement on a Core i7 Lenovo T410 8 GB RAM under Windows 7 64bit #' #' These are the results of calling [benchmark64()] #' #' @format #' The format is: #' #' ``` #' num [1:16, 1:6] 2.55e-05 2.37 2.39 1.28 1.39 ... #' - attr(*, "dimnames")=List of 2 #' ..$ : chr [1:16] "cache" "match(s,b)" "s %in% b" "match(b,s)" ... #' ..$ : chr [1:6] "32-bit" "64-bit" "hashcache" "sortordercache" ... #' ``` #' @usage #' data(benchmark64.data) #' #' @examples #' data(benchmark64.data) #' print(benchmark64.data) #' matplot(log2(benchmark64.data[-1,1]/benchmark64.data[-1,]) #' , pch=c("3", "6", "h", "s", "o", "a") #' , xlab="tasks [last=session]" #' , ylab="log2(relative speed) [bigger is better]" #' ) #' matplot(t(log2(benchmark64.data[-1,1]/benchmark64.data[-1,])) #' , axes=FALSE #' , type="b" #' , lwd=c(rep(1, 14), 3) #' , xlab="context" #' , ylab="log2(relative speed) [bigger is better]" #' ) #' axis(1 #' , labels=c("32-bit", "64-bit", "hash", "sortorder", "order", "hash+sortorder") #' , at=1:6 #' ) #' axis(2) #' @keywords datasets "benchmark64.data" #' Results of performance measurement on a Core i7 Lenovo T410 8 GB RAM under Windows 7 64bit #' #' These are the results of calling [optimizer64()] #' #' @format #' The format is: #' #' ``` #' List of 16 #' $ : num [1:9, 1:3] 0 0 1.63 0.00114 2.44 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:9] "match" "match.64" "hashpos" "hashrev" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:10, 1:3] 0 0 0 1.62 0.00114 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:10] "%in%" "match.64" "%in%.64" "hashfin" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:10, 1:3] 0 0 0.00105 0.00313 0.00313 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:10] "duplicated" "duplicated.64" "hashdup" "sortorderdup1" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:15, 1:3] 0 0 0 0.00104 0.00104 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:15] "unique" "unique.64" "hashmapuni" "hashuni" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:14, 1:3] 0 0 0 0.000992 0.000992 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:14] "unique" "unipos.64" "hashmapupo" "hashupo" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:13, 1:3] 0 0 0 0 0.000419 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:13] "tabulate" "table" "table.64" "hashmaptab" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:7, 1:3] 0 0 0 0.00236 0.00714 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:7] "rank" "rank.keep" "rank.64" "sortorderrnk" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:6, 1:3] 0 0 0.00189 0.00714 0 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:6] "quantile" "quantile.64" "sortqtl" "orderqtl" ... #' .. 
..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:9, 1:3] 0 0 0.00105 1.17 0 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:9] "match" "match.64" "hashpos" "hashrev" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:10, 1:3] 0 0 0 0.00104 1.18 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:10] "%in%" "match.64" "%in%.64" "hashfin" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:10, 1:3] 0 0 1.64 2.48 2.48 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:10] "duplicated" "duplicated.64" "hashdup" "sortorderdup1" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:15, 1:3] 0 0 0 1.64 1.64 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:15] "unique" "unique.64" "hashmapuni" "hashuni" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:14, 1:3] 0 0 0 1.62 1.62 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:14] "unique" "unipos.64" "hashmapupo" "hashupo" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:13, 1:3] 0 0 0 0 0.32 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:13] "tabulate" "table" "table.64" "hashmaptab" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:7, 1:3] 0 0 0 2.96 10.69 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:7] "rank" "rank.keep" "rank.64" "sortorderrnk" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' $ : num [1:6, 1:3] 0 0 1.62 10.61 0 ... #' ..- attr(*, "dimnames")=List of 2 #' .. ..$ : chr [1:6] "quantile" "quantile.64" "sortqtl" "orderqtl" ... #' .. ..$ : chr [1:3] "prep" "both" "use" #' - attr(*, "dim")= int [1:2] 8 2 #' - attr(*, "dimnames")=List of 2 #' ..$ : chr [1:8] "match" "%in%" "duplicated" "unique" ... #' ..$ : chr [1:2] "65536" "33554432" #' ``` #' #' @usage #' data(optimizer64.data) #' #' @examples #' data(optimizer64.data) #' print(optimizer64.data) #' oldpar <- par(no.readonly = TRUE) #' par(mfrow=c(2,1)) #' par(cex=0.7) #' for (i in 1:nrow(optimizer64.data)){ #' for (j in 1:2){ #' tim <- optimizer64.data[[i,j]] #' barplot(t(tim)) #' if (rownames(optimizer64.data)[i]=="match") #' title(paste("match", colnames(optimizer64.data)[j], "in", colnames(optimizer64.data)[3-j])) #' else if (rownames(optimizer64.data)[i]=="%in%") #' title(paste(colnames(optimizer64.data)[j], "%in%", colnames(optimizer64.data)[3-j])) #' else #' title(paste(rownames(optimizer64.data)[i], colnames(optimizer64.data)[j])) #' } #' } #' par(mfrow=c(1,1)) #' #' @keywords datasets "optimizer64.data" bit64/NEWS0000644000176200001440000002355514742210704011714 0ustar liggesusers# bit64 4.6.0-1 ## NOTICE OF PLANNED BREAKING CHANGES 1. {bit64} exports many S3 methods directly. Calling S3 methods directly is generally bad form; we should rely on the S3 dispatch system for this. Needing to export an S3 method is usually indicative of some deep issue that's otherwise hard to work around. I plan to un-export most if not all S3 methods in future versions. In this release, there will be no change in behavior besides this notice in the NEWS. Going forward, I see two types of S3 exports: (1) exports that have no discoverable direct usage (that is, a global GitHub search, which includes the CRAN mirror, turned up _no_ R code calling them directly, except perhaps in `:::` form, which would be unaffected by un-export); and (2) exports that _are_ observed to be called directly by some number of downstreams. With the former, I am more comfortable un-exporting more aggressively; with the latter, I will take a more gradual approach. 
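    As a rough illustration of the distinction (a sketch only; the vector `x` is hypothetical user code, not taken from any downstream package), code should rely on generic dispatch rather than calling the exported methods directly:

    ```r
    library(bit64)
    x <- as.integer64(c(1, 2, 2, NA))

    # preferred: the generics dispatch to the integer64 methods automatically
    unique(x)
    duplicated(x)

    # discouraged: calling the exported S3 methods directly; these are the
    # kind of calls that the un-export plan above would eventually break
    unique.integer64(x)
    duplicated.integer64(x)
    ```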
Here are the S3 methods that are currently exported, for which I found no record of them being called directly: `-.integer64`, `:.default`, `:.integer64`, `!.integer64`, `!=.integer64`, `[.integer64`, `[[.integer64`, `[[<-.integer64`, `*.integer64`, `/.integer64`, `&.integer64`, `%/%.integer64`, `%%.integer64`, `%in%.default`, `%in%.integer64`, `^.integer64`, `+.integer64`, `<.integer64`, `<=.integer64`, `==.integer64`, `>.integer64`, `>=.integer64`, `|.integer64`, `all.equal.integer64`, `as.bitstring.integer64`, `as.integer64.factor`, `as.integer64.integer64`, `as.integer64.NULL`, `as.list.integer64`, `as.logical.integer64`, `cbind.integer64`, `ceiling.integer64`, `cummax.integer64`, `cummin.integer64`, `cumprod.integer64`, `cumsum.integer64`, `diff.integer64`, `duplicated.integer64`, `floor.integer64`, `hashdup.cache_integer64`, `hashfin.cache_integer64`, `hashfun.integer64`, `hashmap.integer64`, `hashmaptab.integer64`, `hashmapuni.integer64`, `hashmapupo.integer64`, `hashpos.cache_integer64`, `hashrev.cache_integer64`, `hashrin.cache_integer64`, `hashtab.cache_integer64`, `hashuni.cache_integer64`, `hashupo.cache_integer64`, `is.double.default`, `is.double.integer64`, `is.finite.integer64`, `is.infinite.integer64`, `is.nan.integer64`, `is.sorted.integer64`, `is.vector.integer64`, `keypos.integer64`, `length<-.integer64`, `log10.integer64`, `log2.integer64`, `match.default`, `match.integer64`, `mean.integer64`, `median.integer64`, `mergeorder.integer64`, `mergesort.integer64`, `mergesortorder.integer64`, `na.count.integer64`, `nties.integer64`, `nunique.integer64`, `nvalid.integer64`, `order.default`, `order.integer64`, `orderdup.integer64`, `orderfin.integer64`, `orderkey.integer64`, `ordernut.integer64`, `orderpos.integer64`, `orderqtl.integer64`, `orderrnk.integer64`, `ordertab.integer64`, `ordertie.integer64`, `orderuni.integer64`, `orderupo.integer64`, `prank.integer64`, `print.bitstring`, `prod.integer64`, `qtile.integer64`, `quantile.integer64`, `quickorder.integer64`, `quicksort.integer64`, `quicksortorder.integer64`, `radixorder.integer64`, `radixsort.integer64`, `radixsortorder.integer64`, `ramorder.integer64`, `ramsort.integer64`, `ramsortorder.integer64`, `range.integer64`, `rank.default`, `rbind.integer64`, `round.integer64`, `scale.integer64`, `shellorder.integer64`, `shellsort.integer64`, `shellsortorder.integer64`, `sign.integer64`, `signif.integer64`, `sort.integer64`, `sortfin.integer64`, `sortnut.integer64`, `sortorderdup.integer64`, `sortorderkey.integer64`, `sortorderpos.integer64`, `sortorderrnk.integer64`, `sortordertab.integer64`, `sortordertie.integer64`, `sortorderuni.integer64`, `sortorderupo.integer64`, `sortql.integer64`, `sorttab.integer64`, `sortuni.integer64`, `sqrt.integer64`, `summary.integer64`, `table.integer64`, `tiepos.integer64`, `trunc.integer64`, `unipos.integer64` Here are the S3 methods that are currently exported for which I _do_ find record of them being called directly: `abs.integer64`, `as.character.integer64`, `as.data.frame.integer64`, `as.double.integer64`, `as.integer.integer64`, `as.integer64.bitstring`, `as.integer64.character`, `as.integer64.double`, `as.integer64.integer`, `as.integer64.logical`, `c.integer64`, `format.integer64`, `identical.integer64`, `is.na.integer64`, `lim.integer64`, `max.integer64`, `min.integer64`, `print.integer64`, `rank.integer64`, `seq.integer64`, `str.integer64`, `sum.integer64`, `unique.integer64` In the next release (provisionally, 4.7.0), I will add a `warning()` to any S3 method in the former 
classification, while nothing will change for the latter classification. I may reach out to authors observed to call the methods directly. In the subsequent release (provisionally, 4.8.0), I will un-export any S3 method in the former classification, and add a `warning()` to any S3 method in the latter classification. In the sub-subsequent release (provisionally, 4.9.0), I will un-export any S3 method in the latter classification. Please reach out (e.g., the GitHub log for #76) if you have any concerns about this plan. 1. {bit64} lists {bit} as `Depends:`. IMO this form of dependency should be deprecated by R now that `Imports:` is widely available and well-supported for many years. In the next release (provisionally, 4.7.0), I will move bit to Imports. The practical implication is that currently, `library(bit64)` will make {bit} objects like `is.bit()` available for use without namespace-qualification. This practice makes code harder to read and maintain. Users relying on this in scripts can (1) write `library(bit)` to attach {bit} explicitly or (2) namespace-qualify all {bit} calls with `bit::`. Package authors relying on this can (1) add `import(bit)` to make the full {bit} namespace available or (2) namespace-qualify all {bit} calls with `bit::`; adding {bit} to `Imports:` or `Suggests:` will also be necessary. I will reach out to CRAN authors with any required changes. Depending on the impact size, I might make this transition more gradual (e.g. starting by re-exporting some or all {bit} functions from {bit64}, with warning, before un-exporting them in a subsequent release). ## NEW FEATURES 1. Implemented S3 methods for `rowSums()` and `colSums()`. Importantly they handle `NA` values correctly, #38. Thanks @vlulla for the request. Note that these are implemented as wrappers to `apply()` calls, so they may not be as efficient. PRs welcome for implementing the efficient equivalents. Note that by necessity, this grows the set of base exports overwritten to include `rowSums()` and `colSums()`, which are exported as S3 generics dispatching to `base::rowSums()` and `base::colSums()` by default. 1. Partially powering this is a new `aperm()` method for integer64 which allows `apply()` to work as intended. Using `apply()` directly may still strip the integer64 class; that may be supported later (see #87). 1. `is.na()` is supported for long vector input (more than `2^31` elements), #30. Thanks @ilia-kats for the request. Long vector support will be added on an as-needed basis as I don't have a great machine for testing these features -- PRs welcome! ## BUG FIXES 1. `all.equal.integer64()` gets the same fix for vector `scale=` to work as intended that `all.equal.numeric()` got in R 4.1.3, #23. 1. Made edits to `match()` to handle `is.integer64(table)` better for older versions of R, including a new `mtfrm()` method for integer64 objects in R>=4.2.0, #85 and #111. ## NOTES 1. After creating, developing, and maintaining {bit64} for about 13 years, Jens Oehlschlägel has decided to step down as maintainer of the package. Michael Chirico will take over in this duty. Thank you Jens for creating such a wonderful & important part of the R ecosystem! I don't have any major plans for new features, and mostly hope to keep the package running and up to date. Contributors most welcome! I am also trying to freshen up the code base to make contribution easier. 1. The R version dependency has increased from 3.0.1 (May 2013) to 3.4.0 (April 2017). 
We plan to keep roughly the same R dependency as {data.table}, i.e., as old as possibly for as long as possible, with some bias towards gradually bringing in new R features to reduce the maintenance overhead of a growing nest of workarounds to keep the package "fresh" for users of the latest R versions. Required package {bit} already requires R 3.4.0, so the old 3.0.1 requirement was effectively impossible anyway. 1. Default packages {methods}, {stats}, and {utils} are now `Imports:`, not `Depends:`, dependencies. `Depends:` is an out-dated mode of dependency in R. This will only affect the small audience of users that run R with `R_DEFAULT_PACKAGES=NULL` (or some other subset excluding some of these three), _and_ who are relying (perhaps implicitly) on {bit64} being responsible for attaching those packages. It is my intention to move {bit} from `Depends:` to `Imports:` as well, but this migration will be done more gingerly -- it is more conceivable that this will constitute a breaking change for some use cases, therefore it will be done in phases. Nothing is done in this release, but here is your earliest warning that from the next release, it will be a warning to rely on {bit64} to attach {bit} functions for you. 1. Package documentation is now managed with {roxygen2}, #61. I tried to retain everything in the original documentation, but the diff required to do so was quite unmanageable (5,000+ lines), so please alert me if anything looks amiss. Most importantly, I ensured the NAMESPACE remains unchanged. 1. The signature of `identical.integer64()` loses `extptr.as.ref=`, which is unavailable for R<4.2.0, but gains `...` to allow this argument in newer versions, #37. This retains the transparency of having all arguments named in the signature (and thus in `?identical.integer64` as well as available for tab-completion) while also retaining the old R version dependency R 3.3.0. # bit64 NEWS for versions 0.8-3 through 4.5.2 are now in [NEWS.0](https://github.com/r-lib/bit64/blob/master/NEWS.0) bit64/data/0000755000176200001440000000000014674440514012124 5ustar liggesusersbit64/data/optimizer64.data.rda0000644000176200001440000000416114674440514015722 0ustar liggesusersY PTiPqQL$@Y#s1Qa PDvWvda]Q$L3&IcuLIjhc& %[Qy{`wPi8s{{&ƤT(B9:]#d k7@ o2fN;~$H , A^o)&Du /$fŭdH< >~\Ц L)s,bo 6gtm,<ۺu& ٧ sʚ$|/N;+i=$7nf#ѕz- =L%0mw)eh ; = s`ɆvUHK*w5Eu5~60XTѷt,$|.=U ~;VxiLak!7&7Ŝ_evc-FR0ݩR˳RDEEJlQCtI+Hԓ!TIe>xڔ{nnCD µx\<ѥ~V%jptg%A즤n fxFlX]9o0wQi.H-1_!4m mf&yӥ=N}/94L=a$R$0~C`c;5E&}T< ʏ8򕜨7`^Vǜul_OK!c>a9-yFY -Ɖug[B_m {Cn1gu_EƦ5{!ڴٽb]g_p訁$× hLxs:KfDKԓcIik;h0Nn1ySNMn)[aXh">H):FIB8wښZx%xBfJ%4}4%b=޽ⷵj[qk ; =]=vn=_Dd?X\,i|~~/Lz\J!';@[λA $z!a Ɖmz%%q&ίH"l#pnَ5P |Rkgkk C 'X-8n~¸dc0s 9{^ҟ௤ Yߪt{FNtn$lه ɰz\?F7] fd2%MǎK KhL>#OK'-%l]I> s ZoQ\ 5~(}tK*iVܤgaeṚdQ )|=uKWO}䶑Y+6 dF1/9y>Dz|J^ߋ ? hySFw 5(s+W]u"vv}[~a|}@ cQ7_9ٜ@̳G؟JJJToPIddٛ]zt s@9"L$,#~6)/@*'ȳ[hͽZ22aL0~v)҇`7` ~rlځ*& z}Ҿh|"ʓcuBèr^ìȇh!y3nŞ/ɳ}֗sON]t!\"p&e=;.%0msrђg[!G"!"XD#jz|FArm%tA^D%T-j9]m! 
bȇ)WU5k.D 'T͋ndF646!wdt+A#bit64/src/0000755000176200001440000000000014742210743011775 5ustar liggesusersbit64/src/hash64.c0000644000176200001440000003444414705122715013247 0ustar liggesusers/* # C-Code for hashing and matching # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2012-10-22 #*/ /* for speed (should not really matter in this case as most time is spent in the hashing) */ // #define USE_RINTERNALS 1 #include #include //#include "timing.h" // This multiplicator was used in Simon Urbanek's package fastmatch for 32-bit integers //#define HASH64(X, SHIFT) (314159265358979323ULL * ((unsigned long long)(X)) >> (SHIFT)) // This multiplicator seems to work fine with 64bit integers #define HASH64(X, SHIFT) (0x9e3779b97f4a7c13ULL * ((unsigned long long)(X)) >> (SHIFT)) SEXP hashfun_integer64(SEXP x_, SEXP bits_, SEXP ret_){ int i, n = LENGTH(x_); long long * x = (long long *) REAL(x_); unsigned int * ret = (unsigned int *) INTEGER(ret_); int shift = 64 - asInteger(bits_); for(i=0; i0){ ret[hashpos[h]-1] = FALSE; nunique--; if (nunique<1) break; } return ret_; } SEXP hashuni_integer64(SEXP hashdat_, SEXP bits_, SEXP hashpos_, SEXP keep_order_, SEXP ret_){ int h, nh = LENGTH(hashpos_); int u, nu = LENGTH(ret_); long long * hashdat = (long long *) REAL(hashdat_); unsigned int * hashpos = (unsigned int *) INTEGER(hashpos_); long long * ret = (long long *) REAL(ret_); if (asLogical(keep_order_)){ int i; // int nx = LENGTH(hashdat_); int bits = asInteger(bits_); int shift = 64 - bits; long long v; for(u=0,i=0; u0){ ret[u++] = hashdat[hashpos[h]-1]; } } return ret_; } SEXP hashmapuni_integer64(SEXP x_, SEXP bits_, SEXP hashpos_, SEXP nunique_){ int i, nx = LENGTH(x_); int h, nh = LENGTH(hashpos_); int nu = 0; SEXP ret_; PROTECT_INDEX idx; PROTECT_WITH_INDEX(ret_ = allocVector(REALSXP, nx), &idx); long long * ret = (long long *) REAL(ret_); long long * x = (long long *) REAL(x_); unsigned int * hashpos = (unsigned int *) INTEGER(hashpos_); int bits = asInteger(bits_); int shift = 64 - bits; long long v; for(i=0; i0){ ret[u++] = hashpos[h]; } } return ret_; } SEXP hashmapupo_integer64(SEXP x_, SEXP bits_, SEXP hashpos_, SEXP nunique_){ int i, nx = LENGTH(x_); int h, nh = LENGTH(hashpos_); int nu = 0; SEXP ret_; PROTECT_INDEX idx; PROTECT_WITH_INDEX(ret_ = allocVector(INTSXP, nx), &idx); int * ret = INTEGER(ret_); long long * x = (long long *) REAL(x_); unsigned int * hashpos = (unsigned int *) INTEGER(hashpos_); int bits = asInteger(bits_); int shift = 64 - bits; long long v; for(i=0; i #include #include #include #include # include "integer64.h" /*****************************************************************************/ /** **/ /** DEFINITIONS AND MACROS **/ /** **/ /*****************************************************************************/ #define mod_iterate(n1,n2,i1,i2) for (i=i1=i2=0; iimax){ ret[i] = NA_INTEGER64; naflag = TRUE; }else ret[i] = (long long) x[i]; } } if (naflag)warning(INTEGER64_OVERFLOW_WARNING); return ret_; } SEXP as_integer64_integer(SEXP x_, SEXP ret_){ long long i, n = LENGTH(x_); long long * ret = (long long *) REAL(ret_); int * x = INTEGER(x_); for (i=0; irmax) naflag = TRUE; ret[i] = (double) x[i]; } } if (naflag)warning(INTEGER64_TODOUBLE_WARNING); return ret_; } SEXP as_integer_integer64(SEXP x_, SEXP ret_){ long long i, n = LENGTH(x_); long long * x = (long long *) REAL(x_); int * ret = INTEGER(ret_); Rboolean naflag = FALSE; for (i=0; iMAX_INTEGER32){ 
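/* values outside the 32-bit integer range cannot be represented: map them to
   NA_INTEGER and remember the fact, so that a single overflow warning is
   emitted after the loop */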
ret[i] = NA_INTEGER; naflag = TRUE; }else ret[i] = (int) x[i]; } } if (naflag)warning(INTEGER32_OVERFLOW_WARNING); return ret_; } SEXP as_logical_integer64(SEXP x_, SEXP ret_){ long long i, n = LENGTH(x_); long long * x = (long long *) REAL(x_); int * ret = INTEGER(ret_); for (i=0; i>= 1; } *str = 0; SET_STRING_ELT(ret_, i, mkChar(buff)); R_CheckUserInterrupt(); } return ret_; } SEXP as_integer64_bitstring(SEXP x_, SEXP ret_){ Rboolean naflag = FALSE; int i, k, l, n = LENGTH(x_); long long * ret = (long long *) REAL(ret_); unsigned long long mask; long long v; const char * str; for(i=0; iBITS_INTEGER64){ ret[i] = NA_INTEGER64; naflag = TRUE; break; } mask = 1; v = 0; for (k=l-1; k>=0; k--){ if (str[k] != '0' && str[k] != ' '){ v |= mask; } mask <<= 1; } ret[i] = v; R_CheckUserInterrupt(); } if (naflag)warning(BITSTRING_OVERFLOW_WARNING); return ret_; } __attribute__((no_sanitize("signed-integer-overflow"))) SEXP plus_integer64(SEXP e1_, SEXP e2_, SEXP ret_){ long long i, n = LENGTH(ret_); long long i1, n1 = LENGTH(e1_); long long i2, n2 = LENGTH(e2_); long long * e1 = (long long *) REAL(e1_); long long * e2 = (long long *) REAL(e2_); long long * ret = (long long *) REAL(ret_); Rboolean naflag = FALSE; mod_iterate(n1, n2, i1, i2) { PLUS64(e1[i1],e2[i2],ret[i],naflag) } if (naflag)warning(INTEGER64_OVERFLOW_WARNING); return ret_; } __attribute__((no_sanitize("signed-integer-overflow"))) SEXP minus_integer64(SEXP e1_, SEXP e2_, SEXP ret_){ long long i, n = LENGTH(ret_); long long i1, n1 = LENGTH(e1_); long long i2, n2 = LENGTH(e2_); long long * e1 = (long long *) REAL(e1_); long long * e2 = (long long *) REAL(e2_); long long * ret = (long long *) REAL(ret_); Rboolean naflag = FALSE; mod_iterate(n1, n2, i1, i2) { MINUS64(e1[i1],e2[i2],ret[i],naflag) } if (naflag)warning(INTEGER64_OVERFLOW_WARNING); return ret_; } __attribute__((no_sanitize("signed-integer-overflow"))) SEXP diff_integer64(SEXP x_, SEXP lag_, SEXP n_, SEXP ret_){ long long i, n = *((long long *) REAL(n_)); long long * x = (long long *) REAL(x_); long long * lag = (long long *) REAL(lag_); long long * ret = (long long *) REAL(ret_); long long vlag = *lag; long long v; Rboolean naflag = FALSE; for(i=0; i0) ? FALSE : TRUE; for(i=0; iret[0]){ ret[0] = e1[i]; } } }else{ for(i=0; iret[0]) ret[0] = e1[i]; } } } return ret_; } SEXP range_integer64(SEXP e1_, SEXP na_rm_, SEXP ret_){ long long i, n = LENGTH(e1_); long long * e1 = (long long *) REAL(e1_); long long * ret = (long long *) REAL(ret_); ret[0] = MAX_INTEGER64; ret[1] = MIN_INTEGER64; if (asLogical(na_rm_)){ for(i=0; iret[1]) ret[1] = e1[i]; } } }else{ for(i=0; iret[1]) ret[1] = e1[i]; } } } return ret_; } SEXP lim_integer64(SEXP ret_){ long long * ret = (long long *) REAL(ret_); ret[0] = MIN_INTEGER64; ret[1] = MAX_INTEGER64; return ret_; } SEXP cummin_integer64(SEXP e1_, SEXP ret_){ long long i, n = LENGTH(ret_); long long * e1 = (long long *) REAL(e1_); long long * ret = (long long *) REAL(ret_); if (n>0){ i=0; ret[i] = e1[i]; if(e1[i]!=NA_INTEGER64) for(i=1; i0){ i=0; ret[i] = e1[i]; if(e1[i]!=NA_INTEGER64) for(i=1; iret[i-1] ? 
e1[i] : ret[i-1]; } } for(i++; i0) ret[0] = e1[0]; for(i=1; i0) ret[0] = e1[0]; for(i=1; i0){ ret[0] = from[0]; for(i=1; iU32x2Repr.low = (unsigned int) floor(unif_rand()*4294967296 /* =2^32 */); x->U32x2Repr.high = (unsigned int) floor(unif_rand()*4294967296); } SEXP runif_integer64(SEXP n_, SEXP min_, SEXP max_){ int i, n=asInteger(n_); long long min = *((long long * ) REAL(min_)); long long max = *((long long * ) REAL(max_)); unsigned long long d; // max - min can overflow if (min < 0 && max > 0){ d = ((unsigned long long)(-min)) + ((unsigned long long)max) + 1; }else{ d = (max - min) + 1; } SEXP ret_; PROTECT(ret_ = allocVector(REALSXP, n)); long long * ret = (long long *) REAL(ret_); PunnedU32x2AndLongLong rand_draw; GetRNGstate(); for (i=0; i #include //#include #include "integer64.h" #include "bsearch.h" void R_Busy (int which); SEXP r_ram_integer64_nacount( SEXP x_ ) { int i,n = LENGTH(x_); ValueT *x = (ValueT *) REAL(x_); SEXP ret_; PROTECT( ret_ = allocVector(INTSXP, 1) ); int ret = 0; if (n){ R_Busy(1); for(i=0;i1) nties += ities; nunique++; lasti=i; } } if (lasti<(n-1)) nties += n - lasti; R_Busy(0); } INTEGER(ret_)[0]=nunique; INTEGER(ret_)[1]=nties; UNPROTECT(1); return ret_; } SEXP r_ram_integer64_ordernut( SEXP table_ , SEXP order_ ) { SEXP ret_; int i,lasti,ities,nties=0,nunique=0,n = LENGTH(table_); ValueT *table; table = (ValueT *) REAL(table_); IndexT *index = INTEGER(order_); PROTECT( ret_ = allocVector(INTSXP, 2) ); if (n){ R_Busy(1); nunique=1; lasti = 0; for(i=1;i1) nties += ities; nunique++; lasti=i; } } if (lasti<(n-1)) nties += n - lasti; R_Busy(0); } INTEGER(ret_)[0]=nunique; INTEGER(ret_)[1]=nties; UNPROTECT(1); return ret_; } SEXP r_ram_integer64_sortfin_asc( SEXP x_ /* data vector */ , SEXP sorted_ /* sorted table vector */ , SEXP method_ , SEXP ret_ ) { int i,n = LENGTH(x_); int pos,nt = LENGTH(sorted_); int n1 = nt-1; int method = asInteger(method_); ValueT *data; data = (ValueT *) REAL(x_); ValueT *sorted; sorted = (ValueT *) REAL(sorted_); int *ret = LOGICAL(ret_); R_Busy(1); DEBUG_INIT switch (method){ case 1:{ for(i=0;in1){ for (;in1){ for (;in1){ for (;in1){ for (;i=lasti;j--) ret[index[j]-1] = avgrank; lasti = i; } } avgrank = (lasti + 1 + i)/2.0; for (j=i-1;j>=lasti;j--) ret[index[j]-1] = avgrank; R_Busy(0); } return ret_; } SEXP r_ram_integer64_sortorderrnk_asc( SEXP sorted_ /* somehow sorted table vector */ , SEXP order_ /* sorted table vector */ , SEXP nacount_ , SEXP ret_ ) { int i,j,n = LENGTH(sorted_); ValueT *sorted = (ValueT *) REAL(sorted_); IndexT *index = INTEGER(order_); double * ret = REAL(ret_); double avgrank; int nacount = asInteger(nacount_); int lasti; if (n){ R_Busy(1); for (i=0;i=lasti;j--) ret[index[j]-1] = avgrank; lasti = i; } } avgrank = (lasti + 1 + i)/2.0; for (j=i-1;j>=lasti;j--) ret[index[j]-1] = avgrank; R_Busy(0); } return ret_; } SEXP r_ram_integer64_orderdup_asc( SEXP table_ /* sorted table vector */ , SEXP order_ /* sorted table vector */ , SEXP method_ , SEXP ret_ ) { int i,pos,n = LENGTH(table_); ValueT *table = (ValueT *) REAL(table_); IndexT *index = INTEGER(order_); int method = asInteger(method_); int * ret = LOGICAL(ret_); ValueT lastval; if (n){ R_Busy(1); switch (method){ case 1:{ for (i=0;ij+1){ for (;jj+1){ for (;jj+1){ for (;jj+1){ for (;j=0;l--) if (sorted[l]!=sorted[r]){ for (i=l+1;i<=r;i++,j++) ret[j] = sorted[i]; r=l; } for (i=l+1;i<=r;i++,j++) ret[j] = sorted[i]; }else{ for (i=0,j=0;i=0;l--) if (sorted[l]!=sorted[r]){ for (i=l+1;i<=r;i++,j++) ret[j] = index[i]; r=l; } for (i=l+1;i<=r;i++,j++) ret[j] = 
index[i]; }else{ for (i=0,j=0;i=0;l--) if (data[index[l]]!=data[index[r]]){ for (i=l+1;i<=r;i++,j++) ret[j] = index[i]; r=l; } for (i=l+1;i<=r;i++,j++) ret[j] = index[i]; }else{ for (i=0,j=0;i 0) ? ((y) < (z)) : ! ((y) < (z))) # define GOODIDIFF64(x, y, z) (!(OPPOSITE_SIGNS(x, y) && OPPOSITE_SIGNS(x, z))) #else # define GOODISUM64(x, y, z) ((long double) (x) + (long double) (y) == (z)) # define GOODIDIFF64(x, y, z) ((long double) (x) - (long double) (y) == (z)) #endif #define GOODIPROD64(x, y, z) ((long double) (x) * (long double) (y) == (z)) #define INTEGER32_OVERFLOW_WARNING "NAs produced by integer overflow" #define INTEGER64_OVERFLOW_WARNING "NAs produced by integer64 overflow" #define INTEGER64_DIVISION_BY_ZERO_WARNING "NAs produced due to division by zero" #define INTEGER64_NAN_CREATED_WARNING "NaNs produced" #define INTEGER64_TODOUBLE_WARNING "integer precision lost while converting to double" #define BITSTRING_OVERFLOW_WARNING "bitstrings longer than 64 bytes converted to NA, multibyte-characters not allowed" #define PLUS64(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = e1 + e2; \ if (!GOODISUM64(e1, e2, ret)) \ ret = NA_INTEGER64; \ if (ret == NA_INTEGER64) \ naflag = TRUE; \ } #define MINUS64(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = e1 - e2; \ if (!GOODIDIFF64(e1, e2, ret)) \ ret = NA_INTEGER64; \ if (ret == NA_INTEGER64) \ naflag = TRUE; \ } #define PROD64(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = e1 * e2; \ if (!GOODIPROD64(e1, e2, ret)) \ ret = NA_INTEGER64; \ if (ret == NA_INTEGER64) \ naflag = TRUE; \ } #define PROD64REAL(e1,e2,ret,naflag,longret) \ if (e1 == NA_INTEGER64 || ISNAN(e2)) \ ret = NA_INTEGER64; \ else { \ longret = e1 * (long double) e2; \ if (isnan(longret) || longret>MAX_INTEGER64){ \ naflag = TRUE; \ ret = NA_INTEGER64; \ }else \ ret = llroundl(longret); \ } #define POW64(e1,e2,ret,naflag, longret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ longret = pow(e1, (long double) e2); \ if (isnan(longret)){ \ naflag = TRUE; \ ret = NA_INTEGER64; \ }else \ ret = llroundl(longret); \ } #define POW64REAL(e1,e2,ret,naflag,longret) \ if (e1 == NA_INTEGER64 || ISNAN(e2)) \ ret = NA_INTEGER64; \ else { \ longret = pow(e1, (long double) e2); \ if (isnan(longret)){ \ naflag = TRUE; \ ret = NA_INTEGER64; \ }else \ ret = llroundl(longret); \ } #define DIVIDE64REAL(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || ISNAN(e2)) \ ret = NA_REAL; \ else { \ if (e2==0) \ ret = NA_REAL; \ else \ ret = (double)((long double) e1 / (long double) e2); \ if (ISNAN(ret)) \ naflag = TRUE; \ } /* Ofek Shilon */ #define DIVIDEREAL64(e1,e2,ret,naflag) \ if (e2 == NA_INTEGER64 || ISNAN(e1)) \ ret = NA_REAL; \ else { \ if (e2==0) \ ret = NA_REAL; \ else \ ret = (double)((long double) e1 / (long double) e2); \ if (ISNAN(ret)) \ naflag = TRUE; \ } \ #define DIVIDE64(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ if (e2==0) \ ret = NA_REAL; \ else \ ret = (double)((long double) e1 / (long double) e2); \ if (ISNAN(ret)) \ naflag = TRUE; \ } #define INTDIV64(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ if (e2==0) \ ret = NA_INTEGER64; \ else \ ret = e1 / e2; \ if (ret == NA_INTEGER64) \ naflag = TRUE; \ } #define MOD64(e1,e2,ret,naflag) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ 
ret = NA_INTEGER64; \ else { \ if (e2==0) \ ret = NA_INTEGER64; \ else \ ret = e1 / e2; \ if (ret == NA_INTEGER64) \ naflag = TRUE; \ else \ ret = e1 - e2 * ret; \ } #define MIN64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = (e1 < e2) ? e1 : e2; \ } #define MAX64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = (e1 < e2) ? e2 : e1; \ } #define ABS64(e1,ret) \ if (e1 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = (e1 < 0) ? -e1 : e1; \ } #define SQRT64(e1, ret, naflag) \ if (e1 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ if (e1 < 0) \ naflag = TRUE; \ ret = (double) sqrt((long double)e1); \ } #define LOG64(e1, ret, naflag) \ if (e1 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ ret = (double) logl((long double)e1); \ if (isnan(ret)) \ naflag = TRUE; \ } #define LOGVECT64(e1, e2, ret, naflag) \ if (e1 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ ret = (double) logl((long double)e1)/log(e2); \ if (isnan(ret)) \ naflag = TRUE; \ } #define LOGBASE64(e1, e2, ret, naflag) \ if (e1 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ ret = (double) logl((long double)e1)/e2; \ if (isnan(ret)) \ naflag = TRUE; \ } #define LOG1064(e1, ret, naflag) \ if (e1 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ ret =(double) log10l((long double)e1); \ if (isnan(ret)) \ naflag = TRUE; \ } #define LOG264(e1, ret, naflag) \ if (e1 == NA_INTEGER64) \ ret = NA_REAL; \ else { \ ret = (double) log2l((long double)e1); \ if (isnan(ret)) \ naflag = TRUE; \ } #define SIGN64(e1,ret) \ if (e1 == NA_INTEGER64) \ ret = NA_INTEGER64; \ else { \ ret = (e1 < 0) ? -1 : ((e1 > 0) ? 1 : 0); \ } #define EQ64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_LOGICAL; \ else { \ ret = (e1 == e2) ? TRUE : FALSE; \ } #define NE64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_LOGICAL; \ else { \ ret = (e1 != e2) ? TRUE : FALSE; \ } #define LT64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_LOGICAL; \ else { \ ret = (e1 < e2) ? TRUE : FALSE; \ } #define LE64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_LOGICAL; \ else { \ ret = (e1 <= e2) ? TRUE : FALSE; \ } #define GT64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_LOGICAL; \ else { \ ret = (e1 > e2) ? TRUE : FALSE; \ } #define GE64(e1,e2,ret) \ if (e1 == NA_INTEGER64 || e2 == NA_INTEGER64) \ ret = NA_LOGICAL; \ else { \ ret = (e1 >= e2) ? 
TRUE : FALSE; \ } /*****************************************************************************/ /** **/ /** TYPEDEFS AND STRUCTURES **/ /** **/ /*****************************************************************************/ /*****************************************************************************/ /** **/ /** EXPORTED VARIABLES **/ /** **/ /*****************************************************************************/ #ifndef _INTEGER64_C_SRC #endif /*****************************************************************************/ /** **/ /** EXPORTED FUNCTIONS **/ /** **/ /*****************************************************************************/ #endif /*****************************************************************************/ /** **/ /** EOF **/ /** **/ /*****************************************************************************/ bit64/src/cache.c0000644000176200001440000000254114705122715013206 0ustar liggesusers#include #include /* SEXP r_ram_truly_identical( SEXP x_ , SEXP y_ ) { SEXP ret_; Rboolean ret; if(!isVectorAtomic(x_)){ error("SEXP is not atomic vector"); return R_NilValue; } if (TYPEOF(x_)!=TYPEOF(y_)){ error("vectors don't have identic type"); return R_NilValue; } //somehow is DATAPTR not declared: ret = DATAPTR(x_)==DATAPTR(y_) ? TRUE : FALSE; switch (TYPEOF(x_)) { case CHARSXP: ret = CHAR(x_)==CHAR(y_) ? TRUE : FALSE; break; case LGLSXP: ret = LOGICAL(x_)==LOGICAL(y_) ? TRUE : FALSE; case INTSXP: ret = INTEGER(x_)==INTEGER(y_) ? TRUE : FALSE; break; case REALSXP: ret = REAL(x_)==REAL(y_) ? TRUE : FALSE; break; case CPLXSXP: ret = COMPLEX(x_)==COMPLEX(y_) ? TRUE : FALSE; break; case STRSXP: ret = STRING_PTR(x_)==STRING_PTR(y_) ? TRUE : FALSE; break; case VECSXP: ret = VECTOR_PTR(x_)==VECTOR_PTR(y_) ? TRUE : FALSE; case RAWSXP: ret = RAW(x_)==RAW(y_) ? 
TRUE : FALSE; break; default: error("unimplemented type in truly.identical"); return R_NilValue; } if (LENGTH(x_)!=LENGTH(y_)){ ret = FALSE; } PROTECT( ret_ = allocVector(LGLSXP, 1) ); INTEGER(ret_)[0] = ret; UNPROTECT(1); return ret_; } */ bit64/src/bsearch.c0000644000176200001440000002212214674440514013554 0ustar liggesusers/* # C-Code for binary search # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 */ #include "bsearch.h" IndexT integer64_bsearch_asc_EQ(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_ASC_EQ(data, l, r, value, return ) } IndexT integer64_bsearch_asc_GE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_ASC_GE(data, l, r, value, return ) } IndexT integer64_bsearch_asc_GT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_ASC_GT(data, l, r, value, return ) } IndexT integer64_bsearch_asc_LE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_ASC_LE(data, l, r, value, return ) } IndexT integer64_bsearch_asc_LT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_ASC_LT(data, l, r, value, return ) } IndexT integer64_bsearch_desc_EQ(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_DESC_EQ(data, l, r, value, return ) } IndexT integer64_bsearch_desc_GE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_DESC_GE(data, l, r, value, return ) } IndexT integer64_bsearch_desc_GT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_DESC_GT(data, l, r, value, return ) } IndexT integer64_bsearch_desc_LE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_DESC_LE(data, l, r, value, return ) } IndexT integer64_bsearch_desc_LT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_BSEARCH_DESC_LT(data, l, r, value, return ) } IndexT integer64_lsearch_asc_EQ(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_ASC_EQ(data, l, r, value, return ) } IndexT integer64_lsearch_asc_GE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_ASC_GE(data, l, r, value, return ) } IndexT integer64_lsearch_asc_GT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_ASC_GT(data, l, r, value, return ) } IndexT integer64_lsearch_asc_LE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_ASC_LE(data, l, r, value, return ) } IndexT integer64_lsearch_asc_LT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_ASC_LT(data, l, r, value, return ) } IndexT integer64_lsearch_desc_EQ(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_DESC_EQ(data, l, r, value, return ) } IndexT integer64_lsearch_desc_GE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_DESC_GE(data, l, r, value, return ) } IndexT integer64_lsearch_desc_GT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_DESC_GT(data, l, r, value, return ) } IndexT integer64_lsearch_desc_LE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_DESC_LE(data, l, r, value, return ) } IndexT integer64_lsearch_desc_LT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_LSEARCH_DESC_LT(data, l, r, value, return ) } IndexT integer64_rsearch_asc_EQ(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_ASC_EQ(data, l, r, value, return ) } IndexT integer64_rsearch_asc_GE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_ASC_GE(data, l, r, value, return ) } IndexT 
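/* The remaining wrappers below follow the same pattern as those above: each one
   simply expands the corresponding INTEGER64_*SEARCH_* macro from bsearch.h with
   `return`, varying only the comparison (EQ/GE/GT/LE/LT), the sort direction
   (asc/desc), and the search flavor encoded in the macro name. */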
integer64_rsearch_asc_GT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_ASC_GT(data, l, r, value, return ) } IndexT integer64_rsearch_asc_LE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_ASC_LE(data, l, r, value, return ) } IndexT integer64_rsearch_asc_LT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_ASC_LT(data, l, r, value, return ) } IndexT integer64_rsearch_desc_EQ(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_DESC_EQ(data, l, r, value, return ) } IndexT integer64_rsearch_desc_GE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_DESC_GE(data, l, r, value, return ) } IndexT integer64_rsearch_desc_GT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_DESC_GT(data, l, r, value, return ) } IndexT integer64_rsearch_desc_LE(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_DESC_LE(data, l, r, value, return ) } IndexT integer64_rsearch_desc_LT(ValueT *data, IndexT l, IndexT r, ValueT value){ INTEGER64_RSEARCH_DESC_LT(data, l, r, value, return ) } IndexT integer64_bosearch_asc_EQ(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_ASC_EQ(data, index, l, r, value, return) } IndexT integer64_bosearch_asc_GE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_ASC_GE(data, index, l, r, value, return) } IndexT integer64_bosearch_asc_GT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_ASC_GT(data, index, l, r, value, return) } IndexT integer64_bosearch_asc_LE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_ASC_LE(data, index, l, r, value, return) } IndexT integer64_bosearch_asc_LT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_ASC_LT(data, index, l, r, value, return) } IndexT integer64_bosearch_desc_EQ(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_DESC_EQ(data, index, l, r, value, return) } IndexT integer64_bosearch_desc_GE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_DESC_GE(data, index, l, r, value, return) } IndexT integer64_bosearch_desc_GT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_DESC_GT(data, index, l, r, value, return) } IndexT integer64_bosearch_desc_LE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_DESC_LE(data, index, l, r, value, return) } IndexT integer64_bosearch_desc_LT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_BOSEARCH_DESC_LT(data, index, l, r, value, return) } IndexT integer64_losearch_asc_EQ(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_ASC_EQ(data, index, l, r, value, return) } IndexT integer64_losearch_asc_GE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_ASC_GE(data, index, l, r, value, return) } IndexT integer64_losearch_asc_GT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_ASC_GT(data, index, l, r, value, return) } IndexT integer64_losearch_asc_LE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_ASC_LE(data, index, l, r, value, return) } IndexT integer64_losearch_asc_LT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_ASC_LT(data, index, l, r, value, return) } IndexT integer64_losearch_desc_EQ(ValueT *data, IndexT *index, 
IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_DESC_EQ(data, index, l, r, value, return) } IndexT integer64_losearch_desc_GE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_DESC_GE(data, index, l, r, value, return) } IndexT integer64_losearch_desc_GT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_DESC_GT(data, index, l, r, value, return) } IndexT integer64_losearch_desc_LE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_DESC_LE(data, index, l, r, value, return) } IndexT integer64_losearch_desc_LT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_LOSEARCH_DESC_LT(data, index, l, r, value, return) } IndexT integer64_rosearch_asc_EQ(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_ASC_EQ(data, index, l, r, value, return) } IndexT integer64_rosearch_asc_GE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_ASC_GE(data, index, l, r, value, return) } IndexT integer64_rosearch_asc_GT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_ASC_GT(data, index, l, r, value, return) } IndexT integer64_rosearch_asc_LE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_ASC_LE(data, index, l, r, value, return) } IndexT integer64_rosearch_asc_LT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_ASC_LT(data, index, l, r, value, return) } IndexT integer64_rosearch_desc_EQ(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_DESC_EQ(data, index, l, r, value, return) } IndexT integer64_rosearch_desc_GE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_DESC_GE(data, index, l, r, value, return) } IndexT integer64_rosearch_desc_GT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_DESC_GT(data, index, l, r, value, return) } IndexT integer64_rosearch_desc_LE(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_DESC_LE(data, index, l, r, value, return) } IndexT integer64_rosearch_desc_LT(ValueT *data, IndexT *index, IndexT l, IndexT r, ValueT value){ INTEGER64_ROSEARCH_DESC_LT(data, index, l, r, value, return) } bit64/src/sort64.c0000644000176200001440000017345314705122715013317 0ustar liggesusers/* # C-Code for sorting and ordering # S3 atomic 64bit integers for R # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 */ #define _SORT64_C_SRC /*****************************************************************************/ /** **/ /** MODULES USED **/ /** **/ /*****************************************************************************/ #include "sort64.h" /*****************************************************************************/ /** **/ /** DEFINITIONS AND MACROS **/ /** **/ /*****************************************************************************/ #define SHELLARRAYSIZE 16 /*****************************************************************************/ /** **/ /** TYPEDEFS AND STRUCTURES **/ /** **/ /*****************************************************************************/ /*****************************************************************************/ /** **/ /** PROTOTYPYPES OF LOCAL FUNCTIONS **/ /** **/ /*****************************************************************************/ // static // returns 
uniform random index in range 0..(n-1) static IndexT randIndex( IndexT n // number of positions to random select from ); // returns one of {a,b,c} such that it represents the median of data[{a,b,c}] static IndexT ram_integer64_median3( ValueT *data // pointer to data , IndexT a // pos in data , IndexT b // pos in data , IndexT c // pos in data ); // returns one of {a,b,c} such that it represents the median of data[index[{a,b,c}]] static IndexT ram_integer64_median3index( ValueT *data // pointer to data , IndexT *index // index positions into data , IndexT a // pos in index , IndexT b // pos in index , IndexT c // pos in index ); /*****************************************************************************/ /** **/ /** EXPORTED VARIABLES **/ /** **/ /*****************************************************************************/ // no static no extern IndexT compare_counter; IndexT move_counter; /*****************************************************************************/ /** **/ /** GLOBAL VARIABLES **/ /** **/ /*****************************************************************************/ // static static const ValueT shellincs[SHELLARRAYSIZE] = {1073790977, 268460033, 67121153, 16783361, 4197377, 1050113, 262913, 65921, 16577, 4193, 1073, 281, 77, 23, 8, 1}; /*****************************************************************************/ /** **/ /** EXPORTED FUNCTIONS **/ /** **/ /*****************************************************************************/ // no extern /* { === NA handling for integer64 ================================================ */ // post sorting NA handling int ram_integer64_fixsortNA( ValueT *data // RETURNED: pointer to data vector , IndexT n // length of data vector , int has_na // 0 for pure doubles, 1 if NA or NaN can be present , int na_last // 0 for placing NA NaN left, 1 for placing NA NaN right , int decreasing // 0 for ascending, 1 for descending (must match the same parameter in sorting) ) { if (has_na){ IndexT i,nNA = 0 ; if (decreasing){ for (i=n-1; i>=0; i--){ if (ISNA_INTEGER64(data[i])) nNA++; else break; } if (!na_last){ for (;i>=0; i--) data[i+nNA] = data[i]; for (i=nNA-1;i>=0; i--) data[i] = NA_INTEGER64; } }else{ for (i=0; i=0; i--){ if (ISNA_INTEGER64(data[i])) nNA++; else break; } if (!na_last){ if (!auxindex) auxindex = (IndexT *) R_alloc(nNA, sizeof(IndexT)); offset = n-nNA; for (i=nNA-1;i>=0;i--) auxindex[i] = index[offset + i]; for (i=offset-1;i>=0;i--){ index[i+nNA] = index[i]; data[i+nNA] = data[i]; } for (i=nNA-1;i>=0;i--){ index[i] = auxindex[i]; data[i] = NA_INTEGER64; } } }else{ for (i=0; i=0; i--){ if (ISNA_INTEGER64(data[index[i]])) nNA++; else break; } if (!na_last){ if (!auxindex) auxindex = (IndexT *) R_alloc(nNA, sizeof(IndexT)); offset = n-nNA; for (i=nNA-1;i>=0;i--) auxindex[i] = index[offset + i]; for (i=offset-1;i>=0;i--){ index[i+nNA] = index[i]; } for (i=nNA-1;i>=0;i--){ index[i] = auxindex[i]; } } }else{ for (i=0; il;i--){ COMPEXCH(data[i-1], data[i], t) } for (i=l+2;i<=r;i++){ IndexT j=i; ValueT v; MOVE(v, data[i]) while (LESS(v,data[j-1])){ MOVE(data[j], data[j-1]) j--; } MOVE(data[j], v) } } // ascending insertion sortordering void ram_integer64_insertionsortorder_asc( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT i, ti; ValueT t; for (i=r;i>l;i--){ COMPEXCHi(data[i-1], data[i], t, index[i-1], index[i], ti) } for (i=l+2;i<=r;i++){ IndexT j=i, vi; ValueT v; 
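/* remember the current value and its paired index, shift all larger elements
   (and their indices) one slot to the right, then drop the remembered pair into
   the gap; stopping at equal keys is what keeps this sort stable */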
MOVE(vi, index[i]) MOVE(v, data[i]) while (LESS(v,data[j-1])){ MOVE(index[j], index[j-1]) MOVE(data[j], data[j-1]) j--; } MOVE(index[j], vi) MOVE(data[j], v) } } // ascending insertion sortordering void ram_integer64_insertionorder_asc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT i, ti; for (i=r;i>l;i--){ KEYCOMPEXCH(index[i-1], index[i], ti) } for (i=l+2;i<=r;i++){ IndexT j=i, vi; ValueT v; MOVE(vi, index[i]) MOVE(v, data[vi]) while (LESS(v,data[index[j-1]])){ MOVE(index[j], index[j-1]) j--; } MOVE(index[j], vi) } } // descending insertion sorting void ram_integer64_insertionsort_desc( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT i; ValueT t; for (i=l;i=l;i--){ IndexT j=i; ValueT v; MOVE(v, data[i]) while (LESS(v,data[j+1])){ MOVE(data[j], data[j+1]) j++; } MOVE(data[j], v) } } // descending insertion sortordering void ram_integer64_insertionsortorder_desc( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT i, ti; ValueT t; for (i=l;i=l;i--){ IndexT j=i, vi; ValueT v; MOVE(vi, index[i]) MOVE(v, data[i]) while (LESS(v,data[j+1])){ MOVE(index[j], index[j+1]) MOVE(data[j], data[j+1]) j++; } MOVE(index[j], vi) MOVE(data[j], v) } } // descending insertion sortordering void ram_integer64_insertionorder_desc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT i, ti; for (i=l;i=l;i--){ IndexT j=i, vi; ValueT v; MOVE(vi, index[i]) MOVE(v, data[vi]) while (LESS(v,data[index[j+1]])){ MOVE(index[j], index[j+1]) j++; } MOVE(index[j], vi) } } /* } === pure C stable insertion sort for integer64 ================================================ */ /* { === pure C stable shell sort for integer64 ================================================ */ void ram_integer64_shellsort_asc(ValueT *data, IndexT l, IndexT r) { ValueT v; IndexT i, j, h, lh, t, n=r-l+1; for (t = 0; shellincs[t] > n; t++); for (h = shellincs[t]; t < SHELLARRAYSIZE; h = shellincs[++t]){ lh = l+h; for (i = lh; i <= r; i++) { MOVE(v, data[i]) j = i; while (j >= lh && LESS(v, data[j - h])){ MOVE(data[j], data[j - h]) j -= h; } MOVE(data[j], v) } } } void ram_integer64_shellsort_desc(ValueT *data, IndexT l, IndexT r) { ValueT v; IndexT i, j, h, lh, t, n=r-l+1; for (t = 0; shellincs[t] > n; t++); for (h = shellincs[t]; t < SHELLARRAYSIZE; h = shellincs[++t]){ lh = l+h; for (i = lh; i <= r; i++) { MOVE(v, data[i]) j = i; while (j >= lh && LESS(data[j - h], v)){ MOVE(data[j], data[j - h]) j -= h; } MOVE(data[j], v) } } } void ram_integer64_shellsortorder_asc(ValueT *data, IndexT *index, IndexT l, IndexT r) { ValueT v; IndexT vi, i, j, h, lh, t, n=r-l+1; for (t = 0; shellincs[t] > n; t++); for (h = shellincs[t]; t < SHELLARRAYSIZE; h = shellincs[++t]){ lh = l+h; for (i = lh; i <= r; i++) { MOVE(vi, index[i]) MOVE(v, data[i]) j = i; while (j >= lh && LESS(v, data[j - h])){ MOVE(index[j], index[j - h]) MOVE(data[j], data[j - h]) j -= h; } MOVE(index[j], vi) MOVE(data[j], v) } } } void ram_integer64_shellsortorder_desc(ValueT *data, IndexT *index, IndexT l, IndexT r) { ValueT v; IndexT 
vi, i, j, h, lh, t, n=r-l+1; for (t = 0; shellincs[t] > n; t++); for (h = shellincs[t]; t < SHELLARRAYSIZE; h = shellincs[++t]){ lh = l+h; for (i = lh; i <= r; i++) { MOVE(vi, index[i]) MOVE(v, data[i]) j = i; while (j >= lh && LESS(data[j - h], v)){ MOVE(index[j], index[j - h]) MOVE(data[j], data[j - h]) j -= h; } MOVE(index[j], vi) MOVE(data[j], v) } } } void ram_integer64_shellorder_asc(ValueT *data, IndexT *index, IndexT l, IndexT r) { ValueT v; IndexT vi, i, j, h, lh, t, n=r-l+1; for (t = 0; shellincs[t] > n; t++); for (h = shellincs[t]; t < SHELLARRAYSIZE; h = shellincs[++t]){ lh = l+h; for (i = lh; i <= r; i++) { MOVE(vi, index[i]) MOVE(v, data[vi]) j = i; while (j >= lh && LESS(v, data[index[j - h]])){ MOVE(index[j], index[j - h]) j -= h; } MOVE(index[j], vi) } } } void ram_integer64_shellorder_desc(ValueT *data, IndexT *index, IndexT l, IndexT r) { ValueT v; IndexT vi, i, j, h, lh, t, n=r-l+1; for (t = 0; shellincs[t] > n; t++); for (h = shellincs[t]; t < SHELLARRAYSIZE; h = shellincs[++t]){ lh = l+h; for (i = lh; i <= r; i++) { MOVE(vi, index[i]) MOVE(v, data[vi]) j = i; while (j >= lh && LESS(data[index[j - h]], v)){ MOVE(index[j], index[j - h]) j -= h; } MOVE(index[j], vi) } } } /* } === pure C stable shellsort sort for integer64 ================================================ */ /* { === pure C stable merge sort for integer64 ================================================ */ /* Sedgewick 8.1 Merging stable merge c=a+b where na=len(a) and nb=len(b) */ // ascending merge for sorting void ram_integer64_sortmerge_asc( ValueT *c // pointer to merge target data vector , ValueT *a // pointer to merge source data vector a , ValueT *b // pointer to merge source data vector b , IndexT na // number of elements in merge source vector a , IndexT nb // number of elements in merge source vector b ) { IndexT i,j,k,K=na+nb; for (i=0,j=0,k=0;k=0;k--){ if (i<0){ for (;k>=0;k--) MOVE(c[k],b[j--]) break; } if (j<0){ for (;k>=0;k--) MOVE(c[k],a[i--]) break; } if (LESS(a[i],b[j])) MOVE(c[k],a[i--]) else MOVE(c[k],b[j--]) } } void ram_integer64_ordermerge_desc(ValueT *data, IndexT *c, IndexT *a, IndexT *b, IndexT na, IndexT nb) { IndexT i,j,k,K=na+nb-1; for (i=na-1,j=nb-1,k=K;k>=0;k--){ if (i<0){ for (;k>=0;k--) MOVE(c[k],b[j--]) break; } if (j<0){ for (;k>=0;k--) MOVE(c[k],a[i--]) break; } if (KEYLESS(a[i],b[j])) MOVE(c[k],a[i--]) else MOVE(c[k],b[j--]) } } void ram_integer64_sortordermerge_desc(ValueT *c, ValueT *a, ValueT *b, IndexT *ci, IndexT *ai, IndexT *bi, IndexT na, IndexT nb) { IndexT i,j,k,K=na+nb-1; for (i=na-1,j=nb-1,k=K;k>=0;k--){ if (i<0){ for (;k>=0;k--){ MOVE(ci[k],bi[j]) MOVE(c[k],b[j--]) } break; } if (j<0){ for (;k>=0;k--){ MOVE(ci[k],ai[i]) MOVE(c[k],a[i--]) } break; } if (LESS(a[i],b[j])){ MOVE(ci[k],ai[i]) MOVE(c[k],a[i--]) }else{ MOVE(ci[k],bi[j]) MOVE(c[k],b[j--]) } } } // merge sorting b ascending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergesort_asc_rec( ValueT *a // pointer to target data vector , ValueT *b // pointer to source data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT m; if (r-l <= INSERTIONSORT_LIMIT_MERGE){ ram_integer64_insertionsort_asc(a, l, r); return; } m = (l+r)/2; ram_integer64_mergesort_asc_rec(b, a, l, m); ram_integer64_mergesort_asc_rec(b, a, m+1, r); ram_integer64_sortmerge_asc(a+l, b+l, b+m+1, m-l+1, r-m); } // merge ordering b ascending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void 
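/* as in the sort variant above, source and target buffers swap roles at every
   recursion level so the merged result always ends up in `a`; runs no longer than
   INSERTIONSORT_LIMIT_MERGE are finished off by the insertion routines instead */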
ram_integer64_mergeorder_asc_rec( ValueT *data // pointer to data vector , IndexT *a // pointer to target index vector , IndexT *b // pointer to source index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT m; if (r-l <= INSERTIONSORT_LIMIT_MERGE){ ram_integer64_insertionorder_asc(data, a, l, r); return; } m = (l+r)/2; ram_integer64_mergeorder_asc_rec(data, b, a, l, m); ram_integer64_mergeorder_asc_rec(data, b, a, m+1, r); ram_integer64_ordermerge_asc(data, a+l, b+l, b+m+1, m-l+1, r-m); } // merge sortordering b ascending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergesortorder_asc_rec( ValueT *a // pointer to target data vector , ValueT *b // pointer to source data vector , IndexT *ai // pointer to target index vector , IndexT *bi // pointer to source index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT m; if (r-l <= INSERTIONSORT_LIMIT_MERGE){ ram_integer64_insertionsortorder_asc(a, ai, l, r); return; } m = (l+r)/2; ram_integer64_mergesortorder_asc_rec(b, a, bi, ai, l, m); ram_integer64_mergesortorder_asc_rec(b, a, bi, ai, m+1, r); ram_integer64_sortordermerge_asc(a+l, b+l, b+m+1, ai+l, bi+l, bi+m+1, m-l+1, r-m); } void ram_integer64_mergesort_desc_rec(ValueT *a, ValueT *b, IndexT l, IndexT r) { IndexT m; if (r-l <= INSERTIONSORT_LIMIT_MERGE){ ram_integer64_insertionsort_desc(a, l, r); return; } m = (l+r)/2; ram_integer64_mergesort_desc_rec(b, a, l, m); ram_integer64_mergesort_desc_rec(b, a, m+1, r); ram_integer64_sortmerge_desc(a+l, b+l, b+m+1, m-l+1, r-m); } void ram_integer64_mergeorder_desc_rec(ValueT *data, IndexT *a, IndexT *b, IndexT l, IndexT r) { IndexT m; if (r-l <= INSERTIONSORT_LIMIT_MERGE){ ram_integer64_insertionorder_desc(data, a, l, r); return; } m = (l+r)/2; ram_integer64_mergeorder_desc_rec(data, b, a, l, m); ram_integer64_mergeorder_desc_rec(data, b, a, m+1, r); ram_integer64_ordermerge_desc(data, a+l, b+l, b+m+1, m-l+1, r-m); } void ram_integer64_mergesortorder_desc_rec(ValueT *a, ValueT *b, IndexT *ai, IndexT *bi, IndexT l, IndexT r) { IndexT m; if (r-l <= INSERTIONSORT_LIMIT_MERGE){ ram_integer64_insertionsortorder_desc(a, ai, l, r); return; } m = (l+r)/2; ram_integer64_mergesortorder_desc_rec(b, a, bi, ai, l, m); ram_integer64_mergesortorder_desc_rec(b, a, bi, ai, m+1, r); ram_integer64_sortordermerge_desc(a+l, b+l, b+m+1, ai+l, bi+l, bi+m+1, m-l+1, r-m); } /* } === pure C stable merge sort for integer64 ================================================ */ // ascending partitioning of data between l and r around pivot in r IndexT ram_integer64_quicksortpart_asc_no_sentinels( ValueT *data // pointer to data , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ) { IndexT i = l-1, j = r; ValueT t,v; MOVE(v, data[r]) for (;;){ ++i; while(LESS(data[i], v)){if (j<=i)break; ++i;}; // explicit stop condition --j; while(LESS(v, data[j])){if (j<=i)break; --j;}; // explicit stop condition if (j<=i)break; EXCH(data[i], data[j], t) } EXCH(data[i], data[r], t) return i; } IndexT ram_integer64_quicksortpart_desc_no_sentinels(ValueT *data, IndexT l, IndexT r){ IndexT i = l-1, j = r; ValueT t,v; MOVE(v, data[r]) for (;;){ ++i; while(LESS(v, data[i])){if (j<=i)break; ++i;}; // explicit stop condition --j; while(LESS(data[j], v)){if (j<=i)break; --j;}; // explicit stop condition if (j<=i)break; EXCH(data[i], data[j], t) } EXCH(data[i], data[r], t) return i; } IndexT 
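/* the *order* partitioning variants below run the same explicit-bounds pivot loop,
   additionally keeping the index vector in sync with the data moves (or, for the
   pure order variants, moving only the index vector); like the functions above,
   each returns the final pivot position */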
ram_integer64_quicksortorderpart_asc_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ IndexT ti, i = l-1, j = r; ValueT t,v; MOVE(v, data[r]) for (;;){ ++i; while(LESS(data[i], v)){if (j<=i)break; ++i;}; // explicit stop condition --j; while(LESS(v, data[j])){if (j<=i)break; --j;}; // explicit stop condition if (j<=i)break; EXCH(index[i], index[j], ti) EXCH(data[i], data[j], t) } EXCH(index[i], index[r], ti) EXCH(data[i], data[r], t) return i; } IndexT ram_integer64_quicksortorderpart_desc_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ IndexT ti,i = l-1, j = r; ValueT t,v; MOVE(v, data[r]) for (;;){ ++i; while(LESS(v, data[i])){if (j<=i)break; ++i;}; // explicit stop condition --j; while(LESS(data[j], v)){if (j<=i)break; --j;}; // explicit stop condition if (j<=i)break; EXCH(index[i], index[j], ti) EXCH(data[i], data[j], t) } EXCH(index[i], index[r], ti) EXCH(data[i], data[r], t) return i; } IndexT ram_integer64_quickorderpart_asc_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ IndexT ti,i = l-1, j = r; ValueT v; MOVE(v, data[index[r]]) for (;;){ ++i; while(LESS(data[index[i]], v)){if (j<=i)break; ++i;}; // explicit stop condition --j; while(LESS(v, data[index[j]])){if (j<=i)break; --j;}; // explicit stop condition if (j<=i)break; EXCH(index[i], index[j], ti) } EXCH(index[i], index[r], ti) return i; } IndexT ram_integer64_quickorderpart_desc_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ IndexT ti, i = l-1, j = r; ValueT v; MOVE(v, data[index[r]]) for (;;){ ++i; while(LESS(v, data[index[i]])){if (j<=i)break; ++i;}; // explicit stop condition --j; while(LESS(data[index[j]], v)){if (j<=i)break; --j;}; // explicit stop condition if (j<=i)break; EXCH(index[i], index[j], ti) } EXCH(index[i], index[r], ti) return i; } void ram_integer64_quicksort_asc_mdr3_no_sentinels( ValueT *data , IndexT l, IndexT r ){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; IndexT m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(data[m], data[r], t) m = ram_integer64_quicksortpart_asc_no_sentinels(data, l, r); ram_integer64_quicksort_asc_mdr3_no_sentinels(data, l, m-1); ram_integer64_quicksort_asc_mdr3_no_sentinels(data, m+1, r); } else ram_integer64_insertionsort_asc(data, l, r); } void ram_integer64_quicksortorder_asc_mdr3_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; IndexT ti, m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) EXCH(data[m], data[r], t) m = ram_integer64_quicksortorderpart_asc_no_sentinels(data, index, l, r); ram_integer64_quicksortorder_asc_mdr3_no_sentinels(data, index, l, m-1); ram_integer64_quicksortorder_asc_mdr3_no_sentinels(data, index, m+1, r); } else ram_integer64_insertionsortorder_asc(data, index, l, r); } void ram_integer64_quickorder_asc_mdr3_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; IndexT ti, m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) EXCH(data[m], data[r], t) m = ram_integer64_quickorderpart_asc_no_sentinels(data, index, l, r); ram_integer64_quickorder_asc_mdr3_no_sentinels(data, index, l, m-1); ram_integer64_quickorder_asc_mdr3_no_sentinels(data, index, m+1, r); } else ram_integer64_insertionorder_asc(data, index, l, r); } void ram_integer64_quicksort_desc_mdr3_no_sentinels(ValueT *data, IndexT l, 
IndexT r){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; IndexT m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(data[m], data[r], t) m = ram_integer64_quicksortpart_desc_no_sentinels(data, l, r); ram_integer64_quicksort_desc_mdr3_no_sentinels(data, l, m-1); ram_integer64_quicksort_desc_mdr3_no_sentinels(data, m+1, r); } else ram_integer64_insertionsort_desc(data, l, r); } void ram_integer64_quicksortorder_desc_mdr3_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; IndexT ti, m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) EXCH(data[m], data[r], t) m = ram_integer64_quicksortorderpart_desc_no_sentinels(data, index, l, r); ram_integer64_quicksortorder_desc_mdr3_no_sentinels(data, index, l, m-1); ram_integer64_quicksortorder_desc_mdr3_no_sentinels(data, index, m+1, r); } else ram_integer64_insertionsortorder_desc(data, index, l, r); } void ram_integer64_quickorder_desc_mdr3_no_sentinels(ValueT *data, IndexT *index, IndexT l, IndexT r){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; IndexT ti, m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) EXCH(data[m], data[r], t) m = ram_integer64_quickorderpart_desc_no_sentinels(data, index, l, r); ram_integer64_quickorder_desc_mdr3_no_sentinels(data, index, l, m-1); ram_integer64_quickorder_desc_mdr3_no_sentinels(data, index, m+1, r); } else ram_integer64_insertionorder_desc(data, index, l, r); } void ram_integer64_quicksort_asc_intro(ValueT *data, IndexT l, IndexT r, int restlevel) { IndexT m; if (restlevel>0){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(data[m], data[r], t) m = ram_integer64_quicksortpart_asc_no_sentinels(data, l, r); restlevel--; ram_integer64_quicksort_asc_intro(data, l, m-1, restlevel); ram_integer64_quicksort_asc_intro(data, m+1, r, restlevel); } else ram_integer64_insertionsort_asc(data, l, r); }else{ ram_integer64_shellsort_asc(data, l, r); } } void ram_integer64_quicksortorder_asc_intro(ValueT *data, IndexT *index, IndexT l, IndexT r, int restlevel) { IndexT m; if (restlevel>0){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ IndexT ti; ValueT t; m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) EXCH(data[m], data[r], t) m = ram_integer64_quicksortorderpart_asc_no_sentinels(data, index, l, r); restlevel--; ram_integer64_quicksortorder_asc_intro(data, index, l, m-1, restlevel); ram_integer64_quicksortorder_asc_intro(data, index, m+1, r, restlevel); } else ram_integer64_insertionsortorder_asc(data, index, l, r); }else{ ram_integer64_shellsortorder_asc(data, index, l, r); } } void ram_integer64_quickorder_asc_intro(ValueT *data, IndexT *index, IndexT l, IndexT r, int restlevel) { IndexT m; if (restlevel>0){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ IndexT ti; m=(l+r)/2; m = ram_integer64_median3index(data, index, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) m = ram_integer64_quickorderpart_asc_no_sentinels(data, index, l, r); restlevel--; ram_integer64_quickorder_asc_intro(data, index, l, m-1, restlevel); ram_integer64_quickorder_asc_intro(data, index, m+1, r, restlevel); } else ram_integer64_insertionorder_asc(data, index, l, r); }else{ ram_integer64_shellorder_asc(data, index, l, r); } } void 
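/* descending counterparts of the introsort routines above: the same recursion-depth
   budget (`restlevel`) applies, and once it is exhausted the remaining range is
   handed to the corresponding shellsort/shellorder fallback */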
ram_integer64_quicksort_desc_intro(ValueT *data, IndexT l, IndexT r, int restlevel) { IndexT m; if (restlevel>0){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ ValueT t; m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(data[m], data[r], t) m = ram_integer64_quicksortpart_desc_no_sentinels(data, l, r); restlevel--; ram_integer64_quicksort_desc_intro(data, l, m-1, restlevel); ram_integer64_quicksort_desc_intro(data, m+1, r, restlevel); } else ram_integer64_insertionsort_desc(data, l, r); }else{ ram_integer64_shellsort_desc(data, l, r); } } void ram_integer64_quicksortorder_desc_intro(ValueT *data, IndexT *index, IndexT l, IndexT r, int restlevel) { IndexT m; if (restlevel>0){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ IndexT ti; ValueT t; m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) EXCH(data[m], data[r], t) m = ram_integer64_quicksortorderpart_desc_no_sentinels(data, index, l, r); restlevel--; ram_integer64_quicksortorder_desc_intro(data, index, l, m-1, restlevel); ram_integer64_quicksortorder_desc_intro(data, index, m+1, r, restlevel); } else ram_integer64_insertionsortorder_desc(data, index, l, r); }else{ ram_integer64_shellsortorder_desc(data, index, l, r); } } void ram_integer64_quickorder_desc_intro(ValueT *data, IndexT *index, IndexT l, IndexT r, int restlevel) { IndexT m; if (restlevel>0){ if (INSERTIONSORT_LIMIT_QUICK < r-l){ IndexT ti; m=(l+r)/2; m = ram_integer64_median3(data, l+randIndex((r-l)/2), m, r-randIndex((r-l)/2)); EXCH(index[m], index[r], ti) m = ram_integer64_quickorderpart_desc_no_sentinels(data, index, l, r); restlevel--; ram_integer64_quickorder_desc_intro(data, index, l, m-1, restlevel); ram_integer64_quickorder_desc_intro(data, index, m+1, r, restlevel); } else ram_integer64_insertionorder_desc(data, index, l, r); }else{ ram_integer64_shellorder_desc(data, index, l, r); } } // LSB radix sorting void ram_integer64_radixsort( UValueT * data // RETURNED: pointer to data vector coerced to unsigned , UValueT * auxdata // MODIFIED: pointer to auxilliary data vector coerced to unsigned , IndexT * stats // MODIFIED: pointer to counting vector with nradixes*(pow(2, radixbits)+1) elements , IndexT ** pstats // MODIFIED: pointer to vector of pointers with nradixes elements , IndexT n // number of elements in data and auxdata , int nradixes // number of radixes where nradixes*radixbits==total number of bits , int radixbits // number of bits in radix where nradixes*radixbits==total number of bits , Rboolean decreasing // one of {0=ascending, 1=descending} ) { IndexT w,b,b2,i; int nbuckets = pow(2, radixbits); int nbuckets1 = nbuckets - 1; UValueT bitmask, signmask, tmppatt; int nradixes1 = nradixes-1; int wradixbits; // Rprintf("nradixes=%d radixbits=%d nbuckets=%d\n", nradixes, radixbits, nbuckets); R_FlushConsole(); // initialize bitmasks bitmask = 1; for (b=1;b> 1); // initialize pstats pointer for (w=0;w>= radixbits) & bitmask]++; pstats[nradixes1][ (((tmppatt >> radixbits) & bitmask) ^ signmask) ]++; } // cumulate stats and set skip-radix-flag if (decreasing){ for (w=0;w=0; i--){ b2 = stats[i]; if (b2==n) stats[nbuckets] = 0; // radix-noskip-flag stats[i] = b; b += b2; } } }else{ for (w=0;w>wradixbits & bitmask ]++], auxdata[i]) } }else{ for (i=0; i> wradixbits) & bitmask) ^ signmask) ]++], auxdata[i]) } } }else{ if (w==0){ for (i=0; i>wradixbits & bitmask ]++], data[i]) } }else{ for (i=0; i> wradixbits) & bitmask) ^ signmask) ]++], data[i]) } } } b++; } } // copy 
back in case of odd number of copies if (b%2){ for (i=0; i> 1); // initialize pstats pointer for (w=0;w>= radixbits) & bitmask]++; pstats[nradixes1][ (((tmppatt >> radixbits) & bitmask) ^ signmask) ]++; } // cumulate stats and set skip-radix-flag if (decreasing){ for (w=0;w=0; i--){ b2 = stats[i]; if (b2==n) stats[nbuckets] = 0; // radix-noskip-flag stats[i] = b; b += b2; } } }else{ for (w=0;w>wradixbits & bitmask ]++; MOVE(index[b2], auxindex[i]) MOVE(data[b2], auxdata[i]) } }else{ for (i=0; i> wradixbits) & bitmask) ^ signmask) ]++; MOVE(index[b2], auxindex[i]) MOVE(data[b2], auxdata[i]) } } }else{ if (w==0){ for (i=0; i>wradixbits & bitmask ]++; MOVE(auxindex[b2], index[i]) MOVE(auxdata[b2], data[i]) } }else{ for (i=0; i> wradixbits) & bitmask) ^ signmask) ]++; MOVE(auxindex[b2], index[i]) MOVE(auxdata[b2], data[i]) } } } b++; } } // copy back in case of odd number of copies if (b%2){ for (i=0; i> 1); // initialize pstats pointer for (w=0;w>= radixbits) & bitmask]++; pstats[nradixes1][ (((tmppatt >> radixbits) & bitmask) ^ signmask) ]++; } // cumulate stats and set skip-radix-flag if (decreasing){ for (w=0;w=0; i--){ b2 = stats[i]; if (b2==n) stats[nbuckets] = 0; // radix-noskip-flag stats[i] = b; b += b2; } } }else{ for (w=0;w>wradixbits & bitmask ]++; MOVE(index[b2], auxindex[i]) } }else{ for (i=0; i> wradixbits) & bitmask) ^ signmask) ]++; MOVE(index[b2], auxindex[i]) } } }else{ if (w==0){ for (i=0; i>wradixbits & bitmask ]++; MOVE(auxindex[b2], index[i]) } }else{ for (i=0; i> wradixbits) & bitmask) ^ signmask) ]++; MOVE(auxindex[b2], index[i]) } } } b++; } } // copy back in case of odd number of copies if (b%2){ for (i=0; i= n){} ; PutRNGstate(); return r; } // returns one of {a,b,c} such that it represents the median of data[{a,b,c}] static IndexT ram_integer64_median3( ValueT *data // pointer to data , IndexT a // pos in data , IndexT b // pos in data , IndexT c // pos in data ) { return LESS(data[a], data[b]) ? (LESS(data[b], data[c]) ? b : LESS(data[a], data[c]) ? c : a) : (LESS(data[c], data[b]) ? b : LESS(data[c], data[a]) ? c : a); } // returns one of {a,b,c} such that it represents the median of data[index[{a,b,c}]] static IndexT ram_integer64_median3index( ValueT *data // pointer to data , IndexT *index // index positions into data , IndexT a // pos in index , IndexT b // pos in index , IndexT c // pos in index ) { return KEYLESS(index[a], index[b]) ? (KEYLESS(index[b], index[c]) ? b : KEYLESS(index[a], index[c]) ? c : a) : (KEYLESS(index[c], index[b]) ? b : KEYLESS(index[c], index[a]) ? 
c : a); } /*****************************************************************************/ /** **/ /** R/C INTERFACE **/ /** **/ /*****************************************************************************/ SEXP r_ram_integer64_shellsort( SEXP x_ /* data vector */ , SEXP has_na_ /* logical scalar */ , SEXP na_last_ /* logical scalar */ , SEXP decreasing_ /* logical scalar */ ) { SEXP ret_; PROTECT( ret_ = allocVector(INTSXP, 1) ); int ret; int n = LENGTH(x_); Rboolean has_na = asLogical(has_na_); Rboolean na_last = asLogical(na_last_); Rboolean decreasing = asLogical(decreasing_); R_Busy(1); DEBUG_INIT ValueT *data; data = (ValueT *) REAL(x_); if (decreasing) ram_integer64_shellsort_desc(data, 0, n-1); else ram_integer64_shellsort_asc(data, 0, n-1); ret = ram_integer64_fixsortNA(data, n , has_na // 0 for pure doubles, 1 if NA or NaN can be present , na_last // 0 for NA NaN left, 1 for NA NaN right , decreasing // 0 for ascending, 1 for descending ); INTEGER(ret_)[0] = DEBUG_RETURN; R_Busy(0); UNPROTECT(1); return ret_; } SEXP r_ram_integer64_shellsortorder( SEXP x_ /* data vector */ , SEXP index_ /* index vector */ , SEXP has_na_ /* logical scalar */ , SEXP na_last_ /* logical scalar */ , SEXP decreasing_ /* logical scalar */ ) { SEXP ret_; PROTECT( ret_ = allocVector(INTSXP, 1) ); int ret; int n = LENGTH(x_); Rboolean has_na = asLogical(has_na_); Rboolean na_last = asLogical(na_last_); Rboolean decreasing = asLogical(decreasing_); R_Busy(1); DEBUG_INIT ValueT *data; data = (ValueT *) REAL(x_); IndexT *index = INTEGER(index_); if (decreasing) ram_integer64_shellsortorder_desc(data, index, 0, n-1); else ram_integer64_shellsortorder_asc(data, index, 0, n-1); ret = ram_integer64_fixsortorderNA(data, index, n , has_na // 0 for pure doubles, 1 if NA or NaN can be present , na_last // 0 for NA NaN left, 1 for NA NaN right , decreasing // 0 for ascending, 1 for descending , 0 // no auxindex ); INTEGER(ret_)[0] = DEBUG_RETURN; R_Busy(0); UNPROTECT(1); return ret_; } SEXP r_ram_integer64_shellorder( SEXP x_ /* data vector */ , SEXP index_ /* index vector */ , SEXP has_na_ /* logical scalar */ , SEXP na_last_ /* logical scalar */ , SEXP decreasing_ /* logical scalar */ ) { SEXP ret_; PROTECT( ret_ = allocVector(INTSXP, 1) ); int ret; int i,n = LENGTH(x_); Rboolean has_na = asLogical(has_na_); Rboolean na_last = asLogical(na_last_); Rboolean decreasing = asLogical(decreasing_); R_Busy(1); DEBUG_INIT ValueT *data; data = (ValueT *) REAL(x_); IndexT *index = INTEGER(index_); for (i=0;i #include #include // for NULL #include /* .Call calls */ extern SEXP as_list_integer64(SEXP); extern SEXP abs_integer64(SEXP, SEXP); extern SEXP all_integer64(SEXP, SEXP, SEXP); extern SEXP any_integer64(SEXP, SEXP, SEXP); extern SEXP as_bitstring_integer64(SEXP, SEXP); extern SEXP as_character_integer64(SEXP, SEXP); extern SEXP as_double_integer64(SEXP, SEXP); extern SEXP as_integer64_bitstring(SEXP, SEXP); extern SEXP as_integer64_character(SEXP, SEXP); extern SEXP as_integer64_double(SEXP, SEXP); extern SEXP as_integer64_integer(SEXP, SEXP); extern SEXP as_integer_integer64(SEXP, SEXP); extern SEXP as_logical_integer64(SEXP, SEXP); extern SEXP cummax_integer64(SEXP, SEXP); extern SEXP cummin_integer64(SEXP, SEXP); extern SEXP cumprod_integer64(SEXP, SEXP); extern SEXP cumsum_integer64(SEXP, SEXP); extern SEXP diff_integer64(SEXP, SEXP, SEXP, SEXP); extern SEXP divide_integer64_double(SEXP, SEXP, SEXP); extern SEXP divide_double_integer64(SEXP, SEXP, SEXP); /* Ofek Shilon */ extern SEXP 
divide_integer64_integer64(SEXP, SEXP, SEXP); extern SEXP EQ_integer64(SEXP, SEXP, SEXP); extern SEXP GE_integer64(SEXP, SEXP, SEXP); extern SEXP GT_integer64(SEXP, SEXP, SEXP); extern SEXP hashdup_integer64(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP hashfin_integer64(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP hashfun_integer64(SEXP, SEXP, SEXP); extern SEXP hashmap_integer64(SEXP, SEXP, SEXP, SEXP); extern SEXP hashmaptab_integer64(SEXP, SEXP, SEXP, SEXP); extern SEXP hashmapuni_integer64(SEXP, SEXP, SEXP, SEXP); extern SEXP hashmapupo_integer64(SEXP, SEXP, SEXP, SEXP); extern SEXP hashpos_integer64(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP hashrev_integer64(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP hashrin_integer64(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP hashtab_integer64(SEXP, SEXP, SEXP, SEXP); extern SEXP hashuni_integer64(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP hashupo_integer64(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP intdiv_integer64(SEXP, SEXP, SEXP); extern SEXP isna_integer64(SEXP, SEXP); extern SEXP LE_integer64(SEXP, SEXP, SEXP); extern SEXP lim_integer64(SEXP); extern SEXP log10_integer64(SEXP, SEXP); extern SEXP log2_integer64(SEXP, SEXP); extern SEXP logbase_integer64(SEXP, SEXP, SEXP); extern SEXP log_integer64(SEXP, SEXP); extern SEXP logvect_integer64(SEXP, SEXP, SEXP); extern SEXP LT_integer64(SEXP, SEXP, SEXP); extern SEXP max_integer64(SEXP, SEXP, SEXP); extern SEXP mean_integer64(SEXP, SEXP, SEXP); extern SEXP min_integer64(SEXP, SEXP, SEXP); extern SEXP minus_integer64(SEXP, SEXP, SEXP); extern SEXP mod_integer64(SEXP, SEXP, SEXP); extern SEXP NE_integer64(SEXP, SEXP, SEXP); extern SEXP plus_integer64(SEXP, SEXP, SEXP); extern SEXP power_integer64_double(SEXP, SEXP, SEXP); extern SEXP power_integer64_integer64(SEXP, SEXP, SEXP); extern SEXP prod_integer64(SEXP, SEXP, SEXP); extern SEXP range_integer64(SEXP, SEXP, SEXP); extern SEXP runif_integer64(SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_issorted_asc(SEXP); extern SEXP r_ram_integer64_mergeorder(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_mergesort(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_mergesortorder(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_nacount(SEXP); extern SEXP r_ram_integer64_orderdup_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_orderfin_asc(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_orderkey_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_ordernut(SEXP, SEXP); extern SEXP r_ram_integer64_orderord(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_orderpos_asc(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_orderrnk_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_ordertab_asc(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_ordertie_asc(SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_orderuni_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_orderupo_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_quickorder(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_quicksort(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_quicksortorder(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_radixorder(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_radixsort(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_radixsortorder(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_shellorder(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP 
r_ram_integer64_shellsort(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_shellsortorder(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortfin_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortnut(SEXP); extern SEXP r_ram_integer64_sortorderdup_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortorderkey_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortorderord(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortorderpos_asc(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortorderrnk_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortordertab_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortordertie_asc(SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortorderuni_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortorderupo_asc(SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sortsrt(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_sorttab_asc(SEXP, SEXP); extern SEXP r_ram_integer64_sortuni_asc(SEXP, SEXP); /* extern SEXP r_ram_truly_identical(SEXP, SEXP); */ extern SEXP seq_integer64(SEXP, SEXP, SEXP); extern SEXP sign_integer64(SEXP, SEXP); extern SEXP sqrt_integer64(SEXP, SEXP); extern SEXP sum_integer64(SEXP, SEXP, SEXP); extern SEXP times_integer64_double(SEXP, SEXP, SEXP); extern SEXP times_integer64_integer64(SEXP, SEXP, SEXP); /* extern SEXP r_ram_integer64_radisort(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_onionsort(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_onionsortorder(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP r_ram_integer64_onionorder(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); */ static const R_CallMethodDef CallEntries[] = { {"as_list_integer64", (DL_FUNC) &as_list_integer64, 1}, {"abs_integer64", (DL_FUNC) &abs_integer64, 2}, {"all_integer64", (DL_FUNC) &all_integer64, 3}, {"any_integer64", (DL_FUNC) &any_integer64, 3}, {"as_bitstring_integer64", (DL_FUNC) &as_bitstring_integer64, 2}, {"as_character_integer64", (DL_FUNC) &as_character_integer64, 2}, {"as_double_integer64", (DL_FUNC) &as_double_integer64, 2}, {"as_integer64_bitstring", (DL_FUNC) &as_integer64_bitstring, 2}, {"as_integer64_character", (DL_FUNC) &as_integer64_character, 2}, {"as_integer64_double", (DL_FUNC) &as_integer64_double, 2}, {"as_integer64_integer", (DL_FUNC) &as_integer64_integer, 2}, {"as_integer_integer64", (DL_FUNC) &as_integer_integer64, 2}, {"as_logical_integer64", (DL_FUNC) &as_logical_integer64, 2}, {"cummax_integer64", (DL_FUNC) &cummax_integer64, 2}, {"cummin_integer64", (DL_FUNC) &cummin_integer64, 2}, {"cumprod_integer64", (DL_FUNC) &cumprod_integer64, 2}, {"cumsum_integer64", (DL_FUNC) &cumsum_integer64, 2}, {"diff_integer64", (DL_FUNC) &diff_integer64, 4}, {"divide_integer64_double", (DL_FUNC) ÷_integer64_double, 3}, {"divide_double_integer64", (DL_FUNC) ÷_double_integer64, 3}, /* Ofek Shilon */ {"divide_integer64_integer64", (DL_FUNC) ÷_integer64_integer64, 3}, {"EQ_integer64", (DL_FUNC) &EQ_integer64, 3}, {"GE_integer64", (DL_FUNC) &GE_integer64, 3}, {"GT_integer64", (DL_FUNC) >_integer64, 3}, {"hashdup_integer64", (DL_FUNC) &hashdup_integer64, 5}, {"hashfin_integer64", (DL_FUNC) &hashfin_integer64, 5}, {"hashfun_integer64", (DL_FUNC) &hashfun_integer64, 3}, {"hashmap_integer64", (DL_FUNC) &hashmap_integer64, 4}, {"hashmaptab_integer64", (DL_FUNC) &hashmaptab_integer64, 4}, {"hashmapuni_integer64", (DL_FUNC) &hashmapuni_integer64, 4}, {"hashmapupo_integer64", (DL_FUNC) 
&hashmapupo_integer64, 4}, {"hashpos_integer64", (DL_FUNC) &hashpos_integer64, 6}, {"hashrev_integer64", (DL_FUNC) &hashrev_integer64, 7}, {"hashrin_integer64", (DL_FUNC) &hashrin_integer64, 6}, {"hashtab_integer64", (DL_FUNC) &hashtab_integer64, 4}, {"hashuni_integer64", (DL_FUNC) &hashuni_integer64, 5}, {"hashupo_integer64", (DL_FUNC) &hashupo_integer64, 5}, {"intdiv_integer64", (DL_FUNC) &intdiv_integer64, 3}, {"isna_integer64", (DL_FUNC) &isna_integer64, 2}, {"LE_integer64", (DL_FUNC) &LE_integer64, 3}, {"lim_integer64", (DL_FUNC) &lim_integer64, 1}, {"log10_integer64", (DL_FUNC) &log10_integer64, 2}, {"log2_integer64", (DL_FUNC) &log2_integer64, 2}, {"logbase_integer64", (DL_FUNC) &logbase_integer64, 3}, {"log_integer64", (DL_FUNC) &log_integer64, 2}, {"logvect_integer64", (DL_FUNC) &logvect_integer64, 3}, {"LT_integer64", (DL_FUNC) <_integer64, 3}, {"max_integer64", (DL_FUNC) &max_integer64, 3}, {"mean_integer64", (DL_FUNC) &mean_integer64, 3}, {"min_integer64", (DL_FUNC) &min_integer64, 3}, {"minus_integer64", (DL_FUNC) &minus_integer64, 3}, {"mod_integer64", (DL_FUNC) &mod_integer64, 3}, {"NE_integer64", (DL_FUNC) &NE_integer64, 3}, {"plus_integer64", (DL_FUNC) &plus_integer64, 3}, {"power_integer64_double", (DL_FUNC) &power_integer64_double, 3}, {"power_integer64_integer64", (DL_FUNC) &power_integer64_integer64, 3}, {"prod_integer64", (DL_FUNC) &prod_integer64, 3}, {"range_integer64", (DL_FUNC) &range_integer64, 3}, {"runif_integer64", (DL_FUNC) &runif_integer64, 3}, {"r_ram_integer64_issorted_asc", (DL_FUNC) &r_ram_integer64_issorted_asc, 1}, {"r_ram_integer64_mergeorder", (DL_FUNC) &r_ram_integer64_mergeorder, 5}, {"r_ram_integer64_mergesort", (DL_FUNC) &r_ram_integer64_mergesort, 4}, {"r_ram_integer64_mergesortorder", (DL_FUNC) &r_ram_integer64_mergesortorder, 5}, {"r_ram_integer64_nacount", (DL_FUNC) &r_ram_integer64_nacount, 1}, {"r_ram_integer64_orderdup_asc", (DL_FUNC) &r_ram_integer64_orderdup_asc, 4}, {"r_ram_integer64_orderfin_asc", (DL_FUNC) &r_ram_integer64_orderfin_asc, 5}, {"r_ram_integer64_orderkey_asc", (DL_FUNC) &r_ram_integer64_orderkey_asc, 4}, {"r_ram_integer64_ordernut", (DL_FUNC) &r_ram_integer64_ordernut, 2}, {"r_ram_integer64_orderord", (DL_FUNC) &r_ram_integer64_orderord, 6}, {"r_ram_integer64_orderpos_asc", (DL_FUNC) &r_ram_integer64_orderpos_asc, 6}, {"r_ram_integer64_orderrnk_asc", (DL_FUNC) &r_ram_integer64_orderrnk_asc, 4}, {"r_ram_integer64_ordertab_asc", (DL_FUNC) &r_ram_integer64_ordertab_asc, 5}, {"r_ram_integer64_ordertie_asc", (DL_FUNC) &r_ram_integer64_ordertie_asc, 3}, {"r_ram_integer64_orderuni_asc", (DL_FUNC) &r_ram_integer64_orderuni_asc, 4}, {"r_ram_integer64_orderupo_asc", (DL_FUNC) &r_ram_integer64_orderupo_asc, 4}, {"r_ram_integer64_quickorder", (DL_FUNC) &r_ram_integer64_quickorder, 6}, {"r_ram_integer64_quicksort", (DL_FUNC) &r_ram_integer64_quicksort, 5}, {"r_ram_integer64_quicksortorder", (DL_FUNC) &r_ram_integer64_quicksortorder, 6}, {"r_ram_integer64_radixorder", (DL_FUNC) &r_ram_integer64_radixorder, 6}, {"r_ram_integer64_radixsort", (DL_FUNC) &r_ram_integer64_radixsort, 5}, {"r_ram_integer64_radixsortorder", (DL_FUNC) &r_ram_integer64_radixsortorder, 6}, {"r_ram_integer64_shellorder", (DL_FUNC) &r_ram_integer64_shellorder, 5}, {"r_ram_integer64_shellsort", (DL_FUNC) &r_ram_integer64_shellsort, 4}, {"r_ram_integer64_shellsortorder", (DL_FUNC) &r_ram_integer64_shellsortorder, 5}, {"r_ram_integer64_sortfin_asc", (DL_FUNC) &r_ram_integer64_sortfin_asc, 4}, {"r_ram_integer64_sortnut", (DL_FUNC) &r_ram_integer64_sortnut, 1}, 
{"r_ram_integer64_sortorderdup_asc", (DL_FUNC) &r_ram_integer64_sortorderdup_asc, 4}, {"r_ram_integer64_sortorderkey_asc", (DL_FUNC) &r_ram_integer64_sortorderkey_asc, 4}, {"r_ram_integer64_sortorderord", (DL_FUNC) &r_ram_integer64_sortorderord, 6}, {"r_ram_integer64_sortorderpos_asc", (DL_FUNC) &r_ram_integer64_sortorderpos_asc, 6}, {"r_ram_integer64_sortorderrnk_asc", (DL_FUNC) &r_ram_integer64_sortorderrnk_asc, 4}, {"r_ram_integer64_sortordertab_asc", (DL_FUNC) &r_ram_integer64_sortordertab_asc, 4}, {"r_ram_integer64_sortordertie_asc", (DL_FUNC) &r_ram_integer64_sortordertie_asc, 3}, {"r_ram_integer64_sortorderuni_asc", (DL_FUNC) &r_ram_integer64_sortorderuni_asc, 4}, {"r_ram_integer64_sortorderupo_asc", (DL_FUNC) &r_ram_integer64_sortorderupo_asc, 4}, {"r_ram_integer64_sortsrt", (DL_FUNC) &r_ram_integer64_sortsrt, 5}, {"r_ram_integer64_sorttab_asc", (DL_FUNC) &r_ram_integer64_sorttab_asc, 2}, {"r_ram_integer64_sortuni_asc", (DL_FUNC) &r_ram_integer64_sortuni_asc, 2}, /* {"r_ram_truly_identical", (DL_FUNC) &r_ram_truly_identical, 2},*/ {"seq_integer64", (DL_FUNC) &seq_integer64, 3}, {"sign_integer64", (DL_FUNC) &sign_integer64, 2}, {"sqrt_integer64", (DL_FUNC) &sqrt_integer64, 2}, {"sum_integer64", (DL_FUNC) &sum_integer64, 3}, {"times_integer64_double", (DL_FUNC) ×_integer64_double, 3}, {"times_integer64_integer64", (DL_FUNC) ×_integer64_integer64, 3}, /* {"r_ram_integer64_radisort", (DL_FUNC) &r_ram_integer64_radisort, 5}, {"r_ram_integer64_onionsort", (DL_FUNC) &r_ram_integer64_onionsort, 7}, {"r_ram_integer64_onionsortorder", (DL_FUNC) &r_ram_integer64_onionsortorder, 8}, {"r_ram_integer64_onionorder", (DL_FUNC) &r_ram_integer64_onionorder, 8}, */ {NULL, NULL, 0} }; void R_init_bit64(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } bit64/src/bsearch.h0000644000176200001440000007044314674440514013572 0ustar liggesusers/* # C-Header for binary search # (c) 2011 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2011-12-11 # Last changed: 2011-12-11 */ #include "sort64.h" #define INTEGER64_BSEARCH_ASC_DOWN(data, l, r, value) \ { \ IndexT m; \ while (lm){ \ if (LESS(data[g], value)){ \ l = g + 1; \ break; \ }else{ \ r = g; \ d *= 2; \ } \ }else{ \ if (LESS(data[m], value)) \ l = m + 1; \ else \ r = m; \ break; \ } \ } \ while (lm){ \ if (LESS(value, data[g])){ \ r = g; \ d *= 2; \ }else{ \ l = g + 1; \ break; \ } \ }else{ \ if (LESS(value, data[m])) \ r = m; \ else \ l = m + 1; \ break; \ } \ } \ while (lm){ \ if (GREATER(data[g], value)){ \ l = g + 1; \ break; \ }else{ \ r = g; \ d *= 2; \ } \ }else{ \ if (GREATER(data[m], value)) \ l = m + 1; \ else \ r = m; \ break; \ } \ } \ while (lm){ \ if (GREATER(value, data[g])){ \ r = g; \ d *= 2; \ }else{ \ l = g + 1; \ break; \ } \ }else{ \ if (GREATER(value, data[m])) \ r = m; \ else \ l = m + 1; \ break; \ } \ } \ while (lm){ \ if (LESS(data[index[g]], value)){ \ l = g + 1; \ break; \ }else{ \ r = g; \ d *= 2; \ } \ }else{ \ if (LESS(data[index[m]], value)) \ l = m + 1; \ else \ r = m; \ break; \ } \ } \ while (lm){ \ if (LESS(value, data[index[g]])){ \ r = g; \ d *= 2; \ }else{ \ l = g + 1; \ break; \ } \ }else{ \ if (LESS(value, data[index[m]])) \ r = m; \ else \ l = m + 1; \ break; \ } \ } \ while (lm){ \ if (GREATER(data[index[g]], value)){ \ l = g + 1; \ break; \ }else{ \ r = g; \ d *= 2; \ } \ }else{ \ if (GREATER(data[index[m]], value)) \ l = m + 1; \ else \ r = m; \ break; \ } \ } \ while (lm){ \ if (GREATER(value, 
data[index[g]])){ \ r = g; \ d *= 2; \ }else{ \ l = g + 1; \ break; \ } \ }else{ \ if (GREATER(value, data[index[m]])) \ r = m; \ else \ l = m + 1; \ break; \ } \ } \ while (l #include //#include //CRAN disallows rand: #include // rand #include "integer64.h" //#include "timing.h" /*****************************************************************************/ /** **/ /** DEFINITIONS AND MACROS **/ /** **/ /*****************************************************************************/ #define DEBUG_COUNTING 0 #define DEBUG_INIT // compare_counter = 0; move_counter = 0; //initTicks(); //#define DEBUG_RETURN getNewTicks() #define DEBUG_RETURN ret; // #define DEBUG_RETURN move_counter; #define DEBUG_DONE Rprintf("compare_counter=%d move_counter=%d\n", compare_counter, move_counter); R_FlushConsole(); //doneTicks(); #if defined(WIN32) || defined(WIN64) || defined(_WIN32_) || defined(_WIN64_) || defined(__WIN32__) || defined(__WIN64__) #define MULTI_THREADING 0 #else #define MULTI_THREADING 1 #endif #if MULTI_THREADING #include #endif // dummy for counting comp ops #define COUNTLESS #define LESS(A,B) ((A)<(B)) #define GREATER(A, B) LESS((B), (A)) //#define MOVE(TO,FROM){move_counter++; TO=FROM;} #define MOVE(TO,FROM) TO=FROM; #define EXCH(A,B,t) {MOVE(t,A) MOVE(A,B) MOVE(B,t)} #define COMPEXCH(A,B,t) if (LESS(B,A)) EXCH(A,B,t) #define KEY(A) (data[A]) #define KEYLESS(A,B) (LESS(KEY(A),KEY(B))) #define KEYCOMPEXCH(A,B,t) if (KEYLESS(B,A)) EXCH(A,B,t) #define COMPEXCHi(A,B,t,Ai,Bi,ti) if (LESS(B,A)) {EXCH(A,B,t) EXCH(Ai,Bi,ti)} #define INSERTIONSORT_LIMIT_MERGE 16 #define INSERTIONSORT_LIMIT_QUICK 16 /*****************************************************************************/ /** **/ /** TYPEDEFS AND STRUCTURES **/ /** **/ /*****************************************************************************/ typedef int IndexT; typedef long long ValueT; typedef unsigned long long UValueT; /*****************************************************************************/ /** **/ /** EXPORTED VARIABLES **/ /** **/ /*****************************************************************************/ #ifndef _SORT64_C_SRC extern IndexT compare_counter; extern IndexT move_counter; #endif /*****************************************************************************/ /** **/ /** EXPORTED FUNCTIONS **/ /** **/ /*****************************************************************************/ void R_Busy (int which); // post sorting NA handling int ram_integer64_fixsortNA( ValueT *data // RETURNED: pointer to data vector , IndexT n // length of data vector , int has_na // 0 for pure doubles, 1 if NA or NaN can be present , int na_last // 0 for placing NA NaN left, 1 for placing NA NaN right , int decreasing // 0 for ascending, 1 for descending (must match the same parameter in sorting) ); // post sortordering NA handling int ram_integer64_fixsortorderNA( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT n // length of vectors , int has_na // 0 for pure doubles, 1 if NA or NaN can be present , int na_last // 0 for placing NA NaN left, 1 for placing NA NaN right , int decreasing // 0 for ascending, 1 for descending (must match the same parameter in sorting) , IndexT *auxindex // MODIFIED: pointer to auxilliary index vector ); // post ordering NA handling int ram_integer64_fixorderNA( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT n // length of vectors , int has_na // 0 for pure doubles, 1 if NA or 
NaN can be present , int na_last // 0 for placing NA NaN left, 1 for placing NA NaN right , int decreasing // 0 for ascending, 1 for descending (must match the same parameter in sorting) , IndexT *auxindex // MODIFIED: pointer to auxilliary index vector ); // ascending insertion sorting void ram_integer64_insertionsort_asc( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending insertion sortordering void ram_integer64_insertionsortorder_asc( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending insertion sortordering void ram_integer64_insertionorder_asc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending insertion sorting void ram_integer64_insertionsort_desc( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending insertion sortordering void ram_integer64_insertionsortorder_desc( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending insertion sortordering void ram_integer64_insertionorder_desc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending shell sorting void ram_integer64_shellsort_asc( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending shell sortordering void ram_integer64_shellsortorder_asc( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending shell sortordering void ram_integer64_shellorder_asc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending shell sorting void ram_integer64_shellsort_desc( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending shell sortordering void ram_integer64_shellsortorder_desc( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending shell sortordering void ram_integer64_shellorder_desc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending merge for sorting void ram_integer64_sortmerge_asc( ValueT *c // RETURNED: pointer to merge target data vector , ValueT *a // UNCHANGED: pointer to merge source data vector a , ValueT *b // UNCHANGED: pointer to merge source data vector b , IndexT na // number of elements in merge 
source vector a , IndexT nb // number of elements in merge source vector b ); // ascending merge for ordering void ram_integer64_ordermerge_asc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *c // RETURNED: pointer to merge target index vector , IndexT *a // UNCHANGED: pointer to merge source index vector a , IndexT *b // UNCHANGED: pointer to merge source index vector b , IndexT na // number of elements in merge source vector a , IndexT nb // number of elements in merge source vector b ); // ascending merge for sortordering void ram_integer64_sortordermerge_asc( ValueT *c // RETURNED: pointer to merge target data vector , ValueT *a // UNCHANGED: pointer to merge source data vector a , ValueT *b // UNCHANGED: pointer to merge source data vector b , IndexT *ci // RETURNED: pointer to merge target index vector , IndexT *ai // UNCHANGED: pointer to merge source index vector a , IndexT *bi // UNCHANGED: pointer to merge source index vector b , IndexT na // number of elements in merge source vector a , IndexT nb // number of elements in merge source vector b ); // descending merge for sorting void ram_integer64_sortmerge_desc( ValueT *c // RETURNED: pointer to merge target data vector , ValueT *a // UNCHANGED: pointer to merge source data vector a , ValueT *b // UNCHANGED: pointer to merge source data vector b , IndexT na // number of elements in merge source vector a , IndexT nb // number of elements in merge source vector b ); // descending merge for ordering void ram_integer64_ordermerge_desc( ValueT *data // UNCHANGED: pointer to data vector , IndexT *c // RETURNED: pointer to merge target index vector , IndexT *a // UNCHANGED: pointer to merge source index vector a , IndexT *b // UNCHANGED: pointer to merge source index vector b , IndexT na // number of elements in merge source vector a , IndexT nb // number of elements in merge source vector b ); // descending merge for sortordering void ram_integer64_sortordermerge_desc( ValueT *c // RETURNED: pointer to merge target data vector , ValueT *a // UNCHANGED: pointer to merge source data vector a , ValueT *b // UNCHANGED: pointer to merge source data vector b , IndexT *ci // RETURNED: pointer to merge target index vector , IndexT *ai // UNCHANGED: pointer to merge source index vector a , IndexT *bi // UNCHANGED: pointer to merge source index vector b , IndexT na // number of elements in merge source vector a , IndexT nb // number of elements in merge source vector b ); // merge sorts b ascending and leaves result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergesort_asc_rec( ValueT *a // RETURNED: pointer to target data vector , ValueT *b // MODIFIED: pointer to source data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // merge sorting b ascending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergeorder_asc_rec( ValueT *data // UNCHANGED: pointer to data vector , IndexT *a // RETURNED: pointer to target index vector , IndexT *b // MODIFIED: pointer to source index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // merge sortordering b ascending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergesortorder_asc_rec( ValueT *a // RETURNED: pointer to target data vector , ValueT *b // MODIFIED: pointer to source data vector , IndexT *ai // RETURNED: pointer to target index vector , IndexT *bi // MODIFIED: 
pointer to source index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // merge sorts b descending and leaves result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergesort_desc_rec( ValueT *a // RETURNED: pointer to target data vector , ValueT *b // MODIFIED: pointer to source data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // merge sorting b descending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergeorder_desc_rec( ValueT *data // UNCHANGED: pointer to data vector , IndexT *a // RETURNED: pointer to target index vector , IndexT *b // MODIFIED: pointer to source index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // merge sortordering b descending leaving result in a (following Sedgewick 8.4 Mergesort with no copying) void ram_integer64_mergesortorder_desc_rec( ValueT *a // RETURNED: pointer to target data vector , ValueT *b // MODIFIED: pointer to source data vector , IndexT *ai // RETURNED: pointer to target index vector , IndexT *bi // MODIFIED: pointer to source index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending partitioning of data between l and r around pivot in r for quick sorting IndexT ram_integer64_quicksortpart_asc_no_sentinels( ValueT *data // RETURNED: pointer to data , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending partitioning of data between l and r around pivot in r for quick ordering IndexT ram_integer64_quickorderpart_asc_no_sentinels( ValueT *data // UNCHANGED: pointer to data , IndexT *index // RETURNED: pointer to index , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending partitioning of data between l and r around pivot in r for quick sortordering IndexT ram_integer64_quicksortorderpart_asc_no_sentinels( ValueT *data // RETURNED: pointer to data , IndexT *index // RETURNED: pointer to index , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending partitioning of data between l and r around pivot in r for quick sorting IndexT ram_integer64_quicksortpart_desc_no_sentinels( ValueT *data // RETURNED: pointer to data , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending partitioning of data between l and r around pivot in r for quick ordering IndexT ram_integer64_quickorderpart_desc_no_sentinels( ValueT *data // UNCHANGED: pointer to data , IndexT *index // RETURNED: pointer to index , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending partitioning of data between l and r around pivot in r for quick sortordering IndexT ram_integer64_quicksortorderpart_desc_no_sentinels( ValueT *data // RETURNED: pointer to data , IndexT *index // RETURNED: pointer to index , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending quick sorting void ram_integer64_quicksort_asc_mdr3_no_sentinels( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending quick sortordering void ram_integer64_quicksortorder_asc_mdr3_no_sentinels( ValueT *data // RETURNED: pointer to data 
vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending quick sortordering void ram_integer64_quickorder_asc_mdr3_no_sentinels( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending quick sorting void ram_integer64_quicksort_desc_mdr3_no_sentinels( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending quick sortordering void ram_integer64_quicksortorder_desc_mdr3_no_sentinels( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // descending quick sortordering void ram_integer64_quickorder_desc_mdr3_no_sentinels( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted ); // ascending intro sorting (switches to shellsort when no restlevels left) void ram_integer64_quicksort_asc_intro( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted , int restlevel // number of remaining levels for quicksort recursion before switching to shellsort ); // ascending intro sortordering (switches to shellsort when no restlevels left) void ram_integer64_quicksortorder_asc_intro( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted , int restlevel // number of remaining levels for quicksort recursion before switching to shellsort ); // ascending intro sortordering (switches to shellsort when no restlevels left) void ram_integer64_quickorder_asc_intro( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted , int restlevel // number of remaining levels for quicksort recursion before switching to shellsort ); // descending intro sorting (switches to shellsort when no restlevels left) void ram_integer64_quicksort_desc_intro( ValueT *data // RETURNED: pointer to data vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted , int restlevel // number of remaining levels for quicksort recursion before switching to shellsort ); // descending intro sortordering (switches to shellsort when no restlevels left) void ram_integer64_quicksortorder_desc_intro( ValueT *data // RETURNED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted , int restlevel // number of remaining levels for quicksort recursion before switching to shellsort ); // descending intro sortordering (switches to shellsort when no restlevels left) void ram_integer64_quickorder_desc_intro( ValueT *data // UNCHANGED: pointer to data vector , IndexT *index // RETURNED: pointer to index vector , IndexT l // leftmost position to be sorted , IndexT r // rightmost position to be sorted , int restlevel // number 
of remaining levels for quicksort recursion before switching to shellsort ); // LSB radix sorting void ram_integer64_radixsort( UValueT * data // RETURNED: pointer to data vector coerced to unsigned , UValueT * auxdata // MODIFIED: pointer to auxilliary data vector coerced to unsigned , IndexT * stats // MODIFIED: pointer to counting vector with nradixes*(pow(2, radixbits)+1) elements , IndexT ** pstats // MODIFIED: pointer to vector of pointers with nradixes elements , IndexT n // number of elements in data and auxdata , int nradixes // number of radixes where nradixes*radixbits==total number of bits , int radixbits // number of bits in radix where nradixes*radixbits==total number of bits , Rboolean decreasing // one of {0=ascending, 1=descending} ); // LSB radix ordering void ram_integer64_radixorder( UValueT * data // UNCHANGED: pointer to data vector , IndexT * index // RETURNED: pointer to index vector , IndexT * auxindex // MODIFIED: pointer to auxilliary index vector , IndexT * stats // MODIFIED: pointer to counting vector with nradixes*(pow(2, radixbits)+1) elements , IndexT ** pstats // MODIFIED: pointer to vector of pointers with nradixes elements , IndexT n // number of elements in data and auxdata , int nradixes // number of radixes where nradixes*radixbits==total number of bits , int radixbits // number of bits in radix where nradixes*radixbits==total number of bits , Rboolean decreasing // one of {0=ascending, 1=descending} ); // LSB radix sortordering void ram_integer64_radixsortorder( UValueT * data // RETURNED: pointer to data vector coerced to unsigned , UValueT * auxdata // MODIFIED: pointer to auxilliary data vector coerced to unsigned , IndexT * index // RETURNED: pointer to index vector , IndexT * auxindex // MODIFIED: pointer to auxilliary index vector , IndexT * stats // MODIFIED: pointer to counting vector with nradixes*(pow(2, radixbits)+1) elements , IndexT ** pstats // MODIFIED: pointer to vector of pointers with nradixes elements , IndexT n // number of elements in data and auxdata , int nradixes // number of radixes where nradixes*radixbits==total number of bits , int radixbits // number of bits in radix where nradixes*radixbits==total number of bits , Rboolean decreasing // one of {0=ascending, 1=descending} ); #endif /*****************************************************************************/ /** **/ /** EOF **/ /** **/ /*****************************************************************************/ bit64/NAMESPACE0000644000176200001440000002566214706047365012450 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("!",integer64) S3method("!=",integer64) S3method("%%",integer64) S3method("%/%",integer64) S3method("%in%",default) S3method("%in%",integer64) S3method("&",integer64) S3method("*",integer64) S3method("+",integer64) S3method("-",integer64) S3method("/",integer64) S3method(":",default) S3method(":",integer64) S3method("<",integer64) S3method("<=",integer64) S3method("==",integer64) S3method(">",integer64) S3method(">=",integer64) S3method("[",integer64) S3method("[<-",integer64) S3method("[[",integer64) S3method("[[<-",integer64) S3method("^",integer64) S3method("length<-",integer64) S3method("|",integer64) S3method(abs,integer64) S3method(all,integer64) S3method(all.equal,integer64) S3method(any,integer64) S3method(aperm,integer64) S3method(as.bitstring,integer64) S3method(as.character,integer64) S3method(as.data.frame,integer64) S3method(as.double,integer64) S3method(as.integer,integer64) S3method(as.integer64,"NULL") 
S3method(as.integer64,bitstring) S3method(as.integer64,character) S3method(as.integer64,double) S3method(as.integer64,factor) S3method(as.integer64,integer) S3method(as.integer64,integer64) S3method(as.integer64,logical) S3method(as.list,integer64) S3method(as.logical,integer64) S3method(c,integer64) S3method(cbind,integer64) S3method(ceiling,integer64) S3method(colSums,default) S3method(colSums,integer64) S3method(cummax,integer64) S3method(cummin,integer64) S3method(cumprod,integer64) S3method(cumsum,integer64) S3method(diff,integer64) S3method(duplicated,integer64) S3method(floor,integer64) S3method(format,integer64) S3method(hashdup,cache_integer64) S3method(hashfin,cache_integer64) S3method(hashfun,integer64) S3method(hashmap,integer64) S3method(hashmaptab,integer64) S3method(hashmapuni,integer64) S3method(hashmapupo,integer64) S3method(hashpos,cache_integer64) S3method(hashrev,cache_integer64) S3method(hashrin,cache_integer64) S3method(hashtab,cache_integer64) S3method(hashuni,cache_integer64) S3method(hashupo,cache_integer64) S3method(identical,integer64) S3method(is.double,default) S3method(is.double,integer64) S3method(is.finite,integer64) S3method(is.infinite,integer64) S3method(is.na,integer64) S3method(is.nan,integer64) S3method(is.sorted,integer64) S3method(is.vector,integer64) S3method(keypos,integer64) S3method(log,integer64) S3method(log10,integer64) S3method(log2,integer64) S3method(match,default) S3method(match,integer64) S3method(max,integer64) S3method(mean,integer64) S3method(median,integer64) S3method(mergeorder,integer64) S3method(mergesort,integer64) S3method(mergesortorder,integer64) S3method(min,integer64) S3method(na.count,integer64) S3method(nties,integer64) S3method(nunique,integer64) S3method(nvalid,integer64) S3method(order,default) S3method(order,integer64) S3method(orderdup,integer64) S3method(orderfin,integer64) S3method(orderkey,integer64) S3method(ordernut,integer64) S3method(orderpos,integer64) S3method(orderqtl,integer64) S3method(orderrnk,integer64) S3method(ordertab,integer64) S3method(ordertie,integer64) S3method(orderuni,integer64) S3method(orderupo,integer64) S3method(prank,integer64) S3method(print,bitstring) S3method(print,cache) S3method(print,integer64) S3method(prod,integer64) S3method(qtile,integer64) S3method(quantile,integer64) S3method(quickorder,integer64) S3method(quicksort,integer64) S3method(quicksortorder,integer64) S3method(radixorder,integer64) S3method(radixsort,integer64) S3method(radixsortorder,integer64) S3method(ramorder,integer64) S3method(ramsort,integer64) S3method(ramsortorder,integer64) S3method(range,integer64) S3method(rank,default) S3method(rank,integer64) S3method(rbind,integer64) S3method(rep,integer64) S3method(round,integer64) S3method(rowSums,default) S3method(rowSums,integer64) S3method(scale,integer64) S3method(seq,integer64) S3method(shellorder,integer64) S3method(shellsort,integer64) S3method(shellsortorder,integer64) S3method(sign,integer64) S3method(signif,integer64) S3method(sort,integer64) S3method(sortfin,integer64) S3method(sortnut,integer64) S3method(sortorderdup,integer64) S3method(sortorderkey,integer64) S3method(sortorderpos,integer64) S3method(sortorderrnk,integer64) S3method(sortordertab,integer64) S3method(sortordertie,integer64) S3method(sortorderuni,integer64) S3method(sortorderupo,integer64) S3method(sortqtl,integer64) S3method(sorttab,integer64) S3method(sortuni,integer64) S3method(sqrt,integer64) S3method(str,integer64) S3method(sum,integer64) S3method(summary,integer64) 
S3method(tiepos,integer64) S3method(trunc,integer64) S3method(unipos,integer64) S3method(unique,integer64) S3method(xor,integer64) export("!.integer64") export("!=.integer64") export("%%.integer64") export("%/%.integer64") export("%in%") export("%in%.default") export("%in%.integer64") export("&.integer64") export("*.integer64") export("+.integer64") export("-.integer64") export("/.integer64") export(":") export(":.default") export(":.integer64") export("<.integer64") export("<=.integer64") export("==.integer64") export(">.integer64") export(">=.integer64") export("[.integer64") export("[<-.integer64") export("[[.integer64") export("[[<-.integer64") export("^.integer64") export("length<-.integer64") export("|.integer64") export(NA_integer64_) export(abs.integer64) export(all.equal.integer64) export(all.integer64) export(any.integer64) export(as.bitstring) export(as.bitstring.integer64) export(as.character.integer64) export(as.data.frame.integer64) export(as.double.integer64) export(as.integer.integer64) export(as.integer64) export(as.integer64.NULL) export(as.integer64.bitstring) export(as.integer64.character) export(as.integer64.double) export(as.integer64.factor) export(as.integer64.integer) export(as.integer64.integer64) export(as.integer64.logical) export(as.list.integer64) export(as.logical.integer64) export(benchmark64) export(binattr) export(c.integer64) export(cache) export(cbind.integer64) export(ceiling.integer64) export(colSums) export(cummax.integer64) export(cummin.integer64) export(cumprod.integer64) export(cumsum.integer64) export(diff.integer64) export(duplicated.integer64) export(floor.integer64) export(format.integer64) export(getcache) export(hashcache) export(hashdup) export(hashdup.cache_integer64) export(hashfin) export(hashfin.cache_integer64) export(hashfun) export(hashfun.integer64) export(hashmap) export(hashmap.integer64) export(hashmaptab) export(hashmaptab.integer64) export(hashmapuni) export(hashmapuni.integer64) export(hashmapupo) export(hashmapupo.integer64) export(hashpos) export(hashpos.cache_integer64) export(hashrev) export(hashrev.cache_integer64) export(hashrin) export(hashrin.cache_integer64) export(hashtab) export(hashtab.cache_integer64) export(hashuni) export(hashuni.cache_integer64) export(hashupo) export(hashupo.cache_integer64) export(identical.integer64) export(integer64) export(is.double) export(is.double.default) export(is.double.integer64) export(is.finite.integer64) export(is.infinite.integer64) export(is.integer64) export(is.na.integer64) export(is.nan.integer64) export(is.sorted.integer64) export(is.vector.integer64) export(jamcache) export(keypos) export(keypos.integer64) export(lim.integer64) export(log.integer64) export(log10.integer64) export(log2.integer64) export(match) export(match.default) export(match.integer64) export(max.integer64) export(mean.integer64) export(median.integer64) export(mergeorder.integer64) export(mergesort.integer64) export(mergesortorder.integer64) export(min.integer64) export(minusclass) export(na.count.integer64) export(newcache) export(nties.integer64) export(nunique.integer64) export(nvalid.integer64) export(optimizer64) export(order) export(order.default) export(order.integer64) export(ordercache) export(orderdup) export(orderdup.integer64) export(orderfin) export(orderfin.integer64) export(orderkey) export(orderkey.integer64) export(ordernut) export(ordernut.integer64) export(orderpos) export(orderpos.integer64) export(orderqtl) export(orderqtl.integer64) export(orderrnk) export(orderrnk.integer64) 
export(ordertab) export(ordertab.integer64) export(ordertie) export(ordertie.integer64) export(orderuni) export(orderuni.integer64) export(orderupo) export(orderupo.integer64) export(plusclass) export(prank) export(prank.integer64) export(print.bitstring) export(print.cache) export(print.integer64) export(prod.integer64) export(qtile) export(qtile.integer64) export(quantile.integer64) export(quickorder.integer64) export(quicksort.integer64) export(quicksortorder.integer64) export(radixorder.integer64) export(radixsort.integer64) export(radixsortorder.integer64) export(ramorder.integer64) export(ramsort.integer64) export(ramsortorder.integer64) export(range.integer64) export(rank) export(rank.default) export(rank.integer64) export(rbind.integer64) export(remcache) export(rep.integer64) export(round.integer64) export(rowSums) export(runif64) export(scale.integer64) export(seq.integer64) export(setcache) export(shellorder.integer64) export(shellsort.integer64) export(shellsortorder.integer64) export(sign.integer64) export(signif.integer64) export(sort.integer64) export(sortcache) export(sortfin) export(sortfin.integer64) export(sortnut) export(sortnut.integer64) export(sortordercache) export(sortorderdup) export(sortorderdup.integer64) export(sortorderkey) export(sortorderkey.integer64) export(sortorderpos) export(sortorderpos.integer64) export(sortorderrnk) export(sortorderrnk.integer64) export(sortordertab) export(sortordertab.integer64) export(sortordertie) export(sortordertie.integer64) export(sortorderuni) export(sortorderuni.integer64) export(sortorderupo) export(sortorderupo.integer64) export(sortqtl) export(sortqtl.integer64) export(sorttab) export(sorttab.integer64) export(sortuni) export(sortuni.integer64) export(sqrt.integer64) export(str.integer64) export(sum.integer64) export(summary.integer64) export(table.integer64) export(tiepos) export(tiepos.integer64) export(trunc.integer64) export(unipos) export(unipos.integer64) export(unique.integer64) export(xor.integer64) if (getRversion() >= "4.2.0") S3method(mtfrm,integer64) importFrom(bit,clone) importFrom(bit,is.sorted) importFrom(bit,keyorder) importFrom(bit,keysort) importFrom(bit,keysortorder) importFrom(bit,mergeorder) importFrom(bit,mergesort) importFrom(bit,mergesortorder) importFrom(bit,na.count) importFrom(bit,nties) importFrom(bit,nunique) importFrom(bit,nvalid) importFrom(bit,quickorder) importFrom(bit,quicksort) importFrom(bit,quicksortorder) importFrom(bit,radixorder) importFrom(bit,radixsort) importFrom(bit,radixsortorder) importFrom(bit,ramorder) importFrom(bit,ramsort) importFrom(bit,ramsortorder) importFrom(bit,repeat.time) importFrom(bit,setattr) importFrom(bit,shellorder) importFrom(bit,shellsort) importFrom(bit,shellsortorder) importFrom(bit,xor) importFrom(graphics,barplot) importFrom(graphics,par) importFrom(graphics,title) importFrom(methods,as) importFrom(methods,is) importFrom(stats,cor) importFrom(stats,median) importFrom(stats,quantile) importFrom(utils,packageDescription) importFrom(utils,strOptions) importFrom(utils,tail) useDynLib(bit64, .registration = TRUE, .fixes = "C_") bit64/inst/0000755000176200001440000000000014706050365012165 5ustar liggesusersbit64/inst/doc/0000755000176200001440000000000014742210225012723 5ustar liggesusersbit64/inst/doc/ANNOUNCEMENT-0.8.txt0000644000176200001440000000622614706050365015676 0ustar liggesusersDear R-Core team, Dear Rcpp team and other package teams, Dear R users, The new package 'bit64' is available on CRAN for beta-testing and code-reviewing. 
Package 'bit64' provides fast serializable S3 atomic 64bit (signed) integers that can be used in vectors, matrices, arrays and data.frames. Methods are available for coercion from and to logicals, integers, doubles, characters as well as many elementwise and summary functions. Package 'bit64' has the following advantages over package 'int64' (which was sponsored by Google): - true atomic vectors usable with length, dim, names etc. - only S3, not S4 class system used to dispatch methods - less RAM consumption by factor 7 (under 64 bit OS) - faster operations by factor 4 to 2000 (under 64 bit OS) - no slow-down of R's garbage collection (as caused by the pure existence of 'int64' objects) - pure GPL, no copyrights from transnational commercial company While the advantage of the atomic S3 design over the complicated S4 object design is obvious, it is less obvious that an external package is the best way to enrich R with 64bit integers. An external package will not give us literals such as 1LL or directly allow us to address larger vectors than possible with base R. But it allows us to properly address larger vectors in other packages such as 'ff' or 'bigmemory' and it allows us to properly work with large surrogate keys from external databases. An external package realizing one data type also makes a perfect test bed to play with innovative performance enhancements. Performance tuned sorting and hashing are planned for the next release, which will give us fast versions of sort, order, merge, duplicated, unique, and table - for 64bit integers. For those who still hope that R's 'integer' will be 64bit some day, here is my key learning: migrating R's 'integer' from 32 to 64 bit would be RAM expensive. It would most likely require to also migrate R's 'double' from 64 to 128 bit - in order to again have a data type to which we can lossless coerce. The assumption that 'integer' is a proper subset of 'double' is scattered over R's semantics. We all expect that binary and n-ary functions such as '+' and 'c' do return 'double' and do not destroy information. With solely extending 64bit integers but not 128bit doubles, we have semantic changes potentially disappointing such expectations: integer64+double returns integer64 and does kill decimals. I did my best to make operations involving integer64 consistent and numerically stable - please consult the documentation at ?bit64 for details. Since this package is 'at risk' to create a lot of dependencies from other packages, I'd appreciate serious beta-testing and also code-review from the R-Core team. Please check the 'Limitations' sections at the help page and the numerics involving "long double" in C. If the conclusion is that this should be better done in Base R - I happly donate the code and drop this package. If we have to go with an external package for 64bit integers, it would be great if this work could convince the Rcpp team including Romain about the advantages of this approach. Shouldn't we join forces here? Best regards Jens Oehlschlägel Munich, 11.2.2012 bit64/inst/doc/ANNOUNCEMENT-0.9-Details.txt0000644000176200001440000002277414706050367017272 0ustar liggesusersI have used package 'bit64' as a testbed to explore a couple of approaches for implementing R's univariate algorithmic functionality efficiently. I have focused on single-threaded efficiency for two reasons: 1) Amdahl's law dictates that the more we parallelize, the more we depend on serial efficiency. 
2) When working with truly big data it is not only absolute speed but also energy consumption that we care about.

Under the hood package 'bit64' has multiple implementations of the same functionality, and high-level functions contain (still simple, heuristic) optimizers that choose among the available low-level functions. For example 'match' can choose between eight functions based on hashing or sorting/ordering. Function 'match' (and '%in%') has been accelerated by complementing lookup of 'x' in hashed 'table' with reverse lookup of 'table' in hashed 'x'. If 'x' is small and 'table' is big, reverse lookup avoids the cost of building a huge hashmap. As suggested in Simon Urbanek's package 'fastmatch', if 'match' is called multiple times with the same 'table', performance can be improved by re-using the hashmap implicitly built by 'match'. Beyond that, I have realized a couple of improvements:

1) Building the hashmap has now been singled out in a separate function 'hashmap' that explicitly returns an environment of class c("cache_integer64", "cache", "environment") containing the hashmap and some auxiliary data.

2) Instead of implicitly caching the hashmap as a side-effect when calling 'fastmatch', there are explicit functions for caching, for example 'hashcache' for attaching a cache with a hashmap, and 'remcache' for removing any cached data (a usage sketch follows below).

3) If the 'hashcache' function after hashing discovers that the number of unique values is much smaller than the total number of values, it will hash again using a much smaller hashmap: this typically saves a lot of RAM and accelerates usage of the hashmap because it reduces random access.

4) The cache layer has a mechanism for detecting outdated caches. This is even more important in the case of a cached hashmap, since R's typical hashmap only contains index pointers to the data, not the data itself (unlike in standard hashtables). As a result, an outdated cache might lead to a crash if the data has changed since creation of the cached hashmap. The detection mechanism comes for free, since R does copy-on-write and each change of a vector leads to memory reallocation: on each cache access we check for a modified vector address and remove the cache with a warning in case of a detected change. However, this method is of course not fail-safe in case of multiple changes. Therefore, until cache checking and removal is done in Base R, users using caching should carefully remove caches before modifying data. Users must also carefully remove caches before using functions that do in-place modifications such as 'ramsort', 'ramorder' and 'ramsortorder'. Users should also note that R's COPY-ON-MODIFY mechanism does more copying than one would expect: just reading from variable-length arguments with the recommended 'list(...)' construct always copies the arguments and invalidates caches. For a workaround see the implementation of 'table.integer64'.

5) Beyond 'match', the package leverages speed gains of hashing or cached hashing for a couple of other high-level functions: '%in%', 'duplicated', 'unique', 'unipos' and 'table'. However, it turned out that only 'match', '%in%' and 'duplicated' benefit from a cached hashmap. For 'unique', 'unipos' and 'table' the cost of traversing an existing hashmap is as high as creating the hashmap from scratch.
That leads to the undesirable effect that we need two implementations for each of these methods: one that simultaneously builds and uses the hashmap, and another that uses an existing hashmap (using the simultaneous method while a hashmap has been cached would duplicate RAM consumption). 6) Beyond leveraging hashing, all these high-level functions also have two low-level implementations that take advantage of (cached) ordering and (cached) sortordering instead (see order below). 7) Additional functions are implemented that benefit only from (cached) ordering and (cached) sortordering: 'sort', 'order', 'tiepos', 'keypos', 'rank', 'quantile' and dependents thereof ('median','summary','as.factor','as.ordered','table'). Method 'sort' is a cache-aware wrapper around 'ramsort', which depending on context chooses from multiple sorting algorithms (or from the cache): 'shellsort' (R's traditional in-place sorting algorithm), 'quicksort' (faster in-place), 'mergesort' (fast and stable), 'radixsort' (stable with linear scalability, for large datasets). The quicksort algorithm implemented here is in this context faster than the famous one of Bentley and McIlroy. It uses a median of three random pivots and, like introsort, is protected against O(n^2) runtime (if a recursion limit is reached, it for now falls back to shellsort instead of heapsort). Function 'order.integer64' with option 'optimize = "memory"' calls 'ramorder' which chooses from a similar set of low-level algorithms. 'ramorder' - like in package 'ff' - is faster than ordering in Base R, but like 'order' in Base R still does the job by sorting index pointers to the data, which creates heavy random access to the data. The novel 'ramsortorder' method realizes ordering close to the speed of sorting, by sorting index and data simultaneously and thereby avoiding heavy random access. Therefore the option 'optimize = "time"' is the default in 'order.integer64' and calls 'ramsortorder'. Function 'rank.integer64' implements only 'ties.method = "average"' and 'na.last="keep"' (the only sensible default, see e.g. 'cor'). Function 'prank.integer64' projects the values [min..max] via ranks [1..n] to [0..1]. 'qtile.integer64' is the inverse function of 'prank.integer64' and projects [0..1] to [min..max]. 'quantile.integer64' with 'type=0' and 'median.integer64' are convenience wrappers to 'qtile'. 'qtile' behaves very similarly to 'quantile.default' with 'type=1' in that it only returns existing values; it is mostly symmetric but uses 'round' rather than 'floor'. Note that this implies that 'median.integer64' does not interpolate for an even number of values (interpolation would create values that could not be represented as 64-bit integers). Function 'table.integer64' leverages hashing or sorting for counting frequencies of all unique values. This is by a factor of 3 slower than 'tabulate', but when called with 'return="list"' it is an order of magnitude faster than 'table' (because 'table' wastes a lot of performance in large-scale raw data manipulation before calling tabulate and in attaching the unique values as 'names', which loads heavily on the global string cache). When dealing with combinations of input vectors, 'table.integer64' can handle up to 2^63 hypothetical combinations and can return the existing combinations in a sparse format, whereas standard 'table' theoretically bails out at 2^31 (practically earlier due to RAM limitations) and insists on returning a full-blown dense array.
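As a minimal sketch of this explicit hashing and caching workflow (the object names below are made up for illustration; the functions are the ones documented in the package help):

library(bit64)
b <- as.integer64(sample(1000000L))         # big table of keys
s <- as.integer64(sample(1000000L, 1000L))  # small query vector
h <- hashmap(b)                             # build the hashmap once, explicitly
hashpos(h, s)                               # reuse it for any number of lookups (match-like)
hashfin(h, s)                               # and for %in%-like queries
b <- hashcache(b)                           # or attach the hashmap to 'b' as a cache, so that
match(s, b)                                 # high-level calls like match() and %in%
s %in% b                                    # find and reuse it automatically
remcache(b)                                 # remove the cache before modifying 'b' in place

The high-level functions still decide heuristically between hashing and sortordering; the explicit interface merely exposes the building blocks for re-use.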
I compared the speed gains of hashing+usage versus sortordering+usage over a couple of univariate algorithmic operations: hashing and sortordering are competitive, with hashing rather winning for smaller and sortordering rather winning for larger vectors (due to better cache-obliviousness of sorting). The third option - ordering - is much slower, though competitive with Base R, and its 50% RAM saving makes this an interesting option, especially when working with datasets close to the RAM limits. Though operations based on hashing can be faster than those based on sortordering, it is worth noting that if the sortorder is cached, in most cases going with the sortorder-operation is faster than building the hashmap and using it. Thus sortordering seems a better RAM investment than hashing. It has the following advantages: - sortordering supports more functionality than hashing - sortordering gives better modularity (different from hashing, we can well separate *creating* and *using* the sortordering, because sorting permanently improves cache-locality) - without the computational costs of keeping the original order ('keep.order=TRUE' in 'unique' and 'table'), sortorder gives sorted results while hashing gives random result order. If there are many unique values, fixing random order by sorting afterwards kills any performance benefit of hashing, compare for example the sequence {y <- unique(x); ind <- sort.list(y)} in 'factor'. - sorting generalizes better to very large data on disk compared to hashing - it is easier to parallelize sorting lock-free compared to hashing - creating the ordering quickly via sortordering and then caching only the ordering (without the sorted data) is an interesting option to save RAM without too much speed loss - with ordering instead of sortordering there is an option to work with large borderline-sized datasets in-RAM These advantages of sorting over hashing are good news for my novel energy-efficient greeNsort® algorithms. The long-term roadmap for packages 'bit64' and 'ff' is - demonstrate the power of greeNsort® by accelerating integer64 sorting by yet another factor of 2 - parallelization of important functions in bit64 - unifying the sort capabilities in ff with those in bit64 (logical, factor, integer, integer64, double) - generalizing the fast data management to all numeric data types (integer, integer64, double) - removing the 2^31-1 address limit in ff (rather using integer64 than double) - providing ff with proper disk sorting (reducing n*log(n) passes to 2 passes over the memory-mapped disk) © 2010-2012 Jens Oehlschlägel bit64/inst/doc/ANNOUNCEMENT-0.9.txt0000644000176200001440000000355514706050372015677 0ustar liggesusersDear R community, The new version of package 'bit64' - which extends R with fast 64-bit integers - now has fast (single-threaded) implementations of the most important univariate algorithmic operations (those based on hashing and sorting). Package 'bit64' now has methods for 'match', '%in%', 'duplicated', 'unique', 'table', 'sort', 'order', 'rank', 'quantile', 'median' and 'summary'. Regarding data management it has novel generics 'unipos' (positions of the unique values), 'tiepos' (positions of ties), 'keypos' (positions of values in a sorted unique table) and derived methods 'as.factor' and 'as.ordered'. This 64-bit functionality is implemented carefully to be no slower than the respective 32-bit operations in Base R and also to avoid the excessive execution times observed with 'order', 'rank' and 'table' (speedup factors of 20/16/200 respectively).
This increases the dataset size with which we can work truly interactively. The speed is achieved by simple heuristic optimizers: the mentioned high-level functions choose the best from multiple low-level algorithms and further take advantage of a novel optional caching method. In an example R session using a couple of these operations the 64-bit integers performed 22x faster than base 32-bit integers, hash-caching improved this to 24x amortized, and sortorder-caching was most efficient with 38x (caching both hashing and sorting is not worth it: 32x at doubled RAM consumption). Since the package covers the most important functions for (univariate) data exploration and data management, I think it is now appropriate to claim that R has sound 64-bit integer support, for example for working with keys or counts imported from large databases. For details concerning approach, implementation and roadmap please check the ANNOUNCEMENT-0.9-Details.txt file and the package help files. Kind regards Jens Oehlschlägel Munich, 22.10.2012 bit64/man/0000755000176200001440000000000014706047365011771 5ustar liggesusersbit64/man/sort.integer64.Rd0000644000176200001440000000472014705122715015050 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sort64.R \name{sort.integer64} \alias{sort.integer64} \alias{order.integer64} \title{High-level integer64 methods for sorting and ordering} \usage{ \method{sort}{integer64}( x, decreasing = FALSE, has.na = TRUE, na.last = TRUE, stable = TRUE, optimize = c("time", "memory"), VERBOSE = FALSE, ... ) \method{order}{integer64}( ..., na.last = TRUE, decreasing = FALSE, has.na = TRUE, stable = TRUE, optimize = c("time", "memory"), VERBOSE = FALSE ) } \arguments{ \item{x}{a vector to be sorted by \code{\link[=ramsort.integer64]{ramsort.integer64()}} and \code{\link[=ramsortorder.integer64]{ramsortorder.integer64()}}, i.e. the output of \code{\link[=sort.integer64]{sort.integer64()}}} \item{decreasing}{boolean scalar telling ramsort whether to sort increasing or decreasing} \item{has.na}{boolean scalar defining whether the input vector might contain \code{NA}s. If we know we don't have NAs, this may speed-up. \emph{Note} that you risk a crash if there are unexpected \code{NA}s with \code{has.na=FALSE}} \item{na.last}{boolean scalar telling ramsort whether to sort \code{NA}s last or first. \emph{Note} that 'boolean' means that there is no third option \code{NA} as in \code{\link[=sort]{sort()}}} \item{stable}{boolean scalar defining whether stable sorting is needed. Allowing non-stable may speed-up.} \item{optimize}{by default ramsort optimizes for 'time' which requires more RAM, set to 'memory' to minimize RAM requirements and sacrifice speed} \item{VERBOSE}{cat some info about chosen method} \item{...}{further arguments, passed from generics, ignored in methods} } \value{ \code{sort} returns the sorted vector and \code{order} returns the order positions. } \description{ Fast high-level methods for sorting and ordering. These are wrappers to \code{\link[=ramsort.integer64]{ramsort.integer64()}} and friends and do not modify their arguments.
} \details{ see \code{\link[=sort]{sort()}} and \code{\link[=order]{order()}} } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) x sort(x) message("the following has default optimize='time' which is faster but requires more RAM, this calls 'ramsortorder'") order.integer64(x) message("slower with less RAM, this calls 'ramorder'") order.integer64(x, optimize="memory") } \seealso{ \code{\link[=sort.integer64]{sort()}}, \code{\link[=sortcache]{sortcache()}} } \keyword{manip} \keyword{programming} bit64/man/rank.integer64.Rd0000644000176200001440000000235014705122715015011 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{rank.integer64} \alias{rank.integer64} \title{Sample Ranks from integer64} \usage{ \method{rank}{integer64}(x, method = NULL, ...) } \arguments{ \item{x}{an integer64 vector} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} \item{...}{ignored} } \value{ A numeric vector of the same length as \code{x}. } \description{ Returns the sample ranks of the values in a vector. Ties (i.e., equal values) are averaged and missing values propagated. } \details{ This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link[=sortorderrnk]{sortorderrnk()}} (fast ordering) \item \code{\link[=orderrnk]{orderrnk()}} (memory saving ordering). } } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) rank.integer64(x) stopifnot(identical(rank.integer64(x), rank(as.integer(x) , na.last="keep", ties.method = "average"))) } \seealso{ \code{\link[=order.integer64]{order.integer64()}}, \code{\link[=rank]{rank()}} and \code{\link[=prank]{prank()}} for percent rank. } \keyword{univar} bit64/man/sum.integer64.Rd0000644000176200001440000000417614705122715014672 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{sum.integer64} \alias{sum.integer64} \alias{any.integer64} \alias{all.integer64} \alias{prod.integer64} \alias{min.integer64} \alias{max.integer64} \alias{range.integer64} \alias{lim.integer64} \title{Summary functions for integer64 vectors} \usage{ \method{any}{integer64}(..., na.rm = FALSE) \method{all}{integer64}(..., na.rm = FALSE) \method{sum}{integer64}(..., na.rm = FALSE) \method{prod}{integer64}(..., na.rm = FALSE) \method{min}{integer64}(..., na.rm = FALSE) \method{max}{integer64}(..., na.rm = FALSE) \method{range}{integer64}(..., na.rm = FALSE, finite = FALSE) lim.integer64() } \arguments{ \item{...}{atomic vectors of class 'integer64'} \item{na.rm}{logical scalar indicating whether to ignore NAs} \item{finite}{logical scalar indicating whether to ignore NAs (just for compatibility with \code{\link[=range.default]{range.default()}})} } \value{ \code{\link[=all]{all()}} and \code{\link[=any]{any()}} return a logical scalar \code{\link[=range]{range()}} returns an integer64 vector with two elements \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=sum]{sum()}} and \code{\link[=prod]{prod()}} return an integer64 scalar } \description{ Summary functions for integer64 vectors. Function 'range' without arguments returns the smallest and largest value of the 'integer64' class. } \details{ The numerical summary methods always return \code{integer64}.
Therefore the methods for \code{min},\code{max} and \code{range} do not return \verb{+Inf,-Inf} on empty arguments, but \verb{+9223372036854775807, -9223372036854775807} (in this sequence). The same is true if only \code{NA}s are submitted with argument \code{na.rm=TRUE}. \code{lim.integer64} returns these limits in proper order \verb{-9223372036854775807, +9223372036854775807} and without a \code{\link[=warning]{warning()}}. } \examples{ lim.integer64() range(as.integer64(1:12)) } \seealso{ \code{\link[=mean.integer64]{mean.integer64()}} \code{\link[=cumsum.integer64]{cumsum.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/all.equal.integer64.Rd0000644000176200001440000000627014705122715015741 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{all.equal.integer64} \alias{all.equal.integer64} \title{Test if two integer64 vectors are all.equal} \usage{ \method{all.equal}{integer64}( target, current, tolerance = sqrt(.Machine$double.eps), scale = NULL, countEQ = FALSE, formatFUN = function(err, what) format(err), ..., check.attributes = TRUE ) } \arguments{ \item{target}{a vector of 'integer64' or an object that can be coerced with \code{\link[=as.integer64]{as.integer64()}}} \item{current}{a vector of 'integer64' or an object that can be coerced with \code{\link[=as.integer64]{as.integer64()}}} \item{tolerance}{numeric > 0. Differences smaller than \code{tolerance} are not reported. The default value is close to \code{1.5e-8}.} \item{scale}{\code{NULL} or numeric > 0, typically of length 1 or \code{length(target)}. See Details.} \item{countEQ}{logical indicating if the \code{target == current} cases should be counted when computing the mean (absolute or relative) differences. The default, \code{FALSE} may seem misleading in cases where \code{target} and \code{current} only differ in a few places; see the extensive example.} \item{formatFUN}{a \code{\link[=function]{function()}} of two arguments, \code{err}, the relative, absolute or scaled error, and \code{what}, a character string indicating the \emph{kind} of error; maybe used, e.g., to format relative and absolute errors differently.} \item{...}{further arguments are ignored} \item{check.attributes}{logical indicating if the \code{\link[=attributes]{attributes()}} of \code{target} and \code{current} (other than the names) should be compared.} } \value{ Either ‘TRUE’ (‘NULL’ for ‘attr.all.equal’) or a vector of ‘mode’ ‘"character"’ describing the differences between ‘target’ and ‘current’. } \description{ A utility to compare integer64 objects 'x' and 'y' testing for ‘near equality’, see \code{\link[=all.equal]{all.equal()}}. } \details{ In \code{\link[=all.equal.numeric]{all.equal.numeric()}} the type \code{integer} is treated as a proper subset of \code{double} i.e. does not complain about comparing \code{integer} with \code{double}. Following this logic \code{all.equal.integer64} treats \code{integer} as a proper subset of \code{integer64} and does not complain about comparing \code{integer} with \code{integer64}. \code{double} also compares without warning as long as the values are within \code{\link[=lim.integer64]{lim.integer64()}}, if \code{double} are bigger \code{all.equal.integer64} complains about the \verb{all.equal.integer64 overflow warning}. For further details see \code{\link[=all.equal]{all.equal()}}. 
} \note{ \code{\link[=all.equal]{all.equal()}} only dispatches to this method if the first argument is \code{integer64}, calling \code{\link[=all.equal]{all.equal()}} with a \code{non-integer64} first and a \code{integer64} second argument gives undefined behavior! } \examples{ all.equal(as.integer64(1:10), as.integer64(0:9)) all.equal(as.integer64(1:10), as.integer(1:10)) all.equal(as.integer64(1:10), as.double(1:10)) all.equal(as.integer64(1), as.double(1e300)) } \seealso{ \code{\link[=all.equal]{all.equal()}} } bit64/man/seq.integer64.Rd0000644000176200001440000000253214705122715014650 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{seq.integer64} \alias{seq.integer64} \title{integer64: Sequence Generation} \arguments{ \item{from}{integer64 scalar (in order to dispatch the integer64 method of \code{\link[=seq]{seq()}}} \item{to}{scalar} \item{by}{scalar} \item{length.out}{scalar} \item{along.with}{scalar} \item{...}{ignored} } \value{ an integer64 vector with the generated sequence } \description{ Generating sequence of integer64 values } \details{ \code{seq.integer64} does coerce its arguments 'from', 'to' and 'by' to \code{integer64}. If not provided, the argument 'by' is automatically determined as \code{+1} or \code{-1}, but the size of 'by' is not calculated as in \code{\link[=seq]{seq()}} (because this might result in a non-integer value). } \note{ In base R \code{\link{:}} currently is not generic and does not dispatch, see section "Limitations inherited from Base R" in \code{\link[=integer64]{integer64()}} } \examples{ # colon not activated: as.integer64(1):12 seq(as.integer64(1), 12, 2) seq(as.integer64(1), by=2, length.out=6) } \seealso{ \code{\link[=c.integer64]{c.integer64()}} \code{\link[=rep.integer64]{rep.integer64()}} \code{\link[=as.data.frame.integer64]{as.data.frame.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/unipos.Rd0000644000176200001440000000522614705122715013572 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{unipos} \alias{unipos} \alias{unipos.integer64} \title{Extract Positions of Unique Elements} \usage{ unipos(x, incomparables = FALSE, order = c("original", "values", "any"), ...) \method{unipos}{integer64}( x, incomparables = FALSE, order = c("original", "values", "any"), nunique = NULL, method = NULL, ... ) } \arguments{ \item{x}{a vector or a data frame or an array or \code{NULL}.} \item{incomparables}{ignored} \item{order}{The order in which positions of unique values will be returned, see details} \item{...}{ignored} \item{nunique}{NULL or the number of unique values (including NA). Providing \code{nunique} can speed-up when \code{x} has no cache. Note that a wrong \code{nunique} can cause undefined behaviour up to a crash.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} } \value{ an integer vector of positions } \description{ \code{unipos} returns the positions of those elements returned by \code{\link[=unique]{unique()}}. } \details{ This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
Suitable methods are \itemize{ \item \code{\link{hashmapupo}} (simultaneously creating and using a hashmap) \item \code{\link{hashupo}} (first creating a hashmap then using it) \item \code{\link{sortorderupo}} (fast ordering) \item \code{\link{orderupo}} (memory saving ordering). } The default \code{order="original"} collects unique values in the order of the first appearance in \code{x} like in \code{\link[=unique]{unique()}}, this costs extra processing. \code{order="values"} collects unique values in sorted order like in \code{\link[=table]{table()}}, this costs extra processing with the hash methods but comes for free. \code{order="any"} collects unique values in undefined order, possibly faster. For hash methods this will be a quasi random order, for sort methods this will be sorted order. } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) unipos(x) unipos(x, order="values") stopifnot(identical(unipos(x), (1:length(x))[!duplicated(x)])) stopifnot(identical(unipos(x), match.integer64(unique(x), x))) stopifnot(identical(unipos(x, order="values"), match.integer64(unique(x, order="values"), x))) stopifnot(identical(unique(x), x[unipos(x)])) stopifnot(identical(unique(x, order="values"), x[unipos(x, order="values")])) } \seealso{ \code{\link[=unique.integer64]{unique.integer64()}} for unique values and \code{\link[=match.integer64]{match.integer64()}} for general matching. } \keyword{logic} \keyword{manip} bit64/man/hashmap.Rd0000644000176200001440000001614414705122715013677 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hash64.R \name{hashmap} \alias{hashmap} \alias{hashfun} \alias{hashfun.integer64} \alias{hashmap.integer64} \alias{hashpos} \alias{hashpos.cache_integer64} \alias{hashrev} \alias{hashrev.cache_integer64} \alias{hashfin} \alias{hashfin.cache_integer64} \alias{hashrin} \alias{hashrin.cache_integer64} \alias{hashdup} \alias{hashdup.cache_integer64} \alias{hashuni} \alias{hashuni.cache_integer64} \alias{hashupo} \alias{hashupo.cache_integer64} \alias{hashtab} \alias{hashtab.cache_integer64} \alias{hashmaptab} \alias{hashmaptab.integer64} \alias{hashmapuni} \alias{hashmapuni.integer64} \alias{hashmapupo} \alias{hashmapupo.integer64} \title{Hashing for 64bit integers} \usage{ hashfun(x, ...) \method{hashfun}{integer64}(x, minfac = 1.41, hashbits = NULL, ...) hashmap(x, ...) \method{hashmap}{integer64}(x, nunique = NULL, minfac = 1.41, hashbits = NULL, cache = NULL, ...) hashpos(cache, ...) \method{hashpos}{cache_integer64}(cache, x, nomatch = NA_integer_, ...) hashrev(cache, ...) \method{hashrev}{cache_integer64}(cache, x, nomatch = NA_integer_, ...) hashfin(cache, ...) \method{hashfin}{cache_integer64}(cache, x, ...) hashrin(cache, ...) \method{hashrin}{cache_integer64}(cache, x, ...) hashdup(cache, ...) \method{hashdup}{cache_integer64}(cache, ...) hashuni(cache, ...) \method{hashuni}{cache_integer64}(cache, keep.order = FALSE, ...) hashupo(cache, ...) \method{hashupo}{cache_integer64}(cache, keep.order = FALSE, ...) hashtab(cache, ...) \method{hashtab}{cache_integer64}(cache, ...) hashmaptab(x, ...) \method{hashmaptab}{integer64}(x, nunique = NULL, minfac = 1.5, hashbits = NULL, ...) hashmapuni(x, ...) \method{hashmapuni}{integer64}(x, nunique = NULL, minfac = 1.5, hashbits = NULL, ...) hashmapupo(x, ...) \method{hashmapupo}{integer64}(x, nunique = NULL, minfac = 1.5, hashbits = NULL, ...) 
} \arguments{ \item{x}{an integer64 vector} \item{...}{further arguments, passed from generics, ignored in methods} \item{minfac}{minimum factor by which the hasmap has more elements compared to the data \code{x}, ignored if \code{hashbits} is given directly} \item{hashbits}{length of hashmap is \code{2^hashbits}} \item{nunique}{giving \emph{correct} number of unique elements can help reducing the size of the hashmap} \item{cache}{an optional \code{\link[=cache]{cache()}} object into which to put the hashmap (by default a new cache is created} \item{nomatch}{the value to be returned if an element is not found in the hashmap} \item{keep.order}{determines order of results and speed: \code{FALSE} (the default) is faster and returns in the (pseudo)random order of the hash function, \code{TRUE} returns in the order of first appearance in the original data, but this requires extra work} } \value{ See Details } \description{ This is an explicit implementation of hash functionality that underlies matching and other functions in R. Explicit means that you can create, store and use hash functionality directly. One advantage is that you can re-use hashmaps, which avoid re-building hashmaps again and again. } \details{ \tabular{rrl}{ \strong{function} \tab \strong{see also} \tab \strong{description} \cr \code{hashfun} \tab \code{digest} \tab export of the hash function used in \code{hashmap} \cr \code{hashmap} \tab \code{\link[=match.integer64]{match()}} \tab return hashmap \cr \code{hashpos} \tab \code{\link[=match.integer64]{match()}} \tab return positions of \code{x} in \code{hashmap} \cr \code{hashrev} \tab \code{\link[=match.integer64]{match()}} \tab return positions of \code{hashmap} in \code{x} \cr \code{hashfin} \tab \code{\link{\%in\%.integer64}} \tab return logical whether \code{x} is in \code{hashmap} \cr \code{hashrin} \tab \code{\link{\%in\%.integer64}} \tab return logical whether \code{hashmap} is in \code{x} \cr \code{hashdup} \tab \code{\link[=duplicated.integer64]{duplicated()}} \tab return logical whether hashdat is duplicated using hashmap \cr \code{hashuni} \tab \code{\link[=unique.integer64]{unique()}} \tab return unique values of hashmap \cr \code{hashmapuni} \tab \code{\link[=unique.integer64]{unique()}} \tab return unique values of \code{x} \cr \code{hashupo} \tab \code{\link[=unique.integer64]{unique()}} \tab return positions of unique values in hashdat \cr \code{hashmapupo} \tab \code{\link[=unique.integer64]{unique()}} \tab return positions of unique values in \code{x} \cr \code{hashtab} \tab \code{\link[=table.integer64]{table()}} \tab tabulate values of hashdat using hashmap in \code{keep.order=FALSE} \cr \code{hashmaptab} \tab \code{\link[=table.integer64]{table()}} \tab tabulate values of \code{x} building hasmap on the fly in \code{keep.order=FALSE} \cr } } \examples{ x <- as.integer64(sample(c(NA, 0:9))) y <- as.integer64(sample(c(NA, 1:9), 10, TRUE)) hashfun(y) hx <- hashmap(x) hy <- hashmap(y) ls(hy) hashpos(hy, x) hashrev(hx, y) hashfin(hy, x) hashrin(hx, y) hashdup(hy) hashuni(hy) hashuni(hy, keep.order=TRUE) hashmapuni(y) hashupo(hy) hashupo(hy, keep.order=TRUE) hashmapupo(y) hashtab(hy) hashmaptab(y) stopifnot(identical(match(as.integer(x),as.integer(y)),hashpos(hy, x))) stopifnot(identical(match(as.integer(x),as.integer(y)),hashrev(hx, y))) stopifnot(identical(as.integer(x) \%in\% as.integer(y), hashfin(hy, x))) stopifnot(identical(as.integer(x) \%in\% as.integer(y), hashrin(hx, y))) stopifnot(identical(duplicated(as.integer(y)), hashdup(hy))) 
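# illustration: the same hashmap can serve further query vectors without being rebuilt
# ('z' below is just a made-up extra query; its values need not all be present in 'y')
z <- as.integer64(c(3L, 7L, 11L))
hashpos(hy, z)
hashfin(hy, z)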
stopifnot(identical(as.integer64(unique(as.integer(y))), hashuni(hy, keep.order=TRUE))) stopifnot(identical(sort(hashuni(hy, keep.order=FALSE)), sort(hashuni(hy, keep.order=TRUE)))) stopifnot(identical(y[hashupo(hy, keep.order=FALSE)], hashuni(hy, keep.order=FALSE))) stopifnot(identical(y[hashupo(hy, keep.order=TRUE)], hashuni(hy, keep.order=TRUE))) stopifnot(identical(hashpos(hy, hashuni(hy, keep.order=TRUE)), hashupo(hy, keep.order=TRUE))) stopifnot(identical(hashpos(hy, hashuni(hy, keep.order=FALSE)), hashupo(hy, keep.order=FALSE))) stopifnot(identical(hashuni(hy, keep.order=FALSE), hashtab(hy)$values)) stopifnot(identical(as.vector(table(as.integer(y), useNA="ifany")) , hashtab(hy)$counts[order.integer64(hashtab(hy)$values)])) stopifnot(identical(hashuni(hy, keep.order=TRUE), hashmapuni(y))) stopifnot(identical(hashupo(hy, keep.order=TRUE), hashmapupo(y))) stopifnot(identical(hashtab(hy), hashmaptab(y))) \dontrun{ message("explore speed given size of the hasmap in 2^hashbits and size of the data") message("more hashbits means more random access and less collisions") message("i.e. more data means less random access and more collisions") bits <- 24 b <- seq(-1, 0, 0.1) tim <- matrix(NA, length(b), 2, dimnames=list(b, c("bits","bits+1"))) for (i in 1:length(b)){ n <- as.integer(2^(bits+b[i])) x <- as.integer64(sample(n)) tim[i,1] <- repeat.time(hashmap(x, hashbits=bits))[3] tim[i,2] <- repeat.time(hashmap(x, hashbits=bits+1))[3] print(tim) matplot(b, tim) } message("we conclude that n*sqrt(2) is enough to avoid collisions") } } \seealso{ \code{\link[=match.integer64]{match()}}, \code{\link[=runif64]{runif64()}} } \keyword{manip} \keyword{programming} bit64/man/is.sorted.integer64.Rd0000644000176200001440000000405514705122715015774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cache.R \name{is.sorted.integer64} \alias{is.sorted.integer64} \alias{na.count.integer64} \alias{nvalid.integer64} \alias{nunique.integer64} \alias{nties.integer64} \title{Small cache access methods} \usage{ \method{na.count}{integer64}(x, ...) \method{nvalid}{integer64}(x, ...) \method{is.sorted}{integer64}(x, ...) \method{nunique}{integer64}(x, ...) \method{nties}{integer64}(x, ...) } \arguments{ \item{x}{some object} \item{...}{ignored} } \value{ \code{is.sorted} returns a logical scalar, the other methods return an integer scalar. } \description{ These methods are packaged here for methods in packages \code{bit64} and \code{ff}. } \details{ All these functions benefit from a \code{\link[=sortcache]{sortcache()}}, \code{\link[=ordercache]{ordercache()}} or \code{\link[=sortordercache]{sortordercache()}}. \code{na.count()}, \code{nvalid()} and \code{nunique()} also benefit from a \code{\link[=hashcache]{hashcache()}}. } \section{Functions}{ \itemize{ \item \code{na.count(integer64)}: returns the number of \code{NA}s \item \code{nvalid(integer64)}: returns the number of valid data points, usually \code{\link[=length]{length()}} minus \code{na.count}. \item \code{is.sorted(integer64)}: checks for sortedness of \code{x} (NAs sorted first) \item \code{nunique(integer64)}: returns the number of unique values \item \code{nties(integer64)}: returns the number of tied values. }} \note{ If a \code{\link[=cache]{cache()}} exists but the desired value is not cached, then these functions will store their result in the cache. We do not consider this a relevant side-effect, since these small cache results do not have a relevant memory footprint. 
} \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) length(x) na.count(x) nvalid(x) nunique(x) nties(x) table.integer64(x) x } \seealso{ \code{\link[=cache]{cache()}} for caching functions and \code{\link[=sortordercache]{sortordercache()}} for functions creating big caches } \keyword{environment} \keyword{methods} bit64/man/format.integer64.Rd0000644000176200001440000000515114705122715015350 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{format.integer64} \alias{format.integer64} \alias{sign.integer64} \alias{abs.integer64} \alias{sqrt.integer64} \alias{log.integer64} \alias{log10.integer64} \alias{log2.integer64} \alias{trunc.integer64} \alias{floor.integer64} \alias{ceiling.integer64} \alias{signif.integer64} \alias{scale.integer64} \alias{round.integer64} \alias{is.na.integer64} \alias{is.finite.integer64} \alias{is.infinite.integer64} \alias{is.nan.integer64} \alias{!.integer64} \title{Unary operators and functions for integer64 vectors} \usage{ \method{format}{integer64}(x, justify = "right", ...) \method{sign}{integer64}(x) \method{abs}{integer64}(x) \method{sqrt}{integer64}(x) \method{log}{integer64}(x, base = NULL) \method{log10}{integer64}(x) \method{log2}{integer64}(x) \method{trunc}{integer64}(x, ...) \method{floor}{integer64}(x) \method{ceiling}{integer64}(x) \method{signif}{integer64}(x, digits = 6L) \method{scale}{integer64}(x, center = TRUE, scale = TRUE) \method{round}{integer64}(x, digits = 0L) \method{is.na}{integer64}(x) \method{is.finite}{integer64}(x) \method{is.infinite}{integer64}(x) \method{is.nan}{integer64}(x) \method{!}{integer64}(x) } \arguments{ \item{x}{an atomic vector of class 'integer64'} \item{justify}{should it be right-justified (the default), left-justified, centred or left alone.} \item{...}{further arguments to the \code{\link[=NextMethod]{NextMethod()}}} \item{base}{an atomic scalar (we save 50\% log-calls by not allowing a vector base)} \item{digits}{integer indicating the number of decimal places (round) or significant digits (signif) to be used. Negative values are allowed (see \code{\link[=round]{round()}})} \item{center}{see \code{\link[=scale]{scale()}}} \item{scale}{see \code{\link[=scale]{scale()}}} } \value{ \code{\link[=format]{format()}} returns a character vector \code{\link[=is.na]{is.na()}} and \code{\link{!}} return a logical vector \code{\link[=sqrt]{sqrt()}}, \code{\link[=log]{log()}}, \code{\link[=log2]{log2()}} and \code{\link[=log10]{log10()}} return a double vector \code{\link[=sign]{sign()}}, \code{\link[=abs]{abs()}}, \code{\link[=floor]{floor()}}, \code{\link[=ceiling]{ceiling()}}, \code{\link[=trunc]{trunc()}} and \code{\link[=round]{round()}} return a vector of class 'integer64' \code{\link[=signif]{signif()}} is not implemented } \description{ Unary operators and functions for integer64 vectors. 
} \examples{ sqrt(as.integer64(1:12)) } \seealso{ \code{\link[=xor.integer64]{xor.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/benchmark64.Rd0000644000176200001440000001371214705122715014360 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{benchmark64} \alias{benchmark64} \alias{optimizer64} \title{Function for measuring algorithmic performance of high-level and low-level integer64 functions} \usage{ benchmark64(nsmall = 2L^16L, nbig = 2L^25L, timefun = repeat.time) optimizer64( nsmall = 2L^16L, nbig = 2L^25L, timefun = repeat.time, what = c("match", "\%in\%", "duplicated", "unique", "unipos", "table", "rank", "quantile"), uniorder = c("original", "values", "any"), taborder = c("values", "counts"), plot = TRUE ) } \arguments{ \item{nsmall}{size of smaller vector} \item{nbig}{size of larger bigger vector} \item{timefun}{a function for timing such as \code{\link[bit:repeat.time]{bit::repeat.time()}} or \code{\link[=system.time]{system.time()}}} \item{what}{a vector of names of high-level functions} \item{uniorder}{one of the order parameters that are allowed in \code{\link[=unique.integer64]{unique.integer64()}} and \code{\link[=unipos.integer64]{unipos.integer64()}}} \item{taborder}{one of the order parameters that are allowed in \code{\link[=table.integer64]{table.integer64()}}} \item{plot}{set to FALSE to suppress plotting} } \value{ \code{benchmark64} returns a matrix with elapsed seconds, different high-level tasks in rows and different scenarios to solve the task in columns. The last row named 'SESSION' contains the elapsed seconds of the exemplary sesssion. \code{optimizer64} returns a dimensioned list with one row for each high-level function timed and two columns named after the values of the \code{nsmall} and \code{nbig} sample sizes. Each list cell contains a matrix with timings, low-level-methods in rows and three measurements \code{c("prep","both","use")} in columns. If it can be measured separately, \code{prep} contains the timing of preparatory work such as sorting and hashing, and \code{use} contains the timing of using the prepared work. If the function timed does both, preparation and use, the timing is in \code{both}. 
} \description{ Function for measuring algorithmic performance of high-level and low-level integer64 functions } \details{ \code{benchmark64} compares the following scenarios for the following use cases:\tabular{rl}{ \strong{scenario name} \tab \strong{explanation} \cr 32-bit \tab applying Base R function to 32-bit integer data \cr 64-bit \tab applying bit64 function to 64-bit integer data (with no cache) \cr hashcache \tab ditto when cache contains \code{\link[=hashmap]{hashmap()}}, see \code{\link[=hashcache]{hashcache()}} \cr sortordercache \tab ditto when cache contains sorting and ordering, see \code{\link[=sortordercache]{sortordercache()}} \cr ordercache \tab ditto when cache contains ordering only, see \code{\link[=ordercache]{ordercache()}} \cr allcache \tab ditto when cache contains sorting, ordering and hashing \cr } \tabular{rl}{ \strong{use case name} \tab \strong{explanation} \cr cache \tab filling the cache according to scenario \cr match(s,b) \tab match small in big vector \cr s \%in\% b \tab small \%in\% big vector \cr match(b,s) \tab match big in small vector \cr b \%in\% s \tab big \%in\% small vector \cr match(b,b) \tab match big in (different) big vector \cr b \%in\% b \tab big \%in\% (different) big vector \cr duplicated(b) \tab duplicated of big vector \cr unique(b) \tab unique of big vector \cr table(b) \tab table of big vector \cr sort(b) \tab sorting of big vector \cr order(b) \tab ordering of big vector \cr rank(b) \tab ranking of big vector \cr quantile(b) \tab quantiles of big vector \cr summary(b) \tab summary of of big vector \cr SESSION \tab exemplary session involving multiple calls (including cache filling costs) \cr } Note that the timings for the cached variants do \emph{not} contain the time costs of building the cache, except for the timing of the exemplary user session, where the cache costs are included in order to evaluate amortization. 
} \section{Functions}{ \itemize{ \item \code{benchmark64()}: compares high-level integer64 functions against the integer functions from Base R \item \code{optimizer64()}: compares for each high-level integer64 function the Base R integer function with several low-level integer64 functions with and without caching }} \examples{ message("this small example using system.time does not give serious timings\n this we do this only to run regression tests") benchmark64(nsmall=2^7, nbig=2^13, timefun=function(expr)system.time(expr, gcFirst=FALSE)) optimizer64(nsmall=2^7, nbig=2^13, timefun=function(expr)system.time(expr, gcFirst=FALSE) , plot=FALSE ) \dontrun{ message("for real measurement of sufficiently large datasets run this on your machine") benchmark64() optimizer64() } message("let's look at the performance results on Core i7 Lenovo T410 with 8 GB RAM") data(benchmark64.data) print(benchmark64.data) matplot(log2(benchmark64.data[-1,1]/benchmark64.data[-1,]) , pch=c("3", "6", "h", "s", "o", "a") , xlab="tasks [last=session]" , ylab="log2(relative speed) [bigger is better]" ) matplot(t(log2(benchmark64.data[-1,1]/benchmark64.data[-1,])) , type="b", axes=FALSE , lwd=c(rep(1, 14), 3) , xlab="context" , ylab="log2(relative speed) [bigger is better]" ) axis(1 , labels=c("32-bit", "64-bit", "hash", "sortorder", "order", "hash+sortorder") , at=1:6 ) axis(2) data(optimizer64.data) print(optimizer64.data) oldpar <- par(no.readonly = TRUE) par(mfrow=c(2,1)) par(cex=0.7) for (i in 1:nrow(optimizer64.data)){ for (j in 1:2){ tim <- optimizer64.data[[i,j]] barplot(t(tim)) if (rownames(optimizer64.data)[i]=="match") title(paste("match", colnames(optimizer64.data)[j], "in", colnames(optimizer64.data)[3-j])) else if (rownames(optimizer64.data)[i]=="\%in\%") title(paste(colnames(optimizer64.data)[j], "\%in\%", colnames(optimizer64.data)[3-j])) else title(paste(rownames(optimizer64.data)[i], colnames(optimizer64.data)[j])) } } par(mfrow=c(1,1)) } \seealso{ \code{\link[=integer64]{integer64()}} } \keyword{misc} bit64/man/matrix64.Rd0000644000176200001440000000353614705122715013735 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/matrix64.R \name{matrix64} \alias{matrix64} \alias{colSums} \alias{colSums.default} \alias{colSums.integer64} \alias{rowSums} \alias{rowSums.default} \alias{rowSums.integer64} \alias{aperm.integer64} \title{Working with integer64 arrays and matrices} \usage{ colSums(x, na.rm = FALSE, dims = 1L) \method{colSums}{default}(x, na.rm = FALSE, dims = 1L) \method{colSums}{integer64}(x, na.rm = FALSE, dims = 1L) rowSums(x, na.rm = FALSE, dims = 1L) \method{rowSums}{default}(x, na.rm = FALSE, dims = 1L) \method{rowSums}{integer64}(x, na.rm = FALSE, dims = 1L) \method{aperm}{integer64}(a, perm, ...) } \arguments{ \item{x}{An array of integer64 numbers.} \item{na.rm, dims}{Same interpretation as in \code{\link[=colSums]{colSums()}}.} \item{a, perm}{Passed on to \code{\link[=aperm]{aperm()}}.} \item{...}{Passed on to subsequent methods.} } \description{ These functions and methods facilitate working with integer64 objects stored in matrices. As ever, the primary motivation for having tailor-made functions here is that R's methods often receive input from bit64 and treat the vectors as doubles, leading to unexpected and/or incorrect results. 
} \details{ As of now, the \code{colSums()} and \code{rowSums()} methods are implemented as wrappers around equivalent \code{apply()} approaches, because re-using the default routine (and then applying integer64 to the result) does not work for objects with missing elements. Ideally this would eventually get its own dedicated C routine mimicking that of \code{colSums()} for integers; feature requests and PRs welcome. \code{aperm()} is required for \code{apply()} to work, in general, otherwise \code{FUN} gets applied to a class-stripped version of the input. } \examples{ A = as.integer64(1:6) dim(A) = 3:2 colSums(A) rowSums(A) aperm(A, 2:1) } bit64/man/as.data.frame.integer64.Rd0000644000176200001440000000174314705122715016467 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{as.data.frame.integer64} \alias{as.data.frame.integer64} \title{integer64: Coercing to data.frame column} \usage{ \method{as.data.frame}{integer64}(x, ...) } \arguments{ \item{x}{an integer64 vector} \item{...}{passed to NextMethod \code{\link[=as.data.frame]{as.data.frame()}} after removing the 'integer64' class attribute} } \value{ a one-column data.frame containing an integer64 vector } \description{ Coercing integer64 vector to data.frame. } \details{ 'as.data.frame.integer64' is rather not intended to be called directly, but it is required to allow integer64 as data.frame columns. } \note{ This is currently very slow -- any ideas for improvement? } \examples{ as.data.frame.integer64(as.integer64(1:12)) data.frame(a=1:12, b=as.integer64(1:12)) } \seealso{ \code{\link[=cbind.integer64]{cbind.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/bit64S3.Rd0000644000176200001440000000546214706047365013425 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/patch64.R \name{bit64S3} \alias{bit64S3} \alias{:} \alias{:.default} \alias{:.integer64} \alias{is.double} \alias{is.double.default} \alias{is.double.integer64} \alias{match} \alias{match.default} \alias{\%in\%} \alias{\%in\%.default} \alias{rank} \alias{rank.default} \alias{order} \alias{order.default} \alias{mtfrm.integer64} \title{Turning base R functions into S3 generics for bit64} \usage{ from:to is.double(x) match(x, table, ...) x \%in\% table rank(x, ...) order(...) \method{is.double}{default}(x) \method{is.double}{integer64}(x) \method{mtfrm}{integer64}(x) \method{match}{default}(x, table, ...) \method{\%in\%}{default}(x, table) \method{rank}{default}(x, ...) \method{order}{default}(...) 
} \arguments{ \item{x}{integer64 vector: the values to be matched, optionally carrying a cache created with \code{\link[=hashcache]{hashcache()}}} \item{table}{integer64 vector: the values to be matched against, optionally carrying a cache created with \code{\link[=hashcache]{hashcache()}} or \code{\link[=sortordercache]{sortordercache()}}} \item{...}{ignored} \item{from}{scalar denoting first element of sequence} \item{to}{scalar denoting last element of sequence} } \value{ \code{\link[=invisible]{invisible()}} } \description{ Turn those base functions S3 generic which are used in bit64 } \details{ The following functions are turned into S3 generics in order to dispatch methods for \code{\link[=integer64]{integer64()}}: \itemize{ \item \code{\link{:}} \item \code{\link[=is.double]{is.double()}} \item \code{\link[=match]{match()}} \item \code{\link{\%in\%}} \item \code{\link[=rank]{rank()}} \item \code{\link[=order]{order()}} } } \note{ \itemize{ \item \code{\link[=is.double]{is.double()}} returns \code{FALSE} for \code{\link{integer64}} \item \code{\link{:}} currently only dispatches at its first argument, thus \code{as.integer64(1):9} works but \code{1:as.integer64(9)} doesn't \item \code{\link[=match]{match()}} currently only dispatches at its first argument and expects its second argument also to be integer64, otherwise throws an error. Beware of something like \code{match(2, as.integer64(0:3))} \item \code{\link{\%in\%}} currently only dispatches at its first argument and expects its second argument also to be integer64, otherwise throws an error. Beware of something like \code{2 \%in\% as.integer64(0:3)} \item \code{\link[=order]{order()}} currently only orders a single argument, trying more than one raises an error } } \examples{ is.double(as.integer64(1)) as.integer64(1):9 match(as.integer64(2), as.integer64(0:3)) as.integer64(2) \%in\% as.integer64(0:3) unique(as.integer64(c(1,1,2))) rank(as.integer64(c(1,1,2))) order(as.integer64(c(1,NA,2))) } \seealso{ \code{\link[=bit64]{bit64()}}, \link{S3} } \keyword{methods} bit64/man/xor.integer64.Rd0000644000176200001440000000403714705122715014672 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{xor.integer64} \alias{xor.integer64} \alias{binattr} \alias{+.integer64} \alias{-.integer64} \alias{\%/\%.integer64} \alias{\%\%.integer64} \alias{*.integer64} \alias{^.integer64} \alias{/.integer64} \alias{==.integer64} \alias{!=.integer64} \alias{<.integer64} \alias{<=.integer64} \alias{>.integer64} \alias{>=.integer64} \alias{&.integer64} \alias{|.integer64} \title{Binary operators for integer64 vectors} \usage{ binattr(e1, e2) \method{+}{integer64}(e1, e2) \method{-}{integer64}(e1, e2) \method{\%/\%}{integer64}(e1, e2) \method{\%\%}{integer64}(e1, e2) \method{*}{integer64}(e1, e2) \method{^}{integer64}(e1, e2) \method{/}{integer64}(e1, e2) \method{==}{integer64}(e1, e2) \method{!=}{integer64}(e1, e2) \method{<}{integer64}(e1, e2) \method{<=}{integer64}(e1, e2) \method{>}{integer64}(e1, e2) \method{>=}{integer64}(e1, e2) \method{&}{integer64}(e1, e2) \method{|}{integer64}(e1, e2) \method{xor}{integer64}(x, y) } \arguments{ \item{e1}{an atomic vector of class 'integer64'} \item{e2}{an atomic vector of class 'integer64'} \item{x}{an atomic vector of class 'integer64'} \item{y}{an atomic vector of class 'integer64'} } \value{ \code{\link{&}}, \code{\link{|}}, \code{\link[=xor]{xor()}}, \code{\link{!=}}, \code{\link{==}}, \code{\link{<}}, \code{\link{<=}}, \code{\link{>}}, 
\code{\link{>=}} return a logical vector \code{\link{^}} and \code{\link{/}} return a double vector \code{\link{+}}, \code{\link{-}}, \code{\link{*}}, \code{\link{\%/\%}}, \code{\link{\%\%}} return a vector of class 'integer64' } \description{ Binary operators for integer64 vectors. } \examples{ as.integer64(1:12) - 1 options(integer64_semantics="new") d <- 2.5 i <- as.integer64(5) d/i # new 0.5 d*i # new 13 i*d # new 13 options(integer64_semantics="old") d/i # old: 0.4 d*i # old: 10 i*d # old: 13 } \seealso{ \code{\link[=format.integer64]{format.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/identical.integer64.Rd0000644000176200001440000000252414705122715016015 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{identical.integer64} \alias{identical.integer64} \title{Identity function for class 'integer64'} \usage{ identical.integer64( x, y, num.eq = FALSE, single.NA = FALSE, attrib.as.set = TRUE, ignore.bytecode = TRUE, ignore.environment = FALSE, ignore.srcref = TRUE, ... ) } \arguments{ \item{x, y}{Atomic vector of class 'integer64'} \item{num.eq, single.NA, attrib.as.set, ignore.bytecode, ignore.environment, ignore.srcref}{See \code{\link[=identical]{identical()}}.} \item{...}{Passed on to \code{identical()}. Only \verb{extptr.as.ref=} is available as of R 4.4.1, and then only for versions of R >= 4.2.0.} } \value{ A single logical value, \code{TRUE} or \code{FALSE}, never \code{NA} and never anything other than a single value. } \description{ This will discover any deviation between objects containing integer64 vectors. } \details{ This is simply a wrapper to \code{\link[=identical]{identical()}} with default arguments \verb{num.eq = FALSE, single.NA = FALSE}. } \examples{ i64 <- as.double(NA); class(i64) <- "integer64" identical(i64-1, i64+1) identical.integer64(i64-1, i64+1) } \seealso{ \code{\link{==.integer64}} \code{\link[=identical]{identical()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/qtile.Rd0000644000176200001440000000566114705122715013376 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{qtile} \alias{qtile} \alias{qtile.integer64} \alias{quantile.integer64} \alias{median.integer64} \alias{mean.integer64} \alias{summary.integer64} \title{(Q)uan(Tile)s} \usage{ qtile(x, probs = seq(0, 1, 0.25), ...) \method{qtile}{integer64}(x, probs = seq(0, 1, 0.25), names = TRUE, method = NULL, ...) \method{quantile}{integer64}( x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type = 0L, ... ) \method{median}{integer64}(x, na.rm = FALSE, ...) \method{mean}{integer64}(x, na.rm = FALSE, ...) \method{summary}{integer64}(object, ...) } \arguments{ \item{x}{a integer64 vector} \item{probs}{numeric vector of probabilities with values in \verb{[0,1]} - possibly containing \code{NA}s} \item{...}{ignored} \item{names}{logical; if \code{TRUE}, the result has a \code{names} attribute. Set to \code{FALSE} for speedup with many probs.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} \item{na.rm}{logical; if \code{TRUE}, any \code{NA} and \code{NaN}'s are removed from \code{x} before the quantiles are computed.} \item{type}{an integer selecting the quantile algorithm, currently only 0 is supported, see details} \item{object}{a integer64 vector} } \value{ \code{prank} returns a numeric vector of the same length as \code{x}. 
\code{qtile} returns a vector with elements from \code{x} at the relative positions specified by \code{probs}. } \description{ Function \code{\link[=prank.integer64]{prank.integer64()}} projects the values \verb{[min..max]} via ranks \verb{[1..n]} to \verb{[0..1]}. } \details{ \code{qtile.ineger64} is the inverse function of 'prank.integer64' and projects \verb{[0..1]} to \verb{[min..max]}. Functions \code{quantile.integer64} with \code{type=0} and \code{median.integer64} are convenience wrappers to \code{qtile}. Function \code{qtile} behaves very similar to \code{quantile.default} with \code{type=1} in that it only returns existing values, it is mostly symmetric but it is using 'round' rather than 'floor'. Note that this implies that \code{median.integer64} does not interpolate for even number of values (interpolation would create values that could not be represented as 64-bit integers). This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link{sortqtl}} (fast sorting) \item \code{\link{orderqtl}} (memory saving ordering). } } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) qtile(x, probs=seq(0, 1, 0.25)) quantile(x, probs=seq(0, 1, 0.25), na.rm=TRUE) median(x, na.rm=TRUE) summary(x) x <- x[!is.na(x)] stopifnot(identical(x, unname(qtile(x, probs=prank(x))))) } \seealso{ \code{\link[=rank.integer64]{rank.integer64()}} for simple ranks and \code{\link[=quantile]{quantile()}} for quantiles. } \keyword{univar} bit64/man/extract.replace.integer64.Rd0000644000176200001440000000226614705122715017150 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{extract.replace.integer64} \alias{extract.replace.integer64} \alias{[.integer64} \alias{[<-.integer64} \alias{[[.integer64} \alias{[[<-.integer64} \title{Extract or Replace Parts of an integer64 vector} \usage{ \method{[}{integer64}(x, i, ...) \method{[}{integer64}(x, ...) <- value \method{[[}{integer64}(x, ...) \method{[[}{integer64}(x, ...) <- value } \arguments{ \item{x}{an atomic vector} \item{i}{indices specifying elements to extract} \item{...}{further arguments to the \code{\link[=NextMethod]{NextMethod()}}} \item{value}{an atomic vector with values to be assigned} } \value{ A vector or scalar of class 'integer64' } \description{ Methods to extract and replace parts of an integer64 vector. } \note{ You should not subscript non-existing elements and not use \code{NA}s as subscripts. The current implementation returns \code{9218868437227407266} instead of \code{NA}. } \examples{ as.integer64(1:12)[1:3] x <- as.integer64(1:12) dim(x) <- c(3,4) x x[] x[,2:3] } \seealso{ \code{\link[base:Extract]{[}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/hashcache.Rd0000644000176200001440000000446614705122715014171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cache.R \name{hashcache} \alias{hashcache} \alias{sortcache} \alias{sortordercache} \alias{ordercache} \title{Big caching of hashing, sorting, ordering} \usage{ hashcache(x, nunique = NULL, ...) 
sortcache(x, has.na = NULL) sortordercache(x, has.na = NULL, stable = NULL) ordercache(x, has.na = NULL, stable = NULL, optimize = "time") } \arguments{ \item{x}{an atomic vector (note that currently only integer64 is supported)} \item{nunique}{giving \emph{correct} number of unique elements can help reducing the size of the hashmap} \item{...}{passed to \code{\link[=hashmap]{hashmap()}}} \item{has.na}{boolean scalar defining whether the input vector might contain \code{NA}s. If we know we don't have \code{NA}s, this may speed-up. \emph{Note} that you risk a crash if there are unexpected \code{NA}s with \code{has.na=FALSE}.} \item{stable}{boolean scalar defining whether stable sorting is needed. Allowing non-stable may speed-up.} \item{optimize}{by default ramsort optimizes for 'time' which requires more RAM, set to 'memory' to minimize RAM requirements and sacrifice speed.} } \value{ \code{x} with a \code{\link[=cache]{cache()}} that contains the result of the expensive operations, possible together with small derived information (such as \code{\link[=nunique.integer64]{nunique.integer64()}}) and previously cached results. } \description{ Functions to create cache that accelerates many operations } \details{ The result of relative expensive operations \code{\link[=hashmap]{hashmap()}}, \code{\link[bit:Sorting]{bit::ramsort()}}, \code{\link[bit:Sorting]{bit::ramsortorder()}}, and \code{\link[bit:Sorting]{bit::ramorder()}} can be stored in a cache in order to avoid multiple excutions. Unless in very specific situations, the recommended method is \code{hashsortorder} only. } \note{ Note that we consider storing the big results from sorting and/or ordering as a relevant side-effect, and therefore storing them in the cache should require a conscious decision of the user. } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) sortordercache(x) } \seealso{ \code{\link[=cache]{cache()}} for caching functions and \code{\link[=nunique.integer64]{nunique.integer64()}} for methods benefiting from small caches } \keyword{environment} bit64/man/optimizer64.data.Rd0000644000176200001440000001040614705122715015355 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{optimizer64.data} \alias{optimizer64.data} \title{Results of performance measurement on a Core i7 Lenovo T410 8 GB RAM under Windows 7 64bit} \format{ The format is: \if{html}{\out{
}}\preformatted{List of 16 $ : num [1:9, 1:3] 0 0 1.63 0.00114 2.44 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:9] "match" "match.64" "hashpos" "hashrev" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:10, 1:3] 0 0 0 1.62 0.00114 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:10] "\%in\%" "match.64" "\%in\%.64" "hashfin" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:10, 1:3] 0 0 0.00105 0.00313 0.00313 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:10] "duplicated" "duplicated.64" "hashdup" "sortorderdup1" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:15, 1:3] 0 0 0 0.00104 0.00104 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:15] "unique" "unique.64" "hashmapuni" "hashuni" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:14, 1:3] 0 0 0 0.000992 0.000992 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:14] "unique" "unipos.64" "hashmapupo" "hashupo" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:13, 1:3] 0 0 0 0 0.000419 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:13] "tabulate" "table" "table.64" "hashmaptab" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:7, 1:3] 0 0 0 0.00236 0.00714 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:7] "rank" "rank.keep" "rank.64" "sortorderrnk" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:6, 1:3] 0 0 0.00189 0.00714 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:6] "quantile" "quantile.64" "sortqtl" "orderqtl" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:9, 1:3] 0 0 0.00105 1.17 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:9] "match" "match.64" "hashpos" "hashrev" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:10, 1:3] 0 0 0 0.00104 1.18 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:10] "\%in\%" "match.64" "\%in\%.64" "hashfin" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:10, 1:3] 0 0 1.64 2.48 2.48 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:10] "duplicated" "duplicated.64" "hashdup" "sortorderdup1" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:15, 1:3] 0 0 0 1.64 1.64 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:15] "unique" "unique.64" "hashmapuni" "hashuni" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:14, 1:3] 0 0 0 1.62 1.62 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:14] "unique" "unipos.64" "hashmapupo" "hashupo" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:13, 1:3] 0 0 0 0 0.32 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:13] "tabulate" "table" "table.64" "hashmaptab" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:7, 1:3] 0 0 0 2.96 10.69 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:7] "rank" "rank.keep" "rank.64" "sortorderrnk" ... .. ..$ : chr [1:3] "prep" "both" "use" $ : num [1:6, 1:3] 0 0 1.62 10.61 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:6] "quantile" "quantile.64" "sortqtl" "orderqtl" ... .. ..$ : chr [1:3] "prep" "both" "use" - attr(*, "dim")= int [1:2] 8 2 - attr(*, "dimnames")=List of 2 ..$ : chr [1:8] "match" "\%in\%" "duplicated" "unique" ... ..$ : chr [1:2] "65536" "33554432" }\if{html}{\out{
}} } \usage{ data(optimizer64.data) } \description{ These are the results of calling \code{\link[=optimizer64]{optimizer64()}} } \examples{ data(optimizer64.data) print(optimizer64.data) oldpar <- par(no.readonly = TRUE) par(mfrow=c(2,1)) par(cex=0.7) for (i in 1:nrow(optimizer64.data)){ for (j in 1:2){ tim <- optimizer64.data[[i,j]] barplot(t(tim)) if (rownames(optimizer64.data)[i]=="match") title(paste("match", colnames(optimizer64.data)[j], "in", colnames(optimizer64.data)[3-j])) else if (rownames(optimizer64.data)[i]=="\%in\%") title(paste(colnames(optimizer64.data)[j], "\%in\%", colnames(optimizer64.data)[3-j])) else title(paste(rownames(optimizer64.data)[i], colnames(optimizer64.data)[j])) } } par(mfrow=c(1,1)) } \keyword{datasets} bit64/man/ramsort.integer64.Rd0000644000176200001440000001306014705122715015545 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sort64.R \name{ramsort.integer64} \alias{ramsort.integer64} \alias{shellsort.integer64} \alias{shellsortorder.integer64} \alias{shellorder.integer64} \alias{mergesort.integer64} \alias{mergeorder.integer64} \alias{mergesortorder.integer64} \alias{quicksort.integer64} \alias{quicksortorder.integer64} \alias{quickorder.integer64} \alias{radixsort.integer64} \alias{radixsortorder.integer64} \alias{radixorder.integer64} \alias{ramsortorder.integer64} \alias{ramorder.integer64} \title{Low-level intger64 methods for in-RAM sorting and ordering} \usage{ \method{shellsort}{integer64}(x, has.na = TRUE, na.last = FALSE, decreasing = FALSE, ...) \method{shellsortorder}{integer64}(x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, ...) \method{shellorder}{integer64}(x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, ...) \method{mergesort}{integer64}(x, has.na = TRUE, na.last = FALSE, decreasing = FALSE, ...) \method{mergeorder}{integer64}(x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, ...) \method{mergesortorder}{integer64}(x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, ...) \method{quicksort}{integer64}( x, has.na = TRUE, na.last = FALSE, decreasing = FALSE, restlevel = floor(1.5 * log2(length(x))), ... ) \method{quicksortorder}{integer64}( x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, restlevel = floor(1.5 * log2(length(x))), ... ) \method{quickorder}{integer64}( x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, restlevel = floor(1.5 * log2(length(x))), ... ) \method{radixsort}{integer64}( x, has.na = TRUE, na.last = FALSE, decreasing = FALSE, radixbits = 8L, ... ) \method{radixsortorder}{integer64}( x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, radixbits = 8L, ... ) \method{radixorder}{integer64}( x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, radixbits = 8L, ... ) \method{ramsort}{integer64}( x, has.na = TRUE, na.last = FALSE, decreasing = FALSE, stable = TRUE, optimize = c("time", "memory"), VERBOSE = FALSE, ... ) \method{ramsortorder}{integer64}( x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, stable = TRUE, optimize = c("time", "memory"), VERBOSE = FALSE, ... ) \method{ramorder}{integer64}( x, i, has.na = TRUE, na.last = FALSE, decreasing = FALSE, stable = TRUE, optimize = c("time", "memory"), VERBOSE = FALSE, ... ) } \arguments{ \item{x}{a vector to be sorted by \code{\link[=ramsort.integer64]{ramsort.integer64()}} and \code{\link[=ramsortorder.integer64]{ramsortorder.integer64()}}, i.e. 
the output of \code{\link[=sort.integer64]{sort.integer64()}}} \item{has.na}{boolean scalar defining whether the input vector might contain \code{NA}s. If we know we don't have NAs, this may speed things up. \emph{Note} that you risk a crash if there are unexpected \code{NA}s with \code{has.na=FALSE}} \item{na.last}{boolean scalar telling ramsort whether to sort \code{NA}s last or first. \emph{Note} that 'boolean' means that there is no third option \code{NA} as in \code{\link[=sort]{sort()}}} \item{decreasing}{boolean scalar telling ramsort whether to sort increasing or decreasing} \item{...}{further arguments, passed from generics, ignored in methods} \item{i}{integer positions to be modified by \code{\link[=ramorder.integer64]{ramorder.integer64()}} and \code{\link[=ramsortorder.integer64]{ramsortorder.integer64()}}, default is 1:n, in this case the output is similar to \code{\link[=order.integer64]{order.integer64()}}} \item{restlevel}{number of remaining recursion levels before \code{quicksort} switches from recursing to \code{shellsort}} \item{radixbits}{size of radix in bits} \item{stable}{boolean scalar defining whether stable sorting is needed. Allowing non-stable sorting may speed things up.} \item{optimize}{by default ramsort optimizes for 'time' which requires more RAM, set to 'memory' to minimize RAM requirements and sacrifice speed} \item{VERBOSE}{cat some info about the chosen method} } \value{ These functions return the number of \code{NAs} found or assumed during sorting } \description{ Fast low-level methods for sorting and ordering. The \code{..sortorder} methods do sorting and ordering at once, which requires more RAM than ordering but is (almost) as fast as sorting. } \details{ See \code{\link[bit:Sorting]{bit::ramsort()}} } \note{ Note that these methods purposely violate the functional programming paradigm: they are called for the side-effect of changing some of their arguments. The \code{sort}-methods change \code{x}, the \code{order}-methods change \code{i}, and the \code{sortorder}-methods change both \code{x} and \code{i} } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) x message("ramsort example") s <- clone(x) ramsort(s) message("s has been changed in-place - whether or not ramsort uses an in-place algorithm") s message("ramorder example") s <- clone(x) o <- seq_along(s) ramorder(s, o) message("o has been changed in-place - s remains unchanged") s o s[o] message("ramsortorder example") o <- seq_along(s) ramsortorder(s, o) message("s and o have both been changed in-place - this is much faster") s o } \seealso{ \code{\link[bit:Sorting]{bit::ramsort()}} for the generic, \code{ramsort.default} for the methods provided by package ff, \code{\link[=sort.integer64]{sort.integer64()}} for the sort interface and \code{\link[=sortcache]{sortcache()}} for caching the work of sorting } \keyword{manip} \keyword{programming} bit64/man/benchmark64.data.Rd0000644000176200001440000000232214705122715015263 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{benchmark64.data} \alias{benchmark64.data} \title{Results of performance measurement on a Core i7 Lenovo T410 8 GB RAM under Windows 7 64bit} \format{ The format is: \if{html}{\out{
}}\preformatted{num [1:16, 1:6] 2.55e-05 2.37 2.39 1.28 1.39 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:16] "cache" "match(s,b)" "s \%in\% b" "match(b,s)" ... ..$ : chr [1:6] "32-bit" "64-bit" "hashcache" "sortordercache" ... }\if{html}{\out{
}} } \usage{ data(benchmark64.data) } \description{ These are the results of calling \code{\link[=benchmark64]{benchmark64()}} } \examples{ data(benchmark64.data) print(benchmark64.data) matplot(log2(benchmark64.data[-1,1]/benchmark64.data[-1,]) , pch=c("3", "6", "h", "s", "o", "a") , xlab="tasks [last=session]" , ylab="log2(relative speed) [bigger is better]" ) matplot(t(log2(benchmark64.data[-1,1]/benchmark64.data[-1,])) , axes=FALSE , type="b" , lwd=c(rep(1, 14), 3) , xlab="context" , ylab="log2(relative speed) [bigger is better]" ) axis(1 , labels=c("32-bit", "64-bit", "hash", "sortorder", "order", "hash+sortorder") , at=1:6 ) axis(2) } \keyword{datasets} bit64/man/match.integer64.Rd0000644000176200001440000001144214705122715015154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{match.integer64} \alias{match.integer64} \alias{\%in\%.integer64} \title{64-bit integer matching} \usage{ \method{match}{integer64}(x, table, nomatch = NA_integer_, nunique = NULL, method = NULL, ...) \method{\%in\%}{integer64}(x, table, ...) } \arguments{ \item{x}{integer64 vector: the values to be matched, optionally carrying a cache created with \code{\link[=hashcache]{hashcache()}}} \item{table}{integer64 vector: the values to be matched against, optionally carrying a cache created with \code{\link[=hashcache]{hashcache()}} or \code{\link[=sortordercache]{sortordercache()}}} \item{nomatch}{the value to be returned in the case when no match is found. Note that it is coerced to integer.} \item{nunique}{NULL or the number of unique values of table (including NA). Providing \code{nunique} can speed up matching when \code{table} has no cache. Note that a wrong \code{nunique} can cause undefined behaviour up to a crash.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} \item{...}{ignored} } \value{ A vector of the same length as \code{x}. \code{match}: An integer vector giving the position in \code{table} of the first match if there is a match, otherwise \code{nomatch}. If \code{x[i]} is found to equal \code{table[j]} then the value returned in the \code{i}-th position of the return value is \code{j}, for the smallest possible \code{j}. If no match is found, the value is \code{nomatch}. \code{\%in\%}: A logical vector, indicating if a match was located for each element of \code{x}: thus the values are \code{TRUE} or \code{FALSE} and never \code{NA}. } \description{ \code{match} returns a vector of the positions of (first) matches of its first argument in its second. \code{\%in\%} is a more intuitive interface as a binary operator, which returns a logical vector indicating if there is a match or not for its left operand. } \details{ These functions automatically choose from several low-level functions considering the size of \code{x} and \code{table} and the availability of caches. Suitable methods for \code{match.integer64} are \itemize{ \item \code{\link{hashpos}} (hash table lookup) \item \code{\link{hashrev}} (reverse lookup) \item \code{\link{sortorderpos}} (fast ordering) \item \code{\link{orderpos}} (memory saving ordering). } Suitable methods for \verb{\%in\%.integer64} are \itemize{ \item \code{\link{hashfin}} (hash table lookup) \item \code{\link{hashrin}} (reverse lookup) \item \code{\link{sortfin}} (fast sorting) \item \code{\link{orderfin}} (memory saving ordering). 
} } \examples{ x <- as.integer64(c(NA, 0:9), 32) table <- as.integer64(c(1:9, NA)) match.integer64(x, table) "\%in\%.integer64"(x, table) x <- as.integer64(sample(c(rep(NA, 9), 0:9), 32, TRUE)) table <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) stopifnot(identical(match.integer64(x, table), match(as.integer(x), as.integer(table)))) stopifnot(identical("\%in\%.integer64"(x, table), as.integer(x) \%in\% as.integer(table))) \dontrun{ message("check when reverse hash-lookup beats standard hash-lookup") e <- 4:24 timx <- timy <- matrix(NA, length(e), length(e), dimnames=list(e,e)) for (iy in seq_along(e)) for (ix in 1:iy){ nx <- 2^e[ix] ny <- 2^e[iy] x <- as.integer64(sample(ny, nx, FALSE)) y <- as.integer64(sample(ny, ny, FALSE)) #hashfun(x, bits=as.integer(5)) timx[ix,iy] <- repeat.time({ hx <- hashmap(x) py <- hashrev(hx, y) })[3] timy[ix,iy] <- repeat.time({ hy <- hashmap(y) px <- hashpos(hy, x) })[3] #identical(px, py) print(round(timx[1:iy,1:iy]/timy[1:iy,1:iy], 2), na.print="") } message("explore best low-level method given size of x and table") B1 <- 1:27 B2 <- 1:27 tim <- array(NA, dim=c(length(B1), length(B2), 5) , dimnames=list(B1, B2, c("hashpos","hashrev","sortpos1","sortpos2","sortpos3"))) for (i1 in B1) for (i2 in B2) { b1 <- B1[i1] b2 <- B1[i2] n1 <- 2^b1 n2 <- 2^b2 x1 <- as.integer64(c(sample(n2, n1-1, TRUE), NA)) x2 <- as.integer64(c(sample(n2, n2-1, TRUE), NA)) tim[i1,i2,1] <- repeat.time({h <- hashmap(x2);hashpos(h, x1);rm(h)})[3] tim[i1,i2,2] <- repeat.time({h <- hashmap(x1);hashrev(h, x2);rm(h)})[3] s <- clone(x2); o <- seq_along(s); ramsortorder(s, o) tim[i1,i2,3] <- repeat.time(sortorderpos(s, o, x1, method=1))[3] tim[i1,i2,4] <- repeat.time(sortorderpos(s, o, x1, method=2))[3] tim[i1,i2,5] <- repeat.time(sortorderpos(s, o, x1, method=3))[3] rm(s,o) print(apply(tim, 1:2, function(ti)if(any(is.na(ti)))NA else which.min(ti))) } } } \seealso{ \code{\link[=match]{match()}} } \keyword{logic} \keyword{manip} bit64/man/duplicated.integer64.Rd0000644000176200001440000000316714705122715016203 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{duplicated.integer64} \alias{duplicated.integer64} \title{Determine Duplicate Elements of integer64} \usage{ \method{duplicated}{integer64}(x, incomparables = FALSE, nunique = NULL, method = NULL, ...) } \arguments{ \item{x}{a vector or a data frame or an array or \code{NULL}.} \item{incomparables}{ignored} \item{nunique}{NULL or the number of unique values (including NA). Providing \code{nunique} can speed-up matching when \code{x} has no cache. Note that a wrong \code{nunique} can cause undefined behaviour up to a crash.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} \item{...}{ignored} } \value{ \code{duplicated()}: a logical vector of the same length as \code{x}. } \description{ \code{duplicated()} determines which elements of a vector or data frame are duplicates of elements with smaller subscripts, and returns a logical vector indicating which elements (rows) are duplicates. } \details{ This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link{hashdup}} (hashing) \item \code{\link{sortorderdup}} (fast ordering) \item \code{\link{orderdup}} (memory saving ordering). 
} } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) duplicated(x) stopifnot(identical(duplicated(x), duplicated(as.integer(x)))) } \seealso{ \code{\link[=duplicated]{duplicated()}}, \code{\link[=unique.integer64]{unique.integer64()}} } \keyword{logic} \keyword{manip} bit64/man/prank.Rd0000644000176200001440000000223514705122715013365 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{prank} \alias{prank} \alias{prank.integer64} \title{(P)ercent (Rank)s} \usage{ prank(x, ...) \method{prank}{integer64}(x, method = NULL, ...) } \arguments{ \item{x}{a integer64 vector} \item{...}{ignored} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} } \value{ \code{prank} returns a numeric vector of the same length as \code{x}. } \description{ Function \code{prank.integer64} projects the values \verb{[min..max]} via ranks \verb{[1..n]} to \verb{[0..1]}. \code{\link[=qtile.integer64]{qtile.integer64()}} is the inverse function of 'prank.integer64' and projects \verb{[0..1]} to \verb{[min..max]}. } \details{ Function \code{prank.integer64} is based on \code{\link[=rank.integer64]{rank.integer64()}}. } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) prank(x) x <- x[!is.na(x)] stopifnot(identical(x, unname(qtile(x, probs=prank(x))))) } \seealso{ \code{\link[=rank.integer64]{rank.integer64()}} for simple ranks and \code{\link[=qtile]{qtile()}} for the inverse function quantiles. } \keyword{univar} bit64/man/as.integer64.character.Rd0000644000176200001440000000536114705122715016421 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \docType{data} \name{as.integer64.character} \alias{as.integer64.character} \alias{as.integer64} \alias{as.integer64.NULL} \alias{as.integer64.integer64} \alias{as.integer64.double} \alias{as.integer64.integer} \alias{as.integer64.logical} \alias{as.integer64.factor} \alias{as.integer64.bitstring} \alias{NA_integer64_} \title{Coerce to integer64} \format{ An object of class \code{integer64} of length 1. } \usage{ as.integer64(x, ...) \method{as.integer64}{`NULL`}(x, ...) \method{as.integer64}{integer64}(x, ...) \method{as.integer64}{double}(x, keep.names = FALSE, ...) \method{as.integer64}{integer}(x, ...) \method{as.integer64}{logical}(x, ...) \method{as.integer64}{character}(x, ...) \method{as.integer64}{factor}(x, ...) \method{as.integer64}{bitstring}(x, ...) NA_integer64_ } \arguments{ \item{x}{an atomic vector} \item{...}{further arguments to the \code{\link[=NextMethod]{NextMethod()}}} \item{keep.names}{FALSE, set to TRUE to keep a names vector} } \value{ The other methods return atomic vectors of the expected types } \description{ Methods to coerce from other atomic types to integer64. } \details{ \code{as.integer64.character} is realized using C function \code{strtoll} which does not support scientific notation. Instead of '1e6' use '1000000'. \code{as.integer64.bitstring} evaluates characters '0' and ' ' as zero-bit, all other one byte characters as one-bit, multi-byte characters are not allowed, strings shorter than 64 characters are treated as if they were left-padded with '0', strings longer than 64 bytes are mapped to \code{NA_INTEGER64} and a warning is emitted. 
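As a small illustration of the point about scientific notation (the exact result of parsing a string such as '1e6' is not guaranteed here, but it will not be the intended one million):

\preformatted{
as.integer64("1000000")  # write the digits out in full when coercing from character
as.integer64(1e6)        # coercing from a double literal works as usual
as.integer64("1e6")      # not 1000000: strtoll does not parse scientific notation
}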
} \examples{ as.integer64(as.character(lim.integer64())) as.integer64( structure(c("1111111111111111111111111111111111111111111111111111111111111110", "1111111111111111111111111111111111111111111111111111111111111111", "1000000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000010" ), class = "bitstring") ) as.integer64( structure(c("............................................................... ", "................................................................", ". ", "", ".", "10" ), class = "bitstring") ) } \seealso{ \code{\link[=as.character.integer64]{as.character.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{datasets} \keyword{manip} bit64/man/rep.integer64.Rd0000644000176200001440000000143514705122715014647 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{rep.integer64} \alias{rep.integer64} \title{Replicate elements of integer64 vectors} \arguments{ \item{x}{a vector of 'integer64' to be replicated} \item{...}{further arguments passed to \code{\link[=NextMethod]{NextMethod()}}} } \value{ \code{\link[=rep]{rep()}} returns a integer64 vector } \description{ Replicate elements of integer64 vectors } \examples{ rep(as.integer64(1:2), 6) rep(as.integer64(1:2), c(6,6)) rep(as.integer64(1:2), length.out=6) } \seealso{ \code{\link[=c.integer64]{c.integer64()}} \code{\link[=rep.integer64]{rep.integer64()}} \code{\link[=as.data.frame.integer64]{as.data.frame.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/plusclass.Rd0000644000176200001440000000137214705122715014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{plusclass} \alias{plusclass} \alias{minusclass} \title{integer64: Maintaining S3 class attribute} \usage{ minusclass(class, whichclass) } \arguments{ \item{class}{NULL or a character vector of class attributes} \item{whichclass}{the (single) class name to add or remove from the class vector} } \value{ NULL or a character vector of class attributes } \description{ Maintaining integer64 S3 class attribute. } \examples{ plusclass("inheritingclass","integer64") minusclass(c("inheritingclass","integer64"), "integer64") } \seealso{ \code{\link[=oldClass]{oldClass()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{internal} \keyword{manip} bit64/man/table.integer64.Rd0000644000176200001440000001316114705122715015147 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{table.integer64} \alias{table.integer64} \title{Cross Tabulation and Table Creation for integer64} \usage{ table.integer64( ..., return = c("table", "data.frame", "list"), order = c("values", "counts"), nunique = NULL, method = NULL, dnn = list.names(...), deparse.level = 1L ) } \arguments{ \item{...}{one or more objects which can be interpreted as factors (including character strings), or a list (or data frame) whose components can be so interpreted. 
(For \code{as.table} and \code{as.data.frame}, arguments passed to specific methods.)} \item{return}{choose the return format, see details} \item{order}{By default results are created sorted by "values", or by "counts"} \item{nunique}{NULL or the number of unique values of table (including NA). Providing \code{nunique} can speed-up matching when \code{table} has no cache. Note that a wrong \code{nunique} can cause undefined behaviour up to a crash.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} \item{dnn}{the names to be given to the dimensions in the result (the \emph{dimnames names}).} \item{deparse.level}{controls how the default \code{dnn} is constructed. See Details.} } \value{ By default (with \code{return="table"}) \code{\link[=table]{table()}} returns a \emph{contingency table}, an object of class \code{"table"}, an array of integer values. Note that unlike S the result is always an array, a 1D array if one factor is given. Note also that for multidimensional arrays this is a \emph{dense} return structure which can dramatically increase RAM requirements (for large arrays with high mutual information, i.e. many possible input combinations of which only few occur) and that \code{\link[=table]{table()}} is limited to \code{2^31} possible combinations (e.g. two input vectors with 46340 unique values only). Finally note that the tabulated values or value-combinations are represented as \code{dimnames} and that the implied conversion of values to strings can cause \emph{severe} performance problems since each string needs to be integrated into R's global string cache. You can use the other \verb{return=} options to cope with these problems, the potential combination limit is increased from \code{2^31} to \code{2^63} with these options, RAM is only required for observed combinations and string conversion is avoided. With \code{return="data.frame"} you get a \emph{dense} representation as a \code{\link[=data.frame]{data.frame()}} (like that resulting from \code{as.data.frame(table(...))}) where only observed combinations are listed (each as a data.frame row) with the corresponding frequency counts (the latter as component named by \code{responseName}). This is the inverse of \code{\link[=xtabs]{xtabs()}}. With \code{return="list"} you also get a \emph{dense} representation as a simple \code{\link[=list]{list()}} with components \itemize{ \item \code{values} a integer64 vector of the technically tabulated values, for 1D this is the tabulated values themselves, for kD these are the values representing the potential combinations of input values \item \code{counts} the frequency counts \item \code{dims} only for kD: a list with the vectors of the unique values of the input dimensions } } \description{ \code{table.integer64} uses the cross-classifying integer64 vectors to build a contingency table of the counts at each combination of vector values. } \details{ This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link{hashmaptab}} (simultaneously creating and using a hashmap) \item \code{\link{hashtab}} (first creating a hashmap then using it) \item \code{\link{sortordertab}} (fast ordering) \item \code{\link{ordertab}} (memory saving ordering). } If the argument \code{dnn} is not supplied, the internal function \code{list.names} is called to compute the 'dimname names'. 
If the arguments in \code{...} are named, those names are used. For the remaining arguments, \code{deparse.level = 0} gives an empty name, \code{deparse.level = 1} uses the supplied argument if it is a symbol, and \code{deparse.level = 2} will deparse the argument. Arguments \code{exclude}, \code{useNA}, are not supported, i.e. \code{NA}s are always tabulated, and, different from \code{\link[=table]{table()}} they are sorted first if \code{order="values"}. } \note{ Note that by using \code{\link[=as.integer64.factor]{as.integer64.factor()}} we can also input factors into \code{table.integer64} -- only the \code{\link[=levels]{levels()}} get lost. } \examples{ message("pure integer64 examples") x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) y <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) z <- sample(c(rep(NA, 9), letters), 32, TRUE) table.integer64(x) table.integer64(x, order="counts") table.integer64(x, y) table.integer64(x, y, return="data.frame") message("via as.integer64.factor we can use 'table.integer64' also for factors") table.integer64(x, as.integer64(as.factor(z))) } \seealso{ \code{\link[=table]{table()}} for more info on the standard version coping with Base R's data types, \code{\link[=tabulate]{tabulate()}} which can faster tabulate \code{\link{integer}}s with a limited range \verb{[1L .. nL not too big]}, \code{\link[=unique.integer64]{unique.integer64()}} for the unique values without counting them and \code{\link[=unipos.integer64]{unipos.integer64()}} for the positions of the unique values. } \concept{contingency table} \concept{counts} \concept{frequencies} \concept{occurrences} \keyword{category} bit64/man/cumsum.integer64.Rd0000644000176200001440000000252514705122715015373 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{cumsum.integer64} \alias{cumsum.integer64} \alias{diff.integer64} \alias{cummin.integer64} \alias{cummax.integer64} \alias{cumprod.integer64} \title{Cumulative Sums, Products, Extremes and lagged differences} \usage{ \method{diff}{integer64}(x, lag = 1L, differences = 1L, ...) \method{cummin}{integer64}(x) \method{cummax}{integer64}(x) \method{cumsum}{integer64}(x) \method{cumprod}{integer64}(x) } \arguments{ \item{x}{an atomic vector of class 'integer64'} \item{lag}{see \code{\link[=diff]{diff()}}} \item{differences}{see \code{\link[=diff]{diff()}}} \item{...}{ignored} } \value{ \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}} , \code{\link[=cumsum]{cumsum()}} and \code{\link[=cumprod]{cumprod()}} return a integer64 vector of the same length as their input \code{\link[=diff]{diff()}} returns a integer64 vector shorter by \code{lag*differences} elements } \description{ Cumulative Sums, Products, Extremes and lagged differences } \examples{ cumsum(rep(as.integer64(1), 12)) diff(as.integer64(c(0,1:12))) cumsum(as.integer64(c(0, 1:12))) diff(cumsum(as.integer64(c(0,0,1:12))), differences=2) } \seealso{ \code{\link[=sum.integer64]{sum.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/unique.integer64.Rd0000644000176200001440000000534414705122715015372 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{unique.integer64} \alias{unique.integer64} \title{Extract Unique Elements from integer64} \usage{ \method{unique}{integer64}( x, incomparables = FALSE, order = c("original", "values", "any"), nunique = NULL, method = NULL, ... 
) } \arguments{ \item{x}{a vector or a data frame or an array or \code{NULL}.} \item{incomparables}{ignored} \item{order}{The order in which unique values will be returned, see details} \item{nunique}{NULL or the number of unique values (including NA). Providing \code{nunique} can speed-up matching when \code{x} has no cache. Note that a wrong `nunique`` can cause undefined behaviour up to a crash.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} \item{...}{ignored} } \value{ For a vector, an object of the same type of \code{x}, but with only one copy of each duplicated element. No attributes are copied (so the result has no names). } \description{ \code{unique} returns a vector like \code{x} but with duplicate elements/rows removed. } \details{ This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link{hashmapuni}} (simultaneously creating and using a hashmap) \item \code{\link{hashuni}} (first creating a hashmap then using it) \item \code{\link{sortuni}} (fast sorting for sorted order only) \item \code{\link{sortorderuni}} (fast ordering for original order only) \item \code{\link{orderuni}} (memory saving ordering). } The default \code{order="original"} returns unique values in the order of the first appearance in \code{x} like in \code{\link[=unique]{unique()}}, this costs extra processing. \code{order="values"} returns unique values in sorted order like in \code{\link[=table]{table()}}, this costs extra processing with the hash methods but comes for free. \code{order="any"} returns unique values in undefined order, possibly faster. For hash methods this will be a quasi random order, for sort methods this will be sorted order. } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) unique(x) unique(x, order="values") stopifnot(identical(unique(x), x[!duplicated(x)])) stopifnot(identical(unique(x), as.integer64(unique(as.integer(x))))) stopifnot(identical(unique(x, order="values") , as.integer64(sort(unique(as.integer(x)), na.last=FALSE)))) } \seealso{ \code{\link[=unique]{unique()}} for the generic, \code{\link[=unipos]{unipos()}} which gives the indices of the unique elements and \code{\link[=table.integer64]{table.integer64()}} which gives frequencies of the unique elements. } \keyword{logic} \keyword{manip} bit64/man/bit64-package.Rd0000644000176200001440000011531714705122715014601 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bit64-package.R, R/integer64.R \docType{package} \name{bit64-package} \alias{bit64-package} \alias{bit64} \alias{is.integer.integer64} \alias{is.vector.integer64} \alias{integer64} \alias{is.integer64} \alias{length<-.integer64} \alias{print.integer64} \alias{str.integer64} \title{A S3 class for vectors of 64bit integers} \usage{ integer64(length = 0L) is.integer64(x) \method{length}{integer64}(x) <- value \method{print}{integer64}(x, quote = FALSE, ...) \method{str}{integer64}( object, vec.len = strO$vec.len, give.head = TRUE, give.length = give.head, ... 
) } \arguments{ \item{length}{length of vector using \code{\link[=integer]{integer()}}} \item{x}{an integer64 vector} \item{value}{an integer64 vector of values to be assigned} \item{quote}{logical, indicating whether or not strings should be printed with surrounding quotes.} \item{...}{further arguments to the \code{\link[=NextMethod]{NextMethod()}}} \item{object}{an integer64 vector} \item{vec.len, give.head, give.length}{see \code{\link[utils:str]{utils::str()}}} } \value{ \code{integer64} returns a vector of 'integer64', i.e., a vector of \code{\link[=double]{double()}} decorated with class 'integer64'. } \description{ Package 'bit64' provides fast serializable S3 atomic 64bit (signed) integers that can be used in vectors, matrices, arrays and data.frames. Methods are available for coercion from and to logicals, integers, doubles, characters and factors as well as many elementwise and summary functions. \subsection{Version 0.8}{ With 'integer64' vectors you can store very large integers at the expense of 64 bits, which is a factor of 7 better than 'int64' from package 'int64'. Due to the smaller memory footprint, the atomic vector architecture and using only S3 instead of S4 classes, most operations are one to three orders of magnitude faster: Example speedups are 4x for serialization, 250x for adding, 900x for coercion and 2000x for object creation. Also 'integer64' avoids an ongoing (potentially infinite) penalty for garbage collection observed during existence of 'int64' objects (see code in example section). } \subsection{Version 0.9}{ Package 'bit64' - which extends R with fast 64-bit integers - now has fast (single-threaded) implementations of the most important univariate algorithmic operations (those based on hashing and sorting). We now have methods for 'match', '\%in\%', 'duplicated', 'unique', 'table', 'sort', 'order', 'rank', 'quantile', 'median' and 'summary'. Regarding data management we also have novel generics 'unipos' (positions of the unique values), 'tiepos' (positions of ties), 'keypos' (positions of foreign keys in a sorted dimension table) and derived methods 'as.factor' and 'as.ordered'. This 64-bit functionality is implemented carefully not to be slower than the respective 32-bit operations in Base R and also to avoid outlying waiting times observed with 'order', 'rank' and 'table' (speedup factors 20/16/200 respectively). This increases the dataset size with which we can work truly interactively. The speed is achieved by simple heuristic optimizers in high-level functions choosing the best from multiple low-level algorithms and further taking advantage of novel caching if activated. In an example R session using a couple of these operations the 64-bit integers performed 22x faster than base 32-bit integers, hash-caching improved this to 24x, sortorder-caching was most efficient with 38x (caching hashing and sorting is not worth it with 32x at duplicated RAM consumption). } } \note{ \code{integer64} are useful for handling database keys and exact counting in +-2^63. Do not use them as a replacement for 32-bit integers, integer64 are not supported for subscripting by R-core and they have different semantics when combined with double. Do understand that \code{integer64} can only be useful over \code{double} if we do not coerce it to \code{double}. 
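A minimal sketch of what this means in practice (illustrative only, printed representations may differ):

\preformatted{
x <- as.integer64("123456789123456789")
x + 1L                 # exact 64-bit arithmetic
y <- as.double(x)      # coercion rounds to the nearest representable double
as.integer64(y) == x   # FALSE: the exact value cannot be recovered
}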
While integer + double -> double + double -> double or 1L + 0.5 -> 1.5 for additive operations we coerce to \code{integer64} integer64 + double -> integer64 + integer64 -> integer64 hence as.integer64(1) + 0.5 -> 1LL + 0LL -> 1LL see section "Arithmetic precision and coercion" above } \section{Design considerations}{ 64 bit integers are related to big data: we need them to overcome address space limitations. Therefore performance of the 64 bit integer type is critical. In the S language -- designed in 1975 -- atomic objects were defined to be vectors for a couple of good reasons: simplicity, option for implicit parallelization, good cache locality. In recent years many analytical databases have learnt that lesson: column based data bases provide superior performance for many applications, the result are products such as MonetDB, Sybase IQ, Vertica, Exasol, Ingres Vectorwise. If we introduce 64 bit integers not natively in Base R but as an external package, we should at least strive to make them as 'basic' as possible. Therefore the design choice of bit64 not only differs from package int64, it is obvious: Like the other atomic types in Base R, we model data type 'integer64' as a contiguous \code{\link{atomic}} vector in memory, and we use the more basic \link{S3} class system, not \link{S4}. Like package int64 we want our 'integer64' to be \code{\link{serialize}}able, therefore we also use an existing data type as the basis. Again the choice is obvious: R has only one 64 bit data type: doubles. By using \code{\link{double}}s, \code{integer64} \code{\link{inherits}} some functionality such as \code{\link[=is.atomic]{is.atomic()}}, \code{\link[=length]{length()}}, \code{\link{length<-}}, \code{\link[=names]{names()}}, \code{\link{names<-}}, \code{\link[=dim]{dim()}}, \code{\link{dim<-}}, \code{\link[=dimnames]{dimnames()}}, \code{\link{dimnames<-}}. Our R level functions strictly follow the functional programming paradigm: no modification of arguments or other side-effects. Before version 0.93 we internally deviated from the strict paradigm in order to boost performance. Our C functions do not create new return values, instead we pass-in the memory to be returned as an argument. This gives us the freedom to apply the C-function to new or old vectors, which helps to avoid unnecessary memory allocation, unnecessary copying and unnecessary garbage collection. Prior to 0.93 \emph{within} our R functions we also deviated from conventional R programming by not using \code{\link{attr<-}} and \code{\link{attributes<-}} because they always did new memory allocation and copying in older R versions. If we wanted to set attributes of return values that we have freshly created, we instead used functions \code{\link[bit:getsetattr]{bit::setattr()}} and \code{\link[bit:getsetattr]{bit::setattributes()}}. From version 0.93 \code{bit::setattr()} is only used for manipulating \code{\link{cache}} objects, in \code{\link[=ramsort.integer64]{ramsort.integer64()}}, \code{\link[=sort.integer64]{sort.integer64()}}, and \code{\link[=as.data.frame.integer64]{as.data.frame.integer64()}}. } \section{Arithmetic precision and coercion}{ The fact that we introduce 64 bit long long integers -- without introducing 128-bit long doubles -- creates some subtle challenges: Unlike 32 bit \code{\link{integer}}s, the \code{integer64} are no longer a proper subset of \code{\link{double}}. 
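For instance (a small illustrative sketch of why \code{integer64} values beyond 2^53 cannot be represented exactly as \code{double}):

\preformatted{
x <- as.integer64("9007199254740992")  # 2^53
x + 1L                                 # integer64 still distinguishes x from x+1
as.double(x) == as.double(x + 1L)      # TRUE: as doubles the two values collapse
}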
If a binary arithmetic operation does involve a \code{double} and a \code{integer}, it is a no-brainer to return \code{double} without loss of information. If an \code{integer64} meets a \code{double}, it is not trivial what type to return. Switching to \code{integer64} limits our ability to represent very large numbers, switching to \code{double} limits our ability to distinguish \code{x} from \code{x+1}. Since the latter is the purpose of introducing 64 bit integers, we usually return \code{integer64} from functions involving \code{integer64}, for example in \code{\link[=c.integer64]{c()}}, \code{\link[=cbind.integer64]{cbind()}}, and \code{\link[=rbind.integer64]{rbind()}} Different from Base R, our operators \code{\link[=+.integer64]{+}}, \code{\link[=-.integer64]{-}}, \code{\link[=\%/\%.integer64]{\%/\%}}, and \code{\link[=\%\%.integer64]{\%\%}} coerce their arguments to \code{integer64} and always return \code{integer64}. The multiplication operator \code{\link[=*.integer64]{*}} coerces its first argument to \code{integer64} but allows its second argument to be also \code{double}: the second argument is internaly coerced to 'long double' and the result of the multiplication is returned as \code{integer64}. The division \code{\link[=/.integer64]{/}} and power \code{\link[=^.integer64]{^}} operators also coerce their first argument to \code{integer64} and coerce internally their second argument to 'long double', they return as \code{double}, like \code{\link[=sqrt.integer64]{sqrt()}}, \code{\link[=log.integer64]{log()}}, \code{\link[=log2.integer64]{log2()}}, and \code{\link[=log10.integer64]{log10()}} do.\tabular{ccccccccc}{ \strong{argument1} \tab \strong{op} \tab \strong{argument2} \tab \strong{->} \tab \strong{coerced1} \tab \strong{op} \tab \strong{coerced2} \tab \strong{->} \tab \strong{result} \cr integer64 \tab + \tab double \tab -> \tab integer64 \tab + \tab integer64 \tab -> \tab integer64 \cr double \tab + \tab integer64 \tab -> \tab integer64 \tab + \tab integer64 \tab -> \tab integer64 \cr integer64 \tab - \tab double \tab -> \tab integer64 \tab - \tab integer64 \tab -> \tab integer64 \cr double \tab - \tab integer64 \tab -> \tab integer64 \tab - \tab integer64 \tab -> \tab integer64 \cr integer64 \tab \%/\% \tab double \tab -> \tab integer64 \tab \%/\% \tab integer64 \tab -> \tab integer64 \cr double \tab \%/\% \tab integer64 \tab -> \tab integer64 \tab \%/\% \tab integer64 \tab -> \tab integer64 \cr integer64 \tab \%\% \tab double \tab -> \tab integer64 \tab \%\% \tab integer64 \tab -> \tab integer64 \cr double \tab \%\% \tab integer64 \tab -> \tab integer64 \tab \%\% \tab integer64 \tab -> \tab integer64 \cr integer64 \tab * \tab double \tab -> \tab integer64 \tab * \tab long double \tab -> \tab integer64 \cr double \tab * \tab integer64 \tab -> \tab integer64 \tab * \tab integer64 \tab -> \tab integer64 \cr integer64 \tab / \tab double \tab -> \tab integer64 \tab / \tab long double \tab -> \tab double \cr double \tab / \tab integer64 \tab -> \tab integer64 \tab / \tab long double \tab -> \tab double \cr integer64 \tab ^ \tab double \tab -> \tab integer64 \tab / \tab long double \tab -> \tab double \cr double \tab ^ \tab integer64 \tab -> \tab integer64 \tab / \tab long double \tab -> \tab double \cr } } \section{Creating and testing S3 class 'integer64'}{ Our creator function \code{integer64} takes an argument \code{length}, creates an atomic double vector of this length, attaches an S3 class attribute 'integer64' to it, and that's it. 
We simply rely on S3 method dispatch and interpret those 64-bit elements as 'long long int'. \code{\link[=is.double]{is.double()}} currently returns TRUE for \code{integer64} and might return \code{FALSE} in a later release. Consider \code{is.double()} to have undefined behavior and do query \code{\link[=is.integer64]{is.integer64()}} \emph{before} querying \code{is.double()}. The methods \code{\link[=is.integer64]{is.integer64()}} and \code{\link[=is.vector]{is.vector()}} both return \code{TRUE} for \code{integer64}. Note that we did not patch \code{\link[=storage.mode]{storage.mode()}} and \code{\link[=typeof]{typeof()}}, which both continue returning 'double'. Like for 32 bit \code{\link{integer}}, \code{\link[=mode]{mode()}} returns 'numeric' and \code{\link[=as.double]{as.double()}} tries coercing to \code{\link{double}}. It is possible that 'integer64' becomes a \code{vmode} in package ff. Further methods for creating \code{integer64} are \code{\link[=range.integer64]{range()}} which returns the range of the data type if calles without arguments, \code{\link[=rep.integer64]{rep()}}, \code{\link[=seq.integer64]{seq()}}. For all available methods on \code{integer64} vectors see the index below and the examples. } \section{Index of implemented methods}{ \tabular{rrl}{ \strong{creating, testing, printing} \tab \strong{see also} \tab \strong{description} \cr \code{NA_integer64_} \tab \code{\link{NA_integer_}} \tab NA constant \cr \code{integer64} \tab \code{\link{integer}} \tab create zero atomic vector \cr \code{\link[=runif64]{runif64()}} \tab \code{\link[=runif]{runif()}} \tab create random vector \cr \code{\link[=rep.integer64]{rep.integer64()}} \tab \code{\link[=rep]{rep()}} \tab \cr \code{\link[=seq.integer64]{seq.integer64()}} \tab \code{\link[=seq]{seq()}} \tab \cr \code{\link[=is.integer64]{is.integer64()}} \tab \code{\link[=is]{is()}} \tab \cr \tab \code{\link[=is.integer]{is.integer()}} \tab inherited from Base R \cr \code{\link[=is.vector.integer64]{is.vector.integer64()}} \tab \code{\link[=is.vector]{is.vector()}} \tab \cr \code{\link[=identical.integer64]{identical.integer64()}} \tab \code{\link[=identical]{identical()}} \tab \cr \code{\link{length<-.integer64}} \tab \code{\link{length<-}} \tab \cr \tab \code{\link[=length]{length()}} \tab inherited from Base R \cr \code{\link{names<-}} \tab inherited from Base R \tab \cr \tab \code{\link[=names]{names()}} \tab inherited from Base R \cr \tab \code{\link{dim<-}} \tab inherited from Base R \cr \tab \code{\link[=dim]{dim()}} \tab inherited from Base R \cr \tab \code{\link{dimnames<-}} \tab inherited from Base R \cr \tab \code{\link[=dimnames]{dimnames()}} \tab inherited from Base R \cr \tab \code{\link[=str]{str()}} \tab inherited from Base R, does not print values correctly \cr \code{\link[=print.integer64]{print.integer64()}} \tab \code{\link[=print]{print()}} \tab \cr \code{\link[=str.integer64]{str.integer64()}} \tab \code{\link[=str]{str()}} \tab \cr } \tabular{rrl}{ \strong{coercing to integer64} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=as.integer64]{as.integer64()}} \tab \tab generic \cr \code{\link[=as.integer64.bitstring]{as.integer64.bitstring()}} \tab \code{\link[=as.bitstring]{as.bitstring()}} \tab \cr \code{\link[=as.integer64.character]{as.integer64.character()}} \tab \code{\link[=character]{character()}} \tab \cr \code{\link[=as.integer64.double]{as.integer64.double()}} \tab \code{\link[=double]{double()}} \tab \cr \code{\link[=as.integer64.integer]{as.integer64.integer()}} \tab 
\code{\link[=integer]{integer()}} \tab \cr \code{\link[=as.integer64.integer64]{as.integer64.integer64()}} \tab \code{integer64} \tab \cr \code{\link[=as.integer64.logical]{as.integer64.logical()}} \tab \code{\link[=logical]{logical()}} \tab \cr \code{\link[=as.integer64.NULL]{as.integer64.NULL()}} \tab \code{\link[=NULL]{NULL()}} \tab \cr } \tabular{rrl}{ \strong{coercing from integer64} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=as.list.integer64]{as.list.integer64()}} \tab \code{\link[=as.list]{as.list()}} \tab generic \cr \code{\link[=as.bitstring]{as.bitstring()}} \tab \code{\link[=as.bitstring]{as.bitstring()}} \tab generic \cr \code{\link[=as.bitstring.integer64]{as.bitstring.integer64()}} \tab \tab \cr \code{\link[=as.character.integer64]{as.character.integer64()}} \tab \code{\link[=as.character]{as.character()}} \tab \cr \code{\link[=as.double.integer64]{as.double.integer64()}} \tab \code{\link[=as.double]{as.double()}} \tab \cr \code{\link[=as.integer.integer64]{as.integer.integer64()}} \tab \code{\link[=as.integer]{as.integer()}} \tab \cr \code{\link[=as.logical.integer64]{as.logical.integer64()}} \tab \code{\link[=as.logical]{as.logical()}} \tab \cr } \tabular{rrl}{ \strong{data structures} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=c.integer64]{c.integer64()}} \tab \code{\link[=c]{c()}} \tab vector concatenate \cr \code{\link[=cbind.integer64]{cbind.integer64()}} \tab \code{\link[=cbind]{cbind()}} \tab column bind \cr \code{\link[=rbind.integer64]{rbind.integer64()}} \tab \code{\link[=rbind]{rbind()}} \tab row bind \cr \code{\link[=as.data.frame.integer64]{as.data.frame.integer64()}} \tab \code{\link[=as.data.frame]{as.data.frame()}} \tab coerce atomic object to data.frame \cr \tab \code{\link[=data.frame]{data.frame()}} \tab inherited from Base R since we have coercion \cr } \tabular{rrl}{ \strong{subscripting} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=extract.replace.integer64]{[.integer64}} \tab \code{\link[base:Extract]{[}} \tab vector and array extract \cr \code{\link[=extract.replace.integer64]{[<-.integer64}} \tab \code{\link[base:Extract]{[<-}} \tab vector and array assign \cr \code{\link[=extract.replace.integer64]{[[.integer64}} \tab \code{\link[base:Extract]{[[}} \tab scalar extract \cr \code{\link[=extract.replace.integer64]{[[<-.integer64}} \tab \code{\link[base:Extract]{[[<-}} \tab scalar assign \cr } \tabular{rrl}{ \strong{binary operators} \tab \strong{see also} \tab \strong{description} \cr \code{\link{+.integer64}} \tab \code{\link{+}} \tab returns integer64 \cr \code{\link{-.integer64}} \tab \code{\link{-}} \tab returns integer64 \cr \code{\link{*.integer64}} \tab \code{\link{*}} \tab returns integer64 \cr \code{\link{^.integer64}} \tab \code{\link{^}} \tab returns double \cr \code{\link{/.integer64}} \tab \code{\link{/}} \tab returns double \cr \code{\link{\%/\%.integer64}} \tab \code{\link{\%/\%}} \tab returns integer64 \cr \code{\link{\%\%.integer64}} \tab \code{\link{\%\%}} \tab returns integer64 \cr } \tabular{rrl}{ \strong{comparison operators} \tab \strong{see also} \tab \strong{description} \cr \code{\link{==.integer64}} \tab \code{\link{==}} \tab \cr \code{\link{!=.integer64}} \tab \code{\link{!=}} \tab \cr \code{\link{<.integer64}} \tab \code{\link{<}} \tab \cr \code{\link{<=.integer64}} \tab \code{\link{<=}} \tab \cr \code{\link{>.integer64}} \tab \code{\link{>}} \tab \cr \code{\link{>=.integer64}} \tab \code{\link{>=}} \tab \cr } \tabular{rrl}{ \strong{logical operators} \tab 
\strong{see also} \tab \strong{description} \cr \code{\link{!.integer64}} \tab \code{\link{!}} \tab \cr \code{\link{&.integer64}} \tab \code{\link{&}} \tab \cr \code{\link[=xor.integer64]{|.integer64}} \tab \code{\link[base:Logic]{|}} \tab \cr \code{\link{xor.integer64}} \tab \code{\link[=xor]{xor()}} \tab \cr }\tabular{rrl}{ \strong{math functions} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=is.na.integer64]{is.na.integer64()}} \tab \code{\link[=is.na]{is.na()}} \tab returns logical \cr \code{\link[=format.integer64]{format.integer64()}} \tab \code{\link[=format]{format()}} \tab returns character \cr \code{\link[=abs.integer64]{abs.integer64()}} \tab \code{\link[=abs]{abs()}} \tab returns integer64 \cr \code{\link[=sign.integer64]{sign.integer64()}} \tab \code{\link[=sign]{sign()}} \tab returns integer64 \cr \code{\link[=log.integer64]{log.integer64()}} \tab \code{\link[=log]{log()}} \tab returns double \cr \code{\link[=log10.integer64]{log10.integer64()}} \tab \code{\link[=log10]{log10()}} \tab returns double \cr \code{\link[=log2.integer64]{log2.integer64()}} \tab \code{\link[=log2]{log2()}} \tab returns double \cr \code{\link[=sqrt.integer64]{sqrt.integer64()}} \tab \code{\link[=sqrt]{sqrt()}} \tab returns double \cr \code{\link[=ceiling.integer64]{ceiling.integer64()}} \tab \code{\link[=ceiling]{ceiling()}} \tab dummy returning its argument \cr \code{\link[=floor.integer64]{floor.integer64()}} \tab \code{\link[=floor]{floor()}} \tab dummy returning its argument \cr \code{\link[=trunc.integer64]{trunc.integer64()}} \tab \code{\link[=trunc]{trunc()}} \tab dummy returning its argument \cr \code{\link[=round.integer64]{round.integer64()}} \tab \code{\link[=round]{round()}} \tab dummy returning its argument \cr \code{\link[=signif.integer64]{signif.integer64()}} \tab \code{\link[=signif]{signif()}} \tab dummy returning its argument \cr } \tabular{rrl}{ \strong{cumulative functions} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=cummin.integer64]{cummin.integer64()}} \tab \code{\link[=cummin]{cummin()}} \tab \cr \code{\link[=cummax.integer64]{cummax.integer64()}} \tab \code{\link[=cummax]{cummax()}} \tab \cr \code{\link[=cumsum.integer64]{cumsum.integer64()}} \tab \code{\link[=cumsum]{cumsum()}} \tab \cr \code{\link[=cumprod.integer64]{cumprod.integer64()}} \tab \code{\link[=cumprod]{cumprod()}} \tab \cr \code{\link[=diff.integer64]{diff.integer64()}} \tab \code{\link[=diff]{diff()}} \tab \cr } \tabular{rrl}{ \strong{summary functions} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=range.integer64]{range.integer64()}} \tab \code{\link[=range]{range()}} \tab \cr \code{\link[=min.integer64]{min.integer64()}} \tab \code{\link[=min]{min()}} \tab \cr \code{\link[=max.integer64]{max.integer64()}} \tab \code{\link[=max]{max()}} \tab \cr \code{\link[=sum.integer64]{sum.integer64()}} \tab \code{\link[=sum]{sum()}} \tab \cr \code{\link[=mean.integer64]{mean.integer64()}} \tab \code{\link[=mean]{mean()}} \tab \cr \code{\link[=prod.integer64]{prod.integer64()}} \tab \code{\link[=prod]{prod()}} \tab \cr \code{\link[=all.integer64]{all.integer64()}} \tab \code{\link[=all]{all()}} \tab \cr \code{\link[=any.integer64]{any.integer64()}} \tab \code{\link[=any]{any()}} \tab \cr } \tabular{rrl}{ \strong{algorithmically complex functions} \tab \strong{see also} \tab \strong{description (caching)} \cr \code{\link[=match.integer64]{match.integer64()}} \tab \code{\link[=match]{match()}} \tab position of x in table (h//o/so) \cr 
\code{\link{\%in\%.integer64}} \tab \code{\link{\%in\%}} \tab is x in table? (h//o/so) \cr \code{\link[=duplicated.integer64]{duplicated.integer64()}} \tab \code{\link[=duplicated]{duplicated()}} \tab is current element duplicate of previous one? (h//o/so) \cr \code{\link[=unique.integer64]{unique.integer64()}} \tab \code{\link[=unique]{unique()}} \tab (shorter) vector of unique values only (h/s/o/so) \cr \code{\link[=unipos.integer64]{unipos.integer64()}} \tab \code{\link[=unipos]{unipos()}} \tab positions corresponding to unique values (h/s/o/so) \cr \code{\link[=tiepos.integer64]{tiepos.integer64()}} \tab \code{\link[=tiepos]{tiepos()}} \tab positions of values that are tied (//o/so) \cr \code{\link[=keypos.integer64]{keypos.integer64()}} \tab \code{\link[=keypos]{keypos()}} \tab position of current value in sorted list of unique values (//o/so) \cr \code{\link[=table.integer64]{table.integer64()}} \tab \code{\link[=table]{table()}} \tab unique values and their frequencies (h/s/o/so) \cr \code{\link[=sort.integer64]{sort.integer64()}} \tab \code{\link[=sort]{sort()}} \tab sorted vector (/s/o/so) \cr \code{\link[=order.integer64]{order.integer64()}} \tab \code{\link[=order]{order()}} \tab positions of elements that would create sorted vector (//o/so) \cr \code{\link[=rank.integer64]{rank.integer64()}} \tab \code{\link[=rank]{rank()}} \tab (average) ranks of non-NAs, NAs kept in place (/s/o/so) \cr \code{\link[=quantile.integer64]{quantile.integer64()}} \tab \code{\link[=quantile]{quantile()}} \tab (existing) values at specified percentiles (/s/o/so) \cr \code{\link[=median.integer64]{median.integer64()}} \tab \code{\link[=median]{median()}} \tab (existing) value at percentile 0.5 (/s/o/so) \cr \code{\link[=summary.integer64]{summary.integer64()}} \tab \code{\link[=summary]{summary()}} \tab (/s/o/so) \cr \code{\link[=all.equal.integer64]{all.equal.integer64()}} \tab \code{\link[=all.equal]{all.equal()}} \tab test if two objects are (nearly) equal (/s/o/so) \cr } \tabular{rrl}{ \strong{helper functions} \tab \strong{see also} \tab \strong{description} \cr \code{\link[=minusclass]{minusclass()}} \tab \code{\link[=minusclass]{minusclass()}} \tab removing class attritbute \cr \code{\link[=plusclass]{plusclass()}} \tab \code{\link[=plusclass]{plusclass()}} \tab inserting class attribute \cr \code{\link[=binattr]{binattr()}} \tab \code{\link[=binattr]{binattr()}} \tab define binary op behaviour \cr } \tabular{rrl}{ \strong{tested I/O functions} \tab \strong{see also} \tab \strong{description} \cr \tab \code{\link[=read.table]{read.table()}} \tab inherited from Base R \cr \tab \code{\link[=write.table]{write.table()}} \tab inherited from Base R \cr \tab \code{\link[=serialize]{serialize()}} \tab inherited from Base R \cr \tab \code{\link[=unserialize]{unserialize()}} \tab inherited from Base R \cr \tab \code{\link[=save]{save()}} \tab inherited from Base R \cr \tab \code{\link[=load]{load()}} \tab inherited from Base R \cr \tab \code{\link[=dput]{dput()}} \tab inherited from Base R \cr \tab \code{\link[=dget]{dget()}} \tab inherited from Base R \cr } } \section{Limitations inherited from implementing 64 bit integers via an external package}{ \itemize{ \item \strong{vector size} of atomic vectors is still limited to \code{\link[=.Machine]{.Machine$integer.max}}. However, external memory extending packages such as ff or bigmemory can extend their address space now with \code{integer64}. 
Having 64 bit integers also help with those not so obvious address issues that arise once we exchange data with SQL databases and datawarehouses, which use big integers as surrogate keys, e.g. on indexed primary key columns. This puts R into a relatively strong position compared to certain commercial statistical softwares, which sell database connectivity but neither have the range of 64 bit integers, nor have integers at all, nor have a single numeric data type in their macro-glue-language. \item \strong{literals} such as \verb{123LL} would require changes to Base R, up to then we need to write (and call) \code{as.integer64(123L)} or \code{as.integer64(123)} or \code{as.integer64('123')}. Only the latter allows to specify numbers beyond Base R's numeric data types and therefore is the recommended way to use -- using only one way may facilitate migrating code to literals at a later stage. } } \section{Limitations inherited from Base R, Core team, can you change this?}{ \itemize{ \item \strong{\code{\link[=identical]{identical()}}} with default parameters does not distinguish all bit-patterns of doubles. For testing purposes we provide a wrapper \code{\link[=identical.integer64]{identical.integer64()}} that will distinguish all bit-patterns. It would be desireable to have a single call of \code{identical()} handle both, \code{\link{double}} and \code{integer64}. \item the \strong{colon} operator \link{:} officially does not dispatch S3 methods, however, we have made it generic: \if{html}{\out{
}}\preformatted{from <- lim.integer64()[1] to <- from+99 from:to }\if{html}{\out{
}} As a limitation remains: it will only dispatch at its first argument \code{from} but not at its second \code{to}. \item \strong{\code{\link[=is.double]{is.double()}}} does not dispatch S3 methods, However, we have made it generic and it will return \code{FALSE} on \code{integer64}. \item \strong{\code{\link[=c]{c()}}} only dispatches \code{\link[=c.integer64]{c.integer64()}} if the first argument is \code{integer64} and it does not recursively dispatch the proper method when called with argument \code{recursive=TRUE}. Therefore \code{c(list(integer64, integer64))} does not work and for now you can only call \code{c.integer64(list(x, x))}. \item \strong{generic binary operators} fail to dispatch \emph{any} user-defined S3 method if the two arguments have two different S3 classes. For example we have two classes \code{\link[bit:bit]{bit::bit}} and \code{\link[bit:bitwhich]{bit::bitwhich}} sparsely representing boolean vectors and we have methods \code{\link[bit:xor]{&.bit}} and \code{\link[bit:xor]{&.bitwhich}}. For an expression involving both as in \code{bit & bitwhich}, none of the two methods is dispatched. Instead a standard method is dispatched, which neither handles \code{bit} nor \code{bitwhich}. Although it lacks symmetry, the better choice would be to dispatch simply the method of the class of the first argument in case of class conflict. This choice would allow authors of extension packages providing coherent behaviour at least within their contributed classes. But as long as none of the package author's methods is dispatched, they cannot handle the conflicting classes at all. \item \strong{\code{\link[=unlist]{unlist()}}} is not generic and if it were, we would face similar problems as with \code{\link[=c]{c()}} \item \strong{\code{\link[=vector]{vector()}}} with argument \code{mode='integer64'} cannot work without adjustment of Base R \item \strong{\code{\link[=as.vector]{as.vector()}}} with argument \code{mode='integer64'} cannot work without adjustment of Base R \item \strong{\code{\link[=is.vector]{is.vector()}}} does not dispatch its method \code{\link[=is.vector.integer64]{is.vector.integer64()}} \item \strong{\code{\link[=mode<-]{mode<-()}}} drops the class 'integer64' which is returned from \code{as.integer64()}. Also it does not remove an existing class 'integer64' when assigning mode 'integer'. \item \strong{\code{\link[=storage.mode<-]{storage.mode<-()}}} does not support external data types such as \code{integer64} \item \strong{\code{\link[=matrix]{matrix()}}} does drop the 'integer64' class attribute. \item \strong{\code{\link[=array]{array()}}} does drop the 'integer64' class attribute. \itemize{ \item In current R versions (1.15.1) this can be circumvented by activating the function \code{as.vector.integer64()}. However, the CRAN maintainer has requested to remove \code{as.vector.integer64()}, even at the price of breaking previously working functionality of the package. } \item \strong{\code{\link[=str]{str()}}} does not print the values of \code{integer64} correctly } } \section{Further limitations}{ \itemize{ \item \strong{subscripting} non-existing elements and subscripting with \code{NA}s is currently not supported. Such subscripting currently returns \code{9218868437227407266} instead of \code{NA} (the \code{NA} value of the underlying double code). Following the full R behaviour here would either destroy performance or require extensive C-coding. 
} } \examples{ message("Using integer64 in vector") x <- integer64(8) # create 64 bit vector x is.atomic(x) # TRUE is.integer64(x) # TRUE is.numeric(x) # TRUE is.integer(x) # FALSE - debatable is.double(x) # FALSE - might change x[] <- 1:2 # assigned value is recycled as usual x[1:6] # subscripting as usual length(x) <- 13 # changing length as usual x rep(x, 2) # replicate as usual seq(as.integer64(1), 10) # seq.integer64 is dispatched on first given argument seq(to=as.integer64(10), 1) # seq.integer64 is dispatched on first given argument seq.integer64(along.with=x) # or call seq.integer64 directly # c.integer64 is dispatched only if *first* argument is integer64 ... x <- c(x,runif(length(x), max=100)) # ... and coerces everything to integer64 - including double x names(x) <- letters # use names as usual x message("Using integer64 in array - note that 'matrix' currently does not work") message("as.vector.integer64 removed as requested by the CRAN maintainer") message("as consequence 'array' also does not work anymore") message("we still can create a matrix or array by assigning 'dim'") y <- rep(as.integer64(NA), 12) dim(y) <- c(3,4) dimnames(y) <- list(letters[1:3], LETTERS[1:4]) y["a",] <- 1:2 # assigning as usual y y[1:2,-4] # subscripting as usual # cbind.integer64 dispatched on any argument and coerces everything to integer64 cbind(E=1:3, F=runif(3, 0, 100), G=c("-1","0","1"), y) message("Using integer64 in data.frame") str(as.data.frame(x)) str(as.data.frame(y)) str(data.frame(y)) str(data.frame(I(y))) d <- data.frame(x=x, y=runif(length(x), 0, 100)) d d$x message("Using integer64 with csv files") fi64 <- tempfile() write.csv(d, file=fi64, row.names=FALSE) e <- read.csv(fi64, colClasses=c("integer64", NA)) unlink(fi64) str(e) identical.integer64(d$x,e$x) message("Serializing and unserializing integer64") dput(d, fi64) e <- dget(fi64) identical.integer64(d$x,e$x) e <- d[,] save(e, file=fi64) rm(e) load(file=fi64) identical.integer64(d,e) \dontrun{ message("== Differences between integer64 and int64 ==") require(bit64) require(int64) message("-- integer64 is atomic --") is.atomic(integer64()) #is.atomic(int64()) str(integer64(3)) #str(int64(3)) message("-- The following performance numbers are measured under RWin64 --") message("-- under RWin32 the advantage of integer64 over int64 is smaller --") message("-- integer64 needs 7x/5x less RAM than int64 under 64/32 bit OS (and twice the RAM of integer as it should be) --") #as.vector(object.size(int64(1e6))/object.size(integer64(1e6))) as.vector(object.size(integer64(1e6))/object.size(integer(1e6))) message("-- integer64 creates 2000x/1300x faster than int64 under 64/32 bit OS (and 3x the time of integer) --") t32 <- system.time(integer(1e8)) t64 <- system.time(integer64(1e8)) #T64 <- system.time(int64(1e7))*10 # using 1e8 as above stalls our R on an i7 8 GB RAM Thinkpad #T64/t64 t64/t32 i32 <- sample(1e6) d64 <- as.double(i32) message("-- the following timings are rather conservative since timings of integer64 include garbage collection -- due to looped calls") message("-- integer64 coerces 900x/100x faster than int64 under 64/32 bit OS (and 2x the time of coercing to integer) --") t32 <- system.time(for(i in 1:1000)as.integer(d64)) t64 <- system.time(for(i in 1:1000)as.integer64(d64)) #T64 <- system.time(as.int64(d64))*1000 #T64/t64 t64/t32 td64 <- system.time(for(i in 1:1000)as.double(i32)) t64 <- system.time(for(i in 1:1000)as.integer64(i32)) #T64 <- system.time(for(i in 1:10)as.int64(i32))*100 #T64/t64 t64/td64 message("-- integer64 
serializes 4x/0.8x faster than int64 under 64/32 bit OS (and less than 2x/6x the time of integer or double) --") t32 <- system.time(for(i in 1:10)serialize(i32, NULL)) td64 <- system.time(for(i in 1:10)serialize(d64, NULL)) i64 <- as.integer64(i32); t64 <- system.time(for(i in 1:10)serialize(i64, NULL)) rm(i64); gc() #I64 <- as.int64(i32); #T64 <- system.time(for(i in 1:10)serialize(I64, NULL)) #rm(I64); gc() #T64/t64 t64/t32 t64/td64 message("-- integer64 adds 250x/60x faster than int64 under 64/32 bit OS (and less than 6x the time of integer or double) --") td64 <- system.time(for(i in 1:100)d64+d64) t32 <- system.time(for(i in 1:100)i32+i32) i64 <- as.integer64(i32); t64 <- system.time(for(i in 1:100)i64+i64) rm(i64); gc() #I64 <- as.int64(i32); #T64 <- system.time(for(i in 1:10)I64+I64)*10 #rm(I64); gc() #T64/t64 t64/t32 t64/td64 message("-- integer64 sums 3x/0.2x faster than int64 (and at about 5x/60x the time of integer and double) --") td64 <- system.time(for(i in 1:100)sum(d64)) t32 <- system.time(for(i in 1:100)sum(i32)) i64 <- as.integer64(i32); t64 <- system.time(for(i in 1:100)sum(i64)) rm(i64); gc() #I64 <- as.int64(i32); #T64 <- system.time(for(i in 1:100)sum(I64)) #rm(I64); gc() #T64/t64 t64/t32 t64/td64 message("-- integer64 diffs 5x/0.85x faster than integer and double (int64 version 1.0 does not support diff) --") td64 <- system.time(for(i in 1:10)diff(d64, lag=2L, differences=2L)) t32 <- system.time(for(i in 1:10)diff(i32, lag=2L, differences=2L)) i64 <- as.integer64(i32); t64 <- system.time(for(i in 1:10)diff(i64, lag=2L, differences=2L)) rm(i64); gc() t64/t32 t64/td64 message("-- integer64 subscripts 1000x/340x faster than int64 (and at the same speed / 10x slower than integer) --") ts32 <- system.time(for(i in 1:1000)sample(1e6, 1e3)) t32 <- system.time(for(i in 1:1000)i32[sample(1e6, 1e3)]) i64 <- as.integer64(i32); t64 <- system.time(for(i in 1:1000)i64[sample(1e6, 1e3)]) rm(i64); gc() #I64 <- as.int64(i32); #T64 <- system.time(for(i in 1:100)I64[sample(1e6, 1e3)])*10 #rm(I64); gc() #(T64-ts32)/(t64-ts32) (t64-ts32)/(t32-ts32) message("-- integer64 assigns 200x/90x faster than int64 (and 50x/160x slower than integer) --") ts32 <- system.time(for(i in 1:100)sample(1e6, 1e3)) t32 <- system.time(for(i in 1:100)i32[sample(1e6, 1e3)] <- 1:1e3) i64 <- as.integer64(i32); t64 <- system.time(for(i in 1:100)i64[sample(1e6, 1e3)] <- 1:1e3) rm(i64); gc() #I64 <- as.int64(i32); #T64 <- system.time(for(i in 1:10)I64[sample(1e6, 1e3)] <- 1:1e3)*10 #rm(I64); gc() #(T64-ts32)/(t64-ts32) (t64-ts32)/(t32-ts32) tdfi32 <- system.time(dfi32 <- data.frame(a=i32, b=i32, c=i32)) tdfsi32 <- system.time(dfi32[1e6:1,]) fi32 <- tempfile() tdfwi32 <- system.time(write.csv(dfi32, file=fi32, row.names=FALSE)) tdfri32 <- system.time(read.csv(fi32, colClasses=rep("integer", 3))) unlink(fi32) rm(dfi32); gc() i64 <- as.integer64(i32); tdfi64 <- system.time(dfi64 <- data.frame(a=i64, b=i64, c=i64)) tdfsi64 <- system.time(dfi64[1e6:1,]) fi64 <- tempfile() tdfwi64 <- system.time(write.csv(dfi64, file=fi64, row.names=FALSE)) tdfri64 <- system.time(read.csv(fi64, colClasses=rep("integer64", 3))) unlink(fi64) rm(i64, dfi64); gc() #I64 <- as.int64(i32); #tdfI64 <- system.time(dfI64<-data.frame(a=I64, b=I64, c=I64)) #tdfsI64 <- system.time(dfI64[1e6:1,]) #fI64 <- tempfile() #tdfwI64 <- system.time(write.csv(dfI64, file=fI64, row.names=FALSE)) #tdfrI64 <- system.time(read.csv(fI64, colClasses=rep("int64", 3))) #unlink(fI64) #rm(I64, dfI64); gc() message("-- integer64 coerces 40x/6x faster to data.frame than int64 
(and factor 1/9 slower than integer) --") #tdfI64/tdfi64 tdfi64/tdfi32 message("-- integer64 subscripts from data.frame 20x/2.5x faster than int64 (and 3x/13x slower than integer) --") #tdfsI64/tdfsi64 tdfsi64/tdfsi32 message("-- integer64 csv writes about 2x/0.5x faster than int64 (and about 1.5x/5x slower than integer) --") #tdfwI64/tdfwi64 tdfwi64/tdfwi32 message("-- integer64 csv reads about 3x/1.5x faster than int64 (and about 2x slower than integer) --") #tdfrI64/tdfri64 tdfri64/tdfri32 rm(i32, d64); gc() message("-- investigating the impact on garbage collection: --") message("-- the fragmented structure of int64 messes up R's RAM --") message("-- and slows down R's garbage collection just by existing --") td32 <- double(21) td32[1] <- system.time(d64 <- double(1e7))[3] for (i in 2:11)td32[i] <- system.time(gc(), gcFirst=FALSE)[3] rm(d64) for (i in 12:21)td32[i] <- system.time(gc(), gcFirst=FALSE)[3] t64 <- double(21) t64[1] <- system.time(i64 <- integer64(1e7))[3] for (i in 2:11)t64[i] <- system.time(gc(), gcFirst=FALSE)[3] rm(i64) for (i in 12:21)t64[i] <- system.time(gc(), gcFirst=FALSE)[3] #T64 <- double(21) #T64[1] <- system.time(I64 <- int64(1e7))[3] #for (i in 2:11)T64[i] <- system.time(gc(), gcFirst=FALSE)[3] #rm(I64) #for (i in 12:21)T64[i] <- system.time(gc(), gcFirst=FALSE)[3] #matplot(1:21, cbind(td32, t64, T64), pch=c("d","i","I"), log="y") matplot(1:21, cbind(td32, t64), pch=c("d","i"), log="y") } } \seealso{ \code{\link[=integer]{integer()}} in base R } \author{ \strong{Maintainer}: Michael Chirico \email{michaelchirico4@gmail.com} Authors: \itemize{ \item Jens Oehlschlägel } Other contributors: \itemize{ \item Leonardo Silvestri [contributor] \item Ofek Shilon [contributor] } } \keyword{classes} \keyword{internal} \keyword{manip} \keyword{package} bit64/man/as.character.integer64.Rd0000644000176200001440000000322414705122715016415 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{as.character.integer64} \alias{as.character.integer64} \alias{as.bitstring} \alias{as.double.integer64} \alias{as.integer.integer64} \alias{as.logical.integer64} \alias{as.bitstring.integer64} \alias{print.bitstring} \alias{as.list.integer64} \title{Coerce from integer64} \usage{ as.bitstring(x, ...) \method{as.double}{integer64}(x, keep.names = FALSE, ...) \method{as.integer}{integer64}(x, ...) \method{as.logical}{integer64}(x, ...) \method{as.character}{integer64}(x, ...) \method{as.bitstring}{integer64}(x, ...) \method{print}{bitstring}(x, ...) \method{as.list}{integer64}(x, ...) } \arguments{ \item{x}{an integer64 vector} \item{...}{further arguments to the \code{\link[=NextMethod]{NextMethod()}}} \item{keep.names}{FALSE, set to TRUE to keep a names vector} } \value{ \code{as.bitstring} returns a string of class 'bitstring'. The other methods return atomic vectors of the expected types. } \description{ Methods to coerce integer64 to other atomic types. 'as.bitstring' coerces to a human-readable bit representation (strings of zeroes and ones). The methods \code{\link[=format]{format()}}, \code{\link[=as.character]{as.character()}}, \code{\link[=as.double]{as.double()}}, \code{\link[=as.logical]{as.logical()}}, \code{\link[=as.integer]{as.integer()}} do what you would expect.
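For example -- a minimal sketch, assuming only that doubles carry 53 significand bits -- coercion to double can be exact only up to +-2^53, while coercion to character preserves all decimal digits: \preformatted{
x <- as.integer64("9007199254740993")  # 2^53 + 1, just beyond the exact double range
as.character(x)   # "9007199254740993", exact
as.double(x)      # may round to 9007199254740992
}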
} \examples{ as.character(lim.integer64()) as.bitstring(lim.integer64()) as.bitstring(as.integer64(c( -2,-1,NA,0:2 ))) } \seealso{ \code{\link[=as.integer64.character]{as.integer64.character()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/man/runif64.Rd0000644000176200001440000000254114705122715013547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hash64.R \name{runif64} \alias{runif64} \title{integer64: random numbers} \usage{ runif64( n, min = lim.integer64()[1L], max = lim.integer64()[2L], replace = TRUE ) } \arguments{ \item{n}{length of return vector} \item{min}{lower inclusive bound for random numbers} \item{max}{upper inclusive bound for random numbers} \item{replace}{set to FALSE for sampling from a finite pool, see \code{\link[=sample]{sample()}}} } \value{ an integer64 vector } \description{ Create uniform random 64-bit integers within a defined range } \details{ For each random integer we call R's internal C interface \code{unif_rand()} twice. Each call is mapped to one of 2^32 unsigned 32-bit patterns. The two 32-bit patterns are concatenated to form the new integer64. This process is repeated until the result is not \code{NA_INTEGER64_}. } \examples{ runif64(12) runif64(12, -16, 16) runif64(12, 0, as.integer64(2^60)-1) # not 2^60-1 ! var(runif(1e4)) var(as.double(runif64(1e4, 0, 2^40))/2^40) # ~ = 1/12 = .08333 table(sample(16, replace=FALSE)) table(runif64(16, 1, 16, replace=FALSE)) table(sample(16, replace=TRUE)) table(runif64(16, 1, 16, replace=TRUE)) } \seealso{ \code{\link[=runif]{runif()}}, \code{\link[=hashfun]{hashfun()}} } \keyword{classes} \keyword{distribution} \keyword{sysdata} bit64/man/sortnut.Rd0000644000176200001440000002034414705122715013771 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sortuse64.R \name{sortnut} \alias{sortnut} \alias{sortnut.integer64} \alias{ordernut} \alias{ordernut.integer64} \alias{sortfin} \alias{sortfin.integer64} \alias{orderfin} \alias{orderfin.integer64} \alias{orderpos} \alias{orderpos.integer64} \alias{sortorderpos} \alias{sortorderpos.integer64} \alias{orderdup} \alias{orderdup.integer64} \alias{sortorderdup} \alias{sortorderdup.integer64} \alias{sortuni} \alias{sortuni.integer64} \alias{orderuni} \alias{orderuni.integer64} \alias{sortorderuni} \alias{sortorderuni.integer64} \alias{orderupo} \alias{orderupo.integer64} \alias{sortorderupo} \alias{sortorderupo.integer64} \alias{ordertie} \alias{ordertie.integer64} \alias{sortordertie} \alias{sortordertie.integer64} \alias{sorttab} \alias{sorttab.integer64} \alias{ordertab} \alias{ordertab.integer64} \alias{sortordertab} \alias{sortordertab.integer64} \alias{orderkey} \alias{orderkey.integer64} \alias{sortorderkey} \alias{sortorderkey.integer64} \alias{orderrnk} \alias{orderrnk.integer64} \alias{sortorderrnk} \alias{sortorderrnk.integer64} \alias{sortqtl} \alias{sortqtl.integer64} \alias{orderqtl} \alias{orderqtl.integer64} \title{Searching and other uses of sorting for 64bit integers} \usage{ sortnut(sorted, ...) \method{sortnut}{integer64}(sorted, ...) ordernut(table, order, ...) \method{ordernut}{integer64}(table, order, ...) sortfin(sorted, x, ...) \method{sortfin}{integer64}(sorted, x, method = NULL, ...) orderfin(table, order, x, ...) \method{orderfin}{integer64}(table, order, x, method = NULL, ...) orderpos(table, order, x, ...) \method{orderpos}{integer64}(table, order, x, nomatch = NA, method = NULL, ...) sortorderpos(sorted, order, x, ...)
\method{sortorderpos}{integer64}(sorted, order, x, nomatch = NA, method = NULL, ...) orderdup(table, order, ...) \method{orderdup}{integer64}(table, order, method = NULL, ...) sortorderdup(sorted, order, ...) \method{sortorderdup}{integer64}(sorted, order, method = NULL, ...) sortuni(sorted, nunique, ...) \method{sortuni}{integer64}(sorted, nunique, ...) orderuni(table, order, nunique, ...) \method{orderuni}{integer64}(table, order, nunique, keep.order = FALSE, ...) sortorderuni(table, sorted, order, nunique, ...) \method{sortorderuni}{integer64}(table, sorted, order, nunique, ...) orderupo(table, order, nunique, ...) \method{orderupo}{integer64}(table, order, nunique, keep.order = FALSE, ...) sortorderupo(sorted, order, nunique, keep.order = FALSE, ...) \method{sortorderupo}{integer64}(sorted, order, nunique, keep.order = FALSE, ...) ordertie(table, order, nties, ...) \method{ordertie}{integer64}(table, order, nties, ...) sortordertie(sorted, order, nties, ...) \method{sortordertie}{integer64}(sorted, order, nties, ...) sorttab(sorted, nunique, ...) \method{sorttab}{integer64}(sorted, nunique, ...) ordertab(table, order, nunique, ...) \method{ordertab}{integer64}(table, order, nunique, denormalize = FALSE, keep.order = FALSE, ...) sortordertab(sorted, order, ...) \method{sortordertab}{integer64}(sorted, order, denormalize = FALSE, ...) orderkey(table, order, na.skip.num = 0L, ...) \method{orderkey}{integer64}(table, order, na.skip.num = 0L, ...) sortorderkey(sorted, order, na.skip.num = 0L, ...) \method{sortorderkey}{integer64}(sorted, order, na.skip.num = 0L, ...) orderrnk(table, order, na.count, ...) \method{orderrnk}{integer64}(table, order, na.count, ...) sortorderrnk(sorted, order, na.count, ...) \method{sortorderrnk}{integer64}(sorted, order, na.count, ...) sortqtl(sorted, na.count, probs, ...) \method{sortqtl}{integer64}(sorted, na.count, probs, ...) orderqtl(table, order, na.count, probs, ...) \method{orderqtl}{integer64}(table, order, na.count, probs, ...) } \arguments{ \item{sorted}{a sorted \code{\link{integer64}} vector} \item{...}{further arguments, passed from generics, ignored in methods} \item{table}{the original data with original order under the sorted vector} \item{order}{an \code{\link{integer}} order vector that turns 'table' into 'sorted'} \item{x}{an \code{\link{integer64}} vector} \item{method}{see Details} \item{nomatch}{the value to be returned if an element is not found in the hashmap} \item{nunique}{number of unique elements, usually we get this from cache or call \code{sortnut} or \code{ordernut}} \item{keep.order}{determines order of results and speed: \code{FALSE} (the default) is faster and returns in sorted order, \code{TRUE} returns in the order of first appearance in the original data, but this requires extra work} \item{nties}{number of tied values, usually we get this from cache or call \code{sortnut} or \code{ordernut}} \item{denormalize}{FALSE returns counts of unique values, TRUE returns each value with its counts} \item{na.skip.num}{0 or the number of \code{NA}s. With 0, \code{NA}s are coded with 1L, with the number of \code{NA}s, these are coded with \code{NA}} \item{na.count}{the number of \code{NA}s, needed for this low-level function algorithm} \item{probs}{vector of probabilities in \verb{[0..1]} for which we seek quantiles} } \value{ see details } \description{ This is roughly an implementation of hash functionality but based on sorting instead on a hashmap. 
Since sorting is more informative than hashing we can do some more interesting things. } \details{ \tabular{rrrrl}{ \strong{sortfun} \tab \strong{orderfun} \tab \strong{sortorderfun} \tab \strong{see also} \tab \strong{description} \cr \code{sortnut} \tab \code{ordernut} \tab \tab \tab return number of tied and of unique values \cr \code{sortfin} \tab \code{orderfin} \tab \tab \code{\link{\%in\%.integer64}} \tab return logical whether \code{x} is in \code{table} \cr \tab \code{orderpos} \tab \code{sortorderpos} \tab \code{\link[=match.integer64]{match()}} \tab return positions of \code{x} in \code{table} \cr \tab \code{orderdup} \tab \code{sortorderdup} \tab \code{\link[=duplicated.integer64]{duplicated()}} \tab return logical whether values are duplicated \cr \code{sortuni} \tab \code{orderuni} \tab \code{sortorderuni} \tab \code{\link[=unique.integer64]{unique()}} \tab return unique values (=dimensiontable) \cr \tab \code{orderupo} \tab \code{sortorderupo} \tab \code{\link[=unique.integer64]{unique()}} \tab return positions of unique values \cr \tab \code{ordertie} \tab \code{sortordertie} \tab \tab return positions of tied values \cr \tab \code{orderkey} \tab \code{sortorderkey} \tab \tab positions of values in vector of unique values (match in dimensiontable) \cr \code{sorttab} \tab \code{ordertab} \tab \code{sortordertab} \tab \code{\link[=table.integer64]{table()}} \tab tabulate frequency of values \cr \tab \code{orderrnk} \tab \code{sortorderrnk} \tab \tab rank averaging ties \cr \code{sortqtl} \tab \code{orderqtl} \tab \tab \tab return quantiles given probabilities \cr } The functions \code{sortfin}, \code{orderfin}, \code{orderpos} and \code{sortorderpos} each offer three algorithms for finding \code{x} in \code{table}. With \code{method=1L} each value of \code{x} is searched independently using \emph{binary search}, this is fastest for small \code{table}s. With \code{method=2L} the values of \code{x} are first sorted and then searched using \emph{doubly exponential search}, this is the best allround method. With \code{method=3L} the values of \code{x} are first sorted and then searched using simple merging, this is the fastest method if \code{table} is huge and \code{x} has similar size and distribution of values. With \code{method=NULL} the functions use a heuristic to determine the fastest algorithm. The functions \code{orderdup} and \code{sortorderdup} each offer two algorithms for setting the truth values in the return vector. With \code{method=1L} the return values are set directly which causes random write access on a possibly large return vector. With \code{method=2L} the return values are first set in a smaller bit-vector -- random access limited to a smaller memory region -- and finally written sequentially to the logical output vector. With \code{method=NULL} the functions use a heuristic to determine the fastest algorithm. } \examples{ message("check the code of 'optimizer64' for examples:") print(optimizer64) } \seealso{ \code{\link[=match.integer64]{match()}} } \keyword{manip} \keyword{programming} bit64/man/keypos.Rd0000644000176200001440000000276214705122715013571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{keypos} \alias{keypos} \alias{keypos.integer64} \title{Extract Positions in redundant dimension table} \usage{ keypos(x, ...) \method{keypos}{integer64}(x, method = NULL, ...) 
} \arguments{ \item{x}{a vector or a data frame or an array or \code{NULL}.} \item{...}{ignored} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} } \value{ an integer vector of the same length as \code{x} containing positions relative to \code{sort(unique(x), na.last=FALSE)} } \description{ \code{keypos} returns the positions of the (fact table) elements that participate in their sorted unique subset (dimension table) } \details{ NAs are sorted first in the dimension table, see \code{\link[=ramorder.integer64]{ramorder.integer64()}}. This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link{sortorderkey}} (fast ordering) \item \code{\link{orderkey}} (memory saving ordering). } } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) keypos(x) stopifnot(identical(keypos(x), match.integer64(x, sort(unique(x), na.last=FALSE)))) } \seealso{ \code{\link[=unique.integer64]{unique.integer64()}} for the unique subset and \code{\link[=match.integer64]{match.integer64()}} for finding positions in a different vector. } \keyword{manip} \keyword{univar} bit64/man/tiepos.Rd0000644000176200001440000000267314705122715013563 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/highlevel64.R \name{tiepos} \alias{tiepos} \alias{tiepos.integer64} \title{Extract Positions of Tied Elements} \usage{ tiepos(x, ...) \method{tiepos}{integer64}(x, nties = NULL, method = NULL, ...) } \arguments{ \item{x}{a vector or a data frame or an array or \code{NULL}.} \item{...}{ignored} \item{nties}{NULL or the number of tied values (including NA). Providing \code{nties} can speed-up when \code{x} has no cache. Note that a wrong nties can cause undefined behaviour up to a crash.} \item{method}{NULL for automatic method selection or a suitable low-level method, see details} } \value{ an integer vector of positions } \description{ \code{tiepos} returns the positions of those elements that participate in ties. } \details{ This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. Suitable methods are \itemize{ \item \code{\link{sortordertie}} (fast ordering) \item \code{\link{ordertie}} (memory saving ordering). } } \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) tiepos(x) stopifnot(identical(tiepos(x), (1:length(x))[duplicated(x) | rev(duplicated(rev(x)))])) } \seealso{ \code{\link[=rank.integer64]{rank.integer64()}} for possibly tied ranks and \code{\link[=unipos.integer64]{unipos.integer64()}} for positions of unique values. } \keyword{manip} \keyword{univar} bit64/man/cache.Rd0000644000176200001440000000634714705122715013325 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cache.R \name{cache} \alias{cache} \alias{newcache} \alias{jamcache} \alias{setcache} \alias{getcache} \alias{remcache} \alias{print.cache} \title{Atomic Caching} \usage{ newcache(x) jamcache(x) cache(x) setcache(x, which, value) getcache(x, which) remcache(x) \method{print}{cache}(x, all.names = FALSE, pattern, ...) 
} \arguments{ \item{x}{an integer64 vector (or a cache object in case of \code{print.cache})} \item{which}{A character naming the object to be retrieved from the cache or to be stored in the cache} \item{value}{An object to be stored in the cache} \item{all.names, pattern}{passed to \code{\link[=ls]{ls()}} when listing the cache content} \item{...}{ignored} } \value{ See details } \description{ Functions for caching results attached to atomic objects } \details{ A \code{cache} is an \code{\link{environment}} attached to an atomic object with the \code{\link[=attr]{attribute}} name 'cache'. It contains at least a reference to the atomic object that carries the cache. This is used when accessing the cache to detect whether the object carrying the cache has been modified in the meantime. } \section{Functions}{ \itemize{ \item \code{newcache()}: creates a new cache referencing \code{x} \item \code{jamcache()}: forces \code{x} to have a cache \item \code{cache()}: returns the cache attached to \code{x} if it is not found to be outdated \item \code{setcache()}: assigns a value into the cache of \code{x} \item \code{getcache()}: gets cache value 'which' from \code{x} \item \code{remcache()}: removes the cache from \code{x} }} \examples{ x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE)) y <- x still.identical(x,y) y[1] <- NA still.identical(x,y) mycache <- newcache(x) ls(mycache) mycache rm(mycache) jamcache(x) cache(x) x[1] <- NA cache(x) getcache(x, "abc") setcache(x, "abc", 1) getcache(x, "abc") remcache(x) cache(x) } \seealso{ \code{\link[bit:still.identical]{bit::still.identical()}} for testing whether two symbols point to the same RAM. Functions that get and set small cache-content automatically when a cache is present: \code{\link[bit:Metadata]{bit::na.count()}}, \code{\link[bit:Metadata]{bit::nvalid()}}, \code{\link[bit:Metadata]{bit::is.sorted()}}, \code{\link[bit:Metadata]{bit::nunique()}} and \code{\link[bit:Metadata]{bit::nties()}}. Setting big caches with a relevant memory footprint requires a conscious decision of the user: \code{\link{hashcache}}, \code{\link{sortcache}}, \code{\link{ordercache}}, \code{\link{sortordercache}}. Functions that use big caches: \code{\link[=match.integer64]{match.integer64()}}, \code{\link{\%in\%.integer64}}, \code{\link[=duplicated.integer64]{duplicated.integer64()}}, \code{\link[=unique.integer64]{unique.integer64()}}, \code{\link[=unipos]{unipos()}}, \code{\link[=table.integer64]{table.integer64()}}, \code{\link[=keypos]{keypos()}}, \code{\link[=tiepos]{tiepos()}}, \code{\link[=rank.integer64]{rank.integer64()}}, \code{\link[=prank]{prank()}}, \code{\link[=qtile]{qtile()}}, \code{\link[=quantile.integer64]{quantile.integer64()}}, \code{\link[=median.integer64]{median.integer64()}}, and \code{\link[=summary.integer64]{summary.integer64()}} } \keyword{environment} bit64/man/c.integer64.Rd0000644000176200001440000000245214705122715014303 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/integer64.R \name{c.integer64} \alias{c.integer64} \alias{cbind.integer64} \alias{rbind.integer64} \title{Concatenating integer64 vectors} \usage{ \method{c}{integer64}(..., recursive = FALSE) \method{cbind}{integer64}(...) \method{rbind}{integer64}(...) } \arguments{ \item{...}{two or more arguments coerced to 'integer64' and passed to \code{\link[=NextMethod]{NextMethod()}}} \item{recursive}{logical.
If \code{recursive = TRUE}, the function recursively descends through lists (and pairlists) combining all their elements into a vector.} } \value{ \code{\link[=c]{c()}} returns an integer64 vector of the total length of the input. \code{\link[=cbind]{cbind()}} and \code{\link[=rbind]{rbind()}} return an integer64 matrix. } \description{ The usual functions 'c', 'cbind' and 'rbind' } \note{ R currently only dispatches generic 'c' to method 'c.integer64' if the first argument is 'integer64'. } \examples{ c(as.integer64(1), 2:6) cbind(1:6, as.integer(1:6)) rbind(1:6, as.integer(1:6)) } \seealso{ \code{\link[=rep.integer64]{rep.integer64()}} \code{\link[=seq.integer64]{seq.integer64()}} \code{\link[=as.data.frame.integer64]{as.data.frame.integer64()}} \code{\link[=integer64]{integer64()}} } \keyword{classes} \keyword{manip} bit64/DESCRIPTION0000644000176200001440000000324614742226407012725 0ustar liggesusersPackage: bit64 Title: A S3 Class for Vectors of 64bit Integers Version: 4.6.0-1 Authors@R: c( person("Michael", "Chirico", email = "michaelchirico4@gmail.com", role = c("aut", "cre")), person("Jens", "Oehlschlägel", role = "aut"), person("Leonardo", "Silvestri", role = "ctb"), person("Ofek", "Shilon", role = "ctb") ) Depends: R (>= 3.4.0), bit (>= 4.0.0) Description: Package 'bit64' provides serializable S3 atomic 64bit (signed) integers. These are useful for handling database keys and exact counting in +-2^63. WARNING: do not use them as a replacement for 32bit integers, integer64 are not supported for subscripting by R-core and they have different semantics when combined with double, e.g. integer64 + double => integer64. Class integer64 can be used in vectors, matrices, arrays and data.frames. Methods are available for coercion from and to logicals, integers, doubles, characters and factors as well as many elementwise and summary functions. Many fast algorithmic operations such as 'match' and 'order' support interactive data exploration and manipulation and optionally leverage caching. License: GPL-2 | GPL-3 LazyLoad: yes ByteCompile: yes URL: https://github.com/r-lib/bit64 Encoding: UTF-8 Imports: graphics, methods, stats, utils Suggests: testthat (>= 3.0.3), withr Config/testthat/edition: 3 Config/needs/development: testthat RoxygenNote: 7.3.2 NeedsCompilation: yes Packaged: 2025-01-16 14:04:20 UTC; michael Author: Michael Chirico [aut, cre], Jens Oehlschlägel [aut], Leonardo Silvestri [ctb], Ofek Shilon [ctb] Maintainer: Michael Chirico <michaelchirico4@gmail.com> Repository: CRAN Date/Publication: 2025-01-16 16:00:07 UTC