bit/0000755000176000001440000000000013620225451011060 5ustar ripleyusersbit/NAMESPACE0000755000176000001440000001250713264142560012312 0ustar ripleyusers# Namespace for bit (currently exporting (almost) everything in order to facilitate debugging) # (c) 2009 Jens Oehlschägel # Licence: GPL2 # Created: 2009-10-25 # Last changed: 2009-10-25 useDynLib(bit) importFrom(utils, packageDescription) export( # == this is a complete list of R functions and metadata of this package sorted by filenames, non-exported functions are commented away == # -- attrutil.R - modify attributes inplace without memory copying -- "setattr" , "setattributes" , "unattr" #, "named" # -- clone.R - deep copying -- , "clone.default" , "clone.list" , "still.identical" # -- generics.R - new generics for bit and ff -- ,"as.bit" ,"as.bitwhich" ,"as.which" ,"xor" ,"physical" ,"virtual" ,"clone" ,"ramsort" ,"shellsort" ,"quicksort" ,"mergesort" ,"radixsort" ,"keysort" ,"ramorder" ,"shellorder" ,"quickorder" ,"mergeorder" ,"radixorder" ,"keyorder" ,"ramsortorder" ,"shellsortorder" ,"quicksortorder" ,"mergesortorder" ,"radixsortorder" ,"keysortorder" ,"is.sorted" ,"na.count" ,"nvalid" ,"nunique" ,"nties" ,"is.sorted<-" ,"na.count<-" ,"nunique<-" ,"nties<-" # -- bit.R - 1-bit boolean vectors for R -- ,"!.bit" ,"!.bitwhich" ,"!=.bit" ,"!=.bitwhich" ,"&.bit" ,"&.bitwhich" ,"[.bit" ,"[[.bit" ,"[[<-.bit" ,"[<-.bit" ,"|.bit" ,"|.bitwhich" ,"==.bit" ,"==.bitwhich" ,"all.bit" ,"all.bitwhich" ,"all.ri" ,"any.bit" ,"any.bitwhich" ,"any.ri" ,"as.bit" ,"as.bit.bit" ,"as.bit.bitwhich" ,"as.bit.double" ,"as.bit.integer" ,"as.bit.logical" ,"as.bit.ri" ,"as.bit.which" ,"as.bitwhich" ,"as.bitwhich.bit" ,"as.bitwhich.bitwhich" ,"as.bitwhich.double" ,"as.bitwhich.integer" ,"as.bitwhich.logical" ,"as.bitwhich.ri" ,"as.bitwhich.which" ,"as.double.bit" ,"as.double.bitwhich" ,"as.double.ri" ,"as.integer.bit" ,"as.integer.bitwhich" ,"as.integer.ri" ,"as.logical.bit" ,"as.logical.bitwhich" ,"as.logical.ri" ,"as.which" ,"as.which.bit" ,"as.which.bitwhich" ,"as.which.default" ,"as.which.ri" ,"bit" ,"bit_done" ,"bit_init" ,"bitwhich" ,"c.bit" ,"c.bitwhich" ,"is.bit" ,"is.bitwhich" ,"is.ri" ,"length.bit" ,"length.bitwhich" ,"length.ri" ,"length<-.bit" ,"length<-.bitwhich" ,"max.bit" ,"max.bitwhich" ,"max.ri" ,"min.bit" ,"min.bitwhich" ,"min.ri" ,"physical" ,"physical<-" ,"physical.default" ,"physical<-.default" ,"print.bit" ,"print.bitwhich" ,"print.physical" ,"print.ri" ,"print.virtual" ,"range.bit" ,"range.bitwhich" ,"range.ri" ,"regtest.bit" ,"ri" ,"sum.bit" ,"sum.bitwhich" ,"sum.ri" ,"summary.bit" ,"summary.bitwhich" ,"summary.ri" ,"virtual" ,"virtual<-" ,"virtual.default" ,"virtual<-.default" ,"xor.bit" ,"xor.bitwhich" ,"xor.default" # -- rle.R - rle utilities for bit and ff -- ,"intisasc" ,"intisdesc" ,"intrle" ,"rlepack" ,"rlepack.integer" ,"rleunpack" ,"rleunpack.rlepack" ,"rev.rlepack" ,"unique.rlepack" ,"anyDuplicated.rlepack" # -- Chunking utilities for bit and ff -- ,"bbatch" ,"chunk" ,"chunk.default" ,"repfromto" ,"repfromto<-" ,"vecseq" # -- timing utilities for bit and ff -- ,"repeat.time" # -- zzz.R -- #,.Last.lib ) # -- bit.R - 1-bit boolean vectors for R -- S3method("!", bit) S3method("!", bitwhich) S3method("!=", bit) S3method("!=", bitwhich) S3method("&", bit) S3method("&", bitwhich) S3method("[", bit) S3method("[[", bit) S3method("[[<-", bit) S3method("[<-", bit) S3method("|", bit) S3method("|", bitwhich) S3method("==", bit) S3method("==", bitwhich) S3method("all", bit) S3method("all", bitwhich) S3method("all", ri) S3method("any", bit) S3method("any", bitwhich) S3method("any", ri) S3method("as.bit", bit) S3method("as.bit", bitwhich) S3method("as.bit", double) S3method("as.bit", integer) S3method("as.bit", logical) S3method("as.bit", ri) S3method("as.bit", which) S3method("as.bitwhich", bit) S3method("as.bitwhich", bitwhich) S3method("as.bitwhich", double) S3method("as.bitwhich", integer) S3method("as.bitwhich", logical) S3method("as.bitwhich", ri) S3method("as.bitwhich", which) S3method("as.double", bit) S3method("as.double", bitwhich) S3method("as.double", ri) S3method("as.integer", bit) S3method("as.integer", bitwhich) S3method("as.integer", ri) S3method("as.logical", bit) S3method("as.logical", bitwhich) S3method("as.logical", ri) S3method("as.which", bit) S3method("as.which", bitwhich) S3method("as.which", default) S3method("as.which", ri) S3method("c", bit) S3method("c", bitwhich) S3method("length", bit) S3method("length", bitwhich) S3method("length", ri) S3method("length<-", bit) S3method("length<-", bitwhich) S3method("max", bit) S3method("max", bitwhich) S3method("max", ri) S3method("min", bit) S3method("min", bitwhich) S3method("min", ri) S3method("physical", default) S3method("physical<-", default) S3method("print", bit) S3method("print", bitwhich) S3method("print", physical) S3method("print", ri) S3method("print", virtual) S3method("range", bit) S3method("range", bitwhich) S3method("range", ri) S3method("sum", bit) S3method("sum", bitwhich) S3method("sum", ri) S3method("summary", bit) S3method("summary", bitwhich) S3method("summary", ri) S3method("virtual", default) S3method("virtual<-", default) S3method("xor", bit) S3method("xor", bitwhich) S3method("xor", default) # -- rle.R - rle utilities for bit and ff -- S3method("rlepack", integer) S3method("rleunpack", rlepack) S3method("rev", rlepack) S3method("unique", rlepack) S3method("anyDuplicated", rlepack) # -- Chunking utilities for bit and ff -- S3method(chunk, default) # -- clone.R - deep copying -- S3method(clone, default) S3method(clone, list) bit/exec/0000755000176000001440000000000013072200400011770 5ustar ripleyusersbit/exec/make_rd.pl0000755000176000001440000000161513072200400013735 0ustar ripleyusers# reads the standard input line by line and writes out all lines # that begin with "#!". The output is splitted into several output # files as follows: Every time a line of the format "#! \name{} # is encountered, a file with the name ".Rd" is created and # the output is written into it (until the next line with this format # is found). Thus, the first line beginning with "#!" must of this # type, because otherwise the script would not know where to write # the output to. my $open = 0; while() { $line = $_; if( $line =~ /^#! ?(.*)/ ) { $line = $1; if( $line =~ /\\name{(.*)}/ ) { $f = $1; if( $open ) { close( OUT ); } open( OUT, ">$f.rd" ); $open = "true"; } if( $open ) { print OUT $line . "\n"; } } } close(OUT); bit/exec/prebuild.sh0000755000176000001440000000056113072200400014137 0ustar ripleyusers#!/bin/sh # Produce the Rd-files for the documentation from the R source files # # Prerequisites: # - Perl # - R_HOME must be set to the directory where R is installed echo "#### starting prebuild.sh" cd .. mkdir -p man cd man find ../R -name '*.[rR]' -exec cat \{\} \; | perl ../exec/make_rd.pl cd ../exec echo "#### prebuild.sh completed!" bit/man/0000755000176000001440000000000013072200400011617 5ustar ripleyusersbit/man/clone.rd0000755000176000001440000000200713264143261013266 0ustar ripleyusers\name{clone} \alias{clone} \alias{clone.list} \alias{clone.default} \alias{still.identical} \title{ Cloning ff and ram objects } \description{ \command{clone} physically duplicates objects and can additionally change some features, e.g. length. } \usage{ clone(x, \dots) \method{clone}{list}(x, \dots) \method{clone}{default}(x, \dots) still.identical(x, y) } \arguments{ \item{x}{ \code{x} } \item{y}{ \code{y} } \item{\dots}{ further arguments to the generic } } \details{ \command{clone} is generic. \command{clone.default} currently only handles atomics. \command{clone.list} recursively clones list elements. \command{still.identical} returns TRUE if the two atomic arguments still point to the same memory. } \value{ an object that is a deep copy of x } \author{ Jens Oehlschlägel } \seealso{ \code{\link[ff]{clone.ff}} } \examples{ x <- 1:12 y <- x still.identical(x,y) y[1] <- y[1] still.identical(x,y) y <- clone(x) still.identical(x,y) rm(x,y); gc() } \keyword{ IO } \keyword{ data } bit/man/bitwhich.rd0000755000176000001440000000311413264143261013767 0ustar ripleyusers\name{bitwhich} \alias{bitwhich} \alias{print.bitwhich} \title{ A class for vectors representing asymetric selections } \description{ A bitwhich object like the result of \code{\link{which}} and \code{\link{as.which}} does represent integer subscript positions, but bitwhich objects represent some subscripts rather with negative integers, if this needs less space. The extreme cases of selecting all/none subscripts are represented by TRUE/FALSE. This needs less RAM compared to \code{\link{logical}} (and often less than \code{\link{as.which}}). Logical operations are fast if the selection is asymetric (only few or almost all selected). } \usage{ bitwhich(maxindex, poslength = NULL, x = NULL) } \arguments{ \item{maxindex}{ the length of the vector (sum of all TRUEs and FALSEs) } \item{poslength}{ Only use if x is not NULL: the sum of all TRUEs } \item{x}{ Default NULL or FALSE or unique negative integers or unique positive integers or TRUE} } \value{ An object of class 'bitwhich' carrying two attributes \item{maxindex}{ see above } \item{poslength}{ see above } } \details{ class 'bitwhich' represents a boolean selection in one of the following ways \itemize{ \item FALSE to select nothing \item TRUE to select everything \item unique positive integers to select those \item unique negative integers to exclude those } } \author{ Jens Oehlschlägel } \seealso{ \code{\link{as.bitwhich}}, \code{\link{as.which}}, \code{\link{bit}} } \examples{ bitwhich(12, x=c(1,3), poslength=2) bitwhich(12, x=-c(1,3), poslength=10) } \keyword{ classes } \keyword{ logic } bit/man/ri.rd0000755000176000001440000000123313264143261012600 0ustar ripleyusers\name{ri} \alias{ri} \alias{print.ri} \title{ Range index } \description{ A range index can be used to extract or replace a continuous ascending part of the data } \usage{ ri(from, to = NULL, maxindex=NA) \method{print}{ri}(x, \dots) } \arguments{ \item{from}{ first position } \item{to}{ last posistion } \item{x}{ an object of class 'ri' } \item{maxindex}{ the maximal length of the object-to-be-subscripted (if known) } \item{\dots}{ further arguments } } \value{ A two element integer vector with class 'ri' } \author{ Jens Oehlschlägel } \seealso{ \code{\link[ff]{as.hi.ri}} } \examples{ bit(12)[ri(1,6)] } \keyword{ classes } \keyword{ logic } bit/man/bbatch.rd0000755000176000001440000000133313264143261013412 0ustar ripleyusers\name{bbatch} \alias{bbatch} \title{ Balanced Batch sizes } \description{ \command{bbatch} calculates batch sizes so that they have rather balanced sizes than very different sizes } \usage{ bbatch(N, B) } \arguments{ \item{N}{ total size } \item{B}{ desired batch size } } \value{ a list with components \item{ b }{ the batch size } \item{ nb }{ the number of batches } \item{ rb }{ the size of the rest } } \details{ Tries to have \code{rb==0} or \code{rb} as close to \code{b} as possible while guaranteing that \code{rb < b && (b - rb) <= min(nb, b)} } \author{ Jens Oehlschlägel } \seealso{ \code{\link{repfromto}}, \code{\link[ff]{ffvecapply}} } \examples{ bbatch(100, 24) } \keyword{ IO } \keyword{ data } bit/man/vecseq.rd0000755000176000001440000000314613264143261013461 0ustar ripleyusers\name{vecseq} \alias{vecseq} \title{ Vectorized Sequences } \description{ \command{vecseq} returns concatenated multiple sequences } \usage{ vecseq(x, y=NULL, concat=TRUE, eval=TRUE) } \arguments{ \item{x}{ vector of sequence start points } \item{y}{ vector of sequence end points (if \code{is.null(y)} then \code{x} are taken as endpoints, all starting at 1) } \item{concat}{ vector of sequence end points (if \code{is.null(y)} then \code{x} are taken as endpoints, all starting at 1) } \item{eval}{ vector of sequence end points (if \code{is.null(y)} then \code{x} are taken as endpoints, all starting at 1) } } \details{ This is a generalization of \code{\link{sequence}} in that you can choose sequence starts other than 1 and also have options to no concat and/or return a call instead of the evaluated sequence. } \value{ if \code{concat==FALSE} and \code{eval==FALSE} a list with n calls that generate sequences \cr if \code{concat==FALSE} and \code{eval==TRUE } a list with n sequences \cr if \code{concat==TRUE } and \code{eval==FALSE} a single call generating the concatenated sequences \cr if \code{concat==TRUE } and \code{eval==TRUE } an integer vector of concatentated sequences } \author{ Angelo Canty, Jens Oehlschlägel } \seealso{ \code{\link{:}}, \code{\link{seq}}, \code{\link{sequence}} } \examples{ sequence(c(3,4)) vecseq(c(3,4)) vecseq(c(1,11), c(5, 15)) vecseq(c(1,11), c(5, 15), concat=FALSE, eval=FALSE) vecseq(c(1,11), c(5, 15), concat=FALSE, eval=TRUE) vecseq(c(1,11), c(5, 15), concat=TRUE, eval=FALSE) vecseq(c(1,11), c(5, 15), concat=TRUE, eval=TRUE) } \keyword{ manip } bit/man/Extract.rd0000755000176000001440000000247013264143261013604 0ustar ripleyusers\name{Extract} \alias{[[.bit} \alias{[[<-.bit} \alias{[.bit} \alias{[<-.bit} \title{ Extract or replace part of an bit vector } \description{ Operators acting on bit objects to extract or replace parts. } \usage{ \method{[[}{bit}(x, i) \method{[[}{bit}(x, i) <- value \method{[}{bit}(x, i) \method{[}{bit}(x, i) <- value } \arguments{ \item{x}{ a bit object } \item{i}{ positive integer subscript } \item{value}{ new logical or integer values } } \details{ Since this package was created for high performance purposes, only positive integer subscripts make sense. Negative subscripts are converted to positive ones, beware the RAM consumption. Further subscript classes allowed for '[' and '[<-' are range indices \code{\link{ri}} and \code{\link{bitwhich}}. The '[' and '[<-' methods don't check whether the subscripts are positive integers in the allowed range. } \value{ The extractors \code{[[} and \code{[} return a logical scalar or vector. The replacment functions return a bit object. } \author{ Jens Oehlschlägel } \seealso{ \code{\link{bit}}, \code{\link{Extract}} } \examples{ x <- as.bit(c(FALSE, NA, TRUE)) x[] <- c(FALSE, NA, TRUE) x[1:2] x[-3] x[ri(1,2)] x[as.bitwhich(c(TRUE,TRUE,FALSE))] x[[1]] x[] <- TRUE x[1:2] <- FALSE x[[1]] <- TRUE } \keyword{ classes } \keyword{ logic } bit/man/bit-package.rd0000755000176000001440000003700213264143261014340 0ustar ripleyusers\name{bit-package} \alias{bit-package} \alias{bit} \alias{print.bit} \docType{package} \title{ A class for vectors of 1-bit booleans } \description{ Package 'bit' provides bitmapped vectors of booleans (no NAs), coercion from and to logicals, integers and integer subscripts; fast boolean operators and fast summary statistics. \cr With bit vectors you can store true binary booleans \{FALSE,TRUE\} at the expense of 1 bit only, on a 32 bit architecture this means factor 32 less RAM and factor 32 more speed on boolean operations. With this speed gain it even pays-off to convert to bit in order to avoid a single boolean operation on logicals or a single set operation on (longer) integer subscripts, the pay-off is dramatic when such components are used more than once. \cr Reading from and writing to bit is approximately as fast as accessing standard logicals - mostly due to R's time for memory allocation. The package allows to work with pre-allocated memory for return values by calling .Call() directly: when evaluating the speed of C-access with pre-allocated vector memory, coping from bit to logical requires only 70\% of the time for copying from logical to logical; and copying from logical to bit comes at a performance penalty of 150\%. \cr Since bit objects cannot be used as subsripts in R, a second class 'bitwhich' allows to store selections as efficiently as possible with standard R types. This is usefull either to represent parts of bit objects or to represent very asymetric selections. \cr Class 'ri' (range index) allows to select ranges of positions for chunked processing: all three classes 'bit', 'bitwhich' and 'ri' can be used for subsetting 'ff' objects (ff-2.1.0 and higher). } \usage{ bit(length) \method{print}{bit}(x, \dots) } \arguments{ \item{length}{ length of vector in bits } \item{x}{ a bit vector } \item{\dots}{ further arguments to print } } \details{ \tabular{ll}{ Package: \tab bit\cr Type: \tab Package\cr Version: \tab 1.1.0\cr Date: \tab 2012-06-05\cr License: \tab GPL-2\cr LazyLoad: \tab yes\cr Encoding: \tab latin1\cr } Index: \tabular{rrrrl}{ \bold{bit function} \tab \bold{bitwhich function} \tab \bold{ri function} \tab \bold{see also} \tab \bold{description} \cr \code{.BITS} \tab \tab \tab \code{\link{globalenv}} \tab variable holding number of bits on this system \cr \code{\link{bit_init}} \tab \tab \tab \code{\link{.First.lib}} \tab initially allocate bit-masks (done in .First.lib) \cr \code{\link{bit_done}} \tab \tab \tab \code{\link{.Last.lib}} \tab finally de-allocate bit-masks (done in .Last.lib) \cr \code{\link{bit}} \tab \code{\link{bitwhich}} \tab \code{\link{ri}} \tab \code{\link{logical}} \tab create bit object \cr \code{\link{print.bit}} \tab \code{\link{print.bitwhich}} \tab \code{\link{print.ri}} \tab \code{\link{print}} \tab print bit vector \cr \code{\link{length.bit}} \tab \code{\link{length.bitwhich}} \tab \code{\link{length.ri}} \tab \code{\link{length}} \tab get length of bit vector \cr \code{\link{length<-.bit}} \tab \code{\link{length<-.bitwhich}} \tab \tab \code{\link{length<-}} \tab change length of bit vector \cr \code{\link{c.bit}} \tab \code{\link{c.bitwhich}} \tab \tab \code{\link{c}} \tab concatenate bit vectors \cr \code{\link{is.bit}} \tab \code{\link{is.bitwhich}} \tab \code{\link{is.ri}} \tab \code{\link{is.logical}} \tab test for bit class \cr \code{\link{as.bit}} \tab \code{\link{as.bitwhich}} \tab \tab \code{\link{as.logical}} \tab generically coerce to bit or bitwhich \cr \code{\link{as.bit.logical}} \tab \code{\link{as.bitwhich.logical}} \tab \tab \code{\link{logical}} \tab coerce logical to bit vector (FALSE => FALSE, c(NA, TRUE) => TRUE) \cr \code{\link{as.bit.integer}} \tab \code{\link{as.bitwhich.integer}} \tab \tab \code{\link{integer}} \tab coerce integer to bit vector (0 => FALSE, ELSE => TRUE) \cr \code{\link{as.bit.double}} \tab \code{\link{as.bitwhich.double}} \tab \tab \code{\link{double}} \tab coerce double to bit vector (0 => FALSE, ELSE => TRUE) \cr \code{\link{as.double.bit}} \tab \code{\link{as.double.bitwhich}} \tab \code{\link{as.double.ri}} \tab \code{\link{as.double}} \tab coerce bit vector to double (0/1) \cr \code{\link{as.integer.bit}} \tab \code{\link{as.integer.bitwhich}} \tab \code{\link{as.integer.ri}} \tab \code{\link{as.integer}} \tab coerce bit vector to integer (0L/1L) \cr \code{\link{as.logical.bit}} \tab \code{\link{as.logical.bitwhich}} \tab \code{\link{as.logical.ri}} \tab \code{\link{as.logical}} \tab coerce bit vector to logical (FALSE/TRUE) \cr \code{\link{as.which.bit}} \tab \code{\link{as.which.bitwhich}} \tab \code{\link{as.which.ri}} \tab \code{\link{as.which}} \tab coerce bit vector to positive integer subscripts\cr \code{\link{as.bit.which}} \tab \code{\link{as.bitwhich.which}} \tab \tab \code{\link{bitwhich}} \tab coerce integer subscripts to bit vector \cr \code{\link{as.bit.bitwhich}} \tab \code{\link{as.bitwhich.bitwhich}}\tab \tab \tab coerce from bitwhich \cr \code{\link{as.bit.bit}} \tab \code{\link{as.bitwhich.bit}} \tab \tab \code{\link{UseMethod}} \tab coerce from bit \cr \code{\link{as.bit.ri}} \tab \code{\link{as.bitwhich.ri}} \tab \tab \tab coerce from range index \cr \code{\link[ff]{as.bit.ff}} \tab \tab \tab \code{\link[ff]{ff}} \tab coerce ff boolean to bit vector \cr \code{\link[ff]{as.ff.bit}} \tab \tab \tab \code{\link[ff]{as.ff}} \tab coerce bit vector to ff boolean \cr \code{\link[ff]{as.hi.bit}} \tab \code{\link[ff]{as.hi.bitwhich}} \tab \code{\link[ff]{as.hi.ri}} \tab \code{\link[ff]{as.hi}} \tab coerce to hybrid index (requires package ff) \cr \code{\link[ff]{as.bit.hi}} \tab \code{\link[ff]{as.bitwhich.hi}} \tab \tab \tab coerce from hybrid index (requires package ff) \cr \code{\link{[[.bit}} \tab \tab \tab \code{\link{[[}} \tab get single bit (index checked) \cr \code{\link{[[<-.bit}} \tab \tab \tab \code{\link{[[<-}} \tab set single bit (index checked) \cr \code{\link{[.bit}} \tab \tab \tab \code{\link{[}} \tab get vector of bits (unchecked) \cr \code{\link{[<-.bit}} \tab \tab \tab \code{\link{[<-}} \tab set vector of bits (unchecked) \cr \code{\link{!.bit}} \tab \code{\link{!.bitwhich}} \tab (works as second arg in \tab \code{\link{!}} \tab boolean NOT on bit \cr \code{\link{&.bit}} \tab \code{\link{&.bitwhich}} \tab bit and bitwhich ops) \tab \code{\link{&}} \tab boolean AND on bit \cr \code{\link{|.bit}} \tab \code{\link{|.bitwhich}} \tab \tab \code{\link{|}} \tab boolean OR on bit \cr \code{\link{xor.bit}} \tab \code{\link{xor.bitwhich}} \tab \tab \code{\link{xor}} \tab boolean XOR on bit \cr \code{\link{!=.bit}} \tab \code{\link{!=.bitwhich}} \tab \tab \code{\link{!=}} \tab boolean unequality (same as XOR) \cr \code{\link{==.bit}} \tab \code{\link{==.bitwhich}} \tab \tab \code{\link{==}} \tab boolean equality \cr \code{\link{all.bit}} \tab \code{\link{all.bitwhich}} \tab \code{\link{all.ri}} \tab \code{\link{all}} \tab aggregate AND \cr \code{\link{any.bit}} \tab \code{\link{any.bitwhich}} \tab \code{\link{any.ri}} \tab \code{\link{any}} \tab aggregate OR \cr \code{\link{min.bit}} \tab \code{\link{min.bitwhich}} \tab \code{\link{min.ri}} \tab \code{\link{min}} \tab aggregate MIN (first TRUE position) \cr \code{\link{max.bit}} \tab \code{\link{max.bitwhich}} \tab \code{\link{max.ri}} \tab \code{\link{max}} \tab aggregate MAX (last TRUE position) \cr \code{\link{range.bit}} \tab \code{\link{range.bitwhich}} \tab \code{\link{range.ri}} \tab \code{\link{range}} \tab aggregate [MIN,MAX] \cr \code{\link{sum.bit}} \tab \code{\link{sum.bitwhich}} \tab \code{\link{sum.ri}} \tab \code{\link{sum}} \tab aggregate SUM (count of TRUE) \cr \code{\link{summary.bit}} \tab \code{\link{summary.bitwhich}} \tab \code{\link{summary.ri}} \tab \code{\link{tabulate}} \tab aggregate c(nFALSE, nTRUE, minRange, maxRange) \cr \code{\link{regtest.bit}} \tab \tab \tab \tab regressiontests for the package \cr } } \value{ \code{bit} returns a vector of integer sufficiently long to store 'length' bits (but not longer) with an attribute 'n' and class 'bit' } \author{ Jens Oehlschlägel Maintainer: Jens Oehlschlägel } \note{ Currently operations on bit objects have some overhead from R-calls. Do expect speed gains for vectors of length ~ 10000 or longer. \cr Since this package was created for high performance purposes, only positive integer subscripts are allowed: All R-functions behave as expected - i.e. they do not change their arguments and create new return values. If you want to save the time for return value memory allocation, you must use \code{\link{.Call}} directly (see the dontrun example in \code{\link{sum.bit}}). } \keyword{ package } \keyword{ classes } \keyword{ logic } \seealso{ \code{\link{logical}} in base R and \code{\link[ff]{vmode}} in package 'ff' } \examples{ x <- bit(12) # create bit vector x # autoprint bit vector length(x) <- 16 # change length length(x) # get length x[[2]] # extract single element x[[2]] <- TRUE # replace single element x[1:2] # extract parts of bit vector x[1:2] <- TRUE # replace parts of bit vector as.which(x) # coerce bit to subscripts x <- as.bit.which(3:4, 4) # coerce subscripts to bit as.logical(x) # coerce bit to logical y <- as.bit(c(FALSE, TRUE, FALSE, TRUE)) # coerce logical to bit is.bit(y) # test for bit !x # boolean NOT x & y # boolean AND x | y # boolean OR xor(x, y) # boolean Exclusive OR x != y # boolean unequality (same as xor) x == y # boolean equality all(x) # aggregate AND any(x) # aggregate OR min(x) # aggregate MIN (integer version of ALL) max(x) # aggregate MAX (integer version of ANY) range(x) # aggregate [MIN,MAX] sum(x) # aggregate SUM (count of TRUE) summary(x) # aggregate count of FALSE and TRUE \dontrun{ message("\nEven for a single boolean operation transforming logical to bit pays off") n <- 10000000 x <- sample(c(FALSE, TRUE), n, TRUE) y <- sample(c(FALSE, TRUE), n, TRUE) system.time(x|y) system.time({ x <- as.bit(x) y <- as.bit(y) }) system.time( z <- x | y ) system.time( as.logical(z) ) message("Even more so if multiple operations are needed :-)") message("\nEven for a single set operation transforming subscripts to bit pays off\n") n <- 10000000 x <- sample(n, n/2) y <- sample(n, n/2) system.time( union(x,y) ) system.time({ x <- as.bit.which(x, n) y <- as.bit.which(y, n) }) system.time( as.which.bit( x | y ) ) message("Even more so if multiple operations are needed :-)") message("\nSome timings WITH memory allocation") n <- 2000000 l <- sample(c(FALSE, TRUE), n, TRUE) # copy logical to logical system.time(for(i in 1:100){ # 0.0112 l2 <- l l2[1] <- TRUE # force new memory allocation (copy on modify) rm(l2) })/100 # copy logical to bit system.time(for(i in 1:100){ # 0.0123 b <- as.bit(l) rm(b) })/100 # copy bit to logical b <- as.bit(l) system.time(for(i in 1:100){ # 0.009 l2 <- as.logical(b) rm(l2) })/100 # copy bit to bit b <- as.bit(l) system.time(for(i in 1:100){ # 0.009 b2 <- b b2[1] <- TRUE # force new memory allocation (copy on modify) rm(b2) })/100 l2 <- l # replace logical by TRUE system.time(for(i in 1:100){ l[] <- TRUE })/100 # replace bit by TRUE (NOTE that we recycle the assignment # value on R side == memory allocation and assignment first) system.time(for(i in 1:100){ b[] <- TRUE })/100 # THUS the following is faster system.time(for(i in 1:100){ b <- !bit(n) })/100 # replace logical by logical system.time(for(i in 1:100){ l[] <- l2 })/100 # replace bit by logical system.time(for(i in 1:100){ b[] <- l2 })/100 # extract logical system.time(for(i in 1:100){ l2[] })/100 # extract bit system.time(for(i in 1:100){ b[] })/100 message("\nSome timings WITHOUT memory allocation (Serge, that's for you)") n <- 2000000L l <- sample(c(FALSE, TRUE), n, TRUE) b <- as.bit(l) # read from logical, write to logical l2 <- logical(n) system.time(for(i in 1:100).Call("R_filter_getset", l, l2, PACKAGE="bit")) / 100 # read from bit, write to logical l2 <- logical(n) system.time(for(i in 1:100).Call("R_bit_get", b, l2, c(1L, n), PACKAGE="bit")) / 100 # read from logical, write to bit system.time(for(i in 1:100).Call("R_bit_set", b, l2, c(1L, n), PACKAGE="bit")) / 100 } } bit/man/setattributes.rd0000755000176000001440000000552413264143261015077 0ustar ripleyusers\name{setattributes} \alias{setattributes} \alias{setattr} \title{ Attribute setting by reference } \description{ Function \code{setattr} sets a singe attribute and function \code{setattributes} sets a list of attributes. } \usage{ setattr(x, which, value) setattributes(x, attributes) } \arguments{ \item{x}{ } \item{which}{ name of the attribute } \item{value}{ value of the attribute, use NULL to remove this attribute } \item{attributes}{ a named list of attribute values } } \details{ The attributes of 'x' are changed in place without copying x. function \code{setattributes} does only change the named attributes, it does not delete the non-names attributes like \code{\link{attributes}} does. } \value{ invisible(), we do not return the changed object to remind you of the fact that this function is called for its side-effect of changing its input object. } \references{ Writing R extensions -- System and foreign language interfaces -- Handling R objects in C -- Attributes (Version 2.11.1 (2010-06-03 ) R Development) } \author{ Jens Oehlschlägel } \seealso{ \code{\link{attr}} \code{\link{unattr}} } \examples{ x <- as.single(runif(10)) attr(x, "Csingle") f <- function(x)attr(x, "Csingle") <- NULL g <- function(x)setattr(x, "Csingle", NULL) f(x) x g(x) x \dontrun{ # restart R library(bit) mysingle <- function(length = 0){ ret <- double(length) setattr(ret, "Csingle", TRUE) ret } # show that mysinge gives exactly the same result as single identical(single(10), mysingle(10)) # look at the speedup and memory-savings of mysingle compared to single system.time(mysingle(1e7)) memory.size(max=TRUE) system.time(single(1e7)) memory.size(max=TRUE) # look at the memory limits # on my win32 machine the first line fails beause of not enough RAM, the second works x <- single(1e8) x <- mysingle(1e8) # .g. performance with factors x <- rep(factor(letters), length.out=1e7) x[1:10] # look how fast one can do this system.time(setattr(x, "levels", rev(letters))) x[1:10] # look at the performance loss in time caused by the non-needed copying system.time(levels(x) <- letters) x[1:10] # restart R library(bit) simplefactor <- function(n){ factor(rep(1:2, length.out=n)) } mysimplefactor <- function(n){ ret <- rep(1:2, length.out=n) setattr(ret, "levels", as.character(1:2)) setattr(ret, "class", "factor") ret } identical(simplefactor(10), mysimplefactor(10)) system.time(x <- mysimplefactor(1e7)) memory.size(max=TRUE) system.time(setattr(x, "levels", c("a","b"))) memory.size(max=TRUE) x[1:4] memory.size(max=TRUE) rm(x) gc() system.time(x <- simplefactor(1e7)) memory.size(max=TRUE) system.time(levels(x) <- c("x","y")) memory.size(max=TRUE) x[1:4] memory.size(max=TRUE) rm(x) gc() } } \keyword{ attributes } bit/man/c.bit.rd0000755000176000001440000000073113264143261013167 0ustar ripleyusers\name{c.bit} \alias{c.bit} \alias{c.bitwhich} \title{ Concatenating bit and bitwhich vectors } \description{ Creating new bit by concatenating bit vectors } \usage{ \method{c}{bit}(\dots) \method{c}{bitwhich}(\dots) } \arguments{ \item{\dots}{ bit objects } } \value{ An object of class 'bit' } \author{ Jens Oehlschlägel } \seealso{ \code{\link{c}}, \code{\link{bit}} , \code{\link{bitwhich}} } \examples{ c(bit(4), bit(4)) } \keyword{ classes } \keyword{ logic } bit/man/unattr.rd0000755000176000001440000000072213264143261013505 0ustar ripleyusers\name{unattr} \alias{unattr} \title{ Attribute removal } \description{ Returns object with attributes removed } \usage{ unattr(x) } \arguments{ \item{x}{ any R object } } \details{ attribute removal copies the object as usual } \value{ a similar object with attributes removed } \author{ Jens Oehlschlägel } \seealso{ \code{\link{attributes}}, \code{\link{setattributes}}, \code{\link{unclass}} } \examples{ bit(2)[] unattr(bit(2)[]) } \keyword{attribute} bit/man/is.bit.rd0000755000176000001440000000114313264143261013356 0ustar ripleyusers\name{is.bit} \alias{is.ri} \alias{is.bit} \alias{is.bitwhich} \title{ Testing for bit, bitwhich and ri selection classes } \description{ Test whether an object inherits from 'ri', 'bit' or 'bitwhich' } \usage{ is.ri(x) is.bit(x) is.bitwhich(x) } \arguments{ \item{x}{ an R object of unknown type } } \value{ TRUE or FALSE } \author{ Jens Oehlschlägel } \seealso{ \code{\link{is.logical}}, \code{\link{bit}}, \code{\link{bitwhich}} } \examples{ is.ri(TRUE) is.ri(ri(1,4,12)) is.bit(TRUE) is.bitwhich(TRUE) is.bit(as.bit(TRUE)) is.bitwhich(as.bitwhich(TRUE)) } \keyword{ classes } \keyword{ logic } bit/man/chunk.rd0000755000176000001440000000551013264143261013300 0ustar ripleyusers\name{chunk} \alias{chunk} \alias{chunk.default} \title{ Chunked range index } \description{ creates a sequence of range indexes using a syntax not completely unlike 'seq' } \usage{ chunk(\dots) \method{chunk}{default}(from = NULL, to = NULL, by = NULL, length.out = NULL, along.with = NULL , overlap = 0L, method = c("bbatch", "seq"), maxindex = NA, \dots) } \arguments{ \item{from}{ the starting value of the sequence. } \item{to}{ the (maximal) end value of the sequence. } \item{by}{ increment of the sequence } \item{length.out}{ desired length of the sequence. } \item{along.with}{ take the length from the length of this argument. } \item{overlap}{ number of values to overlap (will lower the starting value of the sequence, the first range becomes smaller } \item{method}{ default 'bbatch' will try to balance the chunk size, see \code{\link{bbatch}}, 'seq' will create chunks like \code{\link[base]{seq}} } \item{maxindex}{ passed to \code{\link{ri}} } \item{\dots}{ ignored } } \details{ \code{chunk} is generic, the default method is described here, other methods that automatically consider RAM needs are provided with package 'ff', see for example \code{\link[ff]{chunk.ffdf}} } \section{available methods}{ \code{chunk.default}, \code{\link[ff]{chunk.bit}}, \code{\link[ff]{chunk.ff_vector}}, \code{\link[ff]{chunk.ffdf}} } \value{ \code{chunk.default} returns a list of \code{\link{ri}} objects representing chunks of subscripts } \author{ Jens Oehlschlägel } \seealso{ \code{\link{ri}}, \code{\link[base]{seq}}, \code{\link{bbatch}} } \examples{ chunk(1, 100, by=30) chunk(1, 100, by=30, method="seq") \dontrun{ require(foreach) m <- 10000 k <- 1000 n <- m*k message("Four ways to loop from 1 to n. Slowest foreach to fastest chunk is 1700:1 on a dual core notebook with 3GB RAM\n") z <- 0L; print(k*system.time({it <- icount(m); foreach (i = it) \%do\% { z <- i; NULL }})) z z <- 0L print(system.time({i <- 0L; while (i } \examples{ system.time(1+1) repeat.time(1+1) system.time(sort(runif(1e6))) repeat.time(sort(runif(1e6))) } \keyword{utilities} bit/man/as.logical.bit.rd0000755000176000001440000000347113264143261014765 0ustar ripleyusers\name{as.logical.bit} \alias{as.logical.bit} \alias{as.integer.bit} \alias{as.double.bit} \alias{as.logical.bitwhich} \alias{as.integer.bitwhich} \alias{as.double.bitwhich} \alias{as.logical.ri} \alias{as.integer.ri} \alias{as.double.ri} \title{ Coercion from bit, bitwhich and ri to logical, integer, double } \description{ Coercing from bit to logical, integer, which. } \usage{ \method{as.logical}{bit}(x, \dots) \method{as.logical}{bitwhich}(x, \dots) \method{as.logical}{ri}(x, \dots) \method{as.integer}{bit}(x, \dots) \method{as.integer}{bitwhich}(x, \dots) \method{as.integer}{ri}(x, \dots) \method{as.double}{bit}(x, \dots) \method{as.double}{bitwhich}(x, \dots) \method{as.double}{ri}(x, \dots) } \arguments{ \item{x}{ an object of class \code{\link{bit}}, \code{\link{bitwhich}} or \code{\link{ri}} } \item{\dots}{ ignored } } \details{ Coercion from bit is quite fast because we use a double loop that fixes each word in a processor register. } \value{ \code{\link{as.logical}} returns a vector of \code{FALSE, TRUE}, \code{\link{as.integer}} and \code{\link{as.double}} return a vector of \code{0,1}. } \author{ Jens Oehlschlägel } \seealso{ \code{\link{as.bit}}, \code{\link{as.which}}, \code{\link{as.bitwhich}}, \code{\link[ff]{as.ff}}, \code{\link[ff]{as.hi}} } \examples{ x <- ri(2, 5, 10) y <- as.logical(x) y stopifnot(identical(y, as.logical(as.bit(x)))) stopifnot(identical(y, as.logical(as.bitwhich(x)))) y <- as.integer(x) y stopifnot(identical(y, as.integer(as.logical(x)))) stopifnot(identical(y, as.integer(as.bit(x)))) stopifnot(identical(y, as.integer(as.bitwhich(x)))) y <- as.double(x) y stopifnot(identical(y, as.double(as.logical(x)))) stopifnot(identical(y, as.double(as.bit(x)))) stopifnot(identical(y, as.double(as.bitwhich(x)))) } \keyword{ classes } \keyword{ logic } bit/man/ramsort.rd0000755000176000001440000001147313264143261013664 0ustar ripleyusers\name{ramsort} \alias{ramsort} \alias{shellsort} \alias{quicksort} \alias{mergesort} \alias{radixsort} \alias{keysort} \alias{ramorder} \alias{shellorder} \alias{quickorder} \alias{mergeorder} \alias{radixorder} \alias{keyorder} \alias{ramsortorder} \alias{shellsortorder} \alias{quicksortorder} \alias{mergesortorder} \alias{radixsortorder} \alias{keysortorder} \title{ Generics for in-RAM sorting and ordering } \description{ These are generic stubs for low-level sorting and ordering methods implemented in packages 'bit64' and 'ff'. The \code{..sortorder} methods do sorting and ordering at once, which requires more RAM than ordering but is (almost) as fast as as sorting. } \usage{ ramsort(x, \dots) ramorder(x, i, \dots) ramsortorder(x, i, \dots) mergesort(x, \dots) mergeorder(x, i, \dots) mergesortorder(x, i, \dots) quicksort(x, \dots) quickorder(x, i, \dots) quicksortorder(x, i, \dots) shellsort(x, \dots) shellorder(x, i, \dots) shellsortorder(x, i, \dots) radixsort(x, \dots) radixorder(x, i, \dots) radixsortorder(x, i, \dots) keysort(x, \dots) keyorder(x, i, \dots) keysortorder(x, i, \dots) } \arguments{ \item{x}{ a vector to be sorted by \code{\link{ramsort}} and \code{\link{ramsortorder}}, i.e. the output of \code{\link{sort}} } \item{i}{ integer positions to be modified by \code{\link{ramorder}} and \code{\link{ramsortorder}}, default is 1:n, in this case the output is similar to \code{\link{order}} } \item{\dots}{ further arguments to the sorting methods } } \details{ The \code{sort} generics do sort their argument 'x', some methods need temporary RAM of the same size as 'x'. The \code{order} generics do order their argument 'i' leaving 'x' as it was, some methods need temporary RAM of the same size as 'i'. The \code{sortorder} generics do sort their argument 'x' and order their argument 'i', this way of ordering is much faster at the price of requiring temporary RAM for both, 'x' and 'i', if the method requires temporary RAM. The \code{ram} generics are high-level functions containing an optimizer that chooses the 'best' algorithms given some context. } \section{Index of implemented methods}{ \tabular{rrl}{ \bold{generic} \tab \bold{ff} \tab \bold{bit64} \cr \code{ramsort} \tab \code{\link[ff]{ramsort.default}} \tab \code{\link[bit64]{ramsort.integer64}} \cr \code{shellsort} \tab \code{\link[ff]{shellsort.default}} \tab \code{\link[bit64]{shellsort.integer64}} \cr \code{quicksort} \tab \tab \code{\link[bit64]{quicksort.integer64}} \cr \code{mergesort} \tab \code{\link[ff]{mergesort.default}} \tab \code{\link[bit64]{mergesort.integer64}} \cr \code{radixsort} \tab \code{\link[ff]{radixsort.default}} \tab \code{\link[bit64]{radixsort.integer64}} \cr \code{keysort} \tab \code{\link[ff]{keysort.default}} \tab \cr \cr \bold{generic} \tab \bold{ff} \tab \bold{bit64} \cr \code{ramorder} \tab \code{\link[ff]{ramorder.default}} \tab \code{\link[bit64]{ramorder.integer64}} \cr \code{shellorder} \tab \code{\link[ff]{shellorder.default}} \tab \code{\link[bit64]{shellorder.integer64}} \cr \code{quickorder} \tab \tab \code{\link[bit64]{quickorder.integer64}} \cr \code{mergeorder} \tab \code{\link[ff]{mergeorder.default}} \tab \code{\link[bit64]{mergeorder.integer64}} \cr \code{radixorder} \tab \code{\link[ff]{radixorder.default}} \tab \code{\link[bit64]{radixorder.integer64}} \cr \code{keyorder} \tab \code{\link[ff]{keyorder.default}} \tab \cr \cr \bold{generic} \tab \bold{ff} \tab \bold{bit64} \cr \code{ramsortorder} \tab \tab \code{\link[bit64]{ramsortorder.integer64}} \cr \code{shellsortorder} \tab \tab \code{\link[bit64]{shellsortorder.integer64}} \cr \code{quicksortorder} \tab \tab \code{\link[bit64]{quicksortorder.integer64}} \cr \code{mergesortorder} \tab \tab \code{\link[bit64]{mergesortorder.integer64}} \cr \code{radixsortorder} \tab \tab \code{\link[bit64]{radixsortorder.integer64}} \cr \code{keysortorder} \tab \tab \cr } } \note{ Note that these methods purposely violate the functional programming paradigm: they are called for the side-effect of changing some of their arguments. The rationale behind this is that sorting is very RAM-intensive and in certain situations we might not want to allocate additional memory if not necessary to do so. The \code{sort}-methods change \code{x}, the \code{order}-methods change \code{i}, and the \code{sortoder}-methods change both \code{x} and \code{i} You as the user are responsible to create copies of the input data 'x' and 'i' if you need non-modified versions. } \value{ These functions return the number of \code{NAs} found or assumed during sorting } \author{ Jens Oehlschlägel } \keyword{univar} \keyword{manip} \keyword{arith} \seealso{ \code{\link{sort}} and \code{\link{order}} in base R} bit/man/LogicBit.rd0000755000176000001440000000523013264143261013663 0ustar ripleyusers\name{LogicBit} \alias{LogicBit} \alias{!.bit} \alias{!.bitwhich} \alias{&.bit} \alias{&.bitwhich} \alias{|.bit} \alias{|.bitwhich} \alias{==.bit} \alias{==.bitwhich} \alias{!=.bit} \alias{!=.bitwhich} \alias{xor} \alias{xor.default} \alias{xor.bit} \alias{xor.bitwhich} \title{ Boolean operators and functions for class bit } \description{ Boolean 'negation', 'and', 'or' and 'exclusive or'. } \usage{ \method{!}{bit}(x) \method{!}{bitwhich}(x) \method{&}{bit}(e1, e2) \method{&}{bitwhich}(e1, e2) \method{|}{bit}(e1, e2) \method{|}{bitwhich}(e1, e2) \method{==}{bit}(e1, e2) \method{==}{bitwhich}(e1, e2) \method{!=}{bit}(e1, e2) \method{!=}{bitwhich}(e1, e2) xor(x, y) \method{xor}{default}(x, y) \method{xor}{bit}(x, y) \method{xor}{bitwhich}(x, y) } \arguments{ \item{x}{ a bit vector (or one logical vector in binary operators) } \item{y}{ a bit vector or an logical vector } \item{e1}{ a bit vector or an logical vector } \item{e2}{ a bit vector or an logical vector } } \details{ Binary operators and function \code{xor} can combine 'bit' objects and 'logical' vectors. They do not recycle, thus the lengths of objects must match. Boolean operations on bit vectors are extremely fast because they are implemented using C's bitwise operators. If one argument is 'logical' it is converted to 'bit'. \cr Binary operators and function \code{xor} can combine 'bitwhich' objects and other vectors. They do not recycle, thus the lengths of objects must match. Boolean operations on bitwhich vectors are fast if the distribution of TRUE and FALSE is very asymetric. If one argument is not 'bitwhich' it is converted to 'bitwhich'. \cr The \code{xor} function has been made generic and \code{xor.default} has been implemented much faster than R's standard \code{\link[base]{xor}}. This was possible because actually boolean function \code{xor} and comparison operator \code{!=} do the same (even with NAs), and \code{!=} is much faster than the multiple calls in \code{(x | y) & !(x & y)} } \value{ An object of class 'bit' (or 'bitwhich') } \author{ Jens Oehlschlägel } \seealso{ \code{\link{bit}}, \code{\link{Logic}} } \examples{ x <- as.bit(c(FALSE, FALSE, FALSE, NA, NA, NA, TRUE, TRUE, TRUE)) yl <- c(FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE) y <- as.bit(yl) !x x & y x | y xor(x, y) x != y x == y x & yl x | yl xor(x, yl) x != yl x == yl x <- as.bitwhich(c(FALSE, FALSE, FALSE, NA, NA, NA, TRUE, TRUE, TRUE)) yl <- c(FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE) y <- as.bitwhich(yl) !x x & y x | y xor(x, y) x != y x == y x & yl x | yl xor(x, yl) x != yl x == yl } \keyword{ classes } \keyword{ logic } bit/man/bit_init.rd0000755000176000001440000000117413264143261013773 0ustar ripleyusers\name{bit_init} \alias{bit_init} \alias{bit_done} \alias{.BITS} \title{ Initializing bit masks } \description{ Functions to allocate (and de-allocate) bit masks } \usage{ bit_init() bit_done() } \details{ The C-code operates with bit masks. The memory for these is allocated dynamically. \code{bit_init} is called by \code{\link{.First.lib}} and \code{bit_done} is called by \code{\link{.Last.lib}}. You don't need to care about these under normal circumstances. } \value{ NULL } \author{ Jens Oehlschlägel } \seealso{ \code{\link{bit}} } \examples{ bit_done() bit_init() } \keyword{ classes } \keyword{ logic } bit/man/as.bitwhich.rd0000755000176000001440000000244013264143261014372 0ustar ripleyusers\name{as.bitwhich} \alias{as.bitwhich} \alias{as.bitwhich.bit} \alias{as.bitwhich.bitwhich} \alias{as.bitwhich.ri} \alias{as.bitwhich.which} \alias{as.bitwhich.integer} \alias{as.bitwhich.double} \alias{as.bitwhich.logical} \title{ Coercing to bitwhich } \description{ Functions to coerce to bitwhich } \usage{ as.bitwhich(x, \dots) \method{as.bitwhich}{bitwhich}(x, \dots) \method{as.bitwhich}{ri}(x, \dots) \method{as.bitwhich}{bit}(x, range=NULL, \dots) \method{as.bitwhich}{which}(x, maxindex, \dots) \method{as.bitwhich}{integer}(x, \dots) \method{as.bitwhich}{double}(x, \dots) \method{as.bitwhich}{logical}(x, \dots) } \arguments{ \item{x}{ An object of class 'bitwhich', 'integer', 'logical' or 'bit' or an integer vector as resulting from 'which' } \item{maxindex}{ the length of the new bitwhich vector } \item{range}{ a \code{\link{ri}} or an integer vector of length==2 giving a range restriction for chunked processing } \item{\dots}{ further arguments } } \value{ a value of class \code{\link{bitwhich}} } \author{ Jens Oehlschlägel } \seealso{ \code{\link{bitwhich}}, \code{\link{as.bit}} } \examples{ as.bitwhich(c(FALSE, FALSE, FALSE)) as.bitwhich(c(FALSE, FALSE, TRUE)) as.bitwhich(c(FALSE, TRUE, TRUE)) as.bitwhich(c(TRUE, TRUE, TRUE)) } \keyword{ classes } \keyword{ logic } bit/man/is.sorted.rd0000755000176000001440000000213113264143261014076 0ustar ripleyusers\name{is.sorted} \alias{is.sorted} \alias{na.count} \alias{nvalid} \alias{nunique} \alias{nties} \alias{is.sorted<-} \alias{na.count<-} \alias{nunique<-} \alias{nties<-} \title{ Generics related to cache access } \description{ These generics are packaged here for methods in packages \code{bit64} and \code{ff}. } \usage{ is.sorted(x, \dots) is.sorted(x, \dots) <- value na.count(x, \dots) na.count(x, \dots) <- value nvalid(x, \dots) nunique(x, \dots) nunique(x, \dots) <- value nties(x, \dots) nties(x, \dots) <- value } \arguments{ \item{x}{ some object } \item{value}{ value assigned on responsibility of the user } \item{\dots}{ ignored } } \details{ see help of the available methods } \value{ see help of the available methods } \author{ Jens Oehlschlägel } \seealso{ \code{\link[bit64]{is.sorted.integer64}}, \code{\link[bit64]{na.count.integer64}}, \code{\link[bit64]{nvalid.integer64}}, \code{\link[bit64]{nunique.integer64}}, \code{\link[bit64]{nties.integer64}} \cr } \examples{ methods("na.count") } \keyword{ environment } \keyword{ methods } bit/man/regtest.bit.rd0000755000176000001440000000132113264143261014416 0ustar ripleyusers\name{regtest.bit} \alias{regtest.bit} \title{ Regressiontests for bit } \description{ Test package bit for correctness } \usage{ regtest.bit(N = 100) } \arguments{ \item{N}{ number of random test runs } } \details{ random data of random length are generated and correctness of package functions tested on these } \value{ a vector of class 'logical' or 'integer' } \author{ Jens Oehlschlägel } \seealso{ \code{\link{bit}}, \code{\link{as.bit}}, \code{\link{as.logical}}, \code{\link{as.integer}}, \code{\link{which}} } \examples{ if (regtest.bit()){ message("regtest.bit is OK") }else{ message("regtest.bit failed") } \dontrun{ regtest.bit(10000) } } \keyword{ classes } \keyword{ logic } bit/man/length.bit.rd0000755000176000001440000000707613264143261014237 0ustar ripleyusers\name{length.bit} \alias{length.bit} \alias{length.bitwhich} \alias{length.ri} \alias{length<-.bit} \alias{length<-.bitwhich} \title{ Getting and setting length of bit, bitwhich and ri objects } \description{ Query the number of bits in a \code{\link{bit}} vector or change the number of bits in a bit vector. \cr Query the number of bits in a \code{\link{bitwhich}} vector or change the number of bits in a bit vector. \cr } \usage{ \method{length}{ri}(x) \method{length}{bit}(x) \method{length}{bitwhich}(x) \method{length}{bit}(x) <- value \method{length}{bitwhich}(x) <- value } \arguments{ \item{x}{ a \code{\link{bit}}, \code{\link{bitwhich}} or \code{\link{ri}} object } \item{value}{ the new number of bits } } \details{ NOTE that the length does NOT reflect the number of selected (\code{TRUE}) bits, it reflects the sum of both, \code{TRUE} and \code{FALSE} bits. Increasing the length of a \code{\link{bit}} object will set new bits to \code{FALSE}. The behaviour of increasing the length of a \code{\link{bitwhich}} object is different and depends on the content of the object: \itemize{ \item{TRUE}{all included, new bits are set to \code{TRUE}} \item{positive integers}{some included, new bits are set to \code{FALSE}} \item{negative integers}{some excluded, new bits are set to \code{TRUE}} \item{FALSE}{all excluded:, new bits are set to \code{FALSE}} } Decreasing the length of bit or bitwhich removes any previous information about the status bits above the new length. } \value{ the length A bit vector with the new length } \author{ Jens Oehlschlägel } \seealso{ \code{\link{length}}, \code{\link[=sum.bit]{sum}}, \code{\link[ff]{poslength}}, \code{\link[ff]{maxindex}} } \examples{ stopifnot(length(ri(1, 1, 32))==32) x <- as.bit(ri(32, 32, 32)) stopifnot(length(x)==32) stopifnot(sum(x)==1) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==0) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==0) x <- as.bit(ri(1, 1, 32)) stopifnot(length(x)==32) stopifnot(sum(x)==1) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==1) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==1) x <- as.bitwhich(bit(32)) stopifnot(length(x)==32) stopifnot(sum(x)==0) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==0) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==0) x <- as.bitwhich(!bit(32)) stopifnot(length(x)==32) stopifnot(sum(x)==32) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==16) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==32) x <- as.bitwhich(ri(32, 32, 32)) stopifnot(length(x)==32) stopifnot(sum(x)==1) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==0) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==0) x <- as.bitwhich(ri(2, 32, 32)) stopifnot(length(x)==32) stopifnot(sum(x)==31) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==15) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==31) x <- as.bitwhich(ri(1, 1, 32)) stopifnot(length(x)==32) stopifnot(sum(x)==1) length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==1) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==1) x <- as.bitwhich(ri(1, 31, 32)) stopifnot(length(x)==32) stopifnot(sum(x)==31) message("NOTE the change from 'some excluded' to 'all excluded' here") length(x) <- 16 stopifnot(length(x)==16) stopifnot(sum(x)==16) length(x) <- 32 stopifnot(length(x)==32) stopifnot(sum(x)==32) } \keyword{ classes } \keyword{ logic } bit/man/as.which.rd0000755000176000001440000000263013264143261013674 0ustar ripleyusers\name{as.which} \alias{as.which} \alias{as.which.default} \alias{as.which.bitwhich} \alias{as.which.bit} \alias{as.which.ri} \title{ Coercion to (positive) integer positions } \description{ Coercing to something like the result of which \code{\link{which}} } \usage{ as.which(x, \dots) \method{as.which}{default}(x, \dots) \method{as.which}{ri}(x, \dots) \method{as.which}{bit}(x, range = NULL, \dots) \method{as.which}{bitwhich}(x, \dots) } \arguments{ \item{x}{ an object of classes \code{\link{bit}}, \code{\link{bitwhich}}, \code{\link{ri}} or something on which \code{\link{which}} works } \item{range}{ a \code{\link{ri}} or an integer vector of length==2 giving a range restriction for chunked processing } \item{\dots}{ further arguments (passed to \code{\link{which}} for the default method, ignored otherwise) } } \details{ \code{as.which.bit} returns a vector of subscripts with class 'which' } \value{ a vector of class 'logical' or 'integer' } \author{ Jens Oehlschlägel } \seealso{ \code{\link{as.bit}}, \code{\link{as.logical}}, \code{\link{as.integer}}, \code{\link{as.which}}, \code{\link{as.bitwhich}}, \code{\link[ff]{as.ff}}, \code{\link[ff]{as.hi}} } \examples{ r <- ri(5, 20, 100) x <- as.which(r) x stopifnot(identical(x, as.which(as.logical(r)))) stopifnot(identical(x, as.which(as.bitwhich(r)))) stopifnot(identical(x, as.which(as.bit(r)))) } \keyword{ classes } \keyword{ logic } bit/DESCRIPTION0000755000176000001440000000417613620225451012601 0ustar ripleyusersPackage: bit Title: A Class for Vectors of 1-Bit Booleans Version: 1.1-15.2 Date: 2020-01-14 Authors@R: c(person("Jens", "Oehlschlägel", role = c('aut', 'cre'), email = "Jens.Oehlschlaegel@truecluster.com"), person("Brian", "Ripley", role = 'ctb')) Depends: R (>= 2.9.2) Description: True boolean datatype (no NAs), coercion from and to logicals, integers and integer subscripts; fast boolean operators and fast summary statistics. With 'bit' vectors you can store true binary booleans {FALSE,TRUE} at the expense of 1 bit only, on a 32 bit architecture this means factor 32 less RAM and ~ factor 32 more speed on boolean operations. Due to overhead of R calls, actual speed gain depends on the size of the vector: expect gains for vectors of size > 10000 elements. Even for one-time boolean operations it can pay-off to convert to bit, the pay-off is obvious, when such components are used more than once. Reading from and writing to bit is approximately as fast as accessing standard logicals - mostly due to R's time for memory allocation. The package allows to work with pre-allocated memory for return values by calling .Call() directly: when evaluating the speed of C-access with pre-allocated vector memory, coping from bit to logical requires only 70% of the time for copying from logical to logical; and copying from logical to bit comes at a performance penalty of 150%. the package now contains further classes for representing logical selections: 'bitwhich' for very skewed selections and 'ri' for selecting ranges of values for chunked processing. All three index classes can be used for subsetting 'ff' objects (ff-2.1-0 and higher). License: GPL-2 LazyLoad: yes ByteCompile: yes Encoding: UTF-8 NeedsCompilation: yes Packaged: 2020-02-09 13:51:41 UTC; ripley Author: Jens Oehlschlägel [aut, cre], Brian Ripley [ctb] Maintainer: ORPHANED X-CRAN-Original-Maintainer: Jens Oehlschlägel X-CRAN-Comment: Orphaned on 2020-01-13 as check errors were not corrected. Repository: CRAN Date/Publication: 2020-02-10 10:06:33 UTC bit/src/0000755000176000001440000000000013620002620011636 5ustar ripleyusersbit/src/attrutil.c0000755000176000001440000000040513302612346013665 0ustar ripleyusers#include #include void R_bit_set_attr(SEXP x, SEXP which, SEXP value) { setAttrib(x, install(CHAR(STRING_ELT(which, 0))), value); /* xx looking at R sources setAttrib would directly accept a string, however this is not documented */ } bit/src/rle.c0000755000176000001440000000643113607262544012615 0ustar ripleyusers/* # fast rle handling for bit and ff # (c) 2007-2009 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2007-08-24 # Last changed: 2007-11-29 */ #include #include SEXP first_zero(SEXP x) { int i; int n = LENGTH(x); int *p = INTEGER(x); SEXP ret_; PROTECT( ret_ = allocVector(INTSXP, 1) ); INTEGER(ret_)[0] = 0; if (n){ for (i=0;ip[i-1]){ status = FALSE; break; } } } INTEGER(ret_)[0] = status; UNPROTECT(1); return ret_; } /* create integer rle NOTE if rle is not efficient we return NULL instead of an rle object */ SEXP int_rle(SEXP x_) { register int lv,ln,i,c=0; int n2, n = LENGTH(x_); if (n<3) return R_NilValue; n2 = n / 3; /* xx max RAM requirement 2x, but rle only if at least 2/3 savings, using 2 instead of 3 would need 50% more time, have max RAM requirement 2.5x for savings of any size */ int *x = INTEGER(x_); int *val, *len, *values, *lengths; SEXP ret_, lengths_, values_, names_, class_; val = Calloc(n2, int); len = Calloc(n2, int); if (n){ lv = x[0]; ln = 1; for (i=1;i #include #include // for NULL #include /* .Call calls */ extern SEXP R_bit_all(SEXP, SEXP); extern SEXP R_bit_and(SEXP, SEXP, SEXP); extern SEXP R_bit_any(SEXP, SEXP); extern SEXP R_bit_as_hi(SEXP, SEXP, SEXP); extern SEXP R_bit_done(); extern SEXP R_bit_equal(SEXP, SEXP, SEXP); extern SEXP R_bit_extract(SEXP, SEXP, SEXP, SEXP); extern SEXP R_bit_get(SEXP, SEXP, SEXP); extern SEXP R_bit_get_integer(SEXP, SEXP, SEXP); extern SEXP R_bit_init(SEXP); extern SEXP R_bit_max(SEXP, SEXP); extern SEXP R_bit_min(SEXP, SEXP); extern SEXP R_bit_not(SEXP); extern SEXP R_bit_or(SEXP, SEXP, SEXP); extern SEXP R_bit_replace(SEXP, SEXP, SEXP); extern SEXP R_bit_set(SEXP, SEXP, SEXP); extern SEXP R_bit_set_attr(SEXP, SEXP, SEXP); extern SEXP R_bit_set_integer(SEXP, SEXP, SEXP); extern SEXP R_bit_shiftcopy(SEXP, SEXP, SEXP, SEXP); extern SEXP R_bit_sum(SEXP, SEXP); extern SEXP R_bit_vecseq(SEXP, SEXP); extern SEXP R_bit_which(SEXP, SEXP, SEXP, SEXP); extern SEXP R_bit_xor(SEXP, SEXP, SEXP); extern SEXP first_zero(SEXP); extern SEXP int_check_ascending(SEXP); extern SEXP int_check_descending(SEXP); extern SEXP int_rle(SEXP); extern SEXP r_ram_truly_identical(SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"R_bit_all", (DL_FUNC) &R_bit_all, 2}, {"R_bit_and", (DL_FUNC) &R_bit_and, 3}, {"R_bit_any", (DL_FUNC) &R_bit_any, 2}, {"R_bit_as_hi", (DL_FUNC) &R_bit_as_hi, 3}, {"R_bit_done", (DL_FUNC) &R_bit_done, 0}, {"R_bit_equal", (DL_FUNC) &R_bit_equal, 3}, {"R_bit_extract", (DL_FUNC) &R_bit_extract, 4}, {"R_bit_get", (DL_FUNC) &R_bit_get, 3}, {"R_bit_get_integer", (DL_FUNC) &R_bit_get_integer, 3}, {"R_bit_init", (DL_FUNC) &R_bit_init, 1}, {"R_bit_max", (DL_FUNC) &R_bit_max, 2}, {"R_bit_min", (DL_FUNC) &R_bit_min, 2}, {"R_bit_not", (DL_FUNC) &R_bit_not, 1}, {"R_bit_or", (DL_FUNC) &R_bit_or, 3}, {"R_bit_replace", (DL_FUNC) &R_bit_replace, 3}, {"R_bit_set", (DL_FUNC) &R_bit_set, 3}, {"R_bit_set_attr", (DL_FUNC) &R_bit_set_attr, 3}, {"R_bit_set_integer", (DL_FUNC) &R_bit_set_integer, 3}, {"R_bit_shiftcopy", (DL_FUNC) &R_bit_shiftcopy, 4}, {"R_bit_sum", (DL_FUNC) &R_bit_sum, 2}, {"R_bit_vecseq", (DL_FUNC) &R_bit_vecseq, 2}, {"R_bit_which", (DL_FUNC) &R_bit_which, 4}, {"R_bit_xor", (DL_FUNC) &R_bit_xor, 3}, {"first_zero", (DL_FUNC) &first_zero, 1}, {"int_check_ascending", (DL_FUNC) &int_check_ascending, 1}, {"int_check_descending", (DL_FUNC) &int_check_descending, 1}, {"int_rle", (DL_FUNC) &int_rle, 1}, {"r_ram_truly_identical", (DL_FUNC) &r_ram_truly_identical, 2}, {NULL, NULL, 0} }; void R_init_bit(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } bit/src/chunkutil.c0000755000176000001440000000211613302612346014024 0ustar ripleyusers/* # C-Code for chunk utilities # (c) 2012 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2012-12-02 # Last changed: 2012-12-02 */ #include #define USE_RINTERNALS #include SEXP R_bit_vecseq(SEXP x_, SEXP y_) { int *x,*y,*ret; register int val, lim; R_len_t K,k,n,i; SEXP ret_; // if (!isInteger(x_)) // error("x must be an integer vector"); // if (!isInteger(y_)) // error("y must be an integer vector"); K = LENGTH(x_); // if (LENGTH(y_) != K) error("x and y must be the same length"); x = INTEGER(x_); y = INTEGER(y_); n = 0; for (k=0; k=lim){ ret[i++] = val--; } } } UNPROTECT(1); return(ret_); } bit/src/bit.c0000644000176000001440000006154613607366604012620 0ustar ripleyusers/* 1-bit boolean vectors for R first bit is stored in lowest (rightmost) bit of first word remember that rightshifting is dangerous because we use the sign position Copyright 2008 Jens Oehlschlägel Corrections (C) 2020 Brian Ripley */ #include #include #include // Configuration: set this to 32 or 64 and keep in sync with .BITS in bit.R #define BITS 32 // Configuration: set this to BITS-1 #define LASTBIT 31 #define TRUE 1 /* & bitwise and | bitwise or ^ bitwise xor ~ bitwise not */ #if BITS==64 typedef unsigned long long int bitint; #else typedef unsigned int bitint; #endif static bitint *mask0, *mask1; static void bit_init(int bits){ if (bits != BITS) error("R .BITS and C BITS are not in synch"); if (bits-1 != LASTBIT) error("R .BITS and C LASTBIT are not in synch"); mask0 = calloc(BITS, sizeof(bitint)); mask1 = calloc(BITS, sizeof(bitint)); bitint b = 1; int i; for (i=0; i> 1) & mask0[LASTBIT]) >> downshiftrest; /* special treatment of the leftmost bit in downshift to make sure the downshift is filled with zeros */ /* now copy into part using OR */ btarget[target_j] |= bsource[source_j] << upshift; target_j++; for (; source_j> 1) & mask0[LASTBIT]) >> downshiftrest ) | ( bsource[source_j+1] << upshift ); /* special treatment of the leftmost bit in downshift to make sure the downshift is filled with zeros */ } }else{ for (; source_j> 1) & mask0[LASTBIT]) >> (upshift-1)) << upshift; /* & mask0[LASTBIT] */ btarget[target_j1] |= ( ((bsource[source_j1] >> 1) & mask0[LASTBIT]) >> downshiftrest ); /* special treatment of the leftmost bit in downshift to make sure the downshift is filled with zeros */ }else{ btarget[target_j1] = bsource[source_j1]; } } } static void bit_get(bitint *b, int *l, int from, int to, int nb){ from--; to--; register bitint word; register int i=0; register int k=from%BITS; register int j=from/BITS; register int k1=to%BITS; register int j1=to/BITS; if (j < 0 || j >= nb) error("attempting index %d/%d\n", j, nb); if (j1 < 0 || j1 >= nb) error("attempting index %d/%d\n", j, nb); if (j= nb) error("attempting index %d/%d\n", j, nb); if (j1 < 0 || j1 >= nb) error("attempting index %d/%d\n", j, nb); if (jj0){ word = b[j]; for(; k>=0; k--){ if (!(word & mask1[k])) l[h++] = i; i++; } for (j--; j>j0; j--){ word = b[j]; for(k=LASTBIT ;k>=0 ;k--){ if (!(word & mask1[k])) l[h++] = i; i++; } } k = LASTBIT; } if (j==j0){ word = b[j]; for( ;k>=k0 ;k--){ if (!(word & mask1[k])) l[h++] = i; i++; } } } static int bit_extract(bitint *b, int nb, int *i, int *l, int n, int nb_){ register int ii, il, ib, j, k; for (ii = 0, il = 0; ii < n; ii++){ if (i[ii] != 0) { // NA is -ve ib = i[ii]; // This is needed for [.bit; disallowed in R code for [[.bit if (ib == NA_INTEGER) {l[il++] = NA_INTEGER; continue;} ib--; if (ib < nb){ j = ib/BITS; if (j < 0 || j >= nb_) error("attempting index %d/%d\n", j, nb); k = ib%BITS; l[il++] = b[j] & mask1[k] ? 1 : 0; } else l[il++] = NA_INTEGER; } } return il; } // This gave valgrind errors, invalid read and write static void bit_replace(bitint *b, int *i, int *l, int n, int nb){ register int ii, ib, j, k; for (ii=0; ii 0){ // NA indices are not allowed. ib = i[ii] - 1; j = ib/BITS; if (j < 0 || j >= nb) error("attempting index %d/%d\n", j, nb); k = ib%BITS; if (l[ii]==TRUE) b[j] |= mask1[k]; else b[j] &= mask0[k]; } } } static void bit_not(bitint *b, int n){ register int i; for (i=0; i= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(; k= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(k=0; k= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(; k<=k1; k++){ if (word & mask1[k]) s++; } } return s; } static int bit_all(bitint *b, int from, int to, int nb){ from--; to--; register bitint word; register int k=from%BITS; register int j=from/BITS; register int k1=to%BITS; register int j1=to/BITS; if (j= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(; k= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(; k<=k1; k++){ if (!(word & mask1[k])) return 0; } } return 1; } static int bit_any(bitint *b, int from, int to, int nb){ from--; to--; register bitint word; register int k=from%BITS; register int j=from/BITS; register int k1=to%BITS; register int j1=to/BITS; if (j= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(; k= nb) error("attempting index %d/%d\n", j, nb); if(b[j]) return 1; } k=0; } if (j==j1){ if (j < 0 || j >= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; for(; k<=k1; k++){ if(b[j]) return 1; } } return 0; } static int bit_min(bitint *b, int from, int to, int nb){ from--; to--; register bitint word; register int k=from%BITS; register int j=from/BITS; register int k1=to%BITS; register int j1=to/BITS; if (j= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; if(word){ for(; k= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; if (word) for(k=0; k= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; if (word) for(; k<=k1; k++){ if (word & mask1[k]){ return j*BITS+k+1; } } } return NA_INTEGER; } static int bit_max(bitint *b, int from, int to, int nb){ from--; to--; register bitint word; register int k0=from%BITS; register int j0=from/BITS; register int k=to%BITS; register int j=to/BITS; if (j>j0){ if (j < 0 || j >= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; if (word){ for(; k>=0; k--){ if (word & mask1[k]) return j*BITS+k+1; } } for (j--; j>j0; j--){ if (j < 0 || j >= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; if (word){ for(k=LASTBIT; k>=0; k--){ if (word & mask1[k]) return j*BITS+k+1; } } } k=LASTBIT; } if (j==j0){ if (j < 0 || j >= nb) error("attempting index %d/%d\n", j, nb); word = b[j]; if (word){ for(; k>=k0; k--){ if (word & mask1[k]) return j*BITS+k+1; } } } return NA_INTEGER; } /* R interface functions -------------------- */ // this alters btarget SEXP R_bit_shiftcopy( SEXP bsource_ /* bit source */ , SEXP btarget_ /* bit target: assumed FALSE in the target positions and above */ , SEXP otarget_ /* offset target */ , SEXP n_ /* number of bits to copy */ ){ bitint *bsource = (bitint*) INTEGER(bsource_); bitint *btarget = (bitint*) INTEGER(btarget_); int otarget = asInteger(otarget_); int n = asInteger(n_); bit_shiftcopy(bsource, btarget, otarget, n); return(btarget_); } // this alters l_ SEXP R_bit_get(SEXP b_, SEXP l_, SEXP range_){ bitint *b = (bitint*) INTEGER(b_); int *l = LOGICAL(l_); int *range = INTEGER(range_); bit_get(b, l, range[0], range[1], LENGTH(b_)); return(l_); } // this alters l_ SEXP R_bit_get_integer(SEXP b_, SEXP l_, SEXP range_){ bitint *b = (bitint*) INTEGER(b_); int *l = INTEGER(l_); int *range = INTEGER(range_); bit_get(b, l, range[0], range[1], LENGTH(b_)); return(l_); } // this alters b_ SEXP R_bit_set(SEXP b_, SEXP l_, SEXP range_){ bitint *b = (bitint*) INTEGER(b_); int *l = LOGICAL(l_); int *range = INTEGER(range_); bit_set(b, l, range[0], range[1], LENGTH(b_)); return(b_); } // this alters b_ SEXP R_bit_set_integer(SEXP b_, SEXP l_, SEXP range_){ bitint *b = (bitint*) INTEGER(b_); int *l = INTEGER(l_); int *range = INTEGER(range_); bit_set(b, l, range[0], range[1], LENGTH(b_)); return(b_); } SEXP R_bit_which(SEXP b_, SEXP s_, SEXP range_, SEXP negative_){ bitint *b = (bitint*) INTEGER(b_); int *range = INTEGER(range_); int s = asInteger(s_); SEXP ret_; int *ret; if (asLogical(negative_)){ // negative return PROTECT( ret_ = allocVector(INTSXP,s) ); ret = INTEGER(ret_); bit_which_negative(b, ret, range[0], range[1]); }else{ // positive return PROTECT( ret_ = allocVector(INTSXP,s) ); ret = INTEGER(ret_); bit_which_positive(b, ret, range[0], range[1], 0); } UNPROTECT(1); return(ret_); } #define HANDLE_TRUE \ d = i - li; \ li = i; \ if (d==ld){ \ ln++; \ }else{ \ val[c] = ld; \ len[c] = ln; \ s+=ln; \ c++; \ if (c==n2){ \ Free(val); \ Free(len); \ last = NA_INTEGER; j=j1 + 1; break; \ } \ ld = d; \ ln = 1; \ } /* last=0 means aborting rle */ SEXP R_bit_as_hi(SEXP b_, SEXP range_, SEXP offset_) { bitint *b = (bitint*) INTEGER(b_); int *range = INTEGER(range_); int offset = asInteger(offset_); SEXP ret_, first_, dat_, last_, len_, retnames_, rlepackclass_; int protectcount = 0; register bitint word; register int k=(range[0]-1)%BITS; register int j=(range[0]-1)/BITS; int k1=(range[1]-1)%BITS; int j1=(range[1]-1)/BITS; int first = NA_INTEGER; int last = -1; /* setting this to NA_INTEGER means: abort rle */ int c = 0; /* rle position */ register int i = NA_INTEGER; /* position */ register int li = NA_INTEGER; /* last position */ register int d = NA_INTEGER; /* difference */ register int ld = NA_INTEGER; /* last difference */ register int ln = 0; /* counter of last difference */ int s = 1; /* sum of TRUE */ /* begin determine first and first increment d (stored in last_diff ld) */ if (j=3){ /* see function intrle in package ff: max RAM requirement 2x, but rle only if at least 2/3 savings, using 2 instead of 3 would need 50% more time, have max RAM requirement 2.5x for savings of any size NOTE that n is a fuzzy worst case estimate of the number of TRUEs i.e. in some cases we miss the rle abort and use rle although simple positions would cost less RAM */ int *val, *len; int n2 = n / 3; val = Calloc(n2, int); len = Calloc(n2, int); i=first+ld; k=(i+1)%BITS; j=(i+1)/BITS; //Rprintf("first=%d li=%d\n", first, li); /* begin determine increments */ if (j #include // SEXP R_bit_named(SEXP x){ // SEXP ret_; // PROTECT( ret_ = allocVector(INTSXP,1) ); // INTEGER(ret_)[0] = NAMED(x); // UNPROTECT(1); // return ret_; // } SEXP r_ram_truly_identical( SEXP x_ , SEXP y_ ) { SEXP ret_; Rboolean ret; if(!isVectorAtomic(x_)){ error("SEXP is not atomic vector"); return R_NilValue; } if (TYPEOF(x_)!=TYPEOF(y_)){ error("vectors don't have identic type"); return R_NilValue; } //somehow is DATAPTR not declared: ret = DATAPTR(x_)==DATAPTR(y_) ? TRUE : FALSE; switch (TYPEOF(x_)) { case CHARSXP: ret = CHAR(x_)==CHAR(y_) ? TRUE : FALSE; break; case LGLSXP: ret = LOGICAL(x_)==LOGICAL(y_) ? TRUE : FALSE; case INTSXP: ret = INTEGER(x_)==INTEGER(y_) ? TRUE : FALSE; break; case REALSXP: ret = REAL(x_)==REAL(y_) ? TRUE : FALSE; break; case CPLXSXP: ret = COMPLEX(x_)==COMPLEX(y_) ? TRUE : FALSE; break; case STRSXP: ret = STRING_PTR(x_)==STRING_PTR(y_) ? TRUE : FALSE; break; case VECSXP: ret = VECTOR_PTR(x_)==VECTOR_PTR(y_) ? TRUE : FALSE; case RAWSXP: ret = RAW(x_)==RAW(y_) ? TRUE : FALSE; break; default: error("unimplemented type in truly.identical"); return R_NilValue; } if (LENGTH(x_)!=LENGTH(y_)){ ret = FALSE; } PROTECT( ret_ = allocVector(LGLSXP, 1) ); INTEGER(ret_)[0] = ret; UNPROTECT(1); return ret_; } bit/NEWS0000755000176000001440000001745213607365702011604 0ustar ripleyusers CHANGES IN bit VERSION 1.1-15.1 - This version is formally orphaned. - Added C-level sanity checks (which would have detected the problems found in 1.1-15. - Declare as static many functions/variables in bit.c. CHANGES IN bit VERSION 1.1-15 It seems the maintainer (who had been pretty unresponsive for years) has abandoned this so, it has been orphaned. - There were invalid write/read errors reported by valgrind, which was traced to a design error in length<-.bit() -- it could only handle reducing the length. And there was an example in regtest.bit which stretched. - example(regtest.bit) was reporting a failure (but not giving an error). This was because NA indices were not implemented (and disabled by the change in 1.1-14). - Added registration of .Call()s CHANGES IN bit VERSION 1.1-14 BUG FIXES o bit[i] and bit[i]<-v now check for non-positive integers which prevents a segfault when bit[NA] or bit[NA]<-v CHANGES IN bit VERSION 1.1-13 USER VISIBLE CHANGES o logical NA is now mapped to bit FALSE as in ff booleans o extractor function '[.bit' with positive numeric subscripts (integer, double, bitwhich) now behaves like '[.logical' and returns NA for out-of-bound requests and no element for 0 o extractor function '[[.bit' with positive numeric (integer, double, bitwhich) subscripts now behaves like '[[.logical' and throws an error for out-of-bound requests o extractor function '[.bit' with range index subscripts (ri) subscripts now behaves like '[[.bit' and throws an error for out-of-bound requests o assignment functions '[<-.bit' and '[[<-.bit' with positive numeric (integer, double, bitwhich) subscripts now behave like '[<-.logical' and '[[<-.logical' and silently increase vector length if necessary o assignment function '[<-.bit' with range index subscripts (ri) now behaves like '[[<-.bit' and silently increases vector length if necessary o rlepack() is now a generic with a method for class 'integer' o rleunpack() is now a generic with a method for class 'rlepack' o unique.rlepack() now gives correct results for unordered sequences o anyDuplicated.rlepack() now returns the position of the first duplicate and gives correct results for unordered sequences TUNING o The package can now compiled with 64bit words instead of 32bit words, since we only measured a minor speedup, we left 32bit as the default. BUG FIXES o extractor and assignment functions now check for legal (positive) subscript bounds, hence illegally large subscripts or zero no longer cause memory violations CHANGES IN bit VERSION 1.1-12 NEW FEATURES o function still.identical() has been moved to here from package bit64 o generic 'clone' and methods clone.default and clone.list have been moved to here from package ff BUG FIXES o bit[bitwhich] is now subscripting properly (VALGRIND) o UBSAN should no longer complain about left shift of int (although that never was a problem) CHANGES IN bit VERSION 1.1-10 TUNING o function 'vecseq' now calls C-code when calling with the default parameters 'concat=TRUE, eval=TRUE' (wish of Matthew Dowle) BUG FIXES o all.bit no longer ignores TRUE values in the second and following words (spotted by Nelson Chen) CHANGES IN bit VERSION 1.1-9 NEW FEATURES o new function 'repeat.time' for adaptive timing CODE ORGANIZATION o generics for sorting and ordering have been moved from 'ff' to 'bit' CHANGES IN bit VERSION 1.1-7 USER VISIBLE CHANGES o all calls to 'seq.int' have been replaced by 'seq_along' or 'seq_len' o most calls to 'cat' have been replaced by 'message' BUG FIXES o chunk.default now works with chunk(from=2, to=3, by=1) thanks to Edwin de Jonge CHANGES IN bit VERSION 1.1-5 NEW FEATURES o new utility functions setattr() and setattributes() allow to set attributes by reference (unlike attr()<- attributes()<- without copying the object) o new utility unattr() returns copy of input with attributes removed USER VISIBLE CHANGES o certain operations like creating a bit object are even faster now: need half the time and RAM through the use of setattr() instead of attr()<- o [.bit now decorates its logical return vector with attr(,'vmode')='boolean', i.e. we retain the information that there are no NAs. BUG FIXES o .onLoad() no longer calls installed.packages() which substantially improves startup time (thanks to Brian Ripley) CHANGES IN bit VERSION 1.1-2 USER VISIBLE CHANGES o The package now has a namespace CHANGES IN bit VERSION 1.1-1 USER VISIBLE CHANGES o Function 'chunk' has been made generic, the default method provides the previous behavior. o New method to increase length of bitwhich objects. o Added further coercion methods. provides the previous behavior. BUG FIXES o as.bitwhich.ri now generates correct negative subscripts. CHANGES IN bit VERSION 1.1-0 NEW FEATURES o New class 'bitwhich' stores subscript positions in most efficient way: TRUE for all()==TRUE, FALSE for !any()==TRUE. otherwise positive or negative subscripts, whatever needs less RAM. Coercion functions and logical operators are available, the latter being efficient for very asymetric (skewed) distributions: selecting or exlcuding small factions of the data. o New class 'ri' (range index) allows to select ranges of positions for chunked processing: all three classes 'bit', 'bitwhich' and 'ri' can be used for subsetting 'ff' objects (ff-2.1.0 and higher). o New c() method for 'bit' and 'bitwhich' objects which behaves like c(logical). o The bit methods sum(), any(), all(), min(), max(), range(), summary() and which() now support a range argument that allows to restrict the range of evaluation for chunked processing. o New utilities for chunked processing: bbatch, repfromto, chunk, vecseq. USER VISIBLE CHANGES o reducing length of bit objects will now set hidden bits to FALSE, such that subsequent length increase behaves consistent with bit objects that had never been reduced in length: new bits are FALSE o 'which' is no longer turned into a generic. Use 'bitwhich' instead, or, 'as.which' if you need strictly positive subscripts. o 'which.bit' has been renamed to 'as.which.bit'. It no longer has parameter 'negative' and always returns positive subscripts (wish of Stavros Macrakis). It now has second parameter 'range' in order to return subscripts for chunked processing (note that the bitwhich representation is not suitable for chunked processing). In order to facilitate coercion, the return vector of 'as.which' now has class 'which'. o the internal structure of a bit object has been changed to align with ff ram objects: the bitlength of a bit object is no longer stored in attr(bit, "n"), instead in attr(attr(bit, "physical"), "Length"), which is accessible via physical(bit)$Length, but should be accessed usually via length(bit). o the semantics of 'min', 'max' and 'range' have been changed. They now refer to the positions of TRUE in the bit vector (and thus are consistent with bitwhich rather than with logical. The 'summary' method now returns four elements c("FALSE"=, "TRUE"=, "Min."=, "Max."=). BUG FIXES o which.bit no longer returns integer() for a bit vector that has all TRUE KNOWN PROBLEMS / TODOs o NAs are mapped to TRUE in 'bit' and to FALSE in 'ff' booleans. Might be aligned in a future release. Don't use bit if you have NAs - or map NAs explicitely. bit/R/0000755000176000001440000000000013620003030011244 5ustar ripleyusersbit/R/zzz.R0000755000176000001440000000162113072200400012230 0ustar ripleyusers.onLoad <- function(lib, pkg) { ##library.dynam("bit", pkg, lib) use useDynLib(bit) in NAMESPACE instead ##packageStartupMessage("Loading package bit ", packageDescription("bit", fields="Version")) bit_init() } .onAttach <- function(libname, pkgname){ packageStartupMessage("Attaching package bit") packageStartupMessage("package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)") packageStartupMessage("creators: bit bitwhich") packageStartupMessage("coercion: as.logical as.integer as.bit as.bitwhich which") packageStartupMessage("operator: ! & | xor != ==") packageStartupMessage("querying: print length any all min max range sum summary") packageStartupMessage("bit access: length<- [ [<- [[ [[<-") packageStartupMessage("for more help type ?bit") } .onUnload <- function(libpath){ packageStartupMessage("Unloading package bit") bit_done() library.dynam.unload("bit", libpath) } bit/R/attrutil.R0000755000176000001440000001102613264143053013260 0ustar ripleyusers# attribute utilities for ff and bit # (c) 2010 Jens Oehlschlägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2007-08-24 # Last changed: 2007-10-25 # WARNING: these functions are called for the side-effect of changing their arguments # this can save RAM and avoid unnecessary copying # but it can easily have unexpected effects # Only use them if you know what you do - and even then think twice #! \name{setattributes} #! \alias{setattributes} #! \alias{setattr} #! \title{ #! Attribute setting by reference #! } #! \description{ #! Function \code{setattr} sets a singe attribute and function \code{setattributes} sets a list of attributes. #! } #! \usage{ #! setattr(x, which, value) #! setattributes(x, attributes) #! } #! \arguments{ #! \item{x}{ #! } #! \item{which}{ #! name of the attribute #! } #! \item{value}{ #! value of the attribute, use NULL to remove this attribute #! } #! \item{attributes}{ #! a named list of attribute values } #! } #! \details{ #! The attributes of 'x' are changed in place without copying x. function \code{setattributes} does only change the named attributes, it does not delete the non-names attributes like \code{\link{attributes}} does. #! } #! \value{ #! invisible(), we do not return the changed object to remind you of the fact that this function is called for its side-effect of changing its input object. #! } #! \references{ #! Writing R extensions -- System and foreign language interfaces -- Handling R objects in C -- Attributes (Version 2.11.1 (2010-06-03 ) R Development) #! } #! \author{ #! Jens Oehlschlägel #! } #! #! \seealso{ #! \code{\link{attr}} \code{\link{unattr}} #! } #! \examples{ #! x <- as.single(runif(10)) #! attr(x, "Csingle") #! #! f <- function(x)attr(x, "Csingle") <- NULL #! g <- function(x)setattr(x, "Csingle", NULL) #! #! f(x) #! x #! g(x) #! x #! #! \dontrun{ #! #! # restart R #! library(bit) #! #! mysingle <- function(length = 0){ #! ret <- double(length) #! setattr(ret, "Csingle", TRUE) #! ret #! } #! #! # show that mysinge gives exactly the same result as single #! identical(single(10), mysingle(10)) #! #! # look at the speedup and memory-savings of mysingle compared to single #! system.time(mysingle(1e7)) #! memory.size(max=TRUE) #! system.time(single(1e7)) #! memory.size(max=TRUE) #! #! # look at the memory limits #! # on my win32 machine the first line fails beause of not enough RAM, the second works #! x <- single(1e8) #! x <- mysingle(1e8) #! #! # .g. performance with factors #! x <- rep(factor(letters), length.out=1e7) #! x[1:10] #! # look how fast one can do this #! system.time(setattr(x, "levels", rev(letters))) #! x[1:10] #! # look at the performance loss in time caused by the non-needed copying #! system.time(levels(x) <- letters) #! x[1:10] #! #! #! # restart R #! library(bit) #! #! simplefactor <- function(n){ #! factor(rep(1:2, length.out=n)) #! } #! #! mysimplefactor <- function(n){ #! ret <- rep(1:2, length.out=n) #! setattr(ret, "levels", as.character(1:2)) #! setattr(ret, "class", "factor") #! ret #! } #! #! identical(simplefactor(10), mysimplefactor(10)) #! #! system.time(x <- mysimplefactor(1e7)) #! memory.size(max=TRUE) #! system.time(setattr(x, "levels", c("a","b"))) #! memory.size(max=TRUE) #! x[1:4] #! memory.size(max=TRUE) #! rm(x) #! gc() #! #! system.time(x <- simplefactor(1e7)) #! memory.size(max=TRUE) #! system.time(levels(x) <- c("x","y")) #! memory.size(max=TRUE) #! x[1:4] #! memory.size(max=TRUE) #! rm(x) #! gc() #! #! } #! #! } #! \keyword{ attributes } setattr <- function(x, which, value) { .Call("R_bit_set_attr", x, which, value, PACKAGE="bit") invisible() } setattributes <- function(x, attributes) { nam <- names(attributes) for (i in seq_len(length(attributes))){ .Call("R_bit_set_attr", x, nam[[i]], attributes[[i]], PACKAGE="bit") } invisible() } #! \name{unattr} #! \alias{unattr} #! \title{ #! Attribute removal #! } #! \description{ #! Returns object with attributes removed #! } #! \usage{ #! unattr(x) #! } #! \arguments{ #! \item{x}{ #! any R object #! } #! } #! \details{ #! attribute removal copies the object as usual #! } #! \value{ #! a similar object with attributes removed #! } #! \author{ #! Jens Oehlschlägel #! } #! #! \seealso{ #! \code{\link{attributes}}, \code{\link{setattributes}}, \code{\link{unclass}} #! } #! \examples{ #! bit(2)[] #! unattr(bit(2)[]) #! } #! \keyword{attribute} unattr <- function(x){ attributes(x) <- NULL x } bit/R/generics.R0000755000176000001440000002073513264143124013215 0ustar ripleyusers# 1-bit boolean vectors for R # (c) 2008-2009 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # source("D:/mwp/eanalysis/bit/R/generics.R") clone <- function(x, ...)UseMethod("clone") as.bit <- function(x, ...) UseMethod("as.bit", x) as.which <- function (x, ...) UseMethod("as.which") as.bitwhich <- function(x, ...) UseMethod("as.bitwhich") xor <- function(x, y) UseMethod("xor", x) physical <- function(x)UseMethod("physical") "physical<-" <- function(x, value)UseMethod("physical<-") virtual <- function(x)UseMethod("virtual") "virtual<-" <- function(x, value)UseMethod("virtual<-") #! \name{ramsort} #! \alias{ramsort} #! \alias{shellsort} #! \alias{quicksort} #! \alias{mergesort} #! \alias{radixsort} #! \alias{keysort} #! \alias{ramorder} #! \alias{shellorder} #! \alias{quickorder} #! \alias{mergeorder} #! \alias{radixorder} #! \alias{keyorder} #! \alias{ramsortorder} #! \alias{shellsortorder} #! \alias{quicksortorder} #! \alias{mergesortorder} #! \alias{radixsortorder} #! \alias{keysortorder} #! \title{ #! Generics for in-RAM sorting and ordering #! } #! \description{ #! These are generic stubs for low-level sorting and ordering methods implemented in packages #! 'bit64' and 'ff'. #! The \code{..sortorder} methods do sorting and ordering at once, which requires more RAM than ordering but is (almost) as fast as as sorting. #! } #! \usage{ #! ramsort(x, \dots) #! ramorder(x, i, \dots) #! ramsortorder(x, i, \dots) #! mergesort(x, \dots) #! mergeorder(x, i, \dots) #! mergesortorder(x, i, \dots) #! quicksort(x, \dots) #! quickorder(x, i, \dots) #! quicksortorder(x, i, \dots) #! shellsort(x, \dots) #! shellorder(x, i, \dots) #! shellsortorder(x, i, \dots) #! radixsort(x, \dots) #! radixorder(x, i, \dots) #! radixsortorder(x, i, \dots) #! keysort(x, \dots) #! keyorder(x, i, \dots) #! keysortorder(x, i, \dots) #! } #! \arguments{ #! \item{x}{ a vector to be sorted by \code{\link{ramsort}} and \code{\link{ramsortorder}}, i.e. the output of \code{\link{sort}} } #! \item{i}{ integer positions to be modified by \code{\link{ramorder}} and \code{\link{ramsortorder}}, default is 1:n, in this case the output is similar to \code{\link{order}} } #! \item{\dots}{ further arguments to the sorting methods } #! } #! \details{ #! The \code{sort} generics do sort their argument 'x', some methods need temporary RAM of the same size as 'x'. #! The \code{order} generics do order their argument 'i' leaving 'x' as it was, #! some methods need temporary RAM of the same size as 'i'. #! The \code{sortorder} generics do sort their argument 'x' and order their argument 'i', #! this way of ordering is much faster at the price of requiring temporary RAM for both, #! 'x' and 'i', if the method requires temporary RAM. #! The \code{ram} generics are high-level functions containing an optimizer that chooses the 'best' algorithms given some context. #! } #! \section{Index of implemented methods}{ #! \tabular{rrl}{ #! \bold{generic} \tab \bold{ff} \tab \bold{bit64} \cr #! \code{ramsort} \tab \code{\link[ff]{ramsort.default}} \tab \code{\link[bit64]{ramsort.integer64}} \cr #! \code{shellsort} \tab \code{\link[ff]{shellsort.default}} \tab \code{\link[bit64]{shellsort.integer64}} \cr #! \code{quicksort} \tab \tab \code{\link[bit64]{quicksort.integer64}} \cr #! \code{mergesort} \tab \code{\link[ff]{mergesort.default}} \tab \code{\link[bit64]{mergesort.integer64}} \cr #! \code{radixsort} \tab \code{\link[ff]{radixsort.default}} \tab \code{\link[bit64]{radixsort.integer64}} \cr #! \code{keysort} \tab \code{\link[ff]{keysort.default}} \tab \cr #! \cr #! \bold{generic} \tab \bold{ff} \tab \bold{bit64} \cr #! \code{ramorder} \tab \code{\link[ff]{ramorder.default}} \tab \code{\link[bit64]{ramorder.integer64}} \cr #! \code{shellorder} \tab \code{\link[ff]{shellorder.default}} \tab \code{\link[bit64]{shellorder.integer64}} \cr #! \code{quickorder} \tab \tab \code{\link[bit64]{quickorder.integer64}} \cr #! \code{mergeorder} \tab \code{\link[ff]{mergeorder.default}} \tab \code{\link[bit64]{mergeorder.integer64}} \cr #! \code{radixorder} \tab \code{\link[ff]{radixorder.default}} \tab \code{\link[bit64]{radixorder.integer64}} \cr #! \code{keyorder} \tab \code{\link[ff]{keyorder.default}} \tab \cr #! \cr #! \bold{generic} \tab \bold{ff} \tab \bold{bit64} \cr #! \code{ramsortorder} \tab \tab \code{\link[bit64]{ramsortorder.integer64}} \cr #! \code{shellsortorder} \tab \tab \code{\link[bit64]{shellsortorder.integer64}} \cr #! \code{quicksortorder} \tab \tab \code{\link[bit64]{quicksortorder.integer64}} \cr #! \code{mergesortorder} \tab \tab \code{\link[bit64]{mergesortorder.integer64}} \cr #! \code{radixsortorder} \tab \tab \code{\link[bit64]{radixsortorder.integer64}} \cr #! \code{keysortorder} \tab \tab \cr #! } #! } #! \note{ #! Note that these methods purposely violate the functional programming paradigm: they are called for the side-effect of changing some of their arguments. #! The rationale behind this is that sorting is very RAM-intensive and in certain #! situations we might not want to allocate additional memory if not necessary to do so. #! The \code{sort}-methods change \code{x}, the \code{order}-methods change \code{i}, and the \code{sortoder}-methods change both \code{x} and \code{i} #! You as the user are responsible to create copies of the input data 'x' and 'i' #! if you need non-modified versions. #! } #! \value{ #! These functions return the number of \code{NAs} found or assumed during sorting #! } #! \author{ #! Jens Oehlschlägel #! } #! \keyword{univar} #! \keyword{manip} #! \keyword{arith} #! \seealso{ \code{\link{sort}} and \code{\link{order}} in base R} ramsort <- function(x, ...)UseMethod("ramsort") ramorder <- function(x, i, ...)UseMethod("ramorder") ramsortorder <- function(x, i, ...)UseMethod("ramsortorder") mergesort <- function(x, ...)UseMethod("mergesort") mergeorder <- function(x, i, ...)UseMethod("mergeorder") mergesortorder <- function(x, i, ...)UseMethod("mergesortorder") quicksort <- function(x, ...)UseMethod("quicksort") quickorder <- function(x, i, ...)UseMethod("quickorder") quicksortorder <- function(x, i, ...)UseMethod("quicksortorder") shellsort <- function(x, ...)UseMethod("shellsort") shellorder <- function(x, i, ...)UseMethod("shellorder") shellsortorder <- function(x, i, ...)UseMethod("shellsortorder") radixsort <- function(x, ...)UseMethod("radixsort") radixorder <- function(x, i, ...)UseMethod("radixorder") radixsortorder <- function(x, i, ...)UseMethod("radixsortorder") keysort <- function(x, ...)UseMethod("keysort") keyorder <- function(x, i, ...)UseMethod("keyorder") keysortorder <- function(x, i, ...)UseMethod("keysortorder") #! \name{is.sorted} #! \alias{is.sorted} #! \alias{na.count} #! \alias{nvalid} #! \alias{nunique} #! \alias{nties} #! \alias{is.sorted<-} #! \alias{na.count<-} #! \alias{nunique<-} #! \alias{nties<-} #! \title{ #! Generics related to cache access #! } #! \description{ #! These generics are packaged here for methods in packages \code{bit64} and \code{ff}. #! } #! \usage{ #! is.sorted(x, \dots) #! is.sorted(x, \dots) <- value #! na.count(x, \dots) #! na.count(x, \dots) <- value #! nvalid(x, \dots) #! nunique(x, \dots) #! nunique(x, \dots) <- value #! nties(x, \dots) #! nties(x, \dots) <- value #! } #! \arguments{ #! \item{x}{ #! some object #! } #! \item{value}{ #! value assigned on responsibility of the user #! } #! \item{\dots}{ #! ignored #! } #! } #! \details{ #! see help of the available methods #! } #! \value{ #! see help of the available methods #! } #! \author{ #! Jens Oehlschlägel #! } #! \seealso{ #! \code{\link[bit64]{is.sorted.integer64}}, \code{\link[bit64]{na.count.integer64}}, \code{\link[bit64]{nvalid.integer64}}, \code{\link[bit64]{nunique.integer64}}, \code{\link[bit64]{nties.integer64}} \cr #! } #! \examples{ #! methods("na.count") #! } #! \keyword{ environment } #! \keyword{ methods } is.sorted <- function(x, ...)UseMethod("is.sorted") "is.sorted<-" <- function(x, ..., value)UseMethod("is.sorted<-") na.count <- function(x, ...)UseMethod("na.count") "na.count<-" <- function(x, ..., value)UseMethod("na.count<-") nvalid <- function(x, ...)UseMethod("nvalid") nunique <- function(x, ...)UseMethod("nunique") "nunique<-" <- function(x, ..., value)UseMethod("nunique<-") nties <- function(x, ...)UseMethod("nties") "nties<-" <- function(x, ..., value)UseMethod("nties<-") bit/R/rle.R0000755000176000001440000001563113264143137012203 0ustar ripleyusers# rle utilities for bit and ff # (c) 2007-2009 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2007-09-03 # Last changed: 2007-10-25 # source("D:/mwp/eanalysis/bit/R/rle.R") #! \name{intrle} #! \alias{intrle} #! \alias{intisasc} #! \alias{intisdesc} #! \title{ Hybrid Index, C-coded utilities } #! \description{ #! These C-coded utilitites speed up index preprocessing considerably #! } #! \usage{ #! intrle(x) #! intisasc(x) #! intisdesc(x) #! } #! \arguments{ #! \item{x}{ an integer vector } #! } #! \details{ #! \code{intrle} is by factor 50 faster and needs less RAM (2x its input vector) compared to \code{\link{rle}} which needs 9x the RAM of its input vector. #! This is achieved because we allow the C-code of \code{intrle} to break when it turns out, that rle-packing will not achieve a #! compression factor of 3 or better. #! \cr #! \code{intisasc} is a faster version of \code{\link{is.unsorted}}: it checks whether \code{x} is sorted and returns NA \code{x} contains NAs. #! \cr #! \code{intisdesc} checks for being sorted descending and assumes that the input \code{x} contains no NAs (is used after \code{intisasc} and does not check for NAs). #! } #! \value{ #! \code{intrle} returns an object of class \code{\link{rle}} or NULL, if rle-compression is not efficient (compression factor <3 or length(x)<3). #! \cr #! \code{intisasc} returns one of \code{FALSE, NA, TRUE} #! \cr #! \code{intisdesc} returns one of \code{FALSE, TRUE} (if the input contains NAs, the output is undefined) #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link[ff]{hi}}, \code{\link{rle}}, \code{\link{is.unsorted}}, \code{\link[ff]{is.sorted}} } #! \examples{ #! intrle(sample(1:100)) #! intrle(diff(1:100)) #! intisasc(1:100) #! intisasc(100:1) #! intisasc(c(NA, 1:100)) #! intisdesc(1:100) #! intisdesc(100:1) #! } #! #! \keyword{ IO } #! \keyword{ data } # -- check for sorting and NAs, 0s can be checked later when sorted ------------------ # NA=NAs FALSE=NotAscending TRUE=OK intisasc <- function(x){ stopifnot(is.integer(x)) .Call("int_check_ascending", x, PACKAGE="bit") } # FALSE=NotDescending TRUE=OK (assumes no NAs, i.e. need to use intisasc first) intisdesc <- function(x){ stopifnot(is.integer(x)) .Call("int_check_descending", x, PACKAGE="bit") } # -- fast and efficient rle ------------------ # integer only # returns rle object only if n>2 && rle is efficient (length(values)+lengths(lengths))<=length(x) # returns NULL if n<3 || rle is inefficient intrle <- function(x){ stopifnot(is.integer(x)) .Call("int_rle", x, PACKAGE="bit") } # -- basic sequence packing and unpacking --------------------------------------------------- #! \name{rlepack} #! \alias{rlepack} #! \alias{rlepack.integer} #! \alias{rleunpack} #! \alias{rleunpack.rlepack} #! \alias{rev.rlepack} #! \alias{unique.rlepack} #! \alias{anyDuplicated.rlepack} #! \title{ Hybrid Index, rle-pack utilities } #! \description{ #! Basic utilities for rle packing and unpacking and apropriate methods for \code{\link{rev}} and \code{\link{unique}}. #! } #! \usage{ #! rlepack(x, \dots) #! \method{rlepack}{integer}(x, pack = TRUE, \dots) #! rleunpack(x) #! \method{rleunpack}{rlepack}(x) #! \method{rev}{rlepack}(x) #! \method{unique}{rlepack}(x, incomparables = FALSE, \dots) #! \method{anyDuplicated}{rlepack}(x, incomparables = FALSE, \dots) #! } #! \arguments{ #! \item{x}{ in 'rlepack' an integer vector, in the other functions an object of class 'rlepack'} #! \item{pack}{ FALSE to suppress packing } #! \item{incomparables}{ just to keep R CMD CHECK quiet (not used) } #! \item{\dots}{ just to keep R CMD CHECK quiet (not used) } #! } #! \value{ #! A list with components #! \item{ first }{ the first element of the packed sequence } #! \item{ dat }{ either an object of class \code{\link{rle}} or the complete input vector \code{x} if rle-packing is not efficient } #! \item{ last }{ the last element of the packed sequence } #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link[ff]{hi}}, \code{\link{intrle}}, \code{\link{rle}}, \code{\link{rev}}, \code{\link{unique}} } #! \examples{ #! x <- rlepack(rep(0L, 10)) #!\dontshow{ #! for (x in list(10:1, 1:10, c(10:1,1:10), c(1:10,10:1), sample(100), sample(100, 100, TRUE), sample(10, 100, TRUE))){ #! stopifnot(identical(rleunpack(rlepack(x)), x)) #! stopifnot(identical(rleunpack(unique(rlepack(x))), unique(x))) #! stopifnot(identical(anyDuplicated(rlepack(x)), anyDuplicated(x))) #! } #!} #! } #! \keyword{ IO } #! \keyword{ data } #setOldClass("rlepack") rlepack <- function(x, ...) UseMethod("rlepack") rlepack.integer <- function( x , pack = TRUE # TRUE / FALSE , ... # dummy to keep R CMD check quiet ){ stopifnot(is.integer(x)) n <- length(x) if (n>1){ if (pack) r <- intrle(diff(x)) # returns NULL if rle is inefficient, old condition was 2*length(r$lengths)1L) sum(x$dat$lengths[1:(w-1L)]) + 2L else 2L else 0L } else 0L }else{ anyDuplicated(x$dat) } } bit/R/clone.R0000755000176000001440000000335113264143112012506 0ustar ripleyusers# clone utilities for bit,bit64,ff # (c) 2014 Jens Oehlschlägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2014-03-02 #! \name{clone} #! \alias{clone} #! \alias{clone.list} #! \alias{clone.default} #! \alias{still.identical} #! \title{ Cloning ff and ram objects } #! \description{ #! \command{clone} physically duplicates objects and can additionally change some features, e.g. length. #! } #! \usage{ #! clone(x, \dots) #! \method{clone}{list}(x, \dots) #! \method{clone}{default}(x, \dots) #! still.identical(x, y) #! } #! \arguments{ #! \item{x}{ \code{x} } #! \item{y}{ \code{y} } #! \item{\dots}{ further arguments to the generic } #! } #! \details{ #! \command{clone} is generic. #! \command{clone.default} currently only handles atomics. #! \command{clone.list} recursively clones list elements. #! \command{still.identical} returns TRUE if the two atomic arguments still point to the same memory. #! } #! \value{ #! an object that is a deep copy of x #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link[ff]{clone.ff}} } #! \examples{ #! x <- 1:12 #! y <- x #! still.identical(x,y) #! y[1] <- y[1] #! still.identical(x,y) #! y <- clone(x) #! still.identical(x,y) #! rm(x,y); gc() #! } #! \keyword{ IO } #! \keyword{ data } # named <- function(x) # .Call("R_bit_named", x, PACKAGE="bit") still.identical <- function(x, y){ .Call("r_ram_truly_identical", x = x, y = y, PACKAGE = "bit") } clone.default <- function(x , ... # passed to clone ){ if (is.atomic(x)){ if (length(x)) x[1] <- x[1] # force a copy around COPY ON MODIFY x }else{ stop("clone not defined for type") } } clone.list <- function(x , ... # passed to clone ){ lapply(x, clone, ...) } bit/R/bit.R0000755000176000001440000023566413620002774012206 0ustar ripleyusers# 1-bit boolean vectors for R # (c) 2008-2009 Jens Oehlschägel # (C) 2020 Brian Ripley # Licence: GPL2 # Provided 'as is', use at your own risk # currently |.bit and |.bitwhich are bypassed if we ask for bit | bitwhich # xx explore/write Ops.bit Ops.bitwhich # xx bit_extract should be comlemented with # source("C:/mwp/eanalysis/bit/R/bit.R") # Configuration: set this to 32L or 64L and keep in sync with BITS in bit.c .BITS <- 32L #! \name{bit-package} #! \alias{bit-package} #! \alias{bit} #! \alias{print.bit} #! \docType{package} #! \title{ #! A class for vectors of 1-bit booleans #! } #! \description{ #! Package 'bit' provides bitmapped vectors of booleans (no NAs), #! coercion from and to logicals, integers and integer subscripts; #! fast boolean operators and fast summary statistics. \cr #! #! With bit vectors you can store true binary booleans \{FALSE,TRUE\} at the expense #! of 1 bit only, on a 32 bit architecture this means factor 32 less RAM and #! factor 32 more speed on boolean operations. With this speed gain it even #! pays-off to convert to bit in order to avoid a single boolean operation on #! logicals or a single set operation on (longer) integer subscripts, the pay-off #! is dramatic when such components are used more than once. \cr #! #! Reading from and writing to bit is approximately as fast as accessing standard #! logicals - mostly due to R's time for memory allocation. The package allows to #! work with pre-allocated memory for return values by calling .Call() directly: #! when evaluating the speed of C-access with pre-allocated vector memory, coping #! from bit to logical requires only 70\% of the time for copying from logical to #! logical; and copying from logical to bit comes at a performance penalty of 150\%. \cr #! #! Since bit objects cannot be used as subsripts in R, a second class 'bitwhich' #! allows to store selections as efficiently as possible with standard R types. #! This is usefull either to represent parts of bit objects or to represent #! very asymetric selections. \cr #! #! Class 'ri' (range index) allows to select ranges of positions for chunked processing: #! all three classes 'bit', 'bitwhich' and 'ri' can be used for subsetting 'ff' objects (ff-2.1.0 and higher). #! } #! \usage{ #! bit(length) #! \method{print}{bit}(x, \dots) #! } #! \arguments{ #! \item{length}{ length of vector in bits } #! \item{x}{ a bit vector } #! \item{\dots}{ further arguments to print } #! } #! \details{ #! \tabular{ll}{ #! Package: \tab bit\cr #! Type: \tab Package\cr #! Version: \tab 1.1.0\cr #! Date: \tab 2012-06-05\cr #! License: \tab GPL-2\cr #! LazyLoad: \tab yes\cr #! Encoding: \tab latin1\cr #! } #! #! Index: #! \tabular{rrrrl}{ #! \bold{bit function} \tab \bold{bitwhich function} \tab \bold{ri function} \tab \bold{see also} \tab \bold{description} \cr #! \code{.BITS} \tab \tab \tab \code{\link{globalenv}} \tab variable holding number of bits on this system \cr #! \code{\link{bit_init}} \tab \tab \tab \code{\link{.First.lib}} \tab initially allocate bit-masks (done in .First.lib) \cr #! \code{\link{bit_done}} \tab \tab \tab \code{\link{.Last.lib}} \tab finally de-allocate bit-masks (done in .Last.lib) \cr #! \code{\link{bit}} \tab \code{\link{bitwhich}} \tab \code{\link{ri}} \tab \code{\link{logical}} \tab create bit object \cr #! \code{\link{print.bit}} \tab \code{\link{print.bitwhich}} \tab \code{\link{print.ri}} \tab \code{\link{print}} \tab print bit vector \cr #! \code{\link{length.bit}} \tab \code{\link{length.bitwhich}} \tab \code{\link{length.ri}} \tab \code{\link{length}} \tab get length of bit vector \cr #! \code{\link{length<-.bit}} \tab \code{\link{length<-.bitwhich}} \tab \tab \code{\link{length<-}} \tab change length of bit vector \cr #! \code{\link{c.bit}} \tab \code{\link{c.bitwhich}} \tab \tab \code{\link{c}} \tab concatenate bit vectors \cr #! \code{\link{is.bit}} \tab \code{\link{is.bitwhich}} \tab \code{\link{is.ri}} \tab \code{\link{is.logical}} \tab test for bit class \cr #! \code{\link{as.bit}} \tab \code{\link{as.bitwhich}} \tab \tab \code{\link{as.logical}} \tab generically coerce to bit or bitwhich \cr #! \code{\link{as.bit.logical}} \tab \code{\link{as.bitwhich.logical}} \tab \tab \code{\link{logical}} \tab coerce logical to bit vector (FALSE => FALSE, c(NA, TRUE) => TRUE) \cr #! \code{\link{as.bit.integer}} \tab \code{\link{as.bitwhich.integer}} \tab \tab \code{\link{integer}} \tab coerce integer to bit vector (0 => FALSE, ELSE => TRUE) \cr #! \code{\link{as.bit.double}} \tab \code{\link{as.bitwhich.double}} \tab \tab \code{\link{double}} \tab coerce double to bit vector (0 => FALSE, ELSE => TRUE) \cr #! \code{\link{as.double.bit}} \tab \code{\link{as.double.bitwhich}} \tab \code{\link{as.double.ri}} \tab \code{\link{as.double}} \tab coerce bit vector to double (0/1) \cr #! \code{\link{as.integer.bit}} \tab \code{\link{as.integer.bitwhich}} \tab \code{\link{as.integer.ri}} \tab \code{\link{as.integer}} \tab coerce bit vector to integer (0L/1L) \cr #! \code{\link{as.logical.bit}} \tab \code{\link{as.logical.bitwhich}} \tab \code{\link{as.logical.ri}} \tab \code{\link{as.logical}} \tab coerce bit vector to logical (FALSE/TRUE) \cr #! \code{\link{as.which.bit}} \tab \code{\link{as.which.bitwhich}} \tab \code{\link{as.which.ri}} \tab \code{\link{as.which}} \tab coerce bit vector to positive integer subscripts\cr #! \code{\link{as.bit.which}} \tab \code{\link{as.bitwhich.which}} \tab \tab \code{\link{bitwhich}} \tab coerce integer subscripts to bit vector \cr #! \code{\link{as.bit.bitwhich}} \tab \code{\link{as.bitwhich.bitwhich}}\tab \tab \tab coerce from bitwhich \cr #! \code{\link{as.bit.bit}} \tab \code{\link{as.bitwhich.bit}} \tab \tab \code{\link{UseMethod}} \tab coerce from bit \cr #! \code{\link{as.bit.ri}} \tab \code{\link{as.bitwhich.ri}} \tab \tab \tab coerce from range index \cr #! \code{\link[ff]{as.bit.ff}} \tab \tab \tab \code{\link[ff]{ff}} \tab coerce ff boolean to bit vector \cr #! \code{\link[ff]{as.ff.bit}} \tab \tab \tab \code{\link[ff]{as.ff}} \tab coerce bit vector to ff boolean \cr #! \code{\link[ff]{as.hi.bit}} \tab \code{\link[ff]{as.hi.bitwhich}} \tab \code{\link[ff]{as.hi.ri}} \tab \code{\link[ff]{as.hi}} \tab coerce to hybrid index (requires package ff) \cr #! \code{\link[ff]{as.bit.hi}} \tab \code{\link[ff]{as.bitwhich.hi}} \tab \tab \tab coerce from hybrid index (requires package ff) \cr #! \code{\link{[[.bit}} \tab \tab \tab \code{\link{[[}} \tab get single bit (index checked) \cr #! \code{\link{[[<-.bit}} \tab \tab \tab \code{\link{[[<-}} \tab set single bit (index checked) \cr #! \code{\link{[.bit}} \tab \tab \tab \code{\link{[}} \tab get vector of bits (unchecked) \cr #! \code{\link{[<-.bit}} \tab \tab \tab \code{\link{[<-}} \tab set vector of bits (unchecked) \cr #! \code{\link{!.bit}} \tab \code{\link{!.bitwhich}} \tab (works as second arg in \tab \code{\link{!}} \tab boolean NOT on bit \cr #! \code{\link{&.bit}} \tab \code{\link{&.bitwhich}} \tab bit and bitwhich ops) \tab \code{\link{&}} \tab boolean AND on bit \cr #! \code{\link{|.bit}} \tab \code{\link{|.bitwhich}} \tab \tab \code{\link{|}} \tab boolean OR on bit \cr #! \code{\link{xor.bit}} \tab \code{\link{xor.bitwhich}} \tab \tab \code{\link{xor}} \tab boolean XOR on bit \cr #! \code{\link{!=.bit}} \tab \code{\link{!=.bitwhich}} \tab \tab \code{\link{!=}} \tab boolean unequality (same as XOR) \cr #! \code{\link{==.bit}} \tab \code{\link{==.bitwhich}} \tab \tab \code{\link{==}} \tab boolean equality \cr #! \code{\link{all.bit}} \tab \code{\link{all.bitwhich}} \tab \code{\link{all.ri}} \tab \code{\link{all}} \tab aggregate AND \cr #! \code{\link{any.bit}} \tab \code{\link{any.bitwhich}} \tab \code{\link{any.ri}} \tab \code{\link{any}} \tab aggregate OR \cr #! \code{\link{min.bit}} \tab \code{\link{min.bitwhich}} \tab \code{\link{min.ri}} \tab \code{\link{min}} \tab aggregate MIN (first TRUE position) \cr #! \code{\link{max.bit}} \tab \code{\link{max.bitwhich}} \tab \code{\link{max.ri}} \tab \code{\link{max}} \tab aggregate MAX (last TRUE position) \cr #! \code{\link{range.bit}} \tab \code{\link{range.bitwhich}} \tab \code{\link{range.ri}} \tab \code{\link{range}} \tab aggregate [MIN,MAX] \cr #! \code{\link{sum.bit}} \tab \code{\link{sum.bitwhich}} \tab \code{\link{sum.ri}} \tab \code{\link{sum}} \tab aggregate SUM (count of TRUE) \cr #! \code{\link{summary.bit}} \tab \code{\link{summary.bitwhich}} \tab \code{\link{summary.ri}} \tab \code{\link{tabulate}} \tab aggregate c(nFALSE, nTRUE, minRange, maxRange) \cr #! \code{\link{regtest.bit}} \tab \tab \tab \tab regressiontests for the package \cr #! } #! } #! \value{ #! \code{bit} returns a vector of integer sufficiently long to store 'length' bits #! (but not longer) with an attribute 'n' and class 'bit' #! } #! \author{ #! Jens Oehlschlägel #! #! Maintainer: Jens Oehlschlägel #! } #! \note{ #! Currently operations on bit objects have some overhead from R-calls. Do expect speed gains for vectors #! of length ~ 10000 or longer. \cr #! Since this package was created for high performance purposes, only positive integer subscripts are allowed: #! All R-functions behave as expected - i.e. they do not change their arguments and create new return values. #! If you want to save the time for return value memory allocation, you must use \code{\link{.Call}} directly #! (see the dontrun example in \code{\link{sum.bit}}). #! } #! \keyword{ package } #! \keyword{ classes } #! \keyword{ logic } #! \seealso{ \code{\link{logical}} in base R and \code{\link[ff]{vmode}} in package 'ff' } #! \examples{ #! x <- bit(12) # create bit vector #! x # autoprint bit vector #! length(x) <- 16 # change length #! length(x) # get length #! x[[2]] # extract single element #! x[[2]] <- TRUE # replace single element #! x[1:2] # extract parts of bit vector #! x[1:2] <- TRUE # replace parts of bit vector #! as.which(x) # coerce bit to subscripts #! x <- as.bit.which(3:4, 4) # coerce subscripts to bit #! as.logical(x) # coerce bit to logical #! y <- as.bit(c(FALSE, TRUE, FALSE, TRUE)) # coerce logical to bit #! is.bit(y) # test for bit #! !x # boolean NOT #! x & y # boolean AND #! x | y # boolean OR #! xor(x, y) # boolean Exclusive OR #! x != y # boolean unequality (same as xor) #! x == y # boolean equality #! all(x) # aggregate AND #! any(x) # aggregate OR #! min(x) # aggregate MIN (integer version of ALL) #! max(x) # aggregate MAX (integer version of ANY) #! range(x) # aggregate [MIN,MAX] #! sum(x) # aggregate SUM (count of TRUE) #! summary(x) # aggregate count of FALSE and TRUE #! #! \dontrun{ #! message("\nEven for a single boolean operation transforming logical to bit pays off") #! n <- 10000000 #! x <- sample(c(FALSE, TRUE), n, TRUE) #! y <- sample(c(FALSE, TRUE), n, TRUE) #! system.time(x|y) #! system.time({ #! x <- as.bit(x) #! y <- as.bit(y) #! }) #! system.time( z <- x | y ) #! system.time( as.logical(z) ) #! message("Even more so if multiple operations are needed :-)") #! #! message("\nEven for a single set operation transforming subscripts to bit pays off\n") #! n <- 10000000 #! x <- sample(n, n/2) #! y <- sample(n, n/2) #! system.time( union(x,y) ) #! system.time({ #! x <- as.bit.which(x, n) #! y <- as.bit.which(y, n) #! }) #! system.time( as.which.bit( x | y ) ) #! message("Even more so if multiple operations are needed :-)") #! #! message("\nSome timings WITH memory allocation") #! n <- 2000000 #! l <- sample(c(FALSE, TRUE), n, TRUE) #! # copy logical to logical #! system.time(for(i in 1:100){ # 0.0112 #! l2 <- l #! l2[1] <- TRUE # force new memory allocation (copy on modify) #! rm(l2) #! })/100 #! # copy logical to bit #! system.time(for(i in 1:100){ # 0.0123 #! b <- as.bit(l) #! rm(b) #! })/100 #! # copy bit to logical #! b <- as.bit(l) #! system.time(for(i in 1:100){ # 0.009 #! l2 <- as.logical(b) #! rm(l2) #! })/100 #! # copy bit to bit #! b <- as.bit(l) #! system.time(for(i in 1:100){ # 0.009 #! b2 <- b #! b2[1] <- TRUE # force new memory allocation (copy on modify) #! rm(b2) #! })/100 #! #! #! l2 <- l #! # replace logical by TRUE #! system.time(for(i in 1:100){ #! l[] <- TRUE #! })/100 #! # replace bit by TRUE (NOTE that we recycle the assignment #! # value on R side == memory allocation and assignment first) #! system.time(for(i in 1:100){ #! b[] <- TRUE #! })/100 #! # THUS the following is faster #! system.time(for(i in 1:100){ #! b <- !bit(n) #! })/100 #! #! # replace logical by logical #! system.time(for(i in 1:100){ #! l[] <- l2 #! })/100 #! # replace bit by logical #! system.time(for(i in 1:100){ #! b[] <- l2 #! })/100 #! # extract logical #! system.time(for(i in 1:100){ #! l2[] #! })/100 #! # extract bit #! system.time(for(i in 1:100){ #! b[] #! })/100 #! #! message("\nSome timings WITHOUT memory allocation (Serge, that's for you)") #! n <- 2000000L #! l <- sample(c(FALSE, TRUE), n, TRUE) #! b <- as.bit(l) #! # read from logical, write to logical #! l2 <- logical(n) #! system.time(for(i in 1:100).Call("R_filter_getset", l, l2, PACKAGE="bit")) / 100 #! # read from bit, write to logical #! l2 <- logical(n) #! system.time(for(i in 1:100).Call("R_bit_get", b, l2, c(1L, n), PACKAGE="bit")) / 100 #! # read from logical, write to bit #! system.time(for(i in 1:100).Call("R_bit_set", b, l2, c(1L, n), PACKAGE="bit")) / 100 #! #! } #! } #! \name{bit_init} #! \alias{bit_init} #! \alias{bit_done} #! \alias{.BITS} #! \title{ Initializing bit masks } #! \description{ #! Functions to allocate (and de-allocate) bit masks #! } #! \usage{ #! bit_init() #! bit_done() #! } #! \details{ #! The C-code operates with bit masks. #! The memory for these is allocated dynamically. #! \code{bit_init} is called by \code{\link{.First.lib}} #! and \code{bit_done} is called by \code{\link{.Last.lib}}. #! You don't need to care about these under normal circumstances. #! } #! \value{ #! NULL #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bit}} } #! \examples{ #! bit_done() #! bit_init() #! } #! \keyword{ classes } #! \keyword{ logic } # initialize and finalize the bit-mask vectors used in C bit_init <- function() .Call("R_bit_init", .BITS, PACKAGE="bit") bit_done <- function() .Call("R_bit_done", PACKAGE="bit") # creator for empty bit vector bit <- function(length){ length <- as.integer(length) if (length %% .BITS) n <- length %/% .BITS + 1L else n <- length %/% .BITS if (.BITS==64L) x <- integer(2L*n) else x <- integer(n) #physical(x) <- list(vmode="boolean") #virtual(x) <- list(Length=length) #class(x) <- "bit" # tuning p <- list() v <- list() attributes(p) <- list(vmode="boolean", class="physical") attributes(v) <- list(Length=length, class="virtual") attributes(x) <- list(physical=p, virtual=v, class="bit") x } print.bit <- function(x, ...){ n <- length(x) cat("bit length=", n, " occupying only ", length(unclass(x)), " integers\n", sep="") if (n>16){ y <- c(x[1:8], "..", x[(n-7L):n]) names(y) <- c(1:8, "", (n-7L):n) print(y, quote=FALSE, ...) }else if(n){ y <- c(x[]) names(y) <- c(1:n) print(y, quote=FALSE, ...) } } #! \name{bitwhich} #! \alias{bitwhich} #! \alias{print.bitwhich} #! \title{ A class for vectors representing asymetric selections } #! \description{ #! A bitwhich object like the result of \code{\link{which}} and \code{\link{as.which}} does represent integer subscript positions, #! but bitwhich objects represent some subscripts rather with negative integers, if this needs less space. #! The extreme cases of selecting all/none subscripts are represented by TRUE/FALSE. #! This needs less RAM compared to \code{\link{logical}} (and often less than \code{\link{as.which}}). #! Logical operations are fast if the selection is asymetric (only few or almost all selected). #! } #! \usage{ #! bitwhich(maxindex, poslength = NULL, x = NULL) #! } #! \arguments{ #! \item{maxindex}{ the length of the vector (sum of all TRUEs and FALSEs) } #! \item{poslength}{ Only use if x is not NULL: the sum of all TRUEs } #! \item{x}{ Default NULL or FALSE or unique negative integers or unique positive integers or TRUE} #! } #! \value{ #! An object of class 'bitwhich' carrying two attributes #! \item{maxindex}{ see above } #! \item{poslength}{ see above } #! } #! \details{ #! class 'bitwhich' represents a boolean selection in one of the following ways #! \itemize{ #! \item FALSE to select nothing #! \item TRUE to select everything #! \item unique positive integers to select those #! \item unique negative integers to exclude those #! } #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{as.bitwhich}}, \code{\link{as.which}}, \code{\link{bit}} } #! \examples{ #! bitwhich(12, x=c(1,3), poslength=2) #! bitwhich(12, x=-c(1,3), poslength=10) #! } #! \keyword{ classes } #! \keyword{ logic } bitwhich <- function(maxindex, poslength=NULL, x=NULL){ if (is.null(x)){ x <- FALSE poslength <- 0L }else{ poslength <- as.integer(poslength) } attr(x, "maxindex") <- as.integer(maxindex) attr(x, "poslength") <- poslength # NOTE: here we want one (1) copy of x to not modify argument x # therefore we did not replace the oldClass assignment with a call to setttattr oldClass(x) <- "bitwhich" x } print.bitwhich <- function(x, ...){ cat("bitwhich: ", sum(x), "/", length(x), "\n", sep="") } #! \name{is.bit} #! \alias{is.ri} #! \alias{is.bit} #! \alias{is.bitwhich} #! \title{ Testing for bit, bitwhich and ri selection classes } #! \description{ #! Test whether an object inherits from 'ri', 'bit' or 'bitwhich' #! } #! \usage{ #! is.ri(x) #! is.bit(x) #! is.bitwhich(x) #! } #! \arguments{ #! \item{x}{ an R object of unknown type } #! } #! \value{ #! TRUE or FALSE #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{is.logical}}, \code{\link{bit}}, \code{\link{bitwhich}} } #! \examples{ #! is.ri(TRUE) #! is.ri(ri(1,4,12)) #! is.bit(TRUE) #! is.bitwhich(TRUE) #! is.bit(as.bit(TRUE)) #! is.bitwhich(as.bitwhich(TRUE)) #! } #! \keyword{ classes } #! \keyword{ logic } is.ri <- function(x) inherits(x, "ri") is.bit <- function(x) inherits(x, "bit") is.bitwhich <- function(x) inherits(x, "bitwhich") #! \name{length.bit} #! \alias{length.bit} #! \alias{length.bitwhich} #! \alias{length.ri} #! \alias{length<-.bit} #! \alias{length<-.bitwhich} #! \title{ Getting and setting length of bit, bitwhich and ri objects } #! \description{ #! Query the number of bits in a \code{\link{bit}} vector or change the number of bits in a bit vector. \cr #! Query the number of bits in a \code{\link{bitwhich}} vector or change the number of bits in a bit vector. \cr #! } #! \usage{ #! \method{length}{ri}(x) #! \method{length}{bit}(x) #! \method{length}{bitwhich}(x) #! \method{length}{bit}(x) <- value #! \method{length}{bitwhich}(x) <- value #! } #! \arguments{ #! \item{x}{ a \code{\link{bit}}, \code{\link{bitwhich}} or \code{\link{ri}} object } #! \item{value}{ the new number of bits } #! } #! \details{ #! NOTE that the length does NOT reflect the number of selected (\code{TRUE}) bits, it reflects the sum of both, \code{TRUE} and \code{FALSE} bits. #! Increasing the length of a \code{\link{bit}} object will set new bits to \code{FALSE}. #! The behaviour of increasing the length of a \code{\link{bitwhich}} object is different and depends on the content of the object: #! \itemize{ #! \item{TRUE}{all included, new bits are set to \code{TRUE}} #! \item{positive integers}{some included, new bits are set to \code{FALSE}} #! \item{negative integers}{some excluded, new bits are set to \code{TRUE}} #! \item{FALSE}{all excluded:, new bits are set to \code{FALSE}} #! } #! Decreasing the length of bit or bitwhich removes any previous information about the status bits above the new length. #! } #! \value{ #! the length A bit vector with the new length #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{length}}, \code{\link[=sum.bit]{sum}}, \code{\link[ff]{poslength}}, \code{\link[ff]{maxindex}} } #! \examples{ #! stopifnot(length(ri(1, 1, 32))==32) #! #! x <- as.bit(ri(32, 32, 32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==1) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==0) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==0) #! #! x <- as.bit(ri(1, 1, 32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==1) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==1) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==1) #! #! x <- as.bitwhich(bit(32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==0) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==0) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==0) #! #! x <- as.bitwhich(!bit(32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==32) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==16) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==32) #! #! x <- as.bitwhich(ri(32, 32, 32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==1) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==0) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==0) #! #! x <- as.bitwhich(ri(2, 32, 32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==31) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==15) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==31) #! #! x <- as.bitwhich(ri(1, 1, 32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==1) #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==1) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==1) #! #! x <- as.bitwhich(ri(1, 31, 32)) #! stopifnot(length(x)==32) #! stopifnot(sum(x)==31) #! message("NOTE the change from 'some excluded' to 'all excluded' here") #! length(x) <- 16 #! stopifnot(length(x)==16) #! stopifnot(sum(x)==16) #! length(x) <- 32 #! stopifnot(length(x)==32) #! stopifnot(sum(x)==32) #! } #! \keyword{ classes } #! \keyword{ logic } length.bit <- function(x) virtual(x)$Length ## The original design here was broken when extending vectors "length<-.bit" <- function(x, value) { value <- as.integer(value) lx <- length(x) if (value != lx) { if (value < lx) { # shrinking if (dn <- value %% .BITS) { n <- value %/% .BITS + 1L ## NB: this changes x in-place .Call("R_bit_replace", x, (value+1L):(value+dn), logical(dn), PACKAGE = "bit") } else { n <- value %/% .BITS } } else { # growing ## underlying vector is already padded with zeroes so just ## extend if needed. n <- .BITS * ceiling(value/.BITS) } pattr <- attr(x, "physical") vattr <- attr(x, "virtual") cl <- oldClass(x) attr(x, "class") <- NULL ## shrink or grow underlying vector: might be a noop length(x) <- if (.BITS == 64L) 2L*n else n attr(vattr, "Length") <- value attr(x, "physical") <- pattr attr(x, "virtual") <- vattr attr(x, "class") <- cl x } else x } length.bitwhich <- function(x) attr(x, "maxindex") "length<-.bitwhich" <- function(x, value){ if (value!=length(x)){ value <- as.integer(value) if (is.integer(x)){ cl <- oldClass(x) oldClass(x) <- NULL if (x[1]>0){ x <- x[x <= value] l <- length(x) if (l==0) x <- FALSE else if (l==value) x <- TRUE else if (l>(value%/%2L)) x <- -as.integer(seq_len(value))[-x] attr(x, "poslength") <- l }else{ x <- x[x >= -value] l <- length(x) if (l==0) x <- TRUE else if (l==value) x <- FALSE else if (!((value-l)>(value%/%2L))) x <- -as.integer(seq_len(value))[-x] attr(x, "poslength") <- value - l } oldClass(x) <- cl }else if(x){ attr(x, "poslength") <- value } attr(x, "maxindex") <- value } x } #! \name{c.bit} #! \alias{c.bit} #! \alias{c.bitwhich} #! \title{ Concatenating bit and bitwhich vectors } #! \description{ #! Creating new bit by concatenating bit vectors #! } #! \usage{ #! \method{c}{bit}(\dots) #! \method{c}{bitwhich}(\dots) #! } #! \arguments{ #! \item{\dots}{ bit objects } #! } #! \value{ #! An object of class 'bit' #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{c}}, \code{\link{bit}} , \code{\link{bitwhich}} } #! \examples{ #! c(bit(4), bit(4)) #! } #! \keyword{ classes } #! \keyword{ logic } c.bit <- function(...){ l <- list(...) nl <- length(l) nold <- sapply(l, length) nnew <- sum(nold) ncum <- cumsum(nold) offsets <- c(0L, ncum[-length(ncum)]) x <- bit(nnew) for (i in as.integer(seq.int(from=1, to=nl, by=1))){ b <- as.bit(l[[i]]) ## returns in x: names not needed .Call("R_bit_shiftcopy", bsource_=b, btarget_=x, otarget_=offsets[i], n_=nold[i], PACKAGE="bit") } x } c.bitwhich <- function(...){ l <- list(...) if (length(l)==1) l[[1]] else as.bitwhich(do.call("c", lapply(l, as.bit))) } #! \name{as.bit} #! \alias{as.bit} #! \alias{as.bit.bit} #! \alias{as.bit.logical} #! \alias{as.bit.integer} #! \alias{as.bit.double} #! \alias{as.bit.bitwhich} #! \alias{as.bit.which} #! \alias{as.bit.ri} #! \title{ Coercing to bit } #! \description{ #! Coercing to bit vector #! } #! \usage{ #! as.bit(x, \dots) #! \method{as.bit}{bit}(x, \dots) #! \method{as.bit}{logical}(x, \dots) #! \method{as.bit}{integer}(x, \dots) #! \method{as.bit}{bitwhich}(x, \dots) #! \method{as.bit}{which}(x, length, \dots) #! \method{as.bit}{ri}(x, \dots) #! } #! \arguments{ #! \item{x}{ an object of class \code{\link{bit}}, \code{\link{logical}}, \code{\link{integer}}, \code{\link{bitwhich}} or an integer from \code{\link{as.which}} or a boolean \code{\link[ff:vmode]{ff}} } #! \item{length}{ the length of the new bit vector } #! \item{\dots}{ further arguments } #! } #! \details{ #! Coercing to bit is quite fast because we use a double loop that fixes each word in a processor register #! } #! \note{ #! Zero is coerced to FALSE, all other numbers including NA are coerced to TRUE. #! This differs from the NA-to-FALSE coercion in package ff and may change in the future. #! } #! \value{ #! \code{is.bit} returns FALSE or TRUE, \code{as.bit} returns a vector of class 'bit' #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bit}}, \code{\link[bit:as.logical.bit]{as.logical}} } #! \examples{ #! x <- as.bit(c(FALSE, NA, TRUE)) #! as.bit(x) #! as.bit.which(c(1,3,4), 12) #! } #! \keyword{ classes } #! \keyword{ logic } as.bit.bit <- function(x, ...) x as.bit.logical <- function(x, ...){ n <- length(x) b <- bit(n) .Call("R_bit_set", b, x, c(1L, n), PACKAGE="bit") } as.bit.integer <- function(x, ...){ n <- length(x) b <- bit(n) .Call("R_bit_set_integer", b, x, c(1L, n), PACKAGE="bit") } as.bit.double <- function(x, ...){ n <- length(x) b <- bit(n) .Call("R_bit_set_integer", b, as.integer(x), c(1L, n), PACKAGE="bit") } as.bit.bitwhich <- function(x, ...){ n <- length(x) p <- sum(x) b <- bit(n) if (is.logical(x)){ if (p==n) b[] <- TRUE }else{ oldClass(x) <- NULL x <- as.integer(x) if (x[1]<0){ b[-x] <- TRUE # remember that negative indices are not allowed (and the assignment value is recycled to the length of the index) b <- !b }else{ b[x] <- TRUE } } b } as.bit.which <- function(x, length, ...){ b <- bit(length) if (length(x)){ x <- as.integer(x) if (x[1]<0){ b[-x] <- TRUE # remember that negative indices are not allowed (and the assignment value is recycled to the length of the index) b <- !b }else{ b[x] <- TRUE } } b } as.bit.ri <- function(x, ...){ b <- bit(length(x)) b[x] <- TRUE b } #! \name{as.logical.bit} #! \alias{as.logical.bit} #! \alias{as.integer.bit} #! \alias{as.double.bit} #! \alias{as.logical.bitwhich} #! \alias{as.integer.bitwhich} #! \alias{as.double.bitwhich} #! \alias{as.logical.ri} #! \alias{as.integer.ri} #! \alias{as.double.ri} #! \title{ Coercion from bit, bitwhich and ri to logical, integer, double } #! \description{ #! Coercing from bit to logical, integer, which. #! } #! \usage{ #! \method{as.logical}{bit}(x, \dots) #! \method{as.logical}{bitwhich}(x, \dots) #! \method{as.logical}{ri}(x, \dots) #! \method{as.integer}{bit}(x, \dots) #! \method{as.integer}{bitwhich}(x, \dots) #! \method{as.integer}{ri}(x, \dots) #! \method{as.double}{bit}(x, \dots) #! \method{as.double}{bitwhich}(x, \dots) #! \method{as.double}{ri}(x, \dots) #! } #! \arguments{ #! \item{x}{ an object of class \code{\link{bit}}, \code{\link{bitwhich}} or \code{\link{ri}} } #! \item{\dots}{ ignored } #! } #! \details{ #! Coercion from bit is quite fast because we use a double loop that fixes each word in a processor register. #! } #! \value{ #! \code{\link{as.logical}} returns a vector of \code{FALSE, TRUE}, \code{\link{as.integer}} and \code{\link{as.double}} return a vector of \code{0,1}. #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{as.bit}}, \code{\link{as.which}}, \code{\link{as.bitwhich}}, \code{\link[ff]{as.ff}}, \code{\link[ff]{as.hi}} } #! \examples{ #! x <- ri(2, 5, 10) #! y <- as.logical(x) #! y #! stopifnot(identical(y, as.logical(as.bit(x)))) #! stopifnot(identical(y, as.logical(as.bitwhich(x)))) #! #! y <- as.integer(x) #! y #! stopifnot(identical(y, as.integer(as.logical(x)))) #! stopifnot(identical(y, as.integer(as.bit(x)))) #! stopifnot(identical(y, as.integer(as.bitwhich(x)))) #! #! y <- as.double(x) #! y #! stopifnot(identical(y, as.double(as.logical(x)))) #! stopifnot(identical(y, as.double(as.bit(x)))) #! stopifnot(identical(y, as.double(as.bitwhich(x)))) #! } #! \keyword{ classes } #! \keyword{ logic } as.logical.bit <- function(x, ...){ l <- logical(length(x)) .Call("R_bit_get", x, l, c(1L, length(x)), PACKAGE="bit") } as.integer.bit <- function(x, ...){ l <- integer(length(x)) .Call("R_bit_get_integer", x, l, c(1L, length(x)), PACKAGE="bit") } as.double.bit <- function(x, ...){ l <- integer(length(x)) as.double(.Call("R_bit_get_integer", x, l, c(1L, length(x)), PACKAGE="bit")) } as.logical.ri <- function(x, ...){ if (is.na(x[3])) stop("cannot coerce to logical from ri object with unknown maxindex") ret <- logical(x[3]) ret[x[1]:x[2]] <- TRUE ret } as.integer.ri <- function(x, ...){ if (is.na(x[3])) stop("cannot coerce to integer from ri object with unknown maxindex") ret <- integer(x[3]) ret[x[1]:x[2]] <- 1L ret } as.double.ri <- function(x, ...){ if (is.na(x[3])) stop("cannot coerce to integer from ri object with unknown maxindex") ret <- double(x[3]) ret[x[1]:x[2]] <- 1 ret } #! \name{as.which} #! \alias{as.which} #! \alias{as.which.default} #! \alias{as.which.bitwhich} #! \alias{as.which.bit} #! \alias{as.which.ri} #! \title{ Coercion to (positive) integer positions } #! \description{ #! Coercing to something like the result of which \code{\link{which}} #! } #! \usage{ #! as.which(x, \dots) #! \method{as.which}{default}(x, \dots) #! \method{as.which}{ri}(x, \dots) #! \method{as.which}{bit}(x, range = NULL, \dots) #! \method{as.which}{bitwhich}(x, \dots) #! } #! \arguments{ #! \item{x}{ an object of classes \code{\link{bit}}, \code{\link{bitwhich}}, \code{\link{ri}} or something on which \code{\link{which}} works } #! \item{range}{ a \code{\link{ri}} or an integer vector of length==2 giving a range restriction for chunked processing } #! \item{\dots}{ further arguments (passed to \code{\link{which}} for the default method, ignored otherwise) } #! } #! \details{ #! \code{as.which.bit} returns a vector of subscripts with class 'which' #! } #! \value{ #! a vector of class 'logical' or 'integer' #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{as.bit}}, \code{\link{as.logical}}, \code{\link{as.integer}}, \code{\link{as.which}}, \code{\link{as.bitwhich}}, \code{\link[ff]{as.ff}}, \code{\link[ff]{as.hi}} } #! \examples{ #! r <- ri(5, 20, 100) #! x <- as.which(r) #! x #! #! stopifnot(identical(x, as.which(as.logical(r)))) #! stopifnot(identical(x, as.which(as.bitwhich(r)))) #! stopifnot(identical(x, as.which(as.bit(r)))) #! } #! \keyword{ classes } #! \keyword{ logic } as.which.default <- function(x, ...){ ret <- which(x) oldClass(ret) <- "which" ret } as.which.ri <- function(x, ...){ ret <- x[1]:x[2] oldClass(ret) <- "which" ret } as.which.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } s <- sum(x, range=range) n <- range[2] - range[1] + 1L if (s==0L){ x <- integer() }else if (s==n){ x <- as.integer(seq.int(from=range[1], to=range[2], by=1)) }else x <- .Call("R_bit_which", x, s, range, negative=FALSE, PACKAGE="bit") oldClass(x) <- "which" x } as.which.bitwhich <- function(x, ...){ if (is.logical(x)){ if (unclass(x)) x <- as.integer(seq_len(length(x))) else x <- integer() }else{ if (x[[1]]<0) x <- as.integer(seq_len(length(x)))[x] else{ attributes(x) <- NULL } } oldClass(x) <- "which" x } #! \name{as.bitwhich} #! \alias{as.bitwhich} #! \alias{as.bitwhich.bit} #! \alias{as.bitwhich.bitwhich} #! \alias{as.bitwhich.ri} #! \alias{as.bitwhich.which} #! \alias{as.bitwhich.integer} #! \alias{as.bitwhich.double} #! \alias{as.bitwhich.logical} #! \title{ Coercing to bitwhich } #! \description{ #! Functions to coerce to bitwhich #! } #! \usage{ #! as.bitwhich(x, \dots) #! \method{as.bitwhich}{bitwhich}(x, \dots) #! \method{as.bitwhich}{ri}(x, \dots) #! \method{as.bitwhich}{bit}(x, range=NULL, \dots) #! \method{as.bitwhich}{which}(x, maxindex, \dots) #! \method{as.bitwhich}{integer}(x, \dots) #! \method{as.bitwhich}{double}(x, \dots) #! \method{as.bitwhich}{logical}(x, \dots) #! } #! \arguments{ #! \item{x}{ An object of class 'bitwhich', 'integer', 'logical' or 'bit' or an integer vector as resulting from 'which' } #! \item{maxindex}{ the length of the new bitwhich vector } #! \item{range}{ a \code{\link{ri}} or an integer vector of length==2 giving a range restriction for chunked processing } #! \item{\dots}{ further arguments } #! } #! \value{ #! a value of class \code{\link{bitwhich}} #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bitwhich}}, \code{\link{as.bit}} } #! \examples{ #! as.bitwhich(c(FALSE, FALSE, FALSE)) #! as.bitwhich(c(FALSE, FALSE, TRUE)) #! as.bitwhich(c(FALSE, TRUE, TRUE)) #! as.bitwhich(c(TRUE, TRUE, TRUE)) #! } #! \keyword{ classes } #! \keyword{ logic } as.bitwhich.bitwhich <- function(x, ...){ x } as.bitwhich.which <- function(x, maxindex, ...){ poslength <- length(x) if (missing(maxindex)) stop("you must provide maxindex with as.bitwhich.integer()") if (poslength==0) bitwhich(maxindex, poslength, FALSE) else if (poslength==maxindex) bitwhich(maxindex, poslength, TRUE) else if (poslength>(maxindex%/%2L)){ bitwhich(maxindex, poslength, -as.integer(seq_len(maxindex))[-x]) }else{ bitwhich(maxindex, poslength, x) } } as.bitwhich.ri <- function(x, ...){ poslength <- sum(x) maxindex <- length(x) if (poslength==0) bitwhich(maxindex, poslength, FALSE) else if (poslength==maxindex) bitwhich(maxindex, poslength, TRUE) else if (poslength>(maxindex%/%2L)){ if (x[1]>1L) a <- 1:(x[1]-1L) else a <- integer() if (x[2](maxindex%/%2L)){ bitwhich(maxindex, poslength, -which(!x)) }else{ bitwhich(maxindex, poslength, which(x)) } } as.bitwhich.bit <- function(x, range=NULL, ...){ maxindex <- length(x) if (is.null(range)) range <- c(1L, maxindex) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>maxindex) stop("illegal range") } poslength <- sum(x, range=range, na.rm=TRUE) if (poslength==0) bitwhich(maxindex, poslength, FALSE) else if (poslength==maxindex) bitwhich(maxindex, poslength, TRUE) else{ if (poslength>(maxindex%/%2L)){ bitwhich(maxindex, poslength, .Call("R_bit_which", x, maxindex - poslength, range=range, negative=TRUE, PACKAGE="bit")) }else{ bitwhich(maxindex, poslength, .Call("R_bit_which", x, poslength, range=range, negative=FALSE, PACKAGE="bit")) } } } as.integer.bitwhich <- function(x, ...){ n <- length(x) if (is.logical(x)){ if (sum(x)==n) rep(1L, n) else rep(0L, n) }else{ ret <- integer(n) ret[x] <- 1L ret } } as.double.bitwhich <- function(x, ...){ n <- length(x) if (is.logical(x)){ if (sum(x)==n) rep(1, n) else rep(0, n) }else{ ret <- double(n) ret[x] <- 1 ret } } as.logical.bitwhich <- function(x, ...){ n <- length(x) p <- sum(x) if (p==0){ rep(FALSE, length(x)) }else if (p==n){ rep(TRUE, length(x)) }else{ ret <- logical(length(x)) ret[x] <- TRUE ret } } # xx #"[.bitwhich" <- function(x, i){ # if (inherits(i, "bitwhich")){ # nx <- length(x) # ni <- length(i) # px <- poslength(x) # pi <- poslength(i) # if (is.logical(x)){ # if (is.logical(i)){ # if (unclass(x) && unclass(i)) # return(bitwhich()) # else # return() # }else{ # } # }else{ # if (is.logical(i)){ # }else{ # } # } # }else # stop("not implemented") #} #! \name{LogicBit} #! \alias{LogicBit} #! \alias{!.bit} #! \alias{!.bitwhich} #! \alias{&.bit} #! \alias{&.bitwhich} #! \alias{|.bit} #! \alias{|.bitwhich} #! \alias{==.bit} #! \alias{==.bitwhich} #! \alias{!=.bit} #! \alias{!=.bitwhich} #! \alias{xor} #! \alias{xor.default} #! \alias{xor.bit} #! \alias{xor.bitwhich} #! \title{ Boolean operators and functions for class bit } #! \description{ #! Boolean 'negation', 'and', 'or' and 'exclusive or'. #! } #! \usage{ #! \method{!}{bit}(x) #! \method{!}{bitwhich}(x) #! \method{&}{bit}(e1, e2) #! \method{&}{bitwhich}(e1, e2) #! \method{|}{bit}(e1, e2) #! \method{|}{bitwhich}(e1, e2) #! \method{==}{bit}(e1, e2) #! \method{==}{bitwhich}(e1, e2) #! \method{!=}{bit}(e1, e2) #! \method{!=}{bitwhich}(e1, e2) #! xor(x, y) #! \method{xor}{default}(x, y) #! \method{xor}{bit}(x, y) #! \method{xor}{bitwhich}(x, y) #! } #! \arguments{ #! \item{x}{ a bit vector (or one logical vector in binary operators) } #! \item{y}{ a bit vector or an logical vector } #! \item{e1}{ a bit vector or an logical vector } #! \item{e2}{ a bit vector or an logical vector } #! } #! \details{ #! Binary operators and function \code{xor} can combine 'bit' objects and 'logical' vectors. #! They do not recycle, thus the lengths of objects must match. Boolean operations on bit vectors are extremely fast #! because they are implemented using C's bitwise operators. If one argument is 'logical' it is converted to 'bit'. \cr #! #! Binary operators and function \code{xor} can combine 'bitwhich' objects and other vectors. #! They do not recycle, thus the lengths of objects must match. Boolean operations on bitwhich vectors are fast #! if the distribution of TRUE and FALSE is very asymetric. If one argument is not 'bitwhich' it is converted to 'bitwhich'. \cr #! #! The \code{xor} function has been made generic and \code{xor.default} has been implemented much faster than R's standard \code{\link[base]{xor}}. #! This was possible because actually boolean function \code{xor} and comparison operator \code{!=} do the same (even with NAs), and \code{!=} is much faster than the multiple calls in \code{(x | y) & !(x & y)} #! } #! \value{ #! An object of class 'bit' (or 'bitwhich') #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bit}}, \code{\link{Logic}} } #! \examples{ #! x <- as.bit(c(FALSE, FALSE, FALSE, NA, NA, NA, TRUE, TRUE, TRUE)) #! yl <- c(FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE) #! y <- as.bit(yl) #! !x #! x & y #! x | y #! xor(x, y) #! x != y #! x == y #! x & yl #! x | yl #! xor(x, yl) #! x != yl #! x == yl #! #! x <- as.bitwhich(c(FALSE, FALSE, FALSE, NA, NA, NA, TRUE, TRUE, TRUE)) #! yl <- c(FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE) #! y <- as.bitwhich(yl) #! !x #! x & y #! x | y #! xor(x, y) #! x != y #! x == y #! x & yl #! x | yl #! xor(x, yl) #! x != yl #! x == yl #! } #! \keyword{ classes } #! \keyword{ logic } "!.bit" <- function(x){ if (length(x)){ ret <- x ret[1] <- ret[1] # force duplication .Call("R_bit_not", ret, PACKAGE="bit") }else{ x } } "&.bit" <- function(e1, e2){ n <- length(e1) if(n!=length(e2)) stop("length(e1) != length(e2)") e1 <- as.bit(e1) e2 <- as.bit(e2) ret <- bit(n) .Call("R_bit_and", e1, e2, ret, PACKAGE="bit") } "|.bit" <- function(e1, e2){ n <- length(e1) if(n!=length(e2)) stop("length(e1) != length(e2)") e1 <- as.bit(e1) e2 <- as.bit(e2) ret <- bit(n) .Call("R_bit_or", e1, e2, ret, PACKAGE="bit") } xor.default <- function(x,y){ as.logical(x) != as.logical(y) } "xor.bit" <- function(x, y){ n <- length(x) if(n!=length(y)) stop("length(x) != length(y)") x <- as.bit(x) y <- as.bit(y) ret <- bit(n) .Call("R_bit_xor", x, y, ret, PACKAGE="bit") } "!=.bit" <- function(e1, e2){ n <- length(e1) if(n!=length(e2)) stop("length(e1) != length(e2)") e1 <- as.bit(e1) e2 <- as.bit(e2) ret <- bit(n) .Call("R_bit_xor", e1, e2, ret, PACKAGE="bit") } "==.bit" <- function(e1, e2){ n <- length(e1) if(n!=length(e2)) stop("length(e1) != length(e2)") e1 <- as.bit(e1) e2 <- as.bit(e2) ret <- bit(n) .Call("R_bit_equal", e1, e2, ret, PACKAGE="bit") } "!.bitwhich" <- function(x){ n <- length(x) p <- sum(x) if (is.logical(x)){ if (p==n){ bitwhich(maxindex=n, poslength=0L, FALSE) }else{ bitwhich(maxindex=n, poslength=n, TRUE) } }else{ bitwhich(maxindex=n, poslength=n-p, -x) } } "&.bitwhich" <- function(e1, e2){ e1 <- as.bitwhich(e1) e2 <- as.bitwhich(e2) n <- c(length(e1), length(e2)) if(n[1]!=n[2]) stop("length(e1) != length(e2)") p <- c(sum(e1), sum(e2)) if (p[1]==0 || p[2]==0) return(bitwhich(n[1], 0L, FALSE)) if (p[1]==n[1]) return(e2) if (p[2]==n[2]) return(e1) #negative <- p>(n%/%2L) negative <- c(e1[1]<0, e2[1]<0) if (negative[1]){ if (negative[2]){ ret <- union(e1, e2) return( bitwhich(maxindex=n[1], poslength=n[1]-length(ret), ret) ) }else{ ret <- setdiff(e2, !e1) return( bitwhich(maxindex=n[1], poslength=length(ret), if (length(ret)) ret else FALSE) ) } }else{ if (negative[2]){ ret <- setdiff(e1, !e2) return( bitwhich(maxindex=n[1], poslength=length(ret), if (length(ret)) ret else FALSE) ) }else{ ret <- intersect(e1, e2) return( bitwhich(maxindex=n[1], poslength=length(ret), if (length(ret)) ret else FALSE) ) } } #as.bitwhich(as.bit(e1) & as.bit(e2)) } "|.bitwhich" <- function(e1, e2){ e1 <- as.bitwhich(e1) e2 <- as.bitwhich(e2) n <- c(length(e1), length(e2)) if(n[1]!=n[2]) stop("length(e1) != length(e2)") p <- c(sum(e1), sum(e2)) if (p[1]==n[1] || p[2]==n[2]) return(bitwhich(n[1], n[1], TRUE)) if (p[1]==0) return(e2) if (p[2]==0) return(e1) #negative <- p>(n%/%2L) negative <- c(e1[1]<0, e2[1]<0) if (negative[1]){ if (negative[2]){ ret <- intersect(e1, e2) return( bitwhich(maxindex=n[1], poslength=n[1]-length(ret), if (length(ret)) ret else TRUE) ) }else{ ret <- setdiff(e1, !e2) return( bitwhich(maxindex=n[1], poslength=n[1]-length(ret), if (length(ret)) ret else TRUE) ) } }else{ if (negative[2]){ ret <- setdiff(e2, !e1) return( bitwhich(maxindex=n[1], poslength=n[1]-length(ret), if (length(ret)) ret else TRUE) ) }else{ ret <- union(e1, e2) return( bitwhich(maxindex=n[1], poslength=length(ret), ret) ) } } #as.bitwhich(as.bit(e1) | as.bit(e2)) } "xor.bitwhich" <- function(x, y){ x <- as.bitwhich(x) y <- as.bitwhich(y) n <- c(length(x), length(y)) if(n[1]!=n[2]) stop("length(x) != length(y)") p <- c(sum(x), sum(y)) if (p[1]==0) return(y) if (p[1]==n[1]) return(!y) if (p[2]==0) return(x) if (p[2]==n[2]) return(!x) #negative <- p>(n%/%2L) negative <- c(x[1]<0, y[1]<0) if (negative[1]){ if (negative[2]){ ret <- -union(setdiff(y, x), setdiff(x, y)) return( bitwhich(maxindex=n[1], poslength=length(ret), if (length(ret)) ret else FALSE) ) }else{ ret <- union(-setdiff(y, !x), setdiff(x, !y)) return( bitwhich(maxindex=n[1], poslength=n[1]-length(ret), if (length(ret)) ret else TRUE) ) } }else{ if (negative[2]){ ret <- union(-setdiff(x, !y), setdiff(y, !x)) return( bitwhich(maxindex=n[1], poslength=n[1]-length(ret), if (length(ret)) ret else TRUE) ) }else{ ret <- setdiff(union(x, y), intersect(x, y)) return( bitwhich(maxindex=n[1], poslength=length(ret), if (length(ret)) ret else FALSE) ) } } #as.bitwhich(xor(as.bit(x), as.bit(y))) } "!=.bitwhich" <- function(e1, e2) xor.bitwhich(e1, e2) "==.bitwhich" <- function(e1, e2) !xor.bitwhich(e1, e2) #! \name{Summary} #! \alias{all.bit} #! \alias{any.bit} #! \alias{min.bit} #! \alias{max.bit} #! \alias{range.bit} #! \alias{sum.bit} #! \alias{summary.bit} #! \alias{all.bitwhich} #! \alias{any.bitwhich} #! \alias{min.bitwhich} #! \alias{max.bitwhich} #! \alias{range.bitwhich} #! \alias{sum.bitwhich} #! \alias{summary.bitwhich} #! \alias{all.ri} #! \alias{any.ri} #! \alias{min.ri} #! \alias{max.ri} #! \alias{range.ri} #! \alias{sum.ri} #! \alias{summary.ri} #! \title{ Summaries of bit vectors } #! \description{ #! Fast aggregation functions for bit vectors. #! } #! \usage{ #! \method{all}{bit}(x, range = NULL, \dots) #! \method{any}{bit}(x, range = NULL, \dots) #! \method{min}{bit}(x, range = NULL, \dots) #! \method{max}{bit}(x, range = NULL, \dots) #! \method{range}{bit}(x, range = NULL, \dots) #! \method{sum}{bit}(x, range = NULL, \dots) #! \method{summary}{bit}(object, range = NULL, \dots) #! \method{all}{bitwhich}(x, \dots) #! \method{any}{bitwhich}(x, \dots) #! \method{min}{bitwhich}(x, \dots) #! \method{max}{bitwhich}(x, \dots) #! \method{range}{bitwhich}(x, \dots) #! \method{sum}{bitwhich}(x, \dots) #! \method{summary}{bitwhich}(object, \dots) #! \method{all}{ri}(x, \dots) #! \method{any}{ri}(x, \dots) #! \method{min}{ri}(x, \dots) #! \method{max}{ri}(x, \dots) #! \method{range}{ri}(x, \dots) #! \method{sum}{ri}(x, \dots) #! \method{summary}{ri}(object, \dots) #! } #! \arguments{ #! \item{x}{ an object of class bit or bitwhich } #! \item{object}{ an object of class bit } #! \item{range}{ a \code{\link{ri}} or an integer vector of length==2 giving a range restriction for chunked processing } #! \item{\dots}{ formally required but not used } #! } #! \details{ #! Bit summaries are quite fast because we use a double loop that fixes each word in a processor register. #! Furthermore we break out of looping as soon as possible. #! } #! \value{ #! as expected #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bit}}, \code{\link{all}}, \code{\link{any}}, \code{\link{min}}, \code{\link{max}}, \code{\link{range}}, \code{\link{sum}}, \code{\link{summary}} } #! \examples{ #! x <- as.bit(c(TRUE, TRUE)) #! all(x) #! any(x) #! min(x) #! max(x) #! range(x) #! sum(x) #! summary(x) #! #! x <- as.bitwhich(c(TRUE, TRUE)) #! all(x) #! any(x) #! min(x) #! max(x) #! range(x) #! sum(x) #! summary(x) #! #! \dontrun{ #! n <- .Machine$integer.max #! x <- !bit(n) #! N <- 1000000L # batchsize #! B <- n \%/\% N # number of batches #! R <- n \%\% N # rest #! #! message("Batched sum (52.5 sec on Centrino duo)") #! system.time({ #! s <- 0L #! for (b in 1:B){ #! s <- s + sum(x[((b-1L)*N+1L):(b*N)]) #! } #! if (R) #! s <- s + sum(x[(n-R+1L):n]) #! }) #! #! message("Batched sum saving repeated memory allocation for the return vector #! (44.4 sec on Centrino duo)") #! system.time({ #! s <- 0L #! l <- logical(N) #! for (b in 1:B){ #! .Call("R_bit_extract", x, length(x), ((b-1L)*N+1L):(b*N), l, PACKAGE = "bit") #! s <- s + sum(l) #! } #! if (R) #! s <- s + sum(x[(n-R+1L):n]) #! }) #! #! message("C-coded sum (3.1 sec on Centrino duo)") #! system.time(sum(x)) #! } #! } #! \keyword{ classes } #! \keyword{ logic } sum.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } .Call("R_bit_sum", x, range, PACKAGE="bit") } all.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } .Call("R_bit_all", x, range, PACKAGE="bit") } any.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } .Call("R_bit_any", x, range, PACKAGE="bit") } min.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } .Call("R_bit_min", x, range, PACKAGE="bit") } max.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } .Call("R_bit_max", x, range, PACKAGE="bit") } range.bit <- function(x, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(x)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(x)) stop("illegal range") } ret <- integer(2) ret[1] <- .Call("R_bit_min", x, range, PACKAGE="bit") if (is.na(ret[1])) ret[2] <- NA else ret[2] <- .Call("R_bit_max", x, range, PACKAGE="bit") ret } summary.bit <- function(object, range=NULL, ...){ if (is.null(range)) range <- c(1L, length(object)) else{ range <- as.integer(range[1:2]) if (range[1]<1L || range[2]>length(object)) stop("illegal range") } s <- sum(object, range=range) r <- range(object, range=range) c("FALSE"=range[2]-range[1]+1L-s, "TRUE"=s, "Min."=r[1], "Max."=r[2]) } sum.bitwhich <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") attr(x, "poslength") } all.bitwhich <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") attr(x, "poslength") == attr(x, "maxindex") } any.bitwhich <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") attr(x, "poslength") > 0L } min.bitwhich <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") n <- attr(x, "maxindex") p <- attr(x, "poslength") if (p==0) return(as.integer(NA)) if (p==n) return(n) #negative <- p>(n%/%2L) negative <- x[1]<0 if (negative){ min(as.bit(x)) }else{ min(unclass(x)) } } max.bitwhich <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") n <- attr(x, "maxindex") p <- attr(x, "poslength") if (p==0) return(as.integer(NA)) if (p==n) return(n) #negative <- p>(n%/%2L) negative <- x[1]<0 if (negative){ max(as.bit(x)) }else{ max(unclass(x)) } } range.bitwhich <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") n <- attr(x, "maxindex") p <- attr(x, "poslength") if (p==0) return(as.integer(NA)) if (p==n) return(n) #negative <- p>(n%/%2L) negative <- x[1]<0 if (negative){ range(as.bit(x)) }else{ range(unclass(x)) } } summary.bitwhich <- function(object, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'bitwhich'") n <- attr(object, "maxindex") p <- attr(object, "poslength") if (p==0) return(as.integer(NA)) if (p==n) return(n) #negative <- p>(n%/%2L) negative <- object[1]<0 if (negative){ r <- range(as.bit(object)) }else{ r <- range(object) } c("FALSE"=n-p, "TRUE"=p, "Min."=r[1], "Max."=r[2]) } if (FALSE){ library(bit) # test correctness of max.bit for (n in c(0, 1, 2, 31, 32, 33, 63, 64, 65, 95, 96, 97, 127,128,129)){ for (to1 in seq_len(n)){ cat("n", n, "to", to1, "\n") for (from1 in seq.int(from=1, to=to1, by=1L)){ x <- bit(n) if (!identical(max(x, from=from1, to=to1), as.integer(NA))) stop("wrong") for (i in seq_len(n)){ x[i] <- TRUE if (!identical(i, max(x, from=from1, to=to1))) stop("wrong") } } } } # test correctness of min.bit for (n in c(0, 1, 2, 31, 32, 33, 63, 64, 65, 95, 96, 97, 127,128,129)){ for (to1 in seq_len(n)){ cat("n", n, "to", to1, "\n") for (from1 in seq.int(from=1, to=to1, by=1L)){ x <- bit(n) if (!identical(min(x, from=from1, to=to1), as.integer(NA))) stop("wrong") for (i in rev(seq_len(n))){ x[i] <- TRUE if (!identical(i, min(x, from=from1, to=to1))) stop("wrong") } } } } } #! \name{Extract} #! \alias{[[.bit} #! \alias{[[<-.bit} #! \alias{[.bit} #! \alias{[<-.bit} #! \title{ Extract or replace part of an bit vector } #! \description{ #! Operators acting on bit objects to extract or replace parts. #! } #! \usage{ #! \method{[[}{bit}(x, i) #! \method{[[}{bit}(x, i) <- value #! \method{[}{bit}(x, i) #! \method{[}{bit}(x, i) <- value #! } #! \arguments{ #! \item{x}{ a bit object } #! \item{i}{ positive integer subscript } #! \item{value}{ new logical or integer values } #! } #! \details{ #! Since this package was created for high performance purposes, only positive integer subscripts make sense. #! Negative subscripts are converted to positive ones, beware the RAM consumption. #! Further subscript classes allowed for '[' and '[<-' are range indices \code{\link{ri}} and \code{\link{bitwhich}}. #! The '[' and '[<-' methods don't check whether the subscripts are positive integers in the allowed range. #! } #! \value{ #! The extractors \code{[[} and \code{[} return a logical scalar or vector. #! The replacment functions return a bit object. #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bit}}, \code{\link{Extract}} } #! \examples{ #! x <- as.bit(c(FALSE, NA, TRUE)) #! x[] <- c(FALSE, NA, TRUE) #! x[1:2] #! x[-3] #! x[ri(1,2)] #! x[as.bitwhich(c(TRUE,TRUE,FALSE))] #! x[[1]] #! x[] <- TRUE #! x[1:2] <- FALSE #! x[[1]] <- TRUE #! } #! \keyword{ classes } #! \keyword{ logic } "[[.bit" <- function(x, i){ if (length(i)!=1) stop("subscript length not 1") if (is.numeric(i)){ i <- as.integer(i) if (is.na(i) || i<1L || i>length(x)) stop("subscript must be positive integer (or double) within length") ret <- logical(1L) attr(ret, "vmode") <- "boolean" .Call("R_bit_extract", x, length(x), i, ret, PACKAGE="bit") }else stop("subscript must be positive integer (or double) within length") } "[[<-.bit" <- function(x, i, value){ if (length(i)!=1) stop("subscript length not 1") if (length(value)!=1) stop("value length not 1") if (is.numeric(i)){ i <- as.integer(i) if (is.na(i) || i<1L) stop("subscript must be positive integer (or double)") if ((mi <- max(i))>length(x)) length(x) <- mi value2 <- as.logical(value) ## NB: this changes x in-place .Call("R_bit_replace", x, i, value2, PACKAGE="bit") }else stop("subscript must be positive integer (or double) within length") } if (FALSE){ library(ff) library(bit) a <- bit(100) a[1] <- T a[100] <- T a[] a[99:100] a[range=c(99,100)] a[range=c(1,100)] <- TRUE a a[range=c(1,100)] <- FALSE a } "[.bit" <- function(x, i){ nx <- length(x) if ( missing(i) ){ ret <- logical(nx) .Call("R_bit_get", x, ret, range=c(1L, nx), PACKAGE="bit") }else if(is.numeric(i)){ if (inherits(i, "ri")){ if (i[1]<1L || i[2]>nx ) stop("illegal range index 'ri'") ret <- logical(i[2]-i[1]+1L) .Call("R_bit_get", x, ret, range=i, PACKAGE="bit") }else{ if (inherits(i, "bitwhich")){ i <- as.which(i) n <- length(i) }else{ i <- as.integer(i) n <- length(i) if (n && i[1]<0){ i <- (as.integer(seq_along(x)))[i] n <- length(i) } } ret <- logical(n) if (n) .Call("R_bit_extract", x, nx, i, ret, PACKAGE="bit") } }else if(is.logical(i)){ if (length(i)!=1 || is.na(i)){ stop("only scalar TRUE or FALSE allowed") }else{ if (i){ ret <- logical(nx) .Call("R_bit_get", x, ret, range=c(1L, nx), PACKAGE="bit") }else{ ret <- logical() } } }else stop("subscript must be integer (or double) or bitwhich or TRUE or FALSE") attr(ret, "vmode") <- "boolean" ret } "[<-.bit" <- function(x, i, value){ nx <- length(x) if ( missing(i) ){ if (length(value)==nx){ value2 <- as.logical(value) }else{ value2 <- logical(nx) value2[] <- value } .Call("R_bit_set", x, value2, range=c(1L, nx), PACKAGE="bit") }else if(is.numeric(i)){ if (inherits(i, "ri")){ if (i[1]<1L) stop("illegal range index 'ri'") if (i[2]>nx) length(x) <- i[2] n <- i[2] - i[1] + 1L if (length(value)==n){ value2 <- as.logical(value) }else{ value2 <- logical(n) value2[] <- value } .Call("R_bit_set", x, value2, range=i, PACKAGE="bit") }else{ if (inherits(i, "bitwhich")){ i <- as.which(i) n <- length(i) }else{ i <- as.integer(i) n <- length(i) if (n && i[1]<0){ i <- (as.integer(seq_along(x)))[i] n <- length(i) } } if ((mi <- max(i))>nx) length(x) <- mi if (length(value)==n){ value2 <- as.logical(value) }else{ value2 <- logical(n) value2[] <- value } ## NB: this changes x in-place .Call("R_bit_replace", x, i, value2, PACKAGE="bit") } }else if (is.logical(i)){ if (length(i)!=1 || is.na(i)){ stop("only scalar TRUE or FALSE allowed") }else{ if (i){ if (length(value)==nx){ value2 <- as.logical(value) }else{ value2 <- logical(nx) value2[] <- value } .Call("R_bit_set", x, value2, range=c(1L, nx), PACKAGE="bit") }else{ x } } }else stop("subscript must be integer (or double) or bitwhich or TRUE or FALSE") } #! \name{ri} #! \alias{ri} #! \alias{print.ri} #! \title{ Range index } #! \description{ #! A range index can be used to extract or replace a continuous ascending part of the data #! } #! \usage{ #! ri(from, to = NULL, maxindex=NA) #! \method{print}{ri}(x, \dots) #! #! } #! \arguments{ #! \item{from}{ first position } #! \item{to}{ last posistion } #! \item{x}{ an object of class 'ri' } #! \item{maxindex}{ the maximal length of the object-to-be-subscripted (if known) } #! \item{\dots}{ further arguments } #! } #! \value{ #! A two element integer vector with class 'ri' #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link[ff]{as.hi.ri}} } #! \examples{ #! bit(12)[ri(1,6)] #! } #! \keyword{ classes } #! \keyword{ logic } ri <- function(from, to=NULL, maxindex=NA){ if (is.null(to)){ x <- as.integer(c(from, maxindex)) }else{ x <- as.integer(c(from, to, maxindex)) } maxindex = maxindex if (length(x)!=3 ) stop("range must have exactly three elements") if (x[[1]]<1L) stop("range must at least select one element") if (x[[1]]>x[[2]]) stop("lower bound must be smaller or equal than upper bound") if (!is.na(x[[3]]) && x[[2]]>x[[3]]) stop("lower and upper bound must be smaller or equal to maxindex") oldClass(x) <- "ri" x } print.ri <- function(x, ...) cat("range index (ri) from", x[[1]], "to", x[[2]], "maxindex", x[[3]], "\n") length.ri <- function(x) x[[3]] all.ri <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") x[[1]]<=1L && x[[2]]>=x[[3]] } any.ri <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") TRUE } min.ri <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") x[[1]] } max.ri <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") x[[2]] } range.ri <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") x[1:2] } sum.ri <- function(x, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") x[[2]] - x[[1]] + 1L } summary.ri <- function(object, ...){ if (any(names(match.call(expand.dots = TRUE))=="range")) stop("parameter 'range' allowed only for 'bit' but not for 'ri'") s <- object[[2]] - object[[1]] + 1L c(`FALSE` = object[[3]] - s, `TRUE` = s, Min. = object[[1]], Max. = object[[2]]) } #! \name{physical} #! \alias{physical} #! \alias{physical<-} #! \alias{virtual} #! \alias{virtual<-} #! \alias{physical.default} #! \alias{physical<-.default} #! \alias{virtual.default} #! \alias{virtual<-.default} #! \alias{print.physical} #! \alias{print.virtual} #! \title{ Physical and virtual attributes } #! \description{ #! Compatibility functions (to package ff) for getting and setting physical and virtual attributes. #! } #! \usage{ #! physical(x) #! virtual(x) #! physical(x) <- value #! virtual(x) <- value #! \method{physical}{default}(x) #! \method{virtual}{default}(x) #! \method{physical}{default}(x) <- value #! \method{virtual}{default}(x) <- value #! \method{print}{physical}(x, \dots) #! \method{print}{virtual}(x, \dots) #! } #! \arguments{ #! \item{x}{ a ff or ram object } #! \item{value}{ a list with named elements } #! \item{\dots}{ further arguments } #! } #! \details{ #! ff objects have physical and virtual attributes, which have different copying semantics: #! physical attributes are shared between copies of ff objects while virtual attributes might differ between copies. #! \code{\link[ff]{as.ram}} will retain some physical and virtual atrributes in the ram clone, #! such that \code{\link[ff]{as.ff}} can restore an ff object with the same attributes. #! } #! \value{ #! \command{physical} and \command{virtual} returns a list with named elements #! } #! \author{ Jens Oehlschlägel } #! \seealso{ #! \code{\link[ff]{physical.ff}}, \code{\link[ff]{physical.ffdf}} #! } #! \examples{ #! physical(bit(12)) #! virtual(bit(12)) #! } #! \keyword{ IO } #! \keyword{ data } #! \keyword{ attribute } # this version without vmode() will be overwritte by the version in package ff physical.default <- function(x){ p <- attributes(attr(x, "physical")) p <- p[is.na(match(names(p), "class"))] p } "physical<-.default" <- function(x, value){ attributes(attr(x, "physical")) <- c(value, list(class="physical")) x } virtual.default <- function(x){ v <- attributes(attr(x, "virtual")) v[is.na(match(names(v), "class"))] } "virtual<-.default" <- function(x, value){ attributes(attr(x, "virtual")) <- c(value, list(class="virtual")) x } print.physical <- function(x, ...){ cat("(hidden, use physical(x) to access the physical attributes and vmode(x) for accessing vmode)\n") invisible() } print.virtual <- function(x, ...){ cat("(hidden, use virtual(x) to access the virtual attributes)\n") invisible() } # not exported - just here to avoid cross calling the dll from ff R_bit_as_hi <- function(x, range, offset) .Call("R_bit_as_hi", x, range, offset, PACKAGE="bit") #! \name{regtest.bit} #! \alias{regtest.bit} #! \title{ Regressiontests for bit } #! \description{ #! Test package bit for correctness #! } #! \usage{ #! regtest.bit(N = 100) #! } #! \arguments{ #! \item{N}{ number of random test runs } #! } #! \details{ #! random data of random length are generated and correctness of package functions tested on these #! } #! \value{ #! a vector of class 'logical' or 'integer' #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{bit}}, \code{\link{as.bit}}, \code{\link{as.logical}}, \code{\link{as.integer}}, \code{\link{which}} } #! \examples{ #! if (regtest.bit()){ #! message("regtest.bit is OK") #! }else{ #! message("regtest.bit failed") #! } #! #! \dontrun{ #! regtest.bit(10000) #! } #! } #! \keyword{ classes } #! \keyword{ logic } regtest.bit <- function( N = 100 # number of repetitions for random regression tests ) { #.BITS <- bit:::.BITS # available in package namespace OK <- TRUE pool <- c(FALSE, TRUE) if (!identical(unattr(as.bit(c(FALSE,NA,TRUE))[]), c(FALSE,FALSE,TRUE))){ message("bit error: wrong coercion of triboolean to (bi)boolean") OK <- FALSE } l <- TRUE b <- as.bit(l) i <- -c(1, 0, 1, NA) if (!inherits(try(b[i], silent=TRUE), "try-error")){ message("bit error: did not throw on mixing zero with negative subscripts") OK <- FALSE } i <- c(2, 1, 0, 1, NA) if (!identical(l[i],unattr(b[i]))){ message("\nregression test difference between b[i] and l[i]") print(l[i]) print(unattr(b[i])) OK <- FALSE } l[0] <- TRUE b[0] <- TRUE if (!identical(l,unattr(b[]))){ message("\nregression test difference after assigning at R position zero") print(l) print(unattr(b[])) OK <- FALSE } l[2] <- TRUE b[2] <- TRUE if (!identical(ifelse(is.na(l), FALSE, l),unattr(b[]))){ message("\nregression test difference after assigning after vector length (at 2)") print(l) print(unattr(b[])) OK <- FALSE } l[.BITS+1] <- FALSE b[.BITS+1] <- NA if (!identical(ifelse(is.na(l), FALSE, l),unattr(b[]))){ message("\nregression test difference after assigning after vector length (at .BITS+1)") print(l) print(unattr(b[])) OK <- FALSE } if (!identical(ifelse(is.na(l[TRUE]), FALSE, l[TRUE]),unattr(b[TRUE]))){ message("\nregression test difference after subscripting with scalar TRUE") print(l) print(unattr(b[])) OK <- FALSE } if (!identical(ifelse(is.na(l[FALSE]), FALSE, l[FALSE]),unattr(b[FALSE]))){ message("\nregression test difference after subscripting with scalar FALSE") print(l) print(unattr(b[])) OK <- FALSE } for (i in 1:N){ n <- sample(1:(2*.BITS), 1) l <- sample(pool, n, TRUE) # check direct coercion b <- as.bit(l) l2 <- as.logical(b) if (!identical(l,l2)){ message("\nregression test difference between logical") print(l) message("and as.logical(as.bit(logical))") print(l2) OK <- FALSE } # summary functions with logical return s <- c(all=all(l), any=any(l)) s2 <- c(all=all(b), any=any(b)) if (!identical(s,s2)){ message("\nregression test difference between logical summaries") print(s) message("and bit summaries") print(s2) OK <- FALSE } # summary functions with integer return if (any(l)){ s <- c(min=min(as.which(l)), max=max(as.which(l)), range=range(as.which(l)), sum=sum(l), summary=c("FALSE"=length(l)-sum(l), "TRUE"=sum(l), "Min."=min(as.which(l)), "Max."=max(as.which(l)))) }else{ s <- c( min=as.integer(NA), max=as.integer(NA), range=c(as.integer(NA), as.integer(NA)), sum=sum(l), summary=c("FALSE"=length(l)-sum(l), "TRUE"=sum(l), "Min."=as.integer(NA), "Max."=as.integer(NA)) ) } s2 <- c(min=min(b), max=max(b), range=range(b), sum=sum(b), summary=summary(b)) if (!identical(s,s2)){ message("\nregression test difference between logical summaries") print(s) message("and bit summaries") print(s2) OK <- FALSE } # check positive whichs w <- as.which(l) w2 <- as.which(as.bit.which(w, n)) if (!identical(w,w2)){ message("\nregression test difference between which") print(w) message("and as.which(as.bit.which(which))") print(w2) OK <- FALSE } # check automatic whichs (pos or neg whatever shorter) s <- sum(l) if (s==0){ w <- FALSE }else if (s==n){ w <- TRUE }else if (s>(n%/%2L)){ w <- -rev(which(!l)) }else{ w <- which(l) } w2 <- as.vector(as.bitwhich(as.bit(l))) if (!identical(w,w2)){ message("\nregression test difference between which") print(w) message("and as.which(as.bit.which(which))") print(w2) OK <- FALSE } # check boolean operators l2 <- sample(c(FALSE, TRUE), n, TRUE) b2 <- as.bit(l2) ops <- c( NOT = identical(!l, as.logical(!b)) , AND = identical(l&l2, as.logical(b&b2)) , OR = identical(l|l2, as.logical(b|b2)) , XOR = identical(xor(l,l2), as.logical(xor(b,b2))) , NEQ = identical(l!=l2, as.logical(b!=b2)) , EQ = identical(l==l2, as.logical(b==b2)) ) if (!all(ops)){ message("\nbit differs for boolean operators(s)") print(ops) print(cbind(l=l, l2=l)) OK <- FALSE } w <- as.bitwhich(l) w2 <- as.bitwhich(l2) ops <- c( NOT = identical(!l, as.logical(!w)) , AND = identical(l&l2, as.logical(w&w2)) , OR = identical(l|l2, as.logical(w|w2)) , XOR = identical(xor(l,l2), as.logical(xor(w,w2))) , NEQ = identical(l!=l2, as.logical(w!=w2)) , EQ = identical(l==l2, as.logical(w==w2)) ) if (!all(ops)){ message("\nbitwhich differs for boolean operators(s)") print(ops) print(cbind(l=l, l2=l)) OK <- FALSE } rm(l2,b2,w2) # check extractors n2 <- sample(1:n, 1) j <- sample(1:n, n2) if (!identical(l[j], unattr(b[j]))){ message("\nregression test difference when extracting") OK <- FALSE } # check replacement (index) new <- sample(pool, n2, TRUE) l[j] <- new b[j] <- new if (!identical(l, unattr(b[]))){ message("\nregression test difference when replacing with index") OK <- FALSE } # check replacement (recycle) if (n%%2){ new <- sample(pool, 1) l[] <- new b[] <- new }else{ l[] <- pool b[] <- pool } if (!identical(l, as.logical(b))){ message("\nregression test difference when replacing with recylcling") OK <- FALSE } } l0 <- c(FALSE, FALSE, FALSE) l1 <- c(FALSE, FALSE, TRUE) l2 <- c(FALSE, TRUE, TRUE) l3 <- c(TRUE, TRUE, TRUE) bw0 <- as.bitwhich(l0) bw1 <- as.bitwhich(l1) bw2 <- as.bitwhich(l2) bw3 <- as.bitwhich(l3) OK <- OK && identical(l0, as.logical(bw0)) OK <- OK && identical(l1, as.logical(bw1)) OK <- OK && identical(l2, as.logical(bw2)) OK <- OK && identical(l3, as.logical(bw3)) OK <- OK && identical(l0 & l0, as.logical(bw0 & bw0)) OK <- OK && identical(l0 & l1, as.logical(bw0 & bw1)) OK <- OK && identical(l0 & l2, as.logical(bw0 & bw2)) OK <- OK && identical(l0 & l3, as.logical(bw0 & bw3)) OK <- OK && identical(l1 & l0, as.logical(bw1 & bw0)) OK <- OK && identical(l1 & l1, as.logical(bw1 & bw1)) OK <- OK && identical(l1 & l2, as.logical(bw1 & bw2)) OK <- OK && identical(l1 & l3, as.logical(bw1 & bw3)) OK <- OK && identical(l2 & l0, as.logical(bw2 & bw0)) OK <- OK && identical(l2 & l1, as.logical(bw2 & bw1)) OK <- OK && identical(l2 & l2, as.logical(bw2 & bw2)) OK <- OK && identical(l2 & l3, as.logical(bw2 & bw3)) OK <- OK && identical(l3 & l0, as.logical(bw3 & bw0)) OK <- OK && identical(l3 & l1, as.logical(bw3 & bw1)) OK <- OK && identical(l3 & l2, as.logical(bw3 & bw2)) OK <- OK && identical(l3 & l3, as.logical(bw3 & bw3)) OK <- OK && identical(l0 | l0, as.logical(bw0 | bw0)) OK <- OK && identical(l0 | l1, as.logical(bw0 | bw1)) OK <- OK && identical(l0 | l2, as.logical(bw0 | bw2)) OK <- OK && identical(l0 | l3, as.logical(bw0 | bw3)) OK <- OK && identical(l1 | l0, as.logical(bw1 | bw0)) OK <- OK && identical(l1 | l1, as.logical(bw1 | bw1)) OK <- OK && identical(l1 | l2, as.logical(bw1 | bw2)) OK <- OK && identical(l1 | l3, as.logical(bw1 | bw3)) OK <- OK && identical(l2 | l0, as.logical(bw2 | bw0)) OK <- OK && identical(l2 | l1, as.logical(bw2 | bw1)) OK <- OK && identical(l2 | l2, as.logical(bw2 | bw2)) OK <- OK && identical(l2 | l3, as.logical(bw2 | bw3)) OK <- OK && identical(l3 | l0, as.logical(bw3 | bw0)) OK <- OK && identical(l3 | l1, as.logical(bw3 | bw1)) OK <- OK && identical(l3 | l2, as.logical(bw3 | bw2)) OK <- OK && identical(l3 | l3, as.logical(bw3 | bw3)) OK <- OK && identical(xor(l0,l0), as.logical(xor(bw0,bw0))) OK <- OK && identical(xor(l0,l1), as.logical(xor(bw0,bw1))) OK <- OK && identical(xor(l0,l2), as.logical(xor(bw0,bw2))) OK <- OK && identical(xor(l0,l3), as.logical(xor(bw0,bw3))) OK <- OK && identical(xor(l1,l0), as.logical(xor(bw1,bw0))) OK <- OK && identical(xor(l1,l1), as.logical(xor(bw1,bw1))) OK <- OK && identical(xor(l1,l2), as.logical(xor(bw1,bw2))) OK <- OK && identical(xor(l1,l3), as.logical(xor(bw1,bw3))) OK <- OK && identical(xor(l2,l0), as.logical(xor(bw2,bw0))) OK <- OK && identical(xor(l2,l1), as.logical(xor(bw2,bw1))) OK <- OK && identical(xor(l2,l2), as.logical(xor(bw2,bw2))) OK <- OK && identical(xor(l2,l3), as.logical(xor(bw2,bw3))) OK <- OK && identical(xor(l3,l0), as.logical(xor(bw3,bw0))) OK <- OK && identical(xor(l3,l1), as.logical(xor(bw3,bw1))) OK <- OK && identical(xor(l3,l2), as.logical(xor(bw3,bw2))) OK <- OK && identical(xor(l3,l3), as.logical(xor(bw3,bw3))) OK <- OK && identical(c(l0,l0), as.logical(c(bw0,bw0))) OK <- OK && identical(c(l0,l1), as.logical(c(bw0,bw1))) OK <- OK && identical(c(l0,l2), as.logical(c(bw0,bw2))) OK <- OK && identical(c(l0,l3), as.logical(c(bw0,bw3))) OK <- OK && identical(c(l1,l0), as.logical(c(bw1,bw0))) OK <- OK && identical(c(l1,l1), as.logical(c(bw1,bw1))) OK <- OK && identical(c(l1,l2), as.logical(c(bw1,bw2))) OK <- OK && identical(c(l1,l3), as.logical(c(bw1,bw3))) OK <- OK && identical(c(l2,l0), as.logical(c(bw2,bw0))) OK <- OK && identical(c(l2,l1), as.logical(c(bw2,bw1))) OK <- OK && identical(c(l2,l2), as.logical(c(bw2,bw2))) OK <- OK && identical(c(l2,l3), as.logical(c(bw2,bw3))) OK <- OK && identical(c(l3,l0), as.logical(c(bw3,bw0))) OK <- OK && identical(c(l3,l1), as.logical(c(bw3,bw1))) OK <- OK && identical(c(l3,l2), as.logical(c(bw3,bw2))) OK <- OK && identical(c(l3,l3), as.logical(c(bw3,bw3))) N <- 2L*.BITS l <- logical(N) b <- bit(N) for (i in 1:N){ l[i] <- TRUE b[i] <- TRUE if (!identical(l,as.logical(b))){ message("\nregression test difference when replacing at position", i, "") OK <- FALSE } } OK } bit/R/timeutil.R0000755000176000001440000000327113264143150013245 0ustar ripleyusers# timing utilities for ff and bit # (c) 2012 Jens Oehlschlägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2012-05-28 #! \name{repeat.time} #! \alias{repeat.time} #! \title{ #! Adaptive timer #! } #! \description{ #! Repeats timing expr until minSec is reached #! } #! \usage{ #! repeat.time(expr, gcFirst = TRUE, minSec = 0.5, envir=parent.frame()) #! } #! \arguments{ #! \item{expr}{Valid \R expression to be timed.} #! \item{gcFirst}{Logical - should a garbage collection be performed #! immediately before the timing? Default is \code{TRUE}.} #! \item{minSec}{number of seconds to repeat at least} #! \item{envir}{the environment in which to evaluate \code{expr} (by default the calling frame)} #! } #! \value{ #! A object of class \code{"proc_time"}: see #! \code{\link{proc.time}} for details. #! } #! \seealso{ #! \code{\link{system.time}} #! } #! \author{ #! Jens Oehlschlägel #! } #! \examples{ #! system.time(1+1) #! repeat.time(1+1) #! system.time(sort(runif(1e6))) #! repeat.time(sort(runif(1e6))) #! } #! \keyword{utilities} repeat.time <- function (expr, gcFirst = TRUE, minSec=0.5, envir=parent.frame()) { ppt <- function(y) { if (!is.na(y[4L])) y[1L] <- y[1L] + y[4L] if (!is.na(y[5L])) y[2L] <- y[2L] + y[5L] y[1L:3L] } if (!exists("proc.time")) return(rep(NA_real_, 5L)) if (gcFirst) gc(FALSE) time <- proc.time() on.exit(cat("Timing stopped at:", ppt(proc.time() - time), "\n")) r <- 0L while((proc.time()[3]-time[3]) < minSec){ r <- r + 1L eval(substitute(expr), envir=envir) } new.time <- proc.time() on.exit() structure((new.time - time)/r, class = "proc_time") } bit/R/chunkutil.R0000755000176000001440000003076213264143103013422 0ustar ripleyusers# Chunking utilities for bit and ff # (c) 2007-2009 Jens Oehlschägel # Licence: GPL2 # Provided 'as is', use at your own risk # Created: 2007-09-03 # Last changed: 2007-10-25 # source("D:/mwp/eanalysis/bit/R/chunkutil.R") #! \name{bbatch} #! \alias{bbatch} #! \title{ Balanced Batch sizes } #! \description{ #! \command{bbatch} calculates batch sizes so that they have rather balanced sizes than very different sizes #! } #! \usage{ #! bbatch(N, B) #! } #! \arguments{ #! \item{N}{ total size } #! \item{B}{ desired batch size } #! } #! \value{ #! a list with components #! \item{ b }{ the batch size } #! \item{ nb }{ the number of batches } #! \item{ rb }{ the size of the rest } #! } #! \details{ #! Tries to have \code{rb==0} or \code{rb} as close to \code{b} as possible while guaranteing that \code{rb < b && (b - rb) <= min(nb, b)} #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{repfromto}}, \code{\link[ff]{ffvecapply}} } #! \examples{ #! bbatch(100, 24) #! } #! \keyword{ IO } #! \keyword{ data } # non-vectorized #bbatch <- #function(N,B){ # N <- as.integer(N) # B <- as.integer(B) # RB <- N %% B # NB <- N %/% B # if (RB){ # cc <- min((B - RB) %/% NB, (B - RB) %/% (NB + 1L)) # if (cc){ # rb <- RB + cc * NB # b <- B - cc # if (rb==b){ # return(list(b=b, nb=NB+1L, rb=0L)) # }else{ # return(list(b=b, nb=NB, rb=rb)) # } # }else{ # return(list(b=B, nb=NB, rb=RB)) # } # }else{ # return(list(b=B, nb=NB, rb=RB)) # } #} # balanced batchsizes bbatch <- function(N,B){ if (any(B<1)) stop("B too small") N <- as.integer(N) B <- as.integer(B) RB <- N %% B NB <- N %/% B cc <- pmin((B - RB) %/% NB, (B - RB) %/% (NB + 1L)) cc[RB==0 | NB == 0] <- 0 i <- cc > 0 RB[i] <- RB[i] + cc[i] * NB[i] B[i] <- B[i] - cc[i] j <- i & (RB[i] == B[i]) NB[i & j] <- NB[i & j] + 1L RB[i & j] <- 0L i <- (RB>0) & (NB == 0) B[i] <- RB[i] NB[i] <- 1L RB[i] <- 0L return(list(b=B, nb=NB, rb=RB)) } #! \name{repfromto} #! \alias{repfromto} #! \alias{repfromto<-} #! \title{ Virtual recycling } #! \description{ #! \command{repfromto} virtually recylcles object \code{x} and cuts out positions \code{from .. to} #! } #! \usage{ #! repfromto(x, from, to) #! repfromto(x, from, to) <- value #! } #! \arguments{ #! \item{x}{ an object from which to recycle } #! \item{from}{ first position to return } #! \item{to}{ last position to return } #! \item{value}{ value to assign } #! } #! \details{ #! \code{repfromto} is a generalization of \code{\link{rep}}, where \code{rep(x, n) == repfromto(x, 1, n)}. #! You can see this as an R-side (vector) solution of the \code{mod_iterate} macro in arithmetic.c #! } #! \value{ #! a vector of length \code{from - to + 1} #! } #! \author{ Jens Oehlschlägel } #! \seealso{ \code{\link{rep}}, \code{\link[ff]{ffvecapply}} } #! \examples{ #! message("a simple example") #! repfromto(0:9, 11, 20) #! } #! \keyword{ IO } #! \keyword{ data } repfromto <- function(x, from, to){ nx <- length(x) if (nx){ from <- as.integer(from) to <- as.integer(to) if (to>nx){ N <- to - from + 1L from <- (from-1L)%%nx + 1L to <- to%%nx # NOTE: fetch in sequence pre-main-post in case is.ff(x) if (from<=to && NN) length.out <- N by <- N %/% length.out }else stop("'length.out' must be scalar") } }else{ if (length(by)==1){ by <- as.integer(by) if (by<1) stop("'by' must be > 0") length.out <- (N - 1L) %/% by + 1L }else stop("'by' must be scalar") } if (method=="bbatch") by <- bbatch(N, by)$b if (length.out>1L){ from <- cumsum(c(from, rep(by, length.out - 1L))) to <- c(from[-1], from[1] + N) - 1L # fixed by Edwin de Jonge, 18.1.2011 if (overlap>0) from[-1] <- from[-1] - overlap } n <- length(from) s <- seq_len(n) ret <- vector("list", n) for (i in s){ ret[[i]] <- ri(from[i], to[i], maxindex) } ret } #! \name{vecseq} #! \alias{vecseq} #! \title{ Vectorized Sequences } #! \description{ #! \command{vecseq} returns concatenated multiple sequences #! } #! \usage{ #! vecseq(x, y=NULL, concat=TRUE, eval=TRUE) #! } #! \arguments{ #! \item{x}{ vector of sequence start points } #! \item{y}{ vector of sequence end points (if \code{is.null(y)} then \code{x} are taken as endpoints, all starting at 1) } #! \item{concat}{ vector of sequence end points (if \code{is.null(y)} then \code{x} are taken as endpoints, all starting at 1) } #! \item{eval}{ vector of sequence end points (if \code{is.null(y)} then \code{x} are taken as endpoints, all starting at 1) } #! } #! \details{ #! This is a generalization of \code{\link{sequence}} in that you can choose sequence starts other than 1 and also have options to no concat and/or return a call instead of the evaluated sequence. #! } #! \value{ #! if \code{concat==FALSE} and \code{eval==FALSE} a list with n calls that generate sequences \cr #! if \code{concat==FALSE} and \code{eval==TRUE } a list with n sequences \cr #! if \code{concat==TRUE } and \code{eval==FALSE} a single call generating the concatenated sequences \cr #! if \code{concat==TRUE } and \code{eval==TRUE } an integer vector of concatentated sequences #! } #! \author{ Angelo Canty, Jens Oehlschlägel } #! \seealso{ \code{\link{:}}, \code{\link{seq}}, \code{\link{sequence}} } #! \examples{ #! sequence(c(3,4)) #! vecseq(c(3,4)) #! vecseq(c(1,11), c(5, 15)) #! vecseq(c(1,11), c(5, 15), concat=FALSE, eval=FALSE) #! vecseq(c(1,11), c(5, 15), concat=FALSE, eval=TRUE) #! vecseq(c(1,11), c(5, 15), concat=TRUE, eval=FALSE) #! vecseq(c(1,11), c(5, 15), concat=TRUE, eval=TRUE) #! } #! \keyword{ manip } vecseq <- function(x, y=NULL, concat=TRUE, eval=TRUE){ if (missing(y)){ y <- x x <- 1L } if (concat){ if (eval){ # pure R version was: eval(parse(text=paste("c(",paste(x,y,sep=":",collapse=","),")"))) # now calling C-code nx <- length(x) ny <- length(y) if (nx.Rd files, where is derived from the "#! \name{}" in the first line bit/inst/ANNOUNCEMENT-1.0.txt0000755000176000001440000000273613264143377015011 0ustar ripleyusersDear R community, Package 'bit' Version 1.0 is available on CRAN. It provides bitmapped vectors of booleans (no NAs), coercion from and to logicals, integers and integer subscripts; fast boolean operators and fast summary statistics. With bit vectors you can store true binary booleans {FALSE,TRUE} at the expense of 1 bit only, on a 32 bit architecture this means factor 32 less RAM and factor 32 more speed on boolean operations. With this speed gain it even pays-off to convert to bit in order to avoid a single boolean operation on logicals or a single set operation on (longer) integer subscripts, the pay-off is dramatic when such components are used more than once. Reading from and writing to bit is approximately as fast as accessing standard logicals - mostly due to R's time for memory allocation. The package allows to work with pre-allocated memory for return values by calling .Call() directly: when evaluating the speed of C-access with pre-allocated vector memory, coping from bit to logical requires only 70% of the time for copying from logical to logical; and copying from logical to bit comes at a performance penalty of 150%. Functions 'which' and 'xor' are made S3 generic, 'xor.default' is implemented much faster than in base R (this should go into base R). The package has automated regression-tests and is hopefully useful for better handling large datasets, together with packages 'rindex' and 'ff'. Best regards Jens Oehlschlägel Munich, 10.10.2008