matrixStats/0000755000176200001440000000000014535706561012611 5ustar liggesusersmatrixStats/NAMESPACE0000644000176200001440000000412314535663507014032 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(allValue) export(allocArray) export(allocMatrix) export(allocVector) export(anyMissing) export(anyValue) export(binCounts) export(binMeans) export(colAlls) export(colAnyMissings) export(colAnyNAs) export(colAnys) export(colAvgsPerRowSet) export(colCollapse) export(colCounts) export(colCummaxs) export(colCummins) export(colCumprods) export(colCumsums) export(colDiffs) export(colIQRDiffs) export(colIQRs) export(colLogSumExps) export(colMadDiffs) export(colMads) export(colMaxs) export(colMeans2) export(colMedians) export(colMins) export(colOrderStats) export(colProds) export(colQuantiles) export(colRanges) export(colRanks) export(colSdDiffs) export(colSds) export(colSums2) export(colTabulates) export(colVarDiffs) export(colVars) export(colWeightedMads) export(colWeightedMeans) export(colWeightedMedians) export(colWeightedSds) export(colWeightedVars) export(count) export(diff2) export(indexByRow) export(iqr) export(iqrDiff) export(logSumExp) export(madDiff) export(mean2) export(product) export(rowAlls) export(rowAnyMissings) export(rowAnyNAs) export(rowAnys) export(rowAvgsPerColSet) export(rowCollapse) export(rowCounts) export(rowCummaxs) export(rowCummins) export(rowCumprods) export(rowCumsums) export(rowDiffs) export(rowIQRDiffs) export(rowIQRs) export(rowLogSumExps) export(rowMadDiffs) export(rowMads) export(rowMaxs) export(rowMeans2) export(rowMedians) export(rowMins) export(rowOrderStats) export(rowProds) export(rowQuantiles) export(rowRanges) export(rowRanks) export(rowSdDiffs) export(rowSds) export(rowSums2) export(rowTabulates) export(rowVarDiffs) export(rowVars) export(rowWeightedMads) export(rowWeightedMeans) export(rowWeightedMedians) export(rowWeightedSds) export(rowWeightedVars) export(sdDiff) export(signTabulate) export(sum2) export(t_tx_OP_y) export(validateIndices) export(varDiff) export(weightedMad) export(weightedMean) export(weightedMedian) export(weightedSd) export(weightedVar) export(x_OP_y) importFrom(stats,mad) importFrom(stats,median) importFrom(stats,quantile) useDynLib("matrixStats", .registration = TRUE, .fixes = "C_") matrixStats/man/0000755000176200001440000000000014535663001013353 5ustar liggesusersmatrixStats/man/rowCollapse.Rd0000644000176200001440000000412314522461565016143 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowCollapse.R \name{rowCollapse} \alias{rowCollapse} \alias{colCollapse} \title{Extracts one cell per row (column) from a matrix} \usage{ rowCollapse(x, idxs, rows = NULL, dim. = dim(x), ..., useNames = TRUE) colCollapse(x, idxs, cols = NULL, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{An index \code{\link[base]{vector}} of (maximum) length N (K) specifying the columns (rows) to be extracted.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{vector}} of length N (K). } \description{ Extracts one cell per row (column) from a matrix. The implementation is optimized for memory and speed. } \examples{ x <- matrix(1:27, ncol = 3) y <- rowCollapse(x, 1) stopifnot(identical(y, x[, 1])) y <- rowCollapse(x, 2) stopifnot(identical(y, x[, 2])) y <- rowCollapse(x, c(1, 1, 1, 1, 1, 3, 3, 3, 3)) stopifnot(identical(y, c(x[1:5, 1], x[6:9, 3]))) y <- rowCollapse(x, 1:3) print(y) y_truth <- c(x[1, 1], x[2, 2], x[3, 3], x[4, 1], x[5, 2], x[6, 3], x[7, 1], x[8, 2], x[9, 3]) stopifnot(identical(y, y_truth)) } \seealso{ \emph{Matrix indexing} to index elements in matrices and arrays, cf. \code{\link[base]{[}}(). } \author{ Henrik Bengtsson } \keyword{utilities} matrixStats/man/rowRanges.Rd0000644000176200001440000000453414522461565015626 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowRanges.R \name{rowRanges} \alias{rowRanges} \alias{rowMins} \alias{rowMaxs} \alias{colRanges} \alias{colMins} \alias{colMaxs} \title{Gets the range of values in each row (column) of a matrix} \usage{ rowRanges(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) rowMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) rowMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colRanges(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ \code{rowRanges()} (\code{colRanges()}) returns a \code{\link[base]{numeric}} Nx2 (Kx2) \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) for which the ranges are calculated. \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Gets the range of values in each row (column) of a matrix. } \seealso{ \code{\link{rowOrderStats}}() and \code{\link[base]{pmin.int}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowMeans2.Rd0000644000176200001440000000365714522461565015541 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowMeans2.R \name{rowMeans2} \alias{rowMeans2} \alias{colMeans2} \title{Calculates the mean for each row (column) in a matrix} \usage{ rowMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, dim. = dim(x), ..., useNames = TRUE) colMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the mean for each row (column) in a matrix. } \details{ The implementation of \code{rowMeans2()} and \code{colMeans2()} is optimized for both speed and memory. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowOrderStats.Rd0000644000176200001440000000441714522461565016501 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowOrderStats.R \name{rowOrderStats} \alias{rowOrderStats} \alias{colOrderStats} \title{Gets an order statistic for each row (column) in a matrix} \usage{ rowOrderStats(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = TRUE) colOrderStats(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{which}{An \code{\link[base]{integer}} index in [1,K] ([1,N]) indicating which order statistic to be returned.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Gets an order statistic for each row (column) in a matrix. } \details{ The implementation of \code{rowOrderStats()} is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a unique implementation for \code{\link[base]{integer}} matrices. } \section{Missing values}{ This method does \emph{not} handle missing values, that is, the result corresponds to having \code{na.rm = FALSE} (if such an argument would be available). } \seealso{ See \code{rowMeans()} in \code{\link[base]{colSums}}(). } \author{ The native implementation of \code{rowOrderStats()} was adopted by Henrik Bengtsson from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} package. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/signTabulate.Rd0000644000176200001440000000164014372747711016277 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/signTabulate.R \name{signTabulate} \alias{signTabulate} \title{Calculates the number of negative, zero, positive and missing values} \usage{ signTabulate(x, idxs = NULL, ...) } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{name}}d \code{\link[base]{numeric}} \code{\link[base]{vector}}. } \description{ Calculates the number of negative, zero, positive and missing values in a \code{\link[base]{numeric}} vector. For \code{\link[base]{double}} vectors, the number of negative and positive infinite values are also counted. } \seealso{ \code{\link[base]{sign}}(). } \author{ Henrik Bengtsson } \keyword{internal} matrixStats/man/weightedMean.Rd0000644000176200001440000000545014406445217016253 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedMean.R \name{weightedMean} \alias{weightedMean} \title{Weighted Arithmetic Mean} \usage{ weightedMean(x, w = NULL, idxs = NULL, na.rm = FALSE, refine = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values. If a missing-value weight exists, the result is always a missing value.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. If \code{x} is of zero length, then \code{NaN} is returned, which is consistent with \code{\link[base]{mean}}(). } \description{ Computes the weighted sample mean of a numeric vector. } \section{Missing values}{ This function handles missing values consistently with \code{\link[stats]{weighted.mean}}. More precisely, if \code{na.rm = FALSE}, then any missing values in either \code{x} or \code{w} will give result \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which \code{x} is missing are skipped. Note that if both \code{x} and \code{w} are missing for a data points, then it is also skipped (by the same rule). However, if only \code{w} is missing, then the final results will always be \code{NA_real_} regardless of \code{na.rm}. } \examples{ x <- 1:10 n <- length(x) w <- rep(1, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # Pull the mean towards zero w[1] <- 5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # Put even more weight on the zero w[1] <- 8.5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # All weight on the first value w[1] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # All weight on the last value w[1] <- 1 w[n] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # All weights set to zero w <- rep(0, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) } \seealso{ \code{\link[base]{mean}}() and \code{\link[stats]{weighted.mean}}. } \author{ Henrik Bengtsson } \keyword{robust} \keyword{univar} matrixStats/man/anyMissing.Rd0000644000176200001440000000424414535663001015767 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/anyMissing.R \name{anyMissing} \alias{anyMissing} \alias{colAnyMissings} \alias{rowAnyMissings} \alias{colAnyNAs} \alias{rowAnyNAs} \title{Checks if there are any missing values in an object or not} \usage{ anyMissing(x, idxs = NULL, ...) colAnyMissings(x, rows = NULL, cols = NULL, ..., useNames = TRUE) rowAnyMissings(x, rows = NULL, cols = NULL, ..., useNames = TRUE) colAnyNAs(x, rows = NULL, cols = NULL, ..., useNames = TRUE) rowAnyNAs(x, rows = NULL, cols = NULL, ..., useNames = TRUE) } \arguments{ \item{x}{A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or \code{\link[base]{NULL}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns \code{\link[base:logical]{TRUE}} if a missing value was detected, otherwise \code{\link[base:logical]{FALSE}}. } \description{ Checks if there are any missing values in an object or not. \emph{Please use \code{base::anyNA()} instead of \code{anyMissing()}, \code{colAnyNAs()} instead of \code{colAnyMissings()}, and \code{rowAnyNAs()} instead of \code{rowAnyMissings()}.} } \details{ The implementation of this method is optimized for both speed and memory. The method will return \code{\link[base:logical]{TRUE}} as soon as a missing value is detected. } \examples{ x <- rnorm(n = 1000) x[seq(300, length(x), by = 100)] <- NA stopifnot(anyMissing(x) == any(is.na(x))) } \seealso{ Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base}, which provides the same functionality as \code{anyMissing()}. } \author{ Henrik Bengtsson } \keyword{iteration} \keyword{logic} matrixStats/man/rowAvgsPerColSet.Rd0000644000176200001440000001044314372747711017067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowAvgsPerColSet.R \name{rowAvgsPerColSet} \alias{rowAvgsPerColSet} \alias{colAvgsPerRowSet} \title{Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)} \usage{ rowAvgsPerColSet(X, W = NULL, rows = NULL, S, FUN = rowMeans, ..., na.rm = NA, tFUN = FALSE) colAvgsPerRowSet(X, W = NULL, cols = NULL, S, FUN = colMeans, ..., na.rm = NA, tFUN = FALSE) } \arguments{ \item{X}{A \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}}.} \item{W}{An optional \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}} of weights.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{S}{An \code{\link[base]{integer}} KxJ \code{\link[base]{matrix}} specifying the J subsets. Each column holds K column (row) indices for the corresponding subset.} \item{FUN}{The row-by-row (column-by-column) \code{\link[base]{function}} used to average over each subset of \code{X}. This function must accept a \code{\link[base]{numeric}} NxK (KxM) \code{\link[base]{matrix}} and the \code{\link[base]{logical}} argument \code{na.rm}, and return a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (M).} \item{...}{Additional arguments passed to then \code{FUN} \code{\link[base]{function}}.} \item{na.rm}{(logical) Argument passed to \code{FUN()} as \code{na.rm = na.rm}. If \code{\link[base:logical]{NA}} (default), then \code{na.rm = TRUE} is used if \code{X} or \code{S} holds missing values, otherwise \code{na.rm = FALSE}.} \item{tFUN}{If \code{\link[base:logical]{TRUE}}, the NxK (KxM) \code{\link[base]{matrix}} passed to \code{FUN()} is transposed first.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{numeric}} JxN (MxJ) \code{\link[base]{matrix}}, where row names equal \code{rownames(X)} (\code{colnames(S)}) and column names \code{colnames(S)} (\code{colnames(X)}). } \description{ Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows). Each subset is averaged independently of the others. } \details{ If argument \code{S} is a single column vector with indices \code{1:N}, then \code{rowAvgsPerColSet(X, S = S, FUN = rowMeans)} gives the same result as \code{rowMeans(X)}. Analogously, for \code{colAvgsPerRowSet()}. } \examples{ X <- matrix(rnorm(20 * 6), nrow = 20, ncol = 6) rownames(X) <- LETTERS[1:nrow(X)] colnames(X) <- letters[1:ncol(X)] print(X) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply rowMeans() for 3 sets of 2 columns # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 3 S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S) print(Z) # Validation Z0 <- cbind(s1 = rowMeans(X[, 1:2]), s2 = rowMeans(X[, 3:4]), s3 = rowMeans(X[, 5:6])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply colMeans() for 5 sets of 4 rows # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 5 S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S) print(Z) # Validation Z0 <- rbind(s1 = colMeans(X[ 1:4, ]), s2 = colMeans(X[ 5:8, ]), s3 = colMeans(X[ 9:12, ]), s4 = colMeans(X[13:16, ]), s5 = colMeans(X[17:20, ])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # When there is only one "complete" set # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 1 S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S, FUN = rowMeans) print(Z) Z0 <- rowMeans(X) stopifnot(identical(drop(Z), Z0)) nbr_of_sets <- 1 S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S, FUN = colMeans) print(Z) Z0 <- colMeans(X) stopifnot(identical(drop(Z), Z0)) } \author{ Henrik Bengtsson } \keyword{internal} \keyword{utilities} matrixStats/man/rowProds.Rd0000644000176200001440000000463014522461565015473 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/product.R, R/rowProds.R \name{product} \alias{product} \alias{rowProds} \alias{colProds} \title{Calculates the product for each row (column) in a matrix} \usage{ product(x, idxs = NULL, na.rm = FALSE, ...) rowProds(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = TRUE) colProds(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{method}{A \code{\link[base]{character}} string specifying how each product is calculated.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the product for each row (column) in a matrix. } \details{ If \code{method = "expSumLog"}, then then \code{\link{product}}() function is used, which calculates the product via the logarithmic transform (treating negative values specially). This improves the precision and lowers the risk for numeric overflow. If \code{method = "direct"}, the direct product is calculated via the \code{\link[base]{prod}}() function. } \section{Missing values}{ Note, if \code{method = "expSumLog"}, \code{na.rm = FALSE}, and \code{x} contains missing values (\code{\link[base]{NA}} or \code{\link[base:is.finite]{NaN}}), then the calculated value is also missing value. Note that it depends on platform whether \code{\link[base:is.finite]{NaN}} or \code{\link[base]{NA}} is returned when an \code{\link[base:is.finite]{NaN}} exists, cf. \code{\link[base]{is.nan}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowDiffs.Rd0000644000176200001440000000354214522461565015440 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowDiffs.R \name{rowDiffs} \alias{rowDiffs} \alias{colDiffs} \title{Calculates difference for each row (column) in a matrix} \usage{ rowDiffs(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = TRUE) colDiffs(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{lag}{An \code{\link[base]{integer}} specifying the lag.} \item{differences}{An \code{\link[base]{integer}} specifying the order of difference.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} Nx(K-1) or (N-1)xK \code{\link[base]{matrix}}. } \description{ Calculates difference for each row (column) in a matrix. } \examples{ x <- matrix(1:27, ncol = 3) d1 <- rowDiffs(x) print(d1) d2 <- t(colDiffs(t(x))) stopifnot(all.equal(d2, d1)) } \seealso{ See also \code{\link{diff2}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowLogSumExps.Rd0000644000176200001440000000316414522461565016453 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowLogSumExps.R \name{rowLogSumExps} \alias{rowLogSumExps} \alias{colLogSumExps} \title{Accurately computes the logarithm of the sum of exponentials across rows or columns} \usage{ rowLogSumExps(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = TRUE) colLogSumExps(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = TRUE) } \arguments{ \item{lx}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. Typically \code{lx} are \eqn{log(x)} values.} \item{rows, cols}{A \code{\link[base]{vector}} indicating subset of rows (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, any missing values are ignored, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Accurately computes the logarithm of the sum of exponentials across rows or columns. } \section{Benchmarking}{ These methods are implemented in native code and have been optimized for speed and memory. } \seealso{ To calculate the same on vectors, \code{\link{logSumExp}}(). } \author{ Native implementation by Henrik Bengtsson. Original R code by Nakayama ??? (Japan). } \keyword{array} matrixStats/man/rowCumsums.Rd0000644000176200001440000000525414522461565016043 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowCumsums.R \name{rowCumsums} \alias{rowCumsums} \alias{colCumsums} \alias{rowCumprods} \alias{colCumprods} \alias{rowCummins} \alias{colCummins} \alias{rowCummaxs} \alias{colCummaxs} \title{Cumulative sums, products, minima and maxima for each row (column) in a matrix} \usage{ rowCumsums(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) colCumsums(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) rowCumprods(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) colCumprods(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) rowCummins(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) colCummins(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) rowCummaxs(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) colCummaxs(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} of the same mode as \code{x}, except when \code{x} is of mode \code{\link[base]{logical}}, then the return type is \code{\link[base]{integer}}. } \description{ Cumulative sums, products, minima and maxima for each row (column) in a matrix. } \examples{ x <- matrix(1:12, nrow = 4, ncol = 3) print(x) yr <- rowCumsums(x) print(yr) yc <- colCumsums(x) print(yc) yr <- rowCumprods(x) print(yr) yc <- colCumprods(x) print(yc) yr <- rowCummaxs(x) print(yr) yc <- colCummaxs(x) print(yc) yr <- rowCummins(x) print(yr) yc <- colCummins(x) print(yc) } \seealso{ See \code{\link[base]{cumsum}}(), \code{\link[base]{cumprod}}(), \code{\link[base]{cummin}}(), and \code{\link[base]{cummax}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{univar} matrixStats/man/rowSums2.Rd0000644000176200001440000000330214522461565015410 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowSums2.R \name{rowSums2} \alias{rowSums2} \alias{colSums2} \title{Calculates the sum for each row (column) in a matrix} \usage{ rowSums2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colSums2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the sum for each row (column) in a matrix. } \details{ The implementation of \code{rowSums2()} and \code{colSums2()} is optimized for both speed and memory. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowWeightedMeans.Rd0000644000176200001440000000504014522461565017124 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowWeightedMeans.R \name{rowWeightedMeans} \alias{rowWeightedMeans} \alias{colWeightedMeans} \title{Calculates the weighted means for each row (column) in a matrix} \usage{ rowWeightedMeans(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) colWeightedMeans(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the weighted means for each row (column) in a matrix. } \details{ The implementations of these methods are optimized for both speed and memory. If no weights are given, the corresponding \code{rowMeans()}/\code{colMeans()} is used. } \examples{ x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # Non-weighted row averages mu_0 <- rowMeans(x) mu <- rowWeightedMeans(x) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (uniform weights) w <- rep(2.5, times = ncol(x)) mu <- rowWeightedMeans(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(1, 1, 0, 1) mu_0 <- rowMeans(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMeans(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(0, 1, 0, 0) mu_0 <- rowMeans(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMeans(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted averages by rows and columns w <- 1:4 mu_1 <- rowWeightedMeans(x, w = w) mu_2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(mu_2, mu_1)) } \seealso{ See \code{rowMeans()} and \code{colMeans()} in \code{\link[base]{colSums}}() for non-weighted means. See also \code{\link[stats]{weighted.mean}}. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowRanks.Rd0000644000176200001440000001213214522461565015456 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowRanks.R \name{rowRanks} \alias{rowRanks} \alias{colRanks} \title{Gets the rank of the elements in each row (column) of a matrix} \usage{ rowRanks(x, rows = NULL, cols = NULL, ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), ..., useNames = TRUE) colRanks(x, rows = NULL, cols = NULL, ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), preserveShape = FALSE, ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{ties.method}{A \code{\link[base]{character}} string specifying how ties are treated. For details, see below.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{preserveShape}{A \code{\link[base]{logical}} specifying whether the \code{\link[base]{matrix}} returned should preserve the input shape of \code{x}, or not.} } \value{ A \code{\link[base]{matrix}} of type \code{\link[base]{integer}} is returned, unless \code{ties.method = "average"} when it is of type \code{\link[base]{numeric}}. The \code{rowRanks()} function always returns an NxK \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) whose ranks are calculated. The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}. Any \code{\link[base]{names}} of \code{x} are ignored and absent in the result. } \description{ Gets the rank of the elements in each row (column) of a matrix. } \details{ These functions rank values and treats missing values the same way as \code{\link[base]{rank}}(). For equal values ("ties"), argument \code{ties.method} determines how these are ranked among each other. More precisely, for the following values of \code{ties.method}, each index set of ties consists of: \itemize{ \item{\code{"first"} - increasing values that are all unique} \item{\code{"last"} - decreasing values that are all unique} \item{\code{"min"} - identical values equaling the minimum of their original ranks} \item{\code{"max"} - identical values equaling the maximum of their original ranks} \item{\code{"average"} - identical values that equal the sample mean of their original ranks. Because the average is calculated, the returned ranks may be non-integer values} \item{\code{"random"} - randomly shuffled values of their original ranks.} \item{\code{"dense"} - increasing values that are all unique and, contrary to \code{"first"}, never contain any gaps} } For more information on \code{ties.method = "dense"}, see \code{frank()} of the \pkg{data.table} package. For more information on the other alternatives, see \code{\link[base]{rank}}(). Note that, due to different randomization strategies, the shuffling order produced by these functions when using \code{ties.method = "random"} does not reproduce that of \code{\link[base]{rank}}(). \emph{WARNING: For backward-compatibility reasons, the default is \code{ties.method = "max"}, which differs from \code{\link[base]{rank}}() which uses \code{ties.method = "average"} by default. Since we plan to change the default behavior in a future version, we recommend to explicitly specify the intended value of argument \code{ties.method}.} } \section{Missing values}{ Missing values are ranked as \code{NA_integer_}, as with \code{na.last = "keep"} in the \code{\link[base]{rank}}() function. } \section{Performance}{ The implementation is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a unique implementation for \code{\link[base]{integer}} matrices. Furthermore, it is more memory efficient to do \code{colRanks(x, preserveShape = TRUE)} than \code{t(colRanks(x, preserveShape = FALSE))}. } \seealso{ For developers, see also Section Utility functions' in 'Writing R Extensions manual', particularly the native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}. } \author{ Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding 'ties.method' support. Brian Montgomery for adding more 'ties.method's. Henrik Bengtsson adapted the original native implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} package. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/varDiff.Rd0000644000176200001440000001004614522461565015233 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/varDiff.R \name{varDiff} \alias{varDiff} \alias{sdDiff} \alias{madDiff} \alias{iqrDiff} \alias{rowVarDiffs} \alias{colVarDiffs} \alias{rowSdDiffs} \alias{colSdDiffs} \alias{rowMadDiffs} \alias{colMadDiffs} \alias{rowIQRDiffs} \alias{colIQRDiffs} \title{Estimation of scale based on sequential-order differences} \usage{ varDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) sdDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) madDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, constant = 1.4826, ...) iqrDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) rowVarDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) colVarDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) rowSdDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) colSdDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) rowMadDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) colMadDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) rowIQRDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) colIQRDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N or a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{diff}{The positional distance of elements for which the difference should be calculated.} \item{trim}{A \code{\link[base]{double}} in [0,1/2] specifying the fraction of observations to be trimmed from each end of (sorted) \code{x} before estimation.} \item{...}{Not used.} \item{constant}{A scale factor adjusting for asymptotically normal consistency.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length 1, length N, or length K. } \description{ Estimation of scale based on sequential-order differences, corresponding to the scale estimates provided by \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats]{IQR}}. } \details{ Note that n-order difference MAD estimates, just like the ordinary MAD estimate by \code{\link[stats]{mad}}, apply a correction factor such that the estimates are consistent with the standard deviation under Gaussian distributions. The interquartile range (IQR) estimates does \emph{not} apply such a correction factor. If asymptotically normal consistency is wanted, the correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))}, which is half of that used for MAD estimates, which is \code{1 / qnorm(3/4)}. This correction factor needs to be applied manually, i.e. there is no \code{constant} argument for the IQR functions. } \references{ [1] J. von Neumann et al., \emph{The mean square successive difference}. Annals of Mathematical Statistics, 1941, 12, 153-162.\cr } \seealso{ For the corresponding non-differentiated estimates, see \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats]{IQR}}. Internally, \code{\link{diff2}}() is used which is a faster version of \code{\link[base]{diff}}(). } \author{ Henrik Bengtsson } \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowTabulates.Rd0000644000176200001440000000426414522461565016333 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowTabulates.R \name{rowTabulates} \alias{rowTabulates} \alias{colTabulates} \title{Tabulates the values in a matrix by row (column).} \usage{ rowTabulates(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = TRUE) colTabulates(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = TRUE) } \arguments{ \item{x}{An \code{\link[base]{integer}}, a \code{\link[base]{logical}}, or a \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{values}{An \code{\link[base]{vector}} of J values of count. If \code{\link[base]{NULL}}, all (unique) values are counted.} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a NxJ (KxJ) \code{\link[base]{matrix}} where N (K) is the number of row (column) \code{\link[base]{vector}}s tabulated and J is the number of values counted. } \description{ Tabulates the values in a matrix by row (column). } \details{ An alternative to these functions, is to use \code{table(x, row(x))} and \code{table(x, col(x))}, with the exception that the latter do not support the \code{\link[base]{raw}} data type. When there are no missing values in \code{x}, we have that \code{all(rowTabulates(x) == t(table(x, row(x))))} and \code{all(colTabulates(x) == t(table(x, col(x))))}. When there are missing values, we have that \code{all(rowTabulates(x) == t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]))} and \code{all(colTabulates(x) == t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]))}. } \examples{ x <- matrix(1:5, nrow = 10, ncol = 5) print(x) print(rowTabulates(x)) print(colTabulates(x)) # Count only certain values print(rowTabulates(x, values = 1:3)) y <- as.raw(x) dim(y) <- dim(x) print(y) print(rowTabulates(y)) print(colTabulates(y)) } \author{ Henrik Bengtsson } \keyword{utilities} matrixStats/man/sum2.Rd0000644000176200001440000000636614406445217014547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sum2.R \name{sum2} \alias{sum2} \title{Fast sum over subset of vector elements} \usage{ sum2(x, idxs = NULL, na.rm = FALSE, mode = typeof(x), ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{mode}{A \code{\link[base]{character}} string specifying the data type of the return value. Default is to use the same mode as argument \code{x}, unless it is logical when it defaults to \code{"integer"}.} \item{...}{Not used.} } \value{ Returns a scalar of the data type specified by argument \code{mode}. If \code{mode = "integer"}, then integer overflow occurs if the \emph{sum} is outside the range of defined integer values. Note that the intermediate sum (\code{sum(x[1:n])}) is internally represented as a floating point value and will therefore never be outside of the range. If \code{mode = "integer"} and \code{typeof(x) == "double"}, then a warning is generated. } \description{ Computes the sum of all or a subset of values. } \details{ \code{sum2(x, idxs)} gives equivalent results as \code{sum(x[idxs])}, but is faster and more memory efficient since it avoids the actual subsetting which requires copying of elements and garbage collection thereof. Furthermore, \code{sum2(x, mode = "double")} is equivalent to \code{sum(as.numeric(x))} and may therefore be used to avoid integer overflow(*), but at the same time is much more memory efficient that the regular \code{sum()} function when \code{x} is an \code{\link[base]{integer}} vector. (*) \emph{In R (>= 3.5.0), \code{sum(x)} will no longer integer overflow and return \code{NA_integer_}. Instead it will return the correct sum in form of a double value.} } \examples{ x <- 1:10 n <- length(x) idxs <- seq(from = 1, to = n, by = 2) s1 <- sum(x[idxs]) # 25 s2 <- sum2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) idxs <- seq(from = n, to = 1, by = -2) s1 <- sum(x[idxs]) # 25 s2 <- sum2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) s1 <- sum(x) # 55 s2 <- sum2(x) # 55 stopifnot(identical(s1, s2)) # Total gives integer overflow x <- c(.Machine$integer.max, 1L, -.Machine$integer.max) s1 <- sum(x[1:2]) # NA_integer_ in R (< 3.5.0) s2 <- sum2(x[1:2]) # NA_integer_ # Total gives integer overflow (coerce to numeric) s1 <- sum(as.numeric(x[1:2])) # 2147483648 s2 <- sum2(as.numeric(x[1:2])) # 2147483648 s3 <- sum2(x[1:2], mode = "double") # 2147483648 w/out copy stopifnot(identical(s1, s2)) stopifnot(identical(s1, s3)) # Cumulative sum would give integer overflow but not the total s1 <- sum(x) # 1L s2 <- sum2(x) # 1L stopifnot(identical(s1, s2)) } \seealso{ \code{\link[base]{sum}}(). To efficiently average over a subset, see \code{\link{mean2}}(). } \author{ Henrik Bengtsson } \keyword{internal} \keyword{univar} matrixStats/man/weightedVar.Rd0000644000176200001440000000574614522461565016136 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedVar.R \name{weightedVar} \alias{weightedVar} \alias{weightedSd} \alias{rowWeightedVars} \alias{colWeightedVars} \alias{rowWeightedSds} \alias{colWeightedSds} \title{Weighted variance and weighted standard deviation} \usage{ weightedVar(x, w = NULL, idxs = NULL, na.rm = FALSE, center = NULL, ...) weightedSd(...) rowWeightedVars(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) colWeightedVars(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) rowWeightedSds(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) colWeightedSds(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) } \arguments{ \item{x}{\code{\link[base]{vector}} of type \code{\link[base]{integer}}, \code{\link[base]{numeric}}, or \code{\link[base]{logical}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center location of the data. If \code{\link[base]{NULL}}, it is estimated from data.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Computes a weighted variance / standard deviation of a numeric vector or across rows or columns of a matrix. } \details{ The estimator used here is the same as the one used by the "unbiased" estimator of the \bold{Hmisc} package. More specifically, \code{weightedVar(x, w = w) == Hmisc::wtd.var(x, weights = w)}, } \section{Missing values}{ This function handles missing values consistently with \code{\link{weightedMean}}(). More precisely, if \code{na.rm = FALSE}, then any missing values in either \code{x} or \code{w} will give result \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which \code{x} is missing are skipped. Note that if both \code{x} and \code{w} are missing for a data points, then it is also skipped (by the same rule). However, if only \code{w} is missing, then the final results will always be \code{NA_real_} regardless of \code{na.rm}. } \seealso{ For the non-weighted variance, see \code{\link[stats]{var}}. } \author{ Henrik Bengtsson } \keyword{robust} \keyword{univar} matrixStats/man/weightedMedian.Rd0000644000176200001440000001255414372747711016601 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedMedian.R \name{weightedMedian} \alias{weightedMedian} \title{Weighted Median Value} \usage{ weightedMedian(x, w = NULL, idxs = NULL, na.rm = FALSE, interpolate = is.null(ties), ties = NULL, ...) } \arguments{ \item{x}{\code{\link[base]{vector}} of type \code{\link[base]{integer}}, \code{\link[base]{numeric}}, or \code{\link[base]{logical}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in \code{x} should be stripped before the computation proceeds, or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done.} \item{interpolate}{If \code{\link[base:logical]{TRUE}}, linear interpolation is used to get a consistent estimate of the weighted median.} \item{ties}{If \code{interpolate == FALSE}, a character string specifying how to solve ties between two \code{x}'s that are satisfying the weighted median criteria. Note that at most two values can satisfy the criteria. When \code{ties} is \code{"min"} ("lower weighted median"), the smaller value of the two is returned and when it is \code{"max"} ("upper weighted median"), the larger value is returned. If \code{ties} is \code{"mean"}, the mean of the two values is returned. Finally, if \code{ties} is \code{"weighted"} (or \code{\link[base]{NULL}}) a weighted average of the two are returned, where the weights are weights of all values \code{x[i] <= x[k]} and \code{x[i] >= x[k]}, respectively.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S}, the \emph{weighted median} is defined as the element \code{x[k]} for which the total weight of all elements \code{x[i] < x[k]} is less or equal to \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]} is less or equal to \code{S/2} (c.f. [1]). When using linear interpolation, the weighted mean of \code{x[k-1]} and \code{x[k]} with weights \code{S[k-1]} and \code{S[k]} corresponding to the cumulative weights of those two elements is used as an estimate. If \code{w} is missing then all elements of \code{x} are given the same positive weight. If all weights are zero, \code{\link[base:NA]{NA_real_}} is returned. If one or more weights are \code{Inf}, it is the same as these weights have the same weight and the others have zero. This makes things easier for cases where the weights are result of a division with zero. If there are missing values in \code{w} that are part of the calculation (after subsetting and dropping missing values in \code{x}), then the final result is always \code{NA} of the same type as \code{x}. The weighted median solves the following optimization problem: \deqn{\alpha^* = \arg_\alpha \min \sum_{i = 1}^{n} w_i |x_i-\alpha|} where \eqn{x = (x_1, x_2, \ldots, x_n)} are scalars and \eqn{w = (w_1, w_2, \ldots, w_n)} are the corresponding "weights" for each individual \eqn{x} value. } \description{ Computes a weighted median of a numeric vector. } \examples{ x <- 1:10 n <- length(x) m1 <- median(x) # 5.5 m2 <- weightedMedian(x) # 5.5 stopifnot(identical(m1, m2)) w <- rep(1, times = n) m1 <- weightedMedian(x, w) # 5.5 (default) m2 <- weightedMedian(x, ties = "weighted") # 5.5 (default) m3 <- weightedMedian(x, ties = "min") # 5 m4 <- weightedMedian(x, ties = "max") # 6 stopifnot(identical(m1, m2)) # Pull the median towards zero w[1] <- 5 m1 <- weightedMedian(x, w) # 3.5 y <- c(rep(0, times = w[1]), x[-1]) # Only possible for integer weights m2 <- median(y) # 3.5 stopifnot(identical(m1, m2)) # Put even more weight on the zero w[1] <- 8.5 weightedMedian(x, w) # 2 # All weight on the first value w[1] <- Inf weightedMedian(x, w) # 1 # All weight on the last value w[1] <- 1 w[n] <- Inf weightedMedian(x, w) # 10 # All weights set to zero w <- rep(0, times = n) weightedMedian(x, w) # NA # Simple benchmarking bench <- function(N = 1e5, K = 10) { x <- rnorm(N) gc() t <- c() t[1] <- system.time(for (k in 1:K) median(x))[3] t[2] <- system.time(for (k in 1:K) weightedMedian(x))[3] t <- t / t[1] names(t) <- c("median", "weightedMedian") t } print(bench(N = 5, K = 100)) print(bench(N = 50, K = 100)) print(bench(N = 200, K = 100)) print(bench(N = 1000, K = 100)) print(bench(N = 10e3, K = 20)) print(bench(N = 100e3, K = 20)) } \references{ [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to Algorithms, The MIT Press, Massachusetts Institute of Technology, 1989. } \seealso{ \code{\link[stats]{median}}, \code{\link[base]{mean}}() and \code{\link{weightedMean}}(). } \author{ Henrik Bengtsson and Ola Hossjer, Centre for Mathematical Sciences, Lund University. Thanks to Roger Koenker, Econometrics, University of Illinois, for the initial ideas. } \keyword{robust} \keyword{univar} matrixStats/man/rowCounts.Rd0000644000176200001440000000615714522461565015665 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowCounts.R \name{rowCounts} \alias{rowCounts} \alias{colCounts} \alias{count} \title{Counts the number of occurrences of a specific value} \usage{ rowCounts(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colCounts(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) count(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{value}{A value to search for.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ \code{rowCounts()} (\code{colCounts()}) returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length N (K). \code{count()} returns a scalar of type \code{\link[base]{integer}} if the count is less than 2^31-1 (= \code{.Machine$integer.max}) otherwise a scalar of type \code{\link[base]{double}}. } \description{ The row- and column-wise functions take either a matrix or a vector as input. If a vector, then argument \code{dim.} must be specified and fulfill \code{prod(dim.) == length(x)}. The result will be identical to the results obtained when passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, but avoids having to temporarily create/allocate a matrix, if only such is needed only for these calculations. } \examples{ x <- matrix(0:11, nrow = 4, ncol = 3) x[2:3, 2:3] <- 2:5 x[3, 3] <- NA_integer_ print(x) print(rowCounts(x, value = 2)) ## [1] 0 1 NA 0 print(colCounts(x, value = 2)) ## [1] 1 1 NA print(colCounts(x, value = NA_integer_)) ## [1] 0 0 1 print(rowCounts(x, value = 2, na.rm = TRUE)) ## [1] 0 1 1 0 print(colCounts(x, value = 2, na.rm = TRUE)) ## [1] 1 1 0 print(rowAnys(x, value = 2)) ## [1] FALSE TRUE TRUE FALSE print(rowAnys(x, value = NA_integer_)) ## [1] FALSE FALSE TRUE FALSE print(colAnys(x, value = 2)) ## [1] TRUE TRUE NA print(colAnys(x, value = 2, na.rm = TRUE)) ## [1] TRUE TRUE FALSE print(colAlls(x, value = 2)) ## [1] FALSE FALSE FALSE } \seealso{ rowAlls } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{logic} \keyword{univar} matrixStats/man/rowIQRs.Rd0000644000176200001440000000417014522461565015221 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowIQRs.R \name{rowIQRs} \alias{rowIQRs} \alias{colIQRs} \alias{iqr} \title{Estimates of the interquartile range for each row (column) in a matrix} \usage{ rowIQRs(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) colIQRs(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) iqr(x, idxs = NULL, na.rm = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Additional arguments passed to \code{\link{rowQuantiles}}() (\code{colQuantiles()}).} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Estimates of the interquartile range for each row (column) in a matrix. } \section{Missing values}{ Contrary to \code{\link[stats]{IQR}}, which gives an error if there are missing values and \code{na.rm = FALSE}, \code{iqr()} and its corresponding row and column-specific functions return \code{\link[base]{NA}}_real_. } \examples{ set.seed(1) x <- matrix(rnorm(50 * 40), nrow = 50, ncol = 40) str(x) # Row IQRs q <- rowIQRs(x) print(q) q0 <- apply(x, MARGIN = 1, FUN = IQR) stopifnot(all.equal(q0, q)) # Column IQRs q <- colIQRs(x) print(q) q0 <- apply(x, MARGIN = 2, FUN = IQR) stopifnot(all.equal(q0, q)) } \seealso{ See \code{\link[stats]{IQR}}. See \code{\link{rowSds}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowQuantiles.Rd0000644000176200001440000000516214522461565016352 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowQuantiles.R \name{rowQuantiles} \alias{rowQuantiles} \alias{colQuantiles} \title{Estimates quantiles for each row (column) in a matrix} \usage{ rowQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., useNames = TRUE, drop = TRUE) colQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., useNames = TRUE, drop = TRUE) } \arguments{ \item{x}{An \code{\link[base]{integer}}, \code{\link[base]{numeric}} or \code{\link[base]{logical}} NxK \code{\link[base]{matrix}} with N >= 0.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{probs}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of J probabilities in [0, 1].} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{type}{An \code{\link[base]{integer}} specifying the type of estimator. See \code{\link[stats]{quantile}} for more details.} \item{digits}{An \code{\link[base]{integer}} specifying the precision of the formatted percentages. Not used when `useNames = FALSE`. In **matrixStats** (< 0.63.0), the default used to be `max(2L, getOption("digits"))` inline with R (< 4.1.0).} \item{...}{Additional arguments passed to \code{\link[stats]{quantile}}.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{drop}{If TRUE, singleton dimensions in the result are dropped, otherwise not.} } \value{ Returns a NxJ (KxJ) \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) for which the J quantiles are calculated. The return type is either integer or numeric depending on \code{type}. } \description{ Estimates quantiles for each row (column) in a matrix. } \examples{ set.seed(1) x <- matrix(rnorm(50 * 40), nrow = 50, ncol = 40) str(x) probs <- c(0.25, 0.5, 0.75) # Row quantiles q <- rowQuantiles(x, probs = probs) print(q) q_0 <- apply(x, MARGIN = 1, FUN = quantile, probs = probs) stopifnot(all.equal(q_0, t(q))) # Column IQRs q <- colQuantiles(x, probs = probs) print(q) q_0 <- apply(x, MARGIN = 2, FUN = quantile, probs = probs) stopifnot(all.equal(q_0, t(q))) } \seealso{ \code{\link[stats]{quantile}}. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/matrixStats-package.Rd0000644000176200001440000000167314372747711017577 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/999.package.R \docType{package} \name{matrixStats-package} \alias{matrixStats-package} \alias{matrixStats} \title{Package matrixStats} \description{ High-performing functions operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions optimized per data type and for subsetted calculations such that both memory usage and processing time is minimized. There are also optimized vector-based methods, e.g. binMeans(), madDiff() and weightedMedian(). } \section{How to cite this package}{ Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and Columns of Matrices (and to Vectors). R package version 0.52.2. https://github.com/HenrikBengtsson/matrixStats } \author{ Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder } \keyword{package} matrixStats/man/indexByRow.Rd0000644000176200001440000000256214372747711015753 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/indexByRow.R \name{indexByRow} \alias{indexByRow} \title{Translates matrix indices by rows into indices by columns} \usage{ indexByRow(dim, idxs = NULL, ...) } \arguments{ \item{dim}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length two specifying the length of the "template" matrix.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{...}{Not used.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of indices. } \description{ Translates matrix indices by rows into indices by columns. } \section{Known limitations}{ The current implementation does not support long-vector indices, because both input and output indices are of type integers. This means that the indices in argument \code{idxs} can only be in range [1,2^31-1]. Using a greater value will be coerced to \code{NA_integer_}. Moreover, returned indices can only be in the same range [1,2^31-1]. } \examples{ dim <- c(5, 4) X <- matrix(NA_integer_, nrow = dim[1], ncol = dim[2]) Y <- t(X) idxs <- seq_along(X) # Assign by columns X[idxs] <- idxs print(X) # Assign by rows Y[indexByRow(dim(Y), idxs)] <- idxs print(Y) stopifnot(X == t(Y)) } \author{ Henrik Bengtsson } \keyword{iteration} \keyword{logic} matrixStats/man/binMeans.Rd0000644000176200001440000000500214406445217015377 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/binMeans.R \name{binMeans} \alias{binMeans} \title{Fast mean calculations in non-overlapping bins} \usage{ binMeans(y, x, idxs = NULL, bx, na.rm = TRUE, count = TRUE, right = FALSE, ...) } \arguments{ \item{y}{A \code{\link[base]{numeric}} or \code{\link[base]{logical}} \code{\link[base]{vector}} of K values to calculate means on.} \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values in \code{y} are dropped before calculating the mean, otherwise not.} \item{count}{If \code{\link[base:logical]{TRUE}}, the number of data points in each bins is returned as attribute \code{count}, which is an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B.} \item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open), otherwise left-closed (right open).} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length B. } \description{ Computes the sample means in non-overlapping bins } \details{ \code{binMeans(x, bx, right = TRUE)} gives equivalent results as \code{rev(binMeans(-x, bx = sort(-bx), right = FALSE))}, but is faster. } \section{Missing and non-finite values}{ Data points where either of \code{y} and \code{x} is missing are dropped (and therefore are also not counted). Non-finite values in \code{y} are not allowed and gives an error. Missing values in \code{bx} are not allowed and gives an error. } \examples{ x <- 1:200 mu <- double(length(x)) mu[1:50] <- 5 mu[101:150] <- -5 y <- mu + rnorm(length(x)) # Binning bx <- c(0, 50, 100, 150, 200) + 0.5 y_s <- binMeans(y, x = x, bx = bx) plot(x, y) for (kk in seq_along(y_s)) { lines(bx[c(kk, kk + 1)], y_s[c(kk, kk)], col = "blue", lwd = 2) } } \references{ [1] R-devel thread \emph{Fastest non-overlapping binning mean function out there?} on Oct 3, 2012\cr } \seealso{ \code{\link{binCounts}}(). \code{\link[stats]{aggregate}} and \code{\link[base]{mean}}(). } \author{ Henrik Bengtsson with initial code contributions by Martin Morgan [1]. } \keyword{univar} matrixStats/man/rowSds.Rd0000644000176200001440000000503114522461565015131 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowMads.R, R/rowSds.R \name{rowMads} \alias{rowMads} \alias{colMads} \alias{rowSds} \alias{colSds} \title{Standard deviation estimates for each row (column) in a matrix} \usage{ rowMads(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colMads(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) rowSds(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) colSds(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{center}{(optional) The center, defaults to the row means for the SD estimators and row medians for the MAD estimators.} \item{constant}{A scale factor. See \code{\link[stats]{mad}} for details.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Additional arguments passed to \code{rowMeans()} and \code{rowSums()}.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{refine}{If \code{\link[base:logical]{TRUE}}, `center` is NULL, and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Standard deviation estimates for each row (column) in a matrix. } \seealso{ \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats:cor]{var}}. \code{\link{rowIQRs}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/validateIndices.Rd0000644000176200001440000000155114406445217016740 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/validateIndices.R \name{validateIndices} \alias{validateIndices} \title{Validate indices} \usage{ validateIndices(idxs = NULL, maxIdx, allowOutOfBound = TRUE) } \arguments{ \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{maxIdx}{The possible max index.} \item{allowOutOfBound}{Allow positive out of bound to indicate \code{\link[base]{NA}}.} } \value{ Returns a validated integers list indicating the indices. If some of the indices cannot be represented as an integer, the indices are returned as doubles. } \description{ Computes validated positive indices from given indices. } \details{ \emph{WARNING: This function is defunct and will be removed in a future version.} } \keyword{internal} matrixStats/man/binCounts.Rd0000644000176200001440000000364114372747711015624 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/binCounts.R \name{binCounts} \alias{binCounts} \title{Fast element counting in non-overlapping bins} \usage{ binCounts(x, idxs = NULL, bx, right = FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned and counted.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}.} \item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open), otherwise left-closed (right open).} \item{...}{Not used.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B with non-negative integers. } \description{ Counts the number of elements in non-overlapping bins } \details{ \code{binCounts(x, bx, right = TRUE)} gives equivalent results as \code{rev(binCounts(-x, bx = rev(-bx), right = FALSE))}, but is faster and more memory efficient. } \section{Missing and non-finite values}{ Missing values in \code{x} are ignored/dropped. Missing values in \code{bx} are not allowed and gives an error. } \seealso{ An alternative for counting occurrences within bins is \code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks = bx, plot = FALSE)$counts}. That approach is ~30-60\% slower than \code{binCounts(..., right = TRUE)}. To count occurrences of indices \code{x} (positive \code{\link[base]{integer}}s) in \code{[1, B]}, use \code{tabulate(x, nbins = B)}, where \code{x} does \emph{not} have to be sorted first. For details, see \code{\link[base]{tabulate}}(). To average values within bins, see \code{\link{binMeans}}(). } \author{ Henrik Bengtsson } \keyword{univar} matrixStats/man/mean2.Rd0000644000176200001440000000432414372747711014661 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mean2.R \name{mean2} \alias{mean2} \title{Fast averaging over subset of vector elements} \usage{ mean2(x, idxs = NULL, na.rm = FALSE, refine = TRUE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Computes the sample mean of all or a subset of values. } \details{ \code{mean2(x, idxs)} gives equivalent results as \code{mean(x[idxs])}, but is faster and more memory efficient since it avoids the actual subsetting which requires copying of elements and garbage collection thereof. If \code{x} is \code{\link[base]{numeric}} and \code{refine = TRUE}, then a two-pass scan is used to calculate the average. The first scan calculates the total sum and divides by the number of (non-missing) values. In the second scan, this average is refined by adding the residuals towards the first average. The \code{\link[base]{mean}}() uses this approach. \code{mean2(..., refine = FALSE)} is almost twice as fast as \code{mean2(..., refine = TRUE)}. } \examples{ x <- 1:10 n <- length(x) idxs <- seq(from = 1, to = n, by = 2) s1 <- mean(x[idxs]) # 25 s2 <- mean2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) idxs <- seq(from = n, to = 1, by = -2) s1 <- mean(x[idxs]) # 25 s2 <- mean2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) s1 <- mean(x) # 55 s2 <- mean2(x) # 55 stopifnot(identical(s1, s2)) } \seealso{ \code{\link[base]{mean}}(). To efficiently sum over a subset, see \code{\link{sum2}}(). } \author{ Henrik Bengtsson } \keyword{internal} \keyword{univar} matrixStats/man/rowMedians.Rd0000644000176200001440000000413214522461565015761 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowMedians.R \name{rowMedians} \alias{rowMedians} \alias{colMedians} \title{Calculates the median for each row (column) in a matrix} \usage{ rowMedians(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colMedians(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows, cols}{A \code{\link[base]{vector}} indicating subset of rows (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the median for each row (column) in a matrix. } \details{ The implementation of \code{rowMedians()} and \code{colMedians()} is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a special implementation for \code{\link[base]{integer}} matrices. That is, if \code{x} is an \code{\link[base]{integer}} \code{\link[base]{matrix}}, then \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))}) would require three times the memory of \code{rowMedians(x)} (\code{colMedians(x)}), but all this is avoided. } \seealso{ See \code{\link{rowWeightedMedians}()} and \code{colWeightedMedians()} for weighted medians. For mean estimates, see \code{\link{rowMeans2}()} and \code{\link[base:colSums]{rowMeans}()}. } \author{ Henrik Bengtsson, Harris Jaffee } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/x_OP_y.Rd0000644000176200001440000000435314522461565015053 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/x_OP_y.R \name{x_OP_y} \alias{x_OP_y} \alias{t_tx_OP_y} \title{Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)'} \usage{ x_OP_y(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) t_tx_OP_y(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) } \arguments{ \item{x}{A \code{\link[base]{numeric}} or \code{\link[base]{logical}} NxK \code{\link[base]{matrix}}.} \item{y}{A \code{\link[base]{numeric}} or \code{\link[base]{logical}} \code{\link[base]{vector}} of length L.} \item{OP}{A \code{\link[base]{character}} specifying which operator to use.} \item{xrows, xcols}{A \code{\link[base]{vector}} indicating subset of rows (and/or columns) to operate over 'x'. If \code{\link[base]{NULL}}, no subsetting is done.} \item{yidxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over 'y'. If \code{\link[base]{NULL}}, no subsetting is done.} \item{commute}{If \code{\link[base:logical]{TRUE}}, 'y OP x' ('t(y OP t(x))') is calculated, otherwise 'x OP y' ('t(t(x) OP y)').} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} } \value{ Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. } \description{ Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)', where OP can be +, -, *, and /. For + and *, na.rm = TRUE will drop missing values first. } \section{Missing values}{ If \code{na.rm = TRUE}, then missing values are "dropped" before applying the operator to each pair of values. For instance, if \code{x[1, 1]} is a missing value, then the result of \code{x[1, 1] + y[1]} equals \code{y[1]}. If also \code{y[1]} is a missing value, then the result is a missing value. This only applies to additions and multiplications. For subtractions and divisions, argument \code{na.rm} is ignored. } \examples{ x <- matrix(c(1, 2, 3, NA, 5, 6), nrow = 3, ncol = 2) # Add 'y' to each column y <- 1:2 z0 <- x + y z1 <- x_OP_y(x, y, OP = "+") print(z1) stopifnot(all.equal(z1, z0)) # Add 'y' to each row y <- 1:3 z0 <- t(t(x) + y) z1 <- t_tx_OP_y(x, y, OP = "+") print(z1) stopifnot(all.equal(z1, z0)) } \author{ Henrik Bengtsson } \keyword{internal} matrixStats/man/diff2.Rd0000644000176200001440000000226114436403546014643 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/diff2.R \name{diff2} \alias{diff2} \title{Fast lagged differences} \usage{ diff2(x, idxs = NULL, lag = 1L, differences = 1L, ..., useNames = TRUE) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{lag}{An \code{\link[base]{integer}} specifying the lag.} \item{differences}{An \code{\link[base]{integer}} specifying the order of difference.} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N - \code{differences}. } \description{ Computes the lagged and iterated differences. } \examples{ diff2(1:10) } \seealso{ \code{\link[base]{diff}}(). } \author{ Henrik Bengtsson } \keyword{internal} \keyword{univar} matrixStats/man/rowAlls.Rd0000644000176200001440000000654614522461565015307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowAlls.R \name{rowAlls} \alias{rowAlls} \alias{colAlls} \alias{allValue} \alias{rowAnys} \alias{colAnys} \alias{anyValue} \title{Checks if a value exists / does not exist in each row (column) of a matrix} \usage{ rowAlls(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colAlls(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) allValue(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) rowAnys(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) colAnys(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) anyValue(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{value}{A value to search for.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ \code{rowAlls()} (\code{colAlls()}) returns an \code{\link[base]{logical}} \code{\link[base]{vector}} of length N (K). Analogously for \code{rowAnys()} (\code{rowAlls()}). } \description{ Checks if a value exists / does not exist in each row (column) of a matrix. } \details{ These functions takes either a matrix or a vector as input. If a vector, then argument \code{dim.} must be specified and fulfill \code{prod(dim.) == length(x)}. The result will be identical to the results obtained when passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, but avoids having to temporarily create/allocate a matrix, if only such is needed only for these calculations. } \section{Logical \code{value}}{ When \code{value} is logical, the result is as if the function is applied on \code{as.logical(x)}. More specifically, if \code{x} is numeric, then all zeros are treated as \code{FALSE}, non-zero values as \code{TRUE}, and all missing values as \code{NA}. } \examples{ x <- matrix(FALSE, nrow = 10, ncol = 5) x[3:7, c(2, 4)] <- TRUE x[2:4, ] <- TRUE x[, 1] <- TRUE x[5, ] <- FALSE x[, 5] <- FALSE print(x) print(rowCounts(x)) # 1 4 4 4 0 3 3 1 1 1 print(colCounts(x)) # 9 5 3 5 0 print(rowAnys(x)) print(which(rowAnys(x))) # 1 2 3 4 6 7 8 9 10 print(colAnys(x)) print(which(colAnys(x))) # 1 2 3 4 } \seealso{ rowCounts } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{logic} \keyword{univar} matrixStats/man/weightedMad.Rd0000644000176200001440000000537614522461565016106 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedMad.R \name{weightedMad} \alias{weightedMad} \alias{rowWeightedMads} \alias{colWeightedMads} \title{Weighted Median Absolute Deviation (MAD)} \usage{ weightedMad(x, w = NULL, idxs = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ...) rowWeightedMads(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = TRUE) colWeightedMads(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = TRUE) } \arguments{ \item{x}{\code{\link[base]{vector}} of type \code{\link[base]{integer}}, \code{\link[base]{numeric}}, or \code{\link[base]{logical}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{constant}{A \code{\link[base]{numeric}} scale factor, cf. \code{\link[stats]{mad}}.} \item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center location of the data. If \code{\link[base]{NULL}}, it is estimated from data.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Computes a weighted MAD of a numeric vector. } \section{Missing values}{ Missing values are dropped at the very beginning, if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not. } \examples{ x <- 1:10 n <- length(x) m1 <- mad(x) m2 <- weightedMad(x) stopifnot(identical(m1, m2)) w <- rep(1, times = n) m1 <- weightedMad(x, w) stopifnot(identical(m1, m2)) # All weight on the first value w[1] <- Inf m <- weightedMad(x, w) stopifnot(m == 0) # All weight on the first two values w[1:2] <- Inf m1 <- mad(x[1:2]) m2 <- weightedMad(x, w) stopifnot(identical(m1, m2)) # All weights set to zero w <- rep(0, times = n) m <- weightedMad(x, w) stopifnot(is.na(m)) } \seealso{ For the non-weighted MAD, see \code{\link[stats]{mad}}. Internally \code{\link{weightedMedian}}() is used to calculate the weighted median. } \author{ Henrik Bengtsson } \keyword{robust} \keyword{univar} matrixStats/man/logSumExp.Rd0000644000176200001440000000622014372747711015577 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/logSumExp.R \name{logSumExp} \alias{logSumExp} \title{Accurately computes the logarithm of the sum of exponentials} \usage{ logSumExp(lx, idxs = NULL, na.rm = FALSE, ...) } \arguments{ \item{lx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}}. Typically \code{lx} are \eqn{log(x)} values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Accurately computes the logarithm of the sum of exponentials, that is, \eqn{log(sum(exp(lx)))}. If \eqn{lx = log(x)}, then this is equivalently to calculating \eqn{log(sum(x))}. } \details{ This function, which avoid numerical underflow, is often used when computing the logarithm of the sum of small numbers (\eqn{|x| << 1}) such as probabilities. This is function is more accurate than \code{log(sum(exp(lx)))} when the values of \eqn{x = exp(lx)} are \eqn{|x| << 1}. The implementation of this function is based on the observation that \deqn{ log(a + b) = [ la = log(a), lb = log(b) ] = log( exp(la) + exp(lb) ) = la + log ( 1 + exp(lb - la) ) } Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is less likely that the computation of \eqn{1 + exp(lb - la)} will not underflow/overflow numerically. Because of this, the overall result from this function should be more accurate. Analogously to this, the implementation of this function finds the maximum value of \code{lx} and subtracts it from the remaining values in \code{lx}. } \section{Benchmarking}{ This method is optimized for correctness, that avoiding underflowing. It is implemented in native code that is optimized for speed and memory. } \examples{ ## EXAMPLE #1 lx <- c(1000.01, 1000.02) y0 <- log(sum(exp(lx))) print(y0) ## Inf y1 <- logSumExp(lx) print(y1) ## 1000.708 ## EXAMPLE #2 lx <- c(-1000.01, -1000.02) y0 <- log(sum(exp(lx))) print(y0) ## -Inf y1 <- logSumExp(lx) print(y1) ## -999.3218 ## EXAMPLE #3 ## R-help thread 'Beyond double-precision?' on May 9, 2009. set.seed(1) x <- runif(50) ## The logarithm of the harmonic mean y0 <- log(1 / mean(1 / x)) print(y0) ## -1.600885 lx <- log(x) y1 <- log(length(x)) - logSumExp(-lx) print(y1) ## [1] -1.600885 # Sanity check stopifnot(all.equal(y1, y0)) } \references{ [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr [2] Laurent El Ghaoui, \emph{Hyper-Textbook: Optimization Models and Applications}, University of California at Berkeley, August 2012. (Chapter 'Log-Sum-Exp (LSE) Function and Properties') \cr [3] R-help thread \emph{logsumexp function in R}, 2011-02-17. \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr } \seealso{ To compute this function on rows or columns of a matrix, see \code{\link{rowLogSumExps}}(). For adding \emph{two} double values in native code, R provides the C function \code{logspace_add()} [1]. For properties of the log-sum-exponential function, see [2]. } \author{ Henrik Bengtsson } matrixStats/man/allocMatrix.Rd0000644000176200001440000000205414372747711016134 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/allocMatrix.R \name{allocMatrix} \alias{allocMatrix} \alias{allocVector} \alias{allocArray} \title{Allocates an empty vector, matrix or array} \usage{ allocMatrix(nrow, ncol, value = 0, ...) allocVector(length, value = 0, ...) allocArray(dim, value = 0, ...) } \arguments{ \item{value}{A \code{\link[base]{numeric}} scalar that all elements will have as value.} \item{length, nrow, ncol, dim}{\code{\link[base]{numeric}}s specifying the dimension of the created \code{\link[base]{vector}}, \code{\link[base]{matrix}} or \code{\link[base]{array}}.} } \value{ Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}} respectively of the same data type as \code{value}. } \description{ Allocates an empty vector, matrix or array faster than the corresponding function in R. } \seealso{ See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}}. } \author{ Henrik Bengtsson } \keyword{internal} \keyword{programming} matrixStats/man/rowWeightedMedians.Rd0000644000176200001440000000516414522461565017450 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowWeightedMedians.R \name{rowWeightedMedians} \alias{rowWeightedMedians} \alias{colWeightedMedians} \title{Calculates the weighted medians for each row (column) in a matrix} \usage{ rowWeightedMedians(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) colWeightedMedians(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Additional arguments passed to \code{\link{weightedMedian}}().} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the weighted medians for each row (column) in a matrix. } \details{ The implementations of these methods are optimized for both speed and memory. If no weights are given, the corresponding \code{\link{rowMedians}}()/\code{colMedians()} is used. } \examples{ x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # Non-weighted row averages mu_0 <- rowMedians(x) mu <- rowWeightedMedians(x) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (uniform weights) w <- rep(2.5, times = ncol(x)) mu <- rowWeightedMedians(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(1, 1, 0, 1) mu_0 <- rowMedians(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMedians(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(0, 1, 0, 0) mu_0 <- rowMedians(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMedians(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted averages by rows and columns w <- 1:4 mu_1 <- rowWeightedMedians(x, w = w) mu_2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(mu_2, mu_1)) } \seealso{ Internally, \code{\link{weightedMedian}}() is used. See \code{\link{rowMedians}}() and \code{colMedians()} for non-weighted medians. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowVars.Rd0000644000176200001440000001051414522461565015315 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowVars.R \name{rowVars} \alias{rowVars} \alias{colVars} \title{Variance estimates for each row (column) in a matrix} \usage{ rowVars(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) colVars(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{refine}{If \code{\link[base:logical]{TRUE}}, `center` is NULL, and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{center}{(optional; a vector or length N (K)) If the row (column) means are already estimated, they can be pre-specified using this argument. This avoid re-estimating them again. _Warning: It is important that a non-biased sample mean estimate is passed. If not, then the variance estimate of the spread will also be biased._ If NULL (default), the row/column means are estimated internally.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Additional arguments passed to \code{rowMeans()} and \code{rowSums()}.} \item{useNames}{If \code{\link[base:logical]{TRUE}} (default), names attributes of the result are set, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Variance estimates for each row (column) in a matrix. } \section{Providing center estimates}{ The sample variance is estimated as \eqn{n/(n-1) * mean((x - center)^2)}, where \eqn{center} is estimated as the sample mean, by default. In matrixStats (< 0.58.0), \eqn{n/(n-1) * (mean(x^2) - center^2)} was used. Both formulas give the same result _when_ `center` is the sample mean estimate. Argument `center` can be used to provide an already existing estimate. It is important that the sample mean estimate is passed. If not, then the variance estimate of the spread will be biased. For the time being, in order to lower the risk for such mistakes, argument `center` is occasionally validated against the sample-mean estimate. If a discrepancy is detected, an informative error is provided to prevent incorrect variance estimates from being used. For performance reasons, this check is only performed once every 50 times. The frequency can be controlled by R option `matrixStats.vars.formula.freq`, whose default can be set by environment variable `R_MATRIXSTATS_VARS_FORMULA_FREQ`. } \examples{ set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # Row averages print(rowMeans(x)) print(rowMedians(x)) # Column averages print(colMeans(x)) print(colMedians(x)) # Row variabilities print(rowVars(x)) print(rowSds(x)) print(rowMads(x)) print(rowIQRs(x)) # Column variabilities print(rowVars(x)) print(colSds(x)) print(colMads(x)) print(colIQRs(x)) # Row ranges print(rowRanges(x)) print(cbind(rowMins(x), rowMaxs(x))) print(cbind(rowOrderStats(x, which = 1), rowOrderStats(x, which = ncol(x)))) # Column ranges print(colRanges(x)) print(cbind(colMins(x), colMaxs(x))) print(cbind(colOrderStats(x, which = 1), colOrderStats(x, which = nrow(x)))) x <- matrix(rnorm(2000), nrow = 50, ncol = 40) # Row standard deviations d <- rowDiffs(x) s1 <- rowSds(d) / sqrt(2) s2 <- rowSds(x) print(summary(s1 - s2)) # Column standard deviations d <- colDiffs(x) s1 <- colSds(d) / sqrt(2) s2 <- colSds(x) print(summary(s1 - s2)) } \seealso{ See \code{rowMeans()} and \code{rowSums()} in \code{\link[base]{colSums}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/DESCRIPTION0000644000176200001440000000405514535706561014323 0ustar liggesusersPackage: matrixStats Version: 1.2.0 Depends: R (>= 2.12.0) Suggests: base64enc, ggplot2, knitr, markdown, microbenchmark, R.devices, R.rsp VignetteBuilder: R.rsp Title: Functions that Apply to Rows and Columns of Matrices (and to Vectors) Authors@R: c( person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"), email="henrikb@braju.com"), person("Constantin", "Ahlmann-Eltze", role = "ctb"), person("Hector", "Corrada Bravo", role="ctb"), person("Robert", "Gentleman", role="ctb"), person("Jan", "Gleixner", role="ctb"), person("Peter", "Hickey", role="ctb"), person("Ola", "Hossjer", role="ctb"), person("Harris", "Jaffee", role="ctb"), person("Dongcan", "Jiang", role="ctb"), person("Peter", "Langfelder", role="ctb"), person("Brian", "Montgomery", role="ctb"), person("Angelina", "Panagopoulou", role="ctb"), person("Hugh", "Parsonage", role="ctb"), person("Jakob Peder", "Pettersen", role="ctb")) Author: Henrik Bengtsson [aut, cre, cph], Constantin Ahlmann-Eltze [ctb], Hector Corrada Bravo [ctb], Robert Gentleman [ctb], Jan Gleixner [ctb], Peter Hickey [ctb], Ola Hossjer [ctb], Harris Jaffee [ctb], Dongcan Jiang [ctb], Peter Langfelder [ctb], Brian Montgomery [ctb], Angelina Panagopoulou [ctb], Hugh Parsonage [ctb], Jakob Peder Pettersen [ctb] Maintainer: Henrik Bengtsson Description: High-performing functions operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions optimized per data type and for subsetted calculations such that both memory usage and processing time is minimized. There are also optimized vector-based methods, e.g. binMeans(), madDiff() and weightedMedian(). License: Artistic-2.0 LazyLoad: TRUE NeedsCompilation: yes ByteCompile: TRUE URL: https://github.com/HenrikBengtsson/matrixStats BugReports: https://github.com/HenrikBengtsson/matrixStats/issues RoxygenNote: 7.2.3 Packaged: 2023-12-11 19:55:37 UTC; henrik Repository: CRAN Date/Publication: 2023-12-11 22:30:09 UTC matrixStats/build/0000755000176200001440000000000014535664471013713 5ustar liggesusersmatrixStats/build/vignette.rds0000644000176200001440000000041514535664471016252 0ustar liggesusersmPN0tMR/S=WpAW+ިƗS6$ZKkڳX1Rg)K3J5m=ErQqt9gx0WJTtܔN)4e[}p_=Ӌ b,h1\D RC 58ܙʸy Fy)Rxh\t)¤]+!x.nJ7W,I},D+>G^5ݿ` 4matrixStats/tests/0000755000176200001440000000000014522461565013751 5ustar liggesusersmatrixStats/tests/diff2_subset.R0000644000176200001440000000153314522461565016455 0ustar liggesuserslibrary("matrixStats") diff2_R <- function(..., useNames=NA){ res <- diff(...) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) for (setNames in c(TRUE, FALSE)) { if (setNames) names(x) <- LETTERS[1:6] else names(x) <- NULL for (l in 1:2) { for (d in 1:2) { for (idxs in index_cases) { for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestVector(x, idxs, ftest = diff2, fsure = diff2_R, lag = l, differences = d, useNames = useNames) } } } } }matrixStats/tests/count.R0000644000176200001440000000512014520313754015214 0ustar liggesuserslibrary("matrixStats") count_R <- function(x, value = TRUE, na.rm = FALSE, ...) { if (is.na(value)) { counts <- sum(is.na(x)) } else { counts <- sum(x == value, na.rm = na.rm) } as.integer(counts) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer and numeric # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- runif(20 * 5, min = -3, max = 3) x[sample.int(length(x), size = 7)] <- 0 storage.mode(x) <- mode for (na.rm in c(FALSE, TRUE)) { # Count zeros n0 <- count_R(x, value = 0, na.rm = na.rm) n1 <- count(x, value = 0, na.rm = na.rm) stopifnot(identical(n1, n0)) all <- allValue(x, value = 0, na.rm = na.rm) any <- anyValue(x, value = 0, na.rm = na.rm) # Count NAs n0 <- count_R(x, value = NA, na.rm = na.rm) n1 <- count(x, value = NA, na.rm = na.rm) stopifnot(identical(n1, n0)) all <- allValue(x, value = NA, na.rm = na.rm) any <- anyValue(x, value = NA, na.rm = na.rm) if (mode == "integer") { ux <- unique(as.vector(x)) n0 <- n1 <- integer(length(x)) for (value in ux) { n0 <- n0 + count_R(x, value = value, na.rm = na.rm) n1 <- n1 + count(x, value = value, na.rm = na.rm) stopifnot(identical(n1, n0)) } stopifnot(all(n0 == ncol(x))) } # if (mode == "integer") } # for (na.rm ...) } # for (mode ...) # All NAs na_list <- list(NA_integer_, NA_real_, NaN) for (na_value in na_list) { x <- rep(na_value, times = 10L) for (na.rm in c(FALSE, TRUE)) { n0 <- count_R(x, na.rm = na.rm) n1 <- count(x, na.rm = na.rm) stopifnot(identical(n1, n0)) # Count NAs n0 <- count_R(x, value = NA, na.rm = na.rm) n1 <- count(x, value = NA, na.rm = na.rm) stopifnot(identical(n1, n0)) any <- anyValue(x, value = NA, na.rm = na.rm) all <- allValue(x, value = NA, na.rm = na.rm) stopifnot(any) stopifnot(all) } } # for (na_value ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- logical(length = 10L) x[3:7] <- TRUE # Row/column counts for (na.rm in c(FALSE, TRUE)) { n0 <- count_R(x, na.rm = na.rm) n1 <- count(x, na.rm = na.rm) stopifnot(identical(n1, n0)) n_true <- count(x, value = TRUE, na.rm = na.rm) n_false <- count(x, value = FALSE, na.rm = na.rm) stopifnot(n_true + n_false == ncol(x)) # Count NAs n0 <- count_R(x, value = NA, na.rm = na.rm) n1 <- count(x, value = NA, na.rm = na.rm) stopifnot(identical(n1, n0)) } matrixStats/tests/rowCounts.R0000644000176200001440000001503614522461565016104 0ustar liggesuserslibrary("matrixStats") rowCounts_R <- function(x, value = TRUE, na.rm = FALSE, ...) { if (is.na(value)) { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(is.na(x)) ) } else { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(x == value, na.rm = na.rm) ) } # Preserve names attribute names <- names(counts) counts <- as.integer(counts) if (isTRUE(useNames) && !is.null(names)) names(counts) <- names counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer and numeric # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(runif(10 * 5, min = -3, max = 3), nrow = 10L, ncol = 5L) x[sample.int(length(x), size = 7L)] <- 0 storage.mode(x) <- mode dimnames = list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Count zeros r0 <- rowCounts_R(x, value = 0, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = 0, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = 0, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) # Count NAs r0 <- rowCounts_R(x, value = NA, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = NA, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = NA, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) if (mode == "integer") { ux <- unique(as.vector(x)) r0 <- r1 <- r2 <- integer(nrow(x)) for (value in ux) { r0 <- r0 + rowCounts_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- r1 + rowCounts(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- r2 + colCounts(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } stopifnot(all(r0 == ncol(x))) } # if (mode == "integer") } # for (useNames ...) } # for (na.rm ...) } # for (setDimnames ...) } # for (mode ...) # All NAs na_list <- list(NA_integer_, NA_real_, NaN) for (na_value in na_list) { x <- matrix(na_value, nrow = 10L, ncol = 5L) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCounts_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) # Count NAs r0 <- rowCounts_R(x, value = NA, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = NA, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = NA, na.rm = na.rm, useNames = useNames) stopifnot(all(r0 == ncol(x))) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } } } } # for (na_value ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(FALSE, nrow = 10L, ncol = 4L) x[7:8, 2:3] <- TRUE x[1:3, ] <- TRUE x[, 1] <- TRUE x[4, ] <- FALSE x[, 4] <- FALSE x[2, ] <- FALSE x[3, ] <- TRUE # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column counts for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCounts_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) r_true <- rowCounts(x, value = TRUE, na.rm = na.rm, useNames = useNames) r_false <- rowCounts(x, value = FALSE, na.rm = na.rm, useNames = useNames) stopifnot(r_true + r_false == ncol(x)) c_true <- colCounts(x, value = TRUE, na.rm = na.rm, useNames = useNames) c_false <- colCounts(x, value = FALSE, na.rm = na.rm, useNames = useNames) stopifnot(c_true + c_false == nrow(x)) # Count NAs r0 <- rowCounts_R(x, value = NA, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = NA, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = NA, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: character (not sure if this should be supported) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(letters, length.out = 10 * 5), nrow = 10L, ncol = 5L) x[2:3, 3:4] <- NA_character_ # Row/column counts for (na.rm in c(FALSE, TRUE)) { for (value in c("g", NA_character_)) { r0 <- rowCounts_R(x, value = value, na.rm = na.rm) r1 <- rowCounts(x, value = value, na.rm = na.rm) r2 <- colCounts(t(x), value = value, na.rm = na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) c <- count(x[1, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[1])) c <- count(x[2, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[2])) } } # NA row x <- matrix(0, nrow = 2L, ncol = 2L) x[1, ] <- NA_integer_ dimnames <- list(letters[1:2], LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCounts(x, value = 0, useNames = useNames) r1 <- rowCounts_R(x, value = 0, useNames = useNames) stopifnot(identical(r0, r1)) } } matrixStats/tests/rowSds_subset.R0000644000176200001440000000652714522461565016754 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 1L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as rowSds() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } else if (isFALSE(useNames)) names(sigma) <- NULL sigma } colSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 2L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as colSds() if (is.null(center) || nrow(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } else if (isFALSE(useNames)) names(sigma) <- NULL sigma } rowSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) sigma <- rowSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } colSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) sigma <- colSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowSds, fsure = rowSds_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowSds_center, fsure = rowSds_R, na.rm = na.rm, center = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colSds, fsure = rowSds_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colSds_center, fsure = rowSds_R, na.rm = na.rm, center = TRUE, useNames = useNames) } } } matrixStats/tests/rowLogSumExps_subset.R0000644000176200001440000000307414522461565020263 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowLogSumExps_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = function(rx, ...) { log(sum(exp(rx), ...)) }, ...) if (isFALSE(useNames)) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowLogSumExps, fsure = rowLogSumExps_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colLogSumExps, fsure = rowLogSumExps_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/rowCumsums.R0000644000176200001440000001230414522461565016260 0ustar liggesuserslibrary("matrixStats") rowCumsums_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumsum)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dimnames <- list(letters[1:10], LETTERS[1:5]) # to check dimnames attribute for (mode in c("logical", "integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(colCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(colCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dimnames <- list("a", "A") # to check dimnames attribute for (mode in c("logical", "integer", "double")) { x <- matrix(0, nrow = 1L, ncol = 1L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) r0 <- rowCumsums_R(x) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # Check dimnames attribute dimnames(x) <- dimnames # r0 <- rowCumsums_R(x) # > r0 # a # [1,] 0 r1 <- rowCumsums(x, useNames = TRUE) r2 <- t(colCumsums(t(x), useNames = TRUE)) stopifnot(identical(dimnames(r1), dimnames)) stopifnot(identical(dimnames(r2), dimnames)) dimnames(x) <- NULL } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - names <- LETTERS[1:5] # to check dimnames attribute for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") value <- 0 storage.mode(value) <- mode value2 <- value if (mode == "logical") value2 <- 0L # A 0x0 matrix x <- matrix(value, nrow = 0L, ncol = 0L) str(x) r0 <- matrix(value2, nrow = nrow(x), ncol = ncol(x)) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow = 0L, ncol = 5L) str(x) colnames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) colnames(x) <- colnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(rowCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) # A Nx0 matrix x <- matrix(value, nrow = 5L, ncol = 0L) str(x) rownames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) rownames(x) <- rownames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(rowCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) matrixStats/tests/weightedMedian_subset.R0000644000176200001440000000252714520313754020377 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf for (idxs in index_cases) { validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = TRUE) validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = FALSE) for (ties in c("weighted", "mean", "min", "max")) { validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = TRUE, ties = ties) validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = FALSE, ties = ties) } } } matrixStats/tests/rowLogSumExps.R0000644000176200001440000001341414522461565016675 0ustar liggesusers# Test inspired by the harmonic mean example in R-help # thread '[R] Beyond double-precision?' on May 9, 2009. library("matrixStats") library("stats") logSumExp0 <- function(lx) { idx_max <- which.max(lx) log1p(sum(exp(lx[-idx_max] - lx[idx_max]))) + lx[idx_max] } n <- 200L set.seed(1) for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(runif(n, min = 1.0, max = 3.0), nrow = 20L) storage.mode(x) <- mode str(x) # The logarithm of the harmonic mean by rows y_h <- log(1 / rowMeans(1 / x)) str(y_h) lx_neg <- -log(x) y0 <- log(ncol(x)) - apply(lx_neg, MARGIN = 1L, FUN = logSumExp0) stopifnot(all.equal(y0, y_h)) y1 <- log(ncol(x)) - apply(lx_neg, MARGIN = 1L, FUN = logSumExp) stopifnot(all.equal(y1, y0)) y2 <- log(ncol(x)) - rowLogSumExps(lx_neg) stopifnot(all.equal(y2, y0)) y3 <- log(ncol(x)) - colLogSumExps(t(lx_neg)) stopifnot(all.equal(y3, y0)) # The logarithm of the harmonic mean by columns y_h <- log(1 / colMeans(1 / x)) str(y_h) y0 <- log(nrow(x)) - apply(lx_neg, MARGIN = 2L, FUN = logSumExp0) stopifnot(all.equal(y0, y_h)) y1 <- log(nrow(x)) - apply(lx_neg, MARGIN = 2L, FUN = logSumExp) stopifnot(all.equal(y1, y0)) y2 <- log(nrow(x)) - colLogSumExps(lx_neg) stopifnot(all.equal(y2, y0)) y3 <- log(nrow(x)) - rowLogSumExps(t(lx_neg)) stopifnot(all.equal(y3, y0)) # Testing names rownames(lx_neg) <- seq_len(nrow(x)) colnames(lx_neg) <- seq_len(ncol(x)) y2 <- rowLogSumExps(lx_neg, useNames = TRUE) stopifnot(identical(names(y2), rownames(lx_neg))) y3 <- colLogSumExps(t(lx_neg), useNames = TRUE) stopifnot(identical(names(y3), rownames(lx_neg))) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Zero-size matrices lx <- matrix(numeric(0L), nrow = 0L, ncol = 0L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) ## Zero-height matrices lx <- matrix(numeric(0L), nrow = 0L, ncol = 5L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == -Inf)) ## Zero-width matrices lx <- matrix(numeric(0L), nrow = 5L, ncol = 0L) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == -Inf)) ## Matrices with one element lx <- matrix(1.0, nrow = 1L, ncol = 1L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == lx)) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == lx)) ## All missing values lx <- matrix(NA_real_, nrow = 1L, ncol = 1L) y <- rowLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(identical(y, -Inf)) lx <- matrix(NA_real_, nrow = 1L, ncol = 1L) y <- colLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(identical(y, -Inf)) lx <- matrix(NA_real_, nrow = 2L, ncol = 2L) y <- rowLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == -Inf)) y <- rowLogSumExps(lx, na.rm = FALSE) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(is.na(y) & !is.nan(y))) lx <- matrix(NA_real_, nrow = 2L, ncol = 2L) y <- colLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == -Inf)) y <- colLogSumExps(lx, na.rm = FALSE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(is.na(y) & !is.nan(y))) ## +Inf values lx <- matrix(c(1, 2, +Inf), nrow = 3L, ncol = 2L) y <- colLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == +Inf)) ## multiple -Inf values lx <- matrix(c(-Inf, -Inf), nrow = 2L, ncol = 3L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == -Inf)) lx <- matrix(c(-Inf, 5, -Inf), nrow = 2L, ncol = 3L, byrow = TRUE) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == 5)) ## Bug report #104 (https://github.com/HenrikBengtsson/matrixStats/issues/104) ## (This would core dump on Windows) x <- matrix(0.0, nrow = 2L, ncol = 32762L) y <- colLogSumExps(x) str(y) ## Bug report #120 (https://github.com/HenrikBengtsson/matrixStats/issues/120) ## (This would error if x had rownames/colnames and non-NULL rows/cols were ## used) x <- matrix(runif(6), nrow = 2L, ncol = 3L, dimnames = list(c("A", "B"), c("a", "b", "c"))) y <- colLogSumExps(x, cols = 3:1, useNames = TRUE) stopifnot(names(y) == c("c", "b", "a")) y <- rowLogSumExps(x, rows = 2, useNames = TRUE) stopifnot(names(y) == "B") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Check names attributes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowLogSumExps_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = function(rx, ...) { log(sum(exp(rx), ...)) }, ...) if (isFALSE(useNames)) names(res) <- NULL res } x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowLogSumExps_R(x, useNames = useNames) y1 <- rowLogSumExps(x, useNames = useNames) y2 <- colLogSumExps(t(x), useNames = useNames) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y2, y0)) } } matrixStats/tests/x_OP_y.R0000644000176200001440000000731314520313754015267 0ustar liggesuserslibrary("matrixStats") x_OP_y_R <- function(x, y, OP, na.rm = FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() t_tx_OP_y_R <- function(x, y, OP, na.rm = FALSE) { t(x_OP_y_R(x = t(x), y = y, OP = OP, na.rm = na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # No missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:16, nrow = 4, ncol = 4) y <- 1:nrow(x) storage.mode(y) <- storage.mode(x) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP = '%s', na.rm = %s\n", OP, na.rm)) a0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) a1 <- x_OP_y(x, y, OP, na.rm = na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x, y, OP, na.rm = na.rm) b1 <- t_tx_OP_y(x, y, OP, na.rm = na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values in x, y, or both. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (which in c("x", "y", "both")) { x <- matrix(1:16, nrow = 4, ncol = 4) y <- 1:nrow(x) storage.mode(y) <- storage.mode(x) if (which == "x") { x[3:6] <- NA_real_ } else if (which == "y") { y[c(1, 3)] <- NA_real_ } else if (which == "both") { x[3:6] <- NA_real_ y[c(1, 3)] <- NA_real_ } for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP = '%s', na.rm = %s\n", OP, na.rm)) a0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) a1 <- x_OP_y(x, y, OP, na.rm = na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x, y, OP, na.rm = na.rm) b1 <- t_tx_OP_y(x, y, OP, na.rm = na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Length differences # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:8, nrow = 2, ncol = 4) y <- 1:ncol(x) storage.mode(y) <- storage.mode(x) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP = '%s', na.rm = %s\n", OP, na.rm)) a0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) a1 <- x_OP_y(x, y, OP, na.rm = na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x, y, OP, na.rm = na.rm) b1 <- t_tx_OP_y(x, y, OP, na.rm = na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - xs <- list( A = matrix(1:2, nrow = 2, ncol = 2), B = matrix(NA_integer_, nrow = 2, ncol = 2) ) ys <- list( A = 1L, B = NA_integer_ ) for (x in xs) { for (y in ys) { for (mode in c("logical", "integer", "double")) { storage.mode(x) <- mode storage.mode(y) <- mode str(list(x = x, y = y)) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("mode = '%s', OP = '%s', na.rm = %s\n", mode, OP, na.rm)) suppressWarnings({ z0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) z <- x_OP_y(x, y, OP, na.rm = na.rm) }) str(z) stopifnot(all.equal(z, z0)) } } } # for (mode ...) } # for (y ...) } # for (x ...) matrixStats/tests/rowIQRs_subset.R0000644000176200001440000000366314522461565017037 0ustar liggesuserslibrary("matrixStats") rowIQRs_R <- function(x, na.rm = FALSE, ..., useNames = NA) { quantile_na <- function(x, ..., na.rm = FALSE) { if (!na.rm && anyMissing(x)) return(c(NA_real_, NA_real_)) quantile(x, ..., na.rm = na.rm) } q <- apply(x, MARGIN = 1L, FUN = quantile_na, probs = c(0.25, 0.75), na.rm = na.rm) rownames(q) <- NULL # Not needed anymore # Preserve names attribute dim(q) <- c(2L, nrow(x)) colnames(q) <- if (isTRUE(useNames)) rownames(x) else NULL q[2L, , drop = TRUE] - q[1L, , drop = TRUE] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) for (idxs in index_cases) { for (na.rm in c(TRUE, FALSE)) { validateIndicesTestVector(x, idxs, ftest = iqr, fsure = function(x, na.rm) { dim(x) <- c(1L, length(x)) rowIQRs_R(x, na.rm = na.rm) }, na.rm = na.rm) } } x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowIQRs, fsure = rowIQRs_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colIQRs, fsure = rowIQRs_R, na.rm = na.rm, useNames = useNames) } } } matrixStats/tests/weightedVar_etal.R0000644000176200001440000000376014520313754017352 0ustar liggesuserslibrary("matrixStats") fcns <- list( weightedVar = weightedVar, weightedSd = weightedSd, weightedMad = weightedMad ) for (name in names(fcns)) { cat(sprintf("%s()...\n", name)) fcn <- fcns[[name]] for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") n <- 15L x <- runif(n, min = -5, max = 5) storage.mode(x) <- mode str(x) for (add_na in c(FALSE, TRUE)) { cat("add_na: ", add_na, "\n", sep = "") if (add_na) { x[c(5, 7)] <- NA } str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm: ", na.rm, "\n", sep = "") cat("Weights are not specified (all are 1)\n") m1 <- fcn(x, na.rm = na.rm) str(list(m1 = m1)) cat("All weights are 1\n") w <- rep(1, times = n) m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("First weight is 5\n") # Pull the mean towards zero w[1] <- 5 str(w) m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("All weights are 0\n") # All weights set to zero w <- rep(0, times = n) m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("First weight is 8.5\n") # Put even more weight on the zero w[1] <- 8.5 m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("First weight is Inf\n") # All weight on the first value w[1] <- Inf m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("Last weight is Inf\n") # All weight on the last value w[1] <- 1 w[n] <- Inf m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("Last weight is NA\n") # All weight on the last value w[1] <- 1 w[n] <- NA_real_ m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) } # for (na.rm ...) } # for (add_na ...) } # for (mode ...) cat(sprintf("%s()...DONE\n", name)) } # for (name ...) matrixStats/tests/binMeans,binCounts_subset.R0000644000176200001440000000315714520313754021156 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Naive R implementation of binMeans() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binMeans0 <- function(y, x, bx, na.rm = TRUE, count = TRUE, right = FALSE) { n_smooth <- length(bx) - 1L res <- double(n_smooth) counts <- rep(NaN, times = n_smooth) if (na.rm) { keep <- !is.na(x) & !is.na(y) x <- x[keep] y <- y[keep] } # For each bin... for (kk in seq_len(n_smooth)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk + 1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk + 1L]) } y_kk <- y[idxs] res[kk] <- mean(y_kk) counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") y <- runif(6, min = -6, max = 6) x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" bx <- c(-6, 0, 3, 4, 10) for (idxs in index_cases) { for (na.rm in c(TRUE, FALSE)) { validateIndicesTestVector_w(y, x, idxs, ftest = binMeans, fsure = binMeans0, bx = bx, na.rm = na.rm, count = TRUE, right = FALSE) validateIndicesTestVector_w(y, x, idxs, ftest = binMeans, fsure = binMeans0, bx = bx, na.rm = na.rm, count = TRUE, right = TRUE) } } matrixStats/tests/rowProds.R0000644000176200001440000000675514522461565015730 0ustar liggesuserslibrary("matrixStats") rowProds_R <- function(x, FUN = prod, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = FUN, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } all.equal.na <- function(target, current, ...) { # Computations involving NaN may return NaN or NA, cf. ?is.nan current[is.nan(current)] <- NA_real_ target[is.nan(target)] <- NA_real_ all.equal(target, current, ...) } for (mode in c("integer", "double")) { # Missing values x <- matrix(c(1, NA, NaN, 1, 1, 0, 1, 0), nrow = 4, ncol = 2) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowProds_R(x, na.rm = TRUE, useNames = useNames) print(y0) y1 <- rowProds(x, na.rm = TRUE, useNames = useNames) print(y1) y2 <- colProds(t(x), na.rm = TRUE, useNames = useNames) print(y2) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y2, y1)) # Missing values y0 <- rowProds_R(x, na.rm = FALSE, useNames = useNames) print(y0) y1 <- rowProds(x, na.rm = FALSE, useNames = useNames) print(y1) y2 <- colProds(t(x), na.rm = FALSE, useNames = useNames) print(y2) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y2, y1)) # "Empty" rows y0 <- rowProds_R(x[integer(0), , drop = FALSE], na.rm = FALSE, useNames = useNames) print(y0) y1 <- rowProds(x[integer(0), , drop = FALSE], na.rm = FALSE, useNames = useNames) print(y1) y2 <- colProds(t(x[integer(0), , drop = FALSE]), na.rm = FALSE, useNames = useNames) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal(y2, y1)) stopifnot(length(y1) == 0L) # Using product() y1 <- rowProds(x, method = "expSumLog", na.rm = FALSE, useNames = useNames) print(y1) y2 <- colProds(t(x), method = "expSumLog", na.rm = FALSE, useNames = useNames) print(y2) stopifnot(all.equal(y2, y1)) } } } # for (mode ...) # Bug report 2012-06-25 x <- matrix(c(1, 1, 1, 1, 1, 0, 1, 0), nrow = 4, ncol = 2) y0 <- rowProds_R(x) print(y0) y1 <- rowProds(x) print(y1) y2 <- colProds(t(x)) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, x[, 1] * x[, 2])) stopifnot(all.equal.na(y2, y1)) # Check names attribute dimnames(x) <- dimnames y0 <- rowProds_R(x, useNames = TRUE) print(y0) y1 <- rowProds(x, useNames = TRUE) print(y1) y2 <- colProds(t(x), useNames = TRUE) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, x[, 1] * x[, 2])) stopifnot(all.equal.na(y2, y1)) # Bug report 2014-03-25 ("all rows contains a zero") x <- matrix(c(0, 1, 1, 0), nrow = 2, ncol = 2) # To check names attribute dimnames <- list(letters[1:2], LETTERS[1:2]) y0 <- rowProds_R(x) print(y0) y1 <- rowProds(x) print(y1) y2 <- colProds(t(x)) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, c(0, 0))) stopifnot(all.equal.na(y2, y1)) # Check names attribute dimnames(x) <- dimnames y0 <- rowProds_R(x, useNames = TRUE) print(y0) y1 <- rowProds(x, useNames = TRUE) print(y1) y2 <- colProds(t(x), useNames = TRUE) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y2, y1)) matrixStats/tests/sum2.R0000644000176200001440000001772114520313754014764 0ustar liggesuserslibrary("matrixStats") options(warn = 1) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) sum2_R <- function(x, na.rm = FALSE, idxs = NULL) { if (is.null(idxs)) { sum(x, na.rm = na.rm) } else { sum(x[idxs], na.rm = na.rm) } } # sum2_R() cat("Consistency checks:\n") for (kk in 1:20) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape n <- sample(1e3, size = 1L) x <- rnorm(n, sd = 100) # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1L) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE, FALSE), size = 1L) # Sum over all y0 <- sum2_R(x, na.rm = na.rm) y1 <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) # Sum over subset nidxs <- sample(n, size = 1L) idxs <- sample(n, size = nidxs) y0 <- sum2_R(x, na.rm = na.rm, idxs = idxs) y1 <- sum2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) if (storage.mode(x) == "integer") { storage.mode(x) <- "logical" y0 <- sum2_R(x, na.rm = na.rm) y1 <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y0 <- sum2_R(x, na.rm = na.rm, idxs = idxs) y1 <- sum2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (n in 0:2) { for (na.rm in c(FALSE, TRUE)) { x <- rep(NA_real_, times = n) y0 <- sum(x, na.rm = na.rm) y <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y, y0)) x <- rep(NA_integer_, times = n) y0 <- sum(x, na.rm = na.rm) y <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y, y0)) x <- rep(NA, times = n) y0 <- sum(x, na.rm = na.rm) y <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (na.rm in c(FALSE, TRUE)) { # Summing over zero elements (integers) x <- integer(0) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- 1:10 idxs <- integer(0) s1 <- sum(x[idxs], na.rm = na.rm) s2 <- sum2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Summing over NA_integer_:s x <- rep(NA_integer_, times = 10L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times = 10L) idxs <- 1:5 s1 <- sum(x[idxs], na.rm = na.rm) s2 <- sum2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Summing over zero elements (doubles) x <- double(0) s1 <- sum(x) s2 <- sum2(x) stopifnot( identical(s1, 0), identical(s1, s2) ) x <- as.double(1:10) idxs <- integer(0) s1 <- sum(x[idxs]) s2 <- sum2(x, idxs = idxs) stopifnot( identical(s1, 0), identical(s1, s2) ) # Summing over NA_real_:s x <- rep(NA_real_, times = 10L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( !na.rm || s1 == 0, identical(s1, s2) ) x <- rep(NA_real_, times = 10L) idxs <- 1:5 s1 <- sum(x[idxs], na.rm = na.rm) s2 <- sum2(x, idxs = idxs, na.rm = na.rm) stopifnot( !na.rm || s1 == 0, identical(s1, s2) ) # Summing over -Inf:s x <- rep(-Inf, times = 3L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( is.infinite(s1) && s1 < 0, identical(s1, s2) ) # Summing over +Inf:s x <- rep(+Inf, times = 3L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) stopifnot( is.infinite(s1) && s1 > 0, identical(s1, s2) ) # Summing over mix of -Inf:s and +Inf:s x <- rep(c(-Inf, +Inf), times = 3L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( is.nan(s1), identical(s1, s2) ) # Summing over mix of -Inf:s and +Inf:s and numerics x <- rep(c(-Inf, +Inf, 3.14), times = 2L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( is.nan(s1), identical(s1, s2) ) # Summing over mix of NaN, NA, +Inf, and numerics x <- c(NaN, NA, +Inf, 3.14) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) if (na.rm) { stopifnot( is.infinite(s1) && s1 > 0, identical(s2, s1) ) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } # Summing over mix of NaN, NA, +Inf, and numerics x <- c(NA, NaN, +Inf, 3.14) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) if (na.rm) { stopifnot( is.infinite(s1) && s1 > 0, identical(s2, s1) ) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of large integers # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- c(.Machine$integer.max, 1L, -.Machine$integer.max) # Total gives integer overflow s1 <- sum(x[1:2]) # NA_integer_ w/ warning s2 <- sum2(x[1:2]) # NA_integer_ w/ warning stopifnot( getRversion() >= "3.5.0" || identical(s1, NA_integer_), identical(s2, NA_integer_) ) ## Assert above warning res <- tryCatch({ s2 <- sum2(x[1:2]) }, warning = identity) stopifnot(inherits(res, "warning")) # Total gives integer overflow (coerce to numeric) s1 <- sum(as.numeric(x[1:2])) # 2147483648 s2 <- sum2(as.numeric(x[1:2])) # 2147483648 s3 <- sum2(x[1:2], mode = "double") # 2147483648 stopifnot( identical(s1, 2147483648), identical(s1, s2), identical(s1, s3) ) # Cumulative sum would give integer overflow but not the total s1 <- sum(x) # 1L s2 <- sum2(x) # 1L stopifnot( identical(s1, 1L), identical(s1, s2) ) # Input is double but coersing result to integer x <- c(1, 2, 3.1) s1 <- sum2(x) s2 <- sum2(x, mode = "integer") stopifnot( identical(as.integer(s1), s2) ) ## Assert above warning res <- tryCatch({ s2 <- sum2(x, mode = "integer") }, warning = identity) stopifnot(inherits(res, "warning")) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of large doubles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Double overflow x <- rep(.Machine$double.xmax, times = 2L) y0 <- sum(x) print(y0) y <- sum2(x) print(y) stopifnot( is.infinite(y) && y > 0, identical(y, y0) ) x <- rep(-.Machine$double.xmax, times = 2L) y0 <- sum(x) print(y0) y <- sum2(x) print(y) stopifnot( is.infinite(y) && y < 0, identical(y, y0) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 idxs_list <- list( integer = 1:5, double = as.double(1:5), logical = (x <= 5) ) for (idxs in idxs_list) { cat("idxs:\n") str(idxs) s1 <- sum(x[idxs], na.rm = TRUE) s2 <- sum2(x, idxs = idxs, na.rm = TRUE) stopifnot(identical(s1, s2)) } matrixStats/tests/rowQuantiles.R0000644000176200001440000002052314522461565016573 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowQuantiles_R <- function(x, probs = c(0, 0.25, 0.50, 0.75, 1), na.rm = FALSE, drop = TRUE, type = 7L, ..., useNames = NA) { q <- apply(x, MARGIN = 1L, FUN = function(x, probs, na.rm) { if (!na.rm && any(is.na(x))) { na_value <- NA_real_ if (type != 7L) storage.mode(na_value) <- storage.mode(x) rep(na_value, times = length(probs)) } else { as.vector(quantile(x, probs = probs, na.rm = na.rm, type = type, names = FALSE, ...)) } }, probs = probs, na.rm = na.rm) if (!is.null(dim(q))) q <- t(q) else dim(q) <- c(nrow(x), length(probs)) colnames(q) <- matrixStats:::quantile_probs_names(probs) rownames(q) <- rownames(x) if (isFALSE(useNames)) dimnames(q) <- NULL if (drop) q <- drop(q) q } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with multiple quantiles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(1:40 + 0.1, nrow = 8, ncol = 5) storage.mode(x) <- mode dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) str(x) probs <- c(0, 0.5, 1) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { q0 <- rowQuantiles_R(x, probs = probs, useNames = useNames) print(q0) q1 <- rowQuantiles(x, probs = probs, useNames = useNames) print(q1) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q1) <- storage.mode(q0) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs = probs, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q2) <- storage.mode(q0) stopifnot(all.equal(q2, q0)) } } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with a single quantile # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(1:40, nrow = 8, ncol = 5) storage.mode(x) <- mode dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) str(x) probs <- c(0.5) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { q0 <- rowQuantiles_R(x, probs = probs, useNames = useNames) print(q0) q1 <- rowQuantiles(x, probs = probs, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q1) <- storage.mode(q0) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs = probs, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q2) <- storage.mode(q0) stopifnot(all.equal(q2, q0)) } } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) probs <- seq(from = 0, to = 1, by = 0.25) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 6L else 24L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(20:60, size = 2L) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim dimnames <- lapply(dim(x), FUN = function(n) rep(letters, length.out = n)) # Add NAs? has_na <- ((kk %% 2) == 0L) if (has_na) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Logical, integer, or double? mode <- "numeric" if ((kk %% 6) %in% 1:2) { cat("Coercing to logical\n") mode <- "logical" } else if ((kk %% 6) %in% 3:4) { cat("Coercing to integers\n") mode <- "integer" } storage.mode(x) <- mode str(x) # rowQuantiles(): for (type in 1:9) { cat(sprintf("type=%d, has_na=%s:\n", type, has_na)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { q0 <- rowQuantiles_R(x, probs = probs, na.rm = has_na, type = type, useNames = useNames) q1 <- rowQuantiles(x, probs = probs, na.rm = has_na, type = type, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical" && !has_na && type == 7L) storage.mode(q1) <- storage.mode(q0) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs = probs, na.rm = has_na, type = type, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical" && !has_na && type == 7L) storage.mode(q2) <- storage.mode(q0) stopifnot(all.equal(q2, q0)) } } } } # for (kk ...) for (mode in c("logical", "integer", "double")) { naValue <- NA_real_ storage.mode(naValue) <- mode someValue <- 1 storage.mode(someValue) <- mode for (type in 1:9) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NA # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(naValue, nrow = 3L, ncol = 4L) dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { qr0 <- rowQuantiles_R(x, type = type, useNames = useNames) qr <- rowQuantiles(x, type = type, useNames = useNames) stopifnot(identical(qr, qr0)) # x <- matrix(naValue, nrow = 4L, ncol = 3L) qc <- colQuantiles(t(x), type = type, useNames = useNames) stopifnot(identical(qc, qr)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Empty matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - probs <- c(0, 0.25, 0.75, 1) x <- matrix(naValue, nrow = 0L, ncol = 0L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) q <- rowQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(nrow(x), length(probs)))) q <- colQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(ncol(x), length(probs)))) x <- matrix(naValue, nrow = 2L, ncol = 0L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) q <- rowQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(nrow(x), length(probs)))) x <- matrix(naValue, nrow = 0L, ncol = 2L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) q <- colQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(ncol(x), length(probs)))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Single column matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - probs <- c(0, 0.25, 0.75, 1) x <- matrix(someValue, nrow = 2L, ncol = 1L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) qr <- rowQuantiles(x, probs = probs, type = type, useNames = TRUE) print(qr) x <- matrix(someValue, nrow = 1L, ncol = 2L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) qc <- colQuantiles(x, probs = probs, type = type, useNames = TRUE) print(qc) stopifnot(identical(qc, qr)) } } matrixStats/tests/varDiff_etal.R0000644000176200001440000000536014520313754016460 0ustar liggesuserslibrary("matrixStats") set.seed(1) x <- rnorm(1e4) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Variance estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sigma2_a <- var(x) cat(sprintf("var(x) = %g\n", sigma2_a)) sigma2_b <- varDiff(x) cat(sprintf("varDiff(x) = %g\n", sigma2_b)) d <- abs(sigma2_b - sigma2_a) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.02) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Standard deviation estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sigma_a <- sd(x) cat(sprintf("sd(x) = %g\n", sigma_a)) sigma_b <- sdDiff(x) cat(sprintf("sdDiff(x) = %g\n", sigma_b)) d <- abs(sigma_b - sigma_a) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.01) # Sanity checks stopifnot(abs(sigma2_a - sigma_a ^ 2) < 1e-9) stopifnot(abs(sigma2_b - sigma_b ^ 2) < 1e-9) sigma_a2 <- mad(x) cat(sprintf("mad(x) = %g\n", sigma_a2)) sigma_b2 <- madDiff(x) cat(sprintf("madDiff(x) = %g\n", sigma_b2)) d <- abs(sigma_b2 - sigma_a2) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.05) sigma_a3 <- IQR(x) cat(sprintf("IQR(x) = %g\n", sigma_a3)) sigma_b3 <- iqrDiff(x) cat(sprintf("iqrDiff(x) = %g\n", sigma_b3)) d <- abs(sigma_b3 - sigma_a3) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.05) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Trimmed estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- x outliers <- sample(length(x), size = 0.1 * length(x)) y[outliers] <- 100 * y[outliers] sigma_ao <- sd(y[-outliers]) cat(sprintf("sd(y) = %g\n", sigma_ao)) sigma_bo <- sdDiff(y[-outliers]) cat(sprintf("sdDiff(y) = %g\n", sigma_bo)) d <- abs(sigma_b - sigma_a) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.01) sigma_bot <- sdDiff(y, trim = 0.05) cat(sprintf("sdDiff(y, trim = 0.05) = %g\n", sigma_bot)) d <- abs(sigma_bot - sigma_a) cat(sprintf("Absolute difference = %g\n", d)) #stopifnot(d < 1e-3) sigma_cot <- madDiff(y, trim = 0.05) cat(sprintf("madDiff(y, trim = 0.05) = %g\n", sigma_cot)) sigma_dot <- iqrDiff(y, trim = 0.05) cat(sprintf("iqrDiff(y, trim = 0.05) = %g\n", sigma_dot)) fcns <- list( varDiff = varDiff, sdDiff = sdDiff, madDiff = madDiff, iqrDiff = iqrDiff ) for (name in names(fcns)) { cat(sprintf("%s()...\n", name)) fcn <- fcns[[name]] for (mode in c("integer", "double")) { cat("mode: ", mode, "", sep = "") for (n in 0:3) { x <- runif(n, min = -5, max = 5) storage.mode(x) <- mode str(x) y <- fcn(x) yt <- fcn(x, trim = 0.1) str(list("non-trimmed" = y, trimmed = yt)) } # for (mode ...) } cat(sprintf("%s()...DONE\n", name)) } # for (name ...) matrixStats/tests/rowCollapse_subset.R0000644000176200001440000000441214522461565017754 0ustar liggesuserslibrary("matrixStats") rowCollapse_R <- function(x, idxs, ..., useNames = NA) { ans <- c() storage.mode(ans) <- storage.mode(x) for (ii in seq_len(length(idxs))) { ans[ii] <- x[ii, idxs[ii]] } # Preserve names attribute if (isTRUE(useNames)) { names <- rownames(x) if (!is.null(names)) names(ans) <- names } ans } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (rows in index_cases) { if (is.null(rows)) rows <- seq_len(nrow(x)) for (idxs in list(2L, seq_len(6L))) { for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { suppressWarnings({ actual <- tryCatch(rowCollapse(x, idxs, rows = rows, useNames = useNames), error = function(c) "error") expect <- tryCatch({ idxs_0 <- rep(idxs, length.out = nrow(x))[rows] rowCollapse_R(x[rows, , drop = FALSE], idxs_0, useNames = useNames) }, error = function(c) "error") }) stopifnot(all.equal(actual, expect)) suppressWarnings({ actual <- tryCatch(colCollapse(t(x), idxs, cols = rows, useNames = useNames), error = function(c) "error") }) stopifnot(all.equal(actual, expect)) # Check names attribute dimnames(x) <- dimnames suppressWarnings({ actual <- tryCatch(rowCollapse(x, idxs, rows = rows, useNames = useNames), error = function(c) "error") expect <- tryCatch({ idxs_0 <- rep(idxs, length.out = nrow(x))[rows] rowCollapse_R(x[rows, , drop = FALSE], idxs_0, useNames = useNames) }, error = function(c) "error") }) stopifnot(all.equal(actual, expect)) suppressWarnings({ actual <- tryCatch(colCollapse(t(x), idxs, cols = rows, useNames = useNames), error = function(c) "error") }) stopifnot(all.equal(actual, expect)) dimnames(x) <- NULL } } } matrixStats/tests/rowAllAnys_subset.R0000644000176200001440000002422314522461565017557 0ustar liggesuserslibrary("matrixStats") rowAlls_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = all, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames(y) <- dimnames(x) } res <- apply(y, MARGIN = 1L, FUN = all, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnys_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = any, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames(y) <- dimnames(x) } res <- apply(y, MARGIN = 1L, FUN = any, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnyMissings_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = anyMissing) if (is.na(useNames) || !useNames) names(res) <- NULL res } all_R <- function(x, value = TRUE, ...) { if (is.na(value)) { all(is.na(x), ...) } else { all(x == value, ...) } } any_R <- function(x, value = TRUE, ...) { if (is.na(value)) { any(is.na(x), ...) } else { any(x == value, ...) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" x[2:3, ] <- NA_integer_ x[2, 1] <- 0L x[4:5, ] <- 0L x[4, 6] <- NA_integer_ # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) } } } for (rr in seq_len(nrow(x))) { for (idxs in index_cases) { validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = 0, na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = 0, na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = NA_integer_) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = 0, na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = 0, na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = NA_integer_) } } storage.mode(x) <- "character" # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) } } } } for (rr in seq_len(nrow(x))) { for (idxs in index_cases) { validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = "0", na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = "0", na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = NA_integer_) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = "0", na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = "0", na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = NA_integer_) } } matrixStats/tests/binMeans,binCounts.R0000644000176200001440000001037414520313754017570 0ustar liggesuserslibrary("matrixStats") library("stats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Naive R implementation of binMeans() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binMeans0 <- function(y, x, bx, na.rm = TRUE, count = TRUE, right = FALSE) { n_smooth <- length(bx) - 1L res <- double(n_smooth) counts <- rep(NaN, times = n_smooth) if (na.rm) { keep <- !is.na(x) & !is.na(y) x <- x[keep] y <- y[keep] } # For each bin... for (kk in seq_len(n_smooth)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk + 1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk + 1L]) } y_kk <- y[idxs] res[kk] <- mean(y_kk) counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Case #1 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:100 nx <- length(x) y <- double(nx) y[1:25] <- 5 y[51:75] <- -5 y <- y + rnorm(nx) # Bins bx <- c(0.5, 25.5, 50.5, 75.5, 100.5) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) stopifnot(all.equal(attr(y_smooth, "count"), n_smooth)) y_smooth0r <- rev(binMeans0(y, x = -x, bx = rev(-bx), count = FALSE, right = TRUE)) y_smoothr <- rev(binMeans(y, x = -x, bx = rev(-bx), count = FALSE, right = TRUE)) # Sanity check stopifnot(all.equal(y_smooth0r, y_smooth0, check.attributes = FALSE)) stopifnot(all.equal(y_smoothr, y_smooth0r)) # Integer input y <- as.integer(y) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(is.integer(y), all.equal(y_smooth, y_smooth0), all.equal(attr(y_smooth, "count"), n_smooth)) # Logical input y <- as.logical(y) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(is.logical(y), all.equal(y_smooth, y_smooth0), all.equal(attr(y_smooth, "count"), n_smooth)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Case #2 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nx <- 1e3 x <- runif(nx) y <- runif(nx) nb <- 10 bx <- do.call(seq, c(as.list(range(x)), length.out = nb)) bx1 <- c(bx[-1], bx[nb] + 1) y_smooth0 <- binMeans0(y, x = x, bx = bx1) y_smooth <- binMeans(y, x = x, bx = bx1) n_smooth <- binCounts(x, bx = bx1) y_smoothr <- rev(binMeans(y, x = -x, bx = rev(-bx1), right = TRUE)) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) stopifnot(all.equal(attr(y_smooth, "count"), n_smooth)) stopifnot(all.equal(y_smoothr, y_smooth, check.attributes = FALSE)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Empty bins # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- c(6:8, 16:19) nx <- length(x) y <- runif(nx) bx <- c(0, 5, 10, 15, 20, 25) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(attr(y_smooth, "count"), n_smooth)) stopifnot(all.equal(y_smooth, y_smooth0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:100 x[50] <- NA_integer_ nx <- length(x) y <- double(nx) y[1:25] <- 5 y[51:75] <- -5 y[82:92] <- NA_real_ y <- y + rnorm(nx) # Bins bx <- c(0.5, 25.5, 75.5, 82.5, 100.5) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Exception handling # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Zero bin bounderies (invalid bin definition) bx <- double(0L) res <- try(y_smooth <- binMeans(x = 1:5, y = 1:5, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) # One bin boundery (invalid bin definition) bx <- double(1L) res <- try(y_smooth <- binMeans(x = 1:5, y = 1:5, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) matrixStats/tests/rowVars.R0000644000176200001440000002220514522461565015540 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") options(matrixStats.vars.formula.freq = Inf) ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as rowVars() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as colVars() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } rowVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) res <- rowVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } colVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) res <- colVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } rowVars_center_naive <- function(x, rows = NULL, cols = NULL, center = NULL, na.rm = FALSE, ..., useNames = NA) { x <- sweep(x, MARGIN = 1, STATS = as.array(center), FUN = "-") x[is.infinite(center), ] <- NaN res <- rowVars(x, rows = rows, cols = cols, center = rep(0, times = nrow(x)), na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } colVars_center_naive <- function(x, rows = NULL, cols = NULL, center = NULL, na.rm = FALSE, ..., useNames = NA) { x <- sweep(x, MARGIN = 2, STATS = as.array(center), FUN = "-") x[, is.infinite(center)] <- NaN res <- colVars(x, rows = rows, cols = cols, center = rep(0, times = ncol(x)), na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs or Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { if (mode == "integer") { specials <- c(0L, NA_integer_) delta <- 0L } else { specials <- c(0, NA_real_, Inf) delta <- 0.1 } for (special in specials) { cat("special = ", special, "\n", sep = "") x <- matrix(1:50 + delta, nrow = 10L, ncol = 5L) x[3:7, c(2, 4)] <- special cat("mode: ", mode, "\n", sep = "") str(x) stopifnot(storage.mode(x) == mode) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column variance for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") center <- rowMeans(x, na.rm = na.rm) r0 <- rowVars_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowVars(x, na.rm = na.rm, useNames = useNames) r1b <- rowVars_center(x, na.rm = na.rm, useNames = useNames) r1c <- rowVars(x, center = center, na.rm = na.rm, useNames = useNames) tx <- t(x) r2 <- colVars(tx, na.rm = na.rm, useNames = useNames) r2b <- colVars_center(tx, na.rm = na.rm, useNames = useNames) r2c <- colVars(tx, center = center, na.rm = na.rm, useNames = useNames) tx <- NULL stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r1b, r1c)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2b, r2c)) center <- colMeans(x, na.rm = na.rm) r3 <- colVars(x, center = center, na.rm = na.rm, useNames = useNames) r3b <- colVars_center_naive(x, center = center, na.rm = na.rm, useNames = useNames) r3c <- rowVars(t(x), center = center, na.rm = na.rm, useNames = useNames) r3d <- rowVars_center_naive(t(x), center = center, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r3b, r3)) stopifnot(all.equal(r3c, r3)) stopifnot(all.equal(r3d, r3)) stopifnot( !any(is.infinite(r0)), !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r3)), !any(is.infinite(r1b)), !any(is.infinite(r1c)), !any(is.infinite(r2b)), !any(is.infinite(r2c)), !any(is.infinite(r3b)), !any(is.infinite(r3c)), !any(is.infinite(r3d)) ) } } } } # for (special ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column variance for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowVars_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowVars(x, na.rm = na.rm, useNames = useNames) r1b <- rowVars_center(x, na.rm = na.rm, useNames = useNames) r2 <- colVars(t(x), na.rm = na.rm, useNames = useNames) r2b <- colVars_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r0)), !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column variance for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowVars_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowVars(x, na.rm = na.rm, useNames = useNames) r1b <- rowVars_center(x, na.rm = na.rm, useNames = useNames) r2 <- colVars(t(x), na.rm = na.rm, useNames = useNames) r2b <- colVars_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r0)), !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # center and .dim # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - m <- matrix(1:12, nrow = 3L, ncol = 4L) a <- array(m, dim = c(3, 2, 2)) stopifnot(prod(dim(a)) == prod(dim(m))) y0 <- rowVars(m, dim. = dim(m)) print(y0) y1 <- rowVars(a, dim. = dim(m)) print(y1) stopifnot(identical(y1, y0)) stopifnot( !any(is.infinite(y0)), !any(is.infinite(y1)) ) mu <- rowMeans(m) y0 <- rowVars(m, center = mu, dim. = dim(m)) print(y0) y1 <- rowVars(a, center = mu, dim. = dim(m)) print(y1) stopifnot(identical(y1, y0)) stopifnot( !any(is.infinite(y0)), !any(is.infinite(y1)) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # https://github.com/HenrikBengtsson/matrixStats/issues/195 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(c(1,2,3,4), nrow = 2L, ncol = 2L) x[1,1] <- NA_real_ x[1,2] <- Inf center <- colMeans2(x, na.rm = TRUE) y <- colVars(x, center = center, na.rm = TRUE) stopifnot(!any(is.infinite(y))) x <- t(x) center <- rowMeans2(x, na.rm = TRUE) y <- rowVars(x, center = center, na.rm = TRUE) stopifnot(!any(is.infinite(y))) matrixStats/tests/rowSums2.R0000644000176200001440000002437114522461565015644 0ustar liggesuserslibrary("matrixStats") rowSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::rowSums() doesn't. What should rowSums2() do? ## apply(x, MARGIN = 1L, FUN = sum, na.rm = na.rm) res <- rowSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::colSums() doesn't. What should colSums2() do? ## apply(x, MARGIN = 2L, FUN = sum, na.rm = na.rm) res <- colSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } for (mode in c("integer", "logical", "double")) { x <- matrix(-4:4, nrow = 3, ncol = 3) storage.mode(x) <- mode if (mode == "double") x <- x + 0.1 # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(1, nrow = 1, ncol = 1) storage.mode(x) <- mode # To check names attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(integer(0), nrow = 0, ncol = 0) storage.mode(x) <- mode y0 <- rowSums2_R(x, na.rm = FALSE) y1 <- rowSums2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE) y1 <- colSums2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(NA_integer_, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- colSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- colSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf, +Inf), nrow = 4, ncol = 4) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: NaNs and NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(NaN, NA_real_), nrow = 4, ncol = 4) y0 <- rowSums(x, na.rm = FALSE) str(y0) stopifnot(all(is.na(y0)), length(unique(y0)) >= 1L) y1 <- rowSums2(x, na.rm = FALSE) str(y0) stopifnot(all(is.na(y1)), length(unique(y1)) >= 1L) stopifnot(all.equal(y1, y0)) y0 <- colSums(x, na.rm = FALSE) stopifnot(all(is.na(y0)), length(unique(y0)) == 1L) y1 <- colSums2(x, na.rm = FALSE) stopifnot(all(is.na(y1)), length(unique(y1)) == 1L) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that y1 is identical to y0. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4L else 20L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(50:200, size = 2) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Mode? modes <- "double" if ((kk %% 4) %in% c(2, 0)) { modes <- c("integer", "logical") } for (mode in modes) { if (mode != "double") { cat(sprintf("Coercing from %s to %s\n", storage.mode(x), mode)) storage.mode(x) <- mode } na.rm <- sample(c(TRUE, FALSE), size = 1) # rowSums2(): y0 <- rowSums2_R(x, na.rm = na.rm) y1 <- rowSums2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- colSums2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) # colSums2(): y0 <- colSums2_R(x, na.rm = na.rm) y1 <- colSums2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- rowSums2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) } } # for (kk ...) matrixStats/tests/rowSums2_subset.R0000644000176200001440000000337014522461565017225 0ustar liggesuserslibrary("matrixStats") rowSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::rowSums() doesn't. What should rowSums2() do? ## apply(x, MARGIN = 1L, FUN = sum, na.rm = na.rm) res <- rowSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::colSums() doesn't. What should colSums2() do? ## apply(x, MARGIN = 2L, FUN = sum, na.rm = na.rm) res <- colSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowSums2, fsure = rowSums2_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colSums2, fsure = rowSums2_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/signTabulate_subset.R0000644000176200001440000000165114520313754020100 0ustar liggesuserslibrary("matrixStats") signTabulate0 <- function(x, ...) { nneg <- sum(x < 0, na.rm = TRUE) nzero <- sum(x == 0, na.rm = TRUE) npos <- sum(x > 0, na.rm = TRUE) nna <- sum(is.na(x)) nneginf <- sum(is.infinite(x) & x < 0, na.rm = TRUE) nposinf <- sum(is.infinite(x) & x > 0, na.rm = TRUE) res <- c(nneg, nzero, npos, nna, nneginf, nposinf) res <- as.double(res) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf") if (is.integer(x)) res <- res[1:4] res } # signTabulate0() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) x[2:3, 4:5] <- +Inf x[4:5, 1:2] <- -Inf for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = signTabulate, fsure = signTabulate0) } matrixStats/tests/rowRanks_subset.R0000644000176200001440000000453614522461565017277 0ustar liggesuserslibrary("matrixStats") rowRanks_R <- function(x, ties.method = "average", ..., useNames = NA) { ans <- t(apply(x, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = ties.method)) # Preserve dimnames attribute? dim(ans) <- dim(x) dimnames(ans) <- if (isTRUE(useNames)) dimnames(x) else NULL ans } colRanks_R <- function(x, ties.method, preserveShape = FALSE, ..., useNames = NA) { ans <- t(apply(x, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = ties.method)) # Preserve dimnames attribute? tx <- t(x) dim(ans) <- dim(tx) dimnames(ans) <- if (isTRUE(useNames)) dimnames(tx) else NULL if (preserveShape) ans <- t(ans) ans } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) colRanks_R_t <- function(x, rows, cols, ..., useNames = NA) { t(colRanks(t(x), rows = cols, cols = rows, preserveShape = TRUE, ..., useNames = useNames)) } # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowRanks, fsure = rowRanks_R, ties.method = "average", useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = colRanks_R_t, fsure = rowRanks_R, ties.method = "average", useNames = useNames) for (perserveShape in c(TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = colRanks, fsure = colRanks_R, ties.method = "average", perserveShape = perserveShape, useNames = useNames) } } } } matrixStats/tests/logSumExp.R0000644000176200001440000000620514520313754016014 0ustar liggesuserslibrary("matrixStats") library("stats") logSumExp_R <- function(lx, na.rm = FALSE) { log(sum(exp(lx), na.rm = na.rm)) } ## R-help thread \emph{'[R] Beyond double-precision?'} on May 9, 2009. for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") set.seed(1) x <- runif(20, min = 1.0, max = 3.0) storage.mode(x) <- mode str(x) ## The logarithm of the harmonic mean y0 <- log(1 / mean(1 / x)) print(y0) ## -1.600885 lx <- log(x) y1 <- log(length(x)) - logSumExp(-lx) print(y1) ## [1] -1.600885 # Sanity check stopifnot(all.equal(y1, y0)) y2 <- log(length(x)) - logSumExp_R(-lx) # Sanity check stopifnot(all.equal(y2, y0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## NA values lx <- c(1, 2, 3) lx[2] <- NA_real_ y0 <- logSumExp_R(lx, na.rm = FALSE) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(all.equal(y, y0)) ## NaN values lx <- c(1, 2, 3) lx[2] <- NaN y0 <- logSumExp_R(lx, na.rm = FALSE) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(all.equal(y, y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Zero-length vectors lx <- numeric(0L) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) ## Vectors of length one lx <- 1.0 y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, lx)) stopifnot(all.equal(y, y0)) lx <- NA_real_ y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) ## All missing values lx <- c(NA_real_, NA_real_) y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) lx <- c(NA_real_, NA_real_) y0 <- logSumExp_R(lx, na.rm = FALSE) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) ## +Inf values lx <- c(1, 2, +Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, +Inf)) stopifnot(all.equal(y, y0)) ## First element is a missing value, cf. PR #33 lx <- c(NA_real_, 1) y0 <- logSumExp_R(lx) print(y0) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm = TRUE) print(y0) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(identical(y, 1)) stopifnot(all.equal(y, y0)) ## Multiple -Inf values, cf. issue #84 lx <- c(-Inf, -Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) lx <- c(-Inf, 5, -Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, 5)) stopifnot(all.equal(y, y0)) matrixStats/tests/rowOrderStats_subset.R0000644000176200001440000000342614522461565020310 0ustar liggesuserslibrary("matrixStats") rowOrderStats_R <- function(x, probs, ..., useNames = NA) { ans <- apply(x, MARGIN = 1L, FUN = quantile, probs = probs, type = 3L) # Remove Attributes if (is.na(useNames) || !useNames || length(ans) == 0L) attributes(ans) <- NULL ans } # rowOrderStats_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) probs <- 0.3 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { if (is.null(cols)) which <- round(probs * ncol(x)) else { xxrows <- rows suppressWarnings({ xx <- tryCatch(x[, cols, drop = FALSE], error = function(c) "error") if (identical(xx, "error")) which <- 0L else which <- round(probs * ncol(xx)) }) } if (which == 0L) next validateIndicesTestMatrix(x, rows, cols, ftest = rowOrderStats, fsure = rowOrderStats_R, which = which, probs = probs, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colOrderStats, fsure = rowOrderStats_R, which = which, probs = probs, useNames = useNames) } } } } matrixStats/tests/indexByRow.R0000644000176200001440000000412314520313754016160 0ustar liggesuserslibrary("matrixStats") indexByRow_R1 <- function(dim, idxs = NULL, ...) { n <- prod(dim) x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) if (!is.null(idxs)) x <- x[idxs] as.vector(x) } indexByRow_R2 <- function(dim, idxs = NULL, ...) { n <- prod(dim) if (is.null(idxs)) { x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) as.vector(x) } else { idxs <- idxs - 1L cols <- idxs %/% dim[2L] rows <- idxs %% dim[2L] cols + dim[1L] * rows + 1L } } dim <- c(5L, 4L) x <- matrix(NA_integer_, nrow = dim[1L], ncol = dim[2L]) y <- t(x) idxs_by_cols <- seq_along(x) # Assign by columns x[idxs_by_cols] <- idxs_by_cols print(x) # Truth y0 <- t(x) idxs_by_rows <- as.vector(y0) # Assert idxs <- indexByRow(dim) stopifnot(all.equal(idxs, idxs_by_rows)) y <- x y[idxs_by_rows] <- idxs print(y) stopifnot(all(as.vector(y) == as.vector(x))) idxs_R1 <- indexByRow_R1(dim) stopifnot(all.equal(idxs_R1, idxs_by_rows)) idxs_R2 <- indexByRow_R2(dim) stopifnot(all.equal(idxs_R2, idxs_by_rows)) # Assert idxs_by_cols <- seq(from = 1, to = length(x), by = 3L) idxs_by_rows <- as.vector(t(x)[idxs_by_cols]) idxs <- indexByRow(dim, idxs = idxs_by_cols) stopifnot(all(idxs == idxs_by_rows)) idxs_R1 <- indexByRow_R1(dim, idxs = idxs_by_cols) stopifnot(all(idxs_R1 == idxs_by_rows)) idxs_R2 <- indexByRow_R2(dim, idxs = idxs_by_cols) stopifnot(all(idxs_R2 == idxs_by_rows)) ## DEFUNCT: Backward compatibility res <- tryCatch({ idxs1 <- indexByRow(x) }, error = identity) stopifnot(inherits(res, "error")) ## Exceptions: ## Too large matrices are not supported, which happens ## when prod(dim) > .Machine$integer.max dim_too_large <- c(.Machine$integer.max, 2L) res <- tryCatch({ idxs <- indexByRow(dim_too_large, idxs = 1L) }, error = identity) stopifnot(inherits(res, "error")) ## Non-positive indices are not supported res <- tryCatch({ idxs <- indexByRow(c(1,1), idxs = 0L) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ idxs <- indexByRow(c(1,1), idxs = -1L) }, error = identity) stopifnot(inherits(res, "error")) matrixStats/tests/anyMissing_subset.R0000644000176200001440000000100614520313754017571 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(4, min = -3, max = 3) x[2] <- NA for (mode in c("integer", "numeric")) { storage.mode(x) <- mode for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = anyMissing, fsure = function(x, ...) { anyValue(x, value = NA) }) } } matrixStats/tests/rowDiffs.R0000644000176200001440000000777014522461565015672 0ustar liggesuserslibrary("matrixStats") rowDiffs_R <- function(x, lag = 1L, differences = 1L, ..., useNames = NA) { ncol2 <- ncol(x) - lag * differences if (ncol2 <= 0) { y <- matrix(x[integer(0L)], nrow = nrow(x), ncol = 0L) # Preserve names attribute if (isTRUE(useNames) && !is.null(rownames(x))) rownames(y) <- rownames(x) return(y) } suppressWarnings({ y <- apply(x, MARGIN = 1L, FUN = diff, lag = lag, differences = differences) }) y <- t(y) # Preserve dimnames attribute dim(y) <- c(nrow(x), ncol2) if (isTRUE(useNames) && !is.null(dimnames(x))) { colnames <- colnames(x) if (!is.null(colnames)) { len <- length(colnames) colnames <- colnames[(len - ncol2 + 1):len] } dimnames(y) <- list(rownames(x), colnames) } else dimnames(y) <- NULL y } set.seed(0x42) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(sample(10 * 8) + 0.1, nrow = 10L, ncol = 8L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } storage.mode(x) <- mode str(x) dimnames <- list(letters[1:10], LETTERS[1:8]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check dimnames attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { for (lag in 1:4) { for (differences in 1:3) { cat(sprintf("mode: %s, lag = %d, differences = %d\n", mode, lag, differences)) # Row/column ranges r0 <- rowDiffs_R(x, lag = lag, differences = differences, useNames = useNames) r1 <- rowDiffs(x, lag = lag, differences = differences, useNames = useNames) r2 <- t(colDiffs(t(x), lag = lag, differences = differences, useNames = useNames)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(NA_real_, nrow = 10L, ncol = 5L) storage.mode(x) <- mode str(x) dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check dimnames attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowDiffs_R(x, useNames = useNames) r1 <- rowDiffs(x, useNames = useNames) r2 <- t(colDiffs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check dimnames attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowDiffs_R(x, useNames = useNames) r1 <- rowDiffs(x, useNames = useNames) r2 <- t(colDiffs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } # for (useNames ...) } # for (setDimnames ...) matrixStats/tests/varDiff_etal_subset.R0000644000176200001440000000207014520313754020040 0ustar liggesuserslibrary("matrixStats") fcns <- list( varDiff = varDiff, sdDiff = sdDiff, madDiff = madDiff, iqrDiff = iqrDiff ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (name in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", name)) fcn <- fcns[[name]] for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) storage.mode(x) <- mode trim <- runif(1, min = 0, max = 0.5) if (mode == "numeric") x[1] <- Inf for (diff in 1:2) { for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = fcn, fsure = fcn, na.rm = TRUE, diff = diff, trim = trim) validateIndicesTestVector(x, idxs, ftest = fcn, fsure = fcn, na.rm = FALSE, diff = diff, trim = trim) } } } cat(sprintf("%s()...DONE\n", name)) } matrixStats/tests/rowMeans2.R0000644000176200001440000002456514522461565015765 0ustar liggesuserslibrary("matrixStats") rowMeans_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- rowMeans(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMeans_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- colMeans(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } for (mode in c("integer", "logical", "double")) { x <- matrix(-4:4, nrow = 3, ncol = 3) storage.mode(x) <- mode if (mode == "double") x <- x + 0.1 # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(1, nrow = 1, ncol = 1) storage.mode(x) <- mode # To check names attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(integer(0), nrow = 0, ncol = 0) storage.mode(x) <- mode y0 <- rowMeans(x, na.rm = FALSE) y1 <- rowMeans2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) y0 <- colMeans(x, na.rm = FALSE) y1 <- colMeans2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(NA_integer_, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf, +Inf), nrow = 4, ncol = 4) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: NaNs and NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: NaNs and NAs:\n") x <- matrix(c(NaN, NA_real_), nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) str(y0) stopifnot(all(is.na(y0)), length(unique(y0)) >= 1L) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) str(y1) stopifnot(all(is.na(y1)), length(unique(y1)) >= 1L) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) stopifnot(all(is.na(y0)), length(unique(y0)) == 1L) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all(is.na(y1)), length(unique(y1)) == 1L) stopifnot(all.equal(y1, y0)) } } ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that y1 is identical to y0. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4L else 20L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(50:200, size = 2) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Mode? modes <- "double" if ((kk %% 4) %in% c(2, 0)) { modes <- c("integer", "logical") } for (mode in modes) { if (mode != "double") { cat(sprintf("Coercing from %s to %s\n", storage.mode(x), mode)) storage.mode(x) <- mode } na.rm <- sample(c(TRUE, FALSE), size = 1) # rowMeans2(): y0 <- rowMeans(x, na.rm = na.rm) y1 <- rowMeans2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- colMeans2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) # colMeans2(): y0 <- colMeans(x, na.rm = na.rm) y1 <- colMeans2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- rowMeans2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) } } # for (kk ...) matrixStats/tests/psortKM.R0000644000176200001440000000246014520313754015467 0ustar liggesuserslibrary("matrixStats") library("utils") ## utils::str # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - psortKM_R <- function(x, k, m) { x <- sort(x) x[(k - m + 1):k] } psortKM_R2 <- function(x, k, m) { partial <- (k - m + 1):k x <- sort.int(x, partial = partial) x[partial] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") x <- 1:30 x[18:20] <- 20 y <- sample(x) cat("x:\n") str(x) cat("sample(x):\n") str(y) for (k in c(1L, 2L, 20L, 21L, length(x))) { for (m in 1:min(5L, k)) { px0 <- psortKM_R(x, k = k, m = m) px0b <- psortKM_R2(x, k = k, m = m) stopifnot(identical(px0b, px0)) px1 <- matrixStats:::.psortKM(x, k = k, m = m) cat(sprintf(".psortKM(x, k = %d, m = %d):\n", k, m)) print(px1) stopifnot(identical(px1, px0)) py0 <- psortKM_R(y, k = k, m = m) py0b <- psortKM_R2(y, k = k, m = m) stopifnot(identical(py0b, py0)) py1 <- matrixStats:::.psortKM(y, k = k, m = m) cat(sprintf(".psortKM(y, k = %d, m = %d):\n", k, m)) print(py1) stopifnot(identical(py1, py0)) stopifnot(identical(py1, px1)) } # for (m ...) } # for (k ...) matrixStats/tests/rowRanks.R0000644000176200001440000001403514522461565015705 0ustar liggesuserslibrary("matrixStats") dense_rank <- function(x) match(x, table = sort(unique(x))) rowRanks_R <- function(x, ties.method, ..., useNames = NA) { if (ties.method == "dense") { res <- t(apply(x, MARGIN = 1L, FUN = dense_rank)) } else { res <- t(apply(x, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = ties.method)) } # Preserve dimnames attribute? dim(res) <- dim(x) dimnames(res) <- if (isTRUE(useNames)) dimnames(x) else NULL res } colRanks_R <- function(x, ties.method, preserveShape = FALSE, ..., useNames = NA) { if (ties.method == "dense") { res <- t(apply(x, MARGIN = 2L, FUN = dense_rank)) } else { res <- t(apply(x, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = ties.method)) } # Preserve dimnames attribute? tx <- t(x) dim(res) <- dim(tx) dimnames(res) <- if (isTRUE(useNames)) dimnames(tx) else NULL if (preserveShape) res <- t(res) res } set.seed(1) cat("Consistency checks:\n") xs <- vector("list", length = 4L) for (kk in 1:4) { # Simulate data in a matrix of any shape dim <- sample(40:80, size = 2L) n <- prod(dim) x <- rnorm(n, sd = 10) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1L) x[sample(length(x), size = nna)] <- NA_real_ } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } xs[[kk]] <- x } # for (kk ...) str(xs) for (kk in 1:4) { cat("Random test #", kk, "\n", sep = "") x <- xs[[kk]] tx <- t(x) for (ties in c("max", "min", "average", "first", "last", "dense")) { cat(sprintf("ties.method = %s\n", ties)) # rowRanks(): y1 <- matrixStats::rowRanks(x, ties.method = ties) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- rowRanks_R(x, ties.method = ties) stopifnot(identical(y1, y2)) } y3 <- matrixStats::colRanks(tx, ties.method = ties) stopifnot(identical(y1, y3)) # colRanks(): y1 <- matrixStats::colRanks(x, ties.method = ties) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- colRanks_R(x, ties.method = ties) stopifnot(identical(y1, y2)) } y3 <- matrixStats::rowRanks(tx, ties.method = ties) stopifnot(identical(y1, y3)) } } # for (kk ...) # Note, below we know ties.method %in% c("min", "max", "average") is correct cat("Consistency checks for random:\n") tolerance <- 0.1 nsamples <- 10000 for (kk in 1:4) { cat("Random test #", kk, "\n", sep = "") x <- xs[[kk]] tx <- t(x) for (ties in c("random")) { cat(sprintf("ties.method = %s\n", ties)) ## rowRanks(): y0 <- rowRanks_R(x, ties.method = ties) y1 <- matrixStats::rowRanks(x, ties.method = ties) ## Assert symmetric rank differences d <- y1 - y0 stopifnot(all(rowSums(d) == 0, na.rm = TRUE)) ## Assert within [min, max] y2min <- matrixStats::rowRanks(x, ties.method = "min") y2max <- matrixStats::rowRanks(x, ties.method = "max") stopifnot(all(y1 >= y2min, na.rm = TRUE) && all(y1 <= y2max, na.rm = TRUE)) ## Assert near average y1list <- replicate(nsamples, matrixStats::rowRanks(x, ties.method = ties), simplify = FALSE) y1mean <- Reduce(`+`, y1list) / nsamples y2avg <- matrixStats::rowRanks(x, ties.method = "average") stopifnot(all(abs(y1mean - y2avg) < tolerance, na.rm = TRUE)) ## colRanks(): y0 <- colRanks_R(x, ties.method = ties) y1 <- matrixStats::colRanks(x, ties.method = ties) ## Assert symmetric rank differences d <- y1 - y0 stopifnot(all(rowSums(d) == 0, na.rm = TRUE)) ## Assert within [min, max] y2min <- matrixStats::colRanks(x, ties.method = "min") y2max <- matrixStats::colRanks(x, ties.method = "max") stopifnot(all(y1 >= y2min, na.rm = TRUE) && all(y1 <= y2max, na.rm = TRUE)) y1list <- replicate(nsamples, matrixStats::colRanks(x, ties.method = ties), simplify = FALSE) y1mean <- Reduce(`+`, y1list) / nsamples ## Assert near average y2avg <- matrixStats::colRanks(x, ties.method = "average") stopifnot(all(abs(y1mean - y2avg) < tolerance, na.rm = TRUE)) } } # for (kk ...) ## Exception handling x <- matrix(1:12, nrow = 3L, ncol = 4L) y <- try(rowRanks(x, ties.method = "unknown"), silent = TRUE) stopifnot(inherits(y, "try-error")) y <- try(colRanks(x, ties.method = "unknown"), silent = TRUE) stopifnot(inherits(y, "try-error")) dimnames <- list(letters[1:3], LETTERS[1:4]) for (mode in c("integer", "double")){ storage.mode(x) <- mode # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { for (ties in c("max", "min", "average", "first", "last", "dense", "random")) { cat(sprintf("ties.method = %s\n", ties)) # rowRanks(): y1 <- matrixStats::rowRanks(x, ties.method = ties, useNames = useNames) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- rowRanks_R(x, ties.method = ties, useNames = useNames) stopifnot(identical(y1, y2)) } y3 <- matrixStats::colRanks(t(x), ties.method = ties, useNames = useNames) stopifnot(identical(y1, y3)) # colRanks(): y1 <- matrixStats::colRanks(x, ties.method = ties, useNames = useNames) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- colRanks_R(x, ties.method = ties, useNames = useNames) stopifnot(identical(y1, y2)) } y3 <- matrixStats::rowRanks(t(x), ties.method = ties, useNames = useNames) stopifnot(identical(y1, y3)) # Check preserveShape y1 <- matrixStats::colRanks(x, ties.method = ties, preserveShape = TRUE, useNames = useNames) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- colRanks_R(x, ties.method = ties, preserveShape = TRUE, useNames = useNames) stopifnot(identical(y1, y2)) } } } } } matrixStats/tests/rowVarDiffs.R0000644000176200001440000000641014522461565016331 0ustar liggesuserslibrary("matrixStats") fcns <- list( rowVarDiffs = list(rowVarDiffs, colVarDiffs), rowSdDiffs = list(rowSdDiffs, colSdDiffs), rowMadDiffs = list(rowMadDiffs, colMadDiffs), rowIQRDiffs = list(rowIQRDiffs, colIQRDiffs) ) for (fcn in names(fcns)) { cat(sprintf("%s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1L]] col_fcn <- fcns[[fcn]][[2L]] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50 + 0.1, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r1 <- row_fcn(x, na.rm = na.rm, useNames = useNames) r2 <- col_fcn(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) } } } } # for (add_na ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r1 <- row_fcn(x, na.rm = na.rm, useNames = useNames) r2 <- col_fcn(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r1 <- row_fcn(x, na.rm = na.rm, useNames = useNames) r2 <- col_fcn(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) } } } cat(sprintf("%s()...DONE\n", fcn)) } # for (fcn ...) matrixStats/tests/product_subset.R0000644000176200001440000000107214520313754017133 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = product, fsure = prod, na.rm = TRUE) validateIndicesTestVector(x, idxs, ftest = product, fsure = prod, na.rm = FALSE) } matrixStats/tests/weightedMedian.R0000644000176200001440000000625014520313754017007 0ustar liggesuserslibrary("matrixStats") x <- 1:5 y <- weightedMedian(x) y <- weightedMedian(x, w = c(NA, Inf, NA, Inf, NA), na.rm = TRUE) print(y) y <- weightedMedian(x, w = c(NA, Inf, NA, Inf, NA), na.rm = FALSE) print(y) stopifnot(is.na(y)) x <- 1:10 n <- length(x) y1 <- median(x) # 5.5 y2 <- weightedMedian(x) # 5.5 stopifnot(all.equal(y1, y2)) w <- rep(1, times = n) y1 <- weightedMedian(x, w) # 5.5 (default) y2a <- weightedMedian(x, ties = "weighted") # 5.5 (default) y2b <- weightedMedian(x, ties = "min") # 5 y2c <- weightedMedian(x, ties = "max") # 6 stopifnot(all.equal(y2a, y1)) y3 <- weightedMedian(x, w) # 5.5 (default) # Pull the median towards zero w[1] <- 5 y1 <- weightedMedian(x, w) # 3.5 y <- c(rep(0, times = w[1]), x[-1]) # Only possible for integer weights y2 <- median(y) # 3.5 stopifnot(all.equal(y1, y2)) # Put even more weight on the zero w[1] <- 8.5 y <- weightedMedian(x, w) # 2 # All weight on the first value w[1] <- Inf y <- weightedMedian(x, w) # 1 # All weight on the last value w[1] <- 1 w[n] <- Inf y <- weightedMedian(x, w) # 10 # All weights set to zero w <- rep(0, times = n) y <- weightedMedian(x, w) # NA x <- 1:4 w <- rep(1, times = 4) for (mode in c("integer", "double")) { storage.mode(x) <- mode for (ties in c("weighted", "mean", "min", "max")) { cat(sprintf("ties = %s\n", ties)) y <- weightedMedian(x, w, ties = ties) } } set.seed(0x42) y <- weightedMedian(x = double(0L)) print(y) stopifnot(length(y) == 1L) stopifnot(is.na(y)) y <- weightedMedian(x = x[1]) print(y) stopifnot(length(y) == 1L) stopifnot(all.equal(y, x[1])) n <- 1e3 x <- runif(n) w <- runif(n, min = 0, max = 1) for (mode in c("integer", "double")) { storage.mode(x) <- mode for (ties in c("weighted", "mean", "min", "max")) { y <- weightedMedian(x, w, ties = ties) cat(sprintf("mode = %s, ties = %s, result = %g\n", mode, ties, y)) } } # A large vector n <- 1e5 x <- runif(n) w <- runif(n, min = 0, max = 1) y <- weightedMedian(x, w) y <- weightedMedian(x, w, ties = "min") # Single Number xs <- c(1, NA_integer_) ws <- c(1, NA_integer_) for (x in xs) { for (w in ws) { y <- weightedMedian(x = x, w = w) if (is.na(w)) z <- NA_real_ else z <- x[1] stopifnot(all.equal(y, z)) } } ## Logical x1 <- c(TRUE, FALSE, TRUE) w0 <- c(0, 0, 0) stopifnot(!is.finite(weightedMedian(x1, w0)), !is.infinite(weightedMedian(x1, w0))) w1 <- c(1, 1, 1) stopifnot(weightedMedian(x1, w1) == 1) w2 <- c(1, 2, 3) stopifnot(weightedMedian(x1, w2) == 1) ### NA stopifnot(is.na(weightedMedian(c(TRUE, FALSE, NA), c(1, 2, 3))), all.equal(weightedMedian(c(TRUE, FALSE, NA), c(1, 2, 3), na.rm = TRUE), weightedMedian(c(TRUE, FALSE), c(1, 2)))) ### Identical to as.integer() x <- rcauchy(100) w <- abs(rcauchy(100)) stopifnot(all.equal(weightedMedian(x > 0, w), weightedMedian(as.integer(x > 0), w))) matrixStats/tests/rowWeightedVars.R0000644000176200001440000002340614522461565017225 0ustar liggesuserslibrary("matrixStats") set.seed(1) x <- matrix(rnorm(20), nrow = 5L, ncol = 4L) print(x) # To check names attribute dimnames <- list(letters[1:5], LETTERS[1:4]) # Weighted row variances (uniform weights - all w = 1) # Non-weighted row variances x_est0 <- rowVars(x) w <- rep(1, times = ncol(x)) x_est1 <- rowWeightedVars(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x, useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (uniform weights - all w = 3) x3 <- cbind(x, x, x) x_est0 <- rowVars(x3) w <- rep(3, times = ncol(x)) x_est1 <- rowWeightedVars(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x3 <- cbind(x, x, x) x_est0 <- rowVars(x3, useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowVars(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedVars(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x[, (w == 1), drop = FALSE], useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (excluding some columns) w <- c(0, 1, 0, 0) x_est0 <- rowVars(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedVars(x, w = w) #stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est1)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x[, (w == 1), drop = FALSE], useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (all zero weights) w <- c(0, 0, 0, 0) x_est0 <- rowVars(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedVars(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x[, (w == 1), drop = FALSE], useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted variances by rows and columns w <- 1:4 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est1 <- rowWeightedVars(x, w = w, useNames = useNames) print(x_est1) x_est2 <- colWeightedVars(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est2, x_est1)) } } x[sample(length(x), size = 0.3 * length(x))] <- NA print(x) # Non-weighted row variances with missing values x_est0 <- rowVars(x, na.rm = TRUE) x_est1 <- rowWeightedVars(x, w = rep(1, times = ncol(x)), na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = rep(1, times = ncol(x)), na.rm = TRUE) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x, na.rm = TRUE, useNames = TRUE) x_est1 <- rowWeightedVars(x, w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances with missing values # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est1 <- rowWeightedVars(x, w = w, na.rm = TRUE, useNames = useNames) print(x_est1) x_est2 <- colWeightedVars(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est2, x_est1)) } } # Weighted variances by rows and columns w <- 1:4 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est1 <- rowWeightedVars(x, w = w, na.rm = TRUE, useNames = useNames) print(x_est1) x_est2 <- colWeightedVars(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est2, x_est1)) } } # Weighted row standard deviation (excluding some columns) w <- c(1, 1, 0, 1) ## FIXME: rowVars()/rowSds() needs na.rm = FALSE (wrong default) x_est0 <- rowSds(x[, (w == 1), drop = FALSE], na.rm = FALSE) x_est1 <- rowWeightedSds(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedSds(x, w = w, na.rm = FALSE, useNames = FALSE) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowSds(x[, (w == 1), drop = FALSE], na.rm = FALSE, useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedSds(x, w = w, na.rm = FALSE, useNames = NA) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedSds(x, w = w, na.rm = FALSE, useNames = TRUE) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row MADs (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowMads(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMads(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMads(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedMads(x, w = w, useNames = FALSE) x_est2 <- colWeightedMads(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowMads(x[, (w == 1), drop = FALSE], useNames = TRUE) if (!matrixStats:::isUseNamesNADefunct()) { x_est1 <- rowWeightedMads(x, w = w, useNames = NA) x_est2 <- colWeightedMads(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } x_est1 <- rowWeightedMads(x, w = w, useNames = TRUE) x_est2 <- colWeightedMads(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL matrixStats/tests/rowWeightedVars_subset.R0000644000176200001440000000341314522461565020606 0ustar liggesuserslibrary("matrixStats") fcns <- list( weightedVar = c(rowWeightedVars, colWeightedVars), weightedSd = c(rowWeightedSds, colWeightedSds), weightedMad = c(rowWeightedMads, colWeightedMads) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (fcn in names(fcns)) { cat(sprintf("subsetted tests on matrix %s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1]] col_fcn <- fcns[[fcn]][[2]] for (mode in c("numeric", "integer")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix_w(x, w, rows, cols, ftest = row_fcn, fsure = row_fcn, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix_w(x, w, rows, cols, fcoltest = col_fcn, fsure = row_fcn, na.rm = na.rm, useNames = useNames) } } } } cat(sprintf("%s()...DONE\n", fcn)) } matrixStats/tests/binCounts_subset.R0000644000176200001440000000174014520313754017421 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binCounts_hist <- function(x, bx, right = FALSE, ...) { n0 <- graphics::hist(x, breaks = bx, right = right, include.lowest = TRUE, plot = FALSE)$counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" bx <- c(-6, 0, 3, 4, 10) for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = binCounts, fsure = binCounts_hist, bx = bx, right = FALSE) validateIndicesTestVector(x, idxs, ftest = binCounts, fsure = binCounts_hist, bx = bx, right = TRUE) } matrixStats/tests/anyMissing.R0000644000176200001440000000477314520313754016222 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- data.frame( logical = c(TRUE, FALSE, TRUE), integer = 1:3, double = seq(from = 1.0, to = 3.0, by = 1.0), complex = seq(from = 1.0, to = 3.0, by = 1.0) + 1.0i, character = letters[1:3], stringsAsFactors = FALSE ) modes <- names(data) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("NULL...\n") stopifnot(identical(anyMissing(NULL), FALSE)) cat("NULL...done\n") cat("raw...\n") stopifnot(identical(anyMissing(as.raw(0:2)), FALSE)) cat("raw...done\n") cat("list(NULL)...\n") stopifnot(identical(anyMissing(list(NULL)), FALSE)) cat("list(NULL)...done\n") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Scalars, vectors, and matrices of various modes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in modes) { cat(sprintf("Mode: %s...\n", mode)) values <- data[[mode]] # Scalars cat(" scalar\n") x <- values[1L] print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x) <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) # Vectors cat(" vector\n") x <- values print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x)[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) # Matrices cat(" matrix\n") x <- matrix(c(values, values), ncol = 2L) print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x)[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) cat(sprintf("Mode: %s...done\n", mode)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data frames # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("data.frame...\n") x <- data stopifnot(identical(anyMissing(x), FALSE)) for (mode in modes) { x <- data is.na(x[[mode]])[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) } # for (mode ...) cat("data.frame...done\n") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Lists # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("list...\n") x <- as.list(data) stopifnot(identical(anyMissing(x), FALSE)) for (mode in modes) { x <- as.list(data) is.na(x[[mode]])[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) } # for (mode ...) cat("list...done\n") matrixStats/tests/validateIndices.R0000644000176200001440000000236214520313754017161 0ustar liggesuserslibrary(matrixStats) source("utils/validateIndicesFramework.R") oopts <- options(matrixStats.validateIndices = "ignore") ftest <- function(x, idxs) validateIndices(idxs, length(x)) x <- 1:6 for (idxs in index_cases) { for (mode in c("integer", "numeric", "logical")) { if (!is.null(idxs)) { suppressWarnings({storage.mode(idxs) <- mode}) } validateIndicesTestVector(x, idxs, ftest = ftest, fsure = identity) } } ftest <- function(x, idxs) validateIndices(idxs, length(x)) for (idxs in index_error_cases) { validateIndicesTestVector(x, idxs, ftest = ftest, fsure = identity) } ftest <- function(x, rows, cols) { vr <- validateIndices(rows, dim(x)[1], FALSE) vc <- validateIndices(cols, dim(x)[2], FALSE) stopifnot(all((vr > 0 & vr <= dim(x)[1]) | is.na(vr))) stopifnot(all((vc > 0 & vc <= dim(x)[2]) | is.na(vc))) suppressWarnings(x <- x[vr, vc, drop = FALSE]) x } x <- matrix(1:36, nrow = 6, ncol = 6) for (rows in index_cases) { for (cols in index_cases) { validateIndicesTestMatrix(x, rows, cols, ftest = ftest, fsure = identity) } } # Testing for 64-bit builds (64 bits = 8 bytes) if (.Machine$sizeof.pointer == 8L) { validateIndices(c(1.25*2^40, 3, 1*2^38, 1, 1.4*2^39), maxIdx = 1*2^41) } options(oopts) matrixStats/tests/logSumExp_subset.R0000644000176200001440000000123314520313754017375 0ustar liggesuserslibrary("matrixStats") logSumExp_R <- function(lx, na.rm = FALSE) { log(sum(exp(lx), na.rm = na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = logSumExp, fsure = logSumExp_R, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = logSumExp, fsure = logSumExp_R, na.rm = TRUE) } matrixStats/tests/rowWeightedMedians.R0000644000176200001440000002112114522461565017662 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMedians_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMedians() if (!is.null(w)) { if (isFALSE(useNames)) names(res) <- NULL } else if (is.na(useNames) || !useNames) names(res) <- NULL res } colWeightedMedians_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = weightedMedian, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as colWeightedMedians() if (!is.null(w)) { if (isFALSE(useNames)) names(res) <- NULL } else if (is.na(useNames) || !useNames) names(res) <- NULL res } set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # To check names attribute dimnames <- list(letters[1:5], LETTERS[1:4]) # Non-weighted row medians x_est0 <- rowMedians(x) x_est1 <- rowWeightedMedians(x) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x)) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowMedians(x, useNames = useNames) x_est1 <- rowWeightedMedians(x, useNames = useNames) x_est2 <- colWeightedMedians(t(x), useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } } # Weighted row medians (uniform weights) w <- rep(2.5, times = ncol(x)) x_est0 <- rowMedians(x) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowMedians(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians (excluding some columns) w <- c(0, 1, 0, 0) x_est0 <- rowMedians(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians (all zero weights) w <- c(0, 0, 0, 0) x_est0 <- rowMedians(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted medians by rows and columns w <- 1:4 x_est1 <- rowWeightedMedians(x, w = w) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians with missing values x_est0 <- apply(x, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = TRUE) print(x_est0) x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted medians by rows and columns w <- 1:4 x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE) x_est2 <- colWeightedMedians(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Inf weight x <- matrix(1:2, nrow = 1, ncol = 2) w <- c(7, Inf) x_est1 <- rowWeightedMedians(x, w = w) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(identical(2, x_est1)) stopifnot(identical(2, x_est2)) # Test with and without dimnames on x dimnames <- list("a", LETTERS[1:2]) for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } matrixStats/tests/rowWeightedMedians_subset.R0000644000176200001440000000407014522461565021253 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMedians_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMedians() if (!is.null(w)) { if (isFALSE(useNames)) names(res) <- NULL } else if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (mode in c("numeric", "integer", "logical")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- if (mode == "logical") "integer" else mode if (mode == "numeric") w[1] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix_w(x, w, rows, cols, ftest = rowWeightedMedians, fsure = rowWeightedMedians_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix_w(x, w, rows, cols, fcoltest = colWeightedMedians, fsure = rowWeightedMedians_R, na.rm = na.rm, useNames = useNames) } } } } matrixStats/tests/rowMedians_subset.R0000644000176200001440000000305414522461565017573 0ustar liggesuserslibrary("matrixStats") rowMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowMedians, fsure = rowMedians_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMedians, fsure = rowMedians_R, na.rm = na.rm, useNames = useNames) } } } matrixStats/tests/rowQuantiles_subset.R0000644000176200001440000000410414522461565020155 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowQuantiles_R <- function(x, probs, na.rm = FALSE, drop = TRUE, ..., useNames = NA) { q <- apply(x, MARGIN = 1L, FUN = function(x, probs, na.rm) { if (!na.rm && any(is.na(x))) { na_value <- NA_real_ storage.mode(na_value) <- storage.mode(x) rep(na_value, times = length(probs)) } else { as.vector(quantile(x, probs = probs, na.rm = na.rm, names = FALSE, ...)) } }, probs = probs, na.rm = na.rm) if (!is.null(dim(q))) q <- t(q) else dim(q) <- c(nrow(x), length(probs)) colnames(q) <- matrixStats:::quantile_probs_names(probs) rownames(q) <- rownames(x) if (isFALSE(useNames)) dimnames(q) <- NULL if (drop) q <- drop(q) q } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) probs <- c(0, 0.25, 0.75, 1) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowQuantiles, fsure = rowQuantiles_R, probs = probs, na.rm = na.rm, drop = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colQuantiles, fsure = rowQuantiles_R, probs = probs, na.rm = na.rm, drop = FALSE, useNames = useNames) } } } matrixStats/tests/rowSds.R0000644000176200001440000001357514522461565015370 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 1L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as rowSds() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } else if (isFALSE(useNames)) names(sigma) <- NULL sigma } colSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 2L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as colSds() if (is.null(center) || nrow(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } if (isFALSE(useNames)) names(sigma) <- NULL sigma } rowSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) sigma <- rowSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } colSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) sigma <- colSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50 + 0.1, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowSds_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowSds(x, na.rm = na.rm, useNames = useNames) r1b <- rowSds_center(x, na.rm = na.rm, useNames = useNames) r2 <- colSds(t(x), na.rm = na.rm, useNames = useNames) r2b <- colSds_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } # for (add_na ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowSds_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowSds(x, na.rm = na.rm, useNames = useNames) r1b <- rowSds_center(x, na.rm = na.rm, useNames = useNames) r2 <- colSds(t(x), na.rm = na.rm, useNames = useNames) r2b <- colSds_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") for (na.rm in c(FALSE, TRUE)) { cat("na.rm = ", na.rm, "\n", sep = "") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowSds_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowSds(x, na.rm = na.rm, useNames = useNames) r1b <- rowSds_center(x, na.rm = na.rm, useNames = useNames) r2 <- colSds(t(x), na.rm = na.rm, useNames = useNames) r2b <- colSds_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } matrixStats/tests/mean2.R0000644000176200001440000001241214520313754015070 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) mean2_R <- function(x, na.rm = FALSE, idxs = NULL) { if (is.null(idxs)) { mean(x, na.rm = na.rm) } else { mean(x[idxs], na.rm = na.rm) } } # mean2_R() cat("Consistency checks:\n") for (kk in 1:20) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape n <- sample(100L, size = 1L) x <- rnorm(n, sd = 100) # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1L) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE, FALSE), size = 1L) # Sum over all y0 <- mean2_R(x, na.rm = na.rm) y1 <- mean2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) # Sum over subset nidxs <- sample(n, size = 1L) idxs <- sample(n, size = nidxs) y0 <- mean2_R(x, na.rm = na.rm, idxs = idxs) y1 <- mean2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) if (storage.mode(x) == "integer") { storage.mode(x) <- "logical" y0 <- mean2_R(x, na.rm = na.rm) y1 <- mean2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y0 <- mean2_R(x, na.rm = na.rm, idxs = idxs) y1 <- mean2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (na.rm in c(FALSE, TRUE)) { # Averaging over zero elements (integers) x <- integer(0) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- 1:5 idxs <- integer(0) s1 <- mean(x[idxs], na.rm = na.rm) s2 <- mean2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over NA_integer_:s x <- rep(NA_integer_, times = 5L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times = 5L) idxs <- 1:3 s1 <- mean(x[idxs], na.rm = na.rm) s2 <- mean2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over zero elements (doubles) x <- double(0) s1 <- mean(x) s2 <- mean2(x) stopifnot(identical(s1, s2)) x <- as.double(1:10) idxs <- integer(0) s1 <- mean(x[idxs]) s2 <- mean2(x, idxs = idxs) stopifnot(identical(s1, s2)) # Averaging over NA_real_:s x <- rep(NA_real_, times = 5L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- rep(NA_real_, times = 5L) idxs <- 1:3 s1 <- mean(x[idxs], na.rm = na.rm) s2 <- mean2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over -Inf:s x <- rep(-Inf, times = 3L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over +Inf:s x <- rep(+Inf, times = 3L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over mix of -Inf:s and +Inf:s x <- rep(c(-Inf, +Inf), times = 3L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over mix of -Inf:s and +Inf:s and numerics x <- rep(c(-Inf, +Inf, 3.14), times = 2L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over mix of NaN, NA, +Inf, and numerics x <- c(NaN, NA, +Inf, 3.14) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) if (na.rm) { stopifnot(identical(s2, s1)) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } # Averaging over mix of NaN, NA_real_, +Inf, and numerics x <- c(NA_real_, NaN, +Inf, 3.14) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) if (na.rm) { stopifnot(identical(s2, s1)) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:5 idxs_list <- list( integer = 1:3, double = as.double(1:3), logical = (x <= 3) ) for (idxs in idxs_list) { cat("idxs:\n") str(idxs) s1 <- mean(x[idxs], na.rm = TRUE) s2 <- mean2(x, idxs = idxs, na.rm = TRUE) stopifnot(identical(s1, s2)) } matrixStats/tests/rowProds_subset.R0000644000176200001440000000272214522461565017303 0ustar liggesuserslibrary("matrixStats") rowProds_R <- function(x, FUN = prod, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = FUN, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowProds, fsure = rowProds_R, method = "expSumLog", FUN = product, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colProds, fsure = rowProds_R, method = "expSumLog", FUN = product, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/count_subset.R0000644000176200001440000000166614520313754016614 0ustar liggesuserslibrary("matrixStats") count_R <- function(x, value = TRUE, na.rm = FALSE, ...) { if (is.na(value)) { counts <- sum(is.na(x)) } else { counts <- sum(x == value, na.rm = na.rm) } as.integer(counts) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -3, max = 3) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = count, fsure = count_R, value = 0, na.rm = TRUE) validateIndicesTestVector(x, idxs, ftest = count, fsure = count_R, value = 0, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = count, fsure = count_R, value = NA_integer_) } matrixStats/tests/benchmark.R0000644000176200001440000000075614520313754016030 0ustar liggesusers## 1. Don't test with valgrind ## 2. Test only R (>= 3.0.2) because of that's what knitr requires if (getRversion() >= "3.0.2" && Sys.getenv("_R_CHECK_USE_VALGRIND_") == "") { ## 3. Make sure all suggested packages are installed / can be loaded pkgs <- c("base64enc", "ggplot2", "knitr", "markdown", "microbenchmark", "R.devices", "R.rsp") if (all(unlist(lapply(pkgs, FUN = requireNamespace)))) { html <- matrixStats:::benchmark("binCounts") print(html) } rm(list = "pkgs") } matrixStats/tests/weightedVar.R0000644000176200001440000000475514520313754016352 0ustar liggesuserslibrary("matrixStats") weightedVar_R <- function(x, w) { mu <- weighted.mean(x, w = w) sum(w * (x - mu) ^ 2) / (sum(w) - 1) } n <- 10 x <- as.double(1:n) message("*** weightedVar() ...") message("- Zero elements") m0 <- var(integer(0)) m1 <- weightedVar(integer(0), w = integer(0)) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- One elements") m0 <- var(1) m1 <- weightedVar(1) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights (all w = 1)") m0 <- var(x) w <- rep(1, times = n) m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights (all w = 3)") m0 <- var(rep(x, each = 3)) w <- rep(3, times = n) m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights on the first five elements") idxs <- 1:5 m0 <- var(x[1:5]) w <- rep(0, times = n) w[idxs] <- 1 m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights on every second elements") idxs <- seq(from = 1, to = n, by = 2) m0 <- var(x[idxs]) w <- rep(0, times = n) w[idxs] <- 1 m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- All weights are zero") idxs <- integer(0L) m0 <- var(x[idxs]) w <- rep(0, times = n) w[idxs] <- 1 m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Infinite weight on first element") idxs <- 1L m0 <- var(x[idxs]) w <- rep(0, times = n) w[idxs] <- Inf m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Missing-value weight on first element") idxs <- 1L w <- rep(1, times = n) w[idxs] <- NA_real_ m1 <- weightedVar(x, w = w) str(list(m1 = m1)) stopifnot(identical(m1, NA_real_)) message("- Frequency weights") ## From https://en.wikipedia.org/wiki/Weighted_arithmetic_mean y <- c(2, 2, 4, 5, 5, 5) x <- unique(y) w <- table(y) stopifnot(names(w) == x) m0 <- weightedVar(x, w = w) m1 <- var(y) stopifnot(all.equal(m1, m0)) m2 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1, m2 = m2)) stopifnot(all.equal(m2, m0)) ## From https://github.com/HenrikBengtsson/matrixStats/issues/72 large <- c(21, 8, 26, 1, 15, 33, 12, 25, 0, 84) years <- c(41706, 9301, 33678, 3082, 27040, 44188, 10049, 30591, 2275, 109831) m0 <- weightedVar(large, w = years) m1 <- weightedVar(large, w = years) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("*** weightedVar() ... DONE") matrixStats/tests/rowCounts_subset.R0000644000176200001440000000663314522461565017474 0ustar liggesuserslibrary("matrixStats") rowCounts_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(is.na(x)) ) } else { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(x == value, na.rm = na.rm) ) } # Preserve names attribute names <- names(counts) counts <- as.integer(counts) if (isTRUE(useNames) && !is.null(names)) names(counts) <- names counts } # rowCounts_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) x[2:3, 3:4] <- NA_real_ storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = 0, na.rm = TRUE, useNames = useNames) for (value in c(0, NA_integer_)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = value, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = value, useNames = useNames) } } } } } x <- matrix(rep(letters, length.out = 6 * 6), nrow = 6, ncol = 6) x[2:3, 3:4] <- NA_character_ # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = "g", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = "g", na.rm = TRUE, useNames = useNames) for (value in c("g", NA_character_)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = value, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = value, useNames = useNames) } } } } matrixStats/tests/utils/0000755000176200001440000000000014436154117015105 5ustar liggesusersmatrixStats/tests/utils/validateIndicesFramework.R0000644000176200001440000001302314436154117022175 0ustar liggesuserslibrary("matrixStats") validateIndicesTestVector <- function(x, idxs, ftest, fsure, debug = FALSE, ...) { if (debug) cat(sprintf("idxs=%s, type=%s\n", toString(idxs), toString(typeof(idxs)))) suppressWarnings({ actual <- tryCatch(ftest(x, idxs = idxs, ...), error = function(c) "error") expect <- tryCatch({ if (!is.null(idxs)) x <- x[idxs] fsure(x, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } validateIndicesTestVector_w <- function(x, w, idxs, ftest, fsure, debug = FALSE, ...) { if (debug) cat(sprintf("idxs=%s, type=%s\n", toString(idxs), toString(typeof(idxs)))) suppressWarnings({ actual <- tryCatch(ftest(x, w, idxs = idxs, ...), error = function(c) "error") expect <- tryCatch({ if (!is.null(idxs)) { x <- x[idxs] w <- w[idxs] } fsure(x, w, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } validateIndicesTestMatrix <- function(x, rows, cols, ftest, fcoltest, fsure, debug = FALSE, ...) { if (debug) { cat(sprintf("rows=%s; type=%s\n", toString(rows), toString(typeof(rows)))) cat(sprintf("cols=%s; type=%s\n", toString(cols), toString(typeof(cols)))) } suppressWarnings({ if (missing(fcoltest)) { actual <- tryCatch(ftest(x, rows = rows, cols = cols, ...), error = function(c) "error") } else { actual <- tryCatch(fcoltest(t(x), rows = cols, cols = rows, ...), error = function(c) "error") } expect <- tryCatch({ if (!is.null(rows) && !is.null(cols)) { x <- x[rows, cols, drop = FALSE] } else if (!is.null(rows)) { x <- x[rows, , drop = FALSE] } else if (!is.null(cols)) { x <- x[, cols, drop = FALSE] } fsure(x, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } validateIndicesTestMatrix_w <- function(x, w, rows, cols, ftest, fcoltest, fsure, debug = FALSE, ...) { if (debug) { cat(sprintf("rows=%s; type=%s\n", toString(rows), toString(typeof(rows)))) cat(sprintf("cols=%s; type=%s\n", toString(cols), toString(typeof(cols)))) } suppressWarnings({ if (missing(fcoltest)) { actual <- tryCatch(ftest(x, w, rows = rows, cols = cols, ...), error = function(c) "error") } else { actual <- tryCatch(fcoltest(t(x), w, rows = cols, cols = rows, ...), error = function(c) "error") } expect <- tryCatch({ if (!is.null(rows) && !is.null(cols)) { x <- x[rows, cols, drop = FALSE] w <- w[cols] } else if (!is.null(rows)) { x <- x[rows, , drop = FALSE] } else if (!is.null(cols)) { x <- x[, cols, drop = FALSE] w <- w[cols] } fsure(x, w, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } index_cases <- list() # negative indices with duplicates index_cases[[length(index_cases) + 1]] <- c(-4, 0, 0, -3, -1, -3, -1) # positive indices index_cases[[length(index_cases) + 1]] <- c(3, 5, 1) # positive indices with duplicates index_cases[[length(index_cases) + 1]] <- c(3, 0, 0, 5, 1, 5, 5) # positive indices out of ranges index_cases[[length(index_cases) + 1]] <- 4:9 # negative out of ranges: just ignore index_cases[[length(index_cases) + 1]] <- c(-5, 0, -3, -1, -9) # negative indices exclude all index_cases[[length(index_cases) + 1]] <- -1:-6 # idxs is single number index_cases[[length(index_cases) + 1]] <- 4 index_cases[[length(index_cases) + 1]] <- -4 index_cases[[length(index_cases) + 1]] <- 0 # idxs is empty index_cases[[length(index_cases) + 1]] <- integer() # NA in idxs index_cases[[length(index_cases) + 1]] <- c(NA_real_, 0, 2) # Inf in idxs index_cases[[length(index_cases) + 1]] <- c(-Inf, -1) index_cases[[length(index_cases) + 1]] <- c(NA_real_, 0, 2, Inf) # single logical index_cases[[length(index_cases) + 1]] <- NA index_cases[[length(index_cases) + 1]] <- TRUE index_cases[[length(index_cases) + 1]] <- FALSE # full logical idxs index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, FALSE, TRUE, TRUE, FALSE) # too many logical idxs index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE) # insufficient idxs index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE) index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, NA) index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, NA, FALSE) # NULL index_cases[length(index_cases) + 1] <- list(NULL) index_error_cases <- list() # mixed positive and negative indices index_error_cases[[length(index_cases) + 1]] <- 1:-1 # mixed positive, negative and zero indices index_error_cases[[length(index_cases) + 1]] <- c(-4, 0, 0, 1) # NA in idxs index_error_cases[[length(index_cases) + 1]] <- c(NA_real_, -2) matrixStats/tests/weightedMean.R0000644000176200001440000000447014520313754016474 0ustar liggesuserslibrary("matrixStats") for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "", sep = "") n <- 2L x <- runif(n, min = -5, max = 5) if (mode == "logical") { x <- x > 0 } storage.mode(x) <- mode str(x) cat("All weights are 1\n") w <- rep(1, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("First weight is 5\n") # Pull the mean towards zero w[1] <- 5 str(w) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("All weights are 0\n") # All weights set to zero w <- rep(0, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("First weight is 8.5\n") # Put even more weight on the zero w[1] <- 8.5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("First weight is Inf\n") # All weight on the first value w[1] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("Last weight is Inf\n") # All weight on the last value w[1] <- 1 w[n] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) } # for (mode ...) message("*** Testing for missing values") # NA tests xs <- list(A = c(1, 2, 3), B = c(NA, 2, 3), C = c(NA, 2, 3)) ws <- list(A = c(1, 1, 1), B = c(NA, 1, 1), C = c(1, NA, 1)) ## NOTE: The (B, B) case with na.rm = TRUE is interesting because ## even if NAs in 'w' are not dropped by na.rm = TRUE, this one ## is because 'x' is dropped and therefore that first element ## is skipped in the computation. It basically does ## keep <- !is.na(x); x <- x[keep]; w <- w[keep] ## without looking at 'w'. for (x in xs) { for (mode in c("logical", "integer", "double")) { storage.mode(x) <- mode for (w in ws) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("mode: %s, na.rm = %s\n", mode, na.rm)) str(list(x = x, w = w)) m0 <- weighted.mean(x, w, na.rm = na.rm) m1 <- weightedMean(x, w, na.rm = na.rm) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) } } } } matrixStats/tests/sum2_subset.R0000644000176200001440000000106214520313754016340 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = sum2, fsure = sum, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = sum2, fsure = sum, na.rm = TRUE) } matrixStats/tests/rowAvgsPerColSet_subset.R0000644000176200001440000000266114520313754020671 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) #W <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) for (rows in index_cases) { for (cols in index_cases) { if (is.null(rows)) { rows <- seq_len(nrow(x)) rows_finite <- rows } else { rows_finite <- rows[is.finite(rows)] } if (is.null(cols)) { cols <- seq_len(ncol(x)) cols_finite <- cols } else { cols_finite <- cols[is.finite(cols)] } suppressWarnings({ actual <- tryCatch({ rowAvgsPerColSet(x, rows = rows, S = matrix(cols, ncol = 1), FUN = rowMeans) }, error = function(c) "error") expect <- tryCatch({ rowMeans(x[rows, cols_finite, drop = FALSE], na.rm = TRUE) }, error = function(c) "error") }) stopifnot(all.equal(as.vector(actual), expect)) suppressWarnings({ actual <- tryCatch({ colAvgsPerRowSet(x, cols = cols, S = matrix(rows, ncol = 1), FUN = colMeans) }, error = function(c) "error") expect <- tryCatch({ colMeans(x[rows_finite, cols, drop = FALSE], na.rm = TRUE) }, error = function(c) "error") }) stopifnot(all.equal(as.vector(actual), expect)) } } matrixStats/tests/rowMads.R0000644000176200001440000002400514522461565015511 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") rowMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } rowMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowMedians(x, cols = cols, na.rm = na.rm, useNames = FALSE) rowMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } colMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colMedians(x, rows = rows, na.rm = na.rm, useNames = FALSE) colMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 3x3 matrix (no ties) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1, 2, 3, 2, 3, 4, 3, 4, 5) + 0.1, nrow = 3, ncol = 3) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check name attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("rowMads():\n") center <- rowMedians(x, na.rm = TRUE, useNames = useNames) r0 <- rowMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- rowMads(x, na.rm = TRUE, useNames = useNames) r1b <- rowMads_center(x, na.rm = TRUE, useNames = useNames) r1c <- rowMads(x, center = center, na.rm = TRUE, useNames = useNames) r2 <- colMads(tx, na.rm = TRUE, useNames = useNames) r2b <- colMads_center(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r1c, r1)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2b, r2)) cat("colMads():\n") center <- colMedians(x, na.rm = TRUE, useNames = useNames) r0 <- colMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- colMads(x, na.rm = TRUE, useNames = useNames) r1b <- colMads_center(x, na.rm = TRUE, useNames = useNames) r1c <- colMads(x, center = center, na.rm = TRUE, useNames = useNames) r2 <- rowMads(tx, na.rm = TRUE, useNames = useNames) r2b <- rowMads_center(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r1c, r1)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2b, r2)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Ties: a 4x4 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1:4, 2:5, 3:6, 4:7) + 0.1, nrow = 4, ncol = 4) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) tx <- t(x) # To check name attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("rowMads():\n") r0 <- rowMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- rowMads(x, na.rm = TRUE, useNames = useNames) r2 <- colMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) cat("colMads():\n") r0 <- colMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- colMads(x, na.rm = TRUE, useNames = useNames) r2 <- rowMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } } } tx <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # No ties: a 3x3 matrix with an NA value # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1, 2, 3, 2, 3, 4, 3, 4, 5) + 0.1, nrow = 3, ncol = 3) x[2, 2] <- NA_real_ cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) tx <- t(x) # To check name attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("rowMads():\n") r0 <- rowMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- rowMads(x, na.rm = TRUE, useNames = useNames) r2 <- colMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) cat("colMads():\n") r0 <- colMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- colMads(x, na.rm = TRUE, useNames = useNames) r2 <- rowMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } } } tx <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } # To check name attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") cat("rowMads():\n") center <- rowMedians(x, na.rm = na.rm, useNames = useNames) r0 <- rowMads_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowMads(x, na.rm = na.rm, useNames = useNames) r2 <- rowMads(x, center = center, na.rm = na.rm, useNames = useNames) r3 <- colMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2, r1)) stopifnot(all.equal(r3, r0)) stopifnot(all.equal(r3, r1)) stopifnot(all.equal(r3, r2)) cat("colMads():\n") center <- colMedians(x, na.rm = na.rm, useNames = useNames) r0 <- colMads_R(x, na.rm = na.rm, useNames = useNames) r1 <- colMads(x, na.rm = na.rm, useNames = useNames) r2 <- colMads(x, center = center, na.rm = na.rm, useNames = useNames) r3 <- rowMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2, r1)) stopifnot(all.equal(r3, r0)) stopifnot(all.equal(r3, r1)) stopifnot(all.equal(r3, r2)) } } } tx <- NULL } # for (add_na ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(NA_real_, nrow = 10L, ncol = 5L) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowMads_R(x, na.rm = na.rm, useNames = useNames) if (na.rm) r0[is.na(r0)] <- NaN r1 <- rowMads(x, na.rm = na.rm, useNames = useNames) r2 <- colMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } tx <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowMads_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowMads(x, na.rm = na.rm, useNames = useNames) r2 <- colMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 0x0 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(double(0), nrow = 0, ncol = 0) tx <- t(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowMads_R(x, na.rm = na.rm) r1 <- rowMads(x, na.rm = na.rm) r2 <- colMads(tx, na.rm = na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } matrixStats/tests/signTabulate.R0000644000176200001440000000203614520313754016511 0ustar liggesuserslibrary("matrixStats") signTabulate0 <- function(x, ...) { nneg <- sum(x < 0, na.rm = TRUE) nzero <- sum(x == 0, na.rm = TRUE) npos <- sum(x > 0, na.rm = TRUE) nna <- sum(is.na(x)) nneginf <- sum(is.infinite(x) & x < 0, na.rm = TRUE) nposinf <- sum(is.infinite(x) & x > 0, na.rm = TRUE) res <- c(nneg, nzero, npos, nna, nneginf, nposinf) res <- as.double(res) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf") if (is.integer(x)) res <- res[1:4] res } # signTabulate0() # Simulate data set.seed(0xBEEF) n <- 100L x <- runif(n) x[sample(n, size = 0.1 * n)] <- 0 x[sample(n, size = 0.1 * n)] <- NA_real_ x[sample(n, size = 0.1 * n)] <- -Inf x[sample(n, size = 0.1 * n)] <- +Inf # Doubles message("Doubles:") counts0 <- signTabulate0(x) print(counts0) counts1 <- signTabulate(x) print(counts1) stopifnot(identical(counts1, counts0)) # Integers message("Integers:") x <- suppressWarnings(as.integer(x)) counts0 <- signTabulate0(x) print(counts0) counts1 <- signTabulate(x) print(counts1) stopifnot(identical(counts1, counts0)) matrixStats/tests/rowCollapse.R0000644000176200001440000000542314522461565016372 0ustar liggesuserslibrary("matrixStats") x <- matrix(1:27, ncol = 3) # To check names attribute dimnames <- list(letters[1:9], LETTERS[1:3]) rowCollapse_R <- function(x, idxs, ..., useNames = NA) { res <- x[, idxs] # Preserve names attribute? if (is.na(useNames) || !useNames) names(res) <- NULL res } idxs <- 1L # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } idxs <- 2L # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } rowCollapse_R <- function(x, idxs, ..., useNames = NA) { res <- c(x[1:5, 1], x[6:9, 3]) # Preserve names attribute? if (is.na(useNames) || !useNames) names(res) <- NULL res } idxs <- c(1, 1, 1, 1, 1, 3, 3, 3, 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } rowCollapse_R <- function(x, idxs, ..., useNames = NA) { res <- c(x[1, 1], x[2, 2], x[3, 3], x[4, 1], x[5, 2], x[6, 3], x[7, 1], x[8, 2], x[9, 3]) # Preserve names attribute? if (isTRUE(useNames)) { names <- rownames(x) if (!is.null(names)) names(res) <- names } res } idxs <- 1:3 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } matrixStats/tests/rowAllAnys.R0000644000176200001440000002514014522461565016171 0ustar liggesuserslibrary("matrixStats") rowAlls_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = all, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames(y) <- dimnames(x) } res <- apply(y, MARGIN = 1L, FUN = all, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnys_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = any, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames(y) <- dimnames(x) } res <- apply(y, MARGIN = 1L, FUN = any, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnyMissings_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = anyMissing) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(FALSE, nrow = 10L, ncol = 4L) x[7:8, 2:3] <- TRUE x[1:3, ] <- TRUE x[, 1] <- TRUE x[4, ] <- FALSE x[, 4] <- FALSE x[2, ] <- FALSE x[3, ] <- TRUE # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:4]) for (kk in 1:3) { if (kk == 2) { x[2, 2] <- NA } else if (kk == 3) { x[, 2] <- NA x[2, ] <- NA } # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { m0 <- rowAlls_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowAlls(x, na.rm = na.rm, useNames = useNames) m2 <- colAlls(t(x), na.rm = na.rm, useNames = useNames) str(list("all()", m0 = m0, m1 = m1, m2 = m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) m0 <- rowAnys_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowAnys(x, na.rm = na.rm, useNames = useNames) m2 <- colAnys(t(x), na.rm = na.rm, useNames = useNames) str(list("any()", m0 = m0, m1 = m1, m2 = m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) m0 <- rowAnyMissings_R(x, useNames = useNames) m1 <- rowAnyMissings(x, useNames = useNames) m2 <- colAnyMissings(t(x), useNames = useNames) str(list("anyMissing()", m0 = m0, m1 = m1, m2 = m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) } } } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(1:6, length.out = 4 * 5), nrow = 4L, ncol = 5L) x[2, ] <- 7L x[3, 1] <- 7L x[2:3, 3:4] <- NA_integer_ # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:5]) # Row/column counts value <- 7L # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowAlls_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAlls(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAlls(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c <- allValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) c <- allValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) } } r0 <- rowAnys_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAnys(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAnys(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c <- anyValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) c <- anyValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) } } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # rowAlls(x) et al. on numeric 'x' with logical 'value' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 4L, ncol = 5L) x[2:4, 2] <- (1:3) / 4 x[2, 2:4] <- (1:3) / 4 x[3:4, 3] <- (3:4) / 4 x[3, 3:4] <- (3:4) / 4 x[1:4, 5] <- (1:4) / 5 x[4, 4] <- NA_real_ # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:5]) for (value in c(TRUE, FALSE)) { for (na.rm in c(FALSE, TRUE)) { y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- rowAnys(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) # Check names attribute dimnames(x) <- dimnames y <- rowAnys(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- rowAnys(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- colAnys(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) # Check names attribute dimnames(x) <- dimnames y <- colAnys(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- colAnys(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- rowAlls(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) # Check names attribute dimnames(x) <- dimnames y <- rowAlls(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- rowAlls(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- colAlls(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) print(y0) # Check names attribute dimnames(x) <- dimnames y <- colAlls(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- colAlls(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL } ## for (na.rm ...) } ## for(value ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: character (not sure if this should be supported) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - all_R <- function(x, value = TRUE, ...) { if (is.na(value)) { all(is.na(x), ...) } else { all(x == value, ...) } } any_R <- function(x, value = TRUE, ...) { if (is.na(value)) { any(is.na(x), ...) } else { any(x == value, ...) } } x <- matrix(rep(letters, length.out = 10 * 5), nrow = 10L, ncol = 5L) x[2, ] <- "g" x[2:4, 3:4] <- NA_character_ # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL # Row/column counts for (value in c("g", NA_character_)) { for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowAlls_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAlls(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAlls(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c0 <- all_R(x[rr, ], value, na.rm = na.rm) c <- allValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) stopifnot(identical(c, c0)) } } r0 <- rowAnys_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAnys(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAnys(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c0 <- any_R(x[rr, ], value, na.rm = na.rm) c <- anyValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, c0)) stopifnot(identical(c, r1[rr])) } } } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # NA 0 test # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 3L, ncol = 3L) x[1, ] <- c(NA_real_, NA_real_, 0) x[3, ] <- c(1, 0, 1) dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowAnys_R(x, value = 0, useNames = useNames) r1 <- rowAnys(x, value = 0, useNames = useNames) stopifnot(identical(r0, r1)) } } matrixStats/tests/rowWeightedMeans.R0000644000176200001440000002441214522461565017353 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (na.rm) na.rm <- anyMissing(x) if ((!is.null(w) && nw == 0L) || isFALSE(na.rm)) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as colWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (!is.null(w) && nw == 0L) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # To check names attribute dimnames <- list(letters[1:5], LETTERS[1:4]) # Non-weighted row averages x_est0 <- rowMeans(x) x_est1 <- rowWeightedMeans(x) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x)) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedMeans(x, useNames = FALSE) x_est2 <- colWeightedMeans(t(x), useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowMeans(x) x_est1 <- rowWeightedMeans(x, useNames = TRUE) x_est2 <- colWeightedMeans(t(x), useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row averages (uniform weights) w <- rep(2.5, times = ncol(x)) x_est0 <- rowMeans(x) x_est1 <- rowWeightedMeans(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row averages (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowMeans(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMeans(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row averages (excluding some columns) w <- c(0, 1, 0, 0) x_est0 <- rowMeans(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMeans(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row averages (all zero weights) w <- c(0, 0, 0, 0) x_est0 <- rowMeans(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMeans(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted averages by rows and columns w <- 1:4 x_est1 <- rowWeightedMeans(x, w = w) print(x_est1) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } x[sample(length(x), size = 0.3 * length(x))] <- NA print(x) # Non-weighted row averages with missing values x_est0 <- rowMeans(x, na.rm = TRUE) x_est1 <- rowWeightedMeans(x, na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), na.rm = TRUE) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedMeans(x, na.rm = TRUE, useNames = FALSE) x_est2 <- colWeightedMeans(t(x), na.rm = TRUE, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowMeans(x, na.rm = TRUE) x_est1 <- rowWeightedMeans(x, na.rm = TRUE, useNames = TRUE) x_est2 <- colWeightedMeans(t(x), na.rm = TRUE, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row averages with missing values x_est0 <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = TRUE) print(x_est0) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted averages by rows and columns w <- 1:4 x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # w contains missing value w[1] <- NA_integer_ x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } x_est1 <- rowWeightedMeans(x, w = w, na.rm = FALSE) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = FALSE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = FALSE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = FALSE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } matrixStats/tests/rowRanges_subset.R0000644000176200001440000000533314522461565017434 0ustar liggesuserslibrary("matrixStats") rowMins_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = min, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMins_R() rowMaxs_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = max, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMaxs_R() rowRanges_R <- function(x, ..., useNames = NA) { suppressWarnings({ ans <- t(apply(x, MARGIN = 1L, FUN = range, ...)) }) # Preserve rownames attribute dim <- c(dim(x)[1], 2L) if (!isTRUE(all.equal(dim(ans), dim))) { dim(ans) <- dim rownames <- rownames(x) if (!is.null(dimnames)) rownames(ans) <- rownames } if (is.na(useNames) || !useNames) dimnames(ans) <- NULL ans } # rowRanges_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check rownames/names attributes dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowRanges, fsure = rowRanges_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowMins, fsure = rowMins_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowMaxs, fsure = rowMaxs_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colRanges, fsure = rowRanges_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMins, fsure = rowMins_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMaxs, fsure = rowMaxs_R, na.rm = na.rm, useNames = useNames) } } } matrixStats/tests/rowAvgsPerColSet.R0000644000176200001440000001055114520313754017301 0ustar liggesuserslibrary("matrixStats") X <- matrix(rnorm(20 * 6), nrow = 20, ncol = 6) rownames(X) <- LETTERS[1:nrow(X)] colnames(X) <- letters[1:ncol(X)] print(X) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply rowMeans() for 3 sets of 2 columns # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 3L S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S) print(Z) # Validation Z0 <- cbind(s1 = rowMeans(X[, 1:2]), s2 = rowMeans(X[, 3:4]), s3 = rowMeans(X[, 5:6])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply colMeans() for 5 sets of 4 rows # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 5L S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S) print(Z) # Validation Z0 <- rbind(s1 = colMeans(X[1:4, ]), s2 = colMeans(X[5:8, ]), s3 = colMeans(X[9:12, ]), s4 = colMeans(X[13:16, ]), s5 = colMeans(X[17:20, ])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # When there is only one "complete" set # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 1L S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S, FUN = rowMeans) print(Z) Z0 <- rowMeans(X) stopifnot(identical(drop(Z), Z0)) nbr_of_sets <- 1L S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S, FUN = colMeans) print(Z) Z0 <- colMeans(X) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Use weights # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 3L S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) W <- matrix(runif(length(X)), nrow = nrow(X), ncol = ncol(X)) Z1 <- rowAvgsPerColSet(X, W = W, S = S, FUN = rowWeightedMeans) print(Z1) Z2 <- colAvgsPerRowSet(X, W = W, S = S, FUN = colWeightedMeans) print(Z2) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Result should always be a matrix, including when nrow(X) <= 1 # (https://github.com/HenrikBengtsson/matrixStats/issues/108) # - - - - - - - - - - - - - - - - - - - - - - - - - - X <- matrix(1:3, nrow = 1L, ncol = 3L) S <- matrix(1, nrow = 1L, ncol = 1L) Z1 <- rowAvgsPerColSet(X, S = S) stopifnot(is.matrix(Z1)) Z2 <- rowAvgsPerColSet(X, S = S, rows = 0) stopifnot(is.matrix(Z2)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Works with many, one or zero columns / rows # (https://github.com/HenrikBengtsson/matrixStats/issues/172) # - - - - - - - - - - - - - - - - - - - - - - - - - - S <- cbind(1:2, 3:4, 5:6) X <- matrix(rnorm(2 * 6), nrow = 6, ncol = 2) Z2 <- colAvgsPerRowSet(X, S = S, FUN = colSums2) Z2_ref <- rbind(colSums2(X[S[,1], ,drop=FALSE]), colSums2(X[S[,2], ,drop=FALSE]), colSums2(X[S[,3], ,drop=FALSE])) stopifnot(identical(Z2, Z2_ref)) X <- matrix(rnorm(6), nrow = 6, ncol = 1) Z1 <- colAvgsPerRowSet(X, S = S, FUN = colSums2) Z1_ref <- rbind(colSums2(X[S[,1], ,drop=FALSE]), colSums2(X[S[,2], ,drop=FALSE]), colSums2(X[S[,3], ,drop=FALSE])) stopifnot(identical(Z1, Z1_ref)) X <- matrix(numeric(0), nrow = 6, ncol = 0) Z0 <- colAvgsPerRowSet(X, S = S, FUN = colSums2) Z0_ref <- matrix(numeric(0), nrow = ncol(S), ncol = 0) stopifnot(identical(Z0, unname(Z0_ref))) S <- rbind(1:4, 5:8) X <- matrix(rnorm(n = 2 * 8), nrow = 2, ncol = 8) Z2 <- rowAvgsPerColSet(X, S = S, FUN = rowMeans2) Z2_ref <- cbind(rowMeans2(X[,S[,1],drop=FALSE]), rowMeans2(X[,S[,2],drop=FALSE]), rowMeans2(X[,S[,3],drop=FALSE]), rowMeans2(X[,S[,4],drop=FALSE])) stopifnot(identical(Z2, Z2_ref)) X <- matrix(rnorm(n = 8), nrow = 1, ncol = 8) Z1 <- rowAvgsPerColSet(X, S = S, FUN = rowMeans2) Z1_ref <- cbind(rowMeans2(X[,S[,1],drop=FALSE]), rowMeans2(X[,S[,2],drop=FALSE]), rowMeans2(X[,S[,3],drop=FALSE]), rowMeans2(X[,S[,4],drop=FALSE])) stopifnot(identical(Z1, Z1_ref)) X <- matrix(numeric(0), nrow = 0, ncol = 8) Z0 <- rowAvgsPerColSet(X, S = S, FUN = rowMeans2) Z0_ref <- matrix(numeric(0), nrow = 0, ncol = ncol(S)) stopifnot(identical(Z0, Z0_ref)) matrixStats/tests/weightedMean_subset.R0000644000176200001440000000146114520313754020056 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf for (idxs in index_cases) { validateIndicesTestVector_w(x, w, idxs, ftest = weightedMean, fsure = weighted.mean, na.rm = TRUE, refine = TRUE) validateIndicesTestVector_w(x, w, idxs, ftest = weightedMean, fsure = weighted.mean, na.rm = FALSE, refine = TRUE) } } matrixStats/tests/rowRanges.R0000644000176200001440000001673014522461565016052 0ustar liggesuserslibrary("matrixStats") rowMins_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = min, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMins_R() rowMaxs_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = max, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMaxs_R() rowRanges_R <- function(x, ..., useNames = NA) { suppressWarnings({ ans <- t(apply(x, MARGIN = 1L, FUN = range, ...)) }) # Preserve rownames attribute dim <- c(dim(x)[1], 2L) if (!isTRUE(all.equal(dim(ans), dim))) { dim(ans) <- dim rownames <- rownames(x) if (!is.null(dimnames)) rownames(ans) <- rownames } if (is.na(useNames) || !useNames) dimnames(ans) <- NULL ans } # rowRanges_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50 + 0.1, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column extremes for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") # Ranges cat("range:\n") r0 <- rowRanges_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowRanges(x, na.rm = na.rm, useNames = useNames) r2 <- colRanges(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) # Min cat("min:\n") m0 <- rowMins_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowMins(x, na.rm = na.rm, useNames = useNames) m2 <- colMins(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(m1, m2)) stopifnot(all.equal(m1, m0)) # Max cat("max:\n") m0 <- rowMaxs_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowMaxs(x, na.rm = na.rm, useNames = useNames) m2 <- colMaxs(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(m1, m2)) stopifnot(all.equal(m1, m0)) } } } } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(NA_real_, nrow = 10L, ncol = 5L) storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowRanges_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowRanges(x, na.rm = na.rm, useNames = useNames) r2 <- colRanges(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) } } } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Nx0 matrix x <- matrix(double(0L), nrow = 5L, ncol = 0L) r0 <- rowRanges_R(x) #r1 <- rowRanges(x) #r_truth <- matrix(c(Inf, -Inf), nrow = nrow(x), ncol = 2L, byrow = TRUE) #stopifnot(all.equal(r1, r_truth)) # 0xN matrix x <- t(x) #r1 <- colRanges(x) #stopifnot(all.equal(r1, r_truth)) # Nx1 matrix x <- matrix(1:5, nrow = 5L, ncol = 1L) # To check names attribute dimnames <- list(letters[1:5], "A") r1 <- rowRanges(x) r_truth <- matrix(1:5, nrow = nrow(x), ncol = 2L, byrow = FALSE) stopifnot(all.equal(r1, r_truth)) # Check names attribute dimnames(x) <- dimnames r0 <- rowRanges_R(x, useNames = TRUE) r1 <- rowRanges(x, useNames = TRUE) stopifnot(all.equal(r1, r0)) dimnames(x) <- NULL # 1xN matrix x <- t(x) r1 <- colRanges(x) stopifnot(all.equal(r1, r_truth)) # Check names attribute dimnames(x) <- list("a", LETTERS[1:5]) r1 <- colRanges(x, useNames = TRUE) stopifnot(identical(rownames(r1), colnames(x))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Additional tests with NA_integer_, NA_real, NaN, -Inf, +Inf # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:12, nrow = 4L, ncol = 3L) na_list <- list( "integer" = matrix(1:12, nrow = 4L, ncol = 3L), "integer w/ NA" = matrix(NA_integer_, nrow = 4L, ncol = 3L), "real" = matrix(as.double(1:12), nrow = 4L, ncol = 3L), "real w/ NA" = matrix(NA_real_, nrow = 4L, ncol = 3L) ) na <- na_list[["real"]] na[2, 2] <- NA na_list[["real + NA cell"]] <- na na <- na_list[["real"]] na[2, ] <- NA na_list[["real + NA row"]] <- na na <- na_list[["real"]] na[2, ] <- NaN na_list[["real + NaN row"]] <- na na <- na_list[["real"]] na[2, 2] <- Inf na_list[["real + Inf cell"]] <- na na <- na_list[["real"]] na[2, ] <- Inf na_list[["real + Inf row"]] <- na na <- na_list[["real"]] na[2, 2] <- NaN na_list[["real + NaN cell"]] <- na na <- na_list[["real w/ NA"]] na[2, 2] <- NaN na_list[["real w/ NA + NaN cell"]] <- na na <- na_list[["real w/ NA"]] na[2, ] <- NaN na_list[["real w/ NA + NaN row"]] <- na # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { for (name in names(na_list)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { na <- na_list[[name]] cat(sprintf("%s (%s) w/ na.rm = %s:\n", name, typeof(na), na.rm)) print(na) cat(" min:\n") y0 <- rowMins_R(na, na.rm = na.rm, useNames = useNames) str(y0) y1 <- rowMins(na, na.rm = na.rm, useNames = useNames) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colMins(t(na), na.rm = na.rm, useNames = useNames) str(y1c) stopifnot(all.equal(y1c, y1)) cat(" max:\n") y0 <- rowMaxs_R(na, na.rm = na.rm, useNames = useNames) str(y0) y1 <- rowMaxs(na, na.rm = na.rm, useNames = useNames) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colMaxs(t(na), na.rm = na.rm, useNames = useNames) str(y1c) stopifnot(all.equal(y1c, y1)) cat(" range:\n") y0 <- rowRanges_R(na, na.rm = na.rm, useNames = useNames) str(y0) y1 <- rowRanges(na, na.rm = na.rm, useNames = useNames) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colRanges(t(na), na.rm = na.rm, useNames = useNames) str(y1c) stopifnot(all.equal(y1c, y1)) } } # for (name ...) } # for (na.rm ...) } matrixStats/tests/rowCumprods_subset.R0000644000176200001440000000256514522461565020015 0ustar liggesuserslibrary("matrixStats") rowCumprods_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumprod)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCumprods, fsure = rowCumprods_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCumprods(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCumprods_R, useNames = useNames) } } } } matrixStats/tests/rowTabulates.R0000644000176200001440000001026714522461565016556 0ustar liggesuserslibrary("matrixStats") nrow <- 6L ncol <- 5L data <- matrix(0:4, nrow = nrow, ncol = ncol) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:5]) modes <- c("integer", "logical", "raw") for (mode in modes) { cat(sprintf("Mode: %s...\n", mode)) x <- data if (mode == "logical") x <- x - 2L if (mode != "raw") x[c(2,5,7)] <- NA_integer_ storage.mode(x) <- mode print(x) unique_values <- unique(as.vector(x)) nbr_of_unique_values <- length(unique_values) y <- rowTabulates(x) print(y) stopifnot( identical(dim(y), c(nrow, nbr_of_unique_values)), all(y >= 0) ) if (mode != "raw") { y0 <- t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]) stopifnot(all(y == y0)) } # Check names attribute dimnames(x) <- dimnames y1 <- rowTabulates(x, useNames = FALSE) stopifnot(all.equal(y1, y)) if (!matrixStats:::isUseNamesNADefunct()) { y2 <- rowTabulates(x, useNames = NA) stopifnot(all.equal(y2, y)) } y <- rowTabulates(x, useNames = TRUE) stopifnot(identical(rownames(y), rownames(x))) dimnames(x) <- NULL y <- colTabulates(x) print(y) stopifnot( identical(dim(y), c(ncol, nbr_of_unique_values)), all(y >= 0) ) if (mode != "raw") { y0 <- t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]) stopifnot(all(y == y0)) } # Check names attribute dimnames(x) <- dimnames y1 <- colTabulates(x, useNames = FALSE) stopifnot(all.equal(y1, y)) if (!matrixStats:::isUseNamesNADefunct()) { y2 <- colTabulates(x, useNames = NA) stopifnot(all.equal(y2, y)) } y <- colTabulates(x, useNames = TRUE) stopifnot(identical(rownames(y), colnames(x))) dimnames(x) <- NULL # Count only certain values if (mode == "integer") { subset <- c(0:2, NA_integer_) } else if (mode == "logical") { subset <- c(TRUE, FALSE, NA) } else { subset <- c(0:2) } y <- rowTabulates(x, values = subset) print(y) stopifnot(identical(dim(y), c(nrow, length(subset)))) # Check names attribute dimnames(x) <- dimnames y1 <- rowTabulates(x, values = subset, useNames = FALSE) stopifnot(all.equal(y1, y)) if (!matrixStats:::isUseNamesNADefunct()) { y2 <- rowTabulates(x, values = subset, useNames = NA) stopifnot(all.equal(y2, y)) } y <- rowTabulates(x, values = subset, useNames = TRUE) stopifnot(identical(rownames(y), rownames(x))) dimnames(x) <- NULL y <- colTabulates(x, values = subset) print(y) stopifnot(identical(dim(y), c(ncol, length(subset)))) # Check names attribute dimnames(x) <- dimnames y1 <- colTabulates(x, values = subset, useNames = FALSE) stopifnot(all.equal(y1, y)) if (!matrixStats:::isUseNamesNADefunct()) { y2 <- colTabulates(x, values = subset, useNames = NA) stopifnot(all.equal(y2, y)) } y <- colTabulates(x, values = subset, useNames = TRUE) stopifnot(identical(rownames(y), colnames(x))) dimnames(x) <- NULL # Raw if (mode %in% c("integer", "raw")) { subset <- c(0:2) y <- rowTabulates(x, values = as.raw(subset)) print(y) stopifnot(identical(dim(y), c(nrow, length(subset)))) # Check names attribute dimnames(x) <- dimnames y1 <- rowTabulates(x, values = as.raw(subset), useNames = FALSE) stopifnot(all.equal(y1, y)) if (!matrixStats:::isUseNamesNADefunct()) { y2 <- rowTabulates(x, values = as.raw(subset), useNames = NA) stopifnot(all.equal(y2, y)) } y3 <- rowTabulates(x, values = as.raw(subset), useNames = TRUE) stopifnot(identical(rownames(y3), rownames(x))) dimnames(x) <- NULL y2 <- colTabulates(t(x), values = as.raw(subset)) print(y2) stopifnot( identical(dim(y2), c(nrow, length(subset))), identical(y2, y) ) # Check names attribute dimnames(x) <- dimnames y1 <- colTabulates(t(x), values = as.raw(subset), useNames = FALSE) stopifnot(all.equal(y1, y)) if (!matrixStats:::isUseNamesNADefunct()) { y2 <- colTabulates(t(x), values = as.raw(subset), useNames = NA) stopifnot(all.equal(y2, y)) } y <- colTabulates(t(x), values = as.raw(subset), useNames = TRUE) stopifnot(identical(rownames(y), colnames(t(x)))) dimnames(x) <- NULL } cat(sprintf("Mode: %s...done\n", mode)) } # for (mode ...) matrixStats/tests/rowMedians.R0000644000176200001440000002370614522461565016214 0ustar liggesuserslibrary("matrixStats") rowMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Non-ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Non-ties:\n") for (mode in c("integer", "double")) { x <- matrix(1:9 + 0.1, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Ties:\n") for (mode in c("integer", "double")) { x <- matrix(1:16 + 0.1, nrow = 4, ncol = 4) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "double")) { x <- matrix(1, nrow = 1, ncol = 1) storage.mode(x) <- mode # To check names attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "double")) { x <- matrix(integer(0), nrow = 0, ncol = 0) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm = FALSE) y1 <- rowMedians(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE) y1 <- colMedians(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "double")) { x <- matrix(NA_integer_, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf, +Inf), nrow = 4, ncol = 4) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4L else 20L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(50:200, size = 2) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE, FALSE), size = 1) # rowMedians(): y0 <- rowMedians_R(x, na.rm = na.rm) y1 <- rowMedians(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- colMedians(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) # colMedians(): y0 <- colMedians_R(x, na.rm = na.rm) y1 <- colMedians(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- rowMedians(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) } # for (kk ...) matrixStats/tests/diff2.R0000644000176200001440000000207014522461565015065 0ustar liggesuserslibrary("matrixStats") diff2_R <- function(..., useNames = NA){ res <- diff(...) if (is.na(useNames) || !useNames) names(res) <- NULL res } set.seed(0x42) for (mode in c("integer", "double")) { x <- rnorm(10, sd = 5) storage.mode(x) <- mode str(x) for (has_na in c(FALSE, TRUE)) { for (setNames in c(TRUE, FALSE)) { for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { if (has_na) { x[sample(1:10, size = 3)] <- NA } if (setNames) { names(x) <- LETTERS[1:10] } for (l in 1:3) { for (d in 1:4) { cat(sprintf("%s: NAs = %s, lag = %d, differences = %d, setNames = %d, useNames = %d\n", mode, has_na, l, d, setNames, useNames)) y0 <- diff2_R(x, lag = l, differences = d, useNames = useNames) str(y0) y1 <- diff2(x, lag = l, differences = d, useNames = useNames) str(y1) stopifnot(identical(y1, y0)) } } } } } # for (has_na ...) } matrixStats/tests/rowCumsums_subset.R0000644000176200001440000000255714522461565017656 0ustar liggesuserslibrary("matrixStats") rowCumsums_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumsum)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCumsums, fsure = rowCumsums_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCumsums(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCumsums_R, useNames = useNames) } } } } matrixStats/tests/rowVarDiffs_var,sd_subset.R0000644000176200001440000000343014522461565021170 0ustar liggesuserslibrary("matrixStats") fcns <- list( varDiff = c(rowVarDiffs, colVarDiffs), sdDiff = c(rowSdDiffs, colSdDiffs) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) trim <- runif(1, min = 0, max = 0.5) for (fcn in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1L]] col_fcn <- fcns[[fcn]][[2L]] for (mode in c("numeric", "integer")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) storage.mode(x) <- mode if (mode == "numeric") x[1:2, 3:4] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (diff in 1:2) { for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = row_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = col_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) } } } } } cat(sprintf("%s()...DONE\n", fcn)) } matrixStats/tests/rowVarDiffs_mad,iqr_subset.R0000644000176200001440000000341714522461565021333 0ustar liggesuserslibrary("matrixStats") fcns <- list( madDiff = c(rowMadDiffs, colMadDiffs), iqrDiff = c(rowIQRDiffs, colIQRDiffs) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) trim <- runif(1, min = 0, max = 0.5) for (fcn in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1L]] col_fcn <- fcns[[fcn]][[2L]] for (mode in c("numeric", "integer")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) storage.mode(x) <- mode if (mode == "numeric") x[1:2, 3:4] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (diff in 1:2) { for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = row_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = col_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) } } } } } cat(sprintf("%s()...DONE\n", fcn)) } matrixStats/tests/rowTabulates_subset.R0000644000176200001440000000266714522461565020150 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowTabulates, fsure = rowTabulates, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowTabulates, fsure = rowTabulates, values = 1:3, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = colTabulates, fsure = colTabulates, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = colTabulates, fsure = colTabulates, values = 1:3, useNames = useNames) } } } matrixStats/tests/rowCumMinMaxs_subset.R0000644000176200001440000000426014522461565020234 0ustar liggesuserslibrary("matrixStats") rowCummins_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummin)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } rowCummaxs_R <- function(x, ..., useNames = NA) { mode <- storage.mode(x) # Change mode because a bug is detected on cummax for integer in R-3.2.0 storage.mode(x) <- "numeric" suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummax)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL storage.mode(y) <- mode y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCummins, fsure = rowCummins_R, useNames = useNames,verbose=TRUE) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCummins(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCummins_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowCummaxs, fsure = rowCummaxs_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCummaxs(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCummaxs_R, useNames = useNames) } } } } matrixStats/tests/rowCumMinMaxs.R0000644000176200001440000001564014522461565016653 0ustar liggesuserslibrary("matrixStats") rowCummins_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummin)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } rowCummaxs_R <- function(x, ..., useNames = NA) { mode <- storage.mode(x) # Change mode because a bug is detected on cummax for integer in R-3.2.0 storage.mode(x) <- "numeric" suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummax)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) diag(x) <- 0 if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check dimnames attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(0, nrow = 1L, ncol = 1L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check dimnames attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") value <- 0 storage.mode(value) <- mode value0 <- if (mode == "logical") 0L else value # A 0x0 matrix x <- matrix(value, nrow = 0L, ncol = 0L) str(x) r0 <- matrix(value0, nrow = nrow(x), ncol = ncol(x)) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow = 0L, ncol = 5L) str(x) colnames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) colnames(x) <- colnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) # A Nx0 matrix x <- matrix(value, nrow = 5L, ncol = 0L) str(x) rownames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) rownames(x) <- rownames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) matrixStats/tests/rowCumprods.R0000644000176200001440000001447014522461565016426 0ustar liggesuserslibrary("matrixStats") rowCumprods_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumprod)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames(y) <- if (isTRUE(useNames)) dimnames(x) else NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check dimnames attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(0, nrow = 1L, ncol = 1L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # BUG FIX TEST: Assert zeros don't trump NAs in integer matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 3L, ncol = 2L) x[1, 2] <- 0 x[2, 2] <- 1 x[3, 1] <- 0 storage.mode(x) <- mode cat("mode: ", mode, "\n", sep = "") str(x) dimnames <- list(letters[1:3], LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") value <- 0 storage.mode(value) <- mode if (mode == "logical") value2 <- 0L # A 0x0 matrix x <- matrix(value, nrow = 0L, ncol = 0L) str(x) r0 <- matrix(value2, nrow = nrow(x), ncol = ncol(x)) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow = 0L, ncol = 5L) str(x) colnames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) colnames(x) <- colnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(rowCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) # A Nx0 matrix x <- matrix(value, nrow = 5L, ncol = 0L) str(x) rownames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) rownames(x) <- rownames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(rowCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) matrixStats/tests/allocMatrix.R0000644000176200001440000000116314520313754016346 0ustar liggesuserslibrary("matrixStats") allocMatrix_R <- function(nrow, ncol, value = NA) { matrix(data = value, nrow = nrow, ncol = ncol) } values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) nrow <- 2L ncol <- 3L for (value in values) { x0 <- allocMatrix_R(nrow, ncol, value = value) x <- allocMatrix(nrow, ncol, value = value) if (!identical(x, x0)) { str(list(nrow = nrow, ncol = ncol, value = value, x = x, x0 = x0)) stopifnot(identical(x, x0)) } } matrixStats/tests/rowDiffs_subset.R0000644000176200001440000000437014522461565017250 0ustar liggesuserslibrary("matrixStats") rowDiffs_R <- function(x, lag = 1L, differences = 1L, ..., useNames = NA) { ncol2 <- ncol(x) - lag * differences if (ncol2 <= 0) { y <- matrix(x[integer(0L)], nrow = nrow(x), ncol = 0L) # Preserve names attribute if (isTRUE(useNames) && !is.null(rownames(x))) rownames(y) <- rownames(x) return(y) } suppressWarnings({ y <- apply(x, MARGIN = 1L, FUN = diff, lag = lag, differences = differences) }) y <- t(y) # Preserve dimnames attribute dim(y) <- c(nrow(x), ncol2) if (isTRUE(useNames) && !is.null(dimnames(x))) { colnames <- colnames(x) if (!is.null(colnames)) { len <- length(colnames) colnames <- colnames[(len - ncol2 + 1):len] } dimnames(y) <- list(rownames(x), colnames) } else dimnames(y) <- NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L for (lag in 1:2) { for (differences in 1:3) { # Check dimnames attribute useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowDiffs, fsure = rowDiffs_R, lag = lag, differences = differences, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colDiffs(t(x), rows = cols, cols = rows, ..., useNames = useNames)) }, fsure = rowDiffs_R, lag = lag, differences = differences, useNames = useNames) } } } } } matrixStats/tests/rowMeans2_subset.R0000644000176200001440000000252014522461565017335 0ustar liggesuserslibrary("matrixStats") rowMeans_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- rowMeans(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowMeans2, fsure = rowMeans_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMeans2, fsure = rowMeans_R, na.rm = na.rm, useNames = useNames) } } } matrixStats/tests/weightedVar_etal_subset.R0000644000176200001440000000200214520313754020723 0ustar liggesuserslibrary("matrixStats") fcns <- list( weightedVar = weightedVar, weightedSd = weightedSd, weightedMad = weightedMad ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (name in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", name)) fcn <- fcns[[name]] for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf for (idxs in index_cases) { validateIndicesTestVector_w(x, w, idxs, ftest = fcn, fsure = fcn, na.rm = TRUE) validateIndicesTestVector_w(x, w, idxs, ftest = fcn, fsure = fcn, na.rm = FALSE) } } cat(sprintf("%s()...DONE\n", name)) } matrixStats/tests/zzz.package-unload.R0000644000176200001440000000227414520313754017602 0ustar liggesusers## These tests need to be last of all tests, otherwise ## covr::package_coverage() gives an error. cat("1. Loading package\n") loadNamespace("matrixStats") stopifnot("matrixStats" %in% loadedNamespaces()) cat("2. Unloading package\n") unloadNamespace("matrixStats") stopifnot(!"matrixStats" %in% loadedNamespaces()) if (FALSE) { ## 'covr' gives "Error in library("matrixStats") : ## there is no package called 'matrixStats'" here, cf. ## https://travis-ci.org/HenrikBengtsson/matrixStats/builds/48015577 cat("3. Attaching package\n") library("matrixStats") stopifnot("package:matrixStats" %in% search()) cat("4. Detaching package\n") detach("package:matrixStats") stopifnot(!"package:matrixStats" %in% search()) stopifnot("matrixStats" %in% loadedNamespaces()) cat("5. Unloading package\n") unloadNamespace("matrixStats") stopifnot(!"matrixStats" %in% loadedNamespaces()) cat("6. Attaching package (again)\n") library("matrixStats") stopifnot("package:matrixStats" %in% search()) cat("7. Detaching package (again)\n") detach("package:matrixStats") stopifnot(!"package:matrixStats" %in% search()) stopifnot("matrixStats" %in% loadedNamespaces()) } cat("7. DONE\n") matrixStats/tests/rowVars_subset.R0000644000176200001440000000651114522461565017127 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as rowVars() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as colVars() if (is.null(center) || nrow(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } rowVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) res <- rowVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } colVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) res <- colVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowVars, fsure = rowVars_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowVars_center, fsure = rowVars_R, na.rm = na.rm, center = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colVars, fsure = rowVars_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colVars_center, fsure = rowVars_R, na.rm = na.rm, center = TRUE, useNames = useNames) } } } matrixStats/tests/rowIQRs.R0000644000176200001440000000604714522461565015451 0ustar liggesuserslibrary("matrixStats") rowIQRs_R <- function(x, na.rm = FALSE, ..., useNames = NA) { quantile_na <- function(x, ..., na.rm = FALSE) { if (!na.rm && anyMissing(x)) return(c(NA_real_, NA_real_)) quantile(x, ..., na.rm = na.rm) } q <- apply(x, MARGIN = 1L, FUN = quantile_na, probs = c(0.25, 0.75), na.rm = na.rm) rownames(q) <- NULL # Not needed anymore # Preserve names attribute dim(q) <- c(2L, nrow(x)) colnames(q) <- if (isTRUE(useNames)) rownames(x) else NULL q[2L, , drop = TRUE] - q[1L, , drop = TRUE] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with multiple quantiles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(1:100 + 0.1, nrow = 10, ncol = 10) storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:10]) for (add_na in c(FALSE, TRUE)) { if (add_na) { x[3:5, 6:9] <- NA } # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { probs <- c(0, 0.5, 1) q0 <- rowIQRs_R(x, na.rm = na.rm, useNames = useNames) print(q0) q1 <- rowIQRs(x, na.rm = na.rm, useNames = useNames) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colIQRs(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(q2, q0)) q <- iqr(x[3, ], na.rm = na.rm) print(q) } # for (useNames ...) } # for (na.rm ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") # Empty vectors x <- integer(0L) storage.mode(x) <- mode str(x) q <- iqr(x) print(q) stopifnot(identical(q, NA_real_)) # Scalar x <- 1L storage.mode(x) <- mode str(x) q <- iqr(x) str(q) stopifnot(identical(q, 0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Single row matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1, nrow = 1L, ncol = 2L) dimnames <- list("a", LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { q0 <- rowIQRs_R(x, useNames = useNames) q1 <- rowIQRs(x, useNames = useNames) q2 <- colIQRs(t(x), useNames = useNames) stopifnot(all.equal(q0, q1)) stopifnot(all.equal(q0, q2)) } } x <- matrix(1, nrow = 2L, ncol = 1L) q <- colIQRs(x) stopifnot(identical(q, 0)) matrixStats/tests/rowMads_subset.R0000644000176200001440000000514114522461565017076 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") rowMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } rowMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowMedians(x, cols = cols, na.rm = na.rm, useNames = FALSE) rowMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } colMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colMedians(x, rows = rows, na.rm = na.rm, useNames = FALSE) colMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix(x, rows, cols, ftest = rowMads, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowMads_center, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMads, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMads_center, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) } } } matrixStats/tests/allocVector.R0000644000176200001440000000117114520313754016343 0ustar liggesuserslibrary("matrixStats") allocVector_R <- function(length, value = NA) { x <- vector(mode = typeof(value), length = length) if (!is.finite(value) || value != 0) x[] <- value x } values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) n <- 10L for (value in values) { x0 <- allocVector_R(n, value = value) x <- allocVector(n, value = value) if (!identical(x, x0)) { str(list(n = n, value = value, x = x, x0 = x0)) stopifnot(identical(x, x0)) } } matrixStats/tests/rowOrderStats.R0000644000176200001440000000451214522461565016720 0ustar liggesuserslibrary("matrixStats") library("stats") asWhich <- function(probs, max) { idx <- as.integer(round(probs * max)) if (idx < 1L) { idx <- 1L } else if (idx > max) { idx <- max } idx } # asWhich() rowOrderStats_R <- function(x, probs, ..., useNames = NA) { ans <- apply(x, MARGIN = 1L, FUN = quantile, probs = probs, type = 3L) # Remove Attributes if (is.na(useNames) || !useNames || length(ans) == 0L) attributes(ans) <- NULL ans } # rowOrderStats_R() set.seed(1) # Simulate data in a matrix of any shape nrow <- 60L ncol <- 30L x <- rnorm(nrow * ncol) dim(x) <- c(nrow, ncol) probs <- 0.3 which <- asWhich(probs, max = ncol) y0 <- rowOrderStats_R(x, probs = probs) y1 <- rowOrderStats(x, which = which) stopifnot(all.equal(y1, y0)) y2 <- colOrderStats(t(x), which = which) stopifnot(all.equal(y2, y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("Consistency checks without NAs:\n") for (kk in 1:3) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape nrow <- sample(20L, size = 1L) ncol <- sample(20L, size = 1L) x <- rnorm(nrow * ncol) dim(x) <- c(nrow, ncol) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) probs <- runif(1) which <- asWhich(probs, max = ncol) y0 <- rowOrderStats_R(x, probs = probs) y1 <- rowOrderStats(x, which = which) stopifnot(all.equal(y1, y0)) y2 <- colOrderStats(t(x), which = which) stopifnot(all.equal(y2, y0)) } # for (kk in ...) } # for (mode ...) # Check names attribute x <- matrix(1:9 + 0.1, nrow = 3L, ncol = 3L) probs <- runif(1) which <- asWhich(probs, max = ncol(x)) dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE)) { y0 <- rowOrderStats_R(x, probs = probs, useNames = useNames) y1 <- rowOrderStats(x, which = which, useNames = useNames) stopifnot(all.equal(y1, y0)) y2 <- colOrderStats(t(x), which = which, useNames = useNames) stopifnot(all.equal(y2, y0)) } } matrixStats/tests/x_OP_y_subset.R0000644000176200001440000000474314520313754016660 0ustar liggesuserslibrary("matrixStats") x_OP_y_R <- function(x, y, OP, na.rm = FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() t_tx_OP_y_R <- function(x, y, OP, na.rm = FALSE) { t(x_OP_y_R(x = t(x), y = y, OP = OP, na.rm = na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (OP in c("+", "-", "*", "/")) { for (mode in c("numeric", "integer", "logical")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) y <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(y) <- mode if (mode == "numeric") y[1] <- Inf for (xrows in index_cases) { for (xcols in index_cases) { if (is.null(xrows)) xrows <- seq_len(nrow(x)) if (is.null(xcols)) xcols <- seq_len(ncol(x)) for (yidxs in list(xrows, xcols)) { for (na.rm in c(TRUE, FALSE)) { suppressWarnings({ actual <- tryCatch( x_OP_y(x, y, OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = na.rm), error = function(c) "error" ) expect <- tryCatch( x_OP_y_R(x[xrows, xcols, drop = FALSE], y[yidxs], OP, na.rm = na.rm), error = function(c) "error" ) }) stopifnot(all.equal(as.vector(actual), as.vector(expect))) suppressWarnings({ actual <- tryCatch( t_tx_OP_y(x, y, OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = na.rm), error = function(c) "error" ) expect <- tryCatch( t_tx_OP_y_R(x[xrows, xcols, drop = FALSE], y[yidxs], OP, na.rm = na.rm), error = function(c) "error" ) }) stopifnot(all.equal(as.vector(actual), as.vector(expect))) } } } } } } matrixStats/tests/allocArray.R0000644000176200001440000000110214520313754016151 0ustar liggesuserslibrary("matrixStats") allocArray_R <- function(nrow, ncol, value = NA) { array(data = value, dim = dim) } values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) dim <- c(2L, 4L, 3L) for (value in values) { x0 <- allocArray_R(dim, value = value) x <- allocArray(dim, value = value) if (!identical(x, x0)) { str(list(dim = dim, value = value, x = x, x0 = x0)) stopifnot(identical(x, x0)) } } matrixStats/tests/binCounts.R0000644000176200001440000000512014520313754016030 0ustar liggesuserslibrary("matrixStats") library("stats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binCounts_hist <- function(x, bx, right = FALSE, ...) { n0 <- graphics::hist(x, breaks = bx, right = right, include.lowest = TRUE, plot = FALSE)$counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Non-sorted and sorted positions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nx <- 1000L # Number of data points nb <- 200L # Number of bins # Uniformely distributed bins bx <- seq(from = 0, to = 1, length.out = nb + 1L) bx <- c(-1, bx, 2) # Sample data points set.seed(0xBEEF) x <- runif(nx, min = 0, max = 1) for (kk in 1:2) { n0 <- binCounts_hist(x, bx = bx) n1 <- binCounts(x, bx = bx) # Sanity check stopifnot(identical(n1, n0)) # Check reversed n1r <- rev(binCounts(-x, bx = rev(-bx), right = TRUE)) stopifnot(identical(n1r, n1)) # Retry with a sorted vector x <- sort(x) } # for (kk in 1:2) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:40 x[20] <- NA_integer_ nx <- length(x) # Bins bx <- c(0.5, 50.5, 100.5, 150.5, 200.5) y_smooth0 <- binCounts_hist(x, bx = bx) y_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Border cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 bx <- min(x) - c(10, 1) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, 0L)) bx <- range(x) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, length(x) - 1L)) bx <- max(x) + c(1, 10) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, 0L)) # Every second empty x <- 1:10 bx <- rep(x, each = 2L) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, rep(c(0L, 1L), length.out = length(bx) - 1L))) ## NOTE: binCounts_hist() does not give the same last bin count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Exception handling # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Zero bin bounderies (invalid bin definition) bx <- double(0L) res <- try(y_smooth <- binCounts(1:10, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) # One bin boundery (invalid bin definition) bx <- double(1L) res <- try(y_smooth <- binCounts(1:10, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) matrixStats/tests/rowWeightedMeans_subset.R0000644000176200001440000000501614522461565020737 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (na.rm) na.rm <- anyMissing(x) if ((!is.null(w) && nw == 0L) || isFALSE(na.rm)) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as colWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (!is.null(w) && nw == 0L) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (mode in c("numeric", "integer", "logical")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- if (mode == "logical") "integer" else mode if (mode == "numeric") w[1] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL count <- 0L for (rows in index_cases) { for (cols in index_cases) { count <- count + 1L na.rm <- c(TRUE, FALSE)[count %% 2 + 1] useNames <- c(if (!matrixStats:::isUseNamesNADefunct()) NA, TRUE, FALSE) useNames <- useNames[count %% length(useNames) + 1] validateIndicesTestMatrix_w(x, w, rows, cols, ftest = rowWeightedMeans, fsure = rowWeightedMeans_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix_w(x, w, rows, cols, fcoltest = colWeightedMeans, fsure = colWeightedMeans_R, na.rm = na.rm, useNames = useNames) } } } } matrixStats/tests/mean2_subset.R0000644000176200001440000000106614520313754016460 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = mean2, fsure = mean, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = mean2, fsure = mean, na.rm = TRUE) } matrixStats/tests/product.R0000644000176200001440000000220214520313754015542 0ustar liggesuserslibrary("matrixStats") for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") # Empty vector x <- 0 storage.mode(x) <- mode y <- prod(x, na.rm = TRUE) print(y) z <- product(x, na.rm = TRUE) print(z) stopifnot(all.equal(z, y)) # Test negative values x <- c(1, -4, 2) storage.mode(x) <- mode y <- prod(x, na.rm = TRUE) print(y) z <- product(x, na.rm = TRUE) print(z) stopifnot(all.equal(z, y)) # Test missing values x <- c(1, NA, NaN, 2) storage.mode(x) <- mode y <- prod(x, na.rm = TRUE) print(y) z <- product(x, na.rm = TRUE) print(z) stopifnot(all.equal(z, y)) x <- c(1, NA, NaN, 2) storage.mode(x) <- mode y <- prod(x, na.rm = FALSE) print(y) z <- product(x, na.rm = FALSE) print(z) stopifnot(all(is.na(z), is.na(y))) x <- c(1, NaN, 2) storage.mode(x) <- mode y <- prod(x, na.rm = FALSE) print(y) stopifnot(is.na(y)) z <- product(x, na.rm = FALSE) print(z) stopifnot(is.na(z)) } # for (mode ...) # NAs following 0s x <- c(0L, NA_integer_) y <- prod(x, na.rm = FALSE) print(y) z <- product(x, na.rm = FALSE) print(z) stopifnot(identical(z, y)) matrixStats/src/0000755000176200001440000000000014535664471013403 5ustar liggesusersmatrixStats/src/rowSums2_lowlevel_template.h0000644000176200001440000000751214436403546021120 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowSums2_(ARGUMENTS_LIST) Copyright: Henrik Bengtsson, 2017 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowSums2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, idx; R_xlen_t *colOffset; X_C_TYPE value; /* Use long double (if available) for higher precision */ /* NOTE: SIMD does not long doubles - in case we ever go there */ LDOUBLE sum; int nocols, norows; /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } /* Pre-calculate the column offsets */ if (nocols) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) if(!rowsHasNA && !colsHasNA){ colOffset[jj] = cols[jj] * nrow; } else{ colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow,1,1); } } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } for (ii=0; ii < nrows; ii++) { R_xlen_t rowIdx; if (norows) { /* ii and ncols cannot be NA-values, so we do not need R_INDEX_OP */ rowIdx = byrow ? ii : ii*ncol; } else { if(!rowsHasNA && !colsHasNA) { rowIdx = byrow ? rows[ii] : rows[ii] * ncol; } rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol,1,1); } sum = 0.0; for (jj=0; jj < ncols; jj++) { if (!rowsHasNA && nocols) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; value = x[idx]; } else if (!rowsHasNA && !colsHasNA && !nocols) { idx = rowIdx + colOffset[jj]; value = x[idx]; } else { if (nocols) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow,1,1); else idx = R_INDEX_OP(rowIdx, +, jj,1,1); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj],1,1); } value = R_INDEX_GET(x, idx, X_NA, 1); } #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; if (jj % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; } #endif } /* for (jj ...) */ if (sum > DBL_MAX) { ans[ii] = R_PosInf; } else if (sum < -DBL_MAX) { ans[ii] = R_NegInf; } else { ans[ii] = (double)sum; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } matrixStats/src/signTabulate.c0000644000176200001440000000241314531477242016163 0ustar liggesusers/*************************************************************************** Public methods: SEXP signTabulate(SEXP x, SEXP idxs) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "signTabulate_lowlevel.h" SEXP signTabulate(SEXP x, SEXP idxs) { SEXP ans = NILSXP; R_xlen_t nx; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, 6)); signTabulate_dbl(REAL(x), nx, cidxs, nidxs, idxsHasNA, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(REALSXP, 4)); signTabulate_int(INTEGER(x), nx, cidxs, nidxs, idxsHasNA, REAL(ans)); UNPROTECT(1); } return(ans); } // signTabulate() /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/rowMads_lowlevel.h0000644000176200001440000000137314436403546017077 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowMads_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, double scale, int narm, int hasna, int byrow, double *ans) void rowMads_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, double scale, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowMads_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowMads_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/diff2.c0000644000176200001440000000416714531477242014543 0ustar liggesusers/*************************************************************************** Public methods: SEXP diff2(SEXP x, SEXP idxs, SEXP lag, SEXP differences) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include #include "000.types.h" #include "diff2_lowlevel.h" #include "naming.h" SEXP diff2(SEXP x, SEXP idxs, SEXP lag, SEXP differences, SEXP useNames) { SEXP ans = NILSXP; R_xlen_t nx, nans, lagg, diff; int usenames; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'lag': */ lagg = asInteger(lag); if (lagg < 1) { error("Argument 'lag' must be a positive integer."); } /* Argument 'differences': */ diff = asInteger(differences); if (diff < 1) { error("Argument 'differences' must be a positive integer."); } /* Argument 'idxs': */ R_xlen_t nidxs; /* Argument 'useNames': */ usenames = asLogical(useNames); int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Length of result vector */ nans = (R_xlen_t)((double)nidxs - ((double)diff*(double)lagg)); if (nans < 0) nans = 0; /* Dispatch to low-level C function */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, nans)); diff2_dbl(REAL(x), nx, cidxs, nidxs, idxsHasNA, lagg, diff, REAL(ans), nans); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, nans)); diff2_int(INTEGER(x), nx, cidxs, nidxs, idxsHasNA, lagg, diff, INTEGER(ans), nans); UNPROTECT(1); } else { error("Argument 'x' must be numeric."); } if (usenames != NA_LOGICAL && usenames) { SEXP namesVec = getAttrib(x, R_NamesSymbol); if (namesVec != R_NilValue) { setNamesDiff(ans, namesVec, nidxs, nans, cidxs); } } return ans; } // diff2() /*************************************************************************** HISTORY: 2015-06-14 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/validateIndices.c0000644000176200001440000001513314531477242016634 0ustar liggesusers/*************************************************************************** Public methods: SEXP validate(SEXP idxs, SEXP maxIdx, SEXP allowOutOfBound) **************************************************************************/ #include #include "validateIndices_lowlevel.h" /** idxs must not be NULL, which should be checked before calling this function. **/ R_xlen_t* validateIndices_lgl(int *idxs, R_xlen_t nidxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) { R_xlen_t ii, jj, kk; R_xlen_t count1 = 0, count2 = 0; // set default as no NA. *hasna = FALSE; if (nidxs == 0) { *ansNidxs = 0; return NULL; } if (nidxs > maxIdx) { if (!allowOutOfBound) { error("logical subscript too long"); } *hasna = TRUE; // out-of-bound index is NA // count how many idx items for (ii = 0; ii < maxIdx; ++ ii) { if (idxs[ii]) { // TRUE or NA ++ count1; } } for (; ii < nidxs; ++ ii) { if (idxs[ii]) { // TRUE or NA ++ count2; } } *ansNidxs = count1 + count2; R_xlen_t *ans = (R_xlen_t *) R_alloc(*ansNidxs, sizeof(R_xlen_t)); FILL_VALIDATED_ANS(maxIdx, idxs[ii], idxs[ii] == NA_LOGICAL ? NA_R_XLEN_T : ii); for (ii = count1; ii < *ansNidxs; ++ ii) { ans[ii] = NA_R_XLEN_T; } return ans; } // nidxs <= maxIdx R_xlen_t naCount = 0; R_xlen_t lastPartNum = maxIdx % nidxs; for (ii = 0; ii < lastPartNum; ++ ii) { if (idxs[ii]) { // TRUE or NA if (idxs[ii] == NA_LOGICAL) ++ naCount; ++ count1; } } for (; ii < nidxs; ++ ii) { if (idxs[ii]) { // TRUE or NA if (idxs[ii] == NA_LOGICAL) ++ naCount; ++ count2; } } R_xlen_t count = count1 + count2; if (naCount == 0 && count == nidxs) { // All True *ansNidxs = maxIdx; return NULL; } if (naCount) *hasna = TRUE; *ansNidxs = maxIdx / nidxs * count + count1; R_xlen_t *ans = (R_xlen_t *) R_alloc(*ansNidxs, sizeof(R_xlen_t)); FILL_VALIDATED_ANS(nidxs, idxs[ii], idxs[ii] == NA_LOGICAL ? NA_R_XLEN_T : ii); for (ii = count, kk = nidxs; kk+nidxs <= maxIdx; kk += nidxs, ii += count) { for (jj = 0; jj < count; ++ jj) { ans[ii+jj] = ans[jj] == NA_R_XLEN_T ? NA_R_XLEN_T : ans[jj] + kk; } } for (jj = 0; jj < count1; ++ jj) { ans[ii+jj] = ans[jj] == NA_R_XLEN_T ? NA_R_XLEN_T : ans[jj] + kk; } return ans; } /************************************************************* * The most important function which is widely called. * If `idxs` is NULL, NULL will be returned, which indicates selecting. * the whole to-be-computed vector(matrix). * `maxIdx` is the to-be-computed vector(matrix)'s length (rows/cols). * `allowOutOfBound` indicates whether to allow positve out of bound indexing. * `ansNidxs` is used for returning the new idxs array's length. * `subsettedType` is used for returning the new idxs array's datatype. * `hasna` is TRUE, if NA is included in returned result. ************************************************************/ R_xlen_t *validateIndices(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs) { int hasna; return validateIndicesCheckNA(idxs, maxIdx, allowOutOfBound, ansNidxs, &hasna); } R_xlen_t *validateIndicesCheckNA(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) { R_xlen_t nidxs = xlength(idxs); int mode = TYPEOF(idxs); // Set no NA as default. *hasna = FALSE; switch (mode) { case INTSXP: return validateIndices_int(INTEGER(idxs), nidxs, maxIdx, allowOutOfBound, ansNidxs, hasna); case REALSXP: return validateIndices_dbl(REAL(idxs), nidxs, maxIdx, allowOutOfBound, ansNidxs, hasna); case LGLSXP: return validateIndices_lgl(LOGICAL(idxs), nidxs, maxIdx, allowOutOfBound, ansNidxs, hasna); case NILSXP: *ansNidxs = maxIdx; return NULL; default: error("idxs can only be integer, numeric, or logical."); } return NULL; // useless sentence. won't be executed. } /************************************************************* * This function can be called by R. * If `idxs` is NULL, NULL will be returned, which indicates selecting. * the whole to-be-computed vector(matrix). * `maxIdx` is the to-be-computed vector(matrix)'s length (rows/cols). * `allowOutOfBound` indicates whether to allow positve out of bound indexing. ************************************************************/ SEXP validate(SEXP idxs, SEXP maxIdx, SEXP allowOutOfBound) { SEXP ans; R_xlen_t ansNidxs; Rboolean need_double = FALSE; R_xlen_t cmaxIdx = asR_xlen_t(maxIdx, 0); R_xlen_t nidxs = xlength(idxs); int callowOutOfBound = asLogicalNoNA(allowOutOfBound, "allowOutOfBound"); R_xlen_t *cidxs; R_xlen_t i; // Set no NA as default. int hasna = FALSE; int mode = TYPEOF(idxs); switch (mode) { case INTSXP: cidxs = validateIndices_int(INTEGER(idxs), nidxs, cmaxIdx, callowOutOfBound, &ansNidxs, &hasna); break; case REALSXP: cidxs = validateIndices_dbl(REAL(idxs), nidxs, cmaxIdx, callowOutOfBound, &ansNidxs, &hasna); break; case LGLSXP: cidxs = validateIndices_lgl(LOGICAL(idxs), nidxs, cmaxIdx, callowOutOfBound, &ansNidxs, &hasna); break; case NILSXP: return R_NilValue; default: error("idxs can only be integer, numeric, or logical."); } if (cidxs != NULL) { for (i = 0; i < ansNidxs; i++) { if (cidxs[i] > INT_MAX) { need_double = TRUE; break; } } } else { if (Rf_length(idxs) > 0) { // "Pick all indices" case, if all(idxs > 0) switch (mode) { case INTSXP: if (INTEGER(idxs)[0] > 0) return R_NilValue; break; case REALSXP: if (REAL(idxs)[0] > 0) return R_NilValue; break; case LGLSXP: if (LOGICAL(idxs)[0] == TRUE) return R_NilValue; break; } // else, "Pick an empty subset of indices" } } if (!need_double) { ans = PROTECT(allocVector(INTSXP, ansNidxs)); // Copy from cidxs to ans and coerce to int if (cidxs && ansNidxs > 0) { int *ans_ptr = INTEGER(ans); for (i = 0; i < ansNidxs; i++) { ans_ptr[i] = cidxs[i] == NA_R_XLEN_T ? NA_INTEGER : (int)cidxs[i] + 1; } } UNPROTECT(1); return ans; } else { ans = PROTECT(allocVector(REALSXP, ansNidxs)); // Copy from cidxs to ans and coerce to double if (cidxs && ansNidxs > 0) { double *ans_ptr = REAL(ans); for (i = 0; i < ansNidxs; i++) { ans_ptr[i] = cidxs[i] == NA_R_XLEN_T ? NA_REAL : (double)cidxs[i] + 1; } } UNPROTECT(1); return ans; } } matrixStats/src/rowMeans2.c0000644000176200001440000000505214531477242015420 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowMeans2(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2017 **************************************************************************/ #include #include "000.types.h" #include "rowMeans2_lowlevel.h" #include "naming.h" SEXP rowMeans2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP refine2, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, refine, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); SWAP(int, rowsHasNA, colsHasNA); } /* Argument 'refine': */ refine = asLogicalNoNA(refine2, "refine"); /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMeans2_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, refine, hasna, byrow, REAL(ans)); } else if (isInteger(x) || isLogical(x)) { rowMeans2_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, FALSE, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } matrixStats/src/productExpSumLog_lowlevel_template.h0000644000176200001440000000550114436403546022637 0ustar liggesusers/*********************************************************************** TEMPLATE: double productExpSumLog_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int hasna Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" double CONCAT_MACROS(productExpSumLog, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int hasna) { LDOUBLE y = 0.0, t; R_xlen_t ii; int isneg = 0; int hasZero = 0; /* Calculate sum(log(abs(x))) */ for (ii = 0 ; ii < nidxs; ii++) { t = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA, idxsHasNA); /* Missing values? */ if (narm) { if (X_ISNAN(t)) continue; } #if X_TYPE == 'i' /* Early stopping? */ if (X_ISNAN(t)) { y = NA_REAL; break; } else if (t < 0) { isneg = !isneg; t = -t; } else if (t == 0) { hasZero = 1; /* Early stopping? */ if (narm) break; } #elif X_TYPE == 'r' if (t < 0) { isneg = !isneg; t = -t; } #endif t = log(t); y += t; /* Rprintf("#%d: x=%g, is.nan(x)=%d, abs(x)=%g, is.nan(abs(x))=%d, log(abs(x))=%g, is.nan(log(abs(x)))=%d, sum=%g, is.nan(sum)=%d\n", ii, x[ii], R_IsNaN(x[ii]), X_ABS(x[ii]), R_IsNaN(abs(x[ii])), t, R_IsNaN(y), y, R_IsNaN(y)); */ #if X_TYPE == 'r' /* Early stopping? Special for long LDOUBLE vectors */ if (ii % 1048576 == 0 && ISNAN(y)) break; #endif } if (ISNAN(y)) { /* If there where NA and/or NaN elements, then 'y' will at this point be NaN. The information on an NA value is lost when calculating fabs(NA), which returns NaN. For consistency with integers, we return NA in all cases. */ y = NA_REAL; } else if (hasZero) { /* no NA in 'x' and 'x' contains zero */ y = 0; } else { y = exp(y); /* Update sign */ if (isneg) { y = -y; } /* 2flow or underflow? */ if (y > DBL_MAX) { y = R_PosInf; } else if (y < -DBL_MAX) { y = R_NegInf; } } return (double)y; } /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/rowCummaxs_lowlevel.h0000644000176200001440000000137214436403546017627 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCummaxs_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) void rowCummaxs_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) */ #define METHOD rowCummaxs #define COMP '>' #define X_TYPE 'i' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #undef COMP matrixStats/src/diff2_lowlevel_template.h0000644000176200001440000000670414436403546020353 0ustar liggesusers/*********************************************************************** TEMPLATE: void diff2_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include #undef X_DIFF #if X_TYPE == 'i' #ifndef diff_int static R_INLINE int diff_int(int a, int b) { if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER); return a-b; } #define diff_int diff_int #endif #define X_DIFF diff_int #elif X_TYPE == 'r' #define X_DIFF(a,b) a-b #endif void CONCAT_MACROS(diff2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans) { R_xlen_t ii, tt, uu; X_C_TYPE xvalue1, xvalue2; X_C_TYPE *tmp = NULL; int noidxs; if (idxs == NULL) { noidxs = 1; } else { noidxs = 0;} /* Nothing to do? */ if (nans <= 0) return; /* Special case (difference == 1) */ if (differences == 1) { for (ii=0; ii < nans; ii++) { if(noidxs) { xvalue1 = x[ii]; xvalue2 = x[ii+lag]; } else { R_xlen_t idx1 = idxs[ii]; R_xlen_t idx2 = idxs[ii+lag]; if (!idxsHasNA) { xvalue1 = x[idx1]; xvalue2 = x[idx2]; } else{ xvalue1 = R_INDEX_GET(x, idx1, X_NA, 1); xvalue2 = R_INDEX_GET(x, idx2, X_NA, 1); } } ans[ii] = X_DIFF(xvalue2, xvalue1); } } else { /* Allocate temporary work vector (to hold intermediate differences) */ tmp = Calloc(nidxs - lag, X_C_TYPE); /* (a) First order of differences */ for (ii=0; ii < nidxs-lag; ii++) { if(noidxs) { xvalue1 = x[ii]; xvalue2 = x[ii+lag]; } else { R_xlen_t idx1 = idxs[ii]; R_xlen_t idx2 = idxs[ii+lag]; if (!idxsHasNA) { xvalue1 = x[idx1]; xvalue2 = x[idx2]; } else{ xvalue1 = R_INDEX_GET(x, idx1, X_NA, 1); xvalue2 = R_INDEX_GET(x, idx2, X_NA, 1); } } tmp[ii] = X_DIFF(xvalue2, xvalue1); } nidxs -= lag; /* (b) All other orders of differences but the last */ while (--differences > 1) { uu = lag; tt = 0; for (ii=0; ii < nidxs-lag; ii++) { tmp[ii] = X_DIFF(tmp[uu++], tmp[tt++]); } nidxs -= lag; } /* Sanity check */ /* if (nidxs-lag != nans) error("nidxs != nans: %d != %d\n", nidxs, nans); */ /* (c) Last order of differences */ uu = lag; tt = 0; for (ii=0; ii < nans; ii++) { ans[ii] = X_DIFF(tmp[uu++], tmp[tt++]); } /* Deallocate temorary work vector */ Free(tmp); } /* if (differences ...) */ } /*************************************************************************** HISTORY: 2015-06-14 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/sum2.c0000644000176200001440000000443614531477242014436 0ustar liggesusers/*************************************************************************** Public methods: SEXP sum2(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include #include "000.types.h" #include "sum2_lowlevel.h" SEXP sum2(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) { SEXP ans = NILSXP; R_xlen_t nx; int narm, mode2; double sum = NA_REAL; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nx = xlength(x); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'mode': */ if (!isInteger(mode)) { error("Argument 'mode' must be a single integer."); } mode2 = asInteger(mode); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Dispatch to low-level C function */ if (isReal(x)) { sum = sum2_dbl(REAL(x), nx, cidxs, nidxs, idxsHasNA, narm); } else if (isInteger(x) || isLogical(x)) { sum = sum2_int(INTEGER(x), nx, cidxs, nidxs, idxsHasNA, narm); } /* Return results */ switch (mode2) { case 1: /* integer */ PROTECT(ans = allocVector(INTSXP, 1)); if (ISNAN(sum)) { INTEGER(ans)[0] = NA_INTEGER; } else if (sum > R_INT_MAX || sum < R_INT_MIN) { Rf_warning("Integer overflow. Use sum2(..., mode = \"double\") to avoid this."); INTEGER(ans)[0] = NA_INTEGER; } else { INTEGER(ans)[0] = (int)sum; } UNPROTECT(1); break; case 2: /* numeric */ PROTECT(ans = allocVector(REALSXP, 1)); if (sum > DBL_MAX) { REAL(ans)[0] = R_PosInf; } else if (sum < -DBL_MAX) { REAL(ans)[0] = R_NegInf; } else { REAL(ans)[0] = sum; } UNPROTECT(1); break; default: /* To please compiler */ ans = NILSXP; break; } return(ans); } // sum2() /*************************************************************************** HISTORY: 2015-07-11 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o Moved validation of arguments and construction of return object to this function. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/x_OP_y.c0000644000176200001440000001453314531477242014744 0ustar liggesusers#include #include "000.types.h" #include "x_OP_y_lowlevel.h" SEXP x_OP_y(SEXP x, SEXP y, SEXP dim, SEXP operator, SEXP xrows, SEXP xcols, SEXP yidxs, SEXP commute, SEXP naRm, SEXP hasNA, SEXP byRow) { SEXP ans = NILSXP; int narm, hasna, byrow, commute2; int op; R_xlen_t nrow, ncol, ny; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'y': */ assertArgVector(y, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "y"); ny = xlength(y); /* Argument 'byRow': */ byrow = asLogicalNoNA(byRow, "byrow"); /* Argument 'commute2': */ commute2 = asLogicalNoNA(commute, "commute"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'xrows', 'xcols' and 'yidxs': */ R_xlen_t nxrows, nxcols, nyidxs; int xrowsHasNA, xcolsHasNA, yidxsHasNA; R_xlen_t *cxrows = validateIndicesCheckNA(xrows, nrow, 0, &nxrows, &xrowsHasNA); R_xlen_t *cxcols = validateIndicesCheckNA(xcols, ncol, 0, &nxcols, &xcolsHasNA); R_xlen_t *cyidxs = validateIndicesCheckNA(yidxs, ny, 1, &nyidxs, &yidxsHasNA); /* Argument 'operator': */ op = asInteger(operator); if (op == 1) { /* Addition */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Add_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Add_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Add_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nxrows, nxcols)); x_OP_y_Add_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 2) { /* Subtraction */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Sub_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Sub_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Sub_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nxrows, nxcols)); x_OP_y_Sub_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 3) { /* Multiplication */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Mul_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Mul_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Mul_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nxrows, nxcols)); x_OP_y_Mul_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 4) { /* Division */ PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Div_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Div_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Div_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && (isInteger(y) || isLogical(y))) { x_OP_y_Div_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, xrowsHasNA, cxcols, nxcols, xcolsHasNA, cyidxs, nyidxs, yidxsHasNA, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { error("Unknown value on argument 'OP': %dL", op); } return(ans); } /* x_OP_y() */ matrixStats/src/logSumExp_lowlevel_template.h0000644000176200001440000001412614436403546021301 0ustar liggesusers/*********************************************************************** TEMPLATE: double logSumExp_double(ARGUMENTS_LIST) ARGUMENTS_LIST: double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, int by, double *xx ***********************************************************************/ #include #include #include "000.types.h" #include "000.templates-types.h" /* logSumExp_double(x, by=0, xx=NULL): 1. Scans for the maximum value of x=(x[0], x[1], ..., x[n-1]) 2. Computes result from 'x'. NOTE: The above sweeps the "contiguous" 'x' vector twice. --- logSumExp_double(x, by=by, xx=xx): 1. Scans for the maximum value of x=(x[0], x[by], ..., x[(n-1)*by]) and copies the values to xx = (xx[0], xx[1], xx[2], ..., xx[n-1]), which *must* be preallocated. 2. Computes result from 'xx'. NOTE: The above sweeps the "scattered" 'x' vector only once, and then the "contigous" 'xx' vector once. This is more likely to create cache hits. */ double logSumExp_double(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int hasna, R_xlen_t by, double *xx) { R_xlen_t ii, iMax, idx; double xii, xMax; LDOUBLE sum; int hasna2 = FALSE; /* Indicates whether NAs where detected or not */ int xMaxIsNA; int noidxs; if (idxs == NULL) { noidxs = 1; } else { noidxs = 0; } /* Quick return? */ if (nidxs == 0) { return(R_NegInf); } /* Find the maximum value */ iMax = 0; if (by) { if (noidxs){ idx = 0; xMax = x[idx]; } else if (!idxsHasNA) { idx = idxs[0] * by; xMax = x[idx]; } else { idx = R_INDEX_OP(idxs[0], *, by, 1, 1); xMax = R_INDEX_GET(x, idx, NA_REAL, 1); } } else { if (noidxs){ idx = 0; xMax = x[idx]; } else if (!idxsHasNA) { idx = idxs[0]; xMax = x[idx]; } else { idx = idxs[0]; xMax = R_INDEX_GET(x, idx, NA_REAL, 1); } } xMaxIsNA = ISNAN(xMax); if (nidxs == 1) { if (narm && xMaxIsNA) { return(R_NegInf); } else { return(xMax); } } if (xMaxIsNA) hasna2 = TRUE; if (by) { /* To increase the chances for cache hits below, which sweeps through the data twice, we copy data into a temporary contigous vector while scanning for the maximum value. */ xx[0] = xMax; for (ii=1; ii < nidxs; ii++) { /* Get the ii:th value */ if (noidxs){ idx = ii * by; xii = x[idx]; } else if (!idxsHasNA) { idx = idxs[ii] * by; xii = x[idx]; } else { idx = R_INDEX_OP(idxs[ii], *, by, 1, 1); xii = R_INDEX_GET(x, idx, NA_REAL, 1); } /* Copy */ xx[ii] = xii; if (hasna && ISNAN(xii)) { if (narm) { hasna2 = TRUE; continue; } else { return(R_NaReal); } } if (xii > xMax || (narm && xMaxIsNA)) { iMax = ii; xMax = xii; xMaxIsNA = ISNAN(xMax); } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=1; ii < nidxs; ii++) { /* Get the ii:th value */ if (noidxs){ idx = ii; xii = x[idx]; } else if (!idxsHasNA) { idx = idxs[ii]; xii = x[idx]; } else { idx = idxs[ii]; xii = R_INDEX_GET(x, idx, NA_REAL, 1); } if (hasna && ISNAN(xii)) { if (narm) { hasna2 = TRUE; continue; } else { return(R_NaReal); } } if (xii > xMax || (narm && xMaxIsNA)) { iMax = ii; xMax = xii; xMaxIsNA = ISNAN(xMax); } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* by */ /* Early stopping? */ if (xMaxIsNA) { /* Found only missing values? */ return narm ? R_NegInf : R_NaReal; } else if (xMax == R_PosInf) { /* Found +Inf? */ return(R_PosInf); } else if (xMax == R_NegInf) { /* all values are -Inf */ return(R_NegInf); } /* Sum differences */ sum = 0.0; if (by) { for (ii=0; ii < nidxs; ii++) { if (ii == iMax) { continue; } /* Get the ii:th value */ xii = xx[ii]; if (!hasna2 || !ISNAN(xii)) { sum += exp(xii - xMax); } /* Early LDOUBLE stopping on -Inf/+Inf and user interrupt? */ if (ii % 1048576 == 0) { if (!R_FINITE(sum)) break; R_CheckUserInterrupt(); } } /* for (ii ...) */ } else { for (ii=0; ii < nidxs; ii++) { if (ii == iMax) { continue; } /* Get the ii:th value */ if (noidxs){ idx = ii; xii = x[idx]; } else if (!idxsHasNA) { idx = idxs[ii]; xii = x[idx]; } else { idx = idxs[ii]; xii = R_INDEX_GET(x, idx, NA_REAL, 1); } if (!hasna2 || !ISNAN(xii)) { sum += exp(xii - xMax); } /* Early LDOUBLE stopping on -Inf/+Inf and user interrupt? */ if (ii % 1048576 == 0) { if (!R_FINITE(sum)) break; R_CheckUserInterrupt(); } } /* for (ii ...) */ } /* if (by) */ sum = xMax + log1p(sum); return(sum); } /*************************************************************************** HISTORY: 2015-06-11 [DJ] o Supported subsetted computation. 2015-06-10 [DJ] o Merge 'logSumExp_double_by' to 'logSumExp_double' 2015-01-26 [HB] o SPEEDUP: Now step 2 ("summing") only checks where NAs if NAs were detected in step 1 ("max value"), which should be noticibly faster since testing for NA is expensive for double values. o SPEEDUP: Now function returns early after step 1 ("max value") if the maximum value found is +Inf, or if all values where NAs. o BUG FIX: Now logSumExp(, na.rm=TRUE) also returns -Inf. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/colRanges_lowlevel.h0000644000176200001440000000140114436403546017370 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void colRanges_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, int *ans, int *is_counted) void colRanges_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, int *ans, int *is_counted) */ #define X_TYPE 'i' #include "colRanges_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "colRanges_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowRanksWithTies.c0000644000176200001440000004137214531477242017037 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowRanksWithTies(SEXP x, SEXP rows, SEXP cols, SEXP tiesMethod, SEXP byRow) Authors: Hector Corrada Bravo, Peter Langfelder and Henrik Bengtsson TO DO: Add support for missing values. **************************************************************************/ #include #include "rowRanksWithTies_lowlevel.h" #include "naming.h" /* Peter Langfelder's modifications: * byrow: 0 => rank columns, !0 => rank rows * tiesMethod: 1: maximum, 2: average, 3:minimum * The returned rank is a REAL matrix to accomodate average ranks */ /* Brian Montgomery's modifications: * added tiesMethods first, last, random, and dense * reordered to match base::ranks * tiesMethod: 1: average, 2: first, 3: last, 5: random, 5: maximum, 6:minimum, 7:dense */ // Arrange the elements from i to j of array in random order. // Used in tiesMethod "random". void SHUFFLE_INT(int *array, size_t i, size_t j) { if (j > i) { size_t k, l; for (k = i; k < j; k++) { l = k + (size_t) (unif_rand() * (j - k + 1)); SWAP(int, array[l], array[k]); } } } SEXP rowRanksWithTies(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP tiesMethod, SEXP byRow, SEXP useNames) { int tiesmethod, byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'tiesMethod': */ tiesmethod = asInteger(tiesMethod); if (tiesmethod < 1 || tiesmethod > 7) { error("Argument 'tiesMethod' is out of range [1,7]: %d", tiesmethod); } /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { if (byrow) { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowRanksWithTies_Average_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_First_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Last_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); rowRanksWithTies_Random_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Max_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Min_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Dense_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } else { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); colRanksWithTies_Average_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_First_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Last_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); colRanksWithTies_Random_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Max_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Min_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Dense_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } } else if (isInteger(x)) { if (byrow) { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowRanksWithTies_Average_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_First_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Last_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); rowRanksWithTies_Random_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Max_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Min_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Dense_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } else { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); colRanksWithTies_Average_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_First_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Last_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); colRanksWithTies_Random_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Max_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Min_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Dense_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // rowRanksWithTies() /*************************************************************************** HISTORY: 2019-4-23 [BKM] o Added more tiesMethods: first, last, random, and dense 2015-06-12 [DJ] o Supported subsetted computation. 2013-01-13 [HB] o Added argument 'tiesMethod' to rowRanks(). **************************************************************************/ matrixStats/src/validateIndices_lowlevel.h0000644000176200001440000000202014406445217020537 0ustar liggesusers#include #include "000.utils.h" #include "000.macros.h" #define RETURN_VALIDATED_ANS(type, n, cond, item, poststmt) \ type *ans = (type*) R_alloc(count, sizeof(type)); \ jj = 0; \ for (ii = 0; ii < n; ++ ii) { \ if (cond) ans[jj ++] = item; \ } \ poststmt \ return ans #define FILL_VALIDATED_ANS(n, cond, item) \ jj = 0; \ for (ii = 0; ii < n; ++ ii) { \ if (cond) ans[jj ++] = item; \ } #define X_TYPE 'i' #define SUBSETTED_DEFAULT SUBSETTED_INTEGER #include "validateIndices_lowlevel_template.h" #include "000.templates-types_undef.h" #undef SUBSETTED_DEFAULT #define X_TYPE 'r' #define SUBSETTED_DEFAULT SUBSETTED_REAL #include "validateIndices_lowlevel_template.h" #include "000.templates-types_undef.h" #undef SUBSETTED_DEFAULT matrixStats/src/rowCummins_lowlevel.h0000644000176200001440000000141214436403546017620 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCummins_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) void rowCummins_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) */ #define METHOD rowCummins #define COMP '<' #define X_TYPE 'i' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #undef COMP #undef METHOD matrixStats/src/colCounts_lowlevel.h0000644000176200001440000000205514436403546017432 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void colCounts_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int value, int what, int narm, int hasna, double *ans) void colCounts_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int value, int what, int narm, int hasna, double *ans) void colCounts_lgl(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int value, int what, int narm, int hasna, double *ans) */ #define X_TYPE 'i' #include "colCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "colCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'l' #include "colCounts_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowRanksWithTies_lowlevel_template.h0000644000176200001440000002075714436403546022654 0ustar liggesusers/*********************************************************************** TEMPLATE: Ranks_dbl_ties(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - MARGIN: 'r' (rows) or 'c' (columns). - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' - TIESMETHOD: 'a' (average), 'f' (first), 'l' (last), 'r' (random), '0' (min), '1' (max), 'd' (dense) Authors: Hector Corrada Bravo [HCB] Peter Langfelder [PL] Henrik Bengtsson [HB] Brian Montgomery [BKM] ***********************************************************************/ #include #undef RANK #if TIESMETHOD == 'a' /* average */ #define ANS_TYPE 'r' #define RANK(firstTie, aboveTie) ((double) (firstTie + aboveTie + 1))/2 #elif TIESMETHOD == '0' /* min */ #define ANS_TYPE 'i' #define RANK(firstTie, aboveTie) firstTie + 1 #elif TIESMETHOD == '1' /* max */ #define ANS_TYPE 'i' #define RANK(firstTie, aboveTie) aboveTie #else #define ANS_TYPE 'i' /* dense and other(RANK not used) */ #define RANK(firstTie, aboveTie) firstTie + 1 #endif /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void SHUFFLE_INT(int *array, size_t i, size_t j); /* prototype for use with "random" */ /* Indexing formula to compute the vector index of element j of vector i. Should take arguments element, vector, nElements, nVectors. */ #undef ANS_INDEX_OF #if MARGIN == 'r' /* rows */ #define ANS_INDEX_OF(element, vector, nRows) \ vector + element*nRows #elif MARGIN == 'c' /* columns */ #define ANS_INDEX_OF(element, vector, nRows) \ element + vector*nRows #else #error "MARGIN can only be 'r' or 'c'" #endif void CONCAT_MACROS(METHOD, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, ANS_C_TYPE *ans) { ANS_C_TYPE rank; X_C_TYPE *values, current, tmp; R_xlen_t *colOffset; R_xlen_t ii, jj, kk, rowIdx; int *I; int lastFinite, firstTie, aboveTie, dense_rank_adj; int nvalues, nVec; #if MARGIN == 'r' nvalues = ncols; nVec = nrows; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (cols == NULL) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(jj, *, nrow, 0, 0); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow, colsHasNA, 0); } #elif MARGIN == 'c' nvalues = nrows; nVec = ncols; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(nrows, sizeof(R_xlen_t)); if (rows == NULL) { for (jj=0; jj < nrows; jj++) colOffset[jj] = jj; } else { for (jj=0; jj < nrows; jj++) colOffset[jj] = rows[jj]; } #endif values = (X_C_TYPE *) R_alloc(nvalues, sizeof(X_C_TYPE)); I = (int *) R_alloc(nvalues, sizeof(int)); for (ii=0; ii < nVec; ii++) { #if MARGIN == 'r' rowIdx = ((rows == NULL) ? (ii) : rows[ii]); #elif MARGIN == 'c' rowIdx = R_INDEX_OP(((cols == NULL) ? (ii) : cols[ii]), *, nrow, colsHasNA, 0); #endif lastFinite = nvalues-1; /* Put the NA/NaN elements at the end of the vector and update the index vector appropriately. This may be a bit faster since it only uses one loop over the length of the vector, plus it shortens the sort in case there are missing values. /PL (2012-12-14) */ for (jj = 0; jj <= lastFinite; jj++) { /* * Checking for the colsHasNA when we already have to check colsHasNA || rowsHasNA * is indeed useless, but for keeping the code ideomatic, we still do it * Hopefully, the compiler will optimize out the unnecessary instructions [JPP]. */ #if MARGIN == 'r' tmp = R_INDEX_GET(x, R_INDEX_OP(rowIdx, +, colOffset[jj], rowsHasNA, colsHasNA), X_NA, colsHasNA || rowsHasNA); #elif MARGIN == 'c' tmp = R_INDEX_GET(x, R_INDEX_OP(rowIdx, +, colOffset[jj], colsHasNA, rowsHasNA), X_NA, colsHasNA || rowsHasNA); #endif if (X_ISNAN(tmp)) { while (lastFinite > jj && X_ISNAN(R_INDEX_GET(x, R_INDEX_OP(rowIdx,+,colOffset[lastFinite], #if MARGIN == 'r' rowsHasNA, colsHasNA #elif MARGIN == 'c' colsHasNA, rowsHasNA #endif ), X_NA, colsHasNA || rowsHasNA))) { I[lastFinite] = lastFinite; lastFinite--; } I[lastFinite] = jj; I[jj] = lastFinite; values[ jj ] = R_INDEX_GET(x, R_INDEX_OP(rowIdx,+,colOffset[lastFinite], #if MARGIN == 'r' rowsHasNA, colsHasNA #elif MARGIN == 'c' colsHasNA, rowsHasNA #endif ), X_NA, colsHasNA || rowsHasNA); values[ lastFinite ] = tmp; lastFinite--; } else { I[jj] = jj; values[ jj ] = tmp; } } /* for (jj ...) */ // Diagnostic print-outs /* Rprintf("Swapped vector:\n"); for (jj=0; jj < nvalues; jj++) { Rprintf(" %8.4f,", values[jj]); if (((jj+1) % 5==0) || (jj==nvalues-1)) Rprintf("\n"); } Rprintf("Index vector:\n"); for (jj=0; jj 0) X_QSORT_I(values, I, 1, lastFinite + 1); // Calculate the ranks. firstTie = 0; aboveTie = 1; dense_rank_adj = 0; for (jj=0; jj <= lastFinite;) { if (TIESMETHOD == 'd') { dense_rank_adj += (aboveTie - firstTie - 1); firstTie = jj - dense_rank_adj; } else { firstTie = jj; } current = values[jj]; while ((jj <= lastFinite) && (values[jj] == current)) jj++; if (TIESMETHOD == 'd') { aboveTie = jj - dense_rank_adj; } else { aboveTie = jj; } // X_QSORT_I is not stable - ties can be permuted. // This restores the original order. // It might be more efficient to use a stable sort to begin with. if (TIESMETHOD == 'f' || TIESMETHOD == 'l') { R_qsort_int(I, firstTie + 1, aboveTie); /* Function is 1-based */ // SHUFFLE_INT randomizes the order. } else if (TIESMETHOD == 'r') { SHUFFLE_INT(I, firstTie, aboveTie - 1); } else { // Get appropriate rank for average, min, max, or dense rank = RANK(firstTie, aboveTie); } for (kk=firstTie; kk < aboveTie; kk++) { if (TIESMETHOD == 'f' || TIESMETHOD == 'r') { ans[ ANS_INDEX_OF(I[kk], ii, nrows) ] = kk + 1; } else if (TIESMETHOD == 'l') { ans[ ANS_INDEX_OF(I[kk], ii, nrows) ] = aboveTie - (kk - firstTie); } else if (TIESMETHOD == 'd') { ans[ ANS_INDEX_OF(I[kk + dense_rank_adj], ii, nrows) ] = rank; } else { ans[ ANS_INDEX_OF(I[kk], ii, nrows) ] = rank; } } } // At this point jj = lastFinite + 1, no need to re-initialize again. for (; jj < nvalues; jj++) { ans[ ANS_INDEX_OF(I[jj], ii, nrows) ] = ANS_NA; } // Rprintf("\n"); } } /*************************************************************************** HISTORY: 2019-4-23 [BKM] o Added new tiesMethods: first, last, random, and dense. 2015-06-12 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-04-23 [HB] o BUG FIX: Ranks did not work for integers with NAs; now using X_ISNAN(). 2013-01-13 [HB] o Template cleanup. Extened tempate to integer matrices. o Added argument 'tiesMethod' to rowRanks(). 2012-12-14 [PL] o Added internal support for "min", "max" and "average" ties. Using template to generate the various versions of the functions. 2013-01-13 [HCB] o Created. Using "max" ties. **************************************************************************/ matrixStats/src/colRanges.c0000644000176200001440000001267014531477242015464 0ustar liggesusers/*************************************************************************** Public methods: SEXP colRanges(SEXP x, ...) Authors: Henrik Bengtsson. Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "colRanges_lowlevel.h" #include "naming.h" SEXP colRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans = NILSXP, ans2 = NILSXP; int *mins, *maxs; double *mins2, *maxs2; int *is_counted, all_counted = 0; int what2, narm, hasna, usenames; R_xlen_t nrow, ncol, jj; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'what': */ if (length(what) != 1) error("Argument 'what' must be a single number."); if (!isNumeric(what)) error("Argument 'what' must be a numeric number."); what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("Invalid value of 'what': %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'useNames': */ usenames = asLogical(useNames); is_counted = (int *) R_alloc(ncols, sizeof(int)); if (isReal(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(REALSXP, ncols, 2)); } else { PROTECT(ans = allocVector(REALSXP, ncols)); } colRanges_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, what2, narm, hasna, REAL(ans), is_counted); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (ncols != 0) { /* colRanges() returns a numeric Kx2 matrix, reverse dimnames */ setDimnames(ans, dimnames, ncols, ccols, 0, crows, TRUE); } /* (else) Zero-length colnames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } } UNPROTECT(1); } else if (isInteger(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(INTSXP, ncols, 2)); } else { PROTECT(ans = allocVector(INTSXP, ncols)); } colRanges_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, what2, narm, hasna, INTEGER(ans), is_counted); /* Any entries with zero non-missing values? */ all_counted = 1; for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { all_counted = 0; break; } } if (!all_counted) { /* Handle zero non-missing values */ /* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */ if (what2 == 0) { PROTECT(ans2 = allocVector(REALSXP, ncols)); mins = INTEGER(ans); mins2 = REAL(ans2); for (jj=0; jj < ncols; jj++) { if (is_counted[jj]) { mins2[jj] = (double)mins[jj]; } else { mins2[jj] = R_PosInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 1) { PROTECT(ans2 = allocVector(REALSXP, ncols)); maxs = INTEGER(ans); maxs2 = REAL(ans2); for (jj=0; jj < ncols; jj++) { if (is_counted[jj]) { maxs2[jj] = (double)maxs[jj]; } else { maxs2[jj] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 2) { PROTECT(ans2 = allocMatrix(REALSXP, ncols, 2)); mins = INTEGER(ans); maxs = &INTEGER(ans)[ncols]; mins2 = REAL(ans2); maxs2 = &REAL(ans2)[ncols]; for (jj=0; jj < ncols; jj++) { if (is_counted[jj]) { mins2[jj] = (double)mins[jj]; maxs2[jj] = (double)maxs[jj]; } else { mins2[jj] = R_PosInf; maxs2[jj] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } ans = ans2; } if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (ncols != 0) { /* colRanges() returns a numeric Kx2 matrix, reverse dimnames */ setDimnames(ans, dimnames, ncols, ccols, 0, crows, TRUE); } /* (else) Zero-length colnames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } } UNPROTECT(1); /* ans */ } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // colRanges() /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/rowRanges_lowlevel.h0000644000176200001440000000140114436403546017422 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowRanges_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, int *ans, int *is_counted) void rowRanges_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, int *ans, int *is_counted) */ #define X_TYPE 'i' #include "rowRanges_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowRanges_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/colOrderStats.c0000644000176200001440000000617714531726565016351 0ustar liggesusers/*************************************************************************** Public methods: SEXP colOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) Authors: Henrik Bengtsson To do: Add support for missing values. Copyright Henrik Bengtsson, 2007-2014 **************************************************************************/ #include #include "000.types.h" #include "colOrderStats_lowlevel.h" #include "naming.h" SEXP colOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) { SEXP ans = NILSXP; R_xlen_t nrow, ncol, qq; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'which': */ if (length(which) != 1) error("Argument 'which' must be a single number."); if (!isNumeric(which)) error("Argument 'which' must be a numeric number."); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasna, colsHasna; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasna); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasna); /* Argument 'useNames': */ int usenames = asLogical(useNames); // Check missing rows if (rowsHasna && ncols > 0) { error("Argument 'rows' must not contain missing value"); } // Check missing cols if (colsHasna && nrows > 0) { error("Argument 'cols' must not contain missing value"); } /* Subtract one here, since rPsort does zero based addressing */ qq = asInteger(which) - 1; /* Assert that 'qq' is a valid index */ if (qq < 0 || qq >= nrows) { error("Argument 'which' is out of range: %lld", (long long int)(qq + 1)); } /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, ncols)); colOrderStats_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, qq, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, ncols)); colOrderStats_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, qq, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // colOrderStats() /*************************************************************************** HISTORY: 2015-07-08 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created from rowOrderStats.c. **************************************************************************/ matrixStats/src/rowCumsums_lowlevel.h0000644000176200001440000000127614436403546017651 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCumsums_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) void rowCumsums_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowCumsums_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumsums_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowCumsums_lowlevel_template.h0000644000176200001440000001524514436403546021545 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCumsums_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowCumsums, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev, idx; R_xlen_t colBegin; X_C_TYPE xvalue; LDOUBLE value; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } #if ANS_TYPE == 'i' double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; /* OK, i.e. no integer overflow yet? */ int warn = 0, ok, *oks = NULL; #endif if (ncols == 0 || nrows == 0) return; if (byrow) { #if ANS_TYPE == 'i' oks = (int *) R_alloc(nrows, sizeof(int)); #endif if (nocols) { colBegin = 0; } else if (!colsHasNA) { colBegin = cols[0] * nrow; } else{ colBegin = R_INDEX_OP(cols[0], *, nrow, 1, 0); } for (kk=0; kk < nrows; kk++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + kk; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[kk]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[kk]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } ans[kk] = (ANS_C_TYPE) xvalue; #if ANS_TYPE == 'i' oks[kk] = !X_ISNA(xvalue); #endif } kk_prev = 0; for (jj=1; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } #if ANS_TYPE == 'i' if (oks[ii]) { /* Missing value? */ if (X_ISNA(xvalue)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { value = (LDOUBLE) ans[kk_prev] + (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { oks[ii] = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] + (LDOUBLE) xvalue); #endif kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } value = 0; #if ANS_TYPE == 'i' ok = 1; #endif for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } #if ANS_TYPE == 'i' if (ok) { /* Missing value? */ if (X_ISNA(xvalue)) { ok = 0; ans[kk] = ANS_NA; } else { value += (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { ok = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else value += xvalue; ans[kk] = (ANS_C_TYPE) value; #endif kk++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (warn) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/000.templates-types.h0000644000176200001440000000757114436403546017216 0ustar liggesusers#include #include "000.macros.h" #undef X_C_TYPE #undef X_C_SIGNATURE #undef X_IN_C #undef X_ISNAN #undef X_ISNA #undef X_ABS #undef X_PSORT #undef X_QSORT_I #undef X_NA #undef Y_C_TYPE #undef Y_C_SIGNATURE #undef Y_IN_C #undef Y_ISNAN #undef Y_ISNA #undef Y_ABS #undef Y_PSORT #undef Y_QSORT_I #undef Y_NA #undef X_C_Y_C_SIGNATURE #undef ANS_SXP #undef ANS_NA #undef ANS_ISNAN #undef ANS_ISNA #undef ANS_C_TYPE #undef ANS_IN_C /* Data type macros for argument 'x' */ #ifdef X_TYPE #if X_TYPE == 'i' #define X_C_TYPE int #define X_C_SIGNATURE int #define X_IN_C INTEGER #define X_ISNAN(x) (x == NA_INTEGER) #define X_ISNA(x) (x == NA_INTEGER) #define X_ABS(x) abs(x) #define X_PSORT iPsort #define X_QSORT_I R_qsort_int_I #define X_NA NA_INTEGER #elif X_TYPE == 'r' #define X_C_TYPE double #define X_C_SIGNATURE dbl #define X_IN_C REAL #define X_ISNAN(x) ISNAN(x) /* True for R's NA and IEEE NaN */ #define X_ISNA(x) ISNA(x) /* True for R's NA */ #define X_ABS(x) fabs(x) #define X_PSORT rPsort #define X_QSORT_I R_qsort_I #define X_NA NA_REAL #elif X_TYPE == 'l' #define X_C_TYPE int #define X_C_SIGNATURE lgl #define X_IN_C LOGICAL #define X_ISNAN(x) (x == NA_LOGICAL) #define X_NA NA_LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro X_C_TYPE etc.: Unknown X_TYPE" #endif #endif /* Data type macros for argument 'y' */ #ifdef Y_TYPE #if Y_TYPE == 'i' #define Y_C_TYPE int #define Y_C_SIGNATURE int #define Y_IN_C INTEGER #define Y_ISNAN(x) (x == NA_INTEGER) #define Y_ISNA(x) (x == NA_INTEGER) #define Y_ABS(x) abs(x) #define Y_PSORT iPsort #define Y_QSORT_I R_qsort_int_I #define Y_NA NA_INTEGER #elif Y_TYPE == 'r' #define Y_C_TYPE double #define Y_C_SIGNATURE dbl #define Y_IN_C REAL #define Y_ISNAN(x) ISNAN(x) /* NA or NaN */ #define Y_ISNA(x) ISNA(x) /* NA only */ #define Y_ABS(x) fabs(x) #define Y_PSORT rPsort #define Y_QSORT_I R_qsort_I #define Y_NA NA_REAL #elif Y_TYPE == 'l' #define Y_C_TYPE int #define Y_C_SIGNATURE lgl #define Y_IN_C LOGICAL #define Y_ISNAN(x) (x == NA_LOGICAL) #define Y_NA NA_LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro Y_C_TYPE etc.: Unknown Y_TYPE" #endif #endif /* Data type macros for result ('ans') */ #ifndef ANS_TYPE #ifdef X_TYPE /* Default to same as 'x' */ #define ANS_TYPE X_TYPE #endif #endif #ifdef ANS_TYPE #if ANS_TYPE == 'i' #define ANS_SXP INTSXP #define ANS_NA NA_INTEGER #define ANS_ISNAN(x) (x == NA_INTEGER) #define ANS_ISNA(x) (x == NA_INTEGER) #define ANS_C_TYPE int #define ANS_IN_C INTEGER #elif ANS_TYPE == 'r' #define ANS_SXP REALSXP #define ANS_NA NA_REAL #define ANS_ISNAN(x) ISNAN(x) /* NA or NaN */ #define ANS_ISNA(x) ISNA(x) /* NA only */ #define ANS_C_TYPE double #define ANS_IN_C REAL #elif ANS_TYPE == 'l' #define ANS_SXP LGLSXP #define ANS_NA NA_LOGICAL #define ANS_ISNAN(x) (x == NA_LOGICAL) #define ANS_C_TYPE int #define ANS_IN_C LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro ANS_C_TYPE: Unknown ANS_TYPE" #endif #endif /* Subsetted indexing: whether to check NA according to indexing For optimization purposes, these macros can skip checking whether the arguments are NA based on the macro arguments. This may seem like a futile optimiztion (we do branching), but such checking flags are often loop invariants meaning the compiler will optimize out the branches and put the before the loop. */ #undef R_INDEX_OP #undef R_INDEX_GET #define R_INDEX_OP(a, OP, b, check_a_NA, check_b_NA) ((check_a_NA ? a == NA_R_XLEN_T : 0) || (check_b_NA ? b == NA_R_XLEN_T : 0) ? NA_R_XLEN_T : (a) OP (b)) #define R_INDEX_GET(x, i, NA, check_i_NA) ((check_i_NA ? (i) == NA_R_XLEN_T : 0) ? NA : x[(i)]) matrixStats/src/logSumExp.c0000644000176200001440000000265514531477242015474 0ustar liggesusers/*************************************************************************** Public methods: SEXP logSumExp(SEXP lx, SEXP idxs, SEXP naRm, SEXP hasNA) Arguments: lx : numeric vector idxs : subsetting indices naRm : a logical scalar hasNA: a logical scalar Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013 **************************************************************************/ #include #include #include "logSumExp_lowlevel.h" SEXP logSumExp(SEXP lx, SEXP idxs, SEXP naRm, SEXP hasNA) { int narm, hasna; /* Argument 'lx': */ assertArgVector(lx, (R_TYPE_REAL), "lx"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, xlength(lx), 1, &nidxs, &idxsHasNA); return(Rf_ScalarReal(logSumExp_double(REAL(lx), cidxs, nidxs, idxsHasNA, narm, hasna, 0, NULL))); } /* logSumExp() */ /*************************************************************************** HISTORY: 2015-06-11 [DJ] o Supported subsetted computation. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/sum2_lowlevel.h0000644000176200001440000000105614436403546016347 0ustar liggesusers#include #include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double sum2_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm) double sum2_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm) */ #define X_TYPE 'i' #include "sum2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "sum2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowOrderStats_lowlevel.h0000644000176200001440000000123314406445217020276 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowOrderStats_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) void rowOrderStats_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) */ #define X_TYPE 'i' #include "rowOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/binCounts_lowlevel.h0000644000176200001440000000066014372747711017431 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include /* Native API (dynamically generated via macros): void binCounts_L(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) void binCounts_R(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) */ #define BIN_BY 'L' #include "binCounts_lowlevel_template.h" #define BIN_BY 'R' #include "binCounts_lowlevel_template.h" matrixStats/src/rowVars_lowlevel_template.h0000644000176200001440000001133414436403546021017 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowVars_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowVars, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, kk, idx; R_xlen_t *colOffset; X_C_TYPE *values, value; double value_d, sum_d, mu_d, sigma2_d; int nocols, norows; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* Pre-calculate the column offsets */ if (nocols) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) if(!rowsHasNA && !colsHasNA){ colOffset[jj] = cols[jj] * nrow; } else{ colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow,1,1); } } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (norows) { /* ii and ncols cannot be NA-values, so we do not need R_INDEX_OP */ rowIdx = byrow ? ii : ii*ncol; } else { if(!rowsHasNA && !colsHasNA) { rowIdx = byrow ? rows[ii] : rows[ii] * ncol; } rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol,1,1); } kk = 0; for (jj=0; jj < ncols; jj++) { if (!rowsHasNA && nocols) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; value = x[idx]; } else if (!rowsHasNA && !colsHasNA && !nocols) { idx = rowIdx + colOffset[jj]; value = x[idx]; } else { if (nocols) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow,1,1); else idx = R_INDEX_OP(rowIdx, +, jj,1,1); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj],1,1); } value = R_INDEX_GET(x, idx, X_NA,1); } if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* for (jj ...) */ /* Note that 'values' will never contain NA/NaNs */ if (kk <= 1) { ans[ii] = NA_REAL; } else { /* (a) Calculate mu = sum(x)/length(x) */ sum_d = 0.0; for (jj=0; jj < kk; jj++) { sum_d += (double)values[jj]; } mu_d = sum_d / (double)kk; #if X_TYPE == 'r' if (refine) { sum_d = 0.0; for (jj=0; jj < kk; jj++) { sum_d += (double)(values[jj] - mu_d); } mu_d = mu_d + sum_d / (double)kk; } /* for (jj ...) */ #endif /* (b) Calculate sigma^2 */ sigma2_d = 0; for (jj=0; jj < kk; jj++) { value_d = ((double)values[jj] - mu_d); value_d *= value_d; sigma2_d += value_d; } sigma2_d /= (double)(kk-1); ans[ii] = sigma2_d; } /* if (kk <= 1) */ R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-18 [HB] o Created from rowMads_TYPE-template.h. **************************************************************************/ matrixStats/src/000.templates-gen-matrix-vector.h0000644000176200001440000000534614372747711021427 0ustar liggesusers#include "000.macros.h" #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define IDXS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #undef IDXS_TYPE #define IDXS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #undef IDXS_TYPE RETURN_TYPE (*METHOD_NAME[3][3][3])(ARGUMENTS_LIST) = {{ {METHOD_NAME_arows_acols_aidxs, METHOD_NAME_arows_acols_iidxs, METHOD_NAME_arows_acols_didxs}, {METHOD_NAME_arows_icols_aidxs, METHOD_NAME_arows_icols_iidxs, METHOD_NAME_arows_icols_didxs}, {METHOD_NAME_arows_dcols_aidxs, METHOD_NAME_arows_dcols_iidxs, METHOD_NAME_arows_dcols_didxs}, }, { {METHOD_NAME_irows_acols_aidxs, METHOD_NAME_irows_acols_iidxs, METHOD_NAME_irows_acols_didxs}, {METHOD_NAME_irows_icols_aidxs, METHOD_NAME_irows_icols_iidxs, METHOD_NAME_irows_icols_didxs}, {METHOD_NAME_irows_dcols_aidxs, METHOD_NAME_irows_dcols_iidxs, METHOD_NAME_irows_dcols_didxs}, }, { {METHOD_NAME_drows_acols_aidxs, METHOD_NAME_drows_acols_iidxs, METHOD_NAME_drows_acols_didxs}, {METHOD_NAME_drows_icols_aidxs, METHOD_NAME_drows_icols_iidxs, METHOD_NAME_drows_icols_didxs}, {METHOD_NAME_drows_dcols_aidxs, METHOD_NAME_drows_dcols_iidxs, METHOD_NAME_drows_dcols_didxs}, } }; #include "000.templates-types_undef.h" matrixStats/src/rowCounts.c0000644000176200001440000000540714531477242015552 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCounts(SEXP x, ...) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCounts_lowlevel.h" #include "naming.h" SEXP rowCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans; int narm, hasna, what2, usenames; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' & 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric or a logical value."); /* Argument 'what': */ what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("INTERNAL ERROR: Unknown value of 'what' for rowCounts: %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* R allocate a double vector of length 'nrow' */ PROTECT(ans = allocVector(INTSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowCounts_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asReal(value), what2, narm, hasna, INTEGER(ans)); } else if (isInteger(x)) { rowCounts_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asInteger(value), what2, narm, hasna, INTEGER(ans)); } else if (isLogical(x)) { rowCounts_lgl(LOGICAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asLogical(value), what2, narm, hasna, INTEGER(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } UNPROTECT(2); return(ans); } // rowCounts() /*************************************************************************** HISTORY: 2015-04-13 [DJ] o Supported subsetted computation. 2014-06-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowSums2.c0000644000176200001440000000474614531477242015315 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowSums2(SEXP x, SEXP naRm, SEXP hasNA) SEXP colSums2(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2017 **************************************************************************/ #include #include "000.types.h" #include "rowSums2_lowlevel.h" #include "naming.h" SEXP rowSums2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); SWAP(int, rowsHasNA, colsHasNA); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowSums2_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x) || isLogical(x)) { rowSums2_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } matrixStats/src/mean2_lowlevel_template.h0000644000176200001440000000517614436403546020365 0ustar liggesusers/*********************************************************************** TEMPLATE: double mean2_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA int narm, int refine Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014-2017 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(mean2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine) { X_C_TYPE value; R_xlen_t ii; LDOUBLE sum = 0, avg = R_NaN; int noidxs; if (idxs == NULL) { noidxs = 1; } else { noidxs = 0;} #if X_TYPE == 'r' LDOUBLE rsum = 0; #endif R_xlen_t count = 0; for (ii=0; ii < nidxs; ++ii) { if (noidxs) { value = x[ii]; } else { R_xlen_t idx = idxs[ii]; if (!idxsHasNA) { value = x[idx]; } else { value = R_INDEX_GET(x, idx, X_NA, 1); } } #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; ++count; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; ++count; /* Early stopping if sum is NA_real_ (but not NaN, -Inf, or +Inf) */ if (ii % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; ++count; } #endif } /* for (i ...) */ if (sum > DBL_MAX) { avg = R_PosInf; } else if (sum < -DBL_MAX) { avg = R_NegInf; } else { avg = sum / count; /* Extra precision by summing over residuals? */ #if X_TYPE == 'r' if (refine && R_FINITE(avg)) { for (ii=0; ii < nidxs; ++ii) { value = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA, idxsHasNA); if (!narm || !ISNAN(value)) { rsum += (LDOUBLE)(value - avg); } } avg += (rsum / count); } #endif } return (double)avg; } /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Now mean2_() uses only basic C types. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowCumprods_lowlevel.h0000644000176200001440000000127714436403546020012 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCumprods_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) void rowCumprods_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, int *ans) */ #define X_TYPE 'i' #include "rowCumprods_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumprods_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/naming.h0000644000176200001440000000144214436403546015020 0ustar liggesusers#ifndef NAMING_H #define NAMING_H #include #include #include "000.types.h" void setNames(SEXP vec/* Answer vector*/, SEXP namesVec, R_xlen_t length, R_xlen_t *subscripts); void setNamesDiff(SEXP vec/* Answer vector*/, SEXP namesVec, R_xlen_t length, R_xlen_t length_ans, R_xlen_t *subscripts); void setDimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols, Rboolean reverseDimnames); void set_rowDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t ncol_ans, R_xlen_t *ccols); void set_colDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t nrow_ans, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols); #endif /* NAMING_H */ matrixStats/src/naming.c0000644000176200001440000002530314531477242015015 0ustar liggesusers#include "naming.h" void setNames(SEXP vec/*Answer vector*/, SEXP namesVec, R_xlen_t length, R_xlen_t *subscripts) { if (length == 0) { /* Zero-length names attribute? Keep behavior same as base R function */ return; } if (subscripts == NULL) { namesgets(vec, namesVec); } else { SEXP ansNames = PROTECT(allocVector(STRSXP, length)); R_xlen_t i, thisIdx; for (i = 0; i < length; i++) { thisIdx = subscripts[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansNames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(namesVec, thisIdx); SET_STRING_ELT(ansNames, i, eltElement); } } namesgets(vec, ansNames); UNPROTECT(1); } } void setNamesDiff(SEXP vec/* Answer vector*/, SEXP namesVec, R_xlen_t length, R_xlen_t length_ans, R_xlen_t *subscripts) { /* For some reason, base::diff() actually sets an empty name attribute when the argument is a name character of length zero, so we skip the special case handled in setNames() */ SEXP ansNames = PROTECT(allocVector(STRSXP, length_ans)); R_xlen_t j = 0; if (subscripts == NULL) { for (R_xlen_t i = (length - length_ans); i < length; i++) { SEXP eltElement = STRING_ELT(namesVec, i); SET_STRING_ELT(ansNames, j++, eltElement); } } else { R_xlen_t thisIdx; for (R_xlen_t i = (length - length_ans); i < length; i++) { thisIdx = subscripts[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansNames, j++, NA_STRING); } else { SEXP eltElement = STRING_ELT(namesVec, thisIdx); SET_STRING_ELT(ansNames, j++, eltElement); } } } namesgets(vec, ansNames); UNPROTECT(1); } void setDimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols, Rboolean reverseDimnames) { SEXP rownames = VECTOR_ELT(dimnames, reverseDimnames ? 1 : 0); SEXP colnames = VECTOR_ELT(dimnames, reverseDimnames ? 0 : 1); /* In case both elements of the dimnames is NULL, we disregard the name attribute completely in order to conform to base R behavior */ if (rownames == R_NilValue && colnames == R_NilValue) { return; } if (crows == NULL && ccols == NULL && nrows > 0 && ncols > 0) { dimnamesgets(mat, dimnames); return; } SEXP ansDimnames = PROTECT(allocVector(VECSXP, 2)); if (nrows == 0 || rownames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 0, R_NilValue); } else if (crows == NULL) { SET_VECTOR_ELT(ansDimnames, 0, rownames); } else { SEXP ansRownames = PROTECT(allocVector(STRSXP, nrows)); R_xlen_t i, thisIdx; for (i = 0; i < nrows; i++) { thisIdx = crows[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansRownames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(rownames, thisIdx); SET_STRING_ELT(ansRownames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 0, ansRownames); UNPROTECT(1); } if (ncols == 0 || colnames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 1, R_NilValue); } else if (ccols == NULL) { SET_VECTOR_ELT(ansDimnames, 1, colnames); } else { if (colnames != R_NilValue) { SEXP ansColnames = PROTECT(allocVector(STRSXP, ncols)); R_xlen_t i, thisIdx; for (i = 0; i < ncols; i++) { thisIdx = ccols[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansColnames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(colnames, thisIdx); SET_STRING_ELT(ansColnames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 1, ansColnames); UNPROTECT(1); } } dimnamesgets(mat, ansDimnames); UNPROTECT(1); } void set_rowDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t ncol_ans, R_xlen_t *ccols) { if (nrows == 0 && ncol_ans == 0) { /* Zero-length attributes? Keep behavior same as base R function */ return; } SEXP rownames = VECTOR_ELT(dimnames, 0); SEXP colnames = VECTOR_ELT(dimnames, 1); /* In case both elements of the dimnames is NULL, we disregard the name attribute completely in order to conform to base R behavior */ if (rownames == R_NilValue && colnames == R_NilValue) { return; } SEXP ansDimnames = PROTECT(allocVector(VECSXP, 2)); if (nrows == 0 || rownames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 0, R_NilValue); } else if (crows == NULL) { SET_VECTOR_ELT(ansDimnames, 0, rownames); } else { SEXP ansRownames = PROTECT(allocVector(STRSXP, nrows)); R_xlen_t i, thisIdx; for (i = 0; i < nrows; i++) { thisIdx = crows[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansRownames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(rownames, thisIdx); SET_STRING_ELT(ansRownames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 0, ansRownames); UNPROTECT(1); } if (ncol_ans == 0 || colnames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 1, R_NilValue); } else { SEXP ansColnames = PROTECT(allocVector(STRSXP, ncol_ans)); R_xlen_t i, j = 0; if (ccols == NULL) { for (i = (ncols - ncol_ans); i < ncols; i++) { SEXP eltElement = STRING_ELT(colnames, i); SET_STRING_ELT(ansColnames, j++, eltElement); } } else { R_xlen_t thisIdx; for (i = (ncols - ncol_ans); i < ncols; i++) { thisIdx = ccols[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansColnames, j++, NA_STRING); } else { SEXP eltElement = STRING_ELT(colnames, thisIdx); SET_STRING_ELT(ansColnames, j++, eltElement); } } } SET_VECTOR_ELT(ansDimnames, 1, ansColnames); UNPROTECT(1); } dimnamesgets(mat, ansDimnames); UNPROTECT(1); } void set_colDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t nrow_ans, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols) { if (nrow_ans == 0 && ncols == 0) { /* Zero-length attributes? Keep behavior same as base R function */ return; } SEXP rownames = VECTOR_ELT(dimnames, 0); SEXP colnames = VECTOR_ELT(dimnames, 1); /* In case both elements of the dimnames is NULL, we disregard the name attribute completely in order to conform to base R behavior */ if (rownames == R_NilValue && colnames == R_NilValue) { return; } SEXP ansDimnames = PROTECT(allocVector(VECSXP, 2)); if (nrow_ans == 0 || rownames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 0, R_NilValue); } else { SEXP ansRownames = PROTECT(allocVector(STRSXP, nrow_ans)); R_xlen_t i, j = 0; if (crows == NULL) { for (i = (nrows - nrow_ans); i < nrows; i++) { SEXP eltElement = STRING_ELT(rownames, i); SET_STRING_ELT(ansRownames, j++, eltElement); } } else { R_xlen_t thisIdx; for (i = (nrows - nrow_ans); i < nrows; i++) { thisIdx = crows[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansRownames, j++, NA_STRING); } else { SEXP eltElement = STRING_ELT(rownames, thisIdx); SET_STRING_ELT(ansRownames, j++, eltElement); } } } SET_VECTOR_ELT(ansDimnames, 0, ansRownames); UNPROTECT(1); } if (ncols == 0 || colnames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 1, R_NilValue); } else if (ccols == NULL) { SET_VECTOR_ELT(ansDimnames, 1, colnames); } else { SEXP ansColnames = PROTECT(allocVector(STRSXP, ncols)); R_xlen_t i, thisIdx; for (i = 0; i < ncols; i++) { thisIdx = ccols[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansColnames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(colnames, thisIdx); SET_STRING_ELT(ansColnames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 1, ansColnames); UNPROTECT(1); } dimnamesgets(mat, ansDimnames); UNPROTECT(1); } matrixStats/src/000.templates-gen-vector.h0000644000176200001440000000052214372747711020114 0ustar liggesusers#include "000.macros.h" #include METHOD_TEMPLATE_H #define IDXS_TYPE 'i' #include METHOD_TEMPLATE_H #undef IDXS_TYPE #define IDXS_TYPE 'r' #include METHOD_TEMPLATE_H #undef IDXS_TYPE RETURN_TYPE (*METHOD_NAME[3])(ARGUMENTS_LIST) = { METHOD_NAME_aidxs, METHOD_NAME_iidxs, METHOD_NAME_didxs }; #include "000.templates-types_undef.h" matrixStats/src/productExpSumLog_lowlevel.h0000644000176200001440000000112714436403546020744 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double productExpSumLog_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int hasna) double productExpSumLog_dbl(bouble *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int hasna) */ #define X_TYPE 'i' #include "productExpSumLog_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "productExpSumLog_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/mean2.c0000644000176200001440000000273414531477242014551 0ustar liggesusers/*************************************************************************** Public methods: SEXP mean2(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "mean2_lowlevel.h" SEXP mean2(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) { SEXP ans; R_xlen_t nx; int narm, refine2; double avg = NA_REAL; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nx = xlength(x); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine2 = asLogicalNoNA(refine, "refine"); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Double matrices are more common to use. */ if (isReal(x)) { avg = mean2_dbl(REAL(x), nx, cidxs, nidxs, idxsHasNA, narm, refine2); } else if (isInteger(x) || isLogical(x)) { avg = mean2_int(INTEGER(x), nx, cidxs, nidxs, idxsHasNA, narm, refine2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = avg; UNPROTECT(1); return(ans); } // mean2() /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/colOrderStats_lowlevel.h0000644000176200001440000000123314406445217020244 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void colOrderStats_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) void colOrderStats_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) */ #define X_TYPE 'i' #include "colOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "colOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowMedians_lowlevel.h0000644000176200001440000000135314436403546017571 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowMedians_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans) void rowMedians_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowMedians_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowMedians_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/diff2_lowlevel.h0000644000176200001440000000122214436403546016446 0ustar liggesusers#include #include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void diff2_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nans) void diff2_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nans) */ #define X_TYPE 'i' #include "diff2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "diff2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowCumprods_lowlevel_template.h0000644000176200001440000001541514436403546021704 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCumprods_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowCumprods, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev, idx; R_xlen_t colBegin; X_C_TYPE xvalue; LDOUBLE value; int nocols, norows; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } #if ANS_TYPE == 'i' double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; /* OK, i.e. no integer overflow yet? */ int warn = 0, ok, *oks = NULL; #endif if (ncols == 0 || nrows == 0) return; if (byrow) { #if ANS_TYPE == 'i' oks = (int *) R_alloc(nrows, sizeof(int)); #endif if (nocols) { colBegin = 0; } else { R_xlen_t colsFirstElement = cols[0]; if (!colsHasNA || colsFirstElement != NA_R_XLEN_T) { colBegin = colsFirstElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (kk=0; kk < nrows; kk++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + kk; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[kk]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[kk]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } ans[kk] = (ANS_C_TYPE) xvalue; #if ANS_TYPE == 'i' oks[kk] = !X_ISNA(xvalue); #endif } kk_prev = 0; for (jj=1; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } #if ANS_TYPE == 'i' if (oks[ii]) { if (X_ISNA(xvalue)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { value = (LDOUBLE) ans[kk_prev] * (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { oks[ii] = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] * (LDOUBLE) xvalue); #endif kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } value = 1; #if ANS_TYPE == 'i' ok = 1; #endif for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } #if ANS_TYPE == 'i' if (ok) { if (X_ISNA(xvalue)) { ok = 0; ans[kk] = ANS_NA; } else { value *= (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { ok = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else value *= xvalue; ans[kk] = (ANS_C_TYPE) value; #endif kk++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (warn) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/000.templates-types_undef.h0000644000176200001440000000013314372747711020366 0ustar liggesusers#undef METHOD_NAME #undef X_TYPE #undef Y_TYPE #undef ANS_TYPE #undef MARGIN #undef OP matrixStats/src/rowCummins.c0000644000176200001440000000464014531477242015710 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCummins(SEXP x, ...) SEXP colCummins(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCummins_lowlevel.h" #include "naming.h" SEXP rowCummins(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCummins_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x) | isLogical(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCummins_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCummins() */ #undef COMP #undef METHOD /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/rowRanksWithTies_lowlevel.h0000644000176200001440000001567414436403546020763 0ustar liggesusers#include #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowRanksWithTies_Min_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Min_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Max_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Max_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Min_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Average_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Min_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Average_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) */ /***************************************************************** * ties.method = "average" *****************************************************************/ #define TIESMETHOD 'a' /* average */ #define METHOD rowRanksWithTies_Average #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Average #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "first" *****************************************************************/ #define TIESMETHOD 'f' /* first */ #define METHOD rowRanksWithTies_First #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_First #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "last" *****************************************************************/ #define TIESMETHOD 'l' /* last */ #define METHOD rowRanksWithTies_Last #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Last #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "random" *****************************************************************/ #define TIESMETHOD 'r' /* random */ #define METHOD rowRanksWithTies_Random #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Random #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "min" *****************************************************************/ #define TIESMETHOD '0' /* min */ #define METHOD rowRanksWithTies_Min #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Min #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "max" *****************************************************************/ #define TIESMETHOD '1' /* max */ #define METHOD rowRanksWithTies_Max #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Max #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "dense" *****************************************************************/ #define TIESMETHOD 'd' /* dense */ #define METHOD rowRanksWithTies_Dense #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Dense #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD matrixStats/src/colOrderStats_lowlevel_template.h0000644000176200001440000000501714406445217022143 0ustar liggesusers/*********************************************************************** TEMPLATE: void colOrderStats_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Adopted from ditto for rows. Copyright: Henrik Bengtsson, 2007-2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(colOrderStats, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans) { R_xlen_t ii, jj; R_xlen_t offset; X_C_TYPE *values; // Check missing rows if (rows != NULL) { for (ii=0; ii < nrows; ++ii) { if (rows[ii] == NA_R_XLEN_T) break; } if (ii < nrows && ncols > 0) { error("Argument 'rows' must not contain missing value"); } } // Check missing cols if (cols != NULL) { for (jj=0; jj < ncols; ++jj) { if (cols[jj] == NA_R_XLEN_T) break; } if (jj < ncols && nrows > 0) { error("Argument 'cols' must not contain missing value"); } } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(nrows, sizeof(X_C_TYPE)); for (jj=0; jj < ncols; jj++) { offset = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) values[ii] = x[((rows == NULL) ? (ii) : rows[ii]) + offset]; /* Sort vector of length 'nrows' up to position 'qq'. "...partial sorting: they permute x so that x[qq] is in the correct place with smaller values to the left, larger ones to the right." */ X_PSORT(values, nrows, qq); ans[jj] = values[qq]; } } /*************************************************************************** HISTORY: 2015-07-08 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created from rowOrderStats() ditto. **************************************************************************/ matrixStats/src/productExpSumLog.c0000644000176200001440000000277514531477242017040 0ustar liggesusers/*************************************************************************** Public methods: SEXP productExpSumLog(SEXP x, SEXP idxs, SEXP naRm, SEXP hasNA) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "productExpSumLog_lowlevel.h" SEXP productExpSumLog(SEXP x, SEXP idxs, SEXP naRm, SEXP hasNA) { SEXP ans = NILSXP; R_xlen_t nx; double res = NA_REAL; int narm, hasna; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Double matrices are more common to use. */ if (isReal(x)) { res = productExpSumLog_dbl(REAL(x), nx, cidxs, nidxs, idxsHasNA, narm, hasna); } else if (isInteger(x)) { res = productExpSumLog_int(INTEGER(x), nx, cidxs, nidxs, idxsHasNA, narm, hasna); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = res; UNPROTECT(1); return(ans); } // productExpSumLog() /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/rowLogSumExp_lowlevel.h0000644000176200001440000000057414436403546020100 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" /* Native API (dynamically generated via macros): void rowLogSumExps_double(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, R_xlen_t byrow, double *ans) */ #include "rowLogSumExp_lowlevel_template.h" matrixStats/src/rowDiffs_lowlevel.h0000644000176200001440000000150714436403546017245 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowDiffs_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) void rowDiffs_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) */ #define X_TYPE 'i' #include "rowDiffs_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowDiffs_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowVars_lowlevel.h0000644000176200001440000000136714436403546017131 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowVars_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans) void rowVars_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowVars_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowVars_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowVars.c0000644000176200001440000000551514531477242015212 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowVars(SEXP x, SEXP naRm, SEXP hasNA) SEXP colVars(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowVars_lowlevel.h" #include "naming.h" SEXP rowVars(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP refine2, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, refine, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine = asLogicalNoNA(refine2, "refine"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); SWAP(int, rowsHasNA, colsHasNA); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowVars_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, refine, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowVars_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, FALSE, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } /* rowVars() */ /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-18 [HB] o Created from rowMads.c. **************************************************************************/ matrixStats/src/rowCummaxs.c0000644000176200001440000000462214531477242015712 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCummaxs(SEXP x, ...) SEXP colCummaxs(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCummaxs_lowlevel.h" #include "naming.h" SEXP rowCummaxs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCummaxs_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x) | isLogical(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCummaxs_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCummaxs() */ #undef COMP /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/signTabulate_lowlevel_template.h0000644000176200001440000000345214436403546022000 0ustar liggesusers/*********************************************************************** TEMPLATE: void signTabulate_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(signTabulate, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, double *ans) { X_C_TYPE xi; R_xlen_t ii; R_xlen_t nNeg = 0, nZero = 0, nPos = 0, nNA=0; #if X_TYPE == 'r' R_xlen_t nPosInf=0, nNegInf=0; #endif for (ii = 0; ii < nidxs; ii++) { xi = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA, idxsHasNA); if (X_ISNAN(xi)) { nNA++; } else if (xi > 0) { nPos++; #if X_TYPE == 'r' if (xi == R_PosInf) nPosInf++; #endif } else if (xi < 0) { nNeg++; #if X_TYPE == 'r' if (xi == R_NegInf) nNegInf++; #endif } else if (xi == 0) { nZero++; } } ans[0] = nNeg; ans[1] = nZero; ans[2] = nPos; ans[3] = nNA; #if X_TYPE == 'r' ans[4] = nNegInf; ans[5] = nPosInf; #endif } /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/x_OP_y_lowlevel_template.h0000644000176200001440000004576414436403546020567 0ustar liggesusers#include "000.types.h" #include "000.templates-types.h" #define X_C_Y_C_SIGNATURE CONCAT_MACROS(X_C_SIGNATURE, Y_C_SIGNATURE) #define METHOD_NAME CONCAT_MACROS(METHOD, X_C_Y_C_SIGNATURE) #if OP == '+' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x + (double)y; } #define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME) static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) { if (X_ISNAN(x)) { return (double)y; } else if (Y_ISNAN(y)) { return (double)x; } else { return (double)x + (double)y; } } #elif OP == '-' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x - (double)y; } #define FUN_narm FUN_no_NA #elif OP == '*' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x * (double)y; } #define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME) static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) { if (X_ISNAN(x)) { return (double)y; } else if (Y_ISNAN(y)) { return (double)x; } else { return (double)x * (double)y; } } #elif OP == '/' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x / (double)y; } #define FUN_narm FUN_no_NA #else #error "INTERNAL ERROR: Failed to set C inline function FUN(x, y): Unknown OP" #endif void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, Y_C_TYPE *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, ANS_C_TYPE *ans, R_xlen_t n) { R_xlen_t ii, jj, kk, idx, colBegin; R_xlen_t txi, yi; X_C_TYPE xvalue; Y_C_TYPE yvalue; int noxcols, noxrows, noyidxs; double value; if (xcols == NULL) { noxcols = 1; } else { noxcols = 0; } if (xrows == NULL) { noxrows = 1; } else { noxrows = 0; } if (yidxs == NULL) { noyidxs = 1; } else { noyidxs = 0; } #if ANS_TYPE == 'i' int ok = 1; /* OK, i.e. no integer overflow yet? */ double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; #endif yi = 0; kk = 0; if (byrow) { if (commute) { if (narm) { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } txi = jj; for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = txi%nyidxs; yvalue = y[idx]; } else { idx = yidxs[txi%nyidxs]; if (!yidxsHasNA) { yvalue = y[idx]; } else { yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } } value = FUN_narm(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } else { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } txi = jj; for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = txi%nyidxs; yvalue = y[idx]; } else { idx = yidxs[txi%nyidxs]; if (!yidxsHasNA) { yvalue = y[idx]; } else { yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } } value = FUN_no_NA(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } } else { if (narm) { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } txi = jj; for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = txi%nyidxs; yvalue = y[idx]; } else { idx = yidxs[txi%nyidxs]; if (!yidxsHasNA) { yvalue = y[idx]; } else { yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } } value = FUN_narm(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } else { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } txi = jj; for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = txi%nyidxs; yvalue = y[idx]; } else { idx = yidxs[txi%nyidxs]; if (!yidxsHasNA) { yvalue = y[idx]; } else { yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } } value = FUN_no_NA(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } } } else { /* if (byrow) */ if (commute) { if (narm) { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = yi; yvalue = y[idx]; } else if (!yidxsHasNA) { idx = yidxs[yi]; yvalue = y[idx]; } else { idx = yidxs[yi]; yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } value = FUN_narm(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } else { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = yi; yvalue = y[idx]; } else if (!yidxsHasNA) { idx = yidxs[yi]; yvalue = y[idx]; } else { idx = yidxs[yi]; yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } value = FUN_no_NA(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } } else { if (narm) { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = yi; yvalue = y[idx]; } else if (!yidxsHasNA) { idx = yidxs[yi]; yvalue = y[idx]; } else { idx = yidxs[yi]; yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } value = FUN_narm(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } else { for (jj=0; jj < nxcols; ++jj) { if (noxcols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = xcols[jj]; if (!xcolsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nxrows; ++ii) { if (noxrows) { if (!xcolsHasNA || colBegin != NA_R_XLEN_T) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!xrowsHasNA && !xcolsHasNA) { idx = colBegin + xrows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (xrows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (noyidxs) { idx = yi; yvalue = y[idx]; } else if (!yidxsHasNA) { idx = yidxs[yi]; yvalue = y[idx]; } else { idx = yidxs[yi]; yvalue = R_INDEX_GET(y, idx, Y_NA, 1); } value = FUN_no_NA(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } } } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (!ok) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } #undef FUN #undef FUN_narm matrixStats/src/indexByRow.c0000644000176200001440000000525114531726565015643 0ustar liggesusers/*************************************************************************** Public methods: SEXP indexByRow(SEXP dim, SEXP idxs) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" void indexByRow_i(int nrow, int ncol, int *idxs_ptr, R_xlen_t nidxs, int *ans_ptr) { R_xlen_t i, idx, n_max; int col, row; if (idxs_ptr == NULL) { row = 1; col = 0; for (i = 0; i < nidxs; i++) { ans_ptr[i] = row + col * nrow; col++; if (col == ncol) { row++; col = 0; } } } else { n_max = (R_xlen_t)nrow * (R_xlen_t)ncol; for (i = 0; i < nidxs; i++) { idx = idxs_ptr[i] - 1; if (idx < 0) { error("Argument 'idxs' may only contain positive indices: %lld", (long long int)(idx + 1)); } if (idx >= n_max) { error("Argument 'idxs' contains indices larger than %lld: %lld", (long long int)n_max, (long long int)(idx + 1)); } col = idx / ncol; row = idx % ncol; idx = col + nrow * row + 1; ans_ptr[i] = idx; } } } // indexByRow_i() SEXP indexByRow(SEXP dim, SEXP idxs) { SEXP ans; int d, i; R_xlen_t nidxs; double n_max; int *idxs_ptr; /* Argument 'dim': */ if (!isInteger(dim) || xlength(dim) != 2) { error("Argument 'dim' must be an integer vector of length two."); } n_max = 1.0; for (i = 0; i < xlength(dim); i++) { d = INTEGER(dim)[i]; if (d < 0) { error("Argument 'dim' specifies a negative value: %d", d); } n_max *= d; #ifndef LONG_VECTOR_SUPPORT if (n_max > R_INT_MAX) { error("Argument 'dim' (%d,%d) specifies a matrix that has more than 2^31-1 elements: %.0f", INTEGER(dim)[0], INTEGER(dim)[1], n_max); } #endif if (n_max > R_INT_MAX) { error("Argument 'dim' (%d,%d) specifies a matrix that has more than 2^31-1 elements: %.0f", INTEGER(dim)[0], INTEGER(dim)[1], n_max); } } /* Argument 'idxs': */ if (isNull(idxs)) { idxs_ptr = NULL; nidxs = (R_xlen_t)n_max; } else if (isVectorAtomic(idxs)) { idxs_ptr = INTEGER(idxs); nidxs = xlength(idxs); } else { /* To please compiler */ idxs_ptr = NULL; nidxs = 0; error("Argument 'idxs' must be NULL or a vector."); } PROTECT(ans = allocVector(INTSXP, nidxs)); indexByRow_i(INTEGER(dim)[0], INTEGER(dim)[1], idxs_ptr, nidxs, INTEGER(ans)); UNPROTECT(1); return(ans); } // indexByRow() /*************************************************************************** HISTORY: 2014-11-09 [HB] o Created. **************************************************************************/ matrixStats/src/rowOrderStats_lowlevel_template.h0000644000176200001440000000672314406445217022202 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowOrderStats_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Adopted from rowQ() by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(rowOrderStats, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans) { R_xlen_t ii, jj; R_xlen_t *colOffset, rowIdx; X_C_TYPE *values; // Check missing rows if (rows != NULL) { for (ii=0; ii < nrows; ++ii) { if (rows[ii] == NA_R_XLEN_T) break; } if (ii < nrows && ncols > 0) { error("Argument 'rows' must not contain missing value"); } } // Check missing cols if (cols != NULL) { for (jj=0; jj < ncols; ++jj) { if (cols[jj] == NA_R_XLEN_T) break; } if (jj < ncols && nrows > 0) { error("Argument 'cols' must not contain missing value"); } } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj] * nrow; } for (ii=0; ii < nrows; ii++) { rowIdx = ((rows == NULL) ? (ii) : rows[ii]); for (jj=0; jj < ncols; jj++) values[jj] = x[rowIdx + ((colOffset == NULL) ? (jj*nrow) : colOffset[jj])]; /* Sort vector of length 'ncol' up to position 'qq'. "...partial sorting: they permute x so that x[qq] is in the correct place with smaller values to the left, larger ones to the right." */ X_PSORT(values, ncols, qq); ans[ii] = values[qq]; } } /*************************************************************************** HISTORY: 2015-07-08 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-01-13 [HB] o Merged rowOrderStatsReal() and rowOrderStatsInteger() into one rowOrderStats_() templated function. 2009-02-04 [HB] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. 2008-03-25 [HB] o Renamed from 'rowQuantiles' to 'rowOrderStats'. 2007-08-10 [HB] o Removed arguments for NAs since rowOrderStats() still don't support it. 2005-11-24 [HB] o Cool, it works and compiles nicely. o Preallocate colOffset to speed up things even more. o Added more comments and error checking. o Adopted from rowQ() in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/binCounts.c0000644000176200001440000000346414531726565015521 0ustar liggesusers/*************************************************************************** Public methods: binCounts(SEXP x, SEXP bx, SEXP right) Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include #include "000.types.h" #include #include "binCounts_lowlevel.h" SEXP binCounts(SEXP x, SEXP bx, SEXP right) { SEXP counts = NILSXP; R_xlen_t nbins; int closedRight; /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); /* Argument 'bx': */ assertArgVector(bx, (R_TYPE_REAL), "bx"); nbins = xlength(bx)-1; if (nbins <= 0) { error("Argument 'bx' must specify at least two bin boundaries (= one bin): %lld", (long long int)xlength(bx)); } /* Argument 'right': */ closedRight = asLogicalNoNA(right, "right"); PROTECT(counts = allocVector(INTSXP, nbins)); if (closedRight) { binCounts_R(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts)); } else { binCounts_L(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts)); } UNPROTECT(1); return(counts); } // binCounts() /*************************************************************************** HISTORY: 2015-05-30 [HB] o Added protected against 'bx' too short. 2014-06-03 [HB] o Dropped unused variable 'count'. 2013-10-08 [HB] o Now binCounts() calls binCounts_(). 2013-05-10 [HB] o SPEEDUP: binCounts() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binCounts() would return random/garbage counts for bins that were beyond the last data point. o BUG FIX: In some cases binCounts() could try to go past the last bin. 2012-10-03 [HB] o Created. **************************************************************************/ matrixStats/src/binMeans.c0000644000176200001440000000543614531726565015312 0ustar liggesusers/*************************************************************************** Public methods: binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include #include "000.types.h" #include #include "binMeans_lowlevel.h" SEXP binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) { SEXP ans = NILSXP, count = NILSXP; R_xlen_t nx, ny, nbins; int closedRight, retcount; int *count_ptr = NULL; /* Argument 'y': */ assertArgVector(y, (R_TYPE_REAL), "y"); ny = xlength(y); /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); nx = xlength(x); if (nx != ny) { error("Argument 'y' and 'x' are of different lengths: %lld != %lld", (long long int)ny, (long long int)nx); } /* Argument 'bx': */ assertArgVector(bx, (R_TYPE_REAL), "bx"); nbins = xlength(bx)-1; if (nbins <= 0) { error("Argument 'bx' must specify at least two bin boundaries (= one bin): %lld", (long long int)xlength(bx)); } /* Argument 'right': */ closedRight = asLogicalNoNA(right, "right"); /* Argument 'retCount': */ retcount = asLogicalNoNA(retCount, "retCount"); PROTECT(ans = allocVector(REALSXP, nbins)); if (retcount) { PROTECT(count = allocVector(INTSXP, nbins)); count_ptr = INTEGER(count); } if (closedRight) { binMeans_R(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr); } else { binMeans_L(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr); } if (retcount) { setAttrib(ans, install("count"), count); UNPROTECT(1); // 'count' } UNPROTECT(1); // 'ans' return ans; return(ans); } // binMeans() /*************************************************************************** HISTORY: 2015-05-30 [HB] o Added protected against 'bx' too short. 2014-10-06 [HB] o CLEANUP: All argument validation is now done by the high-level C API. 2014-06-02 [HB] o CLEANUP: Removed unused variable in binMeans(). 2013-10-08 [HB] o Now binCounts() calls binCounts_(). 2013-05-10 [HB] o SPEEDUP: binMeans() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binMeans() would return random/garbage means/counts for bins that were beyond the last data point. o BUG FIX: In some cases binMeans() could try to go past the last bin. 2012-10-03 [HB] o Created binMeans(), which was adopted from from code proposed by Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as a reply to HB's R-devel thread 'Fastest non-overlapping binning mean function out there?' on Oct 3, 2012. **************************************************************************/ matrixStats/src/rowLogSumExp_lowlevel_template.h0000644000176200001440000000416714436403546021775 0ustar liggesusers/*********************************************************************** TEMPLATE: double rowLogSumExp_double(ARGUMENTS_LIST) ARGUMENTS_LIST: double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, R_xlen_t byrow, double *ans ***********************************************************************/ #include "000.types.h" #include "000.templates-types.h" /* extern 1-D function 'logSumExp' */ extern double logSumExp_double(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int hasna, R_xlen_t by, double *xx); void rowLogSumExps_double(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, R_xlen_t byrow, double *ans) { R_xlen_t ii, idx; double navalue; // double (*logsumexp)(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, R_xlen_t by, double *xx); if (byrow) { /* R allocate memory for row-vector 'xx' of length 'ncol'. This will be taken care of by the R garbage collector later on. */ double *xx = (double *) R_alloc(ncols, sizeof(double)); navalue = (narm || ncols == 0) ? R_NegInf : NA_REAL; for (ii=0; ii < nrows; ++ii) { idx = ((rows == NULL) ? (ii) : rows[ii]); if (idx == NA_R_XLEN_T) { ans[ii] = navalue; } else { ans[ii] = logSumExp_double(x+idx, cols, ncols, colsHasNA, narm, hasna, nrow, xx); } } } else { navalue = (narm || nrows == 0) ? R_NegInf : NA_REAL; for (ii=0; ii < ncols; ++ii) { idx = R_INDEX_OP(((cols == NULL) ? (ii) : cols[ii]), *, nrow, colsHasNA, 0); if (idx == NA_R_XLEN_T) { ans[ii] = navalue; } else { ans[ii] = logSumExp_double(x+idx, rows, nrows, rowsHasNA, narm, hasna, 0, NULL); } } } /* if (byrow) */ } /*************************************************************************** HISTORY: 2013-06-12 [DH] o Created. **************************************************************************/ matrixStats/src/rowCounts_lowlevel.h0000644000176200001440000000204414436403546017462 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCounts_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int value, int what, int narm, int hasna, int *ans) void rowCounts_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int value, int what, int narm, int hasna, int *ans) void rowCounts_lgl(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int value, int what, int narm, int hasna, int *ans) */ #define X_TYPE 'i' #include "rowCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'l' #include "rowCounts_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowMeans2_lowlevel_template.h0000644000176200001440000001112714436403546021231 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowMeans2_(ARGUMENTS_LIST) Copyright: Henrik Bengtsson, 2017 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowMeans2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, idx; R_xlen_t *colOffset; X_C_TYPE value; LDOUBLE sum, avg; R_xlen_t count; int nocols, norows; /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } /* Pre-calculate the column offsets */ if (nocols) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) if(!rowsHasNA && !colsHasNA){ colOffset[jj] = cols[jj] * nrow; } else{ colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow,1,1); } } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } for (ii=0; ii < nrows; ii++) { R_xlen_t rowIdx; if (norows) { /* ii and ncols cannot be NA-values, so we do not need R_INDEX_OP */ rowIdx = byrow ? ii : ii*ncol; } else { if(!rowsHasNA && !colsHasNA) { rowIdx = byrow ? rows[ii] : rows[ii] * ncol; } rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol, 1, 1); } sum = 0.0; count = 0; for (jj=0; jj < ncols; jj++) { if (!rowsHasNA && nocols) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; value = x[idx]; } else if (!rowsHasNA && !colsHasNA && !nocols) { idx = rowIdx + colOffset[jj]; value = x[idx]; } else { if (nocols) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow, 1, 1); else idx = R_INDEX_OP(rowIdx, +, jj, 1, 1); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj], 1, 1); } value = R_INDEX_GET(x, idx, X_NA,1); } #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; ++count; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; ++count; if (jj % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; ++count; } #endif } /* for (jj ...) */ if (sum > DBL_MAX) { avg = R_PosInf; } else if (sum < -DBL_MAX) { avg = R_NegInf; } else { avg = sum / count; #if X_TYPE == 'r' if (refine) { sum = 0.0; for (jj=0; jj < ncols; jj++) { if (!rowsHasNA && nocols) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; value = x[idx]; } else if (!rowsHasNA && !colsHasNA && !nocols) { idx = rowIdx + colOffset[jj]; value = x[idx]; } else { if (nocols) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow, 1, 1); else idx = R_INDEX_OP(rowIdx, +, jj, 1, 1); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj], 1, 1); } value = R_INDEX_GET(x, idx, X_NA,1); } if (!narm) { sum += (LDOUBLE)(value) - avg; if (jj % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)(value) - avg; } } avg = avg + sum / count; } /* for (jj ...) */ #endif } ans[ii] = (double)avg; R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } matrixStats/src/anyMissing_lowlevel_template.h0000644000176200001440000000572614436403546021505 0ustar liggesusers/*********************************************************************** TEMPLATE: int anyMissing_internal(ARGUMENTS_LIST) ARGUMENTS_LIST: SEXP x, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA ***********************************************************************/ #include #include "000.types.h" #include "000.templates-types.h" #ifndef CHECK_MISSING #define CHECK_MISSING(cond) \ for (ii=0; ii < nidxs; ++ii) { \ if (cond) return 1; \ } #endif int anyMissing_internal(SEXP x, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA) { R_xlen_t ii; double *xdp; int *xip, *xlp; Rcomplex *xcp; switch (TYPEOF(x)) { case REALSXP: xdp = REAL(x); if (idxs == NULL) { CHECK_MISSING(ISNAN(xdp[ii])); } else if (!idxsHasNA) { CHECK_MISSING(ISNAN(xdp[idxs[ii]])); } else { CHECK_MISSING(ISNAN(R_INDEX_GET(xdp, idxs[ii], NA_REAL, 1))); } break; case INTSXP: xip = INTEGER(x); if (idxs == NULL) { CHECK_MISSING(xip[ii] == NA_INTEGER); } else if (!idxsHasNA) { CHECK_MISSING(xip[idxs[ii]] == NA_INTEGER); } else { CHECK_MISSING(R_INDEX_GET(xip, idxs[ii], NA_INTEGER, 1) == NA_INTEGER); } break; case LGLSXP: xlp = LOGICAL(x); if (idxs == NULL) { CHECK_MISSING(xlp[ii] == NA_LOGICAL); } else if (!idxsHasNA) { CHECK_MISSING(xlp[idxs[ii]] == NA_LOGICAL); } else { CHECK_MISSING(R_INDEX_GET(xlp, idxs[ii], NA_LOGICAL, 1) == NA_LOGICAL); } break; case CPLXSXP: xcp = COMPLEX(x); if (idxs == NULL) { CHECK_MISSING(ISNAN(xcp[ii].r) || ISNAN(xcp[ii].i)); } else if (!idxsHasNA) { CHECK_MISSING(ISNAN(xcp[idxs[ii]].r) || ISNAN(xcp[idxs[ii]].i)); } else { /* * We exploit the short-circuiting of the logical OR operator such that if idxs[ii] * is NA, then we don't evaluate the latter two parts of the expression * */ CHECK_MISSING(idxs[ii] == NA_R_XLEN_T || ISNAN(xcp[idxs[ii]].r) || ISNAN(xcp[idxs[ii]].i)); } break; case STRSXP: if (idxs == NULL) { CHECK_MISSING(STRING_ELT(x,ii) == NA_STRING); } else if (!idxsHasNA) { CHECK_MISSING(STRING_ELT(x,idxs[ii]) == NA_STRING); } else { CHECK_MISSING(idxs[ii] == NA_R_XLEN_T || STRING_ELT(x, idxs[ii]) == NA_STRING); } break; case RAWSXP: /* no such thing as a raw NA; always FALSE */ break; default: break; } /* switch() */ return 0; } // anyMissing() /*************************************************************************** HISTORY: 2015-07-15 [DJ] o Avoid 'embedding a directive within macro arguments'. 2015-06-15 [DJ] o Created. **************************************************************************/ matrixStats/src/rowSums2_lowlevel.h0000644000176200001440000000134314436403546017221 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowSums2_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans) void rowSums2_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowSums2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowSums2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/binCounts_lowlevel_template.h0000644000176200001440000000640314372747711021325 0ustar liggesusers/*************************************************************************** TEMPLATE: void binCounts_(...) GENERATES: void binCounts_L(double *x, int nx, double *bx, int nbins, int *count) void binCounts_R(double *x, int nx, double *bx, int nbins, int *count) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - BIN_BY: 'L' or 'R' Copyright Henrik Bengtsson, 2012-2014 **************************************************************************/ #include #if BIN_BY == 'L' /* [u,v) */ #define METHOD_NAME binCounts_L #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1) #elif BIN_BY == 'R' /* (u,v] */ #define METHOD_NAME binCounts_R #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1) #endif void METHOD_NAME(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) { R_xlen_t ii = 0, jj = 0, iStart = 0; int n = 0; int warn = 0; // Count? if (nbins > 0) { // Skip to the first bin while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) { ++iStart; } // For each x... for (ii = iStart; ii < nx; ++ii) { // Skip to a new bin? while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) { count[jj++] = n; // No more bins? if (jj >= nbins) { ii = nx; // Cause outer for-loop to exit break; } n = 0; } /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n == R_INT_MAX) { warn = 1; // No point in keep counting for this bin break; } // Count ++n; } // Update count of the last bin? if (jj < nbins) { count[jj] = n; // Assign the remaining bins to zero counts while (++jj < nbins) { count[jj] = 0; } } } // if (nbins > 0) if (warn) { warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX); } } /* Undo template macros */ #undef BIN_BY #undef IS_PART_OF_FIRST_BIN #undef IS_PART_OF_NEXT_BIN #include "000.templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-07 [HB] o ROBUSTNESS: Added protection for integer overflow in bin counts. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-10-08 [HB] o Created template for binCounts_() to create functions that bin either by [u,v) or (u,v]. 2013-05-10 [HB] o SPEEDUP: binCounts() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binCounts() would return random/garbage counts for bins that were beyond the last data point. o BUG FIX: In some cases binCounts() could try to go past the last bin. 2012-10-03 [HB] o Created. **************************************************************************/ matrixStats/src/rowMads_lowlevel_template.h0000644000176200001440000002225514436403546020774 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowMads_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, double scale, int narm, int hasna, int byrow, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "000.types.h" #include /* abs() and fabs() */ /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowMads, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, double scale, int narm, int hasna, int byrow, double *ans) { int isOdd; R_xlen_t ii, jj, kk, qq, idx; R_xlen_t *colOffset; X_C_TYPE *values, value, mu; double *values_d, value_d, mu_d; int nocols, norows; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); values_d = (double *) R_alloc(ncols, sizeof(double)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* When narm == FALSE, isOdd and qq are the same for all rows */ if (narm == FALSE) { isOdd = (ncols % 2 == 1); qq = (R_xlen_t)(ncols/2) - 1; } else { isOdd = FALSE; qq = 0; } value = 0; /* Pre-calculate the column offsets */ if (nocols) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) if(!rowsHasNA && !colsHasNA){ colOffset[jj] = cols[jj] * nrow; } else{ colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow,1,1); } } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } hasna = TRUE; if (hasna == TRUE) { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (norows) { /* ii and ncols cannot be NA-values, so we do not need R_INDEX_OP */ rowIdx = byrow ? ii : ii*ncol; } else { if(!rowsHasNA && !colsHasNA) { rowIdx = byrow ? rows[ii] : rows[ii] * ncol; } rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol,1,1); } kk = 0; /* The index of the last non-NA value detected */ for (jj=0; jj < ncols; jj++) { if (!rowsHasNA && nocols) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; value = x[idx]; } else if (!rowsHasNA && !colsHasNA && !nocols) { idx = rowIdx + colOffset[jj]; value = x[idx]; } else { if (nocols) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow,1,1); else idx = R_INDEX_OP(rowIdx, +, jj,1,1); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj],1,1); } value = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* for (jj ...) */ /* Note that 'values' will never contain NA/NaNs */ if (kk == 0) { ans[ii] = NA_REAL; } else if (kk == 1) { ans[ii] = 0; } else if (kk == -1) { ans[ii] = R_NaReal; } else { /* When narm == TRUE, isOdd and qq may change with row */ if (narm == TRUE) { isOdd = (kk % 2 == 1); qq = (R_xlen_t)(kk/2) - 1; } /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; /* Calculate mu and sigma */ if (isOdd == TRUE) { /* Since there are an odd number of values, then we also know that 'mu' is one of the values in 'x', which in turn mean we don't have to coerce integers to doubles, if 'x' is an integer. Simple benchmarking shows that it significantly faster to avoid coercion. */ mu = value; /* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */ for (jj=0; jj < kk; jj++) { value = (values[jj] - mu); values[jj] = X_ABS(value); } /* (b) Calculate median of |x-mu| */ /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; ans[ii] = scale * (double)value; } else { /* Here we have to coerce to doubles since 'mu' is an average. */ /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); #if X_TYPE == 'i' /* If the difference between two integers is an even number, then their means is also an integer, and then we can avoid coercion to double also here. This should happen roughly half the time we end up here which is worth optimizing for. Simple benchmarking show a significant difference in speed, particular for the column-based version. */ if ((values[qq] - value) % 2 == 0) { /* No need to coerce */ mu = (values[qq] + value)/2; /* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */ for (jj=0; jj < kk; jj++) { value = (values[jj] - mu); values[jj] = X_ABS(value); } /* (b) Calculate median of |x-mu| */ /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); X_PSORT(values, qq+1, qq); ans[ii] = scale * ((double)values[qq] + (double)values[qq+1])/2; /* Done, continue to next vector */ continue; } #endif mu_d = ((double)values[qq] + (double)value)/2; /* (a) Subtract mu and square, i.e. x <- (x-mu)^2 */ for (jj=0; jj < kk; jj++) { value_d = ((double)values[jj] - mu_d); values_d[jj] = fabs(value_d); } /* (b) Calculate median */ /* Permute x[0:kk-1] so that x[qq-1] and x[qq] are in the correct places with smaller values to the left, ... */ rPsort(values_d, kk, qq+1); rPsort(values_d, qq+1, qq); ans[ii] = scale * (values_d[qq] + values_d[qq+1])/2; } } /* if (kk == 0) */ R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (norows) { /* ii and ncols cannot be NA-values, so we do not need R_INDEX_OP */ rowIdx = byrow ? ii : ii*ncol; } else { if(!rowsHasNA && !colsHasNA) { rowIdx = byrow ? rows[ii] : rows[ii] * ncol; } rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol,1,1); } for (jj=0; jj < ncols; jj++) { if (!nocols) { if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; values[jj] = x[idx]; } else { idx = rowIdx + colOffset[jj]; values[jj] = x[idx]; } } //HJ /* Permute x[0:ncols-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, ncols, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + value)/2; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* if (hasna ...) */ } /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-17 [HB] o Created from rowMedians_TYPE-template.h. **************************************************************************/ matrixStats/src/mean2_lowlevel.h0000644000176200001440000000105514436403546016462 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double mean2_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine) double mean2_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine) */ #define X_TYPE 'i' #include "mean2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "mean2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/000.utils.h0000644000176200001440000000777114406445217015216 0ustar liggesusers#include #include "000.types.h" #define R_TYPE_LGL 1 /* 0b0001 */ #define R_TYPE_INT 2 /* 0b0010 */ #define R_TYPE_REAL 4 /* 0b0100 */ static R_INLINE void assertArgVector(SEXP x, int type, char *xlabel) { /* Argument 'x': */ if (!isVectorAtomic(x)) { error("Argument '%s' must be a matrix or a vector.", xlabel); } switch (TYPEOF(x)) { case LGLSXP: if (!(type & R_TYPE_LGL)) error("Argument '%s' cannot be logical.", xlabel); break; case INTSXP: if (!(type & R_TYPE_INT)) error("Argument '%s' cannot be integer.", xlabel); break; case REALSXP: if (!(type & R_TYPE_REAL)) error("Argument '%s' cannot be numeric.", xlabel); break; default: error("Argument '%s' must be of type logical, integer or numeric, not '%s'.", xlabel, type2char(TYPEOF(x))); } /* switch */ } /* assertArgVector() */ static R_INLINE void assertArgDim(SEXP dim, double max, char *maxlabel) { double nrow, ncol; const char *dimlabel = "dim."; /* Argument 'dim': */ if (!isVectorAtomic(dim) || xlength(dim) != 2 || !isInteger(dim)) { error("Argument '%s' must be an integer vector of length two.", dimlabel); } nrow = (double)INTEGER(dim)[0]; ncol = (double)INTEGER(dim)[1]; if (nrow < 0) { error("Argument '%s' specifies a negative number of rows (%s[1]): %g", dimlabel, dimlabel, nrow); } else if (ncol < 0) { error("Argument '%s' specifies a negative number of columns (%s[2]): %g", dimlabel, dimlabel, ncol); } else if (nrow * ncol != max) { error("Argument '%s' does not match length of argument '%s': %g * %g != %g", dimlabel, maxlabel, nrow, ncol, max); } } /* assertArgDim() */ static R_INLINE void assertArgMatrix(SEXP x, SEXP dim, int type, char *xlabel) { /* Argument 'x': */ if (isMatrix(x)) { } else if (isVectorAtomic(x)) { } else { error("Argument '%s' must be a matrix or a vector.", xlabel); } switch (TYPEOF(x)) { case LGLSXP: if (!(type & R_TYPE_LGL)) error("Argument '%s' cannot be logical.", xlabel); break; case INTSXP: if (!(type & R_TYPE_INT)) error("Argument '%s' cannot be integer.", xlabel); break; case REALSXP: if (!(type & R_TYPE_REAL)) error("Argument '%s' cannot be numeric.", xlabel); break; default: error("Argument '%s' must be of type logical, integer or numeric, not '%s'.", xlabel, type2char(TYPEOF(x))); } /* switch */ /* Argument 'dim': */ assertArgDim(dim, xlength(x), "x"); } /* assertArgMatrix() */ static R_INLINE int asLogicalNoNA(SEXP x, char *xlabel) { int value = 0; if (length(x) != 1) error("Argument '%s' must be a single value.", xlabel); if (isLogical(x)) { value = asLogical(x); } else if (isInteger(x)) { value = asInteger(x); } else { error("Argument '%s' must be a logical.", xlabel); } if (value != TRUE && value != FALSE) error("Argument '%s' must be either TRUE or FALSE.", xlabel); return value; } /* asLogicalNoNA() */ /* Retrieve the 'i'th element of 'x' as R_xlen_t */ static R_INLINE R_xlen_t asR_xlen_t(SEXP x, R_xlen_t i) { int mode = TYPEOF(x); switch (mode) { case INTSXP: return INTEGER(x)[i]; case REALSXP: return REAL(x)[i]; default: error("only integer and numeric are supported, not '%s'.", type2char(TYPEOF(x))); } return 0; } /* asR_xlen_t() */ /* Specified in validateIndices.c */ R_xlen_t *validateIndicesCheckNA(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna); R_xlen_t *validateIndices(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs); static R_INLINE int int_from_dbl(double x) { if (ISNAN(x)) return NA_INTEGER; if (x > INT_MAX || x <= INT_MIN) return NA_INTEGER; return x; } /* int_from_dbl() */ static R_INLINE double dbl_from_int(int x) { if (x == NA_INTEGER) return NA_REAL; return x; } /* dbl_from_int() */ #define SWAP(type, x, y) { \ type tmp = x; \ x = y; \ y = tmp; \ } matrixStats/src/rowCounts_lowlevel_template.h0000644000176200001440000002606414436403546021365 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCounts_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, X_C_TYPE value, int what, int narm, int hasna, int *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r', or 'l' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(rowCounts, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, X_C_TYPE value, int what, int narm, int hasna, int *ans) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; int count; X_C_TYPE xvalue; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } if (what == 0) { /* all */ for (ii=0; ii < nrows; ii++) ans[ii] = 1; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { /* Skip? */ if (ans[ii]) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (!X_ISNAN(xvalue)) { ans[ii] = 0; /* Found another value! Skip from now on */ } } } } } else { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { /* Skip? */ if (ans[ii]) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (xvalue == value) { } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is not 'value' later, then we know for sure that all = FALSE regardless of missing values. In other words, at this point the answer can be either NA or FALSE.*/ ans[ii] = NA_INTEGER; } else { /* Found another value! Skip from now on */ ans[ii] = 0; } } } /* for (ii ...) */ } /* for (jj ...) */ } } else if (what == 1) { /* any */ for (ii=0; ii < nrows; ii++) ans[ii] = 0; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { /* Skip? */ if (!ans[ii]) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) { ans[ii] = 1; /* Found value! Skip from now on */ } } } } } else { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { /* Skip? */ if (ans[ii] == 0 || ans[ii] == NA_INTEGER) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (xvalue == value) { /* Found value! Skip from now on */ ans[ii] = 1; } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is 'value' later, then we know for sure that any = TRUE regardless of missing values. In other words, at this point the answer can be either NA or TRUE.*/ ans[ii] = NA_INTEGER; } } } /* for (ii ...) */ } /* for (jj ...) */ } } else if (what == 2) { /* count */ for (ii=0; ii < nrows; ii++) ans[ii] = 0; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) ans[ii] = ans[ii] + 1; } } } else { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { count = ans[ii]; /* Nothing more to do on this row? */ if (count == NA_INTEGER) continue; if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (xvalue == value) { ans[ii] = count + 1; } else { if (!narm && X_ISNAN(xvalue)) { ans[ii] = NA_INTEGER; continue; } } } /* for (ii ...) */ } /* for (jj ...) */ } } /* if (what) */ } /*************************************************************************** HISTORY: 2015-04-13 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-11-01 [HB] o SPEEDUP: Now using ansp = INTEGER(ans) once and then querying/assigning 'ansp[i]' instead of INTEGER(ans)[i]. 2014-06-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowMedians_lowlevel_template.h0000644000176200001440000001715014436403546021466 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowMedians_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowMedians, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int hasna, int byrow, double *ans) { int isOdd; R_xlen_t ii, jj, kk, qq, idx; R_xlen_t *colOffset; X_C_TYPE *values, value; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* When narm == FALSE, isOdd and qq are the same for all rows */ if (narm == FALSE) { isOdd = (ncols % 2 == 1); qq = (R_xlen_t)(ncols/2) - 1; } else { isOdd = FALSE; qq = 0; } value = 0; /* Pre-calculate the column offsets */ if (nocols) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) if(!rowsHasNA && !colsHasNA){ colOffset[jj] = cols[jj] * nrow; } else{ colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow,1,1); } } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } if (hasna) { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (norows) { /* ii and ncols cannot be NA-values, so we do not need R_INDEX_OP */ rowIdx = byrow ? ii : ii*ncol; } else { if(!rowsHasNA && !colsHasNA) { rowIdx = byrow ? rows[ii] : rows[ii] * ncol; } rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol,1,1); } kk = 0; /* The index of the last non-NA value detected */ for (jj=0; jj < ncols; jj++) { if (!rowsHasNA && nocols) { /* * In this special case, we can eliminate * the possibility of having NA indicies */ if (byrow) idx = rowIdx + jj*nrow; else idx = rowIdx + jj; value = x[idx]; } else if (!rowsHasNA && !colsHasNA && !nocols) { idx = rowIdx + colOffset[jj]; value = x[idx]; } else { if (nocols) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow,1,1); else idx = R_INDEX_OP(rowIdx, +, jj,1,1); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj],1,1); } value = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* Note that 'values' will never contain NA/NaNs */ if (kk == 0) { ans[ii] = R_NaN; } else if (kk == -1) { ans[ii] = R_NaReal; } else { /* When narm == TRUE, isOdd and qq may change with row */ if (narm == TRUE) { isOdd = (kk % 2 == 1); qq = (R_xlen_t)(kk/2) - 1; } /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + (double)value)/2; } } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (norows) { rowIdx = byrow ? ii : ii*ncol; } else { rowIdx = byrow ? rows[ii] : rows[ii]*ncol; } for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) values[jj] = x[rowIdx+(jj)*nrow]; else values[jj] = x[rowIdx+(jj)]; } else { values[jj] = x[rowIdx+colOffset[jj]]; } } //HJ /* Permute x[0:ncols-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, ncols, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + (double)value)/2; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* if (hasna ...) */ } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-11-01 [HB] o SPEEDUP: Now using 'ansp = REAL(ans)' once and then assigning to 'ansp' instead of to 'REAL(ans)'. 2013-04-23 [HB] o BUG FIX: The integer template of rowMedians_() would not handle ties properly. This was because ties were calculated as '(double)((rowData[qq] + value)/2)' instead of '((double)(rowData[qq] + value))/2'. 2013-01-13 [HB] o Merged rowMedians_int() and rowMedians_dbl() into template rowMedians_(). 2013-01-13 [HB] o Using internal arguments 'by_row' instead of 'by_column'. 2011-12-11 [HB] o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s. Note that NaN:s does not exist for integers. 2011-10-12 [HJ] o Added colMedians(). o Now rowMediansInteger/Real() can operate also by columns, cf. argument 'by_column'. 2007-08-14 [HB] o Added checks for user interrupts every 1000 line. o Added argument 'hasNA' to rowMedians(). 2005-12-07 [HB] o BUG FIX: When calculating the median of an even number (non-NA) values, the length of the second sort was one element too short, which made the method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq). 2005-11-24 [HB] o By implementing a special version for integers, there is no need to coerce to double in R, which would take up twice the amount of memory. o rowMedians() now handles NAs too. o Adopted from rowQuantiles.c in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/psortKM.c0000644000176200001440000000534214531477242015144 0ustar liggesusers/*************************************************************************** Public methods: SEXP psortKM(SEXP x, SEXP k, SEXP nk) Arguments: x: numeric vector k: integer scalar in [1,length(x)] m: integer scalar in [1,k] and not too large if k is large. Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2012 **************************************************************************/ #include #include #include "000.types.h" #include "000.utils.h" void psortKM_C(double *x, R_xlen_t nx, R_xlen_t k, R_xlen_t m, double *ans) { R_xlen_t ii, ll; double *xx; /* R allocate memory for the 'xx'. This will be taken care of by the R garbage collector later on. */ xx = (double *) R_alloc(nx, sizeof(double)); /* Create a local copy 'xx' of 'x'. */ for (ii=0; ii < nx; ii++) { xx[ii] = x[ii]; } /* Permute xx[0:partial] so that xx[partial+1] is in the correct place with smaller values to the left, ... Example: psortKM(x, k=50, m=2) with length(x) = 1000 rPsort(xx, 1000, 50); We know x[50] and that x[1:49] <= x[50] rPsort(xx, 50, 49); x[49] and that x[1:48] <= x[49] rPsort(xx, 49, 48); x[48] and that x[1:47] <= x[48] */ ll = nx; for (ii=0; ii < m; ii++) { rPsort(xx, ll, k-1-ii); ll = (k-1)-ii; } for (ii=0; ii < m; ii++) { ans[ii] = xx[(k-m)+ii]; } } /* psortKM_C() */ SEXP psortKM(SEXP x, SEXP k, SEXP m) { SEXP ans; R_xlen_t nx, kk, mm; /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); nx = xlength(x); if (nx == 0) { error("Argument 'x' must not be empty."); } /* Argument 'k': */ if (!isInteger(k)) { error("Argument 'k' must be an integer."); } if (length(k) != 1) { error("Argument 'k' must be a single integer."); } kk = asInteger(k); if (kk <= 0) { error("Argument 'k' must be a positive integer."); } if (kk > nx) { error("Argument 'k' must not be greater than number of elements in 'x'."); } /* Argument 'm': */ if (!isInteger(m)) { error("Argument 'm' must be an integer."); } if (length(m) != 1) { error("Argument 'm' must be a single integer."); } mm = asInteger(m); if (mm <= 0) { error("Argument 'm' must be a positive integer."); } else if (mm > kk) { error("Argument 'm' must not be greater than argument 'k'."); } /* R allocate a double vector of length 'partial' */ PROTECT(ans = allocVector(REALSXP, mm)); psortKM_C(REAL(x), nx, kk, mm, REAL(ans)); UNPROTECT(1); return(ans); } /* psortKM() */ /*************************************************************************** HISTORY: 2012-09-10 [HB] o Added psortKM(). o Created. **************************************************************************/ matrixStats/src/sum2_lowlevel_template.h0000644000176200001440000000372714436403546020251 0ustar liggesusers/*********************************************************************** TEMPLATE: double sum2_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014-2018 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(sum2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm) { X_C_TYPE value; R_xlen_t ii; LDOUBLE sum = 0; int noidxs; if (idxs == NULL) { noidxs = 1; } else { noidxs = 0; } for (ii=0; ii < nidxs; ++ii) { if (noidxs) { value = x[ii]; } else { R_xlen_t idx = idxs[ii]; if (!idxsHasNA) { value = x[idx]; } else { value = R_INDEX_GET(x, idx, X_NA, 1); } } #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; /* Early stopping if sum is NA_real_ (but not NaN, -Inf, or +Inf) */ if (ii % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; } #endif } /* for (ii ...) */ return (double)sum; } /*************************************************************************** HISTORY: 2015-07-11 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Now sum2_() uses only basic C types. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/colCounts.c0000644000176200001440000001200414531477242015507 0ustar liggesusers/*************************************************************************** Public methods: SEXP colCounts(SEXP x, ...) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "colCounts_lowlevel.h" #include "naming.h" SEXP colCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans; int narm, hasna, what2, usenames; R_xlen_t ii, nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric or a logical value."); /* Argument 'what': */ what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("INTERNAL ERROR: Unknown value of 'what' for rowCounts: %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* R allocate an integer vector of length 'ncol' */ /* R allocate memory for vector 'count' of length 'ncols'. This will be taken care of by the R garbage collector later on. */ double *count = (double *) R_alloc(ncols, sizeof(double)); if (isReal(x)) { colCounts_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asReal(value), what2, narm, hasna, count); } else if (isInteger(x)) { colCounts_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asInteger(value), what2, narm, hasna, count); } else if (isLogical(x)) { colCounts_lgl(LOGICAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asLogical(value), what2, narm, hasna, count); } /* Coerce counts from double to integer. This is needed because colCount_nnn() returns double counts, which is in turn is needed because count() may need to return > INT_MAX. */ PROTECT(ans = allocVector(INTSXP, ncols)); int *ans_ptr = INTEGER(ans); for (ii = 0; ii < ncols; ii++) { if (count[ii] == (double)NA_R_XLEN_T) { ans_ptr[ii] = NA_INTEGER; } else { ans_ptr[ii] = (int)count[ii]; } } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } UNPROTECT(2); return(ans); } // colCounts() SEXP count(SEXP x, SEXP idxs, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans; int narm, hasna, what2; R_xlen_t nx; double count = 0.0; /* Argument 'x' and 'dim': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric or a logical value."); /* Argument 'what': */ what2 = asInteger(what); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'idxs': */ R_xlen_t nrows, ncols = 1; int rowsHasNA; int colsHasNA = 0; R_xlen_t *crows = validateIndicesCheckNA(idxs, nx, 1, &nrows, &rowsHasNA); R_xlen_t *ccols = NULL; if (isReal(x)) { colCounts_dbl(REAL(x), nx, 1, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asReal(value), what2, narm, hasna, &count); } else if (isInteger(x)) { colCounts_int(INTEGER(x), nx, 1, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asInteger(value), what2, narm, hasna, &count); } else if (isLogical(x)) { colCounts_lgl(LOGICAL(x), nx, 1, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, asLogical(value), what2, narm, hasna, &count); } /* R allocate a scalar */ if (count > (double)INT_MAX && count != (double)NA_R_XLEN_T) { PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = count; UNPROTECT(1); } else { PROTECT(ans = allocVector(INTSXP, 1)); if (count == (double)NA_R_XLEN_T) { INTEGER(ans)[0] = NA_INTEGER; } else { INTEGER(ans)[0] = (int)count; } UNPROTECT(1); } return(ans); } // count() /*************************************************************************** HISTORY: 2015-04-21 [DJ] o Supported subsetted computation. 2014-11-14 [HB] o Created from rowCounts.c. **************************************************************************/ matrixStats/src/logSumExp_lowlevel.h0000644000176200001440000000042414406445217017400 0ustar liggesusers#include #include #include "000.utils.h" /* Native API (dynamically generated via macros): double logSumExp_double(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, R_xlen_t by, double *xx) */ #include "logSumExp_lowlevel_template.h" matrixStats/src/rowCumprods.c0000644000176200001440000000455714531477242016100 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCumprods(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCumprods_lowlevel.h" #include "naming.h" SEXP rowCumprods(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCumprods_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x) | isLogical(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCumprods_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCumprods() */ /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/weightedMedian_lowlevel_template.h0000644000176200001440000002161514436403546022275 0ustar liggesusers/*********************************************************************** TEMPLATE: double weightedMedian_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int interpolate, int ties Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(weightedMedian, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int interpolate, int ties) { X_C_TYPE value; X_C_TYPE *xtmp; double weight, res; double dx, dy, Dy; double *wtmp, *wcum, wtotal, wlow, whigh, tmp_d, tmp_d2; R_xlen_t nxt, ii, jj, half; int *idxs_int; int equalweights = 0; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Weights */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ wtmp = Calloc(nidxs, double); /* Check for missing, negative, and infite weights */ nxt = 0; for (ii=0; ii < nidxs; ii++) { /* Assume negative or missing weight by default or that the signals is missing and should be dropped */ wtmp[ii] = 0; /* * Unlike the indices, the weights are not pre-checked for NA values in any way, * meaning that we must pretend that the weights have NA-values in them */ weight = R_INDEX_GET(w, ((idxs == NULL) ? (ii) : idxs[ii]), NA_REAL, 1); if (ISNAN(weight)) { if (!narm) { Free(wtmp); return NA_REAL; } } else if (weight <= 0) { /* Drop non-positive weights */ } else if (isinf(weight)) { /* Detected a +Inf. From now on, treat all +Inf weights equal and drop everything else */ nxt = 0; for (jj=0; jj < nidxs; jj++) { /* Assume non-infinite weight by default */ wtmp[jj] = 0; weight = R_INDEX_GET(w, ((idxs == NULL) ? (jj) : idxs[jj]), NA_REAL, 1); if (isinf(weight)) { value = R_INDEX_GET(x, ((idxs == NULL) ? (jj) : idxs[jj]), X_NA, idxsHasNA); if (X_ISNAN(value)) { if (!narm) { Free(wtmp); return NA_REAL; } } else { /* Infinite weight, i.e. use data point */ wtmp[jj] = 1; nxt++; } } else if (ISNAN(weight)) { if (!narm) { Free(wtmp); return NA_REAL; } } } equalweights = 1; break; } else { /* A data points with a finite positive weight */ value = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA, idxsHasNA); if (X_ISNAN(value)) { if (!narm) { Free(wtmp); return NA_REAL; } } else { /* A data point with a non-missing value */ wtmp[ii] = weight; nxt++; } } } /* printf("nx=%d, nxt=%d\n", nx, nxt); for (ii=0; ii < nx; ii++) printf("w[%d]=%g, wtmp[%d]=%g\n", (int)ii, (double)w[ii], (int)ii, wtmp[ii]); */ /* Nothing to do? */ if (nxt == 0) { Free(wtmp); return NA_REAL; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Copy (x,w) to work with and calculate total weight */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ xtmp = Calloc(nxt, X_C_TYPE); jj = 0; wtotal = 0; for (ii=0; ii < nidxs; ii++) { if (wtmp[ii] > 0) { /* printf("ii=%d, jj=%d, wtmp[%d]=%g\n", (int)ii, (int)jj, (int)ii, wtmp[ii]); */ xtmp[jj] = x[((idxs == NULL) ? (ii) : idxs[ii])]; // sure that xvalue is not NA wtmp[jj] = wtmp[ii]; wtotal += wtmp[jj]; jj++; } } x = xtmp; w = wtmp; nx = nxt; /* for (ii=0; ii < nx; ii++) printf("x[%d]=%g, w[%d]=%g\n", (int)ii, (double)x[ii], (int)ii, w[ii]); */ /* Early stopping? */ if (nx == 1) { res = (double)x[0]; Free(xtmp); Free(wtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* All weights equal? Happens if +Inf were detected. */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (equalweights) { half = (nx+1) / 2; /* printf("half=%d\n", (int)half); */ X_PSORT(x, nx, half); /* for (ii=0; ii < nx; ii++) printf("x[%d]=%g\n", (int)ii, (double)x[ii]); */ /* FIXME: Add support for ties here too */ if (nx % 2 == 1) { res = (double)x[half-1]; } else { X_PSORT(x, half, half-1); res = ((double)x[half-1] + (double)x[half]) / 2; } Free(xtmp); Free(wtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Sort x and calculated the cumulative sum of weights (normalize to */ /* one) according to the reordered vector. */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* (a) Sort x */ idxs_int = Calloc(nx, int); for (ii = 0; ii < nx; ii++) idxs_int[ii] = ii; X_QSORT_I(x, idxs_int, 1, nx); /* (b) Normalized cumulative weights */ wcum = Calloc(nx, double); tmp_d2 = 0; /* Index where cumulative weight passed 1/2 */ half = nx+1; /* Default is last */ if (interpolate) { /* Adjust */ for (ii = 0; ii < nx; ii++) { tmp_d = w[idxs_int[ii]] / wtotal; tmp_d2 += tmp_d; wcum[ii] = tmp_d2 - (tmp_d/2); if (wcum[ii] >= 0.5) { half = ii; /* Early stopping - no need to continue */ break; } } } else { for (ii = 0; ii < nx; ii++) { tmp_d2 += w[idxs_int[ii]] / wtotal; wcum[ii] = tmp_d2; if (tmp_d2 > 0.5) { half = ii; /* Early stopping - no need to continue */ break; } } } Free(wtmp); Free(idxs_int); /* Two special cases where more than half of the total weight is at a) the first, or b) the last value */ if (half == 0 || half == nx) { res = (double)x[half]; Free(wcum); Free(xtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Alt 1: Linearly interpolated weighted median */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (interpolate) { /* The width and the height of the "rectangle". */ dx = (double)(x[half] - x[half-1]); Dy = wcum[half] - wcum[half-1]; /* printf("dx=%g, Dy=%g\n", dx, Dy); */ /* The width and the height of the triangle which upper corner touches the level where the cumulative sum of weights *equals* half the total weight. */ dy = 0.5 - wcum[half]; dx = (dy/Dy) * dx; /* printf("dx=%g, dy=%g\n", dx, dy); */ /* The corresponding x value */ res = dx + x[half]; Free(wcum); Free(xtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Alt 2: Classical weighted median (tied or not) */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* At this point we know that: 1) at most half the total weight is in the set x[1:half], 2) that the set x[(half+2):n] contains less than half the total weight The question is whether x[(half+1):n] contains *more* than half the total weight (try x=c(1,2,3), w=c(1,1,1)). If it is then we can be sure that x[half+1] is the weighted median we are looking for, otherwise it is any function of x[k:(half+1)]. */ wlow = wcum[half-1]; whigh = 1 - wlow; /* printf("half=%d, wtotal=%g, wlow=%g, whigh=%g, ties=%d\n", half, (double)wtotal, (double)wlow, (double)whigh, ties); printf("x[half+(-1:1)]=c(%g, %g, %g)\n", x[half-1-1], x[half-1], x[half-1+1]); */ if (whigh > 0.5) { /* printf("matrixStats2: Not a tie!\n"); */ /* Not a tie */ res = x[half]; } else { /* printf("matrixStats2: A tie!\n"); */ /* A tie! */ if (ties == 1) { /* weighted */ /* printf("ties=%d, half=%d, wlow*x[half]=%g, whigh*x[half+1]=%g\n", ties, half, wlow*x[half-1], whigh*x[half]); */ res = wlow*(double)x[half-1] + whigh*(double)x[half]; } else if (ties == 2) { /* min */ res = (double)x[half-1]; } else if (ties == 4) { /* max */ res = (double)x[half]; } else if (ties == 8) { /* mean */ res = ((double)x[half-1] + (double)x[half]) / 2; } else { error("Unknown value of argument 'ties': %d", ties); } } Free(wcum); Free(xtmp); return res; } /*************************************************************************** HISTORY: 2015-07-09 [DJ] o Supported subsetted computation. 2015-01-01 [HB] o Created. **************************************************************************/ matrixStats/src/rowDiffs.c0000644000176200001440000000645314531477242015334 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowDiffs(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowDiffs_lowlevel.h" #include "naming.h" SEXP rowDiffs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP lag, SEXP differences, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t lagg, diff; R_xlen_t nrow, ncol; R_xlen_t nrow_ans, ncol_ans; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'lag': */ lagg = asInteger(lag); if (lagg < 1) { error("Argument 'lag' must be a positive integer."); } /* Argument 'differences': */ diff = asInteger(differences); if (diff < 1) { error("Argument 'differences' must be a positive integer."); } /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Dimension of result matrix */ if (byrow) { nrow_ans = nrows; ncol_ans = (R_xlen_t)((double)ncols - ((double)diff*(double)lagg)); if (ncol_ans < 0) ncol_ans = 0; } else { nrow_ans = (R_xlen_t)((double)nrows - ((double)diff*(double)lagg)); if (nrow_ans < 0) nrow_ans = 0; ncol_ans = ncols; } if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow_ans, ncol_ans)); rowDiffs_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, lagg, diff, REAL(ans), nrow_ans, ncol_ans); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { set_rowDiffs_Dimnames(ans, dimnames, nrows, crows, ncols, ncol_ans, ccols); } else { set_colDiffs_Dimnames(ans, dimnames, nrows, nrow_ans, crows, ncols, ccols); } } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow_ans, ncol_ans)); rowDiffs_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, lagg, diff, INTEGER(ans), nrow_ans, ncol_ans); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { set_rowDiffs_Dimnames(ans, dimnames, nrows, crows, ncols, ncol_ans, ccols); } else { set_colDiffs_Dimnames(ans, dimnames, nrows, nrow_ans, crows, ncols, ccols); } } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowDiffs() */ /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/x_OP_y_lowlevel.h0000644000176200001440000001675514436403546016672 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void x_OP_y_Add_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Add_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Add_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Add_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, int xrowsHasNA, R_xlen_t *xcols, R_xlen_t nxcols, int xcolsHasNA, R_xlen_t *yidxs, R_xlen_t nyidxs, int yidxsHasNA, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) */ /* Addition */ #define METHOD x_OP_y_Add #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD /* Subtraction */ #define METHOD x_OP_y_Sub #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD /* Multiplication */ #define METHOD x_OP_y_Mul #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD /* Division */ #define METHOD x_OP_y_Div #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD matrixStats/src/binMeans_lowlevel_template.h0000644000176200001440000001211414372747711021111 0ustar liggesusers/*************************************************************************** TEMPLATE: binMeans_(...) GENERATES: void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - BIN_BY: 'L' or 'R' Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include "000.types.h" #if BIN_BY == 'L' /* [u,v) */ #define METHOD_NAME binMeans_L #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1) #elif BIN_BY == 'R' /* (u,v] */ #define METHOD_NAME binMeans_R #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1) #endif void METHOD_NAME(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) { R_xlen_t ii = 0, jj = 0, iStart=0; R_xlen_t n = 0; LDOUBLE sum = 0.0; int warn = 0; // Count? if (nbins > 0) { // Skip to the first bin while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) { ++iStart; } // For each x... for (ii = iStart; ii < nx; ++ii) { // Skip to a new bin? while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) { // Update statistic of current bin? if (count) { /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n > R_INT_MAX) { warn = 1; count[jj] = R_INT_MAX; } else { count[jj] = n; } } ans[jj] = n > 0 ? sum / n : R_NaN; sum = 0.0; n = 0; // ...and move to next ++jj; // No more bins? if (jj >= nbins) { // Make the outer for-loop to exit... ii = nx - 1; // ...but correct for the fact that the y[nx-1] point will // be incorrectly added to the sum. Doing the correction // here avoids an if (ii < nx) sum += y[ii] below. sum -= y[ii]; break; } } // Sum and count sum += y[ii]; ++n; /* Early LDOUBLE stopping? */ if (n % 1048576 == 0 && !R_FINITE(sum)) break; } // Update statistic of the last bin? if (jj < nbins) { if (count) { /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n > R_INT_MAX) { warn= 1; count[jj] = R_INT_MAX; } else { count[jj] = n; } } ans[jj] = n > 0 ? sum / n : R_NaN; // Assign the remaining bins to zero counts and missing mean values while (++jj < nbins) { ans[jj] = R_NaN; if (count) count[jj] = 0; } } } // if (nbins > 0) if (warn) { warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX); } } /* Undo template macros */ #undef BIN_BY #undef IS_PART_OF_FIRST_BIN #undef IS_PART_OF_NEXT_BIN #include "000.templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-07 [HB] o ROBUSTNESS: Added protection for integer overflow in bin counts. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-10-01 [HB] o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins. 2014-04-04 [HB] o BUG FIX: The native code of binMeans(x, bx) would try to access an out-of-bounds value of argument 'y' iff 'x' contained elements that are left of all bins in 'bx'. This bug had no impact on the results and since no assignment was done it should also not crash/ core dump R. This was discovered thanks to new memtests (ASAN and valgrind) provided by CRAN. 2013-10-08 [HB] o Created template for binMeans_() to create functions that bin either by [u,v) or (u,v]. 2013-05-10 [HB] o SPEEDUP: binMeans() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binMeans() would return random/garbage means/counts for bins that were beyond the last data point. o BUG FIX: In some cases binMeans() could try to go past the last bin. 2012-10-03 [HB] o Created binMeans(), which was adopted from from code proposed by Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as a reply to HB's R-devel thread 'Fastest non-overlapping binning mean function out there?' on Oct 3, 2012. **************************************************************************/ matrixStats/src/weightedMedian_lowlevel.h0000644000176200001440000000123414436403546020375 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include #include "000.macros.h" /* Native API (dynamically generated via macros): double weightedMedian_int(int *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA int narm, int interpolate, int ties) double weightedMedian_dbl(double *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA int narm, int interpolate, int ties) */ #define X_TYPE 'i' #include "weightedMedian_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "weightedMedian_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/allocMatrix2.c0000644000176200001440000001063514531477242016107 0ustar liggesusers#include #include "000.types.h" #include /* Checks whether setting bytes of an int/double to all zeroes corresponds to assigning a zero value. Note that the bit representation of int's and double's may not be the same on all architectures. */ int memset_zero_ok_int(void) { int t = 1; memset(&t, 0, sizeof(t)); return (t == 0); } int memset_zero_ok_double(void) { double t = 1; memset(&t, 0, sizeof(t)); return (t == 0); } /* For debugging purposes */ /* SEXP memsetZeroable(void) { SEXP ans; PROTECT(ans = allocVector(LGLSXP, 2)); LOGICAL(ans)[1] = memset_zero_ok_int(); LOGICAL(ans)[2] = memset_zero_ok_double(); UNPROTECT(1); return(ans); } */ void fillWithValue(SEXP ans, SEXP value) { R_xlen_t i, n; SEXPTYPE type; double *ans_ptr_d, value_d; int *ans_ptr_i, value_i; int *ans_ptr_l, value_l; /* Argument 'ans': */ if (!isVectorAtomic(ans)) { error("Argument 'ans' must be a vector."); } n = xlength(ans); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); switch (type) { case INTSXP: value_i = asInteger(value); ans_ptr_i = INTEGER(ans); if (value_i == 0 && memset_zero_ok_int()) { memset(ans_ptr_i, 0, n*sizeof(value_i)); } else { for (i=0; i < n; i++) ans_ptr_i[i] = value_i; } break; case REALSXP: value_d = asReal(value); ans_ptr_d = REAL(ans); if (value_d == 0 && memset_zero_ok_double()) { memset(ans_ptr_d, 0, n*sizeof(value_d)); } else { for (i=0; i < n; i++) ans_ptr_d[i] = value_d; } break; case LGLSXP: value_l = asLogical(value); ans_ptr_l = LOGICAL(ans); if (value_l == 0 && memset_zero_ok_int()) { memset(ans_ptr_l, 0, n*sizeof(value_l)); } else { for (i=0; i < n; i++) ans_ptr_l[i] = value_l; } break; default: error("Argument 'value' must be either of type integer, numeric or logical."); break; } } /* fillWithValue() */ SEXP allocVector2(SEXP length, SEXP value) { SEXP ans; SEXPTYPE type; R_xlen_t n = 0; /* Argument 'length': */ if (isInteger(length) && xlength(length) == 1) { n = (R_xlen_t)asInteger(length); } else if (isReal(length) && xlength(length) == 1) { n = (R_xlen_t)asReal(length); } else { error("Argument 'length' must be a single numeric."); } if (n < 0) error("Argument 'length' is negative."); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(ans = allocVector(type, n)); fillWithValue(ans, value); UNPROTECT(1); return(ans); } /* allocVector2() */ SEXP allocMatrix2(SEXP nrow, SEXP ncol, SEXP value) { SEXP ans; SEXPTYPE type; int nc, nr; /* Argument 'nrow' & 'ncol': */ if (!isInteger(nrow) || xlength(nrow) != 1) { error("Argument 'nrow' must be a single integer."); } if (!isInteger(ncol) || xlength(ncol) != 1) { error("Argument 'ncol' must be a single integer."); } nr = asInteger(nrow); nc = asInteger(ncol); if (nr < 0) error("Argument 'nrow' is negative."); if (nr < 0) error("Argument 'ncol' is negative."); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(ans = allocMatrix(type, nr, nc)); fillWithValue(ans, value); UNPROTECT(1); return(ans); } /* allocMatrix2() */ SEXP allocArray2(SEXP dim, SEXP value) { SEXP ans; SEXPTYPE type; int i, d; double nd = 1.0; R_xlen_t n; /* Argument 'dim': */ if (!isInteger(dim) || xlength(dim) == 0) { error("Argument 'dim' must be an integer vector of at least length one."); } for (i = 0; i < xlength(dim); i++) { d = INTEGER(dim)[i]; nd *= d; #ifndef LONG_VECTOR_SUPPORT if (nd > R_INT_MAX) { error("Argument 'dim' specifies too many elements: %.g > %d", nd, R_INT_MAX); } #endif } n = (R_xlen_t)nd; /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(dim = duplicate(dim)); PROTECT(ans = allocVector(type, n)); fillWithValue(ans, value); setAttrib(ans, R_DimSymbol, dim); UNPROTECT(2); return(ans); } /* allocArray2() */ matrixStats/src/rowMads.c0000644000176200001440000000553514531477242015165 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowMads(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowMads_lowlevel.h" #include "naming.h" SEXP rowMads(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP constant, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; double scale; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'constant': */ if (!isNumeric(constant)) error("Argument 'constant' must be a numeric scale."); scale = asReal(constant); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); SWAP(int, rowsHasNA, colsHasNA); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMads_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, scale, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowMads_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, scale, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } /* rowMads() */ /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-17 [HB] o Created from rowMedians.c. **************************************************************************/ matrixStats/src/000.init.c0000644000176200001440000000237514531477242015011 0ustar liggesusers#include #include #include "000.api.h" #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n} static R_CallMethodDef callMethods[] = { CALLDEF(allocArray2, 2), CALLDEF(allocMatrix2, 3), CALLDEF(allocVector2, 2), CALLDEF(anyMissing, 2), CALLDEF(binCounts, 3), CALLDEF(binMeans, 5), CALLDEF(colCounts, 9), CALLDEF(colOrderStats, 6), CALLDEF(colRanges, 8), CALLDEF(count, 6), CALLDEF(diff2, 5), CALLDEF(indexByRow, 2), CALLDEF(logSumExp, 4), CALLDEF(mean2, 4), CALLDEF(productExpSumLog, 4), CALLDEF(psortKM, 3), CALLDEF(rowCounts, 9), CALLDEF(rowCummaxs, 6), CALLDEF(rowCummins, 6), CALLDEF(rowCumprods, 6), CALLDEF(rowCumsums, 6), CALLDEF(rowDiffs, 8), CALLDEF(rowLogSumExps, 8), CALLDEF(rowMads, 9), CALLDEF(rowMeans2, 9), CALLDEF(rowMedians, 8), CALLDEF(rowOrderStats, 6), CALLDEF(rowRanges, 8), CALLDEF(rowRanksWithTies, 7), CALLDEF(rowSums2, 8), CALLDEF(rowVars, 9), CALLDEF(signTabulate, 2), CALLDEF(sum2, 4), CALLDEF(validate, 3), CALLDEF(weightedMean, 5), CALLDEF(weightedMedian, 6), CALLDEF(x_OP_y, 11), {NULL, NULL, 0} }; void R_init_matrixStats(DllInfo *info) { R_registerRoutines(info, NULL, callMethods, NULL, NULL); R_useDynamicSymbols(info, FALSE); } matrixStats/src/weightedMean.c0000644000176200001440000000343614531726565016155 0ustar liggesusers/*************************************************************************** Public methods: SEXP weightedMean(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP refine) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include #include "weightedMean_lowlevel.h" SEXP weightedMean(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP refine) { SEXP ans; int narm, refine2; double avg = NA_REAL; R_xlen_t nx, nw; /* Argument 'x': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'x': */ assertArgVector(w, (R_TYPE_REAL), "w"); nw = xlength(w); if (nx != nw) { error("Argument 'x' and 'w' are of different lengths: %lld != %lld", (long long int)nx, (long long int)nw); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine2 = asLogicalNoNA(refine, "refine"); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Double matrices are more common to use. */ if (isReal(x)) { avg = weightedMean_dbl(REAL(x), nx, REAL(w), cidxs, nidxs, idxsHasNA, narm, refine2); } else if (isInteger(x) | isLogical(x)) { avg = weightedMean_int(INTEGER(x), nx, REAL(w), cidxs, nidxs, idxsHasNA, narm, refine2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = avg; UNPROTECT(1); return(ans); } // weightedMean() /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-12-08 [HB] o Created. **************************************************************************/ matrixStats/src/000.types.h0000644000176200001440000000207314531172122015177 0ustar liggesusers#include /* R_xlen_t, ... */ #ifndef R_INT_MIN #define R_INT_MIN -INT_MAX #endif #ifndef R_INT_MAX #define R_INT_MAX INT_MAX #endif /* inf */ #ifndef IS_INF #define IS_INF(x) (x == R_PosInf || x == R_NegInf) #endif /* Subsetting index mode */ #ifndef SUBSETTED_MODE_INDEX #define SUBSETTED_MODE_INDEX #define SUBSETTED_ALL 0 #define SUBSETTED_INTEGER 1 #define SUBSETTED_REAL 2 #endif /* As in /src/include/Defn.h */ #ifdef HAVE_LONG_DOUBLE #define LDOUBLE long double #else #define LDOUBLE double #endif /* Backward compatibility with R (< 3.0.0) As in /src/include/Rinternals.h */ #ifndef R_XLEN_T_MAX typedef int R_xlen_t; #define R_XLEN_T_MAX R_LEN_T_MAX #ifndef xlength #define xlength length #endif #endif /* define NA_R_XLEN_T */ #ifdef LONG_VECTOR_SUPPORT #define R_XLEN_T_MIN -R_XLEN_T_MAX-1 #define NA_R_XLEN_T R_XLEN_T_MIN #else #define NA_R_XLEN_T NA_INTEGER #endif /* Macro to check for user interrupts every 2^20 iteration */ #define R_CHECK_USER_INTERRUPT(i) if (i % 1048576 == 0) R_CheckUserInterrupt() matrixStats/src/rowCumMinMaxs_lowlevel_template.h0000644000176200001440000001533714436403546022134 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCummins_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" #if COMP == '<' #define OP < #elif COMP == '>' #define OP > #endif void CONCAT_MACROS(METHOD, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev, idx; R_xlen_t colBegin; X_C_TYPE xvalue; ANS_C_TYPE value; int ok; int *oks = NULL; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } if (ncols == 0 || nrows == 0) return; if (byrow) { oks = (int *) R_alloc(nrows, sizeof(int)); if (nocols) { colBegin = 0; } else { R_xlen_t colsFirstElement = cols[0]; if (!colsHasNA || colsFirstElement != NA_R_XLEN_T) { colBegin = colsFirstElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (kk=0; kk < nrows; kk++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + kk; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[kk]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[kk]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } value = (ANS_C_TYPE) xvalue; if (ANS_ISNAN(value)) { oks[kk] = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { oks[kk] = 1; ans[kk] = value; } } kk_prev = 0; for (jj=1; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } value = (ANS_C_TYPE) xvalue; if (oks[ii]) { if (ANS_ISNAN(value)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { if (value OP ans[kk_prev]) { ans[kk] = value; } else { ans[kk] = (ANS_C_TYPE) ans[kk_prev]; } } } else { ans[kk] = ANS_NA; } kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T) { idx = colBegin; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[0]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[0]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } value = (ANS_C_TYPE) xvalue; if (ANS_ISNAN(value)) { ok = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { ok = 1; ans[kk] = value; } kk_prev = kk; kk++; for (ii=1; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } value = (ANS_C_TYPE) xvalue; if (ok) { if (ANS_ISNAN(value)) { ok = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { if (value OP ans[kk_prev]) { ans[kk] = value; } else { ans[kk] = (ANS_C_TYPE) ans[kk_prev]; } } kk++; kk_prev++; } else { ans[kk] = ANS_NA; kk++; } R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ } #undef OP /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/rowMedians.c0000644000176200001440000000731114531477242015653 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowMedians(SEXP x, ...) Authors: Adopted from rowQuantiles.c by R. Gentleman. Copyright Henrik Bengtsson, 2007 **************************************************************************/ #include #include "000.types.h" #include "rowMedians_lowlevel.h" #include "naming.h" SEXP rowMedians(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Get dimensions of 'x'. */ nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); SWAP(int, rowsHasNA, colsHasNA); } /* R allocate a double vector of length 'nrows' Note that 'nrows' means 'ncols' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMedians_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowMedians_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } /* rowMedians() */ /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2013-01-13 [HB] o Added argument 'byRow' to rowMedians() and dropped colMedians(). o Using internal arguments 'by_row' instead of 'by_column'. 2011-12-11 [HB] o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s. Note that NaN:s does not exist for integers. 2011-10-12 [HJ] o Added colMedians(). o Now rowMediansInteger/Real() can operate also by columns, cf. argument 'by_column'. 2007-08-14 [HB] o Added checks for user interrupts every 1000 line. o Added argument 'hasNA' to rowMedians(). 2005-12-07 [HB] o BUG FIX: When calculating the median of an even number (non-NA) values, the length of the second sort was one element too short, which made the method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq). 2005-11-24 [HB] o By implementing a special version for integers, there is no need to coerce to double in R, which would take up twice the amount of memory. o rowMedians() now handles NAs too. o Adopted from rowQuantiles.c in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/weightedMean_lowlevel.h0000644000176200001440000000113714436403546020062 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double weightedMean_int(int *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine) double weightedMean_dbl(double *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine) */ #define X_TYPE 'i' #include "weightedMean_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "weightedMean_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowRanges_lowlevel_template.h0000644000176200001440000001575414436403546021335 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowRanges_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(rowRanges, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; X_C_TYPE value, *mins = NULL, *maxs = NULL; int *skip = NULL; /* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */ /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (hasna) { skip = (int *) R_alloc(nrows, sizeof(int)); for (ii=0; ii < nrows; ii++) { is_counted[ii] = 0; skip[ii] = 0; } /* Missing values */ if (what == 0) { /* rowMins() */ mins = ans; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow, colsHasNA, 0); for (ii=0; ii < nrows; ii++) { if (!narm && skip[ii]) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii]), colsHasNA, rowsHasNA); value = R_INDEX_GET(x, idx, X_NA, colsHasNA || rowsHasNA); if (X_ISNAN(value)) { if (!narm) { mins[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { mins[ii] = value; is_counted[ii] = 1; } else if (value < mins[ii]) { mins[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { mins[ii] = R_PosInf; } } #endif } else if (what == 1) { /* rowMaxs() */ maxs = ans; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow, colsHasNA, 0); for (ii=0; ii < nrows; ii++) { if (!narm && skip[ii]) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii]), colsHasNA, rowsHasNA); value = R_INDEX_GET(x, idx, X_NA, colsHasNA || rowsHasNA); if (X_ISNAN(value)) { if (!narm) { maxs[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { maxs[ii] = value; is_counted[ii] = 1; } else if (value > maxs[ii]) { maxs[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { maxs[ii] = R_NegInf; } } #endif } else if (what == 2) { /* rowRanges() */ mins = ans; maxs = &ans[nrows]; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow, colsHasNA, 0); for (ii=0; ii < nrows; ii++) { if (!narm && skip[ii]) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii]), colsHasNA, rowsHasNA); value = R_INDEX_GET(x, idx, X_NA, colsHasNA || rowsHasNA); if (X_ISNAN(value)) { if (!narm) { mins[ii] = value; maxs[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { mins[ii] = value; maxs[ii] = value; is_counted[ii] = 1; } else if (value < mins[ii]) { mins[ii] = value; } else if (value > maxs[ii]) { maxs[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { mins[ii] = R_PosInf; maxs[ii] = R_NegInf; } } #endif } /* if (what ...) */ } else { /* No missing values */ if (what == 0) { /* rowMins() */ mins = ans; /* Initiate results */ for (ii=0; ii < nrows; ii++) { mins[ii] = x[ii]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value < mins[ii]) mins[ii] = value; } } } else if (what == 1) { /* rowMax() */ maxs = ans; /* Initiate results */ for (ii=0; ii < nrows; ii++) { maxs[ii] = x[ii]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value > maxs[ii]) maxs[ii] = value; } } } else if (what == 2) { /* rowRanges()*/ mins = ans; maxs = &ans[nrows]; /* Initiate results */ for (ii=0; ii < nrows; ii++) { mins[ii] = x[ii]; maxs[ii] = x[ii]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value < mins[ii]) { mins[ii] = value; } else if (value > maxs[ii]) { maxs[ii] = value; } } } } /* if (what ...) */ } /* if (narm) */ } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/colCounts_lowlevel_template.h0000644000176200001440000002532014436403546021325 0ustar liggesusers/*********************************************************************** TEMPLATE: void colCounts_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, X_C_TYPE value, int what, int narm, int hasna, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r', or 'l' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(colCounts, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, X_C_TYPE value, int what, int narm, int hasna, double *ans) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; R_xlen_t count; X_C_TYPE xvalue; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } if (what == 0L) { /* all */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } count = 1; for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (!X_ISNAN(xvalue)) { count = 0; /* Found another value! Early stopping */ break; } } ans[jj] = (double)count; } } else { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } count = 1; for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (xvalue == value) { } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is not 'value' later, then we know for sure that all = FALSE regardless of missing values. In other words, at this point the answer can be either NA or FALSE.*/ count = NA_R_XLEN_T; } else { count = 0; /* Found another value! Early stopping */ break; } } /* for (ii ...) */ ans[jj] = (double)count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else if (what == 1L) { /* any */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } count = 0; for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) { count = 1; /* Found value! Early stopping */ break; } } ans[jj] = (double)count; } } else { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } count = 0; for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (xvalue == value) { count = 1; /* Found value! Early stopping */ break; } else if (narm && X_ISNAN(xvalue)) { /* Skipping */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is 'value' later, then we know for sure that any = TRUE regardless of missing values. In other words, at this point the answer can be either NA or TRUE.*/ count = NA_R_XLEN_T; } } /* for (ii ...) */ ans[jj] = (double)count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else if (what == 2L) { /* count */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } count = 0; for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) { ++count; } } ans[jj] = (double)count; } } else { for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } count = 0; for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (xvalue == value) { ++count; } else if (!narm && X_ISNAN(xvalue)) { count = NA_R_XLEN_T; /* Early stopping */ break; } } /* for (ii ...) */ ans[jj] = (double)count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } /* if (what) */ } /*************************************************************************** HISTORY: 2015-04-18 [DJ] o Supported subsetted computation. 2014-11-14 [HB] o Created colCounts() templates from rowCounts() templates. **************************************************************************/ matrixStats/src/rowCumsums.c0000644000176200001440000000454614531477242015736 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCumsums(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCumsums_lowlevel.h" #include "naming.h" SEXP rowCumsums(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCumsums_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, rowsHasNA, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x) | isLogical(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCumsums_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCumsums() */ /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/weightedMedian.c0000644000176200001440000000366314531726565016474 0ustar liggesusers/*************************************************************************** Public methods: SEXP weightedMedian(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP interpolate, SEXP ties) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include #include "weightedMedian_lowlevel.h" SEXP weightedMedian(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP interpolate, SEXP ties) { SEXP ans; int narm, interpolate2, ties2; double mu = NA_REAL; R_xlen_t nx, nw; /* Argument 'x': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'x': */ assertArgVector(w, (R_TYPE_REAL), "w"); nw = xlength(w); if (nx != nw) { error("Argument 'x' and 'w' are of different lengths: %lld != %lld", (long long int)nx, (long long int)nw); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'interpolate': */ interpolate2 = asLogicalNoNA(interpolate, "interpolate"); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); /* Argument 'ties': */ ties2 = asInteger(ties); /* Double matrices are more common to use. */ if (isReal(x)) { mu = weightedMedian_dbl(REAL(x), nx, REAL(w), cidxs, nidxs, idxsHasNA, narm, interpolate2, ties2); } else if (isInteger(x) | isLogical(x)) { mu = weightedMedian_int(INTEGER(x), nx, REAL(w), cidxs, nidxs, idxsHasNA, narm, interpolate2, ties2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = mu; UNPROTECT(1); return(ans); } // weightedMedian() /*************************************************************************** HISTORY: 2015-07-09 [DJ] o Supported subsetted computation. 2015-01-01 [HB] o Created. **************************************************************************/ matrixStats/src/000.macros.h0000644000176200001440000000031314406445217015323 0ustar liggesusers#ifndef _MACROS_H_ #define _MACROS_H_ #define CONCAT(x,y) x ##_## y #define CONCAT_MACROS(x,y) CONCAT(x,y) #define QUOTE(str) #str #define QUOTE_MACROS(str) QUOTE(str) #endif /* END OF _MACROS_H_ */ matrixStats/src/rowOrderStats.c0000644000176200001440000000715014531726565016373 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) Authors: Henrik Bengtsson. Adopted from rowQ() by R. Gentleman. To do: Add support for missing values. Copyright Henrik Bengtsson, 2007-2014 **************************************************************************/ #include #include "000.types.h" #include "rowOrderStats_lowlevel.h" #include "naming.h" SEXP rowOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) { SEXP ans = NILSXP; R_xlen_t nrow, ncol, qq; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'which': */ if (length(which) != 1) error("Argument 'which' must be a single number."); if (!isNumeric(which)) error("Argument 'which' must be a numeric number."); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasna, colsHasna; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasna); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasna); /* Argument 'useNames': */ int usenames = asLogical(useNames); // Check missing rows if (rowsHasna && ncols > 0) { error("Argument 'rows' must not contain missing value"); } // Check missing cols if (colsHasna && nrows > 0) { error("Argument 'cols' must not contain missing value"); } /* Subtract one here, since rPsort does zero based addressing */ qq = asInteger(which) - 1; /* Assert that 'qq' is a valid index */ if (qq < 0 || qq >= ncols) { error("Argument 'which' is out of range: %lld", (long long int)(qq + 1)); } /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, nrows)); rowOrderStats_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, qq, REAL(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, nrows)); rowOrderStats_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, qq, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // rowOrderStats() /*************************************************************************** HISTORY: 2015-07-11 [DJ] o Supported subsetted computation. 2009-02-04 [HB] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. 2008-03-25 [HB] o Renamed from 'rowQuantiles' to 'rowOrderStats'. 2007-08-10 [HB] o Removed arguments for NAs since rowOrderStats() still don't support it. 2005-11-24 [HB] o Cool, it works and compiles nicely. o Preallocate colOffset to speed up things even more. o Added more comments and error checking. o Adopted from rowQ() in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/rowLogSumExp.c0000644000176200001440000000536214531477242016162 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013-2014 **************************************************************************/ #include #include "000.types.h" #include "rowLogSumExp_lowlevel.h" #include "naming.h" SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { SEXP ans; int narm, hasna, byrow, usenames; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(lx = coerceVector(lx, REALSXP)); PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'lx' and 'dim': */ assertArgMatrix(lx, dim, (R_TYPE_REAL), "lx"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'byRow': */ byrow = asLogical(byRow); if (byrow) { ans = PROTECT(allocVector(REALSXP, nrows)); rowLogSumExps_double(REAL(lx), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, hasna, 1, REAL(ans)); } else { ans = PROTECT(allocVector(REALSXP, ncols)); rowLogSumExps_double(REAL(lx), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, narm, hasna, 0, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames == NA_LOGICAL || usenames) { SEXP dimnames = getAttrib(lx, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } } UNPROTECT(3); /* ans = PROTECT(...), PROTECT(lx = ...), PROTECT(dim = ...) */ return(ans); } /* rowLogSumExps() */ /*************************************************************************** HISTORY: 2015-06-12 [DJ] o Supported subsetted computation. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/rowRanges.c0000644000176200001440000001244014531477242015511 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowRanges(SEXP x, ...) Authors: Henrik Bengtsson. Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowRanges_lowlevel.h" #include "naming.h" SEXP rowRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans = NILSXP, ans2 = NILSXP; int *mins, *maxs; double *mins2, *maxs2; int *is_counted, all_counted = 0; int what2, narm, hasna, usenames; R_xlen_t nrow, ncol, ii; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'what': */ if (length(what) != 1) error("Argument 'what' must be a single number."); if (!isNumeric(what)) error("Argument 'what' must be a numeric number."); what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("Invalid value of 'what': %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasNA; int colsHasNA; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasNA); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasNA); /* Argument 'useNames': */ usenames = asLogical(useNames); is_counted = (int *) R_alloc(nrows, sizeof(int)); if (isReal(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(REALSXP, nrows, 2)); } else { PROTECT(ans = allocVector(REALSXP, nrows)); } rowRanges_dbl(REAL(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, what2, narm, hasna, REAL(ans), is_counted); if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (nrows != 0) { setDimnames(ans, dimnames, nrows, crows, 0, ccols, FALSE); } /* (else) Zero-length rownames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(1); } else if (isInteger(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(INTSXP, nrows, 2)); } else { PROTECT(ans = allocVector(INTSXP, nrows)); } rowRanges_int(INTEGER(x), nrow, ncol, crows, nrows, rowsHasNA, ccols, ncols, colsHasNA, what2, narm, hasna, INTEGER(ans), is_counted); /* Any entries with zero non-missing values? */ all_counted = 1; for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { all_counted = 0; break; } } if (!all_counted) { /* Handle zero non-missing values */ /* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */ if (what2 == 0) { PROTECT(ans2 = allocVector(REALSXP, nrows)); mins = INTEGER(ans); mins2 = REAL(ans2); for (ii=0; ii < nrows; ii++) { if (is_counted[ii]) { mins2[ii] = (double)mins[ii]; } else { mins2[ii] = R_PosInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 1) { PROTECT(ans2 = allocVector(REALSXP, nrows)); maxs = INTEGER(ans); maxs2 = REAL(ans2); for (ii=0; ii < nrows; ii++) { if (is_counted[ii]) { maxs2[ii] = (double)maxs[ii]; } else { maxs2[ii] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 2) { PROTECT(ans2 = allocMatrix(REALSXP, nrows, 2)); mins = INTEGER(ans); maxs = &INTEGER(ans)[nrows]; mins2 = REAL(ans2); maxs2 = &REAL(ans2)[nrows]; for (ii=0; ii < nrows; ii++) { if (is_counted[ii]) { mins2[ii] = (double)mins[ii]; maxs2[ii] = (double)maxs[ii]; } else { mins2[ii] = R_PosInf; maxs2[ii] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } ans = ans2; } if (usenames != NA_LOGICAL && usenames) { SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (nrows != 0) { setDimnames(ans, dimnames, nrows, crows, 0, ccols, FALSE); } /* (else) Zero-length rownames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(1); /* ans */ } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // rowRanges() /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/anyMissing_lowlevel.h0000644000176200001440000000026114436403546017577 0ustar liggesusers/* Native API (dynamically generated via macros): int anyMissing_internal(SEXP x, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA) */ #include "anyMissing_lowlevel_template.h" matrixStats/src/000.templates-gen-matrix.h0000644000176200001440000000162614372747711020124 0ustar liggesusers#include "000.macros.h" #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE RETURN_TYPE (*METHOD_NAME[3][3])(ARGUMENTS_LIST) = { {METHOD_NAME_arows_acols, METHOD_NAME_arows_icols, METHOD_NAME_arows_dcols}, {METHOD_NAME_irows_acols, METHOD_NAME_irows_icols, METHOD_NAME_irows_dcols}, {METHOD_NAME_drows_acols, METHOD_NAME_drows_icols, METHOD_NAME_drows_dcols}, }; #include "000.templates-types_undef.h" matrixStats/src/rowMeans2_lowlevel.h0000644000176200001440000000137714436403546017344 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowMeans2_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans) void rowMeans2_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int narm, int refine, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowMeans2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowMeans2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/anyMissing.c0000644000176200001440000000232214531477242015661 0ustar liggesusers/*************************************************************************** Public methods: anyMissing(SEXP x, SEXP idxs) TO DO: Support list():s too. Copyright Henrik Bengtsson, 2007 **************************************************************************/ #include #include "000.types.h" #include "000.utils.h" #include "anyMissing_lowlevel.h" SEXP anyMissing(SEXP x, SEXP idxs) { R_xlen_t nx; nx = xlength(x); /* anyMissing() on zero-length objects should always return FALSE, just like any(double(0)). */ if (nx == 0) return(ScalarLogical(FALSE)); /* Argument 'idxs': */ R_xlen_t nidxs; int idxsHasNA; R_xlen_t *cidxs = validateIndicesCheckNA(idxs, nx, 1, &nidxs, &idxsHasNA); if (nidxs == 0) return(ScalarLogical(FALSE)); if (anyMissing_internal(x, cidxs, nidxs, idxsHasNA)) { return(ScalarLogical(TRUE)); } return(ScalarLogical(FALSE)); } // anyMissing() /*************************************************************************** HISTORY: 2015-06-14 [DJ] o Supported subsetted computation. 2007-08-14 [HB] o Created using do_isna() in src/main/coerce.c as a template. **************************************************************************/ matrixStats/src/binMeans_lowlevel.h0000644000176200001440000000076614372747711017230 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include /* Native API (dynamically generated via macros): void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) */ #define BIN_BY 'L' #include "binMeans_lowlevel_template.h" #define BIN_BY 'R' #include "binMeans_lowlevel_template.h" matrixStats/src/000.api.h0000644000176200001440000000572514436403546014626 0ustar liggesusers/* C-level API that is called from R */ SEXP allocArray2(SEXP dim, SEXP value); SEXP allocMatrix2(SEXP nrow, SEXP ncol, SEXP value); SEXP allocVector2(SEXP length, SEXP value); SEXP anyMissing(SEXP x, SEXP idxs); SEXP binCounts(SEXP x, SEXP bx, SEXP right); SEXP binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right); SEXP colCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP colOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames); SEXP colRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP count(SEXP x, SEXP idxs, SEXP value, SEXP what, SEXP naRm, SEXP hasNA); SEXP diff2(SEXP x, SEXP idxs, SEXP lag, SEXP differences, SEXP useNames); SEXP indexByRow(SEXP dim, SEXP idxs); SEXP logSumExp(SEXP lx, SEXP idxs, SEXP naRm, SEXP hasNA); SEXP mean2(SEXP x, SEXP idxs, SEXP naRm, SEXP refine); SEXP productExpSumLog(SEXP x, SEXP idxs, SEXP naRm, SEXP hasNA); SEXP psortKM(SEXP x, SEXP k, SEXP m); SEXP rowCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP rowCummaxs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowCummins(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowCumprods(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowCumsums(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowDiffs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP lag, SEXP differences, SEXP byRow, SEXP useNames); SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowMads(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP constant, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowMeans2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP refine, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowMedians(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames); SEXP rowRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP rowRanksWithTies(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP tiesMethod, SEXP byRow, SEXP useNames); SEXP rowSums2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowVars(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP refine, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP signTabulate(SEXP x, SEXP idxs); SEXP sum2(SEXP x, SEXP idxs, SEXP naRm, SEXP mode); SEXP validate(SEXP idxs, SEXP maxIdx, SEXP allowOutOfBound); SEXP weightedMean(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP refine); SEXP weightedMedian(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP interpolate, SEXP ties); SEXP x_OP_y(SEXP x, SEXP y, SEXP dim, SEXP operator, SEXP xrows, SEXP xcols, SEXP yidxs, SEXP commute, SEXP naRm, SEXP hasNA, SEXP byRow); matrixStats/src/weightedMean_lowlevel_template.h0000644000176200001440000000605714436403546021763 0ustar liggesusers/*********************************************************************** TEMPLATE: double weightedMean_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(weightedMean, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, int narm, int refine) { X_C_TYPE value; double weight; R_xlen_t i; LDOUBLE sum = 0, wtotal = 0; LDOUBLE avg = R_NaN; for (i=0; i < nidxs; i++) { /* * Unlike the indices, the weights are not pre-checked for NA values in any way, * meaning that we must pretend that the weights have NA-values in them */ weight = R_INDEX_GET(w, ((idxs == NULL) ? (i) : idxs[i]), NA_REAL, 1); /* Skip or early stopping? */ if (weight == 0) { continue; } value = R_INDEX_GET(x, ((idxs == NULL) ? (i) : idxs[i]), X_NA, idxsHasNA); #if X_TYPE == 'i' if (X_ISNAN(value)) { /* Skip or early stopping? */ if (narm) { continue; } else { sum = R_NaReal; break; } } else { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; /* Early stopping? Special for long LDOUBLE vectors */ if (i % 1048576 == 0 && ISNAN(sum)) break; } else if (!X_ISNAN(value)) { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; } #endif } /* for (i ...) */ if (wtotal > DBL_MAX || wtotal < -DBL_MAX) { avg = R_NaN; } else if (sum > DBL_MAX) { avg = R_PosInf; } else if (sum < -DBL_MAX) { avg = R_NegInf; } else { avg = sum / wtotal; #if X_TYPE == 'r' /* Extra precision by summing over residuals? */ if (refine && R_FINITE(avg)) { sum = 0; for (i=0; i < nidxs; i++) { weight = R_INDEX_GET(w, ((idxs == NULL) ? (i) : idxs[i]), NA_REAL, 1); /* Skip? */ if (weight == 0) { continue; } value = R_INDEX_GET(x, ((idxs == NULL) ? (i) : idxs[i]), X_NA, idxsHasNA); if (!narm) { sum += (LDOUBLE)weight * (value - avg); /* Early stopping? Special for long LDOUBLE vectors */ if (i % 1048576 == 0 && ISNAN(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)weight * (value - avg); } } avg += (sum / wtotal); } #endif } return (double)avg; } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-12-08 [HB] o Created. **************************************************************************/ matrixStats/src/colRanges_lowlevel_template.h0000644000176200001440000002316714436403546021300 0ustar liggesusers/*********************************************************************** TEMPLATE: void colRanges_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(colRanges, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; X_C_TYPE xvalue, *mins = NULL, *maxs = NULL; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } /* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */ /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (hasna) { for (jj=0; jj < ncols; jj++) is_counted[jj] = 0; /* Missing values */ if (what == 0) { /* colMins() */ mins = ans; for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) { if (!narm) { mins[jj] = xvalue; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(xvalue)) break; #endif } } else if (!is_counted[jj]) { mins[jj] = xvalue; is_counted[jj] = 1; } else if (xvalue < mins[jj]) { mins[jj] = xvalue; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { mins[jj] = R_PosInf; } } #endif } else if (what == 1) { /* colMaxs() */ maxs = ans; for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) { if (!narm) { maxs[jj] = xvalue; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(xvalue)) break; #endif } } else if (!is_counted[jj]) { maxs[jj] = xvalue; is_counted[jj] = 1; } else if (xvalue > maxs[jj]) { maxs[jj] = xvalue; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { maxs[jj] = R_NegInf; } } #endif } else if (what == 2) { /* colRanges() */ mins = ans; maxs = &ans[ncols]; for (jj=0; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin = colsElement * nrow; } else { colBegin = NA_R_XLEN_T; } } for (ii=0; ii < nrows; ii++) { if (norows) { if (!colsHasNA || colBegin != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin + ii; xvalue = x[idx]; } else { xvalue = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin + rows[ii]; xvalue = x[idx]; } else { idx = R_INDEX_OP(colBegin, +, (rows[ii]), 1, 1); xvalue = R_INDEX_GET(x, idx, X_NA, 1); } if (X_ISNAN(xvalue)) { if (!narm) { mins[jj] = xvalue; maxs[jj] = xvalue; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(xvalue)) break; #endif } } else if (!is_counted[jj]) { mins[jj] = xvalue; maxs[jj] = xvalue; is_counted[jj] = 1; } else if (xvalue < mins[jj]) { mins[jj] = xvalue; } else if (xvalue > maxs[jj]) { maxs[jj] = xvalue; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { mins[jj] = R_PosInf; maxs[jj] = R_NegInf; } } #endif } /* if (what ...) */ } else { /* No missing values */ if (what == 0) { /* colMins() */ mins = ans; /* Initiate results */ for (jj=0; jj < ncols; jj++) { mins[jj] = x[jj]; } for (jj=1; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { colBegin = cols[jj] * nrow; } for (ii=0; ii < nrows; ii++) { if (norows){ idx = ii + colBegin; } else { idx = rows[ii] + colBegin; } xvalue = x[idx]; if (xvalue < mins[jj]) mins[jj] = xvalue; } } } else if (what == 1) { /* colMax() */ maxs = ans; /* Initiate results */ for (jj=0; jj < ncols; jj++) { maxs[jj] = x[jj]; } for (jj=1; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { colBegin = cols[jj] * nrow; } for (ii=0; ii < nrows; ii++) { if (norows){ idx = ii + colBegin; } else { idx = rows[ii] + colBegin; } xvalue = x[idx]; if (xvalue > maxs[jj]) maxs[jj] = xvalue; } } } else if (what == 2) { /* colRanges()*/ mins = ans; maxs = &ans[ncols]; /* Initiate results */ for (jj=0; jj < ncols; jj++) { mins[jj] = x[jj]; maxs[jj] = x[jj]; } for (jj=1; jj < ncols; jj++) { if (nocols) { colBegin = jj * nrow; } else { colBegin = cols[jj] * nrow; } for (ii=0; ii < nrows; ii++) { if (norows){ idx = ii +colBegin; } else { idx = rows[ii] + colBegin; } xvalue = x[idx]; if (xvalue < mins[jj]) { mins[jj] = xvalue; } else if (xvalue > maxs[jj]) { maxs[jj] = xvalue; } } } } /* if (what ...) */ } /* if (narm) */ } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/signTabulate_lowlevel.h0000644000176200001440000000106314436403546020101 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void signTabulate_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, double *ans) void signTabulate_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int idxsHasNA, double *ans) */ #define X_TYPE 'i' #include "signTabulate_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "signTabulate_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowDiffs_lowlevel_template.h0000644000176200001440000002101514436403546021134 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowDiffs_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include #undef X_DIFF #undef DIFF_X_MATRIX #if X_TYPE == 'i' #ifndef diff_int static R_INLINE int diff_int(int a, int b) { if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER); return a-b; } #define diff_int diff_int #endif #define X_DIFF diff_int #define DIFF_X_MATRIX diff_matrix_int #elif X_TYPE == 'r' #define X_DIFF(a,b) a-b #define DIFF_X_MATRIX diff_matrix_double #endif #if (X_TYPE == 'i' && !defined(diff_matrix_int)) || (X_TYPE == 'r' && !defined(diff_matrix_double)) static R_INLINE void DIFF_X_MATRIX(X_C_TYPE *x, R_xlen_t nrow_x, R_xlen_t ncol_x, int byrow, R_xlen_t lag, X_C_TYPE *y, R_xlen_t nrow_y, R_xlen_t ncol_y) { R_xlen_t ii, jj, ss, tt, uu; if (byrow) { uu = lag * nrow_x; tt = 0; ss = 0; for (jj=0; jj < ncol_y; jj++) { for (ii=0; ii < nrow_y; ii++) { y[ss++] = X_DIFF(x[uu++], x[tt++]); } } } else { uu = lag; tt = 0; ss = 0; for (jj=0; jj < ncol_y; jj++) { for (ii=0; ii < nrow_y; ii++) { /* Rprintf("y[%d] = x[%d] - x[%d] = %g - %g = %g\n", ss, uu, tt, (double)x[uu], (double)x[tt], (double)X_DIFF(x[uu], x[tt])); */ y[ss++] = X_DIFF(x[uu++], x[tt++]); } tt += lag; uu += lag; } } } #if X_TYPE == 'i' #define diff_matrix_int diff_matrix_int #elif X_TYPE == 'r' #define diff_matrix_double diff_matrix_double #endif #endif #undef DIFF_X_MATRIX_TYPE #if X_TYPE == 'i' #define DIFF_X_MATRIX_TYPE CONCAT_MACROS(DIFF_X_MATRIX_TYPE, int) #elif X_TYPE == 'r' #define DIFF_X_MATRIX_TYPE CONCAT_MACROS(DIFF_X_MATRIX_TYPE, double) #endif static R_INLINE void DIFF_X_MATRIX_TYPE(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, R_xlen_t lag, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) { R_xlen_t ii, jj, ss; R_xlen_t idx, colBegin1, colBegin2; X_C_TYPE xvalue1, xvalue2; int norows, nocols; if (cols == NULL) { nocols = 1; } else { nocols = 0; } if (rows == NULL) { norows = 1; } else { norows = 0; } ss = 0; if (byrow) { for (jj=0; jj < ncol_ans; jj++) { if (nocols) { colBegin1 = jj * nrow; } else if (!colsHasNA) { colBegin1 = cols[jj] * nrow; } else { colBegin1 = R_INDEX_OP(cols[jj], *, nrow, 1, 1); } if (nocols) { colBegin2 = (jj+lag) * nrow; } else if (!colsHasNA) { colBegin2 = cols[jj+lag] * nrow; } else { colBegin2 = R_INDEX_OP(cols[jj+lag], *, nrow, 1, 1); } for (ii=0; ii < nrow_ans; ii++) { if (norows) { if (!colsHasNA || colBegin1 != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin1 + ii; xvalue1 = x[idx]; } else { xvalue1 = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin1 + rows[ii]; xvalue1 = x[idx]; } else { idx = R_INDEX_OP(colBegin1, +, (rows[ii]), 1, 1); xvalue1 = R_INDEX_GET(x, idx, X_NA, 1); } if (norows) { if (!colsHasNA || colBegin2 != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin2 + ii; xvalue2 = x[idx]; } else { xvalue2 = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin2 + rows[ii]; xvalue2 = x[idx]; } else { idx = R_INDEX_OP(colBegin2, +, (rows[ii]), 1, 1); xvalue2 = R_INDEX_GET(x, idx, X_NA, 1); } ans[ss++] = X_DIFF(xvalue2, xvalue1); } } } else { for (jj=0; jj < ncol_ans; jj++) { if (nocols) { colBegin1 = jj * nrow; } else { R_xlen_t colsElement = cols[jj]; if (!colsHasNA || colsElement != NA_R_XLEN_T) { colBegin1 = colsElement * nrow; } else { colBegin1 = NA_R_XLEN_T; } } for (ii=0; ii < nrow_ans; ii++) { if (norows) { if (!colsHasNA || colBegin1 != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin1 + ii; xvalue1 = x[idx]; } else { xvalue1 = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin1 + rows[ii]; xvalue1 = x[idx]; } else { idx = R_INDEX_OP(colBegin1, +, (rows[ii]), 1, 1); xvalue1 = R_INDEX_GET(x, idx, X_NA, 1); } if (norows) { if (!colsHasNA || colBegin1 != NA_R_XLEN_T){ /* * In this special case, we can eliminate * the possibility of having NA indicies */ idx = colBegin1 + ii + lag; xvalue2 = x[idx]; } else { xvalue2 = X_NA; } } else if (!rowsHasNA && !colsHasNA) { idx = colBegin1 + rows[ii+lag]; xvalue2 = x[idx]; } else { idx = R_INDEX_OP(colBegin1, +, (rows[ii+lag]), 1, 1); xvalue2 = R_INDEX_GET(x, idx, X_NA, 1); } ans[ss++] = X_DIFF(xvalue2, xvalue1); } } } } void CONCAT_MACROS(rowDiffs, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, int rowsHasNA, R_xlen_t *cols, R_xlen_t ncols, int colsHasNA, int byrow, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) { R_xlen_t nrow_tmp, ncol_tmp; X_C_TYPE *tmp = NULL; /* Nothing to do? */ if (ncol_ans <= 0 || nrow_ans <= 0) return; /* Special case (difference == 1) */ if (differences == 1) { DIFF_X_MATRIX_TYPE(x, nrow, rows, nrows, rowsHasNA, cols, ncols, colsHasNA, byrow, lag, ans, nrow_ans, ncol_ans); } else { /* Allocate temporary work matrix (to hold intermediate differences) */ if (byrow) { nrow_tmp = nrows; ncol_tmp = ncols - lag; } else { nrow_tmp = nrows - lag; ncol_tmp = ncols; } tmp = Calloc(nrow_tmp*ncol_tmp, X_C_TYPE); /* (a) First order of differences */ DIFF_X_MATRIX_TYPE(x, nrow, rows, nrows, rowsHasNA, cols, ncols, colsHasNA, byrow, lag, tmp, nrow_tmp, ncol_tmp); if (byrow) { ncol_tmp = ncol_tmp - lag; } else { nrow_tmp = nrow_tmp - lag; } /* (a) Intermediate orders of differences */ while (--differences > 1) { DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, tmp, nrow_tmp, ncol_tmp); if (byrow) { ncol_tmp = ncol_tmp - lag; } else { nrow_tmp = nrow_tmp - lag; } } /* (c) Last order of differences */ DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, ans, nrow_ans, ncol_ans); /* Deallocate temporary work matrix */ Free(tmp); } /* if (differences ...) */ } /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/validateIndices_lowlevel_template.h0000644000176200001440000000725314406445217022447 0ustar liggesusers/*********************************************************************** TEMPLATE: void validateIndices_(X_C_TYPE *idxs, R_xlen_t nidxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r' ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "000.templates-types.h" #undef R_xlen_t_from_idx_TYPE #if X_TYPE == 'i' #define R_xlen_t_from_idx_TYPE CONCAT_MACROS(R_xlen_t_from_idx, int) #elif X_TYPE == 'r' #define R_xlen_t_from_idx_TYPE CONCAT_MACROS(R_xlen_t_from_idx, dbl) #endif static R_INLINE R_xlen_t R_xlen_t_from_idx_TYPE(X_C_TYPE x, R_xlen_t maxIdx) { if (X_ISNAN(x)) return NA_R_XLEN_T; #if X_TYPE == 'r' if (IS_INF(x)) return NA_R_XLEN_T; #endif if (x > maxIdx) return NA_R_XLEN_T; return x - 1; } /** idxs must not be NULL, which should be checked before calling this function. **/ R_xlen_t* CONCAT_MACROS(validateIndices, X_C_SIGNATURE)(X_C_TYPE *idxs, R_xlen_t nidxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) { // set default as no NA. *hasna = FALSE; R_xlen_t ii, jj; int state = 0; R_xlen_t count = 0; Rboolean needReAlloc = FALSE; // figure out whether idxs are all positive or all negative. for (ii = 0; ii < nidxs; ++ ii) { X_C_TYPE idx = idxs[ii]; if (idx > 0 || X_ISNAN(idx) #if X_TYPE == 'r' || IS_INF(idx) #endif ) { if (state < 0) error("only 0's may be mixed with negative subscripts"); #if X_TYPE == 'r' if (IS_INF(idx)) { needReAlloc = TRUE; // need to realloc indices array to set inf to NA } else #endif if (!X_ISNAN(idx)) { if (idx > maxIdx) { if (!allowOutOfBound) error("subscript out of bounds"); *hasna = TRUE; // out-of-bound index is NA needReAlloc = TRUE; } } else { *hasna = TRUE; } state = 1; ++ count; } else if (idx < 0) { if (state > 0) error("only 0's may be mixed with negative subscripts"); state = -1; needReAlloc = TRUE; } else { // idx == 0, need to realloc indices array needReAlloc = TRUE; } } if (state >= 0) *ansNidxs = count; if (!needReAlloc) { // must have: state >= 0 // return idxs; RETURN_VALIDATED_ANS(R_xlen_t, nidxs, idxs[ii], R_xlen_t_from_idx_TYPE(idxs[ii],maxIdx),); } // fill positive idxs into ans if (state >= 0) { // NOTE: braces is needed here, because of macro-defined function RETURN_VALIDATED_ANS(R_xlen_t, nidxs, idxs[ii], R_xlen_t_from_idx_TYPE(idxs[ii],maxIdx),); } // state < 0 // use filter as bitset to find out all required idxs Rboolean *filter = Calloc(maxIdx, Rboolean); count = maxIdx; memset(filter, 0, maxIdx*sizeof(Rboolean)); // set to FALSE for (ii = 0; ii < nidxs; ++ ii) { R_xlen_t idx = -idxs[ii]; if (idx > 0 && idx <= maxIdx) { if (filter[idx-1] == 0) { -- count; filter[idx-1] = TRUE; } } } *ansNidxs = count; if (count == 0) { Free(filter); return NULL; } // find the biggest number 'upperBound' R_xlen_t upperBound; for (upperBound = maxIdx-1; upperBound >= 0; -- upperBound) { if (!filter[upperBound]) break; } ++ upperBound; // fill required idxs into ans // NOTE: braces is needed here, because of macro-defined function RETURN_VALIDATED_ANS(R_xlen_t, upperBound, !filter[ii], ii, Free(filter);); } #include "000.templates-types_undef.h" matrixStats/vignettes/0000755000176200001440000000000014535664471014624 5ustar liggesusersmatrixStats/vignettes/matrixStats-methods.md.rsp0000644000176200001440000002132414372747711021735 0ustar liggesusers<%@meta language="R-vignette" content="-------------------------------- DIRECTIVES FOR R: %\VignetteIndexEntry{matrixStats: Summary of functions} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{matrix} %\VignetteKeyword{vector} %\VignetteKeyword{apply} %\VignetteKeyword{rows} %\VignetteKeyword{columns} %\VignetteKeyword{memory} %\VignetteKeyword{speed} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> <% pkgName <- "matrixStats" library(pkgName, character.only=TRUE) ns <- getNamespace(pkgName) env <- as.environment(sprintf("package:%s", pkgName)) R.utils::use("R.utils") kable <- function(df, ...) { fcns <- as.character(df$Functions) fcns <- strsplit(fcns, split=",") fcns <- sapply(fcns, FUN=function(names) { names <- trim(names) ok <- sapply(names, FUN=exists, envir=ns, mode="function") names[ok] <- sprintf("%s()", names[ok]) names[!ok] <- sprintf("~~%s()~~", names[!ok]) names <- paste(names, collapse=", ") }) df$Functions <- fcns df$Example <- sprintf("`%s`", df$Example) print(knitr::kable(df, ..., format="markdown")) } # Find all functions all <- ls(envir=env) keep <- sapply(all, FUN=function(name) { is.function(get(name, envir=env)) }) all <- all[keep] keep <- !grepl("[.]([^.]*)$", all) all <- all[keep] # Hidden functions skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet") skip <- c(skip, "allocArray", "allocMatrix", "allocVector") all <- setdiff(all, skip) # Column and row functions crfcns <- grep("^(col|row)", all, value=TRUE) # Vector functions vfcns <- setdiff(all, crfcns) %> # <%@meta name="title"%> <% pkg <- R.oo::Package(pkgName) %> <%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%> <% fcns <- crfcns base <- gsub("^(col|row)", "", fcns) groups <- tapply(fcns, base, FUN=list) stopifnot(all(sapply(groups, FUN=length) == 2L)) groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L) %> <%--- ## Functions that apply to column and rows of matrices ``` <% print(fcns) %> ``` ---%> <% fcns <- vfcns %> <%--- ## Functions that apply to vectors ``` <% print(fcns) %> ``` ---%> ## Location and scale estimators <% tbl <- NULL row <- data.frame( "Estimator" = "Weighted sample mean", "Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans", "Example" = "weightedMean(x, w); rowWeightedMeans(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Mean", "Functions" = "mean2, colMeans2, rowMeans2", "Example" = "mean2(x); rowMeans2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median", "Functions" = "median, colMedians, rowMedians", "Example" = "median(x); rowMedians(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median", "Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians", "Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance", "Functions" = "var, colVars, rowVars", "Example" = "var(x); rowVars(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample variance", "Functions" = "weightedVar, colWeightedVars, rowWeightedVars", "Example" = "weightedVar(x, w), rowWeightedVars(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance by n-order differences", "Functions" = "varDiff, colVarDiffs, rowVarDiffs", "Example" = "varDiff(x); rowVarDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation", "Functions" = "sd, colSds, rowSds", "Example" = "sd(x); rowSds(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample deviation", "Functions" = "weightedSd, colWeightedSds, rowWeightedSds", "Example" = "weightedSd(x, w), rowWeightedSds(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation by n-order differences", "Functions" = "sdDiff, colSdDiffs, rowSdDiffs", "Example" = "sdDiff(x); rowSdDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD)", "Functions" = "mad, colMads, rowMads", "Example" = "mad(x); rowMads(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median absolute deviation (MAD)", "Functions" = "weightedMad, colWeightedMads, rowWeightedMads", "Example" = "weightedMad(x, w), rowWeightedMads(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD) by n-order differences", "Functions" = "madDiff, colMadDiffs, rowMadDiffs", "Example" = "madDiff(x); rowMadDiffs()" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Quantile", "Functions" = "quantile, colQuantiles, rowQuantiles", "Example" = "quantile(x, probs); rowQuantiles(x, probs)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR)", "Functions" = "iqr, colIQRs, rowIQRs", "Example" = "iqr(x); rowIQRs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR) by n-order differences", "Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs", "Example" = "iqrDiff(x); rowIQRDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Range", "Functions" = "range, colRanges, rowRanges", "Example" = "range(x); rowRanges(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Minimum", "Functions" = "min, colMins, rowMins", "Example" = "min(x); rowMins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Maximum", "Functions" = "max, colMaxs, rowMaxs", "Example" = "max(x); rowMaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Testing for and counting values <% tbl <- NULL row <- data.frame( "Operator" = "Are there any missing values?", "Functions" = "anyMissing, colAnyMissings, rowAnyMissings", "Example" = "anyMissing(x); rowAnyMissings(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does TRUE exists?", "Functions" = "any, colAnys, rowAnys", "Example" = "any(x); rowAnys(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Are all values TRUE?", "Functions" = "all, colAlls, rowAlls", "Example" = "all(x); rowAlls(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does value exists?", "Functions" = "anyValue, colAnys, rowAnys", "Example" = "anyValue(x, value); rowAnys(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Do all elements have a given value?", "Functions" = "allValue, colAlls, rowAlls", "Example" = "allValue(x, value); rowAlls(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Number of occurrences of a value?", "Functions" = "count, colCounts, rowCounts", "Example" = "count(x, value); rowCounts(x, value)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Cumulative functions <% tbl <- NULL row <- data.frame( "Operator" = "Cumulative sum", "Functions" = "cumsum, colCumsums, rowCumsums", "Example" = "cumsum(x); rowCumsums(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative product", "Functions" = "cumprod, colCumprods, rowCumprods", "Example" = "cumprod(x); rowCumprods(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative minimum", "Functions" = "cummin, colCummins, rowCummins", "Example" = "cummin(x); rowCummins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative maximum", "Functions" = "cummax, colCummaxs, rowCummaxs", "Example" = "cummax(x); rowCummaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Binning <% tbl <- NULL row <- data.frame( "Estimator" = "Counts in disjoint bins", "Functions" = "binCounts", "Example" = "binCounts(x, bx)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample means (and counts) in disjoint bins", "Functions" = "binMeans", "Example" = "binMeans(y, x, bx)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Miscellaneous <% tbl <- NULL row <- data.frame( "Operation" = "Sum", "Functions" = "sum2, colSums2, rowSums2", "Example" = "sum2(x); rowSums2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operation" = "Lagged differences", "Functions" = c("diff2, colDiffs, rowDiffs"), "Example" = "diff2(x), rowDiffs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ------------------------------------------------------------- <%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](https://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>). matrixStats/R/0000755000176200001440000000000014535663647013021 5ustar liggesusersmatrixStats/R/rowCounts.R0000644000176200001440000001433014436403546015136 0ustar liggesusers#' Counts the number of occurrences of a specific value #' #' The row- and column-wise functions take either a matrix or a vector as #' input. If a vector, then argument \code{dim.} must be specified and fulfill #' \code{prod(dim.) == length(x)}. The result will be identical to the results #' obtained when passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, #' but avoids having to temporarily create/allocate a matrix, if only such is #' needed only for these calculations. #' #' @inheritParams rowAlls #' #' @param value A value to search for. #' #' @return \code{rowCounts()} (\code{colCounts()}) returns an #' \code{\link[base]{integer}} \code{\link[base]{vector}} of length N (K). #' \code{count()} returns a scalar of type \code{\link[base]{integer}} if #' the count is less than 2^31-1 (= \code{.Machine$integer.max}) otherwise #' a scalar of type \code{\link[base]{double}}. #' #' @example incl/rowCounts.R #' #' @author Henrik Bengtsson #' @seealso rowAlls #' @keywords array logic iteration univar #' @export rowCounts <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { # Argument 'x': if (is.matrix(x)) { } else if (is.vector(x)) { } else { stop(sprintf("Argument '%s' is not a matrix or a vector: %s", "x", mode(x)[1L])) } # Argument 'value': if (length(value) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "value", length(value))) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { if (is.na(useNames)) deprecatedUseNamesNA() # Preserve rownames names <- rownames(x) has_nas <- TRUE counts <- .Call(C_rowCounts, x, dim., rows, cols, value, 2L, na.rm, has_nas, useNames) # Preserve names attribute names <- names(counts) counts <- as.integer(counts) names(counts) <- names } else { # Preserve rownames names <- rownames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { counts <- apply(x, MARGIN = 1L, FUN = function(x) { sum(is.na(x)) }) } else { counts <- apply(x, MARGIN = 1L, FUN = function(x) { sum(x == value, na.rm = na.rm) }) } counts <- as.integer(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(counts) <- names } } else { names(counts) <- NULL } } else { deprecatedUseNamesNA() } } counts } #' @rdname rowCounts #' @export colCounts <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { # Argument 'x': if (is.matrix(x)) { } else if (is.vector(x)) { } else { stop(sprintf("Argument '%s' is not a matrix or a vector: %s", "x", mode(x)[1L])) } # Argument 'value': if (length(value) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "value", length(value))) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { if (is.na(useNames)) deprecatedUseNamesNA() # Preserve colnames names <- colnames(x) has_nas <- TRUE counts <- .Call(C_colCounts, x, dim., rows, cols, value, 2L, na.rm, has_nas, useNames) # Preserve names attribute names <- names(counts) counts <- as.integer(counts) names(counts) <- names } else { # Preserve colnames names <- colnames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { counts <- apply(x, MARGIN = 2L, FUN = function(x) sum(is.na(x)) ) } else { counts <- apply(x, MARGIN = 2L, FUN = function(x) { sum(x == value, na.rm = na.rm) }) } counts <- as.integer(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(counts) <- names } } else { names(counts) <- NULL } } else { deprecatedUseNamesNA() } } counts } #' @rdname rowCounts #' @export count <- function(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) { # Argument 'x': if (!is.vector(x)) { stop(sprintf("Argument '%s' is not a vector: %s", "x", mode(x)[1L])) } # Argument 'value': if (length(value) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "value", length(value))) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_count, x, idxs, value, 2L, na.rm, has_nas) } else { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (is.na(value)) { counts <- sum2(is.na(x)) } else { counts <- sum2(x == value, na.rm = na.rm) } } counts } matrixStats/R/rowCumsums.R0000644000176200001440000000475114436403546015325 0ustar liggesusers#' Cumulative sums, products, minima and maxima for each row (column) in a #' matrix #' #' Cumulative sums, products, minima and maxima for each row (column) in a #' matrix. #' #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} #' of the same mode as \code{x}, except when \code{x} is of mode #' \code{\link[base]{logical}}, then the return type is #' \code{\link[base]{integer}}. #' #' @example incl/rowCumsums.R #' #' @author Henrik Bengtsson #' #' @seealso See \code{\link[base]{cumsum}}(), \code{\link[base]{cumprod}}(), #' \code{\link[base]{cummin}}(), and \code{\link[base]{cummax}}(). #' #' @keywords array iteration univar #' @export rowCumsums <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCumsums, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCumsums <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCumsums, x, dim., rows, cols, FALSE, useNames) } #' @rdname rowCumsums #' @export rowCumprods <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCumprods, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCumprods <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCumprods, x, dim., rows, cols, FALSE, useNames) } #' @rdname rowCumsums #' @export rowCummins <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCummins, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCummins <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCummins, x, dim., rows, cols, FALSE, useNames) } #' @rdname rowCumsums #' @export rowCummaxs <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCummaxs, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCummaxs <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowCummaxs, x, dim., rows, cols, FALSE, useNames) } matrixStats/R/rowLogSumExps.R0000644000176200001440000000352314436403546015733 0ustar liggesusers#' Accurately computes the logarithm of the sum of exponentials across rows or #' columns #' #' Accurately computes the logarithm of the sum of exponentials across rows or #' columns. #' #' @inheritParams rowAlls #' @inheritParams logSumExp #' #' @param lx A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. #' Typically \code{lx} are \eqn{log(x)} values. #' #' @param rows,cols A \code{\link[base]{vector}} indicating subset of rows #' (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting #' is done. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, any missing values are #' ignored, otherwise not. #' #' @param dim. An \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length two specifying the dimension of \code{x}, also when not a #' \code{\link[base]{matrix}}. #' #' @param ... Not used. #' #' @return A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N #' (K). #' #' @section Benchmarking: #' These methods are implemented in native code and have been optimized for #' speed and memory. #' #' @author Native implementation by Henrik Bengtsson. Original R code by #' Nakayama ??? (Japan). #' #' @seealso To calculate the same on vectors, \code{\link{logSumExp}}(). #' #' @keywords array #' @export rowLogSumExps <- function(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_na <- TRUE .Call(C_rowLogSumExps, lx, dim., rows, cols, na.rm, has_na, TRUE, useNames) } #' @rdname rowLogSumExps #' @export colLogSumExps <- function(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_na <- TRUE .Call(C_rowLogSumExps, lx, dim., rows, cols, na.rm, has_na, FALSE, useNames) } matrixStats/R/x_OP_y.R0000644000176200001440000000526614522461565014341 0ustar liggesusers#' Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)' #' #' Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)', where OP can be #' +, -, *, and /. For + and *, na.rm = TRUE will drop missing values first. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} or \code{\link[base]{logical}} #' NxK \code{\link[base]{matrix}}. #' #' @param y A \code{\link[base]{numeric}} or \code{\link[base]{logical}} #' \code{\link[base]{vector}} of length L. #' #' @param OP A \code{\link[base]{character}} specifying which operator to use. #' #' @param xrows,xcols A \code{\link[base]{vector}} indicating subset of rows #' (and/or columns) to operate over 'x'. If \code{\link[base]{NULL}}, no #' subsetting is done. #' #' @param yidxs A \code{\link[base]{vector}} indicating subset of elements to #' operate over 'y'. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param commute If \code{\link[base:logical]{TRUE}}, 'y OP x' ('t(y OP #' t(x))') is calculated, otherwise 'x OP y' ('t(t(x) OP y)'). #' #' @return Returns a \code{\link[base]{numeric}} NxK #' \code{\link[base]{matrix}}. #' #' @example incl/x_OP_y.R #' #' @section Missing values: If \code{na.rm = TRUE}, then missing values are #' "dropped" before applying the operator to each pair of values. For #' instance, if \code{x[1, 1]} is a missing value, then the result of #' \code{x[1, 1] + y[1]} equals \code{y[1]}. If also \code{y[1]} is a missing #' value, then the result is a missing value. This only applies to additions #' and multiplications. For subtractions and divisions, argument \code{na.rm} #' is ignored. #' #' @author Henrik Bengtsson #' #' @keywords internal #' @export x_OP_y <- function(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) { commute <- as.logical(commute) na.rm <- as.logical(na.rm) if (is.character(OP)) { op <- charmatch(OP, c("+", "-", "*", "/"), nomatch = 0L) if (op == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "OP", sQuote(OP))) } } else { op <- as.integer(OP) } .Call(C_x_OP_y, x, y, dim(x), op, xrows, xcols, yidxs, commute, na.rm, TRUE, FALSE) } #' @rdname x_OP_y #' @export t_tx_OP_y <- function(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) { commute <- as.logical(commute) na.rm <- as.logical(na.rm) if (is.character(OP)) { op <- charmatch(OP, c("+", "-", "*", "/"), nomatch = 0L) if (op == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "OP", sQuote(OP))) } } else { op <- as.integer(OP) } .Call(C_x_OP_y, x, y, dim(x), op, xrows, xcols, yidxs, commute, na.rm, TRUE, TRUE) } matrixStats/R/binMeans.R0000644000176200001440000001102214372747711014666 0ustar liggesusers#' Fast mean calculations in non-overlapping bins #' #' Computes the sample means in non-overlapping bins #' #' \code{binMeans(x, bx, right = TRUE)} gives equivalent results as #' \code{rev(binMeans(-x, bx = sort(-bx), right = FALSE))}, but is faster. #' #' @inheritParams rowAlls #' #' @param y A \code{\link[base]{numeric}} or \code{\link[base]{logical}} #' \code{\link[base]{vector}} of K values to calculate means on. #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K #' positions for to be binned. #' #' @param bx A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 #' ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, #' \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, missing values in \code{y} #' are dropped before calculating the mean, otherwise not. #' #' @param count If \code{\link[base:logical]{TRUE}}, the number of data points #' in each bins is returned as attribute \code{count}, which is an #' \code{\link[base]{integer}} \code{\link[base]{vector}} of length B. #' #' @param right If \code{\link[base:logical]{TRUE}}, the bins are right-closed #' (left open), otherwise left-closed (right open). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length B. #' #' @section Missing and non-finite values: #' Data points where either of \code{y} and \code{x} is missing are dropped #' (and therefore are also not counted). Non-finite values in \code{y} are #' not allowed and gives an error. Missing values in \code{bx} are not allowed #' and gives an error. #' #' @example incl/binMeans.R #' #' @author Henrik Bengtsson with initial code contributions by #' Martin Morgan [1]. #' #' @seealso \code{\link{binCounts}}(). \code{\link[stats]{aggregate}} and #' \code{\link[base]{mean}}(). #' #' @references [1] R-devel thread \emph{Fastest non-overlapping binning mean #' function out there?} on Oct 3, 2012\cr #' #' @keywords univar #' @export binMeans <- function(y, x, idxs = NULL, bx, na.rm = TRUE, count = TRUE, right = FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'y': if (!is.numeric(y) && !is.logical(y)) { stop(sprintf("Argument '%s' is not numeric or logical: %s", "y", mode(y))) } if (is.numeric(y) && !is.integer(y) && any(is.infinite(y))) { stop(sprintf("Argument '%s' must not contain infinite values", "y")) } n <- length(y) # Argument 'x': if (!is.numeric(x)) { stop(sprintf("Argument '%s' is not numeric: %s", "x", mode(x))) } if (length(x) != n) { stop(sprintf("Argument '%s' and '%s' have different lengths: %.0f != %.0f", "y", "x", length(y), length(x))) } # Argument 'bx': if (!is.numeric(bx)) { stop(sprintf("Argument '%s' is not numeric: %s", "bx", mode(bx))) } if (any(is.infinite(bx))) { stop(sprintf("Argument '%s' must not contain infinite values", "bx")) } if (is.unsorted(bx)) { stop(sprintf("Argument '%s' is not ordered", "bx")) } # Argument 'na.rm': if (!is.logical(na.rm)) { stop(sprintf("Argument '%s' is not logical: %s", "na.rm", mode(na.rm))) } # Argument 'count': if (!is.logical(count)) { stop(sprintf("Argument '%s' is not logical: %s", "count", mode(count))) } # Apply subset if (!is.null(idxs)) { x <- x[idxs] y <- y[idxs] } # Argument 'right': right <- as.logical(right) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Preprocessing of (x, y) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values in 'x' keep <- which(!is.na(x)) if (length(keep) < n) { x <- x[keep] y <- y[keep] n <- length(y) } keep <- NULL # Not needed anymore # Drop missing values in 'y'? if (na.rm) { keep <- which(!is.na(y)) if (length(keep) < n) { x <- x[keep] y <- y[keep] } keep <- NULL # Not needed anymore } # Order (x, y) by increasing x. # If 'x' is already sorted, the overhead of (re)sorting is # relatively small. x <- sort.int(x, method = "quick", index.return = TRUE) y <- y[x$ix] x <- x$x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bin # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- as.numeric(y) x <- as.numeric(x) bx <- as.numeric(bx) count <- as.logical(count) .Call(C_binMeans, y, x, bx, count, right) } matrixStats/R/999.package.R0000644000176200001440000000154614372747711015070 0ustar liggesusers#' Package matrixStats #' #' High-performing functions operating on rows and columns of matrices, e.g. #' col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions #' optimized per data type and for subsetted calculations such that both memory #' usage and processing time is minimized. There are also optimized #' vector-based methods, e.g. binMeans(), madDiff() and weightedMedian(). #' #' @section How to cite this package: #' Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and #' Columns of Matrices (and to Vectors). R package version 0.52.2. #' https://github.com/HenrikBengtsson/matrixStats #' #' @author Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola #' Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder #' #' @keywords package #' #' @name matrixStats-package #' @aliases matrixStats #' @docType package NULL matrixStats/R/rowProds.R0000644000176200001440000000700414436403546014752 0ustar liggesusers#' Calculates the product for each row (column) in a matrix #' #' Calculates the product for each row (column) in a matrix. #' #' If \code{method = "expSumLog"}, then then \code{\link{product}}() function is #' used, which calculates the product via the logarithmic transform (treating #' negative values specially). This improves the precision and lowers the risk #' for numeric overflow. If \code{method = "direct"}, the direct product is #' calculated via the \code{\link[base]{prod}}() function. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param method A \code{\link[base]{character}} string specifying how each #' product is calculated. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Missing values: #' Note, if \code{method = "expSumLog"}, \code{na.rm = FALSE}, and \code{x} #' contains missing values (\code{\link[base]{NA}} or #' \code{\link[base:is.finite]{NaN}}), then the calculated value is also #' missing value. Note that it depends on platform whether #' \code{\link[base:is.finite]{NaN}} or \code{\link[base]{NA}} is returned #' when an \code{\link[base:is.finite]{NaN}} exists, cf. #' \code{\link[base]{is.nan}}(). #' #' @author Henrik Bengtsson #' #' @keywords array iteration robust univar #' @export rowProds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = TRUE) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preallocate result (zero:ed by default) n <- nrow(x) y <- double(length = n) # Nothing todo? if (n == 0L) return(y) # Argument 'method': method <- method[1L] # How to calculate product? if (method == "expSumLog") { prod <- product } else if (method == "direct") { } else { stop(sprintf("Unknown value of argument '%s': %s", "method", method)) } for (ii in seq_len(n)) { y[ii] <- prod(x[ii, , drop = TRUE], na.rm = na.rm) } # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) names(y) <- names } else { names(y) <- NULL } } else { deprecatedUseNamesNA() } y } #' @rdname rowProds #' @export colProds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = TRUE) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preallocate result (zero:ed by default) n <- ncol(x) y <- double(length = n) # Nothing todo? if (n == 0L) return(y) # Argument 'method': method <- method[1L] # How to calculate product? if (method == "expSumLog") { prod <- product } else if (method == "direct") { } else { stop(sprintf("Unknown value of argument '%s': %s", "method", method)) } for (ii in seq_len(n)) { y[ii] <- prod(x[, ii, drop = TRUE], na.rm = na.rm) } # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) names(y) <- names } else { names(y) <- NULL } } else { deprecatedUseNamesNA() } y } matrixStats/R/sum2.R0000644000176200001440000000611714406445217014023 0ustar liggesusers#' Fast sum over subset of vector elements #' #' Computes the sum of all or a subset of values. #' #' \code{sum2(x, idxs)} gives equivalent results as \code{sum(x[idxs])}, but #' is faster and more memory efficient since it avoids the actual subsetting #' which requires copying of elements and garbage collection thereof. #' #' Furthermore, \code{sum2(x, mode = "double")} is equivalent to #' \code{sum(as.numeric(x))} and may therefore be used to avoid integer #' overflow(*), but at the same time is much more memory efficient that #' the regular \code{sum()} function when \code{x} is an #' \code{\link[base]{integer}} vector. #' #' (*) \emph{In R (>= 3.5.0), \code{sum(x)} will no longer integer overflow #' and return \code{NA_integer_}. Instead it will return the correct sum in #' form of a double value.} #' #' @inheritParams rowAlls #' @inheritParams weightedMad #' #' @param mode A \code{\link[base]{character}} string specifying the data type #' of the return value. Default is to use the same mode as argument \code{x}, #' unless it is logical when it defaults to \code{"integer"}. #' #' @return Returns a scalar of the data type specified by argument \code{mode}. #' If \code{mode = "integer"}, then integer overflow occurs if the \emph{sum} #' is outside the range of defined integer values. #' Note that the intermediate sum (\code{sum(x[1:n])}) is internally #' represented as a floating point value and will therefore never be outside of #' the range. #' If \code{mode = "integer"} and \code{typeof(x) == "double"}, then a warning #' is generated. #' #' @example incl/sum2.R #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{sum}}(). #' To efficiently average over a subset, see \code{\link{mean2}}(). #' #' @keywords univar internal #' @export sum2 <- function(x, idxs = NULL, na.rm = FALSE, mode = typeof(x), ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': x_mode <- typeof(x) x_logical <- (x_mode == "logical") if (!is.numeric(x) && !x_logical) { stop(sprintf("Argument '%s' is not numeric or logical: %s", "x", x_mode)) } # Argument 'na.rm': if (!is.logical(na.rm)) { stop(sprintf("Argument '%s' is not logical: %s", "na.rm", mode(na.rm))) } # Argument 'mode': mode <- mode[1L] if (x_logical && mode == "logical") { ## SPECIAL CASE: If `x` is logical, default mode should be `integer` mode_idx <- 1L } else if (mode == "integer") { mode_idx <- 1L ## Coercing results from double to integer is likely a mistake if (x_mode == "double") { warning('sum2(x, mode = "integer") called with typeof(x) == "double"; did you mean to use as.integer(sum2(x))?') } } else if (mode == "double") { mode_idx <- 2L } else { stop(sprintf("Unknown value of argument '%s': %s", "mode", mode)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Call(C_sum2, x, idxs, na.rm, mode_idx) } matrixStats/R/rowQuantiles.R0000644000176200001440000002267514436403546015643 0ustar liggesusers#' Estimates quantiles for each row (column) in a matrix #' #' Estimates quantiles for each row (column) in a matrix. #' #' @inheritParams rowAlls #' #' @param x An \code{\link[base]{integer}}, \code{\link[base]{numeric}} or #' \code{\link[base]{logical}} NxK \code{\link[base]{matrix}} with N >= 0. #' #' @param probs A \code{\link[base]{numeric}} \code{\link[base]{vector}} of J #' probabilities in [0, 1]. #' #' @param type An \code{\link[base]{integer}} specifying the type of estimator. #' See \code{\link[stats]{quantile}} for more details. #' #' @param digits An \code{\link[base]{integer}} specifying the precision of #' the formatted percentages. Not used when `useNames = FALSE`. #' In **matrixStats** (< 0.63.0), the default used to be #' `max(2L, getOption("digits"))` inline with R (< 4.1.0). #' #' @param ... Additional arguments passed to \code{\link[stats]{quantile}}. #' #' @param drop If TRUE, singleton dimensions in the result are dropped, #' otherwise not. #' #' @return Returns a NxJ (KxJ) \code{\link[base]{matrix}}, where N (K) is the #' number of rows (columns) for which the J quantiles are calculated. #' The return type is either integer or numeric depending on \code{type}. #' #' @example incl/rowQuantiles.R #' #' @author Henrik Bengtsson #' @seealso \code{\link[stats]{quantile}}. #' @keywords array iteration robust univar #' #' @importFrom stats quantile #' @export rowQuantiles <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., useNames = TRUE, drop = TRUE) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) if (!is.numeric(x) && !is.integer(x) && !is.logical(x)) { .Defunct(msg = sprintf("Argument 'x' is of type %s. Only 'integer', 'numeric', and 'logical' is supported.", sQuote(storage.mode(x)))) #nolint } # Argument 'probs': if (anyMissing(probs)) { stop(sprintf("Argument '%s' must not contain missing values", "probs")) } eps <- 100 * .Machine$double.eps if (any((probs < -eps | probs > 1 + eps))) { stop(sprintf("Argument '%s' is out of range [0-eps, 1+eps]: %g", "probs", probs)) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Coerce? # if (is.logical(x)) { # storage.mode(x) <- "integer" # } # Argument 'x': nrow <- nrow(x) ncol <- ncol(x) # Allocate result na_value <- NA_real_ if (type != 7L) storage.mode(na_value) <- storage.mode(x) q <- matrix(na_value, nrow = nrow, ncol = length(probs)) if (nrow > 0L && ncol > 0L) { na_rows <- rowAnyMissings(x) has_na <- any(na_rows) if (!has_na) na.rm <- FALSE if (!has_na && type == 7L) { n <- ncol idxs <- 1 + (n - 1) * probs idxs_lo <- floor(idxs) idxs_hi <- ceiling(idxs) partial <- sort.int(unique(c(idxs_lo, idxs_hi))) # Adjust? idxs_adj <- which(idxs > idxs_lo) adj <- (length(idxs_adj) > 0L) # Adjust if (adj) { idxs_hi <- idxs_hi[idxs_adj] w <- (idxs - idxs_lo)[idxs_adj] q_lo <- matrix(na_value, nrow = length(idxs_adj), ncol = nrow) q_hi <- matrix(na_value, nrow = length(idxs_adj), ncol = nrow) cols <- seq_len(ncol) for (rr in seq_len(nrow)) { x_rr <- .subset(x, rr, cols, drop = TRUE) xp_rr <- sort.int(x_rr, partial = partial) q_rr <- .subset(xp_rr, idxs_lo) q[rr,] <- q_rr q_hi[,rr] <- .subset(xp_rr, idxs_hi) q_lo[,rr] <- .subset(q_rr, idxs_adj) # Not needed anymore x_rr <- xp_rr <- NULL } q_adj <- (1 - w) * q_lo + w * q_hi for (cc in seq_along(idxs_adj)) { q[, idxs_adj[cc]] <- q_adj[cc, , drop = TRUE] } # Not needed anymore q_adj <- q_lo <- q_hi <- NULL } else { cols <- seq_len(ncol) for (rr in seq_len(nrow)) { x_rr <- .subset(x, rr, cols, drop = TRUE) xp_rr <- sort.int(x_rr, partial = partial) q[rr,] <- .subset(xp_rr, idxs_lo) # Not needed anymore x_rr <- xp_rr <- NULL } } storage.mode(q) <- "numeric" } else { # For each row... rows <- seq_len(nrow) # Rows with NAs should return all NAs (so skip those) if (has_na && !na.rm) rows <- rows[!na_rows] for (kk in rows) { xkk <- x[kk, ] if (na.rm) xkk <- xkk[!is.na(xkk)] q[kk, ] <- quantile(xkk, probs = probs, na.rm = FALSE, type = type, names = FALSE, ...) } } # if (type ...) } # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() rownames(q) <- rownames(x) # Add percentage names if (length(probs) > 0) { colnames(q) <- quantile_probs_names(probs, digits = digits) } } else if (useNames) { rownames(q) <- rownames(x) # Add percentage names if (length(probs) > 0) { colnames(q) <- quantile_probs_names(probs, digits = digits) } } else { rownames(q) <- NULL } # Drop singleton dimensions? if (drop) { q <- drop(q) } q } #' @importFrom stats quantile #' @rdname rowQuantiles #' @export colQuantiles <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., useNames = TRUE, drop = TRUE) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) if (!is.numeric(x) && !is.integer(x) && !is.logical(x)) { .Defunct(msg = sprintf("Argument 'x' is of type %s. Only 'integer', 'numeric', and 'logical' is supported.", sQuote(storage.mode(x)))) #nolint } # Argument 'probs': if (anyMissing(probs)) { stop(sprintf("Argument '%s' must not contain missing values", "probs")) } eps <- 100 * .Machine$double.eps if (any((probs < -eps | probs > 1 + eps))) { stop(sprintf("Argument '%s' is out of range [0-eps, 1+eps]: %g", "probs", probs)) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Coerce? # if (is.logical(x)) { # storage.mode(x) <- "integer" # } # Argument 'x': nrow <- nrow(x) ncol <- ncol(x) # Allocate result na_value <- NA_real_ if (type != 7L) storage.mode(na_value) <- storage.mode(x) q <- matrix(na_value, nrow = ncol, ncol = length(probs)) if (nrow > 0L && ncol > 0L) { na_cols <- colAnyMissings(x) has_na <- any(na_cols) if (!has_na) na.rm <- FALSE if (!has_na && type == 7L) { n <- nrow idxs <- 1 + (n - 1) * probs idxs_lo <- floor(idxs) idxs_hi <- ceiling(idxs) partial <- sort.int(unique(c(idxs_lo, idxs_hi))) # Adjust? idxs_adj <- which(idxs > idxs_lo) adj <- (length(idxs_adj) > 0L) if (adj) { idxs_hi <- idxs_hi[idxs_adj] w <- (idxs - idxs_lo)[idxs_adj] q_lo <- matrix(na_value, nrow = length(idxs_adj), ncol = ncol) q_hi <- matrix(na_value, nrow = length(idxs_adj), ncol = ncol) rows <- seq_len(nrow) for (cc in seq_len(ncol)) { x_cc <- .subset(x, rows, cc, drop = TRUE) xp_cc <- sort.int(x_cc, partial = partial) q_cc <- .subset(xp_cc, idxs_lo) q[cc,] <- q_cc q_hi[,cc] <- .subset(xp_cc, idxs_hi) q_lo[,cc] <- .subset(q_cc, idxs_adj) # Not needed anymore x_cc <- xp_cc <- NULL } q_adj <- (1 - w) * q_lo + w * q_hi for (cc in seq_along(idxs_adj)) { q[, idxs_adj[cc]] <- q_adj[cc, , drop = TRUE] } # Not needed anymore q_adj <- q_lo <- q_hi <- NULL } else { rows <- seq_len(nrow) for (cc in seq_len(ncol)) { x_cc <- .subset(x, rows, cc, drop = TRUE) xp_cc <- sort.int(x_cc, partial = partial) q[cc,] <- .subset(xp_cc, idxs_lo) # Not needed anymore x_cc <- xp_cc <- NULL } } } else { # For each column... cols <- seq_len(ncol) # Columns with NAs should return all NAs (so skip those) if (has_na && !na.rm) cols <- cols[!na_cols] for (kk in cols) { xkk <- x[, kk] if (na.rm) xkk <- xkk[!is.na(xkk)] q[kk, ] <- quantile(xkk, probs = probs, na.rm = FALSE, type = type, names = FALSE, ...) } } # if (type ...) } # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() rownames(q) <- colnames(x) # Add percentage names if (length(probs) > 0) { colnames(q) <- quantile_probs_names(probs, digits = digits) } } else if (useNames) { rownames(q) <- colnames(x) # Add percentage names if (length(probs) > 0) { colnames(q) <- quantile_probs_names(probs, digits = digits) } } else { rownames(q) <- NULL } # Drop singleton dimensions? if (drop) { q <- drop(q) } q } quantile_probs_names <- function(probs, digits = 7L) { if (!is.numeric(digits) || is.na(digits) || digits < 1L) { stop("Argument 'digits' is not a single positive numeric") } ## Adopted from stats:::format_perc() probs <- 100 * probs if (length(probs) < 100) { names <- formatC(probs, format = "fg", width = 1L, digits = digits) } else { names <- format(probs, trim = TRUE, digits = digits) } names <- paste(names, "%", sep = "") names[is.na(probs)] <- "" names } matrixStats/R/zzz.R0000644000176200001440000000200214522461565013762 0ustar liggesusers.onLoad <- function(libname, pkgname) { ## https://github.com/HenrikBengtsson/matrixStats/issues/183 onLoadSetCenterOnScalar() ## https://github.com/HenrikBengtsson/matrixStats/issues/187 onLoadSetCenterOnUse() ## https://github.com/HenrikBengtsson/matrixStats/issues/16 onLoadSetVarsFormulaFreq() onLoadSetVarsFormulaOnMistake() ## Deprecate validateIndices() /HB 2021-08-26 action <- Sys.getenv("R_MATRIXSTATS_VALIDATEINDICES", NA_character_) if (!is.na(action)) { action <- match.arg(action, choices = c("deprecated", "defunct")) options(matrixStats.validateIndices = action) } ## Deprecate/defunct useNames = NA /HB 2023-10-31 action <- Sys.getenv("R_MATRIXSTATS_USENAMES_NA", NA_character_) if (!is.na(action)) { action <- match.arg(action, choices = c("deprecated", "defunct")) options(matrixStats.useNames.NA = action) } } #' @useDynLib "matrixStats", .registration = TRUE, .fixes = "C_" .onUnload <- function(libpath) { library.dynam.unload("matrixStats", libpath) } matrixStats/R/rowVars.R0000644000176200001440000002632214436403546014602 0ustar liggesusers#' Variance estimates for each row (column) in a matrix #' #' Variance estimates for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param refine If \code{\link[base:logical]{TRUE}}, `center` is NULL, and #' \code{x} is \code{\link[base]{numeric}}, then extra effort is used to #' calculate the average with greater numerical precision, otherwise not. #' #' @param center (optional; a vector or length N (K)) If the row (column) #' means are already estimated, they can be pre-specified using this argument. #' This avoid re-estimating them again. #' _Warning: It is important that a non-biased sample mean estimate is passed. #' If not, then the variance estimate of the spread will also be biased._ #' If NULL (default), the row/column means are estimated internally. #' #' @param ... Additional arguments passed to \code{rowMeans()} and #' \code{rowSums()}. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Providing center estimates: #' The sample variance is estimated as #' #' \eqn{n/(n-1) * mean((x - center)^2)}, #' #' where \eqn{center} is estimated as the sample mean, by default. #' In matrixStats (< 0.58.0), #' #' \eqn{n/(n-1) * (mean(x^2) - center^2)} #' #' was used. Both formulas give the same result _when_ `center` is the #' sample mean estimate. #' #' Argument `center` can be used to provide an already existing estimate. #' It is important that the sample mean estimate is passed. #' If not, then the variance estimate of the spread will be biased. #' #' For the time being, in order to lower the risk for such mistakes, #' argument `center` is occasionally validated against the sample-mean #' estimate. If a discrepancy is detected, an informative error is #' provided to prevent incorrect variance estimates from being used. #' For performance reasons, this check is only performed once every 50 times. #' The frequency can be controlled by R option `matrixStats.vars.formula.freq`, #' whose default can be set by environment variable #' `R_MATRIXSTATS_VARS_FORMULA_FREQ`. #' #' @example incl/rowMethods.R #' #' @author Henrik Bengtsson #' #' @seealso See \code{rowMeans()} and \code{rowSums()} in #' \code{\link[base]{colSums}}(). #' @keywords array iteration robust univar #' @export rowVars <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() if (is.null(center)) { has_nas <- TRUE sigma2 <- .Call(C_rowVars, x, dim., rows, cols, na.rm, refine, has_nas, TRUE, useNames) return(sigma2) } ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("rowVars") ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats names <- rownames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != nrow(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(rows)) { validateScalarCenter(center, nrow(x), "rows") center <- rep(center, times = nrow(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "rows", "x", length(center), nrow(x))) } } if (!is.null(rows)) center <- center[rows] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) ncol <- ncol(x) # Nothing to do? if (ncol <= 1L) { x <- rep(NA_real_, times = nrow(x)) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } } return(x) } if (na.rm) { # Count number of missing values in each row na_counts <- rowCounts(x, value = NA_real_, na.rm = FALSE) # Number of non-missing values n <- ncol - na_counts has_na <- any(na_counts > 0L) if (has_na) { # Set NA estimates for rows with less than two observations n[n <= 1L] <- NA_integer_ } else { # No need to check for missing values below na.rm <- FALSE } } else { # Assuming no missing values n <- ncol } validate <- validateVarsCenterFormula() if (!validate) { ## The primary formula for estimating the sample variance x <- (x - center)^2 ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN x <- rowMeans(x, na.rm = na.rm) } else { ## The alternative formula for estimating the sample variance x2 <- x * x x2 <- rowMeans(x2, na.rm = na.rm) x2 <- (x2 - center^2) ## The primary formula for estimating the sample variance x <- (x - center)^2 x <- rowMeans(x, na.rm = na.rm) ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN equal <- all.equal(x, x2, check.attribute = FALSE) x2 <- NULL if (!isTRUE(equal)) { fcn <- getOption("matrixStats.vars.formula.onMistake", "deprecated") fcn <- switch(fcn, deprecated = .Deprecated, .Defunct) when <- attr(validate, "when", exact = TRUE) fcn(msg = sprintf("Detected incorrect use of argument 'center' for rowVars() or rowSds(). The value of 'center' does not meet the assumption that estimating the variance using the 'primary' or the 'alternative' formula does not matter as they should give the same results, which suggests a misunderstanding on what argument 'center' should be. Please see help(\"rowVars\", package = \"%s\"). The reason was: %s (this validation is performed %s per R option 'matrixStats.vars.formula.freq')", .packageName, equal, when)) } } x <- x * (n / (n - 1)) # Preserve names attribute? if (is.na(useNames) || useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } x } #' @rdname rowVars #' @export colVars <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() if (is.null(center)) { has_nas <- TRUE sigma2 <- .Call(C_rowVars, x, dim., rows, cols, na.rm, refine, has_nas, FALSE, useNames) return(sigma2) } ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("colVars") ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats names <- colnames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != ncol(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(cols)) { validateScalarCenter(center, ncol(x), "columns") center <- rep(center, times = ncol(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "columns", "x", length(center), ncol(x))) } } if (!is.null(cols)) center <- center[cols] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) nrow <- nrow(x) # Nothing to do? if (nrow <= 1L) { x <- rep(NA_real_, times = ncol(x)) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } } else { deprecatedUseNamesNA() } return(x) } if (na.rm) { # Count number of missing values in each column na_counts <- colCounts(x, value = NA_real_, na.rm = FALSE) # Number of non-missing values n <- nrow - na_counts has_na <- any(na_counts > 0L) if (has_na) { # Set NA estimates for rows with less than two observations n[n <= 1L] <- NA_integer_ } else { # No need to check for missing values below na.rm <- FALSE } } else { # Assuming no missing values n <- nrow } validate <- validateVarsCenterFormula() if (!validate) { ## The primary formula for estimating the sample variance for (cc in seq_len(ncol(x))) { x[, cc] <- (x[, cc] - center[cc])^2 } x <- colMeans(x, na.rm = na.rm) ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN } else { ## The alternative formula for estimating the sample variance x2 <- x * x x2 <- colMeans(x2, na.rm = na.rm) x2 <- (x2 - center^2) ## The primary formula for estimating the sample variance for (cc in seq_len(ncol(x))) { x[, cc] <- (x[, cc] - center[cc])^2 } x <- colMeans(x, na.rm = na.rm) ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN equal <- all.equal(x, x2) x2 <- NULL if (!isTRUE(equal)) { fcn <- getOption("matrixStats.vars.formula.onMistake", "deprecated") fcn <- switch(fcn, deprecated = .Deprecated, .Defunct) when <- attr(validate, "when", exact = TRUE) fcn(msg = sprintf("Detected incorrect use of argument 'center' for colVars() or colSds(). The value of 'center' does not meet the assumption that estimating the variance using the 'primary' or the 'alternative' formula does not matter as they should give the same results, which suggests a misunderstanding on what argument 'center' should be. Please see help(\"rowVars\", package = \"%s\"). The reason was: %s (this validation is performed %s per R option 'matrixStats.vars.formula.freq')", .packageName, equal, when)) } } x <- x * (n / (n - 1)) # Preserve names attribute? if (is.na(useNames) || useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } x } matrixStats/R/rowSums2.R0000644000176200001440000000176014436403546014677 0ustar liggesusers#' Calculates the sum for each row (column) in a matrix #' #' Calculates the sum for each row (column) in a matrix. #' #' The implementation of \code{rowSums2()} and \code{colSums2()} is #' optimized for both speed and memory. #' #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson #' #' @keywords array iteration robust univar #' @export rowSums2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE .Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, TRUE, useNames) } #' @rdname rowSums2 #' @export colSums2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE .Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, FALSE, useNames) } matrixStats/R/logSumExp.R0000644000176200001440000000470014372747711015062 0ustar liggesusers#' Accurately computes the logarithm of the sum of exponentials #' #' Accurately computes the logarithm of the sum of exponentials, that is, #' \eqn{log(sum(exp(lx)))}. If \eqn{lx = log(x)}, then this is equivalently to #' calculating \eqn{log(sum(x))}. #' #' This function, which avoid numerical underflow, is often used when computing #' the logarithm of the sum of small numbers (\eqn{|x| << 1}) such as #' probabilities. #' #' This is function is more accurate than \code{log(sum(exp(lx)))} when the #' values of \eqn{x = exp(lx)} are \eqn{|x| << 1}. The implementation of this #' function is based on the observation that \deqn{ log(a + b) = [ la = log(a), #' lb = log(b) ] = log( exp(la) + exp(lb) ) = la + log ( 1 + exp(lb - la) ) } #' Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is less likely #' that the computation of \eqn{1 + exp(lb - la)} will not underflow/overflow #' numerically. Because of this, the overall result from this function should #' be more accurate. Analogously to this, the implementation of this function #' finds the maximum value of \code{lx} and subtracts it from the remaining #' values in \code{lx}. #' #' @inheritParams rowAlls #' #' @param lx A \code{\link[base]{numeric}} \code{\link[base]{vector}}. #' Typically \code{lx} are \eqn{log(x)} values. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @section Benchmarking: This method is optimized for correctness, that #' avoiding underflowing. It is implemented in native code that is optimized #' for speed and memory. #' #' @example incl/logSumExp.R #' #' @author Henrik Bengtsson #' #' @seealso To compute this function on rows or columns of a matrix, see #' \code{\link{rowLogSumExps}}(). #' #' For adding \emph{two} double values in native code, R provides the C #' function \code{logspace_add()} [1]. For properties of the #' log-sum-exponential function, see [2]. #' #' @references #' [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr #' [2] Laurent El Ghaoui, \emph{Hyper-Textbook: Optimization Models #' and Applications}, University of California at Berkeley, August 2012. #' (Chapter 'Log-Sum-Exp (LSE) Function and Properties') \cr #' [3] R-help thread \emph{logsumexp function in R}, 2011-02-17. #' \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr #' #' @export logSumExp <- function(lx, idxs = NULL, na.rm = FALSE, ...) { has_na <- TRUE .Call(C_logSumExp, as.numeric(lx), idxs, as.logical(na.rm), has_na) } matrixStats/R/indexByRow.R0000644000176200001440000000221014372747711015223 0ustar liggesusers#' Translates matrix indices by rows into indices by columns #' #' Translates matrix indices by rows into indices by columns. #' #' @inheritParams rowAlls #' #' @param dim A \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length two specifying the length of the "template" matrix. #' #' @return Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of #' indices. #' #' @section Known limitations: #' The current implementation does not support long-vector indices, #' because both input and output indices are of type integers. #' This means that the indices in argument \code{idxs} can only be in #' range [1,2^31-1]. Using a greater value will be coerced to #' \code{NA_integer_}. Moreover, returned indices can only be in the #' same range [1,2^31-1]. #' #' @example incl/indexByRow.R #' #' @author Henrik Bengtsson #' @keywords iteration logic #' @export indexByRow <- function(dim, idxs = NULL, ...) { if (is.matrix(dim)) { .Defunct(msg = "indexByRow(x) where 'x' is a matrix is defunct. Use indexByRow(dim(x)) instead.") } if (!is.null(idxs)) idxs <- as.integer(idxs) .Call(C_indexByRow, as.integer(dim), idxs) } matrixStats/R/rowDiffs.R0000644000176200001440000000174314436403546014722 0ustar liggesusers#' Calculates difference for each row (column) in a matrix #' #' Calculates difference for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams diff2 #' #' @return Returns a \code{\link[base]{numeric}} Nx(K-1) or (N-1)xK #' \code{\link[base]{matrix}}. #' #' @example incl/rowDiffs.R #' #' @author Henrik Bengtsson #' #' @seealso See also \code{\link{diff2}}(). #' @keywords array iteration robust univar #' @export rowDiffs <- function(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowDiffs, x, dim., rows, cols, lag, differences, TRUE, useNames) } #' @rdname rowDiffs #' @export colDiffs <- function(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowDiffs, x, dim., rows, cols, lag, differences, FALSE, useNames) } matrixStats/R/rowMeans2.R0000644000176200001440000000236314436403546015013 0ustar liggesusers#' Calculates the mean for each row (column) in a matrix #' #' Calculates the mean for each row (column) in a matrix. #' #' The implementation of \code{rowMeans2()} and \code{colMeans2()} is #' optimized for both speed and memory. #' #' @inheritParams rowAlls #' #' @param refine If \code{\link[base:logical]{TRUE}} and #' \code{x} is \code{\link[base]{numeric}}, then extra effort is used to #' calculate the average with greater numerical precision, otherwise not. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson #' #' @keywords array iteration robust univar #' @export rowMeans2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE .Call(C_rowMeans2, x, dim., rows, cols, na.rm, refine, has_nas, TRUE, useNames) } #' @rdname rowMeans2 #' @export colMeans2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, refine= TRUE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE .Call(C_rowMeans2, x, dim., rows, cols, na.rm, refine, has_nas, FALSE, useNames) } matrixStats/R/psortKM.R0000644000176200001440000000020014372747711014525 0ustar liggesusers.psortKM <- function(x, k = length(x), m = 1L, ...) { .Call(C_psortKM, as.numeric(x), k = as.integer(k), m = as.integer(m)) } matrixStats/R/rowRanks.R0000644000176200001440000001332714436403546014746 0ustar liggesusers#' Gets the rank of the elements in each row (column) of a matrix #' #' Gets the rank of the elements in each row (column) of a matrix. #' #' @inheritParams rowAlls #' #' @param ties.method A \code{\link[base]{character}} string specifying how #' ties are treated. For details, see below. #' #' @param preserveShape A \code{\link[base]{logical}} specifying whether the #' \code{\link[base]{matrix}} returned should preserve the input shape of #' \code{x}, or not. #' #' @return A \code{\link[base]{matrix}} of type \code{\link[base]{integer}} is #' returned, unless \code{ties.method = "average"} when it is of type #' \code{\link[base]{numeric}}. #' #' The \code{rowRanks()} function always returns an NxK #' \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) #' whose ranks are calculated. #' #' The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if #' \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}. #' #' Any \code{\link[base]{names}} of \code{x} are ignored and absent in the #' result. #' #' @details #' These functions rank values and treats missing values the same way as #' \code{\link[base]{rank}}(). #' For equal values ("ties"), argument \code{ties.method} determines how these #' are ranked among each other. More precisely, for the following values of #' \code{ties.method}, each index set of ties consists of: #' \itemize{ #' \item{\code{"first"} - increasing values that are all unique} #' \item{\code{"last"} - decreasing values that are all unique} #' \item{\code{"min"} - identical values equaling the minimum of #' their original ranks} #' \item{\code{"max"} - identical values equaling the maximum of #' their original ranks} #' \item{\code{"average"} - identical values that equal the sample mean of #' their original ranks. Because the average is calculated, the returned #' ranks may be non-integer values} #' \item{\code{"random"} - randomly shuffled values of their original ranks.} #' \item{\code{"dense"} - increasing values that are all unique and, #' contrary to \code{"first"}, never contain any gaps} #' } #' For more information on \code{ties.method = "dense"}, see \code{frank()} of #' the \pkg{data.table} package. #' For more information on the other alternatives, see \code{\link[base]{rank}}(). #' #' Note that, due to different randomization strategies, the shuffling order #' produced by these functions when using \code{ties.method = "random"} does #' not reproduce that of \code{\link[base]{rank}}(). #' #' \emph{WARNING: For backward-compatibility reasons, the default is #' \code{ties.method = "max"}, which differs from \code{\link[base]{rank}}() #' which uses \code{ties.method = "average"} by default. #' Since we plan to change the default behavior in a future version, we recommend #' to explicitly specify the intended value of argument \code{ties.method}.} #' #' @section Missing values: #' Missing values are ranked as \code{NA_integer_}, as with \code{na.last = "keep"} #' in the \code{\link[base]{rank}}() function. #' #' @section Performance: #' The implementation is optimized for both speed and memory. To avoid #' coercing to \code{\link[base]{double}}s (and hence memory allocation), #' there is a unique implementation for \code{\link[base]{integer}} matrices. #' Furthermore, it is more memory efficient to do #' \code{colRanks(x, preserveShape = TRUE)} than #' \code{t(colRanks(x, preserveShape = FALSE))}. #' #' @author Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding #' 'ties.method' support. Brian Montgomery for adding more 'ties.method's. #' Henrik Bengtsson adapted the original native #' implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in #' the \pkg{Biobase} package. #' #' @seealso #' For developers, see also Section Utility functions' in #' 'Writing R Extensions manual', particularly the #' native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}. #' @keywords array iteration robust univar #' #' @export rowRanks <- function(x, rows = NULL, cols = NULL, # max is listed twice so that it remains the default for now ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), ..., useNames = TRUE) { # Argument 'ties.method': ties.method <- ties.method[1L] ties_method <- charmatch(ties.method, c("average", "first", "last", "random", "max", "min", "dense"), nomatch = 0L) if (ties_method == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "ties.method", ties.method)) } if (is.na(useNames)) deprecatedUseNamesNA() # byrow = TRUE .Call(C_rowRanksWithTies, x, dim., rows, cols, ties_method, TRUE, useNames) } #' @rdname rowRanks #' @export colRanks <- function(x, rows = NULL, cols = NULL, # max is listed twice so that it remains the default for now ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), preserveShape = FALSE, ..., useNames = TRUE) { # Argument 'ties.method': ties.method <- ties.method[1L] # Argument 'preserveShape' preserveShape <- as.logical(preserveShape) ties_method <- charmatch(ties.method, c("average", "first", "last", "random", "max", "min", "dense"), nomatch = 0L) if (ties_method == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "ties.method", ties.method)) } if (is.na(useNames)) deprecatedUseNamesNA() # byrow = FALSE y <- .Call(C_rowRanksWithTies, x, dim., rows, cols, ties_method, FALSE, useNames) if (!preserveShape) y <- t(y) y } matrixStats/R/weightedMedian.R0000644000176200001440000001026014372747711016053 0ustar liggesusers#' Weighted Median Value #' #' Computes a weighted median of a numeric vector. #' #' @inheritParams weightedMad #' #' @param na.rm a logical value indicating whether \code{\link[base]{NA}} #' values in \code{x} should be stripped before the computation proceeds, or #' not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s #' is done. #' #' @param interpolate If \code{\link[base:logical]{TRUE}}, linear interpolation #' is used to get a consistent estimate of the weighted median. #' #' @param ties If \code{interpolate == FALSE}, a character string specifying #' how to solve ties between two \code{x}'s that are satisfying the weighted #' median criteria. Note that at most two values can satisfy the criteria. #' When \code{ties} is \code{"min"} ("lower weighted median"), the smaller #' value of the two is returned and when it is \code{"max"} ("upper weighted #' median"), the larger value is returned. If \code{ties} #' is \code{"mean"}, the mean of the two values is returned. Finally, if #' \code{ties} is \code{"weighted"} (or \code{\link[base]{NULL}}) a weighted #' average of the two are returned, where the weights are weights of all values #' \code{x[i] <= x[k]} and \code{x[i] >= x[k]}, respectively. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive #' weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S}, the #' \emph{weighted median} is defined as the element \code{x[k]} for which the #' total weight of all elements \code{x[i] < x[k]} is less or equal to #' \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]} #' is less or equal to \code{S/2} (c.f. [1]). #' #' When using linear interpolation, the weighted mean of \code{x[k-1]} and #' \code{x[k]} with weights \code{S[k-1]} and \code{S[k]} corresponding to the #' cumulative weights of those two elements is used as an estimate. #' #' If \code{w} is missing then all elements of \code{x} are given the same #' positive weight. If all weights are zero, \code{\link[base:NA]{NA_real_}} is #' returned. #' #' If one or more weights are \code{Inf}, it is the same as these weights have #' the same weight and the others have zero. This makes things easier for cases #' where the weights are result of a division with zero. #' #' If there are missing values in \code{w} that are part of the calculation #' (after subsetting and dropping missing values in \code{x}), then the final #' result is always \code{NA} of the same type as \code{x}. #' #' The weighted median solves the following optimization problem: #' #' \deqn{\alpha^* = \arg_\alpha \min \sum_{i = 1}^{n} w_i |x_i-\alpha|} where #' \eqn{x = (x_1, x_2, \ldots, x_n)} are scalars and #' \eqn{w = (w_1, w_2, \ldots, w_n)} are the corresponding "weights" for each #' individual \eqn{x} value. #' #' @example incl/weightedMedian.R #' #' @author Henrik Bengtsson and Ola Hossjer, Centre for Mathematical Sciences, #' Lund University. Thanks to Roger Koenker, Econometrics, University of #' Illinois, for the initial ideas. #' #' @seealso \code{\link[stats]{median}}, \code{\link[base]{mean}}() and #' \code{\link{weightedMean}}(). #' #' @references [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to #' Algorithms, The MIT Press, Massachusetts Institute of Technology, 1989. #' #' @keywords univar robust #' @export weightedMedian <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, interpolate = is.null(ties), ties = NULL, ...) { # Argument 'w': if (is.null(w)) { w <- rep(1, times = length(x)) } else { w <- as.double(w) } # Argument 'na.rm': na.rm <- as.logical(na.rm) if (is.na(na.rm)) na.rm <- FALSE # Argument 'interpolate': interpolate <- as.logical(interpolate) # Argument 'ties': if (is.null(ties)) { ties_id <- 1L } else { if (ties == "weighted") { ties_id <- 1L } else if (ties == "min") { ties_id <- 2L } else if (ties == "max") { ties_id <- 4L } else if (ties == "mean") { ties_id <- 8L } else { stop(sprintf("Unknown value of argument '%s': %s", "ties", ties)) } } .Call(C_weightedMedian, x, w, idxs, na.rm, interpolate, ties_id) } matrixStats/R/000.DEPRECATION.R0000644000176200001440000001161014535662115015163 0ustar liggesusersisUseNamesNADefunct <- function() { action <- getOption("matrixStats.useNames.NA", "defunct") action <- match.arg(action, choices = c("deprecated", "defunct")) (action == "defunct") } deprecatedUseNamesNA <- function() { if (isUseNamesNADefunct()) { .Defunct(msg = "useNames = NA is defunct. Instead, specify either useNames = TRUE or useNames = FALSE.", package = .packageName) } else { .Deprecated(msg = "useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.", package = .packageName) } } defunctShouldBeMatrixOrDim <- function(x) { x_class <- sQuote(class(x)[1]) x_name <- sQuote(as.character(substitute(x))) .Defunct(msg = sprintf("Argument %s is of class %s, but should be a matrix or 'dim.' should specify one. The use of a %s is not supported, the correctness of the result is not guaranteed. Please update your code accordingly.", x_name, x_class, x_class)) #nolint } defunctShouldBeMatrix <- function(x) { x_class <- sQuote(class(x)[1]) x_name <- sQuote(as.character(substitute(x))) .Defunct(msg = sprintf("Argument %s is of class %s, but should be a matrix. The use of a %s is not supported, the correctness of the result is not guaranteed. Please update your code accordingly.", x_name, x_class, x_class)) #nolint } defunctShouldBeMatrixOrVector <- function(x) { x_class <- sQuote(class(x)[1]) x_name <- sQuote(as.character(substitute(x))) .Defunct(msg = sprintf("Argument %s is of class %s, but should be a matrix or a vector. The use of a %s is not supported, the correctness of the result is not guaranteed. Please update your code accordingly.", x_name, x_class, x_class)) #nolint } validateScalarCenter <- function(center, n, dimname) { onScalar <- getOption("matrixStats.center.onScalar", NULL) ## Nothing to do? if (is.null(onScalar)) return() fcn <- switch(onScalar, deprecated = .Deprecated, defunct = .Defunct, NULL) ## Nothing to do? if (is.null(fcn)) return() msg <- sprintf("Argument '%s' should be of the same length as number of %s of '%s'. Use of a scalar value is deprecated: %s != %s", "center", dimname, "x", length(center), n) fcn(msg = msg, package = .packageName) } validateVarsCenterFormula <- local({ .curr <- 1 .next <- 1 always <- structure(TRUE, when = "each time this function is called") function() { freq <- getOption("matrixStats.vars.formula.freq", NULL) ## Nothing to do? if (is.null(freq)) return(FALSE) ## never? if (freq <= 0) return(FALSE) ## always? if (is.infinite(freq)) return(always) ## each time? if (freq == 1) return(always) ## once in a while? .curr <<- .curr + 1 .next <<- freq ## update .next according to R option ## Skip or not? if (.curr <= .next) return(FALSE) .curr <<- 1 ## reset structure(TRUE, when = sprintf("every %g call to this function", freq)) } }) centerOnUse <- function(fcnname, calls = sys.calls(), msg = NULL) { value <- getOption("matrixStats.center.onUse") if (is.null(value) || identical(value, "ignore")) return() value <- match.arg(value, c("deprecated", "defunct")) fcn <- switch(value, deprecated = .Deprecated, defunct = .Defunct) if (is.null(msg)) { msg <- sprintf("Argument '%s' of %s::%s() is %s: %s", "center", .packageName, fcnname, value, deparse(calls[[1]])[1]) } fcn(msg = msg, package = .packageName) } onLoadSetVarsFormulaFreq <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.vars.formula.freq", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_VARS_FORMULA_FREQ", "50") if (nzchar(value)) { value <- as.numeric(value) } else { value <- NULL } options(matrixStats.vars.formula.freq = value) } onLoadSetVarsFormulaOnMistake <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.vars.formula.onMistake", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_VARS_FORMULA_MISTAKE", "defunct") if (is.na(value)) return() value <- match.arg(value, c("deprecated", "defunct")) options(matrixStats.vars.formula.onMistake = value) } onLoadSetCenterOnUse <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.center.onUse", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_CENTER_ONUSE", NA_character_) if (is.na(value)) return() value <- match.arg(value, c("ignore", "deprecated", "defunct")) options(matrixStats.center.onUse = value) } onLoadSetCenterOnScalar <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.center.onScalar", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_CENTER_ONSCALAR", "deprecated") if (is.na(value)) return() value <- match.arg(value, c("ignore", "deprecated", "defunct")) options(matrixStats.center.onScalar = value) } matrixStats/R/anyMissing.R0000644000176200001440000000422314535663647015266 0ustar liggesusers#' Checks if there are any missing values in an object or not #' #' Checks if there are any missing values in an object or not. #' \emph{Please use \code{base::anyNA()} instead of \code{anyMissing()}, #' \code{colAnyNAs()} instead of \code{colAnyMissings()}, and #' \code{rowAnyNAs()} instead of \code{rowAnyMissings()}.} #' #' The implementation of this method is optimized for both speed and memory. #' The method will return \code{\link[base:logical]{TRUE}} as soon as a missing #' value is detected. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a #' \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or #' \code{\link[base]{NULL}}. #' #' @return Returns \code{\link[base:logical]{TRUE}} if a missing value was #' detected, otherwise \code{\link[base:logical]{FALSE}}. #' #' @examples #' x <- rnorm(n = 1000) #' x[seq(300, length(x), by = 100)] <- NA #' stopifnot(anyMissing(x) == any(is.na(x))) #' #' @author Henrik Bengtsson #' #' @seealso Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base}, #' which provides the same functionality as \code{anyMissing()}. #' #' @keywords iteration logic #' @export anyMissing <- function(x, idxs = NULL, ...) { ## All list or a data.frame? if (is.list(x)) { for (kk in seq_along(x)) { if (.Call(C_anyMissing, x[[kk]], idxs)) return(TRUE) } return(FALSE) } else { ## All other data types .Call(C_anyMissing, x, idxs) } } #' @rdname anyMissing #' @export colAnyMissings <- function(x, rows = NULL, cols = NULL, ..., useNames = TRUE) { colAnys(x, rows, cols, value = NA, ..., useNames = useNames) } #' @rdname anyMissing #' @export rowAnyMissings <- function(x, rows = NULL, cols = NULL, ..., useNames = TRUE) { rowAnys(x, rows, cols, value = NA, ..., useNames = useNames) } #' @rdname anyMissing #' @export colAnyNAs <- function(x, rows = NULL, cols = NULL, ..., useNames = TRUE) { colAnys(x, rows, cols, value = NA, ..., useNames = useNames) } #' @rdname anyMissing #' @export rowAnyNAs <- function(x, rows = NULL, cols = NULL, ..., useNames = TRUE) { rowAnys(x, rows, cols, value = NA, ..., useNames = useNames) } matrixStats/R/validateIndices.R0000644000176200001440000000202414406445217016216 0ustar liggesusers#' Validate indices #' #' Computes validated positive indices from given indices. #' #' \emph{WARNING: This function is defunct and will be removed in a future #' version.} #' #' @inheritParams rowAlls #' #' @param maxIdx The possible max index. #' #' @param allowOutOfBound Allow positive out of bound to indicate #' \code{\link[base]{NA}}. #' #' @return Returns a validated integers list indicating the indices. #' If some of the indices cannot be represented as an integer, the #' indices are returned as doubles. #' #' @keywords internal #' @export validateIndices <- function(idxs = NULL, maxIdx, allowOutOfBound = TRUE) { action <- getOption("matrixStats.validateIndices", "defunct") if (!is.null(action)) { fcn <- switch(action, deprecated = .Deprecated, defunct = .Defunct, ignore = function(...) NULL) fcn(msg = sprintf("validateIndices() is %s and will eventually be removed from %s", action, .packageName)) } ans <- .Call(C_validate, idxs, maxIdx, allowOutOfBound) if (is.null(ans)) ans <- seq_len(maxIdx) ans } matrixStats/R/rowWeightedMedians.R0000644000176200001440000001016514436403546016726 0ustar liggesusers#' Calculates the weighted medians for each row (column) in a matrix #' #' Calculates the weighted medians for each row (column) in a matrix. #' #' The implementations of these methods are optimized for both speed and #' memory. If no weights are given, the corresponding #' \code{\link{rowMedians}}()/\code{colMedians()} is used. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' @inheritParams rowWeightedMeans #' #' @param ... Additional arguments passed to \code{\link{weightedMedian}}(). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @example incl/rowWeightedMedians.R #' #' @author Henrik Bengtsson #' #' @seealso Internally, \code{\link{weightedMedian}}() is used. #' See \code{\link{rowMedians}}() and \code{colMedians()} for non-weighted #' medians. #' #' @keywords array iteration robust univar #' @export rowWeightedMedians <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- ncol(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "columns", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } if (is.na(useNames)) deprecatedUseNamesNA() # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(cols)) w <- w[cols] if (has_weights) { # Allocate results m <- nrow(x) if (m == 0L) return(double(0L)) res <- apply(x, MARGIN = 1L, FUN = function(x) { weightedMedian(x, w = w, na.rm = na.rm, ...) }) # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } w <- NULL # Not needed anymore } else { res <- rowMedians(x, na.rm = na.rm, useNames = useNames) } res } #' @rdname rowWeightedMedians #' @export colWeightedMedians <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- nrow(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "rows", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } if (is.na(useNames)) deprecatedUseNamesNA() # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(rows)) w <- w[rows] if (has_weights) { # Allocate results m <- ncol(x) if (m == 0L) return(double(0L)) res <- apply(x, MARGIN = 2L, FUN = function(x) { weightedMedian(x, w = w, na.rm = na.rm, ...) }) # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } w <- NULL # Not needed anymore } else { res <- colMedians(x, na.rm = na.rm, useNames = useNames) } res } matrixStats/R/rowAlls.R0000644000176200001440000002573314522461565014570 0ustar liggesusers#' Checks if a value exists / does not exist in each row (column) of a matrix #' #' Checks if a value exists / does not exist in each row (column) of a matrix. #' #' These functions takes either a matrix or a vector as input. If a vector, #' then argument \code{dim.} must be specified and fulfill \code{prod(dim.) == #' length(x)}. The result will be identical to the results obtained when #' passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, but avoids #' having to temporarily create/allocate a matrix, if only such is needed #' only for these calculations. #' #' @param x An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, #' an N * K \code{\link[base]{vector}}. #' #' @param idxs A \code{\link[base]{vector}} indicating subset of elements to #' operate over. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param rows A \code{\link[base]{vector}} indicating subset of rows to #' operate over. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param cols A \code{\link[base]{vector}} indicating subset of columns to #' operate over. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param value A value to search for. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, missing values are #' excluded. #' #' @param dim. An \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length two specifying the dimension of \code{x}, also when not a #' \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument #' being named with a period at the end is purely technical (we get a run-time #' error if we try to name it \code{dim}). #' #' @param ... Not used. #' #' @param useNames If \code{\link[base:logical]{TRUE}} (default), names #' attributes of the result are set, otherwise not. #' #' @return \code{rowAlls()} (\code{colAlls()}) returns an #' \code{\link[base]{logical}} \code{\link[base]{vector}} of length N (K). #' Analogously for \code{rowAnys()} (\code{rowAlls()}). #' #' @section Logical \code{value}: #' When \code{value} is logical, the result is as if the function is applied #' on \code{as.logical(x)}. More specifically, if \code{x} is numeric, then #' all zeros are treated as \code{FALSE}, non-zero values as \code{TRUE}, #' and all missing values as \code{NA}. #' #' @example incl/rowAlls.R #' #' @author Henrik Bengtsson #' @seealso rowCounts #' @keywords array logic iteration univar #' @export rowAlls <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_rowCounts, x, dim., rows, cols, value, 0L, na.rm, has_nas, FALSE) res <- as.logical(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(res) <- names } } else { names(res) <- NULL } } else { deprecatedUseNamesNA() } } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(rowAlls(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(rowAlls(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export colAlls <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_colCounts, x, dim., rows, cols, value, 0L, na.rm, has_nas, FALSE) res <- as.logical(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(res) <- names } } else { names(res) <- NULL } } } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(colAlls(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(colAlls(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export allValue <- function(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_count, x, idxs, FALSE, 1L, na.rm, has_nas) (counts == 0L) } else { counts <- .Call(C_count, x, idxs, FALSE, 0L, na.rm, has_nas) (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_count, x, idxs, value, 0L, na.rm, has_nas) as.logical(counts) } else { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (is.na(value)) { allValue(is.na(x), na.rm = na.rm, ...) } else { allValue(x == value, na.rm = na.rm, ...) } } } #' @rdname rowAlls #' @export rowAnys <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_rowCounts, x, dim., rows, cols, value, 1L, na.rm, has_nas, useNames) res <- as.logical(counts) # Preserve names attribute names <- names(counts) res <- as.logical(counts) names(res) <- names } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(rowAnys(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(rowAnys(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export colAnys <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_colCounts, x, dim., rows, cols, value, 1L, na.rm, has_nas, useNames) # Preserve names attribute names <- names(counts) res <- as.logical(counts) names(res) <- names } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(colAnys(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(colAnys(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export anyValue <- function(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_count, x, idxs, FALSE, 0L, na.rm, has_nas) (counts == 0L) } else { counts <- .Call(C_count, x, idxs, FALSE, 1L, na.rm, has_nas) (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_count, x, idxs, value, 1L, na.rm, has_nas) as.logical(counts) } else { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (is.na(value)) { anyValue(is.na(x), na.rm = na.rm, ...) } else { anyValue(x == value, na.rm = na.rm, ...) } } } matrixStats/R/rowSds.R0000644000176200001440000000254514436403546014421 0ustar liggesusers#' Standard deviation estimates for each row (column) in a matrix #' #' Standard deviation estimates for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' @inheritParams rowVars #' #' @param center (optional) The center, defaults to the row means for the #' SD estimators and row medians for the MAD estimators. #' #' @param ... Additional arguments passed to \code{rowMeans()} and #' \code{rowSums()}. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and #' \code{\link[stats:cor]{var}}. \code{\link{rowIQRs}}(). #' @keywords array iteration robust univar #' #' @export rowSds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) { x <- rowVars(x, rows = rows, cols = cols, na.rm = na.rm, refine = refine, center = center, dim. = dim., useNames = useNames, ...) sqrt(x) } #' @rdname rowSds #' @export colSds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, refine = TRUE, center = NULL, dim. = dim(x), ..., useNames = TRUE) { x <- colVars(x, rows = rows, cols = cols, na.rm = na.rm, refine = refine, center = center, dim. = dim., useNames = useNames, ...) sqrt(x) } matrixStats/R/mean2.R0000644000176200001440000000425514434247742014144 0ustar liggesusers#' Fast averaging over subset of vector elements #' #' Computes the sample mean of all or a subset of values. #' #' \code{mean2(x, idxs)} gives equivalent results as \code{mean(x[idxs])}, #' but is faster and more memory efficient since it avoids the actual #' subsetting which requires copying of elements and garbage collection #' thereof. #' #' If \code{x} is \code{\link[base]{numeric}} and \code{refine = TRUE}, then a #' two-pass scan is used to calculate the average. The first scan calculates #' the total sum and divides by the number of (non-missing) values. In the #' second scan, this average is refined by adding the residuals towards the #' first average. The \code{\link[base]{mean}}() uses this approach. #' \code{mean2(..., refine = FALSE)} is almost twice as fast as #' \code{mean2(..., refine = TRUE)}. #' #' @inheritParams rowAlls #' @inheritParams weightedMad #' #' @param refine If \code{\link[base:logical]{TRUE}} and \code{x} is #' \code{\link[base]{numeric}}, then extra effort is used to calculate the #' average with greater numerical precision, otherwise not. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @example incl/mean2.R #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{mean}}(). #' To efficiently sum over a subset, see \code{\link{sum2}}(). #' @keywords univar internal #' @export mean2 <- function(x, idxs = NULL, na.rm = FALSE, refine = TRUE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x) && !is.logical(x)) { stop(sprintf("Argument '%s' is not numeric or logical: %s", "x", mode(x))) } # Argument 'na.rm': if (!is.logical(na.rm)) { stop(sprintf("Argument '%s' is not logical: %s", "na.rm", mode(na.rm))) } # Argument 'refine': if (!is.logical(refine)) { stop(sprintf("Argument '%s' is not logical: %s", "refine", mode(refine))) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Averaging # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Call(C_mean2, x, idxs, na.rm, refine) } matrixStats/R/weightedMad.R0000644000176200001440000001710314436403546015356 0ustar liggesusers#' Weighted Median Absolute Deviation (MAD) #' #' Computes a weighted MAD of a numeric vector. #' #' @inheritParams rowAlls #' #' @param x \code{\link[base]{vector}} of type \code{\link[base]{integer}}, #' \code{\link[base]{numeric}}, or \code{\link[base]{logical}}. #' #' @param w a vector of weights the same length as \code{x} giving the weights #' to use for each element of \code{x}. Negative weights are treated as zero #' weights. Default value is equal weight to all values. #' #' @param constant A \code{\link[base]{numeric}} scale factor, cf. #' \code{\link[stats]{mad}}. #' #' @param center Optional \code{\link[base]{numeric}} scalar specifying the #' center location of the data. If \code{\link[base]{NULL}}, it is estimated #' from data. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @example incl/weightedMad.R #' #' @section Missing values: Missing values are dropped at the very beginning, #' if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not. #' #' @author Henrik Bengtsson #' #' @seealso For the non-weighted MAD, see \code{\link[stats]{mad}}. Internally #' \code{\link{weightedMedian}}() is used to calculate the weighted median. #' #' @importFrom stats mad median #' @keywords univar robust #' @export weightedMad <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ...) { # No weights? Fall back to non-weighted method. if (is.null(w)) { if (is.null(center)) center <- median(x, na.rm = na.rm) return(mad(x, center = center, constant = constant, na.rm = na.rm, ...)) } # Argument 'x': n <- length(x) # Argument 'w': if (length(w) != n) { stop(sprintf("The number of elements in arguments '%s' and '%s' does not match: %.0f != %.0f", "w", "x", length(w), n)) } else if (!is.null(idxs)) { # Apply subset on w w <- w[idxs] } # Argument 'constant': if (length(constant) != 1L) stop(sprintf("Argument '%s' is not a scalar: %.0f", "constant", length(constant))) if (!is.numeric(constant)) stop(sprintf("Argument '%s' is not numeric: %s", "constant", mode(constant))) # Argument 'center': if (!is.null(center) && length(center) != 1L) stop(sprintf("Argument '%s' is not a scalar or NULL: %.0f", "center", length(center))) # Apply subset on x if (!is.null(idxs)) { x <- x[idxs] n <- length(x) } na_value <- NA storage.mode(na_value) <- storage.mode(x) # Remove values with zero (and negative) weight. This will: # 1) take care of the case when all weights are zero, # 2) it will most likely speed up the sorting. tmp <- (is.na(w) | w > 0) if (!all(tmp)) { x <- .subset(x, tmp) w <- .subset(w, tmp) n <- length(x) } tmp <- NULL # Not needed anymore # Drop missing values? if (na.rm) { keep <- which(!is.na(x)) x <- .subset(x, keep) w <- .subset(w, keep) n <- length(x) keep <- NULL # Not needed anymore } else if (anyMissing(x)) { return(na_value) } # Missing values in 'w'? if (anyMissing(w)) return(na_value) # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. tmp <- is.infinite(w) if (any(tmp)) { keep <- tmp x <- .subset(x, keep) n <- length(x) w <- rep(1, times = n) keep <- NULL # Not needed anymore } tmp <- NULL # Not needed anymore # Are there any values left to calculate the weighted median of? # This is consistent with how stats::mad() works. if (n == 0L) { return(na_value) } else if (n == 1L) { zero_value <- 0 storage.mode(zero_value) <- storage.mode(x) return(zero_value) } # Estimate the mean? if (is.null(center)) { center <- weightedMedian(x, w = w, na.rm = NA) } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("weightedMad") } # Estimate the standard deviation x <- abs(x - center) sigma <- weightedMedian(x, w = w, na.rm = NA) x <- w <- NULL # Not needed anymore # Rescale for normal distributions sigma <- constant * sigma sigma } #' @rdname weightedMad #' @export rowWeightedMads <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = TRUE) { # Argument 'constant': if (length(constant) != 1L) stop(sprintf("Argument '%s' is not a scalar: %.0f", "constant", length(constant))) if (!is.numeric(constant)) stop(sprintf("Argument '%s' is not numeric: %s", "constant", mode(constant))) # Apply subset on 'center'? if (!is.null(center)) { if (length(center) != nrow(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(rows)) { validateScalarCenter(center, nrow(x), "rows") center <- rep(center, times = nrow(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "rows", "x", length(center), nrow(x))) } } if (!is.null(rows)) center <- center[rows] } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(cols)) w <- w[cols] y <- numeric(nrow(x)) for (kk in seq_along(y)) { y[kk] <- weightedMad(x[kk, , drop = TRUE], w = w, na.rm = na.rm, constant = constant, center = center[kk], ...) } ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats if (is.na(useNames)) { deprecatedUseNamesNA() names(y) <- rownames(x) } else if (useNames) { names(y) <- rownames(x) } else { names(y) <- NULL } y } #' @rdname weightedMad #' @export colWeightedMads <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = TRUE) { # Argument 'constant': if (length(constant) != 1L) stop(sprintf("Argument '%s' is not a scalar: %.0f", "constant", length(constant))) if (!is.numeric(constant)) stop(sprintf("Argument '%s' is not numeric: %s", "constant", mode(constant))) # Argument 'center': # Apply subset on 'center'? if (!is.null(center)) { if (length(center) != ncol(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(cols)) { validateScalarCenter(center, ncol(x), "cols") center <- rep(center, times = ncol(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "columns", "x", length(center), ncol(x))) } } if (!is.null(cols)) center <- center[cols] } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(rows)) w <- w[rows] y <- numeric(ncol(x)) for (kk in seq_along(y)) { y[kk] <- weightedMad(x[, kk, drop = TRUE], w = w, na.rm = na.rm, constant = constant, center = center[kk], ...) } ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats if (is.na(useNames)) { deprecatedUseNamesNA() names(y) <- colnames(x) } else if (useNames) { names(y) <- colnames(x) } else { names(y) <- NULL } y } matrixStats/R/benchmark.R0000644000176200001440000000117614372747711015075 0ustar liggesusersbenchmark <- function(fcn, tags = NULL, path = NULL, workdir = "reports", envir = parent.frame(), ...) { requireNamespace("R.rsp") || stop(sprintf("Package %s is not installed", "R.rsp")) if (is.function(fcn)) { fcn <- deparse(substitute(fcn)) } if (is.null(path)) { path <- system.file("benchmarking", package = "matrixStats") } fullname <- paste(c(fcn, tags), collapse = ", ") filename <- sprintf("%s.md.rsp", fullname) pathname <- file.path(path, filename) oopts <- options("prompt" = "> ") on.exit(options(oopts)) R.rsp::rfile(pathname, workdir = workdir, envir = envir, ...) } matrixStats/R/weightedVar.R0000644000176200001440000001310514436403546015403 0ustar liggesusers#' Weighted variance and weighted standard deviation #' #' Computes a weighted variance / standard deviation of a numeric vector or #' across rows or columns of a matrix. #' #' @inheritParams weightedMad #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @section Missing values: #' This function handles missing values consistently with #' \code{\link{weightedMean}}(). #' More precisely, if \code{na.rm = FALSE}, then any missing values in either #' \code{x} or \code{w} will give result \code{NA_real_}. #' If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which #' \code{x} is missing are skipped. Note that if both \code{x} and \code{w} #' are missing for a data points, then it is also skipped (by the same rule). #' However, if only \code{w} is missing, then the final results will always #' be \code{NA_real_} regardless of \code{na.rm}. #' #' @author Henrik Bengtsson #' #' @details #' The estimator used here is the same as the one used by the "unbiased" #' estimator of the \bold{Hmisc} package. More specifically, #' \code{weightedVar(x, w = w) == Hmisc::wtd.var(x, weights = w)}, #' #' @seealso For the non-weighted variance, see \code{\link[stats]{var}}. #' #' @keywords univar robust #' @export weightedVar <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, center = NULL, ...) { # Argument 'x': n <- length(x) # Argument 'w': if (is.null(w)) { w <- rep(1, times = n) } else if (length(w) != n) { stop(sprintf("The number of elements in arguments '%s' and '%s' does not match: %.0f != %.0f", "w", "x", length(w), n)) } else if (!is.null(idxs)) { # Apply subset on 'w' w <- w[idxs] } # Apply subset on 'x' if (!is.null(idxs)) { x <- x[idxs] n <- length(x) } na_value <- NA storage.mode(na_value) <- storage.mode(x) # Remove values with zero (and negative) weight. This will: # 1) take care of the case when all weights are zero, # 2) it will most likely speed up the sorting. tmp <- (is.na(w) | w > 0) if (!all(tmp)) { x <- .subset(x, tmp) w <- .subset(w, tmp) n <- length(x) } tmp <- NULL # Not needed anymore # Drop missing values? if (na.rm) { keep <- which(!is.na(x)) x <- .subset(x, keep) w <- .subset(w, keep) n <- length(x) keep <- NULL # Not needed anymore } else if (anyMissing(x)) { return(na_value) } # Missing values in 'w'? if (anyMissing(w)) return(na_value) # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. tmp <- is.infinite(w) if (any(tmp)) { keep <- tmp x <- .subset(x, keep) n <- length(x) w <- rep(1, times = n) keep <- NULL # Not needed anymore } tmp <- NULL # Not needed anymore # Are there any values left to calculate the weighted variance of? # This is consistent with how stats::var() works. if (n <= 1L) return(na_value) # Standardize weights to sum to one wsum <- sum(w) # Estimate the mean? if (is.null(center)) { center <- sum(w * x) / wsum } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("weightedVar") } # Estimate the variance x <- x - center # Residuals x <- x^2 # Squared residuals ## Correction factor lambda <- 1 / (wsum - 1) sigma2 <- lambda * sum(w * x) x <- w <- NULL # Not needed anymore ## Undefined estimate? (adopted from Hmisc::wtd.var()) if (wsum <= 1) { warning(sprintf("Produced invalid variance estimate, because the weights suggest at most one effective observation (sum(w) <= 1): %g (wsum = %g)", sigma2, wsum)) } sigma2 } #' @rdname weightedVar #' @export weightedSd <- function(...) { sqrt(weightedVar(...)) } #' @rdname weightedVar #' @export rowWeightedVars <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on 'w' if (!is.null(w) && !is.null(cols)) w <- w[cols] # Preserve names attributes? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = weightedVar, w = w, na.rm = na.rm, ...) } #' @rdname weightedVar #' @export colWeightedVars <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on 'w' if (!is.null(w) && !is.null(rows)) w <- w[rows] # Preserve names attributes? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = weightedVar, w = w, na.rm = na.rm, ...) } #' @rdname weightedVar #' @export rowWeightedSds <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { sqrt(rowWeightedVars(x = x, w = w, rows = rows, cols = cols, na.rm = na.rm, useNames = useNames, ...)) } #' @rdname weightedVar #' @export colWeightedSds <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { sqrt(colWeightedVars(x = x, w = w, rows = rows, cols = cols, na.rm = na.rm, useNames = useNames, ...)) } matrixStats/R/varDiff.R0000644000176200001440000002575714436403546014533 0ustar liggesusers#' Estimation of scale based on sequential-order differences #' #' Estimation of scale based on sequential-order differences, corresponding to #' the scale estimates provided by \code{\link[stats]{var}}, #' \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and #' \code{\link[stats]{IQR}}. #' #' Note that n-order difference MAD estimates, just like the ordinary MAD #' estimate by \code{\link[stats]{mad}}, apply a correction factor such that #' the estimates are consistent with the standard deviation under Gaussian #' distributions. #' #' The interquartile range (IQR) estimates does \emph{not} apply such a #' correction factor. If asymptotically normal consistency is wanted, the #' correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))}, which is #' half of that used for MAD estimates, which is \code{1 / qnorm(3/4)}. This #' correction factor needs to be applied manually, i.e. there is no #' \code{constant} argument for the IQR functions. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length #' N or a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. #' #' @param diff The positional distance of elements for which the difference #' should be calculated. #' #' @param trim A \code{\link[base]{double}} in [0,1/2] specifying the fraction #' of observations to be trimmed from each end of (sorted) \code{x} before #' estimation. #' #' @param constant A scale factor adjusting for asymptotically normal #' consistency. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length 1, length N, or length K. #' #' @author Henrik Bengtsson #' #' @seealso For the corresponding non-differentiated estimates, see #' \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} #' and \code{\link[stats]{IQR}}. Internally, \code{\link{diff2}}() is used #' which is a faster version of \code{\link[base]{diff}}(). #' #' @references [1] J. von Neumann et al., \emph{The mean square successive #' difference}. Annals of Mathematical Statistics, 1941, 12, 153-162.\cr #' #' @keywords iteration robust univar #' @export varDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 1L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate var <- var(x, na.rm = FALSE) x <- NULL # Not needed anymore # Correction for the differentiation var / (2^diff) } #' @rdname varDiff #' @export sdDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 1L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate sd <- sd(x, na.rm = FALSE) x <- NULL # Not needed anymore # Correction for the differentiation sd / (sqrt(2) ^ diff) } #' @importFrom stats mad #' @rdname varDiff #' @export madDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, constant = 1.4826, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 0L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate sd <- mad(x, na.rm = FALSE, constant = constant, ...) x <- NULL # Not needed anymore # Correction for the differentiation sd / (sqrt(2) ^ diff) } #' @importFrom stats quantile #' @rdname varDiff #' @export iqrDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) { x <- x[!is.na(x)] } else if (anyMissing(x)) { return(NA_real_) } # At this point, there should be no missing values # Nothing to do? n <- length(x) if (n == 0L) { return(NA_real_) } else if (n == 1L) { return(0) } # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(0) } # Trim? if (trim > 0 && n > 0L) { lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate qs <- quantile(x, probs = c(0.25, 0.75), na.rm = FALSE, names = FALSE, ...) x <- NULL # Not needed anymore iqr <- (qs[2L] - qs[1L]) # Correction for the differentiation iqr / (sqrt(2) ^ diff) } #' @rdname varDiff #' @export rowVarDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = varDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colVarDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = varDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export rowSdDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = sdDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colSdDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = sdDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export rowMadDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = madDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colMadDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = madDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export rowIQRDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = iqrDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colIQRDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = TRUE) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (is.na(useNames)) { deprecatedUseNamesNA() } else if (!useNames) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = iqrDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } matrixStats/R/weightedMean.R0000644000176200001440000000355514372747711015547 0ustar liggesusers#' Weighted Arithmetic Mean #' #' Computes the weighted sample mean of a numeric vector. #' #' @inheritParams rowAlls #' @inheritParams weightedMad #' @inheritParams mean2 #' #' @param w a vector of weights the same length as \code{x} giving the weights #' to use for each element of \code{x}. Negative weights are treated as zero #' weights. Default value is equal weight to all values. #' If a missing-value weight exists, the result is always a missing value. #' #' @return Returns a \code{\link[base]{numeric}} scalar. If \code{x} is of #' zero length, then \code{NaN} is returned, which is consistent with #' \code{\link[base]{mean}}(). #' #' @example incl/weightedMean.R #' #' @section Missing values: #' This function handles missing values consistently with #' \code{\link[stats]{weighted.mean}}. More precisely, if \code{na.rm = FALSE}, #' then any missing values in either \code{x} or \code{w} will give result #' \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points #' for which \code{x} is missing are skipped. Note that if both \code{x} and #' \code{w} are missing for a data points, then it is also skipped (by the same #' rule). However, if only \code{w} is missing, then the final results will #' always be \code{NA_real_} regardless of \code{na.rm}. #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{mean}}() and \code{\link[stats]{weighted.mean}}. #' @keywords univar robust #' @export weightedMean <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, refine = FALSE, ...) { # Argument 'refine': refine <- as.logical(refine) # Argument 'w': if (is.null(w)) { ## We won't fall back to stats::mean(), because it's has some overhead ## and it doesn't support refine = FALSE. w <- rep(1, times = length(x)) } else { w <- as.numeric(w) } .Call(C_weightedMean, x, w, idxs, na.rm, refine) } matrixStats/R/rowMads.R0000644000176200001440000000741714436403546014557 0ustar liggesusers#' @param constant A scale factor. See \code{\link[stats]{mad}} for details. #' #' @rdname rowSds #' @export rowMads <- function(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.null(center)) { if (is.na(useNames)) deprecatedUseNamesNA() constant <- as.numeric(constant) has_nas <- TRUE x <- .Call(C_rowMads, x, dim., rows, cols, constant, na.rm, has_nas, TRUE, useNames) } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("rowMads") # Preserve names names <- rownames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != nrow(x)) { if (length(center) == 1L && is.null(rows)) { validateScalarCenter(center, nrow(x), "rows") } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "rows", "x", length(center), nrow(x))) } } if (!is.null(rows)) center <- center[rows] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) x <- x - center if (is.null(dim(x))) { dim(x) <- dim. # prevent from dim dropping # Preserve names attribute? if (!is.na(useNames) && useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] } rownames(x) <- names } } } x <- abs(x) x <- rowMedians(x, na.rm = na.rm, ..., useNames = useNames) x <- constant * x } x } #' @rdname rowSds #' @export colMads <- function(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.null(center)) { if (is.na(useNames)) deprecatedUseNamesNA() constant <- as.numeric(constant) has_nas <- TRUE x <- .Call(C_rowMads, x, dim., rows, cols, constant, na.rm, has_nas, FALSE, useNames) } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("colMads") # Preserve names names <- colnames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != ncol(x)) { if (length(center) == 1L && is.null(cols)) { validateScalarCenter(center, ncol(x), "columns") } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "columns", "x", length(center), ncol(x))) } } if (!is.null(cols)) center <- center[cols] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) ## SLOW: # for (cc in seq_len(ncol(x))) { # x[, cc] <- x[, cc] - center[cc] # } ## FAST: x <- t_tx_OP_y(x, center, OP = "-", na.rm = FALSE) # Preserve names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } colnames(x) <- names } } else { colnames(x) <- NULL } } x <- abs(x) x <- colMedians(x, na.rm = na.rm, ..., useNames = useNames) x <- constant * x } x } matrixStats/R/signTabulate.R0000644000176200001440000000142414372747711015561 0ustar liggesusers#' Calculates the number of negative, zero, positive and missing values #' #' Calculates the number of negative, zero, positive and missing values in a #' \code{\link[base]{numeric}} vector. For \code{\link[base]{double}} vectors, #' the number of negative and positive infinite values are also counted. #' #' @inheritParams rowAlls #' #' @param x a \code{\link[base]{numeric}} \code{\link[base]{vector}}. #' #' @return Returns a \code{\link[base]{name}}d \code{\link[base]{numeric}} #' \code{\link[base]{vector}}. #' #' @author Henrik Bengtsson #' @seealso \code{\link[base]{sign}}(). #' @keywords internal #' @export signTabulate <- function(x, idxs = NULL, ...) { res <- .Call(C_signTabulate, x, idxs) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf")[1:length(res)] res } matrixStats/R/rowCollapse.R0000644000176200001440000000455014436403546015430 0ustar liggesusers#' Extracts one cell per row (column) from a matrix #' #' Extracts one cell per row (column) from a matrix. The implementation is #' optimized for memory and speed. #' #' @inheritParams rowAlls #' #' @param idxs An index \code{\link[base]{vector}} of (maximum) length N (K) #' specifying the columns (rows) to be extracted. #' #' @return Returns a \code{\link[base]{vector}} of length N (K). #' #' @example incl/rowCollapse.R #' #' @author Henrik Bengtsson #' #' @seealso \emph{Matrix indexing} to index elements in matrices and arrays, #' cf. \code{\link[base]{[}}(). #' @keywords utilities #' @export rowCollapse <- function(x, idxs, rows = NULL, dim. = dim(x), ..., useNames = TRUE) { # Argument 'x': if (!is.matrix(x) && !is.vector(x)) defunctShouldBeMatrixOrVector(x) # Argument 'idxs': idxs <- rep(idxs, length.out = dim.[1L]) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(rows)) { x <- x[rows, , drop = FALSE] idxs <- idxs[rows] } dim. <- dim(x) # Columns of interest cols <- 0:(dim.[2L] - 1L) cols <- cols[idxs] # Calculate column-based indices idxs <- dim.[1L] * cols + seq_len(dim.[1L]) cols <- NULL # Not needed anymore # Update names attribute? res <- x[idxs] if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } else { deprecatedUseNamesNA() } res } #' @rdname rowCollapse #' @export colCollapse <- function(x, idxs, cols = NULL, dim. = dim(x), ..., useNames = TRUE) { # Argument 'x': if (!is.matrix(x) && !is.vector(x)) defunctShouldBeMatrixOrVector(x) # Argument 'idxs': idxs <- rep(idxs, length.out = dim.[2L]) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(cols)) { x <- x[, cols, drop = FALSE] idxs <- idxs[cols] } dim. <- dim(x) # Rows of interest rows <- seq_len(dim.[1L]) rows <- rows[idxs] # Calculate column-based indices idxs <- dim.[1L] * 0:(dim.[2L] - 1L) + rows rows <- NULL # Not needed anymore # Update names attribute? res <- x[idxs] if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } res } matrixStats/R/rowWeightedMeans.R0000644000176200001440000001633314436403546016414 0ustar liggesusers#' Calculates the weighted means for each row (column) in a matrix #' #' Calculates the weighted means for each row (column) in a matrix. #' #' The implementations of these methods are optimized for both speed and #' memory. If no weights are given, the corresponding #' \code{rowMeans()}/\code{colMeans()} is used. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param w A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length #' K (N). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @example incl/rowWeightedMeans.R #' #' @author Henrik Bengtsson #' #' @seealso See \code{rowMeans()} and \code{colMeans()} in #' \code{\link[base]{colSums}}() for non-weighted means. See also #' \code{\link[stats]{weighted.mean}}. #' #' @keywords array iteration robust univar #' @export rowWeightedMeans <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- ncol(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "columns", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } if (is.na(useNames)) deprecatedUseNamesNA() # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(cols)) w <- w[cols] if (has_weights) { # Allocate results m <- nrow(x) if (m == 0L) return(double(0L)) # Drop entries with zero weight? ...but keep NAs idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (nw == 0L) { res <- rep(NaN, times = m) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } return(res) } else if (nw < n) { w <- w[idxs] x <- x[, idxs, drop = FALSE] } idxs <- NULL # Not needed anymore # Has missing values? if (na.rm) { # Really? na.rm <- anyMissing(x) } if (na.rm) { # Indices of missing values nas <- which(is.na(x)) # Weight matrix W <- matrix(w, nrow = nrow(x), ncol = ncol(x), byrow = TRUE) w <- NULL # Not needed anymore W[nas] <- NA wS <- rowSums(W, na.rm = TRUE) # Standarized weights summing to one w/out missing values W[nas] <- 0 W <- W / wS x[nas] <- 0 nas <- NULL # Not needed anymore x <- W * x # Preserve dimnames attribute? if (!(is.na(useNames) || useNames)) { dimnames(x) <- NULL } W <- NULL # Not needed anymore } else { wS <- sum(w) # Standardize weights summing to one. w <- w / wS # Preserve dimnames attribute dimnames <- dimnames(x) # Weighted values ## SLOW: for (rr in 1:m) x[rr, ] <- w * x[rr, , drop = TRUE] ## FAST: x <- t_tx_OP_y(x, w, OP = "*", na.rm = FALSE) # Update dimnames attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(dimnames)) { dimnames(x) <- dimnames } } else { dimnames(x) <- NULL } } w <- NULL # Not needed anymore } # Here we know there are no missing value in the new 'x' res <- rowSums(x, na.rm = FALSE) } else { res <- rowMeans(x, na.rm = na.rm) # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } } res } #' @rdname rowWeightedMeans #' @export colWeightedMeans <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- nrow(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "rows", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } if (is.na(useNames)) deprecatedUseNamesNA() # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(rows)) w <- w[rows] if (has_weights) { # Allocate results m <- ncol(x) if (m == 0L) return(double(0L)) # Drop entries with zero weight? ...but keep NAs idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (nw == 0L) { res <- rep(NaN, times = m) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } return(res) } else if (nw < n) { w <- w[idxs] x <- x[idxs, , drop = FALSE] } idxs <- NULL # Not needed anymore # Has missing values? if (na.rm) { # Really? na.rm <- anyMissing(x) } if (na.rm) { # Indices of missing values nas <- which(is.na(x)) # Weight matrix W <- matrix(w, nrow = nrow(x), ncol = ncol(x), byrow = FALSE) w <- NULL # Not needed anymore W[nas] <- NA wS <- colSums(W, na.rm = TRUE) # Standarized weights summing to one w/out missing values W[nas] <- 0 for (cc in 1:m) { W[, cc] <- W[, cc, drop = TRUE] / wS[cc] } x[nas] <- 0 nas <- NULL # Not needed anymore x <- W * x W <- NULL # Not needed anymore } else { wS <- sum(w) # Standardize weights summing to one. w <- w / wS # Weighted values x <- w * x ## SLIGHTLY SLOWER: x <- x_OP_y(x, w, OP = "*") w <- NULL # Not needed anymore } # Here we know there are no missing value in the new 'x' res <- colSums(x, na.rm = FALSE) } else { res <- colMeans(x, na.rm = na.rm) } # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } res } matrixStats/R/rowAvgsPerColSet.R0000644000176200001440000001426114372747711016353 0ustar liggesusers#' Applies a row-by-row (column-by-column) averaging function to equally-sized #' subsets of matrix columns (rows) #' #' Applies a row-by-row (column-by-column) averaging function to equally-sized #' subsets of matrix columns (rows). Each subset is averaged independently of #' the others. #' #' If argument \code{S} is a single column vector with indices \code{1:N}, then #' \code{rowAvgsPerColSet(X, S = S, FUN = rowMeans)} gives the same result as #' \code{rowMeans(X)}. Analogously, for \code{colAvgsPerRowSet()}. #' #' @inheritParams rowAlls #' #' @param X A \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}}. #' #' @param W An optional \code{\link[base]{numeric}} NxM #' \code{\link[base]{matrix}} of weights. #' #' @param S An \code{\link[base]{integer}} KxJ \code{\link[base]{matrix}} #' specifying the J subsets. Each column holds K column (row) indices for the #' corresponding subset. #' #' @param FUN The row-by-row (column-by-column) \code{\link[base]{function}} #' used to average over each subset of \code{X}. This function must accept a #' \code{\link[base]{numeric}} NxK (KxM) \code{\link[base]{matrix}} and the #' \code{\link[base]{logical}} argument \code{na.rm}, and return a #' \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (M). #' #' @param ... Additional arguments passed to then \code{FUN} #' \code{\link[base]{function}}. #' #' @param na.rm (logical) Argument passed to \code{FUN()} as #' \code{na.rm = na.rm}. If \code{\link[base:logical]{NA}} (default), then #' \code{na.rm = TRUE} is used if \code{X} or \code{S} holds missing values, #' otherwise \code{na.rm = FALSE}. #' #' @param tFUN If \code{\link[base:logical]{TRUE}}, the NxK (KxM) #' \code{\link[base]{matrix}} passed to \code{FUN()} is transposed first. #' #' @return Returns a \code{\link[base]{numeric}} JxN (MxJ) #' \code{\link[base]{matrix}}, where row names equal \code{rownames(X)} #' (\code{colnames(S)}) and column names \code{colnames(S)} #' (\code{colnames(X)}). #' #' @example incl/rowAvgsPerColSet.R #' #' @author Henrik Bengtsson #' @keywords internal utilities #' @export rowAvgsPerColSet <- function(X, W = NULL, rows = NULL, S, FUN = rowMeans, ..., na.rm = NA, tFUN = FALSE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'X': if (!is.matrix(X)) { stop(sprintf("Argument '%s' is not a matrix: %s", "X", class(X)[1L])) } dimX <- dim(X) # Argument 'W': hasW <- !is.null(W) if (hasW) { if (!is.matrix(W)) { stop(sprintf("Argument '%s' is not a matrix: %s", "W", class(W)[1L])) } if (any(dim(W) != dimX)) { stop(sprintf("Argument '%s' and '%s' have different dimensions: %s != %s", "W", "X", paste(dim(W), collapse = "x"), paste(dimX, collapse = "x"))) } if (!is.numeric(W)) { stop(sprintf("Argument '%s' is not numeric: %s", "W", mode(W))) } } # Argument 'S': if (!is.matrix(S)) { stop(sprintf("Argument '%s' is not a matrix: %s", "S", class(S)[1L])) } nbrOfSets <- ncol(S) setNames <- colnames(S) # Argument 'FUN': if (!is.function(FUN)) { stop(sprintf("Argument '%s' is not a function: %s", "FUN", mode(S))) } # Apply subset if (!is.null(rows)) { X <- X[rows, , drop = FALSE] if (hasW) W <- W[rows, , drop = FALSE] dimX <- dim(X) } # Argument 'tFUN': tFUN <- as.logical(tFUN) # Check if missing values have to be excluded while averaging if (is.na(na.rm)) na.rm <- (anyMissing(X) || anyMissing(S)) # Record names of dimension rownamesX <- rownames(X) dimnames(X) <- NULL # Average in sets of columns of X. Z <- apply(S, MARGIN = 2L, FUN = function(jj) { # Extract set of columns from X jj <- jj[is.finite(jj)] Zjj <- X[, jj, drop = FALSE] jj <- NULL # Not needed anymore if (tFUN) { Zjj <- t(Zjj) } # Average by weights if (hasW) { Wjj <- W[, jj, drop = FALSE] Zjj <- FUN(Zjj, W = Wjj, ..., na.rm = na.rm) Wjj <- NULL # Not needed anymore } else { Zjj <- FUN(Zjj, ..., na.rm = na.rm) } # Sanity check if (length(Zjj) != dimX[1L]) stop("Internal error: length(Zjj) != dimX[1L]") # Return set average Zjj }) # apply() drops 2nd dimension if nrow(X) <= 1 (and FUN returns a vector of # length nrow(X) as it should), cf. ?apply if (!is.matrix(Z)) { if (dimX[1] > 1L) stop("Internal error: dimX[1] > 1L") dim(Z) <- c(dimX[1L], nbrOfSets) } # Sanity check if (any(dim(Z) != c(dimX[1L], nbrOfSets))) stop("Internal error: dim(Z) != c(dimX[1L], nbrOfSets)") # Set names rownames(Z) <- rownamesX colnames(Z) <- setNames Z } #' @rdname rowAvgsPerColSet #' @export colAvgsPerRowSet <- function(X, W = NULL, cols = NULL, S, FUN = colMeans, ..., na.rm = NA, tFUN = FALSE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'X': if (!is.matrix(X)) { stop(sprintf("Argument '%s' is not a matrix: %s", "X", class(X)[1L])) } # Argument 'W': # Argument 'S': if (!is.matrix(S)) { stop(sprintf("Argument '%s' is not a matrix: %s", "S", class(S)[1L])) } # Argument 'FUN': if (!is.function(FUN)) { stop(sprintf("Argument '%s' is not a function: %s", "FUN", mode(S))) } # Apply subset if (!is.null(cols)) { X <- X[, cols, drop = FALSE] if (is.null(W)) W <- W[, cols, drop = FALSE] } # Argument 'tFUN': tFUN <- as.logical(tFUN) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Transpose # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tX <- t(X) if (is.null(W)) { tW <- NULL } else { tW <- t(W) } # ... tZ <- rowAvgsPerColSet(X = tX, W = tW, S = S, FUN = FUN, ..., na.rm = na.rm, tFUN = !tFUN) tX <- tW <- NULL # Not needed anymore # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Transpose back # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Z <- t(tZ) tZ <- NULL # Not needed anymore Z } matrixStats/R/rowRanges.R0000644000176200001440000000444014436403546015103 0ustar liggesusers#' Gets the range of values in each row (column) of a matrix #' #' Gets the range of values in each row (column) of a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @return \code{rowRanges()} (\code{colRanges()}) returns a #' \code{\link[base]{numeric}} Nx2 (Kx2) \code{\link[base]{matrix}}, where N #' (K) is the number of rows (columns) for which the ranges are calculated. #' #' \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a #' \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). #' #' @author Henrik Bengtsson #' #' @seealso \code{\link{rowOrderStats}}() and \code{\link[base]{pmin.int}}(). #' #' @keywords array iteration robust univar #' #' @export rowRanges <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowRanges, x, dim., rows, cols, 2L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export rowMins <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowRanges, x, dim., rows, cols, 0L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export rowMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowRanges, x, dim., rows, cols, 1L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export colRanges <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_colRanges, x, dim., rows, cols, 2L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export colMins <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_colRanges, x, dim., rows, cols, 0L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export colMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_colRanges, x, dim., rows, cols, 1L, na.rm, TRUE, useNames) } matrixStats/R/rowTabulates.R0000644000176200001440000001223114436403546015605 0ustar liggesusers#' Tabulates the values in a matrix by row (column). #' #' @inheritParams rowAlls #' #' @param x An \code{\link[base]{integer}}, a \code{\link[base]{logical}}, or #' a \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}. #' #' @param values An \code{\link[base]{vector}} of J values of count. If #' \code{\link[base]{NULL}}, all (unique) values are counted. #' #' @return Returns a NxJ (KxJ) \code{\link[base]{matrix}} where N (K) is the #' number of row (column) \code{\link[base]{vector}}s tabulated and J is the #' number of values counted. #' #' @details #' An alternative to these functions, is to use \code{table(x, row(x))} #' and \code{table(x, col(x))}, with the exception that the latter do not #' support the \code{\link[base]{raw}} data type. #' When there are no missing values in \code{x}, we have that #' \code{all(rowTabulates(x) == t(table(x, row(x))))} and #' \code{all(colTabulates(x) == t(table(x, col(x))))}. #' When there are missing values, we have that #' \code{all(rowTabulates(x) == t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]))} and #' \code{all(colTabulates(x) == t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]))}. #' #' @example incl/rowTabulates.R #' #' @author Henrik Bengtsson #' @keywords utilities #' @export rowTabulates <- function(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (is.integer(x)) { } else if (is.logical(x)) { } else if (is.raw(x)) { } else { stop(sprintf("Argument '%s' is not integer, logical, or raw: %s", "x", class(x)[1])) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Argument 'values': if (is.null(values)) { values <- as.vector(x) values <- unique(values) if (is.raw(values)) { values <- as.integer(values) values <- sort.int(values) # WORKAROUND: Cannot use "%#x" because it gives an error OSX with # R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20 names <- sprintf("%x", values) names <- paste("0x", names, sep = "") values <- as.raw(values) } else { values <- sort.int(values, na.last = TRUE) names <- as.character(values) } } else { if (is.raw(values)) { names <- sprintf("%x", as.integer(values)) names <- paste("0x", names, sep = "") } else { names <- as.character(values) } } nbr_of_values <- length(values) counts <- matrix(0L, nrow = nrow(x), ncol = nbr_of_values) colnames(counts) <- names na.rm <- anyMissing(x) for (kk in seq_len(nbr_of_values)) { counts[, kk] <- rowCounts(x, value = values[kk], na.rm = na.rm) } # Update rownames attribute? if (!is.na(useNames)) { if (useNames) { rownames <- rownames(x) if (!is.null(rownames)) rownames(counts) <- rownames } else { rownames(counts) <- NULL } } else { deprecatedUseNamesNA() } counts } #' @rdname rowTabulates #' @export colTabulates <- function(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (is.integer(x)) { } else if (is.logical(x)) { } else if (is.raw(x)) { } else { stop(sprintf("Argument '%s' is not integer, logical, or raw: %s", "x", class(x)[1])) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Argument 'values': if (is.null(values)) { values <- as.vector(x) values <- unique(values) if (is.raw(values)) { values <- as.integer(values) values <- sort.int(values) # WORKAROUND: Cannot use "%#x" because it gives an error OSX with # R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20 names <- sprintf("%x", values) names <- paste("0x", names, sep = "") values <- as.raw(values) } else { values <- sort.int(values, na.last = TRUE) names <- as.character(values) } } else { if (is.raw(values)) { names <- sprintf("%x", as.integer(values)) names <- paste("0x", names, sep = "") } else { names <- as.character(values) } } transpose <- FALSE if (!transpose) { nbr_of_values <- length(values) counts <- matrix(0L, nrow = ncol(x), ncol = nbr_of_values) colnames(counts) <- names na.rm <- anyMissing(x) for (kk in seq_len(nbr_of_values)) { counts[, kk] <- colCounts(x, value = values[kk], na.rm = na.rm) } } # Update rownames attribute? if (!is.na(useNames)) { if (useNames) { colnames <- colnames(x) if (!is.null(colnames)) rownames(counts) <- colnames } else { rownames(counts) <- NULL } } else { deprecatedUseNamesNA() } counts } matrixStats/R/rowMedians.R0000644000176200001440000000436214436403546015247 0ustar liggesusers#' Calculates the median for each row (column) in a matrix #' #' Calculates the median for each row (column) in a matrix. #' #' The implementation of \code{rowMedians()} and \code{colMedians()} is #' optimized for both speed and memory. To avoid coercing to #' \code{\link[base]{double}}s (and hence memory allocation), there is a #' special implementation for \code{\link[base]{integer}} matrices. That is, #' if \code{x} is an \code{\link[base]{integer}} \code{\link[base]{matrix}}, #' then \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))}) would #' require three times the memory of \code{rowMedians(x)} #' (\code{colMedians(x)}), but all this is avoided. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param rows,cols A \code{\link[base]{vector}} indicating subset of rows #' (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting #' is done. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s #' are excluded first, otherwise not. #' #' @param dim. An \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length two specifying the dimension of \code{x}, also when not a #' \code{\link[base]{matrix}}. #' #' @param ... Not used. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson, Harris Jaffee #' #' @seealso See \code{\link{rowWeightedMedians}()} and #' \code{colWeightedMedians()} for weighted medians. #' For mean estimates, see \code{\link{rowMeans2}()} and #' \code{\link[base:colSums]{rowMeans}()}. #' #' @keywords array iteration robust univar #' @export rowMedians <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE # Add as an argument? /2007-08-24 .Call(C_rowMedians, x, dim., rows, cols, na.rm, has_nas, TRUE, useNames) } #' @rdname rowMedians #' @export colMedians <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = TRUE) { if (is.na(useNames)) deprecatedUseNamesNA() has_nas <- TRUE # Add as an argument? /2007-08-24 .Call(C_rowMedians, x, dim., rows, cols, na.rm, has_nas, FALSE, useNames) } matrixStats/R/diff2.R0000644000176200001440000000305514436403546014127 0ustar liggesusers#' Fast lagged differences #' #' Computes the lagged and iterated differences. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length #' N. #' #' @param lag An \code{\link[base]{integer}} specifying the lag. #' #' @param differences An \code{\link[base]{integer}} specifying the order of #' difference. #' #' @param useNames If \code{\link[base]{NA}}, the default behavior of the #' function about naming support is remained. If \code{\link[base:logical]{FALSE}}, #' no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names #' attributes of result are set. #' #' #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N - \code{differences}. #' #' @examples #' diff2(1:10) #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{diff}}(). #' @keywords univar internal #' #' @export diff2 <- function(x, idxs = NULL, lag = 1L, differences = 1L, ..., useNames = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'lag': if (length(lag) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "lag", length(lag))) } # Argument 'differences': if (length(differences) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "differences", length(differences))) } lag <- as.integer(lag) differences <- as.integer(differences) .Call(C_diff2, x, idxs, lag, differences, useNames) } matrixStats/R/allocMatrix.R0000644000176200001440000000234714372747711015423 0ustar liggesusers#' Allocates an empty vector, matrix or array #' #' Allocates an empty vector, matrix or array faster than the corresponding #' function in R. #' #' @param value A \code{\link[base]{numeric}} scalar that all elements will #' have as value. #' #' @param length,nrow,ncol,dim \code{\link[base]{numeric}}s specifying the #' dimension of the created \code{\link[base]{vector}}, #' \code{\link[base]{matrix}} or \code{\link[base]{array}}. #' #' @return Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and #' \code{\link[base]{array}} respectively of the same data type as #' \code{value}. #' #' @author Henrik Bengtsson #' #' @seealso See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and #' \code{\link[base]{array}}. #' #' @keywords internal programming #' #' @export allocMatrix <- function(nrow, ncol, value = 0.0, ...) { nrow <- as.integer(nrow) ncol <- as.integer(ncol) .Call(C_allocMatrix2, nrow, ncol, value) } #' @rdname allocMatrix #' @export allocVector <- function(length, value = 0.0, ...) { length <- as.numeric(length) .Call(C_allocVector2, length, value) } #' @rdname allocMatrix #' @export allocArray <- function(dim, value = 0.0, ...) { dim <- as.integer(dim) .Call(C_allocArray2, dim, value) } matrixStats/R/rowIQRs.R0000644000176200001440000000441714436403546014506 0ustar liggesusers#' Estimates of the interquartile range for each row (column) in a matrix #' #' Estimates of the interquartile range for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param ... Additional arguments passed to \code{\link{rowQuantiles}}() #' (\code{colQuantiles()}). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Missing values: Contrary to \code{\link[stats]{IQR}}, which gives #' an error if there are missing values and \code{na.rm = FALSE}, \code{iqr()} #' and its corresponding row and column-specific functions return #' \code{\link[base]{NA}}_real_. #' #' @example incl/rowIQRs.R #' #' @author Henrik Bengtsson #' @seealso See \code{\link[stats]{IQR}}. See \code{\link{rowSds}}(). #' @keywords array iteration robust univar #' #' @importFrom stats quantile #' @export rowIQRs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { Q <- rowQuantiles(x, rows = rows, cols = cols, probs = c(0.25, 0.75), na.rm = na.rm, useNames = useNames, drop = FALSE, ...) colnames(Q) <- NULL # Not needed anymore ans <- Q[, 2L, drop = TRUE] - Q[, 1L, drop = TRUE] # Remove attributes if (is.na(useNames)) { deprecatedUseNamesNA() attributes(ans) <- NULL } ans } #' @rdname rowIQRs #' @export colIQRs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { Q <- colQuantiles(x, rows = rows, cols = cols, probs = c(0.25, 0.75), na.rm = na.rm, useNames = useNames, drop = FALSE, ...) colnames(Q) <- NULL # Not needed anymore ans <- Q[, 2L, drop = TRUE] - Q[, 1L, drop = TRUE] # Remove attributes if (is.na(useNames)) { deprecatedUseNamesNA() attributes(ans) <- NULL } ans } #' @rdname rowIQRs #' @export iqr <- function(x, idxs = NULL, na.rm = FALSE, ...) { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) { x <- x[!is.na(x)] } else if (anyMissing(x)) { return(NA_real_) } # At this point, there should be no missing values # Nothing to do? n <- length(x) if (n == 0L) { return(NA_real_) } else if (n == 1L) { return(0) } q <- quantile(x, probs = c(0.25, 0.75), names = FALSE, na.rm = FALSE, ...) q[2L] - q[1L] } matrixStats/R/rowOrderStats.R0000644000176200001440000000360114436403546015754 0ustar liggesusers#' Gets an order statistic for each row (column) in a matrix #' #' Gets an order statistic for each row (column) in a matrix. #' #' The implementation of \code{rowOrderStats()} is optimized for both speed and #' memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory #' allocation), there is a unique implementation for #' \code{\link[base]{integer}} matrices. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param which An \code{\link[base]{integer}} index in [1,K] ([1,N]) #' indicating which order statistic to be returned. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Missing values: This method does \emph{not} handle missing values, #' that is, the result corresponds to having \code{na.rm = FALSE} (if such an #' argument would be available). #' #' @author The native implementation of \code{rowOrderStats()} was adopted by #' Henrik Bengtsson from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} #' package. #' #' @seealso See \code{rowMeans()} in \code{\link[base]{colSums}}(). #' #' @keywords array iteration robust univar #' @export rowOrderStats <- function(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = TRUE) { # Check missing values if (anyMissing(x)) { stop(sprintf("Argument '%s' must not contain missing values", "x")) } if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_rowOrderStats, x, dim., rows, cols, which, useNames) } #' @rdname rowOrderStats #' @export colOrderStats <- function(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = TRUE) { # Check missing values if (anyMissing(x)) { stop(sprintf("Argument '%s' must not contain missing values", "x")) } if (is.na(useNames)) deprecatedUseNamesNA() .Call(C_colOrderStats, x, dim., rows, cols, which, useNames) } matrixStats/R/binCounts.R0000644000176200001440000000615214372747711015106 0ustar liggesusers#' Fast element counting in non-overlapping bins #' #' Counts the number of elements in non-overlapping bins #' #' \code{binCounts(x, bx, right = TRUE)} gives equivalent results as #' \code{rev(binCounts(-x, bx = rev(-bx), right = FALSE))}, but is faster #' and more memory efficient. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K #' positions for to be binned and counted. #' #' @param bx A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 #' ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, #' \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}. #' #' @param right If \code{\link[base:logical]{TRUE}}, the bins are right-closed #' (left open), otherwise left-closed (right open). #' #' @return Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length B with non-negative integers. #' #' @section Missing and non-finite values: #' Missing values in \code{x} are ignored/dropped. Missing values in \code{bx} #' are not allowed and gives an error. #' #' @author Henrik Bengtsson #' #' @seealso An alternative for counting occurrences within bins is #' \code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks = bx, #' plot = FALSE)$counts}. That approach is ~30-60\% slower than #' \code{binCounts(..., right = TRUE)}. #' #' To count occurrences of indices \code{x} (positive #' \code{\link[base]{integer}}s) in \code{[1, B]}, use \code{tabulate(x, #' nbins = B)}, where \code{x} does \emph{not} have to be sorted first. For #' details, see \code{\link[base]{tabulate}}(). #' #' To average values within bins, see \code{\link{binMeans}}(). #' #' @keywords univar #' @export binCounts <- function(x, idxs = NULL, bx, right = FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x)) { stop(sprintf("Argument '%s' is not numeric: %s", "x", mode(x))) } # Argument 'bx': if (!is.numeric(bx)) { stop(sprintf("Argument '%s' is not numeric: %s", "bx", mode(bx))) } if (any(is.infinite(bx))) { stop(sprintf("Argument '%s' must not contain infinite values", "bx")) } if (is.unsorted(bx)) { stop(sprintf("Argument '%s' is not ordered", "bx")) } # Apply subset if (!is.null(idxs)) x <- x[idxs] # Argument 'right': right <- as.logical(right) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Preprocessing of x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values keep <- which(!is.na(x)) if (length(keep) < length(x)) { x <- x[keep] } keep <- NULL # Not needed anymore # Order x (by increasing x). # If 'x' is already sorted, the overhead of (re)sorting is # relatively small. x <- sort.int(x, method = "quick") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bin # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- as.numeric(x) bx <- as.numeric(bx) .Call(C_binCounts, x, bx, right) } matrixStats/R/product.R0000644000176200001440000000023114372747711014612 0ustar liggesusers#' @rdname rowProds #' @export product <- function(x, idxs = NULL, na.rm = FALSE, ...) { .Call(C_productExpSumLog, x, idxs, as.logical(na.rm), TRUE) } matrixStats/NEWS.md0000644000176200001440000015457414535664030013721 0ustar liggesusers# Version 1.2.0 [2023-12-11] ## Bug Fixes * Error messages that report on large integers (> 2^31 - 1), would not render those integers correctly. ## Deprecated and Defunct * `useNames = NA` is defunct. # Version 1.1.0 [2023-11-06] ## Deprecated and Defunct * `useNames = NA` is defunct in R (>= 4.4.0). Remains deprecated in R (< 4.4.0) for now. ## Miscellaneous * The deprecation warning for using `useNames = NA`, suggested using `useNames = TRUE` twice instead of also `useNames = FALSE`. # Version 1.0.0 [2023-06-01] ## Significant Changes * `useNames = TRUE` is the new default for all functions. For backward compatibility, it used to be `useNames = NA`. * `colQuantiles()` and `rowQuantiles()` gained argument `digits`, just like `stats::quantile()` gained that argument in R 4.1.0. * `colQuantiles()` and `rowQuantiles()` only sets quantile percentage names when `useNames = TRUE`, to align with how argument `names` of `stats::quantile()` works in base R. ## New Features * `colMeans2()` and `rowMeans2()` gained argument `refine`. If `refine = TRUE`, then the sample average for numeric matrices are calculated using a two-pass scan, resulting in higher precision. The default is `refine = TRUE` to align it with `colMeans()`, but also `mean2()` in this package. If the higher precision is not needed, using `refine = FALSE` will be almost twice as fast. * `colSds()`, `rowSds()`, `colVars()`, and `rowVars()` gained argument `refine`. If `refine = TRUE`, then the sample average for numeric matrices are calculated using a two-pass scan, resulting in higher precision for the estimate of the center and therefore also the variance. ## Performance * Unnecessary checks for missing indices are eliminated, yielding better performance. This change does not affect user-facing API. * Made `colQuantiles()` and `rowQuantiles()` a bit faster for `type != 7L`, by making sure percentage names are only generated once, instead of once per column or row. ## Bug Fixes * Contrary to other functions in the package, and how it works in base R, functions `colCumsums()`, `colCumprods()`, `colCummins()`, `colCummaxs()`, `colRanges()`, `colRanks()`, and `colDiffs()`, plus the corresponding row-based versions, did not drop the `names` attribute when both row and column names were `NULL`. Now also these functions behaves the same as the case when neither row or column names are set. * `colQuantiles()` and `rowQuantiles()` did not generate quantile percentage names exactly the same way as `stats::quantile()`, which would reveal itself for certain combinations of `probs` and `digits`. ## Deprecated and Defunct * `useNames = NA` is now deprecated. Use `useNames = TRUE` or `useNames = FALSE` instead. # Version 0.63.0 [2022-11-14] ## Miscellaneous * Package compiles again with older compilers not supporting the C99 standard (e.g. GCC 4.8.5 (2015), which is the default on RHEL / CentOS 7.9). This was the case also for matrixStats (<= 0.54.0). * Added more information to the error message produced when argument `center` for `col-` and `rowVars()` holds an invalid value. * Fix two compilation warnings on `a function declaration without a prototype is deprecated in all versions of C [-Wstrict-prototypes]`. ## Deprecated and Defunct * `validateIndices()` is now defunct and will eventually be removed from the package API. # Version 0.62.0 [2022-04-18] ## New Features * `colCummins()`, `colCummaxs()`, `rowCummins()`, and `rowCummaxs()` now support also logical input. ## Miscellaneous * Updated native code to use the C99 constant `DBL_MAX` instead of legacy S constant `DOUBLE_XMAX`, which is planned to be unsupported in R (>= 4.2.0). # Version 0.61.0 [2021-09-12] ## New Features * When argument `which` for `colOrderStats()` and `rowOrderStats()` is out of range, the error message now reports on the value of `which`. Similarly, when argument `probs` for `colQuantiles()` and `rowQuantiles()` is out of range, the error message reports on its value too. ## Deprecated and Defunct * `validateIndices()` is deprecated and will eventually be removed from the package API. ## Miscellaneous * The package test for benchmark reports failed because the **markdown** package was not declared as a suggested package. # Version 0.60.1 [2021-08-22] ## Performance * Handling of the `useNames` argument is now done in the native code. * Passing `idxs`, `rows`, and `cols` arguments of type integer is now less efficient than it used to, because the new code re-design (see below) requires an internal allocation of an equally long `R_xlen_t` vector that is populated by indices coerced from `R_len_t` to `R_xlen_t` integers. ## Code Design * No longer using native-code implementations that are specific to the type of index that is passed for subsetting of vectors, rows, and columns. This was done to avoid the complex use of macros that was cumbersome to maintain and added an extra threshold for new contributors to overcome. Another advantage is that faster compilation time when built from source and a smaller size of compiled library. In previous version `R CMD check` would produce a NOTE on the package installation size being large, which no longer is the case. The downside is that extra overhead when passing integer indices (see above comment). ## Bug Fixes * Contrary to other functions which gained new argument `useNames = NA` in the previous release, `colQuantiles()` and `rowQuantiles()` got `useNames = TRUE`. # Version 0.60.0 [2021-07-26] ## New Features * Add row and column names support to all row and column functions. To return row and column names, set argument `useNames = TRUE`. To drop them, set `useNames = FALSE`. To preserve the current, inconsistent behavior, set `useNames = NA`, which, for backward compatibility reasons, remains the default for now. # Version 0.59.0 [2021-05-31] ## Miscellaneous * Harmonized error messages. ## Bug Fixes * Some of the examples and package tests would allocated matrices with dimensions that did not match the number of elements in the input data. ## Deprecated and Defunct * Dropped `meanOver()` and `sumOver()`, and argument `method` from `weightedVar()`, that have been defunct since January 2018. # Version 0.58.0 [2021-01-26] ## Significant Changes * `colVars()` and `rowVars()` with argument `center` now calculates the sample variance using the `n/(n-1)*avg((x-center)^2)` formula rather than the `n/(n-1)*(avg(x^2)-center^2)` formula that was used in the past. Both give the same result when `center` is the correct sample mean estimate. The main reason for this change is that, if an incorrect `center` is provided, in contrast to the old approach, the new approach is guaranteed to give at least non-negative results, despite being incorrect. BACKWARD COMPATIBILITY: Out of all 314 reverse dependencies on CRAN and Bioconductor, only four called these functions with argument `center`. All of them pass their package checks also after this update. To further protect against a negative impact in existing user scripts, `colVars()` and `rowVars()` will calculate both versions and assert that the result is the same. If not, an informative error is produced. To limit the performance impact, this validation is run only once every 50:th call, a frequency that can be controlled by R option `matrixStats.vars.formula.freq`. Setting it to 0 or NULL will disable the validation. The default can also be controlled by environment variable `R_MATRIXSTATS_VARS_FORMULA_FREQ`. This validation framework will be removed in a future version of the package after it has been established that this change has no negative impact. ## New Features * Now `colWeightedMads()` and `rowWeightedMads()` accept `center` of the same length as the number of columns and rows, respectively. * `colAvgsPerRowSet()` and `rowAvgsPerRowSet()` gained argument `na.rm`. * Now `weightedMean()` and `weightedMedian()` and the corresponding row- and column-based functions accept logical `x`, where FALSE is treated as integer 0 and TRUE as 1. * Now `x_OP_y()` and `t_tx_OP_y()` accept logical `x` and `y`, where FALSE is treated as integer 0 and TRUE as 1. ## Bug Fixes * `colQuantiles()` and `rowQuantiles()` on a logical matrix should return a numeric vector for `type = 7`. However, when there were only missing values (= NA) in the matrix, then it would return a "logical" vector instead. * `colAvgsPerRowSet()` on a single-column matrix would produce an error on non-matching dimensions. Analogously, for `rowAvgsPerRowSet()` and single- row matrices. * `colVars(x)` and `rowVars(x)` with `x` being an array would give the wrong value if both argument `dim.` and `center` would be specified. * The documentation was unclear on what the `center` argument should be. They would not detect when an incorrect specification was used, notably when the length of `center` did not match the matrix dimensions. Now these functions give an informative error message when `center` is of the incorrect length. ## Deprecated and Defunct * Using a scalar value for argument `center` of `colSds()`, `rowSds()`, `colVars()`, `rowVars()`, `colMads()`, `rowMads()`, `colWeightedMads()`, and `rowWeightedMads()` is now deprecated. # Version 0.57.0 [2020-09-25] ## New Features * `colCumprods()` and `rowCumprods()` now support also logical input. Thanks to Constantin Ahlmann-Eltze at EMBL Heidelberg for the patch. ## Bug Fixes * `colCollapse()` and `rowCollapse()` did not expand `idxs` argument before subsetting by `cols` and `rows`, respectively. Thanks to Constantin Ahlmann-Eltze for reporting on this. * `colAnys()`, `rowAnys()`, `anyValue()`, `colAlls()`, `rowAlls()`, and `allValue()` with `value=FALSE` and _numeric_ input would incorrectly consider all values different from one as FALSE. Now it is only values that are zero that are considered FALSE. Thanks to Constantin Ahlmann-Eltze for the bug fix. # Version 0.56.0 [2020-03-12] ## Significant Changes * `colQuantiles()` and `rowQuantiles()` now supports only integer, numeric and logical input. Previously, it was also possible to pass, for instance, `character` input, but that was a mistake. The restriction on input allows for further optimization of these functions. * The returned type of `colQuantiles()` and `rowQuantiles()` is now the same as for `stats::quantile()`, which depends on argument `type`. ## Performance * `colQuantiles()` and `rowQuantiles()` with the default `type = 7L` and when there are no missing values are now significantly faster and use significantly fewer memory allocations. ## Bug Fixes * `colDiffs()` and `rowDiffs()` gave an error if argument `dim.` was of type numeric rather than type integer. * `varDiff()`, `sdDiff()`, `madDiff()`, `iqrDiff()`, and the corresponding row- and column functions silently treated a `diff` less than zero as `diff = 0`. Now an error is produced. * Error messages on argument `dim.` referred to non-existing argument `dim`. * Error messages on negative values in argument `dim.` reported a garbage value instead of the negative value. * The Markdown reports produced by the internal benchmark report generator did not add a line between tables and the following text (a figure caption) causing the following text to be included in a cell on an extra row in the table (at least when rendered on GitHub Wiki pages). # Version 0.55.0 [2019-09-05] ## Significant Changes * `weightedVar()`, `weightedSd()`, `weightedMad()`, and their row- and column- specific counter parts now return a missing value if there are missing values in any of the weights `w` after possibly dropping (`x`, `w`) elements with missing values in `x` (`na.rm = TRUE`). Previously, `na.rm = TRUE` would also drop (`x`, `w`) elements where `w` was missing. With this change, we now have that for all functions in this package, `na.rm = TRUE` never applies to weights - only `x` values. ## New Features * `colRanks()` and `rowRanks()` now supports the same set of `ties.method` as `base::rank()` plus `"dense"` as defined by `data.table::frank()`. For backward compatible reasons, the default `ties.method` remains the same as in previous versions. Thank to Brian Montgomery for contributing this. * `colCumsums()` and `rowCumsums()` now support also logical input. ## Bug Fixes * `weightedVar()`, `weightedSd()`, `weightedMad()`, and their row- and column- specific counter parts would produce an error instead of returning a missing value when one of the weights is a missing value. ## Deprecated and Defunct * Calling `indexByRow(x)` where `x` is a matrix is now defunct. Use `indexByRow(dim(x))` instead. # Version 0.54.0 [2018-07-23] ## Performance * SPEEDUP: No longer using `stopifnot()` for internal validation, because it comes with a great overhead. This was only used in `weightedMad()`, `col-`, and `rowWeightedMads()`, as well as `col-` and `rowAvgsPerColSet()`. ## Bug Fixes * Despite being an unlikely use case, `colLogSumExps(lx)` / `rowLogSumExps(lx)` now also accepts integer `lx` values. * The error produced when using `indexByRow(dim)` with `prod(dim) >= 2^31` would report garbage dimensions instead of `dim`. ## Deprecated and Defunct * Calling `indexByRow(x)`, where `x` is a matrix, is deprecated. Use `indexByRow(dim(x))` instead. # Version 0.53.1 [2018-02-10] ## Code Refactoring * Now `col-`/`rowSds()` explicitly replicate all arguments that are passed to `col-`/`rowVars()`. ## Documentation * Added details on how `weightedMedian(x, interpolate = TRUE)` works. ## Bug Fixes * `colLogSumExps(lx, cols)` / `rowLogSumExps(lx, rows)` gave an error if `lx` has rownames / colnames. * `col-`/`rowQuantiles()` would lose rownames of output in certain cases. # Version 0.53.0 [2018-01-23] ## New Features * Functions `sum2(x)` and `means2(x)` now accept also logical input `x`, which corresponds to using `as.integer(x)` but without the need for neither coercion nor internal extra copies. With `sum2(x, mode = "double")` it is possible to count number of TRUE elements beyond 2^31-1, which `base::sum()` does not support. * Functions `col-`/`rowSums2()` and `col-`/`rowMeans2()` now accept also logical input `x`. * Function `binMeans(y, x, bx)` now accepts logical `y`, which corresponds to to using `as.integer(y)`, but without the need for coercion to integer. * Functions `col-`/`rowTabulates(x)` now support logical input `x`. * Now `count()` can count beyond 2^31-1. * `allocVector()` can now allocate long vectors (longer than 2^31-1). * Now `sum2(x, mode = "integer")` generates a warning if `typeof(x) == "double"` asking if `as.integer(sum2(x))` was intended. * Inspired by `Hmisc::wtd.var()`, when `sum(w) <= 1`, `weightedVar(x, w)` now produces an informative warning that the estimate is invalid. ## Code Refactoring * Harmonized the ordering of the arguments of `colAvgsPerColSet()` with that of `rowAvgsPerColSet()`. ## Bug Fixes * `col-`/`rowLogSumExp()` could core dump R for "large" number of columns/rows. Thanks Brandon Stewart at Princeton University for reporting on this. * `count()` beyond 2^31-1 would return invalid results. * Functions `col-`/`rowTabulates(x)` did not count missing values. * `indexByRow(dim, idxs)` would give nonsense results if `idxs` had indices greater than `prod(dim)` or non-positive indices; now it gives an error. * `indexByRow(dim)` would give nonsense results when `prod(dim) >= 2^31`; now it gives an informative error. * `col-`/`rowAvgsPerColSet()` would return vector rather than matrix if `nrow(X) <= 1`. Thanks to Peter Hickey (Johns Hopkins University) for troubleshooting and providing a fix. ## Deprecated and Defunct * Previously deprecated `meanOver()` and `sumOver()` are defunct. Use `mean2()` and `sum2()` instead. * Previously deprecated `weightedVar(x, w, method = "0.14.2")` is defunct. * Dropped previously defunct `weightedMedian(..., ties = "both")`. * Dropped previously defunct argument `centers` for `col-`/`rowMads()`. Use `center` instead. * Dropped previously defunct argument `flavor` of `colRanks()` and `rowRanks()`. # Version 0.52.2 [2017-04-13] ## Bug Fixes * Several of the row- and column-based functions would core dump R if the matrix was of a data type other than logical, integer, or numeric, e.g. character or complex. This is now detected and an informative error is produced instead. Similarly, some vector-based functions could potentially core dump R or silently return a nonsense result. Thank you Hervé Pagès, Bioconductor Core, for the report. ## Deprecated and Defunct * `rowVars(..., method = "0.14.2")` that was added for very unlikely needs of backward compatibility of an invalid degree-of-freedom term is deprecated. # Version 0.52.1 [2017-04-04] ## Bug Fixes * The package test on `matrixStats:::benchmark()` tried to run even if not all suggested packages were available. # Version 0.52.0 [2017-04-03] ## Significant Changes * Since `anyNA()` is a built-in function since R (>= 3.1.0), please use that instead of `anyMissing()` part of this package. The latter will eventually be deprecated. For consistency with the `anyNA()` name, `colAnyNAs()` and `rowAnyNAs()` are now also available replacing the identically `colAnyMissings()` and `rowAnyMissings()` functions, which will also be deprecated in a future release. * `meanOver()` was renamed to `mean2()` and `sumOver()` was renamed to `sum2()`. ## New Features * Added `colSums2()` and `rowSums2()` which work like `colSums()` and `rowSums()` of the **base** package but also supports efficient subsetting via optional arguments `rows` and `cols`. * Added `colMeans2()` and `rowMeans2()` which work like `colMeans()` and `rowMeans()` of the **base** package but also supports efficient subsetting via optional arguments `rows` and `cols`. * Functions `colDiffs()` and `rowDiffs()` gained argument `dim.`. * Functions `colWeightedMads()` and `rowWeightedMads()` gained arguments `constant` and `center`. The current implementation only support scalars for these arguments, which means that the same values are applied to all columns and rows, respectively. In previous version a hard-to-understand error would be produced if `center` was of length greater than one; now an more informative error message is given. * Package is now silent when loaded; it no longer displays a startup message. ## Software Quality * Continuous-integration testing is now also done on macOS, in addition to Linux and Windows. * ROBUSTNESS: Package now registers the native API using also `R_useDynamicSymbols()`. ## Code Refactoring * Cleaned up native low-level API and renamed native source code files to make it easier to navigate the native API. * Now using **roxygen2** for help and NAMESPACE (was `R.oo::Rdoc`). ## Bug Fixes * `rowAnys(x)` on numeric matrices `x` would return `rowAnys(x == 1)` and not `rowAnys(x != 0)`. Same for `colAnys()`, `rowAlls()`, and `colAlls()`. Thanks Richard Cotton for reporting on this. * `sumOver(x)` and `meanOver(x)` would incorrectly return -Inf or +Inf if the intermediate sum would have that value, even if one of the following elements would turn the intermediate sum into NaN or NA, e.g. with `x` as `c(-Inf, NaN)`, `c(-Inf, +Inf)`, or `c(+Inf, NA)`. * WORKAROUND: Benchmark reports generated by `matrixStats:::benchmark()` would use any custom R prompt that is currently set in the R session, which may not render very well. Now it forces the prompt to be the built-in `"> "` one. ## Deprecated and Defunct * The package API is only intended for matrices and vectors of type numeric, integer and logical. However, a few functions would still return if called with a data.frame. This was never intended to work and is now an error. Specifically, functions `colAlls()`, `colAnys()`, `colProds()`, `colQuantiles()`, `colIQRs()`, `colWeightedMeans()`, `colWeightedMedians()`, and `colCollapse()` now produce warnings if called with a data.frame. Same for the corresponding row- functions. The use of a `data.frame will be produce an error in future releases. * `meanOver()` and `sumOver()` are deprecated because they were renamed to `mean2()` and `sum2()`, respectively. * Previously deprecated (and ignored) argument `flavor` of `colRanks()` and `rowRanks()` is now defunct. * Previously deprecated support for passing non-vector, non-matrix objects to `rowAlls()`, `rowAnys()`, `rowCollapse()`, and the corresponding column-based versions are now defunct. Likewise, `rowProds()`, `rowQuantiles()`, `rowWeightedMeans()`, `rowWeightedMedians()`, and the corresponding column-based versions are also defunct. The rationale for this is to tighten up the identity of the **matrixStats** package and what types of input it accepts. This will also help optimize the code further. # Version 0.51.0 [2016-10-08] ## Performance and Memory * SPEEDUP / CLEANUP: `rowMedians()` and `colMedians()` are now plain functions. They were previously S4 methods (due to a Bioconductor legacy). The package no longer imports the **methods** package. * SPEEDUP: Now native API is formally registered allowing for faster lookup of routines from R. # Version 0.50.2 [2016-04-24] ## Bug Fixes * Package now installs on R (>= 2.12.0) as claimed. Thanks to Mikko Korpela at Aalto University School of Science, Finland, for troubleshooting and providing a fix. * `logSumExp(c(-Inf, -Inf, ...))` would return NaN rather than `-Inf`. Thanks to Jason Xu (University of Washington) for reporting and Brennan Vincent for troubleshooting and contributing a fix. # Version 0.50.1 [2015-12-14] ## Bug Fixes * The Undefined Behavior Sanitizer (UBsan) reported on a `memcall(src, dest, 0)` call when `dest == null`. Thanks to Brian Ripley and the CRAN check tools for catching this. We could reproduce this with gcc 5.1.1 but not with gcc 4.9.2. # Version 0.50.0 [2015-12-13] ## New Features * MAJOR FEATURE UPDATE: Subsetting arguments `idxs`, `rows` and `cols` were added to all functions such that the calculations are performed on the requested subset while avoiding creating a subsetted copy, i.e. `rowVars(x, cols = 4:6)` is a much faster and more memory efficient version than `rowVars(x[, 4:6])` and even yet more efficient than `apply(x, MARGIN = 1L, FUN = var)`. These features were added by Dongcan Jiang, Peking University, with support from the Google Summer of Code program. A great thank you to Dongcan and to Google for making this possible. # Version 0.15.0 [2015-10-26] ## New Features * CONSISTENCY: Now all weight arguments (`w` and `W`) default to NULL, which corresponds to uniform weights. ## Code Refactoring * ROBUSTNESS: Importing **stats** functions in namespace. ## Bug Fixes * `weightedVar(x, w)` used the wrong bias correction factor resulting in an estimate that was tau too large, where `tau = ((sum(w) - 1) / sum(w)) / ((length(w) - 1) / length(w))`. Thanks to Wolfgang Abele for reporting and troubleshooting on this. * `weightedVar(x)` with `length(x) = 1` returned 0 - not NA. Same for `weightedSd()`. * `weightedMedian(x, w = NA_real_)` returned `x` rather than `NA_real_`. This only happened for `length(w) = 1`. * `allocArray(dim)` failed for `prod(dim) >= .Machine$integer.max`. ## Deprecated and Defunct * CLEANUP: Defunct argument `centers` for `col-`/`rowMads()`; use `center`. * `weightedVar(x, w, method = "0.14.2")` is deprecated. # Version 0.14.2 [2015-06-23] ## Bug Fixes * `x_OP_y()` and `t_tx_OP_y()` would return garbage on Solaris SPARC (and possibly other architectures as well) when input was integer and had missing values. # Version 0.14.1 [2015-06-17] ## Bug Fixes * `product(x, na.rm = FALSE)` for integer `x` with both zeros and NAs returned zero rather than NA. * `weightedMean(x, w, na.rm = TRUE)` did not handle missing values in `x` properly, if it was an integer. It would also return NaN if there were weights `w` with missing values, whereas `stats::weighted.mean()` would skip such data points. Now `weightedMean()` does the same. * `(col|row)WeightedMedians()` did not handle infinite weights as `weightedMedian()` does. * `x_OP_y(x, y, OP, na.rm = FALSE)` returned garbage iff `x` or `y` had missing values of type integer. * `rowQuantiles()` and `rowIQRs()` did not work for single-row matrices. Analogously for the corresponding column functions. * `rowCumsums()`, `rowCumprods()` `rowCummins()`, and `rowCummaxs()`, accessed out-of-bound elements for Nx0 matrices where N > 0. The corresponding column methods has similar memory errors for 0xK matrices where K > 0. * `anyMissing(list(NULL))` returned NULL; now FALSE. * `rowCounts()` resulted in garbage if a previous column had NAs (because it forgot to update index kk in such cases). * `rowCumprods(x)` handled missing values and zeros incorrectly for integer `x` (not double); a zero would trump an existing missing value causing the following cumulative products to become zero. It was only a zero that trumped NAs; any other integer would work as expected. Note, this bug was not in `colCumprods()`. * `rowAnys(x, value, na.rm = FALSE)` did not handle missing values in a numeric `x` properly. Similarly, for non-numeric and non-logical `x`, row- and `colAnys()`, row- and `colAlls()`, `anyValue()` and `allValue()` did not handle when `value` was a missing value. * All of the above bugs were identified and fixed by Dongcan Jiang (Peking University, China), who also added corresponding unit tests. # Version 0.14.0 [2015-02-13] ## Significant Changes * CLEANUP: `anyMissing()` is no longer an S4 generic. This was done as part of the migration of making all functions of **matrixStats** plain R functions, which minimizes calling overhead and it will also allow us to drop **methods** from the package dependencies. I've scanned all CRAN and Bioconductor packages depending on **matrixStats** and none of them relied on `anyMissing()` dispatching on class, so hopefully this move has little impact. The only remaining S4 methods are now `colMedians()` and `rowMedians()`. ## New Features * CONSISTENCY: Renamed argument `centers` of `col-`/`rowMads()` to `center`. This is consistent with `col-`/`rowVars()`. * CONSISTENCY: `col-`/`rowVars()` now use `na.rm = FALSE` as the default (`na.rm = TRUE` was mistakenly introduced as the default in v0.9.7). ## Performance and Memory * SPEEDUP: The check for user interrupts at the C level is now done less frequently of the functions. It does every k:th iteration, where `k = 2^20`, which is tested for using (`iter % k == 0`). It turns out, at least with the default compiler optimization settings that I use, that this test is 3 times faster if `k = 2^n` where n is an integer. The following functions checks for user interrupts: `logSumExp()`, `(col|row)LogSumExps()`, `(col|row)Medians()`, `(col|row)Mads()`, `(col|row)Vars()`, and `(col|row)Cum(Min|Max|prod|sum)s()`. * SPEEDUP: `logSumExp(x)` is now faster if `x` does not contain any missing values. It is also faster if all values are missing or the maximum value is +Inf - in both cases it can skip the actual summation step. ## Software Quality * ROBUSTNESS/TESTS: Package tests cover 96% of the code (was 91%). ## Code Refactoring * CLEANUP: Package no longer depends on **R.methodsS3**. ## Bug Fixes * `all()` and `any()` flavored methods on non-numeric and non-logical (e.g. character) vectors and matrices with `na.rm = FALSE` did not give results consistent with `all()` and `any()` if there were missing values. For example, with `x <- c("a", NA, "b")` we have `all(x == "a") == FALSE` and `any(x == "a") == TRUE`, whereas our corresponding methods would return NA in those cases. The methods fixed are `allValue()`, `anyValue()`, `col-`/`rowAlls()`, and `col-`/`rowAnys()`. Added more package tests to cover these cases. * `logSumExp(x, na.rm = TRUE)` would return NA if all values were NA and `length(x) > 1`. Now it returns -Inf for all `length(x)`:s. # Version 0.13.1 [2015-01-21] ## Bug Fixes * `diff2()` with `differences >= 3` would _read_ spurious values beyond the allocated memory. This error, introduced in 0.13.0, was harmless in the sense that the returned value was unaffected and still correct. Thanks to Brian Ripley and the CRAN check tools for catching this. I could reproduce it locally with valgrind. # Version 0.13.0 [2015-01-20] ## Significant Changes * SPEEDUP/CLEANUP: Turned several S3 and S4 methods into plain R functions, which decreases the overhead of calling the functions. After this there are no longer any S3 methods. Remaining S4 methods are `anyMissing()` and `rowMedians()`. ## New Features * Added `weightedMean()`, which is ~10 times faster than `stats::weighted.mean()`. * Added `count(x, value)` which is a notably faster than `sum(x == value)`. This can also be used to count missing values etc. * Added `allValue()` and `anyValue()` for `all(x == value)` and `any(x == value)`. * Added `diff2()`, which is notably faster than `base::diff()` for vectors, which it is designed for. * Added `iqrDiff()` and `(col|row)IqrDiffs()`. * CONSISTENCY: Now `rowQuantiles(x, na.rm = TRUE)` returns all NAs for rows with missing values. Analogously for `colQuantiles()`, `colIQRs()`, `rowIQRs()` and `iqr()`. Previously, all these functions gave an error saying missing values are not allowed. * COMPLETENESS: Added corresponding "missing" vector functions for already existing column and row functions. Similarly, added "missing" column and row functions for already existing vector functions, e.g. added `iqr()` and `count()` to complement already existing `(col|row)IQRs()` and `(col|row)Counts()` functions. * ROBUSTNESS: Now column and row methods give slightly more informative error messages if a data.frame is passed instead of a matrix. ## Documentation * Added vignette summarizing available functions. ## Performance and Memory * SPEEDUP: `(col|row)Diffs()` are now implemented in native code and notably faster than `diff()` for matrices. * SPEEDUP: Made `binCounts()` and `binMeans()` a bit faster. * SPEEDUP: Implemented `weightedMedian()` in native code, which made it ~3-10 times faster. Dropped support for `ties = "both"`, because it would have to return two values in case of ties, which made the API unnecessarily complicated. If really needed, then call the function twice with `ties = "min"` and `ties = "max"`. * SPEEDUP: `(col|row)Anys()` and `(col|row)Alls()` is now notably faster compared to previous versions. ## Code Refactoring * CLEANUP: In the effort of migrating `anyMissing()` into a plain R function, the specific `anyMissing()` implementations for data.frame:s and and list:s were dropped and is now handled by `anyMissing()` for `"ANY"`, which is the only S4 method remaining now. In a near future release, this remaining `"ANY"` method will turned into a plain R function and the current S4 generic will be dropped. We know of no CRAN and Bioconductor packages that rely on it being a generic function. Note also that since R (>= 3.1.0) there is a `base::anyNA()` function that does the exact same thing making `anyMissing()` obsolete. ## Bug Fixes * `weightedMedian(..., ties = "both")` would give an error if there was a tie. Added package test for this case. ## Deprecated and Defunct * `weightedMedian(..., ties = "both")` is now defunct. # Version 0.12.2 [2014-12-07] ## Bug Fixes * CODE FIX: The native code for `product()` on integer vector incorrectly used C-level `abs()` on intermediate values despite those being doubles requiring `fabs()`. Despite this, the calculated product would still be correct (at least when validated on several local setups as well as on the CRAN servers). Again, thanks to Brian Ripley for pointing out another invalid integer-double coercion at the C level. ## Deprecated and Defunct * `weightedMedian(..., interpolate = FALSE, ties = "both")` is defunct. # Version 0.12.1 [2014-12-06] ## Software Quality * ROBUSTNESS: Updated package tests to check methods in more scenarios, especially with both integer and numeric input data. ## Bug Fixes * `(col|row)Cumsums(x)` where `x` is integer would return garbage for columns (rows) containing missing values. * `rowMads(x)` where `x` is numeric (not integer) would give incorrect results for rows that had an _odd_ number of values (no ties). Analogously issues with `colMads()`. Added package tests for such cases too. Thanks to Brian Ripley and the CRAN check tools for (yet again) catching another coding mistake. Details: This was because the C-level calculation of the absolute value of residuals toward the median would use integer-based `abs()` rather than double-based `fabs()`. Now it `fabs()` is used when the values are double and `abs()` when they are integers. # Version 0.12.0 [2014-12-05] * Submitted to CRAN. # Version 0.11.9 [2014-11-26] ## New Features * Added `(col|row)Cumsums()`, `(col|row)Cumprods()`, `(col|row)Cummins()`, and `(col|row)Cummaxs()`. ## Bug Fixes * `(col|row)WeightedMeans()` with all zero weights gave mean estimates with values 0 instead of NaN. # Version 0.11.8 [2014-11-25] ## Performance and Memory * SPEEDUP: Implemented `(col|row)Mads()`, `(col|row)Sds()`, and `(col|row)Vars()` in native code. * SPEEDUP: Made `(col|row)Quantiles(x)` faster for `x` without missing values (and default `type = 7L` quantiles). It should still be implemented in native code though. * SPEEDUP: Made `rowWeightedMeans()` faster. ## Bug Fixes * `(col|row)Medians(x)` when `x` is integer would give invalid median values in case (a) it was calculated as the mean of two values ("ties"), and (b) the sum of those values where greater than `.Machine$integer.max`. Now such ties are calculated using floating point precision. Add lots of package tests. # Version 0.11.6 [2014-11-16] ## Performance and Memory * SPEEDUP: Now `(col|row)Mins()`, `(col|row)Maxs()`, and `(col|row)Ranges()` are implemented in native code providing a significant speedup. * SPEEDUP: Now `colOrderStats()` also is implemented in native code, which indirectly makes `colMins()`, `colMaxs()` and `colRanges()` faster. * SPEEDUP: `colTabulates(x)` no longer uses `rowTabulates(t(x))`. * SPEEDUP: `colQuantiles(x)` no longer uses `rowQuantiles(t(x))`. ## Deprecated and Defunct * CLEANUP: Argument `flavor` of `(col|row)Ranks()` is now ignored. # Version 0.11.5 [2014-11-15] ## Significant Changes * `(col|row)Prods()` now uses default `method = "direct"` (was `"expSumLog"`). ## Performance and Memory * SPEEDUP: Now `colCollapse(x)` no longer utilizes `rowCollapse(t(x))`. Added package tests for `(col|row)Collapse()`. * SPEEDUP: Now `colDiffs(x)` no longer uses `rowDiffs(t(x))`. Added package tests for `(col|row)Diffs()`. * SPEEDUP: Package no longer utilizes `match.arg()` due to its overhead; methods `sumOver()`, `(col|row)Prods()` and `(col|row)Ranks()` were updated. # Version 0.11.4 [2014-11-14] ## New Features * Added support for vector input to several of the row- and column methods as long as the "intended" matrix dimension is specified via argument `dim`. For instance, `rowCounts(x, dim = c(nrow, ncol))` is the same as `rowCounts(matrix(x, nrow, ncol))`, but more efficient since it avoids creating/allocating a temporary matrix. ## Performance and Memory * SPEEDUP: Now `colCounts()` is implemented in native code. Moreover, `(col|row)Counts()` are now also implemented in native code for logical input (previously only for integer and double input). Added more package tests and benchmarks for these functions. # Version 0.11.3 [2014-11-11] ## Significant Changes * Turned `sdDiff()`, `madDiff()`, `varDiff()`, `weightedSd()`, `weightedVar()` and `weightedMad()` into plain functions (were generic functions). ## Code Refactoring * Removed unnecessary usage of `::`. # Version 0.11.2 [2014-11-09] ## Significant Changes * SPEEDUP: Implemented `indexByRow()` in native code and it is no longer a generic function, but a regular function, which is also faster to call. The first argument of `indexByRow()` has been changed to `dim` such that one should use `indexByRow(dim(X))` instead of `indexByRow(X)` as in the past. The latter form is still supported, but deprecated. ## New Features * Added `allocVector()`, `allocMatrix()`, and `allocArray()` for faster allocation numeric vectors, matrices and arrays, particularly when filled with non-missing values. ## Deprecated and Defunct * Calling `indexByRow(X)` with a matrix `X` is deprecatated. Instead call it with `indexByRow(dim(X))`. # Version 0.11.1 [2014-11-07] ## New Features * Better support for long vectors. * PRECISION: Using greater floating-point precision in more internal intermediate calculations, where possible. ## Software Quality * ROBUSTNESS: Although unlikely, with long vectors support for `binCounts()` and `binMeans()` it is possible that a bin gets a higher count than what can be represented by an R integer (`.Machine$integer.max = 2^31-1`). If that happens, an informative warning is generated and the bin count is set to `.Machine$integer.max`. If this happens for `binMeans()`, the corresponding mean is still properly calculated and valid. ## Code Refactoring * CLEANUP: Cleanup and harmonized the internal C API such there are two well defined API levels. The high-level API is called by R via `.Call()` and takes care of most of the argument validation and construction of the return value. This function dispatch to functions in the low-level API based on data type(s) and other arguments. The low-level API is written to work with basic C data types only. ## Bug Fixes * Package incorrectly redefined `R_xlen_t` on R (>= 3.0.0) systems where `LONG_VECTOR_SUPPORT` is not supported. # Version 0.11.0 [2014-11-02] ## New Features * Added `sumOver()` and `meanOver()`, which are notably faster versions of `sum(x[idxs])` and `mean(x[idxs])`. Moreover, instead of having to do `sum(as.numeric(x))` to avoid integer overflow when `x` is an integer vector, one can do `sumOver(x, mode = "numeric")`, which avoids the extra copy created when coercing to numeric (this numeric copy is also twice as large as the integer vector). Added package tests and benchmark reports for these functions. # Version 0.10.4 [2014-11-01] ## Performance and Memory * SPEEDUP: Made `anyMissing()`, `logSumExp()`, `(col|row)Medians()`, and `(col|row)Counts()` slightly faster by making the native code assign the results directly to the native vector instead of to the R vector, e.g. `ansp[i] = v` where `ansp = REAL(ans)` instead of `REAL(ans)[i] = v`. * Added benchmark reports for `anyMissing()` and `logSumExp()`. # Version 0.10.3 [2014-10-01] ## Bug Fixes * `binMeans()` returned 0.0 instead of `NA_real_` for empty bins. # Version 0.10.2 [2014-09-01] ## Bug Fixes * On some systems, the package failed to build on R (<= 2.15.3) with compilation error: `"redefinition of typedef 'R_xlen_t'"`. # Version 0.10.1 [2014-06-09] ## Performance and Memory * Added benchmark reports for also non-**matrixStats** functions `col-`/`rowSums()` and `col-`/`rowMeans()`. * Now all `colNnn()` and `rowNnn()` methods are benchmarked in a combined report making it possible to also compare `colNnn(x)` with `rowNnn(t(x))`. # Version 0.10.0 [2014-06-07] ## Software Quality * Relaxed some packages tests such that they assert numerical correctness via `all.equal()` rather than `identical()`. * Submitted to CRAN. ## Bug Fixes * The package tests for `product()` incorrectly assumed that the value of `prod(c(NaN, NA))` is uniquely defined. However, as documented in `help("is.nan")`, it may be NA or NaN depending on R system/platform. # Version 0.9.7 [2014-06-05] ## Bug Fixes * Introduced a bug in v0.9.5 causing `col-`/`rowVars()` and hence also `col-`/`rowSds()` to return garbage. Add package tests for these now. * Submitted to CRAN. # Version 0.9.6 [2014-06-04] ## New Features * Added `signTabulate()` for tabulating the number of negatives, zeros, positives and missing values. For doubles, the number of negative and positive infinite values are also counted. ## Performance and Memory * SPEEDUP: Now `col-`/`rowProds()` utilizes new `product()` function. * SPEEDUP: Added `product()` for calculating the product of a numeric vector via the logarithm. # Version 0.9.5 [2014-06-04] ## Significant Changes * SPEEDUP: Made `weightedMedian()` a plain function (was an S3 method). * CLEANUP: Now only exporting plain functions and generic functions. * SPEEDUP: Turned more S4 methods into S3 methods, e.g. `rowCounts()`, `rowAlls()`, `rowAnys()`, `rowTabulates()` and `rowCollapse()`. ## New Features * Added argument `method` to `col-`/`rowProds()` for controlling how the product is calculated. ## Performance and Memory * SPEEDUP: Package is now byte compiled. * SPEEDUP: Made `rowProds()` and `rowTabulates()` notably faster. * SPEEDUP: Now `rowCounts()`, `rowAnys()`, `rowAlls()` and corresponding column methods can search for any value in addition to the default TRUE. The search for a matching integer or double value is done in native code, which is notably faster (and more memory efficient because it avoids creating any new objects). * SPEEDUP: Made `colVars()` and `colSds()` notably faster and `rowVars()` and `rowSds()` a slightly bit faster. * Added benchmark reports, e.g. `matrixStats:::benchmark("colMins")`. # Version 0.9.4 [2014-05-23] ## Significant Changes * SPEEDUP: Turned several S4 methods into S3 methods, e.g. `indexByRow()`, `madDiff()`, `sdDiff()` and `varDiff()`. # Version 0.9.3 [2014-04-26] ## New Features * Added argument `trim` to `madDiff()`, `sdDiff()` and `varDiff()`. # Version 0.9.2 [2014-04-04] ## Bug Fixes * The native code of `binMeans(x, bx)` would try to access an out-of-bounds value of argument `y` iff `x` contained elements that are left of all bins in `bx`. This bug had no impact on the results and since no assignment was done it should also not crash/core dump R. This was discovered thanks to new memtests (ASAN and valgrind) provided by CRAN. # Version 0.9.1 [2014-03-31] ## Bug Fixes * `rowProds()` would throw `"Error in rowSums(isNeg) : `x` must be an array of at least two dimensions"` on matrices where all rows contained at least one zero. Thanks to Roel Verbelen at KU Leuven for the report. # Version 0.9.0 [2014-03-26] ## New Features * Added `weighedVar()` and `weightedSd()`. # Version 0.8.14 [2013-11-23] ## Performance and Memory * MEMORY: Updated all functions to do a better job of cleaning out temporarily allocated objects as soon as possible such that the garbage collector can remove them sooner, iff wanted. This increase the chance for a smaller memory footprint. * Submitted to CRAN. # Version 0.8.13 [2013-10-08] ## New Features * Added argument `right` to `binCounts()` and `binMeans()` to specify whether binning should be done by (u,v] or [u,v). Added system tests validating the correctness of the two cases. ## Code Refactoring * Bumped up package dependencies. # Version 0.8.12 [2013-09-26] ## Performance and Memory * SPEEDUP: Now utilizing `anyMissing()` everywhere possible. # Version 0.8.11 [2013-09-21] ## Software Quality * ROBUSTNESS: Now importing `loadMethod` from **methods** package such that **matrixStats** S4-based methods also work when **methods** is not loaded, e.g. when `Rscript` is used, cf. Section 'Default packages' in 'R Installation and Administration'. * ROBUSTNESS: Updates package system tests such that the can run with only the **base** package loaded. # Version 0.8.10 [2013-09-15] ## Code Refactoring * CLEANUP: Now only importing two functions from the **methods** package. * Bumped up package dependencies. # Version 0.8.9 [2013-08-29] ## New Features * CLEANUP: Now the package startup message acknowledges argument `quietly` of `library()`/`require()`. # Version 0.8.8 [2013-07-29] ## Documentation * The dimension of the return value was swapped in `help("rowQuantiles")`. # Version 0.8.7 [2013-07-28] ## Performance and Memory * SPEEDUP: Made `(col|row)Mins()` and `(col|row)Maxs()` much faster. ## Bug Fixes * `rowRanges(x)` on an Nx0 matrix would give an error. Same for `colRanges(x)` on an 0xN matrix. Added system tests for these and other special cases. # Version 0.8.6 [2013-07-20] ## Code Refactoring * Bumped up package dependencies. ## Bug Fixes * Forgot to declare S3 methods `(col|row)WeightedMedians()`. # Version 0.8.5 [2013-05-25] ## Performance and Memory * Minor speedup of `(col|row)Tabulates()` by replacing `rm()` calls with NULL assignments. # Version 0.8.4 [2013-05-20] ## Documentation * CRAN POLICY: Now all Rd `\usage{}` lines are at most 90 characters long. # Version 0.8.3 [2013-05-10] ## Performance and Memory * SPEEDUP: `binCounts()` and `binMeans()` now uses Hoare's Quicksort for presorting `x` before counting/averaging. They also no longer test in every iteration (== for every data point) whether the last bin has been reached or not, but only after completing a bin. # Version 0.8.2 [2013-05-02] ## Documentation * Minor corrections and updates to help pages. # Version 0.8.1 [2013-05-02] ## Bug Fixes * Native code of `logSumExp()` used an invalid check for missing value of an integer argument. Detected by Brian Ripley upon CRAN submission. # Version 0.8.0 [2013-05-01] ## New Features * Added `logSumExp(lx)` and `(col|row)LogSumExps(lx)` for accurately computing of `log(sum(exp(lx)))` for standalone vectors, and row and column vectors of matrices. Thanks to Nakayama (Japan) for the suggestion and contributing a draft in R. # Version 0.7.1 [2013-04-23] ## New Features * Added argument `preserveShape` to `colRanks()`. For backward compatibility the default is `preserveShape = FALSE`, but it may change in the future. ## Bug Fixes * Since v0.6.4, `(col|row)Ranks()` gave the incorrect results for integer matrices with missing values. * Since v0.6.4, `(col|row)Medians()` for integers would calculate ties as `floor(tieAvg)`. # Version 0.7.0 [2013-01-14] ## New Features * Now `(col|row)Ranks()` support `"max"` (default), `"min"` and `"average"` for argument `ties.method`. Added system tests validation these cases. Thanks Peter Langfelder (UCLA) for contributing this. # Version 0.6.4 [2013-01-13] ## New Features * Added argument `ties.method` to `rowRanks()` and `colRanks()`, but still only support for `"max"` (as before). ## Code Refactoring * ROBUSTNESS: Lots of cleanup of the internal/native code. Native code for integer and double cases have been harmonized and are now generated from a common code template. This was inspired by code contributions from Peter Langfelder (UCLA). # Version 0.6.3 [2013-01-13] ## New Features * Added `anyMissing()` for data type `raw`, which always returns FALSE. ## Software Quality * ROBUSTNESS: Added system test for `anyMissing()`. * ROBUSTNESS: Now S3 methods are declared in the namespace. # Version 0.6.2 [2012-11-15] ## Software Quality * CRAN POLICY: Made `example(weightedMedian)` faster. # Version 0.6.1 [2012-10-10] ## Bug Fixes * In some cases `binCounts()` and `binMeans()` could try to go past the last bin resulting a core dump. * `binCounts()` and `binMeans()` would return random/garbage values for bins that were beyond the last data point. # Version 0.6.0 [2012-10-04] ## New Features * Added `binMeans()` for fast sample-mean calculation in bins. Thanks to Martin Morgan at the Fred Hutchinson Cancer Research Center, Seattle, for contributing the core code for this. * Added `binCounts()` for fast element counting in bins. # Version 0.5.3 [2012-09-10] ## Software Quality * CRAN POLICY: Replaced the `.Internal(psort(...))` call with a call to a new internal partial sorting function, which utilizes the native `rPsort()` part of the R internals. # Version 0.5.2 [2012-07-02] ## Code Refactoring * Updated package dependencies to match CRAN. # Version 0.5.1 [2012-06-25] ## New Features * GENERALIZATION: Now `(col|row)Prods()` handle missing values. ## Code Refactoring * Package now only imports the **methods** package. ## Bug Fixes * In certain cases, `(col|row)Prods()` would return NA instead of 0 for some elements. Added a redundancy test for the case. Thanks Brenton Kenkel at University of Rochester for reporting on this. # Version 0.5.0 [2012-04-16] ## New Features * Added `weightedMad()` from **aroma.core** v2.5.0. * Added `weightedMedian()` from **aroma.light** v1.25.2. ## Code Refactoring * This package no longer depends on the **aroma.light** package for any of its functions. * Now this package only imports **R.methodsS3**, meaning it no longer loads **R.methodsS3** when it is loaded. # Version 0.4.5 [2012-03-19] ## New Features * Updated the default argument `centers` of `rowMads()`/`colMads()` to explicitly be `(col|row)Medians(x,...)`. The default behavior has not changed. # Version 0.4.4 [2012-03-05] ## Software Quality * ROBUSTNESS: Added system/redundancy tests for `rowMads()`/`colMads()`. * CRAN: Made the system tests "lighter" by default, but full tests can still be run, cf. `tests/*.R` scripts. ## Bug Fixes * `colMads()` would return the incorrect estimates. This bug was introduced in **matrixStats** v0.4.0 (2011-11-11). # Version 0.4.3 [2011-12-11] ## Bug Fixes * `rowMedians(..., na.rm = TRUE)` did not handle NaN (only NA). The reason for this was the the native code used `ISNA()` to test for NA and NaN, but it should have been `ISNAN()`, which is opposite to how `is.na()` and `is.nan()` at the R level work. Added system tests for this case. # Version 0.4.2 [2011-11-29] ## New Features * Added `rowAvgsPerColSet()` and `colAvgsPerRowSet()`. # Version 0.4.1 [2011-11-25] ## Documentation * Added help pages with an example to `rowIQRs()` and `colIQRs()`. * Added example to `rowQuantiles()`. ## Bug Fixes * `rowIQRs()` and `colIQRs()` would return the 25% and the 75% quantiles, not the difference between them. Thanks Pierre Neuvial at CNRS, Evry, France for the report. # Version 0.4.0 [2011-11-11] ## Significant Changes * Dropped the previously introduced expansion of `center` in `rowMads()` and `colMads()`. It added unnecessary overhead if not needed. ## New Features * Added `rowRanks()` and `colRanks()`. Thanks Hector Corrada Bravo (University of Maryland) and Harris Jaffee (John Hopkins). # Version 0.3.0 [2011-10-13] ## Performance and Memory * SPEEDUP/LESS MEMORY: `colMedians(x)` no longer uses `rowMedians(t(x))`; instead there is now an optimized native-code implementation. Also, `colMads()` utilizes the new `colMedians()` directly. This improvement was kindly contributed by Harris Jaffee at Biostatistics of John Hopkins, USA. ## Software Quality * Added additional unit tests for `colMedians()` and `rowMedians()`. # Version 0.2.2 [2010-10-06] ## New Features * Now the result of `(col|row)Quantiles()` contains column names. # Version 0.2.1 [2010-04-05] ## New Features * Added a startup message when package is loaded. ## Code Refactoring * CLEANUP: Removed obsolete internal `.First.lib()` and `.Last.lib()`. # Version 0.2.0 [2010-03-30] ## Documentation * Fixed some incorrect cross references. # Version 0.1.9 [2010-02-03] ## Bug Fixes * `(col|row)WeightedMeans(..., na.rm = TRUE)` would incorrectly treat missing values as zeros. Added corresponding redundancy tests (also for the median case). Thanks Pierre Neuvial for reporting this. # Version 0.1.8 [2009-11-13] ## Bug Fixes * `colRanges(x)` would return a matrix of wrong dimension if `x` did not have any missing values. This would affect all functions relying on `colRanges()`, e.g. `colMins()` and `colMaxs()`. Added a redundancy test for this case. Thanks Pierre Neuvial at UC Berkeley for reporting this. * `(col|row)Ranges()` return a matrix with dimension names. # Version 0.1.7 [2009-06-20] ## Bug Fixes * WORKAROUND: Cannot use `"%#x"` in `rowTabulates()` when creating the column names of the result matrix. It gave an error OSX with R v2.9.0 devel (2009-01-13 r47593b) current the OSX server at R-forge. # Version 0.1.6 [2009-06-17] ## Documentation * Updated the help example for `rowWeightedMedians()` to run conditionally on **aroma.light**, which is only a suggested package - not a required one. This in order to prevent `R CMD check` to fail on CRAN, which prevents it for building binaries (as it currently happens on their OSX servers). # Version 0.1.5 [2009-02-04] ## Bug Fixes * For some errors in `rowOrderStats()`, the stack would not become UNPROTECTED before calling error. # Version 0.1.4 [2009-02-02] ## New Features * Added methods `(col|row)Weighted(Mean|Median)s()` for weighted averaging. ## Documentation * Added help to more functions. ## Software Quality * Package passes `R CMD check` flawlessly. # Version 0.1.3 [2008-07-30] ## New Features * Added `(col|row)Tabulates()` for integer and raw matrices. ## Bug Fixes * `rowCollapse()` was broken and returned the wrong elements. # Version 0.1.2 [2008-04-13] ## New Features * Added `(col|row)Collapse()`. * Added `varDiff()`, `sdDiff()`, and `madDiff()`. * Added `indexByRow()`. # Version 0.1.1 [2008-03-25] ## New Features * Added `(col|row)OrderStats()`. * Added `(col|row)Ranges()` and `(col|row)(Min|Max)s()`. * Added `colMedians()`. * Now `anyMissing()` support most data types as structures. # Version 0.1.0 [2007-11-26] ## New Features * Imported the `rowNnn()` methods from **Biobase**. * Created. matrixStats/MD50000644000176200001440000005423214535706561013127 0ustar liggesusers132c7b078e1213962091580584c20d89 *DESCRIPTION 27bd90e8e917d72efbffe86ec0c70adf *NAMESPACE 46a55511cb83e56f469e37de313f63d2 *NEWS.md 9e56f96eadfb5af7ac7f155b27f614a6 *R/000.DEPRECATION.R bb69907c26b811fcf67fddc0e9cd5b77 *R/999.package.R 22692aefe0cfed876cf940f760dfe1a8 *R/allocMatrix.R 0d8b18bc42dbf6155445a77805db90d8 *R/anyMissing.R 029a1ceab8c720d15f9c8887bf580b18 *R/benchmark.R e1ab7f274317f2c22753ea86d5c59619 *R/binCounts.R d949de2d5a1007e9b53ab269a50dbe01 *R/binMeans.R 87ec6bc8986b1d91e98f15ab0aaee4f9 *R/diff2.R 347eadcaf24f2a10ab26e8142aeba913 *R/indexByRow.R 8953d8ff68c0269108adeb3bf473041e *R/logSumExp.R cb915487b0215f1ea4468a8ab9a12b21 *R/mean2.R 53451895bb9b2462bc6f658234faa87f *R/product.R 9da47eff39df6e33896014f79961fd5c *R/psortKM.R 4d6be12119de0179d0b71b136c1e0248 *R/rowAlls.R 87d4f7fb3c618784a640d617fe6ce077 *R/rowAvgsPerColSet.R b4db4385ce2e0e18037a08cb5886d94a *R/rowCollapse.R c83f04c07539b410d1ac021ecdc1da9e *R/rowCounts.R 6ba0f86f654498c39e2affd5b819b2cf *R/rowCumsums.R 54fbe6bb2452ba8d25ce605860a4d301 *R/rowDiffs.R e7352ec13e29009259377161bc85b3d6 *R/rowIQRs.R c4c233f2c6cf2f91987eb7130d1f900d *R/rowLogSumExps.R 5c01f8b948ffc8ad459d14d1d807c157 *R/rowMads.R da7c9803c227be4f1549941004c28396 *R/rowMeans2.R a3e6cb9471b0dc0b1c726f3aa69a49cd *R/rowMedians.R 7099b6f3c253d0ebc408cabbfdb1777f *R/rowOrderStats.R 46c342486a77f75ac63cd695c88a234d *R/rowProds.R 1ac9f98d810941edb261633001d2a54a *R/rowQuantiles.R 854521c8eaa63282947ef0c98ed64cb4 *R/rowRanges.R 72e39bebbdc84c384ab025c5e5371fba *R/rowRanks.R 1fc3dc14c66677885089f0f234e7fc3c *R/rowSds.R a638520d99835eb4d246cc37fd4ef030 *R/rowSums2.R 738966fbfc70f6955a0f1096477f2900 *R/rowTabulates.R 923497d601ce9d99c13a54134b3b24ef *R/rowVars.R 3b7285bc6340220823e7f4d8e36ad316 *R/rowWeightedMeans.R 48fb4f14af2f8c4227ec069287822757 *R/rowWeightedMedians.R 797df1655bc7c5236c01e136c7b04b79 *R/signTabulate.R 524e9c3bf9863e13b5fb84b6fca2e64c *R/sum2.R 7b5a0987a69e87d0043f485a5a959075 *R/validateIndices.R 7c2b538fe145ec52d4169e0e15522b88 *R/varDiff.R 829d1dcbd748f137cd7fe765b21b2b27 *R/weightedMad.R c250fd34f40c5fc816f505eeff9e4839 *R/weightedMean.R 707dcc1630616260447f26a2feb85721 *R/weightedMedian.R 712b21f461fd1c475d0fa18285a491de *R/weightedVar.R 36d268136e18f67db93dc5649623a8da *R/x_OP_y.R 60481318214ab7ffbc6edd0e4957ec44 *R/zzz.R 36e53478d48fdbd6cbbb04292cc0011f *build/vignette.rds 9e4a729fee3a8de13a1fd2c09e90dfce *inst/WORDLIST ca773a01fc2df607bef2bf6ee72f01fb *inst/benchmarking/R/random-matrices.R 68d9d75e6b36f6bd61569553be4cb133 *inst/benchmarking/R/random-vectors.R e00cf911d14b0c6bc13bb17b3dd62f1e *inst/benchmarking/allocMatrix.md.rsp 81d8eb603b80c27bed9570bd49ae03f0 *inst/benchmarking/allocVector.md.rsp 87c66c03ab530a473a7cfab448e30664 *inst/benchmarking/anyMissing.md.rsp f97927a62ed926d0092677057692449f *inst/benchmarking/anyMissing_subset.md.rsp 86bfbd81cb938e1caa98d01bfc32849c *inst/benchmarking/binCounts.md.rsp 17aa0aa0a6be8f9359f1cb0f09286db2 *inst/benchmarking/binCounts_subset.md.rsp 4e7fff2c598adc6b0982245a5542dfd9 *inst/benchmarking/binMeans.md.rsp 3797dd738454272a3431c4979b39c0c9 *inst/benchmarking/binMeans_subset.md.rsp 3a3911d3e2b396d9f55015baef8f04f5 *inst/benchmarking/colRowAlls.md.rsp ffe602c8b8a9e9ce2aab09ad6b3e37d8 *inst/benchmarking/colRowAlls_subset.md.rsp 23f7cd533e7631799278786bca210bf9 *inst/benchmarking/colRowAnyMissings.md.rsp 69ab6c0d6c537a30b5550ad5c4a231b2 *inst/benchmarking/colRowAnyMissings_subset.md.rsp 73f26ffb9340049cd3818c63b622a549 *inst/benchmarking/colRowAnys.md.rsp 0098b1085ada124e68e8dfa7f7d3620f *inst/benchmarking/colRowAnys_subset.md.rsp 40d79f5b1e5f42e33318a5bcd36a554d *inst/benchmarking/colRowCounts.md.rsp 313513c280d51f3638c2cd6eebf650d5 *inst/benchmarking/colRowCounts_subset.md.rsp 2b1c4e6a925ee6443499c01c846ea04b *inst/benchmarking/colRowCummins.md.rsp 5261ab39cdf0673af2d3589071f00fb2 *inst/benchmarking/colRowCummins_subset.md.rsp e619768942c419a9e0628d0ef324a914 *inst/benchmarking/colRowCumprods.md.rsp 6f6045c415f73cdceef948a6978f5707 *inst/benchmarking/colRowCumprods_subset.md.rsp a8697a83212a1d59887f547e097104b2 *inst/benchmarking/colRowCumsums.md.rsp efba595e480cbd813311cc298d5e2c88 *inst/benchmarking/colRowCumsums_subset.md.rsp 927737181751c970fae9963e4e4bc370 *inst/benchmarking/colRowDiffs.md.rsp 4e74122ee4454982b84331ef7d7951e9 *inst/benchmarking/colRowDiffs_subset.md.rsp c20505dc8b1d26b04899a0a627ec7bd3 *inst/benchmarking/colRowLogSumExps.md.rsp 505c90416b1dedd80ce017b7ebae6d9b *inst/benchmarking/colRowLogSumExps_subset.md.rsp 24d973f08bca1b563257c1dc1a10544a *inst/benchmarking/colRowMads.md.rsp 54cb05bb7c246cca315156a83cd8087d *inst/benchmarking/colRowMads_subset.md.rsp e4362ddc6881e551ad8609d15dfa5430 *inst/benchmarking/colRowMeans2.md.rsp 1625e937116e331749caaf0b6f3f23c7 *inst/benchmarking/colRowMeans2_subset.md.rsp 10cc2739e1485f274478af45cc535386 *inst/benchmarking/colRowMedians.md.rsp 2149f29d22e6f6a5ca0b3952ea4b9521 *inst/benchmarking/colRowMedians_subset.md.rsp fdc8aea9103c8f029188621562fbd985 *inst/benchmarking/colRowMins.md.rsp f8e85f562710a8ab093c7c64c8b88597 *inst/benchmarking/colRowMins_subset.md.rsp eb98ef33bed1925407cfcf6a2b133b5d *inst/benchmarking/colRowOrderStats.md.rsp ae91cc26681c8784f56d078a6983feb9 *inst/benchmarking/colRowOrderStats_subset.md.rsp 3d8008f33266df3de8da082280f93fa1 *inst/benchmarking/colRowProds.md.rsp 66699301fd6260e59762b488bf8c13fe *inst/benchmarking/colRowProds_subset.md.rsp c8bd43b47ecb77a45536af8f9ad39949 *inst/benchmarking/colRowQuantiles.md.rsp 7f84b255abdcf984e7d845a999662587 *inst/benchmarking/colRowQuantiles_subset.md.rsp 88c7df3b546ffb3b96dd82d7d7b584c1 *inst/benchmarking/colRowRanges.md.rsp 3513026d04a7983083fb4e8f06b99362 *inst/benchmarking/colRowRanges_subset.md.rsp f4f381ce25f6718d064cb9cffa89db68 *inst/benchmarking/colRowRanks.md.rsp 11291514300345f2ff5428b5a216d498 *inst/benchmarking/colRowRanks_subset.md.rsp ad61d0827bffb9751d6d2836b85387db *inst/benchmarking/colRowSums2.md.rsp 0b3508d6488f5f133dcdd3b98753d961 *inst/benchmarking/colRowSums2_subset.md.rsp 50b104a7f39a08ea581f0964907333e6 *inst/benchmarking/colRowTabulates.md.rsp 113d098fd3a2b8ccfa2657dc67c50d1b *inst/benchmarking/colRowTabulates_subset.md.rsp b7029ee5fb5966559b19a3ed574546f7 *inst/benchmarking/colRowVars.md.rsp fd0f2f8afa805375950d79ea1fd7757e *inst/benchmarking/colRowVars_subset.md.rsp 02b139cf5d9e2eb4abe496b26a745d76 *inst/benchmarking/colRowWeightedMeans.md.rsp 5643a1fd128eb081a40fa632404d87ee *inst/benchmarking/colRowWeightedMeans_subset.md.rsp bbaedf22f44a0a1d038d2e3b89826f37 *inst/benchmarking/colRowWeightedMedians.md.rsp 9545c448dd47ea1314617936fbf5fc81 *inst/benchmarking/colRowWeightedMedians_subset.md.rsp ca9f0ec0b3fdba6f089b42f284eafbae *inst/benchmarking/count.md.rsp 58aaf19b7253bb01ccca1de1719b9d9e *inst/benchmarking/count_subset.md.rsp ab9f5049c780d39eca80e084f60cf68e *inst/benchmarking/includes/appendix.md.rsp 75a4dbe1cebc11442ce3f8626cb2f786 *inst/benchmarking/includes/footer.md.rsp 80fec8731611547e148434c4a8af80a7 *inst/benchmarking/includes/header.md.rsp 126e65a1ededbfcac666238fd271a0dd *inst/benchmarking/includes/references.md.rsp e4f6f67e64fa5e2dc270cd443add0053 *inst/benchmarking/includes/results.md.rsp f2400acb9800a049f23305a37cac9db3 *inst/benchmarking/includes/setup.md.rsp b02cc2f4ed76e14478c85429397ac129 *inst/benchmarking/index.md.rsp b9c2a2f843b653034c1654bc12d48fe5 *inst/benchmarking/indexByRow.md.rsp e55144f01e221ec010b701b273e164cc *inst/benchmarking/logSumExp.md.rsp 0c1b2fa6b417e39be2da23c774ff663b *inst/benchmarking/logSumExp_subset.md.rsp 1a6af4356e4e9edb2a59e9632e6d4615 *inst/benchmarking/madDiff.md.rsp 07ad9f539a5cee89cd0bdad8175990cf *inst/benchmarking/madDiff_subset.md.rsp a4dbef70e5817836b15dcf085deb426d *inst/benchmarking/mean2.md.rsp 9f87d271ce0a2ec1387c304ccdb128d9 *inst/benchmarking/mean2_subset.md.rsp 1402880c69595363a1e3c3c985bc013f *inst/benchmarking/product.md.rsp 62d982e249e0d8cfee8729d35af302ef *inst/benchmarking/product_subset.md.rsp 9c94919d6a73a46b27b97673f771afe1 *inst/benchmarking/sum2.md.rsp 18bdbfc55c0bff412054f683c4f26342 *inst/benchmarking/sum2_subset.md.rsp b744777ba0af2a8a50e5d1c9207e7177 *inst/benchmarking/t_tx_OP_y.md.rsp 03905f14b790eb96b401d934d953cfbd *inst/benchmarking/t_tx_OP_y_subset.md.rsp e8c65eec84796fa104ebd81eaf06baee *inst/benchmarking/varDiff.md.rsp 7673948c3ce870b5a2f9d777e1e94a16 *inst/benchmarking/varDiff_subset.md.rsp 4056928e8440de8793c172c949b1a64d *inst/benchmarking/weightedMean.md.rsp 414e2188ce3206a01b280c3786d187b2 *inst/benchmarking/weightedMean_subset.md.rsp 0d6e67649cafa21f08f3405456d0365e *inst/benchmarking/weightedMedian.md.rsp fa6e9a4a08145bce053c6e4d774e81a0 *inst/benchmarking/weightedMedian_subset.md.rsp e68f83709396b851d7c53e36561ed6de *inst/benchmarking/x_OP_y.md.rsp 2dd5fa9d134fc7d05ded89f7d458b1eb *inst/benchmarking/x_OP_y_subset.md.rsp 8aca63a6f160a4957408e179fa346c3f *inst/doc/matrixStats-methods.html b5d58ec253e5089b3b1ca4887eab8707 *inst/doc/matrixStats-methods.md.rsp c15095e33173fb3d53787d941d897b38 *man/allocMatrix.Rd 0cc157b8e006da349166ed27ee16e923 *man/anyMissing.Rd 1597c2bff07ae712ad7f8ae2f4463bda *man/binCounts.Rd 2b140889d7a66298178edc89991cb800 *man/binMeans.Rd aec03d002f48cd8e4c19f8e7ba169d58 *man/diff2.Rd dfec8268244c7ba31e5f05259de042fa *man/indexByRow.Rd 54b2a88dd66b4375d970bb60494a0d0b *man/logSumExp.Rd d43c64f9b7c712624975ec95c1f35da8 *man/matrixStats-package.Rd 5931291ee15bb5e5db7dab9444422ccb *man/mean2.Rd 5953527e5ac3883252a19503f5d52478 *man/rowAlls.Rd 023d7d19d51a9a5864586bf3ef5f2849 *man/rowAvgsPerColSet.Rd 08319b9868129455cc312a69647d0723 *man/rowCollapse.Rd 0f201288ca7eb10d514e321c717dbb7a *man/rowCounts.Rd 7e2a214413816dab7c0b6d00f7c5c0f2 *man/rowCumsums.Rd 7473796856244b03cab78f1bfd99305e *man/rowDiffs.Rd e6736e12befae7f142a2403623c1505c *man/rowIQRs.Rd 7d600b3251699c8729fa682e2a8149e2 *man/rowLogSumExps.Rd 980f22e8b7844c2afbfd530c7ec0bbb4 *man/rowMeans2.Rd 78f84751f21a6ddb0df6bad32a69e420 *man/rowMedians.Rd 64c043f4e0fe02d5466a660d9bff1b7c *man/rowOrderStats.Rd c04bff5493a121362846f5e2a17b3613 *man/rowProds.Rd 3a18056d1b223537a69a523f089892b0 *man/rowQuantiles.Rd 164db7d40cc72657b7466c42712f4ebc *man/rowRanges.Rd 5c772b3945042e05fd5b15845b121e80 *man/rowRanks.Rd 7e3508517f0380e7d4006853428ba162 *man/rowSds.Rd 3eb0accfb771bcdb27103dab1ed72ec5 *man/rowSums2.Rd 5a251d34bd909abf6e960b27861d2df2 *man/rowTabulates.Rd 8c87625fcb2591697ea86c3181c713c2 *man/rowVars.Rd cc6c848a44f9fbaefe76efd3d1f630f4 *man/rowWeightedMeans.Rd e80714b76e98403fd0f6e153d56b5023 *man/rowWeightedMedians.Rd aa7b600a019f2582cd7428cc7f0398f4 *man/signTabulate.Rd e97a29fa0ee367f395911358f52a56f6 *man/sum2.Rd d654c9f780cae199457ed8af88f89363 *man/validateIndices.Rd 2749f9c15eba0538b15258e2ee160acf *man/varDiff.Rd 18f57dbfaa32a51aea47655bc95490ed *man/weightedMad.Rd 1bf025f76a8aaffef5aa3cf680170670 *man/weightedMean.Rd 4baf1676a824eacb67ec185ab56d295e *man/weightedMedian.Rd f70df1d998b982bb30a50e9dcd4a337d *man/weightedVar.Rd 2925d3c7237ed81c4651bb5970d7ad48 *man/x_OP_y.Rd d5c9c33a99530c1b16370d4f9166a4cb *src/000.api.h fbfc7033e88493ee5ef6f77828fa9d21 *src/000.init.c 868a4fcddfaaac11ab9baa14ad540ff7 *src/000.macros.h 3591d2ca94a33b65f00d043d8336c7d1 *src/000.templates-gen-matrix-vector.h 707b5b0c13bd4613ba117a0d4f04a80a *src/000.templates-gen-matrix.h 16aca8455b398420b74fc43d6726d0cc *src/000.templates-gen-vector.h 33d90e5f5a19ed2f41e0edd84c83d25a *src/000.templates-types.h 78bbf931f308f8cf910cf70f646d4597 *src/000.templates-types_undef.h 1fe96f23b52c292f78943b4980eb0f33 *src/000.types.h ad0eb460a823c99bc125e5538abed0af *src/000.utils.h 9068a12abdd41b2e2e8ce7ae37ce26d3 *src/allocMatrix2.c c2c73f2dec822eaa223a8f439023e815 *src/anyMissing.c f707d1f7d1836a553ddcd23ea601dcd2 *src/anyMissing_lowlevel.h 8555b5ea1e89da20e8d78713949413c7 *src/anyMissing_lowlevel_template.h 63fa0abc483aa0794ebb3fd0c7d7b4cb *src/binCounts.c f026702ce73cfca5e4c129595a871b8c *src/binCounts_lowlevel.h d972d21de1f4216d249fe461d3b87a53 *src/binCounts_lowlevel_template.h b865609a2091b68604a2a7aa877d5560 *src/binMeans.c a50741688f74c650961f648bd4c8bba6 *src/binMeans_lowlevel.h c58cc3cf0939d9f3346396493f36ad46 *src/binMeans_lowlevel_template.h cbfca9292e9a23933720e6ad18bcd269 *src/colCounts.c c7deed2de5de842d0df91a129976f39d *src/colCounts_lowlevel.h 61d59e223a991ad47fe4e8026cdc33bc *src/colCounts_lowlevel_template.h 85721e1a83ad47abb806d8fa55b988ee *src/colOrderStats.c d0492740e45af2e94f5e2b0ebbf840e6 *src/colOrderStats_lowlevel.h 953656060629b45b8ba473643eaba4a4 *src/colOrderStats_lowlevel_template.h 15eee15b1bc5a27800290e1c16b35406 *src/colRanges.c df3954a274e510190521a95eedb686c9 *src/colRanges_lowlevel.h 1f3bd10344dad14a097f802ca5ecec4d *src/colRanges_lowlevel_template.h 87291facb330ef01c0be9244e13e39c4 *src/diff2.c fbb48f659c37341b609c77d851238c46 *src/diff2_lowlevel.h 0cc84d7d60cf7b6b91f5f42927bf1872 *src/diff2_lowlevel_template.h ffe52b4ba4ac5430dd3e6c492ee0e78d *src/indexByRow.c c558de7a23b63d611522b2fd3a4cdf5e *src/logSumExp.c 18b041e72633c98970002a39b667cd37 *src/logSumExp_lowlevel.h cae9f864841998e5f616b23740a857c3 *src/logSumExp_lowlevel_template.h 8b5f309e6da1250da2b6873a91e74375 *src/mean2.c 39ee6378eb30ac046610df48789ce405 *src/mean2_lowlevel.h 5aaba4f2fbbaba24ab3e10cafa7d7f72 *src/mean2_lowlevel_template.h f71aa7750fadf2c4ec6090b38088c28d *src/naming.c 0e401ec2a93138540c117eae001c6058 *src/naming.h f632de439cb4573c333e9cbad8c0f7f2 *src/productExpSumLog.c d8acfc11dbdb98f3f8554be84f754c37 *src/productExpSumLog_lowlevel.h 2aa7b1e6f177974e7e633510b2499929 *src/productExpSumLog_lowlevel_template.h a2e008bbd2037b65edec7e0d65724354 *src/psortKM.c 273149ca4ec4c44df5967b7e7bc892e2 *src/rowCounts.c 14e4dffc63c6b94cc0dc17867c9631f7 *src/rowCounts_lowlevel.h 4499cb4d7a05e1ef0e8afa79e14bda0e *src/rowCounts_lowlevel_template.h f930ec9acc2348145e7a00abaad2420e *src/rowCumMinMaxs_lowlevel_template.h 5eeefb1842a674c1ec8b92c8f3488334 *src/rowCummaxs.c 6cede01dc7456bb4d778f3f2aa818058 *src/rowCummaxs_lowlevel.h 71e257dbc0a017b064410e70765528e3 *src/rowCummins.c e21d4576135b6bf3644d52dba6dbd447 *src/rowCummins_lowlevel.h 7c8239a8789571db0d4c7967b8f9f353 *src/rowCumprods.c 555b037c7a4b91672f4f54b36768bc7e *src/rowCumprods_lowlevel.h 18b8ecc4aa44a5d67db312335f13573f *src/rowCumprods_lowlevel_template.h acaa0b2828ddd30c8fe30190fdeafbc1 *src/rowCumsums.c 99a5f91f4b6e56c9c875d9ada8a08861 *src/rowCumsums_lowlevel.h 546a91d20de6296ea182c6f36d185c92 *src/rowCumsums_lowlevel_template.h f8f34e7414890d350191b49ce4c60a13 *src/rowDiffs.c 9b3e976c672b5d789f2707f30c6aacf2 *src/rowDiffs_lowlevel.h b09cbdd1050d66619e53eee60325557b *src/rowDiffs_lowlevel_template.h 4e90fc6e298579e5f724ac3356242177 *src/rowLogSumExp.c 9c628e3d784944295fe77b927c0e2001 *src/rowLogSumExp_lowlevel.h 1b7d9ea6ef78f0b16443fd2b2ba0f4ad *src/rowLogSumExp_lowlevel_template.h 5d258c56097aa980b46efaeab58ff616 *src/rowMads.c 875565352ac1d7d5a92be8215a54fca9 *src/rowMads_lowlevel.h 3f32a234b99eeace309ad04aabc69ae7 *src/rowMads_lowlevel_template.h 9185fc9d08230b336e73549b0d6790ab *src/rowMeans2.c fa5f14a118c30b67915ef9a6e0013921 *src/rowMeans2_lowlevel.h 1dc908166d6484c4391d5257fd9123c9 *src/rowMeans2_lowlevel_template.h cb30b49750bbd465de084aea54f896ee *src/rowMedians.c 348ee5f226a180488b3d6f76fd01707a *src/rowMedians_lowlevel.h 1590cc765379e65d1878908448bebf73 *src/rowMedians_lowlevel_template.h 01b210068410fdf842fa972ce5e9d2f2 *src/rowOrderStats.c 8652eb4cd0030805139cb455984774ee *src/rowOrderStats_lowlevel.h 082b44d5aa35a6163b9705b377673f5e *src/rowOrderStats_lowlevel_template.h 3344388a6e52e0cbb4dbf8f94822868e *src/rowRanges.c 325fe6d1f3fdb08318f21a86c5e738af *src/rowRanges_lowlevel.h bde138be2b70427d3e144945481bb90e *src/rowRanges_lowlevel_template.h 3d2c0168440d36531c857de45a837697 *src/rowRanksWithTies.c a7a56fe7c92d4504d87233de18d647a3 *src/rowRanksWithTies_lowlevel.h 995c050fcab76aee26537ddc05b61de8 *src/rowRanksWithTies_lowlevel_template.h ff43ae0070f33daff9e45772be061653 *src/rowSums2.c 209c226ca99239c059a2193017e1ef46 *src/rowSums2_lowlevel.h b1613699ebf2070fa72e31a12f427296 *src/rowSums2_lowlevel_template.h f6e8906a531d861d74cfd7d13eaae583 *src/rowVars.c 5efe9852f5b52191bb51817bc49d8c31 *src/rowVars_lowlevel.h 1ecd8207adc70792abfd2210e1af0cb3 *src/rowVars_lowlevel_template.h e3fdf9de1d5ba1b16192e0a49b46b01f *src/signTabulate.c efdabd29810bb191c4868c991633f56d *src/signTabulate_lowlevel.h 93403bf228195260e1925069be1db01f *src/signTabulate_lowlevel_template.h 6468bb525177166b617e0963535745ec *src/sum2.c dcb3116940b2507193e05ec2b02b9e15 *src/sum2_lowlevel.h eb04dd7cf21c0db9c6fe643f6d99f6c5 *src/sum2_lowlevel_template.h 45a8a42762dc22d1f0f1ec75213165f9 *src/validateIndices.c 673e1caf745e7aa9613f5e62dd292cea *src/validateIndices_lowlevel.h ea4a7b5c1eaa0c5625be4d386e397d1e *src/validateIndices_lowlevel_template.h 314c4dda14dcb3f9411b7587c6111812 *src/weightedMean.c ab1fd15c3c860a3f7370d982352c635d *src/weightedMean_lowlevel.h a2497f864ae32df0889b7450d0806a8d *src/weightedMean_lowlevel_template.h d59495e09b776912b33e9b544cda1d9f *src/weightedMedian.c 7b0bdaf3f4ce49dee9e8cb68fde02b64 *src/weightedMedian_lowlevel.h 2cf8f019799524a795245c88a564d11b *src/weightedMedian_lowlevel_template.h 7f9e55ba2d6ac7fd2b2ae92e3d1e0dae *src/x_OP_y.c bc72f301efc6a3f271407894ad75c929 *src/x_OP_y_lowlevel.h cbb3efa1f4b80fdf568d5d979bc10d9d *src/x_OP_y_lowlevel_template.h 631bd7ebf66662a7a83d3695cdd98149 *tests/allocArray.R 26b24f578ea01425c6575469f4858b05 *tests/allocMatrix.R 1c7839cfd44c26855ea1aa654105748d *tests/allocVector.R bd72ad4ad55040aaff979fdfbe2e593e *tests/anyMissing.R 67fb9105a0b3ace0bc15046f90bbd5da *tests/anyMissing_subset.R a1e2e2c6a832f0e0ce4fd3c55a7e2f0e *tests/benchmark.R feb57eb54180362c581b6fe53b500b29 *tests/binCounts.R 099a8ddd1b5e46d307a624d87fc12169 *tests/binCounts_subset.R c7f7cea187264d3fb7152ef5bb3e94d1 *tests/binMeans,binCounts.R e0b1c88a59eaf59571a6af74d26b55d8 *tests/binMeans,binCounts_subset.R 14f1b37ac91da690e225bad78a1a632b *tests/count.R 880706ea6c5cfc1d213df9ee5b758211 *tests/count_subset.R 5c28add3909391972e74a9f924a1bd27 *tests/diff2.R 20b7f6f96f3c1c18f87e5bdf3db94800 *tests/diff2_subset.R 02750ad0adf3675733e3f1006a703a3b *tests/indexByRow.R 3367c853594ca5ff2cb145a7b4ba9670 *tests/logSumExp.R 4c911012825f2d15f5d8abd1abc411fb *tests/logSumExp_subset.R bc615a404d2aa4ac9ff678afbc2a8e67 *tests/mean2.R 028eee10d3e9da4822795bf96e47b1f4 *tests/mean2_subset.R d5c028c3973250bbfd46fde0f6201609 *tests/product.R e0bb296810a798ae8d481f2402e70846 *tests/product_subset.R 16228ebb15d78d1d8a80b9342e4c88d3 *tests/psortKM.R d90820f7ae871ec41b14b45e7c9c38d4 *tests/rowAllAnys.R 492c72edc45859a6d80bb7ae64b3b3a8 *tests/rowAllAnys_subset.R df340cb87673d51c7f7912f4e42dddc6 *tests/rowAvgsPerColSet.R 0689281bc5dcadfb63cb6e4a8b9ca719 *tests/rowAvgsPerColSet_subset.R ed8837cb1f7f49f10da8a0a15564271f *tests/rowCollapse.R dc23f780f4ad38c30813fde16cb01989 *tests/rowCollapse_subset.R 48b30430b9e5cbbe9d5b7321ccb3fae9 *tests/rowCounts.R 792de6b9615b3ccb51400557ee5ef6c9 *tests/rowCounts_subset.R e9f20ed32b4a2407705ac03876f32391 *tests/rowCumMinMaxs.R f27b6219b3bde96019ceb2f173233f1d *tests/rowCumMinMaxs_subset.R b3be3db9228382d0d4183a090156dcea *tests/rowCumprods.R 7ccfdf66c74900b797f626f414c30df1 *tests/rowCumprods_subset.R 58b4133d186c3b49618c27aa4fd36475 *tests/rowCumsums.R 891dc612ffb88eb51120daec35e7411f *tests/rowCumsums_subset.R 39216a7769c530b9965f8bbd74c245b0 *tests/rowDiffs.R fcad50619ae1b08adbc1fd4ff5b2d5fc *tests/rowDiffs_subset.R 2c62a58f350493ac0fcac7f46e7320b9 *tests/rowIQRs.R 75f98de960a95d15a7b8d111f8f39f29 *tests/rowIQRs_subset.R ee261cc354cb9960ff2ae362bd751309 *tests/rowLogSumExps.R 18ac9949702145182516f427ca4f33bf *tests/rowLogSumExps_subset.R 767cc500312e8cdd6ca9dba122342e2a *tests/rowMads.R cddf3d9a766c89bd4a27f83ac2de29ed *tests/rowMads_subset.R a994b05d2d920a5c7f8300a98c0c151e *tests/rowMeans2.R 65154251d226d6689a695179c99dde6d *tests/rowMeans2_subset.R 482c67a42669890c4afe5ecdd013b47e *tests/rowMedians.R 5c8be132a6ad40ef5b029a07b68a055d *tests/rowMedians_subset.R 98e97156e59bfa3894038a0802b82006 *tests/rowOrderStats.R b72cc7cf804bd1e7b5257e51d82c6bfc *tests/rowOrderStats_subset.R 885d64086fd4caff50c8310619047ac2 *tests/rowProds.R 274a62399d522d4bbf307e793eaa3c05 *tests/rowProds_subset.R 3649c0d5bac8e9bbbc1dc4d7c1a26754 *tests/rowQuantiles.R 41f367ffc4d68dd305e98fc9d27d18fc *tests/rowQuantiles_subset.R 97bdb895e3205ed502d01cfbde5be76b *tests/rowRanges.R 4ec3bdc4c3cbd12d6edfacb80bba75ae *tests/rowRanges_subset.R 5fce573a1155bba8d27c5bfc7e1ebd11 *tests/rowRanks.R 9d16a6c7bff1f466ee05c7e12d5d7de5 *tests/rowRanks_subset.R 374683458d2c5b4a79bb9ca9eba05a91 *tests/rowSds.R f90a4f8041a8bc87f64d3ca5eed1c716 *tests/rowSds_subset.R 3bce99530ab3c0bd839577bce9da0947 *tests/rowSums2.R d52f6dce8c919af0885ce36950562d44 *tests/rowSums2_subset.R 0323756be25d8ed3dfbd05cde0647d62 *tests/rowTabulates.R ebcc44022e7f6d16ffa8347ce7547230 *tests/rowTabulates_subset.R 59a7d90222ef1df964ccf3c611dbecc1 *tests/rowVarDiffs.R 90ec9a31fa0a525d8c582be86e5d6c9e *tests/rowVarDiffs_mad,iqr_subset.R 75219f38b5913d2917a4c2a921b63703 *tests/rowVarDiffs_var,sd_subset.R fc2ccc064f73e46371a51b71fd134e6f *tests/rowVars.R cb326498a7f1690429134a76b7588e0d *tests/rowVars_subset.R fb4a776c02357dd4bd0625bf2940bcfe *tests/rowWeightedMeans.R 49733bbc3fcc0a5af4ea91d9b88d677f *tests/rowWeightedMeans_subset.R e884ce92b8e0c7a244dae6d57f989cc4 *tests/rowWeightedMedians.R 9650574644d77f0162db510e74d641d1 *tests/rowWeightedMedians_subset.R 3fac06edaca17f6485288051092e615b *tests/rowWeightedVars.R 0af8ce246411ebc2efaf3116b05360b6 *tests/rowWeightedVars_subset.R d3d529257f5e212fe44e4d7edec07f28 *tests/signTabulate.R fb3a48080d0a12e86997c1b96a9cf68c *tests/signTabulate_subset.R a45c9c4723df1e16ccc9849bf407b6a9 *tests/sum2.R 5b542504aabd2cf782b11d5987ca6b93 *tests/sum2_subset.R e0044a74c4d6585bd2071a8097128fbe *tests/utils/validateIndicesFramework.R 0b25e503226f9dd53ba3bbf84137f47c *tests/validateIndices.R 62dff0f6f8cd327315ab719bc2b4abdc *tests/varDiff_etal.R 776e5c2aac0f37ee8e9ee0bfd5625bf6 *tests/varDiff_etal_subset.R 7ae3041de11cbc9b7d7d990357ecb829 *tests/weightedMean.R f45aabcd7da19289147c9fe12dd41566 *tests/weightedMean_subset.R e5785aced77e842fe7c1d35781976bab *tests/weightedMedian.R 31af6e65986eed830fa17824e002b9bc *tests/weightedMedian_subset.R be4415d793eb8bd7a3d1bf5906abccb8 *tests/weightedVar.R 66f915b07f523fd74bc54cb2139c34b8 *tests/weightedVar_etal.R 115fd03bc3822bf1be54f36dc2cca212 *tests/weightedVar_etal_subset.R 53ebd8c7218cea409ff3e4a81bf812ae *tests/x_OP_y.R a32e4d6d831823488d07ae2b5e9117e3 *tests/x_OP_y_subset.R ae02684a77e833bf48a7a1e4b3b10dca *tests/zzz.package-unload.R b5d58ec253e5089b3b1ca4887eab8707 *vignettes/matrixStats-methods.md.rsp matrixStats/inst/0000755000176200001440000000000014535664471013571 5ustar liggesusersmatrixStats/inst/doc/0000755000176200001440000000000014535664471014336 5ustar liggesusersmatrixStats/inst/doc/matrixStats-methods.html0000644000176200001440000002232414535664471021213 0ustar liggesusers matrixStats: Summary of functions

matrixStats: Summary of functions

matrixStats: Summary of functions

Henrik Bengtsson on NA

Location and scale estimators

Estimator Functions Example
Weighted sample mean weightedMean(), colWeightedMeans(), rowWeightedMeans() weightedMean(x, w); rowWeightedMeans(x, w)
Mean mean2(), colMeans2(), rowMeans2() mean2(x); rowMeans2(x)
Median median(), colMedians(), rowMedians() median(x); rowMedians(x)
Weighted median weightedMedian(), colWeightedMedians(), rowWeightedMedians() weightedMedian(x, w); rowWeightedMedians(x, w)
Sample variance var(), colVars(), rowVars() var(x); rowVars(x)
Weighted sample variance weightedVar(), colWeightedVars(), rowWeightedVars() weightedVar(x, w), rowWeightedVars(x, w)
Sample variance by n-order differences varDiff(), colVarDiffs(), rowVarDiffs() varDiff(x); rowVarDiffs(x)
Sample standard deviation sd(), colSds(), rowSds() sd(x); rowSds(x)
Weighted sample deviation weightedSd(), colWeightedSds(), rowWeightedSds() weightedSd(x, w), rowWeightedSds(x, w)
Sample standard deviation by n-order differences sdDiff(), colSdDiffs(), rowSdDiffs() sdDiff(x); rowSdDiffs(x)
Median absolute deviation (MAD) mad(), colMads(), rowMads() mad(x); rowMads(x)
Weighted median absolute deviation (MAD) weightedMad(), colWeightedMads(), rowWeightedMads() weightedMad(x, w), rowWeightedMads(x, w)
Median absolute deviation (MAD) by n-order differences madDiff(), colMadDiffs(), rowMadDiffs() madDiff(x); rowMadDiffs()
Quantile quantile(), colQuantiles(), rowQuantiles() quantile(x, probs); rowQuantiles(x, probs)
Interquartile range (IQR) iqr(), colIQRs(), rowIQRs() iqr(x); rowIQRs(x)
Interquartile range (IQR) by n-order differences iqrDiff(), colIQRDiffs(), rowIQRDiffs() iqrDiff(x); rowIQRDiffs(x)
Range range(), colRanges(), rowRanges() range(x); rowRanges(x)
Minimum min(), colMins(), rowMins() min(x); rowMins(x)
Maximum max(), colMaxs(), rowMaxs() max(x); rowMaxs(x)

Testing for and counting values

Operator Functions Example
Are there any missing values? anyMissing(), colAnyMissings(), rowAnyMissings() anyMissing(x); rowAnyMissings(x)
Does TRUE exists? any(), colAnys(), rowAnys() any(x); rowAnys(x)
Are all values TRUE? all(), colAlls(), rowAlls() all(x); rowAlls(x)
Does value exists? anyValue(), colAnys(), rowAnys() anyValue(x, value); rowAnys(x, value)
Do all elements have a given value? allValue(), colAlls(), rowAlls() allValue(x, value); rowAlls(x, value)
Number of occurrences of a value? count(), colCounts(), rowCounts() count(x, value); rowCounts(x, value)

Cumulative functions

Operator Functions Example
Cumulative sum cumsum(), colCumsums(), rowCumsums() cumsum(x); rowCumsums(x)
Cumulative product cumprod(), colCumprods(), rowCumprods() cumprod(x); rowCumprods(x)
Cumulative minimum cummin(), colCummins(), rowCummins() cummin(x); rowCummins(x)
Cumulative maximum cummax(), colCummaxs(), rowCummaxs() cummax(x); rowCummaxs(x)

Binning

Estimator Functions Example
Counts in disjoint bins binCounts() binCounts(x, bx)
Sample means (and counts) in disjoint bins binMeans() binMeans(y, x, bx)

Miscellaneous

Operation Functions Example
Sum sum2(), colSums2(), rowSums2() sum2(x); rowSums2(x)
Lagged differences diff2(), colDiffs(), rowDiffs() diff2(x), rowDiffs(x)

matrixStats v1.2.0. Release: CRAN, Development: GitHub.

matrixStats/inst/doc/matrixStats-methods.md.rsp0000644000176200001440000002132414372747711021447 0ustar liggesusers<%@meta language="R-vignette" content="-------------------------------- DIRECTIVES FOR R: %\VignetteIndexEntry{matrixStats: Summary of functions} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{matrix} %\VignetteKeyword{vector} %\VignetteKeyword{apply} %\VignetteKeyword{rows} %\VignetteKeyword{columns} %\VignetteKeyword{memory} %\VignetteKeyword{speed} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> <% pkgName <- "matrixStats" library(pkgName, character.only=TRUE) ns <- getNamespace(pkgName) env <- as.environment(sprintf("package:%s", pkgName)) R.utils::use("R.utils") kable <- function(df, ...) { fcns <- as.character(df$Functions) fcns <- strsplit(fcns, split=",") fcns <- sapply(fcns, FUN=function(names) { names <- trim(names) ok <- sapply(names, FUN=exists, envir=ns, mode="function") names[ok] <- sprintf("%s()", names[ok]) names[!ok] <- sprintf("~~%s()~~", names[!ok]) names <- paste(names, collapse=", ") }) df$Functions <- fcns df$Example <- sprintf("`%s`", df$Example) print(knitr::kable(df, ..., format="markdown")) } # Find all functions all <- ls(envir=env) keep <- sapply(all, FUN=function(name) { is.function(get(name, envir=env)) }) all <- all[keep] keep <- !grepl("[.]([^.]*)$", all) all <- all[keep] # Hidden functions skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet") skip <- c(skip, "allocArray", "allocMatrix", "allocVector") all <- setdiff(all, skip) # Column and row functions crfcns <- grep("^(col|row)", all, value=TRUE) # Vector functions vfcns <- setdiff(all, crfcns) %> # <%@meta name="title"%> <% pkg <- R.oo::Package(pkgName) %> <%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%> <% fcns <- crfcns base <- gsub("^(col|row)", "", fcns) groups <- tapply(fcns, base, FUN=list) stopifnot(all(sapply(groups, FUN=length) == 2L)) groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L) %> <%--- ## Functions that apply to column and rows of matrices ``` <% print(fcns) %> ``` ---%> <% fcns <- vfcns %> <%--- ## Functions that apply to vectors ``` <% print(fcns) %> ``` ---%> ## Location and scale estimators <% tbl <- NULL row <- data.frame( "Estimator" = "Weighted sample mean", "Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans", "Example" = "weightedMean(x, w); rowWeightedMeans(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Mean", "Functions" = "mean2, colMeans2, rowMeans2", "Example" = "mean2(x); rowMeans2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median", "Functions" = "median, colMedians, rowMedians", "Example" = "median(x); rowMedians(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median", "Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians", "Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance", "Functions" = "var, colVars, rowVars", "Example" = "var(x); rowVars(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample variance", "Functions" = "weightedVar, colWeightedVars, rowWeightedVars", "Example" = "weightedVar(x, w), rowWeightedVars(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance by n-order differences", "Functions" = "varDiff, colVarDiffs, rowVarDiffs", "Example" = "varDiff(x); rowVarDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation", "Functions" = "sd, colSds, rowSds", "Example" = "sd(x); rowSds(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample deviation", "Functions" = "weightedSd, colWeightedSds, rowWeightedSds", "Example" = "weightedSd(x, w), rowWeightedSds(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation by n-order differences", "Functions" = "sdDiff, colSdDiffs, rowSdDiffs", "Example" = "sdDiff(x); rowSdDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD)", "Functions" = "mad, colMads, rowMads", "Example" = "mad(x); rowMads(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median absolute deviation (MAD)", "Functions" = "weightedMad, colWeightedMads, rowWeightedMads", "Example" = "weightedMad(x, w), rowWeightedMads(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD) by n-order differences", "Functions" = "madDiff, colMadDiffs, rowMadDiffs", "Example" = "madDiff(x); rowMadDiffs()" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Quantile", "Functions" = "quantile, colQuantiles, rowQuantiles", "Example" = "quantile(x, probs); rowQuantiles(x, probs)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR)", "Functions" = "iqr, colIQRs, rowIQRs", "Example" = "iqr(x); rowIQRs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR) by n-order differences", "Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs", "Example" = "iqrDiff(x); rowIQRDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Range", "Functions" = "range, colRanges, rowRanges", "Example" = "range(x); rowRanges(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Minimum", "Functions" = "min, colMins, rowMins", "Example" = "min(x); rowMins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Maximum", "Functions" = "max, colMaxs, rowMaxs", "Example" = "max(x); rowMaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Testing for and counting values <% tbl <- NULL row <- data.frame( "Operator" = "Are there any missing values?", "Functions" = "anyMissing, colAnyMissings, rowAnyMissings", "Example" = "anyMissing(x); rowAnyMissings(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does TRUE exists?", "Functions" = "any, colAnys, rowAnys", "Example" = "any(x); rowAnys(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Are all values TRUE?", "Functions" = "all, colAlls, rowAlls", "Example" = "all(x); rowAlls(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does value exists?", "Functions" = "anyValue, colAnys, rowAnys", "Example" = "anyValue(x, value); rowAnys(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Do all elements have a given value?", "Functions" = "allValue, colAlls, rowAlls", "Example" = "allValue(x, value); rowAlls(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Number of occurrences of a value?", "Functions" = "count, colCounts, rowCounts", "Example" = "count(x, value); rowCounts(x, value)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Cumulative functions <% tbl <- NULL row <- data.frame( "Operator" = "Cumulative sum", "Functions" = "cumsum, colCumsums, rowCumsums", "Example" = "cumsum(x); rowCumsums(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative product", "Functions" = "cumprod, colCumprods, rowCumprods", "Example" = "cumprod(x); rowCumprods(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative minimum", "Functions" = "cummin, colCummins, rowCummins", "Example" = "cummin(x); rowCummins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative maximum", "Functions" = "cummax, colCummaxs, rowCummaxs", "Example" = "cummax(x); rowCummaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Binning <% tbl <- NULL row <- data.frame( "Estimator" = "Counts in disjoint bins", "Functions" = "binCounts", "Example" = "binCounts(x, bx)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample means (and counts) in disjoint bins", "Functions" = "binMeans", "Example" = "binMeans(y, x, bx)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Miscellaneous <% tbl <- NULL row <- data.frame( "Operation" = "Sum", "Functions" = "sum2, colSums2, rowSums2", "Example" = "sum2(x); rowSums2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operation" = "Lagged differences", "Functions" = c("diff2, colDiffs, rowDiffs"), "Example" = "diff2(x), rowDiffs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ------------------------------------------------------------- <%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](https://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>). matrixStats/inst/benchmarking/0000755000176200001440000000000014372747711016217 5ustar liggesusersmatrixStats/inst/benchmarking/colRowMedians.md.rsp0000644000176200001440000000274714372747711022124 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMedians"%> <%@string rowname="rowMedians"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + median() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMedians = colMedians(X, na.rm = FALSE), "apply+median" = apply(X, MARGIN = 2L, FUN = median, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMedians = rowMedians(X, na.rm = FALSE), "apply+median" = apply(X, MARGIN = 1L, FUN = median, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowLogSumExps_subset.md.rsp0000644000176200001440000000336314372747711024172 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colLogSumExps"%> <%@string rowname="rowLogSumExps"%> <%@string fcnname="colRowLogSumExps_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colLogSumExps_X_S" = colLogSumExps(X_S, na.rm = FALSE), "colLogSumExps(X, rows, cols)" = colLogSumExps(X, rows = rows, cols = cols, na.rm = FALSE), "colLogSumExps(X[rows, cols])" = colLogSumExps(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowLogSumExps_X_S" = rowLogSumExps(X_S, na.rm = FALSE), "rowLogSumExps(X, cols, rows)" = rowLogSumExps(X, rows = cols, cols = rows, na.rm = FALSE), "rowLogSumExps(X[cols, rows])" = rowLogSumExps(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/mean2_subset.md.rsp0000644000176200001440000000310014372747711021725 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="mean2_subset"%> <%@string subname="mean2"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "mean2_x_S" = mean2(x_S, refine = TRUE), "mean2_x_S_no_refine" = mean2(x_S, refine = FALSE), "mean2(x, idxs)" = mean2(x, idxs = idxs, refine = TRUE), "mean2_no_refine(x, idxs)" = mean2(x, idxs = idxs, refine = FALSE), "mean2(x[idxs])" = mean2(x[idxs], refine = TRUE), "mean2_no_refine(x[idxs])" = mean2(x[idxs], refine = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMeans.md.rsp0000644000176200001440000000272614372747711023265 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMeans"%> <%@string rowname="rowWeightedMeans"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + weighted.mean() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] w <- runif(nrow(X)) gc() colStats <- microbenchmark( colWeightedMeans = colWeightedMeans(X, w = w, na.rm = FALSE), "apply+weigthed.mean" = apply(X, MARGIN = 2L, FUN = weighted.mean, w = w, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowWeightedMeans = rowWeightedMeans(X, w = w, na.rm = FALSE), "apply+weigthed.mean" = apply(X, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowOrderStats.md.rsp0000644000176200001440000000346414372747711022633 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colOrderStats"%> <%@string rowname="rowOrderStats"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> <% use("Biobase", how = "load") rowQ <- Biobase::rowQ %> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + quantile(..., type = 3L) * Biobase::rowQ() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() probs <- 0.3 which <- round(probs*nrow(X)) colStats <- microbenchmark( colOrderStats = colOrderStats(X, which = which, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 2L, FUN = quantile, probs = probs, na.rm = FALSE, type = 3L), "rowQ(t(X))" = rowQ(t(X), which = which), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowOrderStats = rowOrderStats(X, which = which, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 1L, FUN = quantile, probs = probs, na.rm = FALSE, type = 3L), rowQ = rowQ(X, which = which), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnyMissings_subset.md.rsp0000644000176200001440000000344414372747711024370 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnyMissings"%> <%@string rowname="rowAnyMissings"%> <%@string fcnname="colRowAnyMissings_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colAnyMissings_X_S" = colAnyMissings(X_S), "colAnyMissings(X, rows, cols)" = colAnyMissings(X, rows = rows, cols = cols), "colAnyMissings(X[rows, cols])" = colAnyMissings(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowAnyMissings_X_S" = rowAnyMissings(X_S), "rowAnyMissings(X, cols, rows)" = rowAnyMissings(X, rows = cols, cols = rows), "rowAnyMissings(X[cols, rows])" = rowAnyMissings(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMins.md.rsp0000644000176200001440000000357714372747711021454 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMins"%> <%@string rowname="rowMins"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + min() * lapply() + pmin() * lapply() + pmin.int() See also [StackOverflow:colMins?]. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMins = colMins(X, na.rm = FALSE), "apply+min" = apply(X, MARGIN = 2L, FUN = min, na.rm = FALSE), "lapply+pmin" = do.call(pmin, lapply(seq_len(nrow(X)), function(i) X[i, ])), "lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(nrow(X)), function(i) X[i, ])), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMins = rowMins(X, na.rm = FALSE), "apply+min" = apply(X, MARGIN = 1L, FUN = min, na.rm = FALSE), "lapply+pmin" = do.call(pmin, lapply(seq_len(ncol(X)), function(i) X[, i])), "lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(ncol(X)), function(i) X[, i])), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumsums.md.rsp0000644000176200001440000000265314372747711022174 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumsums"%> <%@string rowname="rowCumsums"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cumsum() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCumsums = colCumsums(X), "apply+cumsum" = apply(X, MARGIN = 2L, FUN = cumsum), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCumsums = rowCumsums(X), "apply+cumsum" = apply(X, MARGIN = 1L, FUN = cumsum), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMean.md.rsp0000644000176200001440000000312014372747711021741 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMean"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-09"%> <%@include file="${header}"%> <% weighted.mean <- stats:::weighted.mean weighted.mean.default <- stats:::weighted.mean.default %> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * stats::weighted.mean() * stats:::weighted.mean.default() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] w <- runif(length(x)) gc() stats <- microbenchmark( "weightedMean" = weightedMean(x, w = w, na.rm = FALSE), "stats::weighted.mean" = weighted.mean(x, w = w, na.rm = FALSE), "stats:::weighted.mean.default" = weighted.mean.default(x, w = w, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-12-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowProds_subset.md.rsp0000644000176200001440000000504314372747711023210 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colProds"%> <%@string rowname="rowProds"%> <%@string fcnname="colRowProds_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] })%> <% gc() %> <%=withCapture({ colStats <- microbenchmark( "colProds_X_S w/ direct" = colProds(X_S, method = "direct", na.rm = FALSE), "colProds_X_S w/ expSumLog" = colProds(X_S, method = "expSumLog", na.rm = FALSE), "colProds(X, rows, cols) w/ direct" = colProds(X, rows = rows, cols = cols, method = "direct", na.rm = FALSE), "colProds(X, rows, cols) w/ expSumLog" = colProds(X, rows = rows, cols = cols, method = "expSumLog", na.rm = FALSE), "colProds(X[rows, cols]) w/ direct" = colProds(X[rows, cols], method = "direct", na.rm = FALSE), "colProds(X[rows, cols]) w/ expSumLog" = colProds(X[rows, cols], method = "expSumLog", na.rm = FALSE), unit = "ms" ) })%> <%=withCapture({ X <- t(X) X_S <- t(X_S) })%> <% gc() %> <%=withCapture({ rowStats <- microbenchmark( "rowProds_X_S w/ direct" = rowProds(X_S, method = "direct", na.rm = FALSE), "rowProds_X_S w/ expSumLog" = rowProds(X_S, method = "expSumLog", na.rm = FALSE), "rowProds(X, cols, rows) w/ direct" = rowProds(X, rows = cols, cols = rows, method = "direct", na.rm = FALSE), "rowProds(X, cols, rows) w/ expSumLog" = rowProds(X, rows = cols, cols = rows, method = "expSumLog", na.rm = FALSE), "rowProds(X[cols, rows]) w/ direct" = rowProds(X[cols, rows], method = "direct", na.rm = FALSE), "rowProds(X[cols, rows]) w/ expSumLog" = rowProds(X[cols, rows], method = "expSumLog", na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/sum2_subset.md.rsp0000644000176200001440000000251314372747711021620 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="sum2_subset"%> <%@string subname="sum2"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "sum2_x_S" = sum2(x_S), "sum2(x, idxs)" = sum2(x, idxs = idxs), "sum2(x[idxs])" = sum2(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binCounts_subset.md.rsp0000644000176200001440000000354614372747711022705 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binCounts_subset"%> <%@string subname="binCounts"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Non-sorted simulated data ```r <%=withCapture({ set.seed(0xBEEF) nx <- 100e3 # Number of data points xmax <- 0.01*nx x <- runif(nx, min = 0, max = xmax) storage.mode(x) <- mode str(x) # Uniformely distributed bins nb <- 10e3 # Number of bins bx <- seq(from = 0, to = xmax, length.out = nb+1L) bx <- c(-1, bx, xmax+1) # indices for subsetting idxs <- sample.int(length(x), size = length(x)*0.7) })%> ``` ### Results <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% mprintf("%s: %s\n", mode, dataLabel) %> ```r <%=withCapture({ x_S <- x[idxs] gc() stats <- microbenchmark( "binCounts_x_S" = binCounts(x_S, bx = bx), "binCounts(x, idxs)" = binCounts(x, idxs = idxs, bx = bx), "binCounts(x[idxs])" = binCounts(x[idxs], bx = bx), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% # Sanity checks n1 <- binCounts(x, idxs = idxs, bx = bx) n1r <- rev(binCounts(-x, idxs = idxs, bx = rev(-bx), right = TRUE)) stopifnot(identical(n1r, n1)) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) idxs <- sort(idxs) })%> ``` <% benchmark() %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-04 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/mean2.md.rsp0000644000176200001440000000436414372747711020355 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="mean2"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-02"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * mean() + [() * mean.default() + [() - avoids method dispatching as below ```r <%=withCapture({ mean2_R_v1 <- function(x, na.rm = FALSE, idxs) { mean(x[idxs], na.rm = na.rm) } })%> ``` and ```r <%=withCapture({ mean2_R_v2 <- function(x, na.rm = FALSE, idxs) { mean.default(x[idxs], na.rm = na.rm) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "mean2" = mean2(x, refine = TRUE), "mean2_no_refine" = mean2(x, refine = FALSE), "mean" = mean(x), "mean.default" = mean.default(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, "all")) %> <% for (subset in c(0.2, 0.4, 0.8)) { %> #### A <%=sprintf("%g", 100*subset)%>% subset ```r <%=withCapture({ x <- data[[.dataLabel.]] subset idxs <- sort(sample(length(x), size = subset*length(x), replace = FALSE)) gc() stats <- microbenchmark( "mean2" = mean2(x, idxs = idxs, refine = TRUE), "mean2_no_refine" = mean2(x, idxs = idxs, refine = FALSE), "mean+[()" = mean2_R_v1(x, idxs = idxs), "mean.default+[()" = mean2_R_v2(x, idxs = idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %> <% } # for (subset in ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnyMissings.md.rsp0000644000176200001440000000352114372747711022777 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnyMissings"%> <%@string rowname="rowAnyMissings"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * colAnyMissings() and rowAnyMissings() * apply() + anyMissing() * colSums() + is.na() and rowSums() + is.na() where ```r <%=withCapture({ colAnyMissings <- function(x, ...) colAnys(x, value = NA) })%> ``` and ```r <%=withCapture({ rowAnyMissings <- function(x, ...) rowAnys(x, value = NA) })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAnyMissings = colAnyMissings(X), "apply+anyMissing" = apply(X, MARGIN = 2L, FUN = anyMissing), colSums = is.na(colSums(X, na.rm = FALSE)), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAnyMissings = rowAnyMissings(X), "apply+anyMissing" = apply(X, MARGIN = 1L, FUN = anyMissing), rowSums = is.na(rowSums(X, na.rm = FALSE)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowQuantiles.md.rsp0000644000176200001440000000273614372747711022507 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colQuantiles"%> <%@string rowname="rowQuantiles"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + quantile() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() probs <- seq(from = 0, to = 1, by = 0.25) colStats <- microbenchmark( colQuantiles = colQuantiles(X, probs = probs, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 2L, FUN = quantile, probs = probs, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowQuantiles = rowQuantiles(X, probs = probs, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 1L, FUN = quantile, probs = probs, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/x_OP_y.md.rsp0000644000176200001440000000404014372747711020537 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="x_OP_y"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * x_OP_y_R() as below ```r <%=withCapture({ x_OP_y_R <- function(x, y, OP, na.rm = FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "x_OP_y" = x_OP_y(x, y, OP = OP, na.rm = FALSE), "x_OP_y_R" = x_OP_y_R(x, y, OP = OP, na.rm = FALSE), unit = "ms" ) gc() })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumsums_subset.md.rsp0000644000176200001440000000336214372747711023557 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumsums"%> <%@string rowname="rowCumsums"%> <%@string fcnname="colRowCumsums_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colCumsums_X_S" = colCumsums(X_S), "colCumsums(X, rows, cols)" = colCumsums(X, rows = rows, cols = cols), "colCummins(X[rows, cols])" = colCumsums(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowCumsums_X_S" = rowCumsums(X_S), "rowCumsums(X, cols, rows)" = rowCumsums(X, rows = cols, cols = rows), "rowCumsums(X[cols, rows])" = rowCumsums(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumprods.md.rsp0000644000176200001440000000271014372747711022326 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumprods"%> <%@string rowname="rowCumprods"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cumprod() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode, range = c(-1, 1)) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCumprods = colCumprods(X), "apply+cumprod" = apply(X, MARGIN = 2L, FUN = cumprod), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCumprods = rowCumprods(X), "apply+cumprod" = apply(X, MARGIN = 1L, FUN = cumprod), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCounts_subset.md.rsp0000644000176200001440000000377514372747711023406 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCounts"%> <%@string rowname="rowCounts"%> <%@string fcnname="colRowCounts_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-04-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("logical", "integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> #### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] value <- 42 })%> ``` <% gc() %> ```r <%=withCapture({ colStats <- microbenchmark( "colCounts_X_S" = colCounts(X_S, value = value, na.rm = FALSE), "colCounts(X, rows, cols)" = colCounts(X, value = value, na.rm = FALSE, rows = rows, cols = cols), "colCounts(X[rows, cols])" = colCounts(X[rows, cols], value = value, na.rm = FALSE), unit = "ms" ) })%> ``` ```r <%=withCapture({ X <- t(X) X_S <- t(X_S) })%> ``` <% gc() %> ```r <%=withCapture({ rowStats <- microbenchmark( "rowCounts_X_S" = rowCounts(X_S, value = value, na.rm = FALSE), "rowCounts(X, cols, rows)" = rowCounts(X, value = value, na.rm = FALSE, rows = cols, cols = rows), "rowCounts(X[cols, rows])" = rowCounts(X[cols, rows], value = value, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-04-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowTabulates.md.rsp0000644000176200001440000000234714372747711022464 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colTabulates"%> <%@string rowname="rowTabulates"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * ??? ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "integer", range = c(-10, 10)) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colTabulates = colTabulates(X, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowTabulates = rowTabulates(X, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMedians_subset.md.rsp0000644000176200001440000000371514372747711025166 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMedians"%> <%@string rowname="rowWeightedMedians"%> <%@string fcnname="colRowWeightedMedians_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%> on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] w <- runif(nrow(X)) w_S <- w[rows] gc() colStats <- microbenchmark( "colWeightedMedians_X_w_S" = colWeightedMedians(X_S, w = w_S, na.rm = FALSE), "colWeightedMedians(X, w, rows, cols)" = colWeightedMedians(X, w = w, rows = rows, cols = cols, na.rm = FALSE), "colWeightedMedians(X[rows, cols], w[rows])" = colWeightedMedians(X[rows, cols], w = w[rows], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowWeightedMedians_X_w_S" = rowWeightedMedians(X_S, w = w_S, na.rm = FALSE), "rowWeightedMedians(X, w, cols, rows)" = rowWeightedMedians(X, w = w, rows = cols, cols = rows, na.rm = FALSE), "rowWeightedMedians(X[cols, rows], w[rows])" = rowWeightedMedians(X[cols, rows], w = w[rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/madDiff_subset.md.rsp0000644000176200001440000000246414372747711022271 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="madDiff_subset"%> <%@string subname="madDiff"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "madDiff_x_S" = madDiff(x_S), "madDiff(x, idxs)" = madDiff(x, idxs = idxs), "madDiff(x[idxs])" = madDiff(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanks.md.rsp0000644000176200001440000000300414372747711021605 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanks"%> <%@string rowname="rowRanks"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + rank() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colRanks = colRanks(X, na.rm = FALSE), "apply+rank" = apply(X, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = "max"), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowRanks = rowRanks(X, na.rm = FALSE), "apply+rank" = apply(X, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = "max"), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/x_OP_y_subset.md.rsp0000644000176200001440000000340614372747711022131 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="x_OP_y_subset"%> <%@string subname="x_OP_y"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] xrows <- sample.int(nrow(x), size = nrow(x)*0.7) xcols <- sample.int(ncol(x), size = ncol(x)*0.7) x_S <- x[xrows, xcols] yidxs <- xrows y_S <- y[yidxs] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "x_OP_y_x_y_S" = x_OP_y(x_S, y_S, OP = OP, na.rm = FALSE), "x_OP_y(x, y, OP, xrows, xcols, yidxs)" = x_OP_y(x, y, OP = OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = FALSE), "x_OP_y(x[xrows, xcols], y[yidxs], OP)" = x_OP_y(x[xrows, xcols], y[yidxs], OP = OP, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/allocMatrix.md.rsp0000644000176200001440000000410014372747711021616 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="allocMatrix"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * matrix() * matrix() special trick for NA where ```r <%=withCapture({ allocMatrix_R <- function(nrow, ncol, value = NA) { if (is.na(value) && !is.nan(value)) { matrix(data = value[c()], nrow = nrow, ncol = ncol) } else { matrix(data = value, nrow = nrow, ncol = ncol) } } # allocMatrix_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) values <- list(zero = 0, one = 1, "NA" = NA_real_) if (mode != "double") values <- lapply(values, FUN = function(x) { storage.mode(x) <- mode; x }) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> matrix <% for (value in values) { %> <% valueLabel <- as.character(value) mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel) %> ```r <%=withCapture({ dim <- dim(data[[.dataLabel.]]) nrow <- dim[1L] ncol <- dim[2L] str(value) })%> ``` <% gc() %> ```r <%=withCapture({ stats <- microbenchmark( "allocMatrix" = allocMatrix(nrow = nrow, ncol = ncol, value = value), "matrix" = matrix(data = value, nrow = nrow, ncol = ncol), "allocMatrix_R" = allocMatrix_R(nrow = nrow, ncol = ncol, value = value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %> <% } # for (value in values) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/product_subset.md.rsp0000644000176200001440000000237014372747711022413 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="product_subset"%> <%@string subname="product"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "product_x_S" = product(x_S, na.rm = FALSE), "product(x, idxs)" = product(x, idxs = idxs, na.rm = FALSE), "product(x[idxs])" = product(x[idxs], na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMedian_subset.md.rsp0000644000176200001440000000275414372747711023657 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMedian_subset"%> <%@string subname="weightedMedian"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> <% weightedMedian <- matrixStats::weightedMedian %> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:3] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] w <- runif(length(x)) w_S <- x[idxs] gc() stats <- microbenchmark( "weightedMedian_x_w_S" = weightedMedian(x_S, w = w_S, ties = "mean", na.rm = FALSE), "weightedMedian(x, w, idxs)" = weightedMedian(x, w = w, idxs = idxs, ties = "mean", na.rm = FALSE), "weightedMedian(x[idxs], w[idxs])" = weightedMedian(x[idxs], w = w[idxs], ties = "mean", na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCummins_subset.md.rsp0000644000176200001440000000336214372747711023536 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCummins"%> <%@string rowname="rowCummins"%> <%@string fcnname="colRowCummins_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colCummins_X_S" = colCummins(X_S), "colCummins(X, rows, cols)" = colCummins(X, rows = rows, cols = cols), "colCummins(X[rows, cols])" = colCummins(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowCummins_X_S" = rowCummins(X_S), "rowCummins(X, cols, rows)" = rowCummins(X, rows = cols, cols = rows), "rowCummins(X[cols, rows])" = rowCummins(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMeans2.md.rsp0000644000176200001440000000306614372747711021664 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMeans2"%> <%@string rowname="rowMeans2"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + mean() * .colMeans() and .rowMeans() * colMeans() and rowMeans() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMeans2 = colMeans2(X, na.rm = FALSE), .colMeans = .colMeans(X, m = nrow(X), n = ncol(X), na.rm = FALSE), colMeans = colMeans(X, na.rm = FALSE), "apply+mean" = apply(X, MARGIN = 2L, FUN = mean, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMeans2 = rowMeans2(X, na.rm = FALSE), .rowMeans = .rowMeans(X, m = nrow(X), n = ncol(X), na.rm = FALSE), rowMeans = rowMeans(X, na.rm = FALSE), "apply+mean" = apply(X, MARGIN = 1L, FUN = mean, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/madDiff.md.rsp0000644000176200001440000000243614372747711020703 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="madDiff"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-10"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * N/A <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] stats <- microbenchmark( "madDiff" = madDiff(x), "mad" = mad(x), "diff" = diff(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-10 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binMeans_subset.md.rsp0000644000176200001440000000323014372747711022463 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binMeans"%> <%@string subname="binMeans"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2014-06-05"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Results ### Non-sorted simulated data ```r <%=withCapture({ nx <- 100e3 # Number of data points set.seed(0xBEEF) x <- runif(nx, min = 0, max = 1) y <- runif(nx, min = 0, max = 1) # Uniformely distributed bins nb <- 1e3 # Number of bins bx <- seq(from = 0, to = 1, length.out = nb+1L) bx <- c(-1, bx, 2) # indices for subsetting idxs <- sample.int(length(x), size = length(x)*0.7) })%> ``` <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% message(dataLabel) %> ```r <%=withCapture({ x_S <- x[idxs] y_S <- y[idxs] gc() stats <- microbenchmark( "binMeans_x_y_S" = binMeans(x = x_S, y = y_S, bx = bx, count = TRUE), "binMeans(x, y, idxs)" = binMeans(x = x, y = y, idxs = idxs, bx = bx, count = TRUE), "binMeans(x[idxs], y[idxs])" = binMeans(x = x[idxs], y = y[idxs], bx = bx, count = TRUE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) idxs <- sort(idxs) })%> ``` <% benchmark() %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-05 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/includes/0000755000176200001440000000000014372747711020025 5ustar liggesusersmatrixStats/inst/benchmarking/includes/setup.md.rsp0000644000176200001440000000236414372747711022317 0ustar liggesusers<%@string header="includes/header.md.rsp"%> <%@string footer="includes/footer.md.rsp"%> <%@string references="../includes/references.md.rsp"%> <%@string appendix="${appendix}" default="true"%> <%@string colname=""%> <%@string rowname=""%> <%@string fcnname=""%> <%@string fcntags=""%> <%@meta author="Henrik Bengtsson"%> <%-------------------------------------------------------------- RSP specific --------------------------------------------------------------%> <% R.utils::use("R.utils, R.devices (>= 2.12.0), knitr, ggplot2") devOptions("png", width=390) options("withCapture/newline"=FALSE) options(deparse.cutoff=100) kable <- function(...) { t <- knitr::kable(..., format="markdown") print(t) } %> <%-------------------------------------------------------------- Report/package specific --------------------------------------------------------------%> <% use("matrixStats") use("microbenchmark") %> <%@include file="results.md.rsp"%> <%-------------------------------------------------------------- Macros --------------------------------------------------------------%> <%-------------------------------------------------------------- Timing --------------------------------------------------------------%> <% rspStartTime <- Sys.time() %> matrixStats/inst/benchmarking/includes/references.md.rsp0000644000176200001440000000121714372747711023274 0ustar liggesusers<%--------------------------------------------------------------- REFERENCES ---------------------------------------------------------------%> [RSP]: https://cran.r-project.org/package=R.rsp [matrixStats]: https://cran.r-project.org/package=matrixStats [StackOverflow:colMins?]: https://stackoverflow.com/questions/13676878 "Stack Overflow: fastest way to get Min from every column in a matrix?" [StackOverflow:colSds?]: https://stackoverflow.com/questions/17549762 "Stack Overflow: Is there such 'colsd' in R?" [StackOverflow:rowProds?]: https://stackoverflow.com/questions/20198801/ "Stack Overflow: Row product of matrix and column sum of matrix" matrixStats/inst/benchmarking/includes/results.md.rsp0000644000176200001440000001242614372747711022660 0ustar liggesusers<%-------------------------------------------------------------- BENCHMARK RESULTS --------------------------------------------------------------%> <%-------------------------------------------------------------- Local functions --------------------------------------------------------------%> <% toImage <- function(stats, name=levels(stats$expr)[1L], tags=NULL, ylim="auto", col=NULL, alpha=NULL, ...) { %> <% # Replace spaces in name with hypen, e.g. ' w/ direct' -> '-w/-direct' name <- gsub(" ", "-", name, fixed=TRUE) # Drop any forward slashes in name, e.g. ' w/ ' -> ' w ' name <- gsub("/", "", name, fixed=TRUE) # Drop any spaces in tags, e.g. 'n = 1000' -> 'n=1000' tags <- gsub(" ", "", tags, fixed=TRUE) cat("\n") %> ![](<%=toPNG(name, tags=c(tags, "benchmark"), aspectRatio=2/3, { if (identical(ylim, "auto")) { y <- stats$time/1e6 ymax <- max(y, na.rm=TRUE) y75 <- quantile(y, probs=0.75, na.rm=TRUE) yupper <- min(c(1.5*y75, ymax), na.rm=TRUE) ylim <- c(0, yupper) } if (!is.null(ylim)) { stats$outlier <- (stats$time > ylim[2]*1e6) stats$time[stats$outlier] <- ylim[2]*1e6 } gg <- ggplot(data=stats, aes(x=seq_along(time)/length(levels(expr)), y=time/1e6)) gg <- gg + geom_point(aes(colour=expr, shape=outlier)) gg <- gg + scale_shape_manual(values=c(16,4), guide="none") if (!is.null(col)) gg <- gg + scale_colour_manual(values=col) if (!is.null(alpha)) gg <- gg + scale_alpha_manual(values=alpha) gg <- gg + xlab("iteration") + ylab("time (ms)") if (!is.null(ylim)) gg <- gg + ylim(ylim) print(gg) })%>) <% } # toImage() %> <% toTable <- function(stats, tags=NULL, order="median", ...) { kable({ s <- summary(stats) s$neval <- NULL s$cld <- NULL s <- s[order(s[[order]]),] s }, row.names=TRUE) kable({ s <- summary(stats, unit="relative") s$neval <- NULL s$cld <- NULL s <- s[order(s[[order]]),] s }, row.names=TRUE) cat("\n") } %> <%-------------------------------------------------------------- Benchmark results for vector functions --------------------------------------------------------------%> <% benchmarkResults <- function(stats, tags=NULL, ...) { %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(stats, tags=tags) %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. Outliers are displayed as crosses. Times are in milliseconds._ <% toImage(stats, tags=tags) %> <% } # benchmarkResults() %> <%-------------------------------------------------------------- Benchmark results for col- and row-specific functions --------------------------------------------------------------%> <% crBenchmarkResults <- function(colStats, rowStats=NULL, tags=NULL, ...) { %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(colStats, tags=tags) %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._ <% if (!is.null(rowStats)) { toTable(rowStats, tags=tags) } %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data <% if (!is.null(rowStats)) { %> as well as <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on the same data transposed<% } # if (!is.null(rowStats)) %>. Outliers are displayed as crosses. Times are in milliseconds._ <% y <- c(colStats$time, rowStats$time)/1e6 ymax <- max(y, na.rm=TRUE) y75 <- quantile(y, probs=0.75, na.rm=TRUE) yupper <- min(c(1.5*y75, ymax), na.rm=TRUE) ylim <- c(0, yupper) %> <% toImage(colStats, tags=tags, ylim=ylim) %> <% if (!is.null(rowStats)) toImage(rowStats, tags=tags, ylim=ylim) %> <% if (!is.null(rowStats)) { %> <% # Compare performance or the column- and the row-specific methods # for the "main" function. stats <- list(colStats, rowStats) stats <- lapply(stats, FUN=function(x) { level <- levels(x$expr)[1] x <- subset(x, expr %in% level) x$expr <- factor(as.character(x$expr)) x }) stats <- Reduce(rbind, stats) odd <- seq(from=1L, to=nrow(stats), by=2L) top <- 1:(nrow(stats)/2) stats0 <- stats stats[ odd,] <- stats0[ top,] stats[-odd,] <- stats0[-top,] %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(stats, tags=tags) %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). Outliers are displayed as crosses. Times are in milliseconds._ <% toImage(stats, name=paste(levels(stats$expr), collapse="_vs_"), tags=tags, col=c("#000000", "#999999")) %> <% } # if (!is.null(rowStats)) %> <% } # crBenchmarkResults() %> matrixStats/inst/benchmarking/includes/appendix.md.rsp0000644000176200001440000000067514372747711022772 0ustar liggesusers## Appendix ### Session information ```r <% print(sessionInfo()) %> ``` Total processing time was <%=rspDuration <- round(Sys.time()-rspStartTime, digits=2)%> <%=attr(rspDuration, "units")%>. ### Reproducibility To reproduce this report, do: ```r <%@ifeq fcnname=""%><%@string fcnname="${colname}"%><%@endif%> html <- matrixStats:::benchmark('<%@string name="fcnname"%>'<%@ifneq fcntags=""%>, tags='<%@string name="fcntags"%>'<%@endif%>) ``` matrixStats/inst/benchmarking/includes/header.md.rsp0000644000176200001440000000011214372747711022374 0ustar liggesusers[matrixStats]: Benchmark report --------------------------------------- matrixStats/inst/benchmarking/includes/footer.md.rsp0000644000176200001440000000136314372747711022453 0ustar liggesusers<%--------------------------------------------------------------- Page footer ---------------------------------------------------------------%> <%@string appendix="${appendix}" default="false"%> <%@ifeq appendix="true"%> <%@include file="appendix.md.rsp"%> <%@endif%> <%@include file="${references}"%> --------------------------------------- Copyright <%@meta name="author"%>. Last updated on <%=format(Sys.time(), format="%Y-%m-%d %H:%M:%S (%z UTC)")%>. Powered by [RSP]. <%--------------------------------------------------------------- Dynamically insert an HTML favicon ---------------------------------------------------------------%> <%=toFavicon({ plot(1, col="blue", bg="yellow", pch=21, cex=4, lwd=4, axes=FALSE) }, force=FALSE)%> matrixStats/inst/benchmarking/anyMissing_subset.md.rsp0000644000176200001440000000251314372747711023053 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="anyMissing_subset"%> <%@string subname="anyMissing"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "anyMissing_x_S" = anyMissing(x_S), "anyMissing(x, idxs)" = anyMissing(x, idxs = idxs), "anyMissing(x[idxs])" = anyMissing(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-04 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAlls_subset.md.rsp0000644000176200001440000000310114372747711023005 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAlls"%> <%@string rowname="rowAlls"%> <%@string fcnname="colRowAlls_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colAlls_X_S" = colAlls(X_S), "colAlls(X, rows, cols)" = colAlls(X, rows = rows, cols = cols), "colAlls(X[rows, cols])" = colAlls(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowAlls_X_S" = rowAlls(X_S), "rowAlls(X, cols, rows)" = rowAlls(X, rows = cols, cols = rows), "rowAlls(X[cols, rows])" = rowAlls(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMeans_subset.md.rsp0000644000176200001440000000366114372747711024651 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMeans"%> <%@string rowname="rowWeightedMeans"%> <%@string fcnname="colRowWeightedMeans_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] w <- runif(nrow(X)) w_S <- w[rows] gc() colStats <- microbenchmark( "colWeightedMeans_X_w_S" = colWeightedMeans(X_S, w = w_S, na.rm = FALSE), "colWeightedMeans(X, w, rows, cols)" = colWeightedMeans(X, w = w, rows = rows, cols = cols, na.rm = FALSE), "colWeightedMeans(X[rows, cols], w[rows])" = colWeightedMeans(X[rows, cols], w = w[rows], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowWeightedMeans_X_w_S" = rowWeightedMeans(X_S, w = w_S, na.rm = FALSE), "rowWeightedMeans(X, w, cols, rows)" = rowWeightedMeans(X, w = w, rows = cols, cols = rows, na.rm = FALSE), "rowWeightedMeans(X[cols, rows], w[rows])" = rowWeightedMeans(X[cols, rows], w = w[rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/t_tx_OP_y_subset.md.rsp0000644000176200001440000000343614372747711022643 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="t_tx_OP_y_subset"%> <%@string subname="t_tx_OP_y"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] xrows <- sample.int(nrow(x), size = nrow(x)*0.7) xcols <- sample.int(ncol(x), size = ncol(x)*0.7) x_S <- x[xrows, xcols] yidxs <- xrows y_S <- y[yidxs] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "t_tx_OP_y_x_y_S" = t_tx_OP_y(x_S, y_S, OP = OP, na.rm = FALSE), "t_tx_OP_y(x, y, OP, xrows, xcols, yidxs)" = t_tx_OP_y(x, y, OP = OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = FALSE), "t_tx_OP_y(x[xrows, xcols], y[yidxs], OP)" = t_tx_OP_y(x[xrows, xcols], y[yidxs], OP = OP, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/allocVector.md.rsp0000644000176200001440000000422214372747711021621 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="allocVector"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * vector() + assignment * rep() * matrix() + as.vector() where ```r <%=withCapture({ allocVector_R1 <- function(length, value = NA) { x <- vector(mode = typeof(value), length = length) if (!is.finite(value) || value != 0) x[] <- value x } # allocVector_R1() allocVector_R2 <- function(length, value = NA) { x <- matrix(data = value, nrow = length, ncol = 1L) as.vector(x) } # allocVector_R2() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) values <- list(zero = 0, one = 1, "NA" = NA_real_) if (mode != "double") values <- lapply(values, FUN = function(x) { storage.mode(x) <- mode; x }) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> matrix <% for (value in values) { %> <% valueLabel <- as.character(value) mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel) %> ```r <%=withCapture({ n <- length(data[[.dataLabel.]]) str(value) })%> ``` <% gc() %> ```r <%=withCapture({ stats <- microbenchmark( "allocVector" = allocVector(length = n, value = value), "rep" = rep(value, times = n), "allocVector_R1" = allocVector_R1(length = n, value = value), "allocVector_R2" = allocVector_R2(length = n, value = value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %> <% } # for (value in values) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnys.md.rsp0000644000176200001440000000256614372747711021455 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnys"%> <%@string rowname="rowAnys"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + any() * colSums() > 0 or rowSums() > 0 ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAnys = colAnys(X), "apply+any" = apply(X, MARGIN = 2L, FUN = any), "colSums > 0" = (colSums(X) > 0L), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAnys = rowAnys(X), "apply+any" = apply(X, MARGIN = 1L, FUN = any), "rowSums > 0" = (rowSums(X) > 0L), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowVars.md.rsp0000644000176200001440000000567614372747711021463 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colVars"%> <%@string rowname="rowVars"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2015-01-06"%> <%@include file="${header}"%> <%@string test_genefilter="TRUE"%> <%@ifeq test_genefilter="TRUE"%> <% use("genefilter", how = "load") genefilter_rowVars <- genefilter::rowVars genefilter_colVars <- function(x, ...) genefilter_rowVars(t(x), ...) %> <%@endif%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + var() * colVarColMeans() and rowVarColMeans() <%@ifeq test_genefilter="TRUE"%> * genefilter::rowVars(t(.)) and genefilter::rowVars() <%@endif%> where ```r <%=withCapture({ colVarColMeans <- function(x, na.rm = TRUE) { if (na.rm) { n <- colSums(!is.na(x)) } else { n <- nrow(x) } var <- colMeans(x*x, na.rm = na.rm) - (colMeans(x, na.rm = na.rm))^2 var * n/(n-1) } })%> ``` and ```r <%=withCapture({ rowVarRowMeans <- function(x, na.rm = TRUE) { if (na.rm) { n <- rowSums(!is.na(x)) } else { n <- ncol(x) } mu <- rowMeans(x, na.rm = na.rm) var <- rowMeans(x*x, na.rm = na.rm) - mu^2 var * (n/(n-1)) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colVars = colVars(X, na.rm = FALSE), colVarsCenter = colVars(X, center = colMeans(X, na.rm = FALSE), na.rm = FALSE), colVarColMeans = colVarColMeans(X, na.rm = FALSE), "apply+var" = apply(X, MARGIN = 2L, FUN = var, na.rm = FALSE), <%@ifeq test_genefilter="TRUE"%> "genefilter::rowVars(t(.))" = genefilter_colVars(X, na.rm = FALSE), <%@endif%> unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowVars = rowVars(X, na.rm = FALSE), rowVarsCenter = rowVars(X, center = rowMeans(X, na.rm = FALSE), na.rm = FALSE), rowVarRowMeans = rowVarRowMeans(X, na.rm = FALSE), "apply+var" = apply(X, MARGIN = 1L, FUN = var, na.rm = FALSE), <%@ifeq test_genefilter="TRUE"%> "genefilter::rowVars" = genefilter_rowVars(X, na.rm = FALSE), <%@endif%> unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-01-06 o Now benchmarking 'genefilter' functions too. 2014-11-23 o Now benchmarking rowVars() instead of rowSds() since the latter uses the former. 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCounts.md.rsp0000644000176200001440000000340214372747711022004 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCounts"%> <%@string rowname="rowCounts"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * colSums() and rowSums() * apply() + sum() <% for (mode in c("logical", "integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> #### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] value <- 42 })%> ``` <% gc() %> ```r <%=withCapture({ colStats <- microbenchmark( colCounts = colCounts(X, value = value, na.rm = FALSE), colSums = colSums(X == value, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 2L, FUN = function(x) sum(x == value, na.rm = FALSE)), unit = "ms" ) })%> ``` ```r <%=withCapture({ X <- t(X) })%> ``` <% gc() %> ```r <%=withCapture({ rowStats <- microbenchmark( rowCounts = rowCounts(X, value = value, na.rm = FALSE), rowSums = rowSums(X == value, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 1L, FUN = function(x) sum(x == value, na.rm = FALSE)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMean_subset.md.rsp0000644000176200001440000000276514372747711023344 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMean_subset"%> <%@string subname="weightedMean"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] w <- runif(length(x)) w_S <- w[idxs] gc() stats <- microbenchmark( "weightedMean_x_w_S" = weightedMean(x_S, w = w_S, na.rm = FALSE), "weightedMean(x, w, idxs)" = weightedMean(x, w = w, idxs = idxs, na.rm = FALSE), "weightedMean(x[idxs], w[idxs])" = weightedMean(x[idxs], w = w[idxs], na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/varDiff_subset.md.rsp0000644000176200001440000000252514372747711022316 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="varDiff_subset"%> <%@string subname="varDiff"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "varDiff_x_S" = varDiff(x_S), "varDiff(x, idxs)" = varDiff(x, idxs = idxs), "varDiff(x[idxs])" = varDiff(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMeans2_subset.md.rsp0000644000176200001440000000320514372747711023244 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMeans2"%> <%@string rowname="rowMeans2"%> <%@string fcnname="colRowMeans2_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMeans2_X_S" = colMeans2(X_S, na.rm = FALSE), "colMeans2(X, rows, cols)" = colMeans2(X, rows = rows, cols = cols, na.rm = FALSE), "colMeans2(X[rows, cols])" = colMeans2(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMeans2_X_S" = rowMeans2(X_S, na.rm = FALSE), "rowMeans2(X, cols, rows)" = rowMeans2(X, rows = cols, cols = rows, na.rm = FALSE), "rowMeans2(X[cols, rows])" = rowMeans2(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/colRowCumprods_subset.md.rsp0000644000176200001440000000342414372747711023716 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumprods"%> <%@string rowname="rowCumprods"%> <%@string fcnname="colRowCumprods_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode, range = c(-1, 1)) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colCumprods_X_S" = colCumprods(X_S), "colCumprods(X, rows, cols)" = colCumprods(X, rows = rows, cols = cols), "colCumprods(X[rows, cols])" = colCumprods(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowCumprods_X_S" = rowCumprods(X_S), "rowCumprods(X, cols, rows)" = rowCumprods(X, rows = cols, cols = rows), "rowCumprods(X[cols, rows])" = rowCumprods(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowTabulates_subset.md.rsp0000644000176200001440000000337114372747711024047 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colTabulates"%> <%@string rowname="rowTabulates"%> <%@string fcnname="colRowTabulates_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "integer", range = c(-10, 10)) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colTabulates_X_S" = colTabulates(X_S, na.rm = FALSE), "colTabulates(X, rows, cols)" = colTabulates(X, rows = rows, cols = cols, na.rm = FALSE), "colTabulates(X[rows, cols])" = colTabulates(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowTabulates_X_S" = rowTabulates(X_S, na.rm = FALSE), "rowTabulates(X, cols, rows)" = rowTabulates(X, rows = cols, cols = rows, na.rm = FALSE), "rowTabulates(X[cols, rows])" = rowTabulates(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCummins.md.rsp0000644000176200001440000000265314372747711022153 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCummins"%> <%@string rowname="rowCummins"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cummin() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCummins = colCummins(X), "apply+cummin" = apply(X, MARGIN = 2L, FUN = cummin), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCummins = rowCummins(X), "apply+cummin" = apply(X, MARGIN = 1L, FUN = cummin), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMads_subset.md.rsp0000644000176200001440000000343714372747711023012 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMads"%> <%@string rowname="rowMads"%> <%@string fcnname="colRowMads_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMads_X_S" = colMads(X_S, na.rm = FALSE), "colMads(X, rows, cols)" = colMads(X, rows = rows, cols = cols, na.rm = FALSE), "colMads(X[rows, cols])" = colMads(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMads_X_S" = rowMads(X_S, na.rm = FALSE), "rowMads(X, cols, rows)" = rowMads(X, rows = cols, cols = rows, na.rm = FALSE), "rowMads(X[cols, rows])" = rowMads(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAlls.md.rsp0000644000176200001440000000257414372747711021435 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAlls"%> <%@string rowname="rowAlls"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + all() * colSums() == n or rowSums() == n ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAlls = colAlls(X), "apply+all" = apply(X, MARGIN = 2L, FUN = all), "colSums==n" = (colSums(X) == nrow(X)), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAlls = rowAlls(X), "apply+all" = apply(X, MARGIN = 1L, FUN = all), "rowSums==n" = (rowSums(X) == ncol(X)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowDiffs_subset.md.rsp0000644000176200001440000000332514372747711023155 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colDiffs"%> <%@string rowname="rowDiffs"%> <%@string fcnname="colRowDiffs_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colDiffs_X_S" = colDiffs(X_S), "colDiffs(X, rows, cols)" = colDiffs(X, rows = rows, cols = cols), "colDiffs(X[rows, cols])" = colDiffs(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowDiffs_X_S" = rowDiffs(X_S), "rowDiffs(X, cols, rows)" = rowDiffs(X, rows = cols, cols = rows), "rowDiffs(X[cols, rows])" = rowDiffs(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/anyMissing.md.rsp0000644000176200001440000000254614372747711021474 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="anyMissing"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-01"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * anyNA() * any() + is.na() as below ```r <%=withCapture({ any_is.na <- function(x) { any(is.na(x)) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "anyMissing" = anyMissing(x), "anyNA" = anyNA(x), "any_is.na" = any_is.na(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMads.md.rsp0000644000176200001440000000422314372747711021417 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMads"%> <%@string rowname="rowMads"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + mad() * colMads2() and rowMads2() where `rowMads2()` and `colMads2()` are: ```r <%=withCapture({ rowMads2 <- function(x, const = 1.4826, na.rm = FALSE) { mu <- rowMedians(x, na.rm = na.rm) x <- abs(x - mu) mad <- rowMedians(x, na.rm = FALSE) const * mad } colMads2 <- function(x, const = 1.4826, na.rm = FALSE) { mu <- colMedians(x, na.rm = na.rm) x <- abs(x - mu) mad <- colMedians(x, na.rm = FALSE) const * mad } })%> ``` <% rowMads_R <- function(x, na.rm = FALSE) { apply(x, MARGIN = 1L, FUN = mad, na.rm = na.rm) } colMads_R <- function(x, na.rm = FALSE) { apply(x, MARGIN = 2L, FUN = mad, na.rm = na.rm) } %> <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMads = colMads(X, na.rm = FALSE), colMads2 = colMads2(X, na.rm = FALSE), "apply+mad" = apply(X, MARGIN = 2L, FUN = mad, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMads = rowMads(X, na.rm = FALSE), rowMads2 = rowMads2(X, na.rm = FALSE), "apply+mad" = apply(X, MARGIN = 1L, FUN = mad, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-17 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowOrderStats_subset.md.rsp0000644000176200001440000000400114372747711024204 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colOrderStats"%> <%@string rowname="rowOrderStats"%> <%@string fcnname="colRowOrderStats_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() probs <- 0.3 which <- round(probs*nrow(X)) colStats <- microbenchmark( "colOrderStats_X_S" = colOrderStats(X_S, which = which, na.rm = FALSE), "colOrderStats(X, rows, cols)" = colOrderStats(X, rows = rows, cols = cols, which = which, na.rm = FALSE), "colOrderStats(X[rows, cols])" = colOrderStats(X[rows, cols], which = which, na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowOrderStats_X_S" = rowOrderStats(X_S, which = which, na.rm = FALSE), "rowOrderStats(X, cols, rows)" = rowOrderStats(X, rows = cols, cols = rows, which = which, na.rm = FALSE), "rowOrderStats(X[cols, rows])" = rowOrderStats(X[cols, rows], which = which, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/product.md.rsp0000644000176200001440000000323314372747711021025 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="product"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * product_R() * prod() where ```r <%=withCapture({ product_R <- function(x, na.rm = FALSE, ...) { # Nothing todo? if (length(x) == 0L) return(0); # Any missing values? if (na.rm) { x <- x[!is.na(x)]; } # Any zeros? if (is.integer(x) && any(x == 0)) return(0); # Calculate product via logarithmic sum sign <- if (sum(x < 0) %% 2 == 0) +1 else -1; x <- abs(x); x <- log(x); x <- sum(x, na.rm = FALSE); x <- exp(x); y <- sign*x; y; } # product_R() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( product = product(x, na.rm = FALSE), product_R = product_R(x, na.rm = FALSE), prod = prod(x, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMedians_subset.md.rsp0000644000176200001440000000351414372747711023502 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMedians"%> <%@string rowname="rowMedians"%> <%@string fcnname="colRowMedians_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMedians_X_S" = colMedians(X_S, na.rm = FALSE), "colMedians(X, rows, cols)" = colMedians(X, rows = rows, cols = cols, na.rm = FALSE), "colMedians(X[rows, cols])" = colMedians(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMedians_X_S" = rowMedians(X_S, na.rm = FALSE), "rowMedians(X, cols, rows)" = rowMedians(X, rows = cols, cols = rows, na.rm = FALSE), "rowMedians(X[cols, rows])" = rowMedians(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowProds.md.rsp0000644000176200001440000000356114372747711021626 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colProds"%> <%@string rowname="rowProds"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-15"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * <%=colname%>()/<%=rowname%>() using method="expSumLog" * apply() + prod() * apply() + product() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] })%> <% gc() %> <%=withCapture({ colStats <- microbenchmark( "colProds w/ direct" = colProds(X, method = "direct", na.rm = FALSE), "colProds w/ expSumLog" = colProds(X, method = "expSumLog", na.rm = FALSE), "apply+prod" = apply(X, MARGIN = 2L, FUN = prod, na.rm = FALSE), "apply+product" = apply(X, MARGIN = 2L, FUN = product, na.rm = FALSE), unit = "ms" ) })%> <%=withCapture({ X <- t(X) })%> <% gc() %> <%=withCapture({ rowStats <- microbenchmark( "rowProds w/ direct" = rowProds(X, method = "direct", na.rm = FALSE), "rowProds w/ expSumLog" = rowProds(X, method = "expSumLog", na.rm = FALSE), "apply+prod" = apply(X, MARGIN = 1L, FUN = prod, na.rm = FALSE), "apply+product" = apply(X, MARGIN = 1L, FUN = product, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowVars_subset.md.rsp0000644000176200001440000000344114372747711023034 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colVars"%> <%@string rowname="rowVars"%> <%@string fcnname="colRowVars_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colVars_X_S" = colVars(X_S, na.rm = FALSE), "colVars(X, rows, cols)" = colVars(X, rows = rows, cols = cols, na.rm = FALSE), "colVars(X[rows, cols])" = colVars(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowVars_X_S" = rowVars(X_S, na.rm = FALSE), "rowVars(X, cols, rows)" = rowVars(X, rows = cols, cols = rows, na.rm = FALSE), "rowVars(X[cols, rows])" = rowVars(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/logSumExp.md.rsp0000644000176200001440000000247414372747711021276 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="logSumExp"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-01"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * logSumExp_R() where ```r <%=withCapture({ logSumExp_R <- function(lx, ...) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp_R() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "logSumExp" = logSumExp(x), "logSumExp_R" = logSumExp_R(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowQuantiles_subset.md.rsp0000644000176200001440000000355114372747711024070 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colQuantiles"%> <%@string rowname="rowQuantiles"%> <%@string fcnname="colRowQuantiles_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() probs <- seq(from = 0, to = 1, by = 0.25) colStats <- microbenchmark( "colQuantiles_X_S" = colQuantiles(X_S, probs = probs, na.rm = FALSE), "colQuantiles(X, rows, cols)" = colQuantiles(X, rows = rows, cols = cols, probs = probs, na.rm = FALSE), "colQuantiles(X[rows, cols])" = colQuantiles(X[rows, cols], probs = probs, na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowQuantiles_X_S" = rowQuantiles(X_S, probs = probs, na.rm = FALSE), "rowQuantiles(X, cols, rows)" = rowQuantiles(X, rows = cols, cols = rows, probs = probs, na.rm = FALSE), "rowQuantiles(X[cols, rows])" = rowQuantiles(X[cols, rows], probs = probs, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/index.md.rsp0000644000176200001440000000413614372747711020457 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@meta title="Benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-05"%> <%@include file="${header}"%> <% use("matrixStats") use("R.utils (>= 1.34.0)") ## Assert that all third-party packages to be benchmarked ## are available and can be loaded/installed already here R.utils::use("cwhmisc, ergm, laeken, genefilter", how = "load") # Simple logging function lenter <- function(...) { s <- mprintf(...) s <- gsub("[.][.][.](\n)*", "...done\\1", s) lexit <<- function() mprintf(s) } %> # <%@meta name="title"%> List of benchmark report for some of the functions available in the [matrixStats] package. <% path <- cmdArg(path = system.file("benchmarking", package = "matrixStats")) path <- getAbsolutePath(path) message("Processing benchmark report directory: ", path) pattern <- "[.]md[.]rsp$" filenames <- list.files(path = path, pattern = pattern) filenames <- setdiff(filenames, "index.md.rsp") names <- gsub(pattern, "", filenames) # col- and rowAnyMissing() does not really exist names <- setdiff(names, c("colAnyMissing", "rowAnyMissing")) message("Number of reports found: ", length(names)) mprintf("Report #%d: %s\n", seq_along(names), names) %> <% for (ii in seq_along(names)) { %> <% name <- names[ii] if (regexpr("^colRow", name) != -1L) { label <- gsub("^colRow", "", name) label <- sprintf("col%s() and row%s()", label, label) } else { label <- sprintf("%s()", name) } %> * [<%=label%>](<%={ lenter("%d of %d. Benchmarking %s...\n", ii, length(names), label) html <- sprintf("%s.html", name) if (!file_test("-f", html)) { html <- matrixStats:::benchmark(name, path = path, workdir = ".", envir = new.env()) html <- getRelativePath(html) gc() } lexit() html }%>) <% } # for (ii ...) %> ## Appendix To reproduce this page and all of its reports, do: ```r path <- system.file("benchmarking", package = "matrixStats") R.rsp::rfile("index.md.rsp", path = path) ``` _Note: Each of the above reports takes up to several minutes to complete._ <%@string appendix="false"%> <%@include file="${footer}"%> <%@string appendix="true"%> matrixStats/inst/benchmarking/colRowLogSumExps.md.rsp0000644000176200001440000000335714372747711022610 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colLogSumExps"%> <%@string rowname="rowLogSumExps"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + matrixStats::logSumExp() * apply() + logSumExp0() where ```r <%=withCapture({ logSumExp0 <- function(lx, ...) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp0() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colLogSumExps = colLogSumExps(X, na.rm = FALSE), "apply+logSumExp" = apply(X, MARGIN = 2L, FUN = logSumExp, na.rm = FALSE), "apply+logSumExp0" = apply(X, MARGIN = 2L, FUN = logSumExp0, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowLogSumExps = rowLogSumExps(X, na.rm = FALSE), "apply+logSumExp" = apply(X, MARGIN = 1L, FUN = logSumExp, na.rm = FALSE), "apply+logSumExp0" = apply(X, MARGIN = 1L, FUN = logSumExp0, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanges_subset.md.rsp0000644000176200001440000000347614372747711023350 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanges"%> <%@string rowname="rowRanges"%> <%@string fcnname="colRowRanges_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colRanges_X_S" = colRanges(X_S, na.rm = FALSE), "colRanges(X, rows, cols)" = colRanges(X, rows = rows, cols = cols, na.rm = FALSE), "colRanges(X[rows, cols])" = colRanges(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowRanges_X_S" = rowRanges(X_S, na.rm = FALSE), "rowRanges(X, cols, rows)" = rowRanges(X, rows = cols, cols = rows, na.rm = FALSE), "rowRanges(X[cols, rows])" = rowRanges(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanks_subset.md.rsp0000644000176200001440000000345714372747711023206 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanks"%> <%@string rowname="rowRanks"%> <%@string fcnname="colRowRanks_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colRanks_X_S" = colRanks(X_S, na.rm = FALSE), "colRanks(X, rows, cols)" = colRanks(X, rows = rows, cols = cols, na.rm = FALSE), "colRanks(X[rows, cols])" = colRanks(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowRanks_X_S" = rowRanks(X_S, na.rm = FALSE), "rowRanks(X, cols, rows)" = rowRanks(X, rows = cols, cols = rows, na.rm = FALSE), "rowRanks(X[cols, rows])" = rowRanks(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/R/0000755000176200001440000000000014372747711016420 5ustar liggesusersmatrixStats/inst/benchmarking/R/random-matrices.R0000644000176200001440000000202514372747711021627 0ustar liggesusersrmatrix <- function(nrow, ncol, mode = c("logical", "double", "integer", "index"), range = c(-100, +100), na_prob = 0) { mode <- match.arg(mode) n <- nrow * ncol if (mode == "logical") { x <- sample(c(FALSE, TRUE), size = n, replace = TRUE) } else if (mode == "index") { x <- seq_len(n) mode <- "integer" } else { x <- runif(n, min = range[1], max = range[2]) } storage.mode(x) <- mode if (na_prob > 0) x[sample(n, size = na_prob * n)] <- NA dim(x) <- c(nrow, ncol) x } rmatrices <- function(scale = 10, seed = 1, ...) { set.seed(seed) data <- list() data[[1]] <- rmatrix(nrow = scale * 1, ncol = scale * 1, ...) data[[2]] <- rmatrix(nrow = scale * 10, ncol = scale * 10, ...) data[[3]] <- rmatrix(nrow = scale * 100, ncol = scale * 1, ...) data[[4]] <- t(data[[3]]) data[[5]] <- rmatrix(nrow = scale * 10, ncol = scale * 100, ...) data[[6]] <- t(data[[5]]) names(data) <- sapply(data, FUN = function(x) paste(dim(x), collapse = "x")) data } matrixStats/inst/benchmarking/R/random-vectors.R0000644000176200001440000000142014372747711021503 0ustar liggesusersrvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) { mode <- match.arg(mode) if (mode == "logical") { x <- sample(c(FALSE, TRUE), size = n, replace = TRUE) } else { x <- runif(n, min = range[1], max = range[2]) } storage.mode(x) <- mode if (na_prob > 0) x[sample(n, size = na_prob * n)] <- NA x } # rvector() rvectors <- function(scale = 10, seed = 1, ...) { set.seed(seed) data <- list() data[[1]] <- rvector(n = scale * 1e2, ...) data[[2]] <- rvector(n = scale * 1e3, ...) data[[3]] <- rvector(n = scale * 1e4, ...) data[[4]] <- rvector(n = scale * 1e5, ...) data[[5]] <- rvector(n = scale * 1e6, ...) names(data) <- sprintf("n = %d", sapply(data, FUN = length)) data } matrixStats/inst/benchmarking/colRowSums2_subset.md.rsp0000644000176200001440000000316614372747711023136 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colSums2"%> <%@string rowname="rowSums2"%> <%@string fcnname="colRowSums2_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colSums2_X_S" = colSums2(X_S, na.rm = FALSE), "colSums2(X, rows, cols)" = colSums2(X, rows = rows, cols = cols, na.rm = FALSE), "colSums2(X[rows, cols])" = colSums2(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowSums2_X_S" = rowSums2(X_S, na.rm = FALSE), "rowSums2(X, cols, rows)" = rowSums2(X, rows = cols, cols = rows, na.rm = FALSE), "rowSums2(X[cols, rows])" = rowSums2(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/colRowDiffs.md.rsp0000644000176200001440000000312714372747711021570 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colDiffs"%> <%@string rowname="rowDiffs"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-30"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + diff() * apply() + diff2() * diff() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colDiffs = colDiffs(X), "apply+diff" = apply(X, MARGIN = 2L, FUN = diff), "apply+diff2" = apply(X, MARGIN = 2L, FUN = diff2), diff = diff(X), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowDiffs = rowDiffs(X), "apply+diff" = apply(X, MARGIN = 1L, FUN = diff), "apply+diff2" = apply(X, MARGIN = 1L, FUN = diff2), "diff + t" = diff(t(X)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-17 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMedians.md.rsp0000644000176200001440000000274514372747711023603 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMedians"%> <%@string rowname="rowWeightedMedians"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + weightedMedian() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] w <- runif(nrow(X)) gc() colStats <- microbenchmark( colWeightedMedians = colWeightedMedians(X, w = w, na.rm = FALSE), "apply+weigthedMedian" = apply(X, MARGIN = 2L, FUN = weightedMedian, w = w, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowWeightedMedians = rowWeightedMedians(X, w = w, na.rm = FALSE), "apply+weigthedMedian" = apply(X, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnys_subset.md.rsp0000644000176200001440000000310114372747711023024 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnys"%> <%@string rowname="rowAnys"%> <%@string fcnname="colRowAnys_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colAnys_X_S" = colAnys(X_S), "colAnys(X, rows, cols)" = colAnys(X, rows = rows, cols = cols), "colAnys(X[rows, cols])" = colAnys(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowAnys_X_S" = rowAnys(X_S), "rowAnys(X, cols, rows)" = rowAnys(X, rows = cols, cols = rows), "rowAnys(X[cols, rows])" = rowAnys(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binCounts.md.rsp0000644000176200001440000000367714372747711021325 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binCounts"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-05-25"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * hist() as below ```r <%=withCapture({ hist <- graphics::hist binCounts_hist <- function(x, bx, right = FALSE, ...) { hist(x, breaks = bx, right = right, include.lowest = TRUE, plot = FALSE)$counts } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Non-sorted simulated data ```r <%=withCapture({ set.seed(0xBEEF) nx <- 100e3 # Number of data points xmax <- 0.01*nx x <- runif(nx, min = 0, max = xmax) storage.mode(x) <- mode str(x) # Uniformely distributed bins nb <- 10e3 # Number of bins bx <- seq(from = 0, to = xmax, length.out = nb+1L) bx <- c(-1, bx, xmax+1) })%> ``` ### Results <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% mprintf("%s: %s\n", mode, dataLabel) %> ```r <%=withCapture({ gc() stats <- microbenchmark( binCounts = binCounts(x, bx = bx), hist = binCounts_hist(x, bx = bx), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% # Sanity checks n0 <- binCounts_hist(x, bx = bx) n1 <- binCounts(x, bx = bx) stopifnot(identical(n1, n0)) n1r <- rev(binCounts(-x, bx = rev(-bx), right = TRUE)) stopifnot(identical(n1r, n1)) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) })%> ``` <% benchmark() %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Restructured. 2014-05-25 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/logSumExp_subset.md.rsp0000644000176200001440000000233214372747711022654 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="logSumExp_subset"%> <%@string subname="logSumExp"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "logSumExp_x_S" = logSumExp(x_S), "logSumExp(x, idxs)" = logSumExp(x, idxs = idxs), "logSumExp(x[idxs])" = logSumExp(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binMeans.md.rsp0000644000176200001440000000376214372747711021110 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binMeans"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * binMeans_R() which is defined as ```r <%=withCapture({ binMeans_R <- function(y, x, bx, na.rm = FALSE, count = TRUE, right = FALSE) { B <- length(bx)-1L res <- double(B) counts <- integer(B) # For each bin... for (kk in seq_len(B)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk+1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk+1L]) } yKK <- y[idxs] muKK <- mean(yKK) res[kk] <- muKK counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # binMeans_R() })%> ``` ## Results ### Non-sorted simulated data ```r <%=withCapture({ nx <- 10e3 # Number of data points set.seed(0xBEEF) x <- runif(nx, min = 0, max = 1) y <- runif(nx, min = 0, max = 1) # Uniformely distributed bins nb <- 1e3 # Number of bins bx <- seq(from = 0, to = 1, length.out = nb+1L) bx <- c(-1, bx, 2) })%> ``` <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% message(dataLabel) %> ```r <%=withCapture({ gc() stats <- microbenchmark( binMeans = binMeans(x = x, y = y, bx = bx, count = TRUE), binMeans_R = binMeans_R(x = x, y = y, bx = bx, count = TRUE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) })%> ``` <% benchmark() %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Restructured. 2014-05-25 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/count_subset.md.rsp0000644000176200001440000000250314372747711022061 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="count_subset"%> <%@string subname="count"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) value <- 42 %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "count_x_S" = count(x_S, value), "count(x, idxs)" = count(x, idxs = idxs, value), "count(x[idxs])" = count(x[idxs], value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/count.md.rsp0000644000176200001440000000240514372747711020475 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="count"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-08"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * sum(x == value) <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] value <- 42 gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "count" = count(x, value), "sum(x == value)" = sum(x == value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanges.md.rsp0000644000176200001440000000275114372747711021756 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanges"%> <%@string rowname="rowRanges"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + range() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colRanges = colRanges(X, na.rm = FALSE), "apply+range" = apply(X, MARGIN = 2L, FUN = range, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowRanges = rowRanges(X, na.rm = FALSE), "apply+range" = apply(X, MARGIN = 1L, FUN = range, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/indexByRow.md.rsp0000644000176200001440000000557014372747711021445 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="indexByRow"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> <% lfun <- local({ locals <- list() function(txt = NULL) { if (is.null(txt)) return(locals) local <- list(txt) locals <<- c(locals, local) } }) %> # <%@meta name="title"%> This report benchmark the performance of `<%=fcnname%>()` against alternative methods: * `indexByRow_R1()` based in `matrix(..., byrow = TRUE)` * `indexByRow_R2()` is a modified version of `indexByRow_R1()` where `indexByRow_R1()` and `indexByRow_R2()` are defined as in the Appendix. <% lfun(withCapture({ indexByRow_R1 <- function(dim, idxs = NULL, ...) { n <- prod(dim) x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) if (!is.null(idxs)) x <- x[idxs] as.vector(x) } # indexByRow_R1() })) lfun(withCapture({ indexByRow_R2 <- function(dim, idxs = NULL, ...) { n <- prod(dim) if (is.null(idxs)) { x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) as.vector(x) } else { idxs <- idxs - 1 cols <- idxs %/% dim[2L] rows <- idxs %% dim[2L] cols + dim[1L]*rows + 1L } } # indexByRow_R2() })) %> ## Data <% lfun(withCapture({ <%@include file="R/random-matrices.R"%> })) %> ```r <%=withCapture({ data <- rmatrices(mode = "index") })%> ``` where `rmatrices()` is defined in the Appendix. <% # data <- data[1:2] %> ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] dim <- dim(X) idxsList <- list( 'all-by-NULL' = NULL, all = seq_len(prod(dim)), odd = seq(from = 1, to = prod(dim), by = 2L) ) str(idxsList) })%> ``` <% for (ii in seq_along(idxsList)) { %> #### Index set '<%=names(idxsList)[ii]%>' <% idxs <- idxsList[[ii]] idxsTag <- names(idxsList)[ii] # Validate correctness res <- list( indexByRow = indexByRow(dim, idxs = idxs), indexByRow_R1 = indexByRow_R1(dim, idxs = idxs), indexByRow_R2 = indexByRow_R2(dim, idxs = idxs) ) lapply(res, FUN = function(x) stopifnot(all.equal(x, res[[1]]))) gc() %> ```r <%=withCapture({ stats <- microbenchmark( indexByRow = indexByRow(dim, idxs = idxs), indexByRow_R1 = indexByRow_R1(dim, idxs = idxs), indexByRow_R2 = indexByRow_R2(dim, idxs = idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, idxsTag)) %> <% } # for (ii ...) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> ### Local functions ```r <%=lfun()[[1]]%> ``` ```r <%=lfun()[[2]]%> ``` ```r <%=lfun()[[3]]%> ``` <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/varDiff.md.rsp0000644000176200001440000000243614372747711020732 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="varDiff"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-10"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * N/A <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] stats <- microbenchmark( "varDiff" = varDiff(x), "var" = var(x), "diff" = diff(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-10 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/t_tx_OP_y.md.rsp0000644000176200001440000000411214372747711021246 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="t_tx_OP_y"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * t_tx_OP_y_R() as below ```r <%=withCapture({ t_tx_OP_y_R <- function(x, y, OP, na.rm = FALSE) { x <- t(x) if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } t(ans) } # t_tx_OP_y_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "t_tx_OP_y" = t_tx_OP_y(x, y, OP = OP, na.rm = FALSE), "t_tx_OP_y_R" = t_tx_OP_y_R(x, y, OP = OP, na.rm = FALSE), unit = "ms" ) gc() })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/sum2.md.rsp0000644000176200001440000000350414372747711020234 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="sum2"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-02"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * sum() + [() as below ```r <%=withCapture({ sum2_R <- function(x, na.rm = FALSE, idxs) { sum(x[idxs], na.rm = na.rm) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "sum2" = sum2(x), "sum" = sum(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, "all")) %> <% for (subset in c(0.2, 0.4, 0.8)) { %> #### A <%=sprintf("%g", 100*subset)%>% subset ```r <%=withCapture({ x <- data[[.dataLabel.]] subset idxs <- sort(sample(length(x), size = subset*length(x), replace = FALSE)) gc() stats <- microbenchmark( "sum2" = sum2(x, idxs = idxs), "sum+[()" = sum2_R(x, idxs = idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %> <% } # for (subset in ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMedian.md.rsp0000644000176200001440000000434714372747711022272 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMedian"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-03"%> <%@include file="${header}"%> <%--- 'ergm' could be hard to install, because it imports 'Rglpk', which requires GLPK library on the system. ---%> <%@string test_ergm="FALSE"%> <% use("limma", how = "load") limma_weighted.median <- limma::weighted.median use("cwhmisc", how = "load") cwhmisc_w.median <- cwhmisc::w.median use("laeken", how = "load") laeken_weightedMedian <- laeken::weightedMedian <%@ifeq test_ergm="TRUE"%> use("ergm", how = "load") ergm_wtd.median <- ergm::wtd.median <%@endif%> weightedMedian <- matrixStats::weightedMedian %> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * apply() + limma::weighted.median() * apply() + cwhmisc::w.median() * apply() + laeken::weightedMedian() <%@ifeq test_ergm="TRUE"%> * apply() + ergm::wtd.median() --%> <%@endif%> ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:3] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] w <- runif(length(x)) gc() stats <- microbenchmark( "weightedMedian" = weightedMedian(x, w = w, ties = "mean", na.rm = FALSE), "limma::weighted.median" = limma_weighted.median(x, w = w, na.rm = FALSE), "cwhmisc::w.median" = cwhmisc_w.median(x, w = w), "laeken::weightedMedian" = laeken_weightedMedian(x, w = w), <%@ifeq test_ergm="TRUE"%> "ergm::wtd.median" = ergm_wtd.median(x, w = w), <%@endif%> unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-12-17 o Made 'ergm::wtd.median' optional. 2014-06-03 o Created using benchmark snippet in incl/weightedMedian.Rex. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowSums2.md.rsp0000644000176200001440000000303714372747711021546 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colSums2"%> <%@string rowname="rowSums2"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + sum() * colSums() and rowSums() * .colSums() and .rowSums() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colSums2 = colSums2(X, na.rm = FALSE), .colSums = .colSums(X, m = nrow(X), n = ncol(X), na.rm = FALSE), colSums = colSums(X, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 2L, FUN = sum, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowSums2 = rowSums2(X, na.rm = FALSE), .rowSums = .rowSums(X, m = nrow(X), n = ncol(X), na.rm = FALSE), rowSums = rowSums(X, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 1L, FUN = sum, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/colRowMins_subset.md.rsp0000644000176200001440000000344014372747711023026 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMins"%> <%@string rowname="rowMins"%> <%@string fcnname="colRowMins_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMins_X_S" = colMins(X_S, na.rm = FALSE), "colMins(X, rows, cols)" = colMins(X, rows = rows, cols = cols, na.rm = FALSE), "colMins(X[rows, cols])" = colMins(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMins_X_S" = rowMins(X_S, na.rm = FALSE), "rowMins(X, cols, rows)" = rowMins(X, rows = cols, cols = rows, na.rm = FALSE), "rowMins(X[cols, rows])" = rowMins(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/WORDLIST0000644000176200001440000000451514436403546014762 0ustar liggesusersASAN Aalto Abele Ahlmann Alls Anys AppVeyor Benchmarking Bengtsson Biobase Biostatistics CMD CNRS Centre Constantin Cormen Corrada Cummaxs Cummins Cumprods Cumsums Dongcan EMBL El Eltze Evry Ghaoui HenrikBengtsson Hervé Hmisc Hoare's ISNA ISNAN IqrDiffs Jaffee Jiang JxN KU Kenkel Koenker Korpela Kx KxJ KxM KxN LSE Langfelder Leiserson Leuven LogSumExps Lund MATRIXSTATS Maxs Mikko Mins MxJ NaN Nakayama Neumann Neuvial Nx NxJ NxK NxM OSX OrderStats Pagès Pre Quicksort Rdoc Rivest Roel Rscript Rtools SPARC Sds UBsan UC Verbelen VignetteAuthor VignetteEngine VignetteIndexEntry VignetteKeyword VignetteTangle WeightedMeans WeightedMedians Xcode Xu al allValue allocArray allocMatrix allocVector ansp anyMissing anyNA anyValue arg benchmarked benchmarking binCounts binMeans colAlls colAnyMissings colAnyNAs colAnys colAvgsPerColSet colAvgsPerRowSet colCollapse colCounts colCummaxs colCummins colCumprods colCumsums colDiffs colIQRDiffs colIQRs colLogSumExps colMadDiffs colMads colMaxs colMeans colMedians colMins colNnn colOrderStats colProds colQuantiles colRanges colRanks colSdDiffs colSds colSums colTabulates colVarDiffs colVars colWeightedMads colWeightedMeans colWeightedMedians colWeightedSds colWeightedVars cran crfcns cummax cummin cumprod cumsum deprecatated dest df env envir et expSumLog exponentials fabs fcns gcc getNamespace getUrl getVersion github grepl gsub https idxs indexByRow iqr iqrDiff isNeg iter kable kk knitr loadMethod logSumExp logsumexp lx macOS madDiff matrixStats meanOver memcall memtests methodsS na nan ncol nrow ns oo pkgName pre preserveShape probs psort rPsort rowAlls rowAnyMissings rowAnyNAs rowAnys rowAvgsPerColSet rowAvgsPerRowSet rowCollapse rowCounts rowCummaxs rowCummins rowCumprods rowCumsums rowDiffs rowIQRDiffs rowIQRs rowLogSumExp rowLogSumExps rowMadDiffs rowMads rowMaxs rowMeans rowMedians rowMins rowNnn rowOrderStats rowProds rowQuantiles rowRanges rowRanks rowSdDiffs rowSds rowSums rowTabulates rowVarDiffs rowVars rowWeightedMads rowWeightedMeans rowWeightedMedians rowWeightedSds rowWeightedVars roxygen rsp sapply sd sdDiff setdiff signTabulate sprintf src stopifnot strsplit sumOver tapply tbl th tieAvg tx typedef typeof underflowing unlist useDynamicSymbols useNames valgrind varDiff vfcns von weighedVar weightedMad weightedMean weightedMedian weightedSd weightedVar wtd xK xN xlen md CentOS RHEL getOption