matrixStats/0000755000176200001440000000000014121073612012572 5ustar liggesusersmatrixStats/NAMESPACE0000644000176200001440000000412314120166003014005 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(allValue) export(allocArray) export(allocMatrix) export(allocVector) export(anyMissing) export(anyValue) export(binCounts) export(binMeans) export(colAlls) export(colAnyMissings) export(colAnyNAs) export(colAnys) export(colAvgsPerRowSet) export(colCollapse) export(colCounts) export(colCummaxs) export(colCummins) export(colCumprods) export(colCumsums) export(colDiffs) export(colIQRDiffs) export(colIQRs) export(colLogSumExps) export(colMadDiffs) export(colMads) export(colMaxs) export(colMeans2) export(colMedians) export(colMins) export(colOrderStats) export(colProds) export(colQuantiles) export(colRanges) export(colRanks) export(colSdDiffs) export(colSds) export(colSums2) export(colTabulates) export(colVarDiffs) export(colVars) export(colWeightedMads) export(colWeightedMeans) export(colWeightedMedians) export(colWeightedSds) export(colWeightedVars) export(count) export(diff2) export(indexByRow) export(iqr) export(iqrDiff) export(logSumExp) export(madDiff) export(mean2) export(product) export(rowAlls) export(rowAnyMissings) export(rowAnyNAs) export(rowAnys) export(rowAvgsPerColSet) export(rowCollapse) export(rowCounts) export(rowCummaxs) export(rowCummins) export(rowCumprods) export(rowCumsums) export(rowDiffs) export(rowIQRDiffs) export(rowIQRs) export(rowLogSumExps) export(rowMadDiffs) export(rowMads) export(rowMaxs) export(rowMeans2) export(rowMedians) export(rowMins) export(rowOrderStats) export(rowProds) export(rowQuantiles) export(rowRanges) export(rowRanks) export(rowSdDiffs) export(rowSds) export(rowSums2) export(rowTabulates) export(rowVarDiffs) export(rowVars) export(rowWeightedMads) export(rowWeightedMeans) export(rowWeightedMedians) export(rowWeightedSds) export(rowWeightedVars) export(sdDiff) export(signTabulate) export(sum2) export(t_tx_OP_y) export(validateIndices) export(varDiff) export(weightedMad) export(weightedMean) export(weightedMedian) export(weightedSd) export(weightedVar) export(x_OP_y) importFrom(stats,mad) importFrom(stats,median) importFrom(stats,quantile) useDynLib("matrixStats", .registration = TRUE, .fixes = "C_") matrixStats/man/0000755000176200001440000000000014111740760013351 5ustar liggesusersmatrixStats/man/rowCollapse.Rd0000644000176200001440000000433514107236216016140 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowCollapse.R \name{rowCollapse} \alias{rowCollapse} \alias{colCollapse} \title{Extracts one cell per row (column) from a matrix} \usage{ rowCollapse(x, idxs, rows = NULL, dim. = dim(x), ..., useNames = NA) colCollapse(x, idxs, cols = NULL, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{An index \code{\link[base]{vector}} of (maximum) length N (K) specifying the columns (rows) to be extracted.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{vector}} of length N (K). } \description{ Extracts one cell per row (column) from a matrix. The implementation is optimized for memory and speed. } \examples{ x <- matrix(1:27, ncol = 3) y <- rowCollapse(x, 1) stopifnot(identical(y, x[, 1])) y <- rowCollapse(x, 2) stopifnot(identical(y, x[, 2])) y <- rowCollapse(x, c(1, 1, 1, 1, 1, 3, 3, 3, 3)) stopifnot(identical(y, c(x[1:5, 1], x[6:9, 3]))) y <- rowCollapse(x, 1:3) print(y) y_truth <- c(x[1, 1], x[2, 2], x[3, 3], x[4, 1], x[5, 2], x[6, 3], x[7, 1], x[8, 2], x[9, 3]) stopifnot(identical(y, y_truth)) } \seealso{ \emph{Matrix indexing} to index elements in matrices and arrays, cf. \code{\link[base]{[}}(). } \author{ Henrik Bengtsson } \keyword{utilities} matrixStats/man/rowRanges.Rd0000644000176200001440000000473614107236216015622 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowRanges.R \name{rowRanges} \alias{rowRanges} \alias{rowMins} \alias{rowMaxs} \alias{colRanges} \alias{colMins} \alias{colMaxs} \title{Gets the range of values in each row (column) of a matrix} \usage{ rowRanges(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) rowMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) rowMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colRanges(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ \code{rowRanges()} (\code{colRanges()}) returns a \code{\link[base]{numeric}} Nx2 (Kx2) \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) for which the ranges are calculated. \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Gets the range of values in each row (column) of a matrix. } \seealso{ \code{\link{rowOrderStats}}() and \code{\link[base]{pmin.int}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowMeans2.Rd0000644000176200001440000000352614107236216015524 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowMeans2.R \name{rowMeans2} \alias{rowMeans2} \alias{colMeans2} \title{Calculates the mean for each row (column) in a matrix} \usage{ rowMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the mean for each row (column) in a matrix. } \details{ The implementation of \code{rowMeans2()} and \code{colMeans2()} is optimized for both speed and memory. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowOrderStats.Rd0000644000176200001440000000463114107236216016467 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowOrderStats.R \name{rowOrderStats} \alias{rowOrderStats} \alias{colOrderStats} \title{Gets an order statistic for each row (column) in a matrix} \usage{ rowOrderStats(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = NA) colOrderStats(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{which}{An \code{\link[base]{integer}} index in [1,K] ([1,N]) indicating which order statistic to be returned.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Gets an order statistic for each row (column) in a matrix. } \details{ The implementation of \code{rowOrderStats()} is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a unique implementation for \code{\link[base]{integer}} matrices. } \section{Missing values}{ This method does \emph{not} handle missing values, that is, the result corresponds to having \code{na.rm = FALSE} (if such an argument would be available). } \seealso{ See \code{rowMeans()} in \code{\link[base]{colSums}}(). } \author{ The native implementation of \code{rowOrderStats()} was adopted by Henrik Bengtsson from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} package. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/signTabulate.Rd0000644000176200001440000000164014053334714016266 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/signTabulate.R \name{signTabulate} \alias{signTabulate} \title{Calculates the number of negative, zero, positive and missing values} \usage{ signTabulate(x, idxs = NULL, ...) } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{name}}d \code{\link[base]{numeric}} \code{\link[base]{vector}}. } \description{ Calculates the number of negative, zero, positive and missing values in a \code{\link[base]{numeric}} vector. For \code{\link[base]{double}} vectors, the number of negative and positive infinite values are also counted. } \seealso{ \code{\link[base]{sign}}(). } \author{ Henrik Bengtsson } \keyword{internal} matrixStats/man/weightedMean.Rd0000644000176200001440000000544614055352045016254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedMean.R \name{weightedMean} \alias{weightedMean} \title{Weighted Arithmetic Mean} \usage{ weightedMean(x, w = NULL, idxs = NULL, na.rm = FALSE, refine = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values. If a missing-value weight exists, the result is always a missing value.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. If \code{x} is of zero length, then \code{NaN} is returned, which is consistent with \code{\link[base]{mean}}(). } \description{ Computes the weighted sample mean of a numeric vector. } \section{Missing values}{ This function handles missing values consistently with \code{\link[stats]{weighted.mean}}. More precisely, if \code{na.rm = FALSE}, then any missing values in either \code{x} or \code{w} will give result \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which \code{x} is missing are skipped. Note that if both \code{x} and \code{w} are missing for a data points, then it is also skipped (by the same rule). However, if only \code{w} is missing, then the final results will always be \code{NA_real_} regardless of \code{na.rm}. } \examples{ x <- 1:10 n <- length(x) w <- rep(1, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # Pull the mean towards zero w[1] <- 5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # Put even more weight on the zero w[1] <- 8.5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # All weight on the first value w[1] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # All weight on the last value w[1] <- 1 w[n] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) # All weights set to zero w <- rep(0, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1, m0)) } \seealso{ \code{\link[base]{mean}}() and \code{\link[stats]{weighted.mean}}. } \author{ Henrik Bengtsson } \keyword{robust} \keyword{univar} matrixStats/man/anyMissing.Rd0000644000176200001440000000445214107236216015767 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/anyMissing.R \name{anyMissing} \alias{anyMissing} \alias{colAnyMissings} \alias{rowAnyMissings} \alias{colAnyNAs} \alias{rowAnyNAs} \title{Checks if there are any missing values in an object or not} \usage{ anyMissing(x, idxs = NULL, ...) colAnyMissings(x, rows = NULL, cols = NULL, ..., useNames = NA) rowAnyMissings(x, rows = NULL, cols = NULL, ..., useNames = NA) colAnyNAs(x, rows = NULL, cols = NULL, ..., useNames = NA) rowAnyNAs(x, rows = NULL, cols = NULL, ..., useNames = NA) } \arguments{ \item{x}{A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or \code{\link[base]{NULL}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns \code{\link[base:logical]{TRUE}} if a missing value was detected, otherwise \code{\link[base:logical]{FALSE}}. } \description{ Checks if there are any missing values in an object or not. \emph{Please use \code{base::anyNA()} instead of \code{anyMissing()}, \code{colAnyNAs()} instead of \code{colAnyMissings()}, and \code{rowAnyNAs()} instead of \code{rowAnyMissings()}.} } \details{ The implementation of this method is optimized for both speed and memory. The method will return \code{\link[base:logical]{TRUE}} as soon as a missing value is detected. } \examples{ x <- rnorm(n = 1000) x[seq(300, length(x), by = 100)] <- NA stopifnot(anyMissing(x) == any(is.na(x))) } \seealso{ Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base}, which provides the same functionality as \code{anyMissing()}. } \author{ Henrik Bengtsson } \keyword{iteration} \keyword{logic} matrixStats/man/rowAvgsPerColSet.Rd0000644000176200001440000001044314055352045017055 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowAvgsPerColSet.R \name{rowAvgsPerColSet} \alias{rowAvgsPerColSet} \alias{colAvgsPerRowSet} \title{Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)} \usage{ rowAvgsPerColSet(X, W = NULL, rows = NULL, S, FUN = rowMeans, ..., na.rm = NA, tFUN = FALSE) colAvgsPerRowSet(X, W = NULL, cols = NULL, S, FUN = colMeans, ..., na.rm = NA, tFUN = FALSE) } \arguments{ \item{X}{A \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}}.} \item{W}{An optional \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}} of weights.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{S}{An \code{\link[base]{integer}} KxJ \code{\link[base]{matrix}} specifying the J subsets. Each column holds K column (row) indices for the corresponding subset.} \item{FUN}{The row-by-row (column-by-column) \code{\link[base]{function}} used to average over each subset of \code{X}. This function must accept a \code{\link[base]{numeric}} NxK (KxM) \code{\link[base]{matrix}} and the \code{\link[base]{logical}} argument \code{na.rm}, and return a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (M).} \item{...}{Additional arguments passed to then \code{FUN} \code{\link[base]{function}}.} \item{na.rm}{(logical) Argument passed to \code{FUN()} as \code{na.rm = na.rm}. If \code{\link[base:logical]{NA}} (default), then \code{na.rm = TRUE} is used if \code{X} or \code{S} holds missing values, otherwise \code{na.rm = FALSE}.} \item{tFUN}{If \code{\link[base:logical]{TRUE}}, the NxK (KxM) \code{\link[base]{matrix}} passed to \code{FUN()} is transposed first.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{numeric}} JxN (MxJ) \code{\link[base]{matrix}}, where row names equal \code{rownames(X)} (\code{colnames(S)}) and column names \code{colnames(S)} (\code{colnames(X)}). } \description{ Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows). Each subset is averaged independently of the others. } \details{ If argument \code{S} is a single column vector with indices \code{1:N}, then \code{rowAvgsPerColSet(X, S = S, FUN = rowMeans)} gives the same result as \code{rowMeans(X)}. Analogously, for \code{colAvgsPerRowSet()}. } \examples{ X <- matrix(rnorm(20 * 6), nrow = 20, ncol = 6) rownames(X) <- LETTERS[1:nrow(X)] colnames(X) <- letters[1:ncol(X)] print(X) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply rowMeans() for 3 sets of 2 columns # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 3 S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S) print(Z) # Validation Z0 <- cbind(s1 = rowMeans(X[, 1:2]), s2 = rowMeans(X[, 3:4]), s3 = rowMeans(X[, 5:6])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply colMeans() for 5 sets of 4 rows # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 5 S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S) print(Z) # Validation Z0 <- rbind(s1 = colMeans(X[ 1:4, ]), s2 = colMeans(X[ 5:8, ]), s3 = colMeans(X[ 9:12, ]), s4 = colMeans(X[13:16, ]), s5 = colMeans(X[17:20, ])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # When there is only one "complete" set # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 1 S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S, FUN = rowMeans) print(Z) Z0 <- rowMeans(X) stopifnot(identical(drop(Z), Z0)) nbr_of_sets <- 1 S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s\%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S, FUN = colMeans) print(Z) Z0 <- colMeans(X) stopifnot(identical(drop(Z), Z0)) } \author{ Henrik Bengtsson } \keyword{internal} \keyword{utilities} matrixStats/man/rowProds.Rd0000644000176200001440000000504214107236216015461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/product.R, R/rowProds.R \name{product} \alias{product} \alias{rowProds} \alias{colProds} \title{Calculates the product for each row (column) in a matrix} \usage{ product(x, idxs = NULL, na.rm = FALSE, ...) rowProds(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = NA) colProds(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{method}{A \code{\link[base]{character}} string specifying how each product is calculated.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the product for each row (column) in a matrix. } \details{ If \code{method = "expSumLog"}, then then \code{\link{product}}() function is used, which calculates the product via the logarithmic transform (treating negative values specially). This improves the precision and lowers the risk for numeric overflow. If \code{method = "direct"}, the direct product is calculated via the \code{\link[base]{prod}}() function. } \section{Missing values}{ Note, if \code{method = "expSumLog"}, \code{na.rm = FALSE}, and \code{x} contains missing values (\code{\link[base]{NA}} or \code{\link[base:is.finite]{NaN}}), then the calculated value is also missing value. Note that it depends on platform whether \code{\link[base:is.finite]{NaN}} or \code{\link[base]{NA}} is returned when an \code{\link[base:is.finite]{NaN}} exists, cf. \code{\link[base]{is.nan}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowDiffs.Rd0000644000176200001440000000375414107236216015435 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowDiffs.R \name{rowDiffs} \alias{rowDiffs} \alias{colDiffs} \title{Calculates difference for each row (column) in a matrix} \usage{ rowDiffs(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = NA) colDiffs(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{lag}{An \code{\link[base]{integer}} specifying the lag.} \item{differences}{An \code{\link[base]{integer}} specifying the order of difference.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} Nx(K-1) or (N-1)xK \code{\link[base]{matrix}}. } \description{ Calculates difference for each row (column) in a matrix. } \examples{ x <- matrix(1:27, ncol = 3) d1 <- rowDiffs(x) print(d1) d2 <- t(colDiffs(t(x))) stopifnot(all.equal(d2, d1)) } \seealso{ See also \code{\link{diff2}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowLogSumExps.Rd0000644000176200001440000000337614107236216016450 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowLogSumExps.R \name{rowLogSumExps} \alias{rowLogSumExps} \alias{colLogSumExps} \title{Accurately computes the logarithm of the sum of exponentials across rows or columns} \usage{ rowLogSumExps(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = NA) colLogSumExps(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = NA) } \arguments{ \item{lx}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. Typically \code{lx} are \eqn{log(x)} values.} \item{rows, cols}{A \code{\link[base]{vector}} indicating subset of rows (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, any missing values are ignored, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Accurately computes the logarithm of the sum of exponentials across rows or columns. } \section{Benchmarking}{ These methods are implemented in native code and have been optimized for speed and memory. } \seealso{ To calculate the same on vectors, \code{\link{logSumExp}}(). } \author{ Native implementation by Henrik Bengtsson. Original R code by Nakayama ??? (Japan). } \keyword{array} matrixStats/man/rowCumsums.Rd0000644000176200001440000000543214107236216016031 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowCumsums.R \name{rowCumsums} \alias{rowCumsums} \alias{colCumsums} \alias{rowCumprods} \alias{colCumprods} \alias{rowCummins} \alias{colCummins} \alias{rowCummaxs} \alias{colCummaxs} \title{Cumulative sums, products, minima and maxima for each row (column) in a matrix} \usage{ rowCumsums(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) colCumsums(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) rowCumprods(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) colCumprods(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) rowCummins(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) colCummins(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) rowCummaxs(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) colCummaxs(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} of the same mode as \code{x}, except when \code{x} is of mode \code{\link[base]{logical}}, then the return type is \code{\link[base]{integer}}. } \description{ Cumulative sums, products, minima and maxima for each row (column) in a matrix. } \examples{ x <- matrix(1:12, nrow = 4, ncol = 3) print(x) yr <- rowCumsums(x) print(yr) yc <- colCumsums(x) print(yc) yr <- rowCumprods(x) print(yr) yc <- colCumprods(x) print(yc) yr <- rowCummaxs(x) print(yr) yc <- colCummaxs(x) print(yc) yr <- rowCummins(x) print(yr) yc <- colCummins(x) print(yc) } \seealso{ See \code{\link[base]{cumsum}}(), \code{\link[base]{cumprod}}(), \code{\link[base]{cummin}}(), and \code{\link[base]{cummax}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{univar} matrixStats/man/rowSums2.Rd0000644000176200001440000000351414107236216015405 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowSums2.R \name{rowSums2} \alias{rowSums2} \alias{colSums2} \title{Calculates the sum for each row (column) in a matrix} \usage{ rowSums2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colSums2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the sum for each row (column) in a matrix. } \details{ The implementation of \code{rowSums2()} and \code{colSums2()} is optimized for both speed and memory. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowWeightedMeans.Rd0000644000176200001440000000525214107236216017121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowWeightedMeans.R \name{rowWeightedMeans} \alias{rowWeightedMeans} \alias{colWeightedMeans} \title{Calculates the weighted means for each row (column) in a matrix} \usage{ rowWeightedMeans(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) colWeightedMeans(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the weighted means for each row (column) in a matrix. } \details{ The implementations of these methods are optimized for both speed and memory. If no weights are given, the corresponding \code{rowMeans()}/\code{colMeans()} is used. } \examples{ x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # Non-weighted row averages mu_0 <- rowMeans(x) mu <- rowWeightedMeans(x) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (uniform weights) w <- rep(2.5, times = ncol(x)) mu <- rowWeightedMeans(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(1, 1, 0, 1) mu_0 <- rowMeans(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMeans(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(0, 1, 0, 0) mu_0 <- rowMeans(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMeans(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted averages by rows and columns w <- 1:4 mu_1 <- rowWeightedMeans(x, w = w) mu_2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(mu_2, mu_1)) } \seealso{ See \code{rowMeans()} and \code{colMeans()} in \code{\link[base]{colSums}}() for non-weighted means. See also \code{\link[stats]{weighted.mean}}. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowRanks.Rd0000644000176200001440000001234414107236216015453 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowRanks.R \name{rowRanks} \alias{rowRanks} \alias{colRanks} \title{Gets the rank of the elements in each row (column) of a matrix} \usage{ rowRanks(x, rows = NULL, cols = NULL, ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), ..., useNames = NA) colRanks(x, rows = NULL, cols = NULL, ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), preserveShape = FALSE, ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{ties.method}{A \code{\link[base]{character}} string specifying how ties are treated. For details, see below.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} \item{preserveShape}{A \code{\link[base]{logical}} specifying whether the \code{\link[base]{matrix}} returned should preserve the input shape of \code{x}, or not.} } \value{ A \code{\link[base]{matrix}} of type \code{\link[base]{integer}} is returned, unless \code{ties.method = "average"} when it is of type \code{\link[base]{numeric}}. The \code{rowRanks()} function always returns an NxK \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) whose ranks are calculated. The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}. Any \code{\link[base]{names}} of \code{x} are ignored and absent in the result. } \description{ Gets the rank of the elements in each row (column) of a matrix. } \details{ These functions rank values and treats missing values the same way as \code{\link[base]{rank}}(). For equal values ("ties"), argument \code{ties.method} determines how these are ranked among each other. More precisely, for the following values of \code{ties.method}, each index set of ties consists of: \itemize{ \item{\code{"first"} - increasing values that are all unique} \item{\code{"last"} - decreasing values that are all unique} \item{\code{"min"} - identical values equaling the minimum of their original ranks} \item{\code{"max"} - identical values equaling the maximum of their original ranks} \item{\code{"average"} - identical values that equal the sample mean of their original ranks. Because the average is calculated, the returned ranks may be non-integer values} \item{\code{"random"} - randomly shuffled values of their original ranks.} \item{\code{"dense"} - increasing values that are all unique and, contrary to \code{"first"}, never contain any gaps} } For more information on \code{ties.method = "dense"}, see \code{frank()} of the \pkg{data.table} package. For more information on the other alternatives, see \code{\link[base]{rank}}(). Note that, due to different randomization strategies, the shuffling order produced by these functions when using \code{ties.method = "random"} does not reproduce that of \code{\link[base]{rank}}(). \emph{WARNING: For backward-compatibility reasons, the default is \code{ties.method = "max"}, which differs from \code{\link[base]{rank}}() which uses \code{ties.method = "average"} by default. Since we plan to change the default behavior in a future version, we recommend to explicitly specify the intended value of argument \code{ties.method}.} } \section{Missing values}{ Missing values are ranked as \code{NA_integer_}, as with \code{na.last = "keep"} in the \code{\link[base]{rank}}() function. } \section{Performance}{ The implementation is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a unique implementation for \code{\link[base]{integer}} matrices. Furthermore, it is more memory efficient to do \code{colRanks(x, preserveShape = TRUE)} than \code{t(colRanks(x, preserveShape = FALSE))}. } \seealso{ For developers, see also Section Utility functions' in 'Writing R Extensions manual', particularly the native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}. } \author{ Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding 'ties.method' support. Brian Montgomery for adding more 'ties.method's. Henrik Bengtsson adapted the original native implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} package. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/varDiff.Rd0000644000176200001440000001024414107236216015223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/varDiff.R \name{varDiff} \alias{varDiff} \alias{sdDiff} \alias{madDiff} \alias{iqrDiff} \alias{rowVarDiffs} \alias{colVarDiffs} \alias{rowSdDiffs} \alias{colSdDiffs} \alias{rowMadDiffs} \alias{colMadDiffs} \alias{rowIQRDiffs} \alias{colIQRDiffs} \title{Estimation of scale based on sequential-order differences} \usage{ varDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) sdDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) madDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, constant = 1.4826, ...) iqrDiff(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) rowVarDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) colVarDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) rowSdDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) colSdDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) rowMadDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) colMadDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) rowIQRDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) colIQRDiffs(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N or a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{diff}{The positional distance of elements for which the difference should be calculated.} \item{trim}{A \code{\link[base]{double}} in [0,1/2] specifying the fraction of observations to be trimmed from each end of (sorted) \code{x} before estimation.} \item{...}{Not used.} \item{constant}{A scale factor adjusting for asymptotically normal consistency.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length 1, length N, or length K. } \description{ Estimation of scale based on sequential-order differences, corresponding to the scale estimates provided by \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats]{IQR}}. } \details{ Note that n-order difference MAD estimates, just like the ordinary MAD estimate by \code{\link[stats]{mad}}, apply a correction factor such that the estimates are consistent with the standard deviation under Gaussian distributions. The interquartile range (IQR) estimates does \emph{not} apply such a correction factor. If asymptotically normal consistency is wanted, the correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))}, which is half of that used for MAD estimates, which is \code{1 / qnorm(3/4)}. This correction factor needs to be applied manually, i.e. there is no \code{constant} argument for the IQR functions. } \references{ [1] J. von Neumann et al., \emph{The mean square successive difference}. Annals of Mathematical Statistics, 1941, 12, 153-162.\cr } \seealso{ For the corresponding non-differentiated estimates, see \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats]{IQR}}. Internally, \code{\link{diff2}}() is used which is a faster version of \code{\link[base]{diff}}(). } \author{ Henrik Bengtsson } \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowTabulates.Rd0000644000176200001440000000447214107236216016324 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowTabulates.R \name{rowTabulates} \alias{rowTabulates} \alias{colTabulates} \title{Tabulates the values in a matrix by row (column).} \usage{ rowTabulates(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = NA) colTabulates(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = NA) } \arguments{ \item{x}{An \code{\link[base]{integer}}, a \code{\link[base]{logical}}, or a \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{values}{An \code{\link[base]{vector}} of J values of count. If \code{\link[base]{NULL}}, all (unique) values are counted.} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a NxJ (KxJ) \code{\link[base]{matrix}} where N (K) is the number of row (column) \code{\link[base]{vector}}s tabulated and J is the number of values counted. } \description{ Tabulates the values in a matrix by row (column). } \details{ An alternative to these functions, is to use \code{table(x, row(x))} and \code{table(x, col(x))}, with the exception that the latter do not support the \code{\link[base]{raw}} data type. When there are no missing values in \code{x}, we have that \code{all(rowTabulates(x) == t(table(x, row(x))))} and \code{all(colTabulates(x) == t(table(x, col(x))))}. When there are missing values, we have that \code{all(rowTabulates(x) == t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]))} and \code{all(colTabulates(x) == t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]))}. } \examples{ x <- matrix(1:5, nrow = 10, ncol = 5) print(x) print(rowTabulates(x)) print(colTabulates(x)) # Count only certain values print(rowTabulates(x, values = 1:3)) y <- as.raw(x) dim(y) <- dim(x) print(y) print(rowTabulates(y)) print(colTabulates(y)) } \author{ Henrik Bengtsson } \keyword{utilities} matrixStats/man/sum2.Rd0000644000176200001440000000636614055352045014543 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sum2.R \name{sum2} \alias{sum2} \title{Fast sum over subset of vector elements} \usage{ sum2(x, idxs = NULL, na.rm = FALSE, mode = typeof(x), ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{mode}{A \code{\link[base]{character}} string specifying the data type of the return value. Default is to use the same mode as argument \code{x}, unless it is logical when it defaults to \code{"integer"}.} \item{...}{Not used.} } \value{ Returns a scalar of the data type specified by argument \code{mode}. If \code{mode = "integer"}, then integer overflow occurs if the \emph{sum} is outside the range of defined integer values. Note that the intermediate sum (\code{sum(x[1:n])}) is internally represented as a floating point value and will therefore never be outside of the range. If \code{mode = "integer"} and \code{typeof{x} == "double"}, then a warning is generated. } \description{ Computes the sum of all or a subset of values. } \details{ \code{sum2(x, idxs)} gives equivalent results as \code{sum(x[idxs])}, but is faster and more memory efficient since it avoids the actual subsetting which requires copying of elements and garbage collection thereof. Furthermore, \code{sum2(x, mode = "double")} is equivalent to \code{sum(as.numeric(x))} and may therefore be used to avoid integer overflow(*), but at the same time is much more memory efficient that the regular \code{sum()} function when \code{x} is an \code{\link[base]{integer}} vector. (*) \emph{In R (>= 3.5.0), \code{sum(x)} will no longer integer overflow and return \code{NA_integer_}. Instead it will return the correct sum in form of a double value.} } \examples{ x <- 1:10 n <- length(x) idxs <- seq(from = 1, to = n, by = 2) s1 <- sum(x[idxs]) # 25 s2 <- sum2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) idxs <- seq(from = n, to = 1, by = -2) s1 <- sum(x[idxs]) # 25 s2 <- sum2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) s1 <- sum(x) # 55 s2 <- sum2(x) # 55 stopifnot(identical(s1, s2)) # Total gives integer overflow x <- c(.Machine$integer.max, 1L, -.Machine$integer.max) s1 <- sum(x[1:2]) # NA_integer_ in R (< 3.5.0) s2 <- sum2(x[1:2]) # NA_integer_ # Total gives integer overflow (coerce to numeric) s1 <- sum(as.numeric(x[1:2])) # 2147483648 s2 <- sum2(as.numeric(x[1:2])) # 2147483648 s3 <- sum2(x[1:2], mode = "double") # 2147483648 w/out copy stopifnot(identical(s1, s2)) stopifnot(identical(s1, s3)) # Cumulative sum would give integer overflow but not the total s1 <- sum(x) # 1L s2 <- sum2(x) # 1L stopifnot(identical(s1, s2)) } \seealso{ \code{\link[base]{sum}}(). To efficiently average over a subset, see \code{\link{mean2}}(). } \author{ Henrik Bengtsson } \keyword{internal} \keyword{univar} matrixStats/man/weightedVar.Rd0000644000176200001440000000615214107236216016116 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedVar.R \name{weightedVar} \alias{weightedVar} \alias{weightedSd} \alias{rowWeightedVars} \alias{colWeightedVars} \alias{rowWeightedSds} \alias{colWeightedSds} \title{Weighted variance and weighted standard deviation} \usage{ weightedVar(x, w = NULL, idxs = NULL, na.rm = FALSE, center = NULL, ...) weightedSd(...) rowWeightedVars(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) colWeightedVars(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) rowWeightedSds(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) colWeightedSds(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) } \arguments{ \item{x}{\code{\link[base]{vector}} of type \code{\link[base]{integer}}, \code{\link[base]{numeric}}, or \code{\link[base]{logical}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center location of the data. If \code{\link[base]{NULL}}, it is estimated from data.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Computes a weighted variance / standard deviation of a numeric vector or across rows or columns of a matrix. } \details{ The estimator used here is the same as the one used by the "unbiased" estimator of the \bold{Hmisc} package. More specifically, \code{weightedVar(x, w = w) == Hmisc::wtd.var(x, weights = w)}, } \section{Missing values}{ This function handles missing values consistently with \code{\link{weightedMean}}(). More precisely, if \code{na.rm = FALSE}, then any missing values in either \code{x} or \code{w} will give result \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which \code{x} is missing are skipped. Note that if both \code{x} and \code{w} are missing for a data points, then it is also skipped (by the same rule). However, if only \code{w} is missing, then the final results will always be \code{NA_real_} regardless of \code{na.rm}. } \seealso{ For the non-weighted variance, see \code{\link[stats]{var}}. } \author{ Henrik Bengtsson } \keyword{robust} \keyword{univar} matrixStats/man/weightedMedian.Rd0000644000176200001440000001255414055352045016567 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedMedian.R \name{weightedMedian} \alias{weightedMedian} \title{Weighted Median Value} \usage{ weightedMedian(x, w = NULL, idxs = NULL, na.rm = FALSE, interpolate = is.null(ties), ties = NULL, ...) } \arguments{ \item{x}{\code{\link[base]{vector}} of type \code{\link[base]{integer}}, \code{\link[base]{numeric}}, or \code{\link[base]{logical}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in \code{x} should be stripped before the computation proceeds, or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done.} \item{interpolate}{If \code{\link[base:logical]{TRUE}}, linear interpolation is used to get a consistent estimate of the weighted median.} \item{ties}{If \code{interpolate == FALSE}, a character string specifying how to solve ties between two \code{x}'s that are satisfying the weighted median criteria. Note that at most two values can satisfy the criteria. When \code{ties} is \code{"min"} ("lower weighted median"), the smaller value of the two is returned and when it is \code{"max"} ("upper weighted median"), the larger value is returned. If \code{ties} is \code{"mean"}, the mean of the two values is returned. Finally, if \code{ties} is \code{"weighted"} (or \code{\link[base]{NULL}}) a weighted average of the two are returned, where the weights are weights of all values \code{x[i] <= x[k]} and \code{x[i] >= x[k]}, respectively.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S}, the \emph{weighted median} is defined as the element \code{x[k]} for which the total weight of all elements \code{x[i] < x[k]} is less or equal to \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]} is less or equal to \code{S/2} (c.f. [1]). When using linear interpolation, the weighted mean of \code{x[k-1]} and \code{x[k]} with weights \code{S[k-1]} and \code{S[k]} corresponding to the cumulative weights of those two elements is used as an estimate. If \code{w} is missing then all elements of \code{x} are given the same positive weight. If all weights are zero, \code{\link[base:NA]{NA_real_}} is returned. If one or more weights are \code{Inf}, it is the same as these weights have the same weight and the others have zero. This makes things easier for cases where the weights are result of a division with zero. If there are missing values in \code{w} that are part of the calculation (after subsetting and dropping missing values in \code{x}), then the final result is always \code{NA} of the same type as \code{x}. The weighted median solves the following optimization problem: \deqn{\alpha^* = \arg_\alpha \min \sum_{i = 1}^{n} w_i |x_i-\alpha|} where \eqn{x = (x_1, x_2, \ldots, x_n)} are scalars and \eqn{w = (w_1, w_2, \ldots, w_n)} are the corresponding "weights" for each individual \eqn{x} value. } \description{ Computes a weighted median of a numeric vector. } \examples{ x <- 1:10 n <- length(x) m1 <- median(x) # 5.5 m2 <- weightedMedian(x) # 5.5 stopifnot(identical(m1, m2)) w <- rep(1, times = n) m1 <- weightedMedian(x, w) # 5.5 (default) m2 <- weightedMedian(x, ties = "weighted") # 5.5 (default) m3 <- weightedMedian(x, ties = "min") # 5 m4 <- weightedMedian(x, ties = "max") # 6 stopifnot(identical(m1, m2)) # Pull the median towards zero w[1] <- 5 m1 <- weightedMedian(x, w) # 3.5 y <- c(rep(0, times = w[1]), x[-1]) # Only possible for integer weights m2 <- median(y) # 3.5 stopifnot(identical(m1, m2)) # Put even more weight on the zero w[1] <- 8.5 weightedMedian(x, w) # 2 # All weight on the first value w[1] <- Inf weightedMedian(x, w) # 1 # All weight on the last value w[1] <- 1 w[n] <- Inf weightedMedian(x, w) # 10 # All weights set to zero w <- rep(0, times = n) weightedMedian(x, w) # NA # Simple benchmarking bench <- function(N = 1e5, K = 10) { x <- rnorm(N) gc() t <- c() t[1] <- system.time(for (k in 1:K) median(x))[3] t[2] <- system.time(for (k in 1:K) weightedMedian(x))[3] t <- t / t[1] names(t) <- c("median", "weightedMedian") t } print(bench(N = 5, K = 100)) print(bench(N = 50, K = 100)) print(bench(N = 200, K = 100)) print(bench(N = 1000, K = 100)) print(bench(N = 10e3, K = 20)) print(bench(N = 100e3, K = 20)) } \references{ [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to Algorithms, The MIT Press, Massachusetts Institute of Technology, 1989. } \seealso{ \code{\link[stats]{median}}, \code{\link[base]{mean}}() and \code{\link{weightedMean}}(). } \author{ Henrik Bengtsson and Ola Hossjer, Centre for Mathematical Sciences, Lund University. Thanks to Roger Koenker, Econometrics, University of Illinois, for the initial ideas. } \keyword{robust} \keyword{univar} matrixStats/man/rowCounts.Rd0000644000176200001440000000637114107236216015653 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowCounts.R \name{rowCounts} \alias{rowCounts} \alias{colCounts} \alias{count} \title{Counts the number of occurrences of a specific value} \usage{ rowCounts(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colCounts(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) count(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{value}{A value to search for.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ \code{rowCounts()} (\code{colCounts()}) returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length N (K). \code{count()} returns a scalar of type \code{\link[base]{integer}} if the count is less than 2^31-1 (= \code{.Machine$integer.max}) otherwise a scalar of type \code{\link[base]{double}}. } \description{ The row- and column-wise functions take either a matrix or a vector as input. If a vector, then argument \code{dim.} must be specified and fulfill \code{prod(dim.) == length(x)}. The result will be identical to the results obtained when passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, but avoids having to temporarily create/allocate a matrix, if only such is needed only for these calculations. } \examples{ x <- matrix(0:11, nrow = 4, ncol = 3) x[2:3, 2:3] <- 2:5 x[3, 3] <- NA_integer_ print(x) print(rowCounts(x, value = 2)) ## [1] 0 1 NA 0 print(colCounts(x, value = 2)) ## [1] 1 1 NA print(colCounts(x, value = NA_integer_)) ## [1] 0 0 1 print(rowCounts(x, value = 2, na.rm = TRUE)) ## [1] 0 1 1 0 print(colCounts(x, value = 2, na.rm = TRUE)) ## [1] 1 1 0 print(rowAnys(x, value = 2)) ## [1] FALSE TRUE TRUE FALSE print(rowAnys(x, value = NA_integer_)) ## [1] FALSE FALSE TRUE FALSE print(colAnys(x, value = 2)) ## [1] TRUE TRUE NA print(colAnys(x, value = 2, na.rm = TRUE)) ## [1] TRUE TRUE FALSE print(colAlls(x, value = 2)) ## [1] FALSE FALSE FALSE } \seealso{ rowAlls } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{logic} \keyword{univar} matrixStats/man/rowIQRs.Rd0000644000176200001440000000437614107236216015221 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowIQRs.R \name{rowIQRs} \alias{rowIQRs} \alias{colIQRs} \alias{iqr} \title{Estimates of the interquartile range for each row (column) in a matrix} \usage{ rowIQRs(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) colIQRs(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) iqr(x, idxs = NULL, na.rm = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Additional arguments passed to \code{\link{rowQuantiles}}() (\code{colQuantiles()}).} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Estimates of the interquartile range for each row (column) in a matrix. } \section{Missing values}{ Contrary to \code{\link[stats]{IQR}}, which gives an error if there are missing values and \code{na.rm = FALSE}, \code{iqr()} and its corresponding row and column-specific functions return \code{\link[base]{NA}}_real_. } \examples{ set.seed(1) x <- matrix(rnorm(50 * 40), nrow = 50, ncol = 40) str(x) # Row IQRs q <- rowIQRs(x) print(q) q0 <- apply(x, MARGIN = 1, FUN = IQR) stopifnot(all.equal(q0, q)) # Column IQRs q <- colIQRs(x) print(q) q0 <- apply(x, MARGIN = 2, FUN = IQR) stopifnot(all.equal(q0, q)) } \seealso{ See \code{\link[stats]{IQR}}. See \code{\link{rowSds}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowQuantiles.Rd0000644000176200001440000000475114111740760016344 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowQuantiles.R \name{rowQuantiles} \alias{rowQuantiles} \alias{colQuantiles} \title{Estimates quantiles for each row (column) in a matrix} \usage{ rowQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, ..., useNames = NA, drop = TRUE) colQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, ..., useNames = NA, drop = TRUE) } \arguments{ \item{x}{An \code{\link[base]{integer}}, \code{\link[base]{numeric}} or \code{\link[base]{logical}} NxK \code{\link[base]{matrix}} with N >= 0.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{probs}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of J probabilities in [0, 1].} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{type}{An \code{\link[base]{integer}} specify the type of estimator. See \code{\link[stats]{quantile}} for more details.} \item{...}{Additional arguments passed to \code{\link[stats]{quantile}}.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} \item{drop}{If TRUE, singleton dimensions in the result are dropped, otherwise not.} } \value{ Returns a NxJ (KxJ) \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) for which the J quantiles are calculated. The return type is either integer or numeric depending on \code{type}. } \description{ Estimates quantiles for each row (column) in a matrix. } \examples{ set.seed(1) x <- matrix(rnorm(50 * 40), nrow = 50, ncol = 40) str(x) probs <- c(0.25, 0.5, 0.75) # Row quantiles q <- rowQuantiles(x, probs = probs) print(q) q_0 <- apply(x, MARGIN = 1, FUN = quantile, probs = probs) stopifnot(all.equal(q_0, t(q))) # Column IQRs q <- colQuantiles(x, probs = probs) print(q) q_0 <- apply(x, MARGIN = 2, FUN = quantile, probs = probs) stopifnot(all.equal(q_0, t(q))) } \seealso{ \code{\link[stats]{quantile}}. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/matrixStats-package.Rd0000644000176200001440000000167313615621102017557 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/999.package.R \docType{package} \name{matrixStats-package} \alias{matrixStats-package} \alias{matrixStats} \title{Package matrixStats} \description{ High-performing functions operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions optimized per data type and for subsetted calculations such that both memory usage and processing time is minimized. There are also optimized vector-based methods, e.g. binMeans(), madDiff() and weightedMedian(). } \section{How to cite this package}{ Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and Columns of Matrices (and to Vectors). R package version 0.52.2. https://github.com/HenrikBengtsson/matrixStats } \author{ Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder } \keyword{package} matrixStats/man/indexByRow.Rd0000644000176200001440000000256214055352045015741 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/indexByRow.R \name{indexByRow} \alias{indexByRow} \title{Translates matrix indices by rows into indices by columns} \usage{ indexByRow(dim, idxs = NULL, ...) } \arguments{ \item{dim}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length two specifying the length of the "template" matrix.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{...}{Not used.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of indices. } \description{ Translates matrix indices by rows into indices by columns. } \section{Known limitations}{ The current implementation does not support long-vector indices, because both input and output indices are of type integers. This means that the indices in argument \code{idxs} can only be in range [1,2^31-1]. Using a greater value will be coerced to \code{NA_integer_}. Moreover, returned indices can only be in the same range [1,2^31-1]. } \examples{ dim <- c(5, 4) X <- matrix(NA_integer_, nrow = dim[1], ncol = dim[2]) Y <- t(X) idxs <- seq_along(X) # Assign by columns X[idxs] <- idxs print(X) # Assign by rows Y[indexByRow(dim(Y), idxs)] <- idxs print(Y) stopifnot(X == t(Y)) } \author{ Henrik Bengtsson } \keyword{iteration} \keyword{logic} matrixStats/man/binMeans.Rd0000644000176200001440000000500014053334714015372 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/binMeans.R \name{binMeans} \alias{binMeans} \title{Fast mean calculations in non-overlapping bins} \usage{ binMeans(y, x, idxs = NULL, bx, na.rm = TRUE, count = TRUE, right = FALSE, ...) } \arguments{ \item{y}{A \code{\link[base]{numeric}} or \code{\link[base]{logical}} \code{\link[base]{vector}} of K values to calculate means on.} \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values in \code{y} are dropped before calculating the mean, otherwise not.} \item{count}{If \code{\link[base:logical]{TRUE}}, the number of data points in each bins is returned as attribute \code{count}, which is an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B.} \item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open), otherwise left-closed (right open).} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length B. } \description{ Computes the sample means in non-overlapping bins } \details{ \code{binMeans(x, bx, right = TRUE)} gives equivalent results as \code{rev(binMeans(-x, bx = sort(-bx), right = FALSE))}, but is faster. } \section{Missing and non-finite values}{ Data points where either of \code{y} and \code{x} is missing are dropped (and therefore are also not counted). Non-finite values in \code{y} are not allowed and gives an error. Missing values in \code{bx} are not allowed and gives an error. } \examples{ x <- 1:200 mu <- double(length(x)) mu[1:50] <- 5 mu[101:150] <- -5 y <- mu + rnorm(length(x)) # Binning bx <- c(0, 50, 100, 150, 200) + 0.5 y_s <- binMeans(y, x = x, bx = bx) plot(x, y) for (kk in seq_along(y_s)) { lines(bx[c(kk, kk + 1)], y_s[c(kk, kk)], col = "blue", lwd = 2) } } \references{ [1] R-devel thread \emph{Fastest non-overlapping binning mean function out there?} on Oct 3, 2012\cr } \seealso{ \code{\link{binCounts}}(). \code{\link[stats]{aggregate}} and \code{\link[base]{mean}}(). } \author{ Henrik Bengtsson with initial code contributions by Martin Morgan [1]. } \keyword{univar} matrixStats/man/rowSds.Rd0000644000176200001440000000465114107236216015130 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowMads.R, R/rowSds.R \name{rowMads} \alias{rowMads} \alias{colMads} \alias{rowSds} \alias{colSds} \title{Standard deviation estimates for each row (column) in a matrix} \usage{ rowMads(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colMads(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) rowSds(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) colSds(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{center}{(optional) The center, defaults to the row means for the SD estimators and row medians for the MAD estimators.} \item{constant}{A scale factor. See \code{\link[stats]{mad}} for details.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Additional arguments passed to \code{rowMeans()} and \code{rowSums()}.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Standard deviation estimates for each row (column) in a matrix. } \seealso{ \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats:cor]{var}}. \code{\link{rowIQRs}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/validateIndices.Rd0000644000176200001440000000162014111755776016744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/validateIndices.R \name{validateIndices} \alias{validateIndices} \title{Validate indices} \usage{ validateIndices(idxs = NULL, maxIdx, allowOutOfBound = TRUE) } \arguments{ \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{maxIdx}{The possible max index.} \item{allowOutOfBound}{Allow positive out of bound to indicate \code{\link[base]{NA}}.} } \value{ Returns a validated integers list indicating the indices. If some of the indices cannot be represented as an integer, the indices are returned as doubles. } \description{ Computes validated positive indices from given indices. } \examples{ idxs <- validateIndices(c(-4, 0, -3, -1), 5) # [2, 5] idxs <- validateIndices(c(4, 4, 8, 2, 3), 8) # [4, 4, 8, 2, 3] } \keyword{internal} matrixStats/man/binCounts.Rd0000644000176200001440000000364114053334714015613 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/binCounts.R \name{binCounts} \alias{binCounts} \title{Fast element counting in non-overlapping bins} \usage{ binCounts(x, idxs = NULL, bx, right = FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned and counted.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}.} \item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open), otherwise left-closed (right open).} \item{...}{Not used.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B with non-negative integers. } \description{ Counts the number of elements in non-overlapping bins } \details{ \code{binCounts(x, bx, right = TRUE)} gives equivalent results as \code{rev(binCounts(-x, bx = rev(-bx), right = FALSE))}, but is faster and more memory efficient. } \section{Missing and non-finite values}{ Missing values in \code{x} are ignored/dropped. Missing values in \code{bx} are not allowed and gives an error. } \seealso{ An alternative for counting occurrences within bins is \code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks = bx, plot = FALSE)$counts}. That approach is ~30-60\% slower than \code{binCounts(..., right = TRUE)}. To count occurrences of indices \code{x} (positive \code{\link[base]{integer}}s) in \code{[1, B]}, use \code{tabulate(x, nbins = B)}, where \code{x} does \emph{not} have to be sorted first. For details, see \code{\link[base]{tabulate}}(). To average values within bins, see \code{\link{binMeans}}(). } \author{ Henrik Bengtsson } \keyword{univar} matrixStats/man/mean2.Rd0000644000176200001440000000432414055352045014647 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mean2.R \name{mean2} \alias{mean2} \title{Fast averaging over subset of vector elements} \usage{ mean2(x, idxs = NULL, na.rm = FALSE, refine = TRUE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Computes the sample mean of all or a subset of values. } \details{ \code{mean2(x, idxs)} gives equivalent results as \code{mean(x[idxs])}, but is faster and more memory efficient since it avoids the actual subsetting which requires copying of elements and garbage collection thereof. If \code{x} is \code{\link[base]{numeric}} and \code{refine = TRUE}, then a two-pass scan is used to calculate the average. The first scan calculates the total sum and divides by the number of (non-missing) values. In the second scan, this average is refined by adding the residuals towards the first average. The \code{\link[base]{mean}}() uses this approach. \code{mean2(..., refine = FALSE)} is almost twice as fast as \code{mean2(..., refine = TRUE)}. } \examples{ x <- 1:10 n <- length(x) idxs <- seq(from = 1, to = n, by = 2) s1 <- mean(x[idxs]) # 25 s2 <- mean2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) idxs <- seq(from = n, to = 1, by = -2) s1 <- mean(x[idxs]) # 25 s2 <- mean2(x, idxs = idxs) # 25 stopifnot(identical(s1, s2)) s1 <- mean(x) # 55 s2 <- mean2(x) # 55 stopifnot(identical(s1, s2)) } \seealso{ \code{\link[base]{mean}}(). To efficiently sum over a subset, see \code{\link{sum2}}(). } \author{ Henrik Bengtsson } \keyword{internal} \keyword{univar} matrixStats/man/rowMedians.Rd0000644000176200001440000000434414107236216015756 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowMedians.R \name{rowMedians} \alias{rowMedians} \alias{colMedians} \title{Calculates the median for each row (column) in a matrix} \usage{ rowMedians(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colMedians(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows, cols}{A \code{\link[base]{vector}} indicating subset of rows (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the median for each row (column) in a matrix. } \details{ The implementation of \code{rowMedians()} and \code{colMedians()} is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a special implementation for \code{\link[base]{integer}} matrices. That is, if \code{x} is an \code{\link[base]{integer}} \code{\link[base]{matrix}}, then \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))}) would require three times the memory of \code{rowMedians(x)} (\code{colMedians(x)}), but all this is avoided. } \seealso{ See \code{\link{rowWeightedMedians}()} and \code{colWeightedMedians()} for weighted medians. For mean estimates, see \code{\link{rowMeans2}()} and \code{\link[base:colSums]{rowMeans}()}. } \author{ Henrik Bengtsson, Harris Jaffee } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/x_OP_y.Rd0000644000176200001440000000435214055352045015043 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/x_OP_y.R \name{x_OP_y} \alias{x_OP_y} \alias{t_tx_OP_y} \title{Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)'} \usage{ x_OP_y(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) t_tx_OP_y(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) } \arguments{ \item{x}{A \code{\link[base]{numeric}} or \code{\link[base]{logical}} NxK \code{\link[base]{matrix}}.} \item{y}{A \code{\link[base]{numeric}} or \code{\link[base]{logical}} \code{\link[base]{vector}} of length L.} \item{OP}{A \code{\link[base]{character}} specifying which operator to use.} \item{xrows, xcols}{A \code{\link[base]{vector}} indicating subset of rows (and/or columns) to operate over 'x'. If \code{\link[base]{NULL}}, no subsetting is done.} \item{commute}{If \code{\link[base:logical]{TRUE}}, 'y OP x' ('t(y OP t(x))') is calculated, otherwise 'x OP y' ('t(t(x) OP y)').} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over 'y'. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. } \description{ Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)', where OP can be +, -, *, and /. For + and *, na.rm = TRUE will drop missing values first. } \section{Missing values}{ If \code{na.rm = TRUE}, then missing values are "dropped" before applying the operator to each pair of values. For instance, if \code{x[1, 1]} is a missing value, then the result of \code{x[1, 1] + y[1]} equals \code{y[1]}. If also \code{y[1]} is a missing value, then the result is a missing value. This only applies to additions and multiplications. For subtractions and divisions, argument \code{na.rm} is ignored. } \examples{ x <- matrix(c(1, 2, 3, NA, 5, 6), nrow = 3, ncol = 2) # Add 'y' to each column y <- 1:2 z0 <- x + y z1 <- x_OP_y(x, y, OP = "+") print(z1) stopifnot(all.equal(z1, z0)) # Add 'y' to each row y <- 1:3 z0 <- t(t(x) + y) z1 <- t_tx_OP_y(x, y, OP = "+") print(z1) stopifnot(all.equal(z1, z0)) } \author{ Henrik Bengtsson } \keyword{internal} matrixStats/man/diff2.Rd0000644000176200001440000000163214053334714014637 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/diff2.R \name{diff2} \alias{diff2} \title{Fast lagged differences} \usage{ diff2(x, idxs = NULL, lag = 1L, differences = 1L, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{lag}{An \code{\link[base]{integer}} specifying the lag.} \item{differences}{An \code{\link[base]{integer}} specifying the order of difference.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N - \code{differences}. } \description{ Computes the lagged and iterated differences. } \examples{ diff2(1:10) } \seealso{ \code{\link[base]{diff}}(). } \author{ Henrik Bengtsson } \keyword{internal} \keyword{univar} matrixStats/man/rowAlls.Rd0000644000176200001440000000675414107236216015300 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowAlls.R \name{rowAlls} \alias{rowAlls} \alias{colAlls} \alias{allValue} \alias{rowAnys} \alias{colAnys} \alias{anyValue} \title{Checks if a value exists / does not exist in each row (column) of a matrix} \usage{ rowAlls(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colAlls(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) allValue(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) rowAnys(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) colAnys(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) anyValue(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{value}{A value to search for.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Not used.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} } \value{ \code{rowAlls()} (\code{colAlls()}) returns an \code{\link[base]{logical}} \code{\link[base]{vector}} of length N (K). Analogously for \code{rowAnys()} (\code{rowAlls()}). } \description{ Checks if a value exists / does not exist in each row (column) of a matrix. } \details{ These functions takes either a matrix or a vector as input. If a vector, then argument \code{dim.} must be specified and fulfill \code{prod(dim.) == length(x)}. The result will be identical to the results obtained when passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, but avoids having to temporarily create/allocate a matrix, if only such is needed only for these calculations. } \section{Logical \code{value}}{ When \code{value} is logical, the result is as if the function is applied on \code{as.logical(x)}. More specifically, if \code{x} is numeric, then all zeros are treated as \code{FALSE}, non-zero values as \code{TRUE}, and all missing values as \code{NA}. } \examples{ x <- matrix(FALSE, nrow = 10, ncol = 5) x[3:7, c(2, 4)] <- TRUE x[2:4, ] <- TRUE x[, 1] <- TRUE x[5, ] <- FALSE x[, 5] <- FALSE print(x) print(rowCounts(x)) # 1 4 4 4 0 3 3 1 1 1 print(colCounts(x)) # 9 5 3 5 0 print(rowAnys(x)) print(which(rowAnys(x))) # 1 2 3 4 6 7 8 9 10 print(colAnys(x)) print(which(colAnys(x))) # 1 2 3 4 } \seealso{ rowCounts } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{logic} \keyword{univar} matrixStats/man/weightedMad.Rd0000644000176200001440000000561014107236216016065 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weightedMad.R \name{weightedMad} \alias{weightedMad} \alias{rowWeightedMads} \alias{colWeightedMads} \title{Weighted Median Absolute Deviation (MAD)} \usage{ weightedMad(x, w = NULL, idxs = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ...) rowWeightedMads(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = NA) colWeightedMads(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = NA) } \arguments{ \item{x}{\code{\link[base]{vector}} of type \code{\link[base]{integer}}, \code{\link[base]{numeric}}, or \code{\link[base]{logical}}.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{constant}{A \code{\link[base]{numeric}} scale factor, cf. \code{\link[stats]{mad}}.} \item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center location of the data. If \code{\link[base]{NULL}}, it is estimated from data.} \item{...}{Not used.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Computes a weighted MAD of a numeric vector. } \section{Missing values}{ Missing values are dropped at the very beginning, if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not. } \examples{ x <- 1:10 n <- length(x) m1 <- mad(x) m2 <- weightedMad(x) stopifnot(identical(m1, m2)) w <- rep(1, times = n) m1 <- weightedMad(x, w) stopifnot(identical(m1, m2)) # All weight on the first value w[1] <- Inf m <- weightedMad(x, w) stopifnot(m == 0) # All weight on the first two values w[1:2] <- Inf m1 <- mad(x[1:2]) m2 <- weightedMad(x, w) stopifnot(identical(m1, m2)) # All weights set to zero w <- rep(0, times = n) m <- weightedMad(x, w) stopifnot(is.na(m)) } \seealso{ For the non-weighted MAD, see \code{\link[stats]{mad}}. Internally \code{\link{weightedMedian}}() is used to calculate the weighted median. } \author{ Henrik Bengtsson } \keyword{robust} \keyword{univar} matrixStats/man/logSumExp.Rd0000644000176200001440000000622014055352045015565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/logSumExp.R \name{logSumExp} \alias{logSumExp} \title{Accurately computes the logarithm of the sum of exponentials} \usage{ logSumExp(lx, idxs = NULL, na.rm = FALSE, ...) } \arguments{ \item{lx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}}. Typically \code{lx} are \eqn{log(x)} values.} \item{idxs}{A \code{\link[base]{vector}} indicating subset of elements to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \description{ Accurately computes the logarithm of the sum of exponentials, that is, \eqn{log(sum(exp(lx)))}. If \eqn{lx = log(x)}, then this is equivalently to calculating \eqn{log(sum(x))}. } \details{ This function, which avoid numerical underflow, is often used when computing the logarithm of the sum of small numbers (\eqn{|x| << 1}) such as probabilities. This is function is more accurate than \code{log(sum(exp(lx)))} when the values of \eqn{x = exp(lx)} are \eqn{|x| << 1}. The implementation of this function is based on the observation that \deqn{ log(a + b) = [ la = log(a), lb = log(b) ] = log( exp(la) + exp(lb) ) = la + log ( 1 + exp(lb - la) ) } Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is less likely that the computation of \eqn{1 + exp(lb - la)} will not underflow/overflow numerically. Because of this, the overall result from this function should be more accurate. Analogously to this, the implementation of this function finds the maximum value of \code{lx} and subtracts it from the remaining values in \code{lx}. } \section{Benchmarking}{ This method is optimized for correctness, that avoiding underflowing. It is implemented in native code that is optimized for speed and memory. } \examples{ ## EXAMPLE #1 lx <- c(1000.01, 1000.02) y0 <- log(sum(exp(lx))) print(y0) ## Inf y1 <- logSumExp(lx) print(y1) ## 1000.708 ## EXAMPLE #2 lx <- c(-1000.01, -1000.02) y0 <- log(sum(exp(lx))) print(y0) ## -Inf y1 <- logSumExp(lx) print(y1) ## -999.3218 ## EXAMPLE #3 ## R-help thread 'Beyond double-precision?' on May 9, 2009. set.seed(1) x <- runif(50) ## The logarithm of the harmonic mean y0 <- log(1 / mean(1 / x)) print(y0) ## -1.600885 lx <- log(x) y1 <- log(length(x)) - logSumExp(-lx) print(y1) ## [1] -1.600885 # Sanity check stopifnot(all.equal(y1, y0)) } \references{ [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr [2] Laurent El Ghaoui, \emph{Hyper-Textbook: Optimization Models and Applications}, University of California at Berkeley, August 2012. (Chapter 'Log-Sum-Exp (LSE) Function and Properties') \cr [3] R-help thread \emph{logsumexp function in R}, 2011-02-17. \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr } \seealso{ To compute this function on rows or columns of a matrix, see \code{\link{rowLogSumExps}}(). For adding \emph{two} double values in native code, R provides the C function \code{logspace_add()} [1]. For properties of the log-sum-exponential function, see [2]. } \author{ Henrik Bengtsson } matrixStats/man/allocMatrix.Rd0000644000176200001440000000205414055352045016122 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/allocMatrix.R \name{allocMatrix} \alias{allocMatrix} \alias{allocVector} \alias{allocArray} \title{Allocates an empty vector, matrix or array} \usage{ allocMatrix(nrow, ncol, value = 0, ...) allocVector(length, value = 0, ...) allocArray(dim, value = 0, ...) } \arguments{ \item{value}{A \code{\link[base]{numeric}} scalar that all elements will have as value.} \item{length, nrow, ncol, dim}{\code{\link[base]{numeric}}s specifying the dimension of the created \code{\link[base]{vector}}, \code{\link[base]{matrix}} or \code{\link[base]{array}}.} } \value{ Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}} respectively of the same data type as \code{value}. } \description{ Allocates an empty vector, matrix or array faster than the corresponding function in R. } \seealso{ See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}}. } \author{ Henrik Bengtsson } \keyword{internal} \keyword{programming} matrixStats/man/rowWeightedMedians.Rd0000644000176200001440000000537614107236216017445 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowWeightedMedians.R \name{rowWeightedMedians} \alias{rowWeightedMedians} \alias{colWeightedMedians} \title{Calculates the weighted medians for each row (column) in a matrix} \usage{ rowWeightedMedians(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) colWeightedMedians(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{...}{Additional arguments passed to \code{\link{weightedMedian}}().} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Calculates the weighted medians for each row (column) in a matrix. } \details{ The implementations of these methods are optimized for both speed and memory. If no weights are given, the corresponding \code{\link{rowMedians}}()/\code{colMedians()} is used. } \examples{ x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # Non-weighted row averages mu_0 <- rowMedians(x) mu <- rowWeightedMedians(x) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (uniform weights) w <- rep(2.5, times = ncol(x)) mu <- rowWeightedMedians(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(1, 1, 0, 1) mu_0 <- rowMedians(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMedians(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted row averages (excluding some columns) w <- c(0, 1, 0, 0) mu_0 <- rowMedians(x[, (w == 1), drop = FALSE]) mu <- rowWeightedMedians(x, w = w) stopifnot(all.equal(mu, mu_0)) # Weighted averages by rows and columns w <- 1:4 mu_1 <- rowWeightedMedians(x, w = w) mu_2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(mu_2, mu_1)) } \seealso{ Internally, \code{\link{weightedMedian}}() is used. See \code{\link{rowMedians}}() and \code{colMedians()} for non-weighted medians. } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowVars.Rd0000644000176200001440000000623014107236216015305 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowVars.R \name{rowVars} \alias{rowVars} \alias{colVars} \title{Variance estimates for each row (column) in a matrix} \usage{ rowVars(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) colVars(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, an N * K \code{\link[base]{vector}}.} \item{rows}{A \code{\link[base]{vector}} indicating subset of rows to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{cols}{A \code{\link[base]{vector}} indicating subset of columns to operate over. If \code{\link[base]{NULL}}, no subsetting is done.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded.} \item{center}{(optional; a vector or length N (K)) If the row (column) means are already estimated, they can be pre-specified using this argument. This avoid re-estimating them again. (*Warning: If biased estimated are given, the estimate of the spread will also be biased.*) If NULL (default), the row/column means are estimated internally.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument being named with a period at the end is purely technical (we get a run-time error if we try to name it \code{dim}).} \item{...}{Additional arguments passed to \code{rowMeans()} and \code{rowSums()}.} \item{useNames}{If \code{\link[base]{NA}}, the default behavior of the function about naming support is remained. If \code{\link[base:logical]{FALSE}}, no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names attributes of result are set.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \description{ Variance estimates for each row (column) in a matrix. } \examples{ set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # Row averages print(rowMeans(x)) print(rowMedians(x)) # Column averages print(colMeans(x)) print(colMedians(x)) # Row variabilities print(rowVars(x)) print(rowSds(x)) print(rowMads(x)) print(rowIQRs(x)) # Column variabilities print(rowVars(x)) print(colSds(x)) print(colMads(x)) print(colIQRs(x)) # Row ranges print(rowRanges(x)) print(cbind(rowMins(x), rowMaxs(x))) print(cbind(rowOrderStats(x, which = 1), rowOrderStats(x, which = ncol(x)))) # Column ranges print(colRanges(x)) print(cbind(colMins(x), colMaxs(x))) print(cbind(colOrderStats(x, which = 1), colOrderStats(x, which = nrow(x)))) x <- matrix(rnorm(2000), nrow = 50, ncol = 40) # Row standard deviations d <- rowDiffs(x) s1 <- rowSds(d) / sqrt(2) s2 <- rowSds(x) print(summary(s1 - s2)) # Column standard deviations d <- colDiffs(x) s1 <- colSds(d) / sqrt(2) s2 <- colSds(x) print(summary(s1 - s2)) } \seealso{ See \code{rowMeans()} and \code{rowSums()} in \code{\link[base]{colSums}}(). } \author{ Henrik Bengtsson } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/DESCRIPTION0000644000176200001440000000405214121073612014301 0ustar liggesusersPackage: matrixStats Version: 0.61.0 Depends: R (>= 2.12.0) Suggests: base64enc, ggplot2, knitr, markdown, microbenchmark, R.devices, R.rsp VignetteBuilder: R.rsp Title: Functions that Apply to Rows and Columns of Matrices (and to Vectors) Authors@R: c( person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"), email="henrikb@braju.com"), person("Constantin", "Ahlmann-Eltze", role = "ctb"), person("Hector", "Corrada Bravo", role="ctb"), person("Robert", "Gentleman", role="ctb"), person("Jan", "Gleixner", role="ctb"), person("Peter", "Hickey", role="ctb"), person("Ola", "Hossjer", role="ctb"), person("Harris", "Jaffee", role="ctb"), person("Dongcan", "Jiang", role="ctb"), person("Peter", "Langfelder", role="ctb"), person("Brian", "Montgomery", role="ctb"), person("Angelina", "Panagopoulou", role="ctb"), person("Hugh", "Parsonage", role="ctb"), person("Jakob Peder", "Pettersen", role="ctb")) Author: Henrik Bengtsson [aut, cre, cph], Constantin Ahlmann-Eltze [ctb], Hector Corrada Bravo [ctb], Robert Gentleman [ctb], Jan Gleixner [ctb], Peter Hickey [ctb], Ola Hossjer [ctb], Harris Jaffee [ctb], Dongcan Jiang [ctb], Peter Langfelder [ctb], Brian Montgomery [ctb], Angelina Panagopoulou [ctb], Hugh Parsonage [ctb], Jakob Peder Pettersen [ctb] Maintainer: Henrik Bengtsson Description: High-performing functions operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions optimized per data type and for subsetted calculations such that both memory usage and processing time is minimized. There are also optimized vector-based methods, e.g. binMeans(), madDiff() and weightedMedian(). License: Artistic-2.0 LazyLoad: TRUE NeedsCompilation: yes ByteCompile: TRUE URL: https://github.com/HenrikBengtsson/matrixStats BugReports: https://github.com/HenrikBengtsson/matrixStats/issues RoxygenNote: 7.1.2 Packaged: 2021-09-15 17:47:41 UTC; hb Repository: CRAN Date/Publication: 2021-09-17 11:10:02 UTC matrixStats/build/0000755000176200001440000000000014120430675013676 5ustar liggesusersmatrixStats/build/vignette.rds0000644000176200001440000000041614120430675016236 0ustar liggesusersmPn0R%.Rz⍈l4~ya ޵gg $$$)  \Km̻W +%/3 )vUorQ#ꃗ"*Wp+K'Iɧ[0xOGH>7T^ɌXdV]୴hr>ȣH A &v@KowcJ?/3Gehٛ Wc%}\pYY[uO4matrixStats/tests/0000755000176200001440000000000014117375107013745 5ustar liggesusersmatrixStats/tests/diff2_subset.R0000644000176200001440000000076014063411362016444 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) for (l in 1:2) { for (d in 1:2) { for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = diff2, fsure = base::diff, lag = l, differences = d) } } } matrixStats/tests/count.R0000644000176200001440000000512014111766417015220 0ustar liggesuserslibrary("matrixStats") count_R <- function(x, value = TRUE, na.rm = FALSE, ...) { if (is.na(value)) { counts <- sum(is.na(x)) } else { counts <- sum(x == value, na.rm = na.rm) } as.integer(counts) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer and numeric # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- runif(20 * 5, min = -3, max = 3) x[sample.int(length(x), size = 7)] <- 0 storage.mode(x) <- mode for (na.rm in c(FALSE, TRUE)) { # Count zeros n0 <- count_R(x, value = 0, na.rm = na.rm) n1 <- count(x, value = 0, na.rm = na.rm) stopifnot(identical(n1, n0)) all <- allValue(x, value = 0, na.rm = na.rm) any <- anyValue(x, value = 0, na.rm = na.rm) # Count NAs n0 <- count_R(x, value = NA, na.rm = na.rm) n1 <- count(x, value = NA, na.rm = na.rm) stopifnot(identical(n1, n0)) all <- allValue(x, value = NA, na.rm = na.rm) any <- anyValue(x, value = NA, na.rm = na.rm) if (mode == "integer") { ux <- unique(as.vector(x)) n0 <- n1 <- integer(length(x)) for (value in ux) { n0 <- n0 + count_R(x, value = value, na.rm = na.rm) n1 <- n1 + count(x, value = value, na.rm = na.rm) stopifnot(identical(n1, n0)) } stopifnot(all(n0 == ncol(x))) } # if (mode == "integer") } # for (na.rm ...) } # for (mode ...) # All NAs na_list <- list(NA_integer_, NA_real_, NaN) for (na_value in na_list) { x <- rep(na_value, times = 10L) for (na.rm in c(FALSE, TRUE)) { n0 <- count_R(x, na.rm = na.rm) n1 <- count(x, na.rm = na.rm) stopifnot(identical(n1, n0)) # Count NAs n0 <- count_R(x, value = NA, na.rm = na.rm) n1 <- count(x, value = NA, na.rm = na.rm) stopifnot(identical(n1, n0)) any <- anyValue(x, value = NA, na.rm = na.rm) all <- allValue(x, value = NA, na.rm = na.rm) stopifnot(any) stopifnot(all) } } # for (na_value ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- logical(length = 10L) x[3:7] <- TRUE # Row/column counts for (na.rm in c(FALSE, TRUE)) { n0 <- count_R(x, na.rm = na.rm) n1 <- count(x, na.rm = na.rm) stopifnot(identical(n1, n0)) n_true <- count(x, value = TRUE, na.rm = na.rm) n_false <- count(x, value = FALSE, na.rm = na.rm) stopifnot(n_true + n_false == ncol(x)) # Count NAs n0 <- count_R(x, value = NA, na.rm = na.rm) n1 <- count(x, value = NA, na.rm = na.rm) stopifnot(identical(n1, n0)) } matrixStats/tests/rowCounts.R0000644000176200001440000001456614111770540016101 0ustar liggesuserslibrary("matrixStats") rowCounts_R <- function(x, value = TRUE, na.rm = FALSE, ...) { if (is.na(value)) { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(is.na(x)) ) } else { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(x == value, na.rm = na.rm) ) } # Preserve names attribute names <- names(counts) counts <- as.integer(counts) if (isTRUE(useNames) && !is.null(names)) names(counts) <- names counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer and numeric # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(runif(10 * 5, min = -3, max = 3), nrow = 10L, ncol = 5L) x[sample.int(length(x), size = 7L)] <- 0 storage.mode(x) <- mode dimnames = list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Count zeros r0 <- rowCounts_R(x, value = 0, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = 0, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = 0, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) # Count NAs r0 <- rowCounts_R(x, value = NA, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = NA, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = NA, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) if (mode == "integer") { ux <- unique(as.vector(x)) r0 <- r1 <- r2 <- integer(nrow(x)) for (value in ux) { r0 <- r0 + rowCounts_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- r1 + rowCounts(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- r2 + colCounts(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } stopifnot(all(r0 == ncol(x))) } # if (mode == "integer") } # for (useNames ...) } # for (na.rm ...) } # for (setDimnames ...) } # for (mode ...) # All NAs na_list <- list(NA_integer_, NA_real_, NaN) for (na_value in na_list) { x <- matrix(na_value, nrow = 10L, ncol = 5L) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCounts_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) # Count NAs r0 <- rowCounts_R(x, value = NA, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = NA, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = NA, na.rm = na.rm, useNames = useNames) stopifnot(all(r0 == ncol(x))) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } } } } # for (na_value ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(FALSE, nrow = 10L, ncol = 4L) x[7:8, 2:3] <- TRUE x[1:3, ] <- TRUE x[, 1] <- TRUE x[4, ] <- FALSE x[, 4] <- FALSE x[2, ] <- FALSE x[3, ] <- TRUE # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column counts for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCounts_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) r_true <- rowCounts(x, value = TRUE, na.rm = na.rm, useNames = useNames) r_false <- rowCounts(x, value = FALSE, na.rm = na.rm, useNames = useNames) stopifnot(r_true + r_false == ncol(x)) c_true <- colCounts(x, value = TRUE, na.rm = na.rm, useNames = useNames) c_false <- colCounts(x, value = FALSE, na.rm = na.rm, useNames = useNames) stopifnot(c_true + c_false == nrow(x)) # Count NAs r0 <- rowCounts_R(x, value = NA, na.rm = na.rm, useNames = useNames) r1 <- rowCounts(x, value = NA, na.rm = na.rm, useNames = useNames) r2 <- colCounts(t(x), value = NA, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: character (not sure if this should be supported) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(letters, length.out = 10 * 5), nrow = 10L, ncol = 5L) x[2:3, 3:4] <- NA_character_ # Row/column counts for (na.rm in c(FALSE, TRUE)) { for (value in c("g", NA_character_)) { r0 <- rowCounts_R(x, value = value, na.rm = na.rm) r1 <- rowCounts(x, value = value, na.rm = na.rm) r2 <- colCounts(t(x), value = value, na.rm = na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) c <- count(x[1, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[1])) c <- count(x[2, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[2])) } } # NA row x <- matrix(0, nrow = 2L, ncol = 2L) x[1, ] <- NA_integer_ dimnames <- list(letters[1:2], LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCounts(x, value = 0, useNames = useNames) r1 <- rowCounts_R(x, value = 0, useNames = useNames) stopifnot(identical(r0, r1)) } } matrixStats/tests/rowSds_subset.R0000644000176200001440000000641314105674332016741 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 1L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as rowSds() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } else if (isFALSE(useNames)) names(sigma) <- NULL sigma } colSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 2L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as colSds() if (is.null(center) || nrow(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } else if (isFALSE(useNames)) names(sigma) <- NULL sigma } rowSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) sigma <- rowSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } colSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) sigma <- colSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowSds, fsure = rowSds_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowSds_center, fsure = rowSds_R, na.rm = na.rm, center = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colSds, fsure = rowSds_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colSds_center, fsure = rowSds_R, na.rm = na.rm, center = TRUE, useNames = useNames) } } } } } matrixStats/tests/rowLogSumExps_subset.R0000644000176200001440000000302214105674332020247 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowLogSumExps_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = function(rx, ...) { log(sum(exp(rx), ...)) }, ...) if (isFALSE(useNames)) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowLogSumExps, fsure = rowLogSumExps_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colLogSumExps, fsure = rowLogSumExps_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/rowCumsums.R0000644000176200001440000001212314111772572016254 0ustar liggesuserslibrary("matrixStats") rowCumsums_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumsum)) }) # Preserve dimnames attribute? dim(y) <- dim(x) if (isTRUE(useNames)) { dimnames <- dimnames(x) if (!is.null(dimnames)) dimnames(y) <- dimnames } y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dimnames <- list(letters[1:10], LETTERS[1:5]) # to check dimnames attribute for (mode in c("logical", "integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(colCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(colCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dimnames <- list("a", "A") # to check dimnames attribute for (mode in c("logical", "integer", "double")) { x <- matrix(0, nrow = 1L, ncol = 1L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) r0 <- rowCumsums_R(x) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # Check dimnames attribute dimnames(x) <- dimnames # r0 <- rowCumsums_R(x) # > r0 # a # [1,] 0 r1 <- rowCumsums(x, useNames = TRUE) r2 <- t(colCumsums(t(x), useNames = TRUE)) stopifnot(identical(dimnames(r1), dimnames)) stopifnot(identical(dimnames(r2), dimnames)) dimnames(x) <- NULL } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - names <- LETTERS[1:5] # to check dimnames attribute for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") value <- 0 storage.mode(value) <- mode value2 <- value if (mode == "logical") value2 <- 0L # A 0x0 matrix x <- matrix(value, nrow = 0L, ncol = 0L) str(x) r0 <- matrix(value2, nrow = nrow(x), ncol = ncol(x)) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow = 0L, ncol = 5L) str(x) colnames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) colnames(x) <- colnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(rowCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) # A Nx0 matrix x <- matrix(value, nrow = 5L, ncol = 0L) str(x) rownames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) rownames(x) <- rownames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCumsums_R(x, useNames = useNames) r1 <- rowCumsums(x, useNames = useNames) r2 <- t(rowCumsums(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) matrixStats/tests/weightedMedian_subset.R0000644000176200001440000000252714063411362020373 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf for (idxs in index_cases) { validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = TRUE) validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = FALSE) for (ties in c("weighted", "mean", "min", "max")) { validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = TRUE, ties = ties) validateIndicesTestVector_w(x, w, idxs, ftest = weightedMedian, fsure = weightedMedian, na.rm = FALSE, ties = ties) } } } matrixStats/tests/rowLogSumExps.R0000644000176200001440000001334214111771400016657 0ustar liggesusers# Test inspired by the harmonic mean example in R-help # thread '[R] Beyond double-precision?' on May 9, 2009. library("matrixStats") library("stats") logSumExp0 <- function(lx) { idx_max <- which.max(lx) log1p(sum(exp(lx[-idx_max] - lx[idx_max]))) + lx[idx_max] } n <- 200L set.seed(1) for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(runif(n, min = 1.0, max = 3.0), nrow = 20L) storage.mode(x) <- mode str(x) # The logarithm of the harmonic mean by rows y_h <- log(1 / rowMeans(1 / x)) str(y_h) lx_neg <- -log(x) y0 <- log(ncol(x)) - apply(lx_neg, MARGIN = 1L, FUN = logSumExp0) stopifnot(all.equal(y0, y_h)) y1 <- log(ncol(x)) - apply(lx_neg, MARGIN = 1L, FUN = logSumExp) stopifnot(all.equal(y1, y0)) y2 <- log(ncol(x)) - rowLogSumExps(lx_neg) stopifnot(all.equal(y2, y0)) y3 <- log(ncol(x)) - colLogSumExps(t(lx_neg)) stopifnot(all.equal(y3, y0)) # The logarithm of the harmonic mean by columns y_h <- log(1 / colMeans(1 / x)) str(y_h) y0 <- log(nrow(x)) - apply(lx_neg, MARGIN = 2L, FUN = logSumExp0) stopifnot(all.equal(y0, y_h)) y1 <- log(nrow(x)) - apply(lx_neg, MARGIN = 2L, FUN = logSumExp) stopifnot(all.equal(y1, y0)) y2 <- log(nrow(x)) - colLogSumExps(lx_neg) stopifnot(all.equal(y2, y0)) y3 <- log(nrow(x)) - rowLogSumExps(t(lx_neg)) stopifnot(all.equal(y3, y0)) # Testing names rownames(lx_neg) <- seq_len(nrow(x)) colnames(lx_neg) <- seq_len(ncol(x)) y2 <- rowLogSumExps(lx_neg, useNames = TRUE) stopifnot(identical(names(y2), rownames(lx_neg))) y3 <- colLogSumExps(t(lx_neg), useNames = TRUE) stopifnot(identical(names(y3), rownames(lx_neg))) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Zero-size matrices lx <- matrix(numeric(0L), nrow = 0L, ncol = 0L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) ## Zero-height matrices lx <- matrix(numeric(0L), nrow = 0L, ncol = 5L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == -Inf)) ## Zero-width matrices lx <- matrix(numeric(0L), nrow = 5L, ncol = 0L) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == -Inf)) ## Matrices with one element lx <- matrix(1.0, nrow = 1L, ncol = 1L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == lx)) y <- colLogSumExps(lx) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == lx)) ## All missing values lx <- matrix(NA_real_, nrow = 1L, ncol = 1L) y <- rowLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(identical(y, -Inf)) lx <- matrix(NA_real_, nrow = 1L, ncol = 1L) y <- colLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(identical(y, -Inf)) lx <- matrix(NA_real_, nrow = 2L, ncol = 2L) y <- rowLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == -Inf)) y <- rowLogSumExps(lx, na.rm = FALSE) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(is.na(y) & !is.nan(y))) lx <- matrix(NA_real_, nrow = 2L, ncol = 2L) y <- colLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == -Inf)) y <- colLogSumExps(lx, na.rm = FALSE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(is.na(y) & !is.nan(y))) ## +Inf values lx <- matrix(c(1, 2, +Inf), nrow = 3L, ncol = 2L) y <- colLogSumExps(lx, na.rm = TRUE) print(y) stopifnot(length(y) == ncol(lx)) stopifnot(all(y == +Inf)) ## multiple -Inf values lx <- matrix(c(-Inf, -Inf), nrow = 2L, ncol = 3L) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == -Inf)) lx <- matrix(c(-Inf, 5, -Inf), nrow = 2L, ncol = 3L, byrow = TRUE) y <- rowLogSumExps(lx) print(y) stopifnot(length(y) == nrow(lx)) stopifnot(all(y == 5)) ## Bug report #104 (https://github.com/HenrikBengtsson/matrixStats/issues/104) ## (This would core dump on Windows) x <- matrix(0.0, nrow = 2L, ncol = 32762L) y <- colLogSumExps(x) str(y) ## Bug report #120 (https://github.com/HenrikBengtsson/matrixStats/issues/120) ## (This would error if x had rownames/colnames and non-NULL rows/cols were ## used) x <- matrix(runif(6), nrow = 2L, ncol = 3L, dimnames = list(c("A", "B"), c("a", "b", "c"))) y <- colLogSumExps(x, cols = 3:1, useNames = TRUE) stopifnot(names(y) == c("c", "b", "a")) y <- rowLogSumExps(x, rows = 2, useNames = TRUE) stopifnot(names(y) == "B") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Check names attributes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowLogSumExps_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = function(rx, ...) { log(sum(exp(rx), ...)) }, ...) if (isFALSE(useNames)) names(res) <- NULL res } x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowLogSumExps_R(x, useNames = useNames) y1 <- rowLogSumExps(x, useNames = useNames) y2 <- colLogSumExps(t(x), useNames = useNames) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y2, y0)) } } matrixStats/tests/x_OP_y.R0000644000176200001440000000731314063411362015263 0ustar liggesuserslibrary("matrixStats") x_OP_y_R <- function(x, y, OP, na.rm = FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() t_tx_OP_y_R <- function(x, y, OP, na.rm = FALSE) { t(x_OP_y_R(x = t(x), y = y, OP = OP, na.rm = na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # No missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:16, nrow = 4, ncol = 4) y <- 1:nrow(x) storage.mode(y) <- storage.mode(x) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP = '%s', na.rm = %s\n", OP, na.rm)) a0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) a1 <- x_OP_y(x, y, OP, na.rm = na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x, y, OP, na.rm = na.rm) b1 <- t_tx_OP_y(x, y, OP, na.rm = na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values in x, y, or both. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (which in c("x", "y", "both")) { x <- matrix(1:16, nrow = 4, ncol = 4) y <- 1:nrow(x) storage.mode(y) <- storage.mode(x) if (which == "x") { x[3:6] <- NA_real_ } else if (which == "y") { y[c(1, 3)] <- NA_real_ } else if (which == "both") { x[3:6] <- NA_real_ y[c(1, 3)] <- NA_real_ } for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP = '%s', na.rm = %s\n", OP, na.rm)) a0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) a1 <- x_OP_y(x, y, OP, na.rm = na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x, y, OP, na.rm = na.rm) b1 <- t_tx_OP_y(x, y, OP, na.rm = na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Length differences # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:8, nrow = 2, ncol = 4) y <- 1:ncol(x) storage.mode(y) <- storage.mode(x) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP = '%s', na.rm = %s\n", OP, na.rm)) a0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) a1 <- x_OP_y(x, y, OP, na.rm = na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x, y, OP, na.rm = na.rm) b1 <- t_tx_OP_y(x, y, OP, na.rm = na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - xs <- list( A = matrix(1:2, nrow = 2, ncol = 2), B = matrix(NA_integer_, nrow = 2, ncol = 2) ) ys <- list( A = 1L, B = NA_integer_ ) for (x in xs) { for (y in ys) { for (mode in c("logical", "integer", "double")) { storage.mode(x) <- mode storage.mode(y) <- mode str(list(x = x, y = y)) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("mode = '%s', OP = '%s', na.rm = %s\n", mode, OP, na.rm)) suppressWarnings({ z0 <- x_OP_y_R(x, y, OP, na.rm = na.rm) z <- x_OP_y(x, y, OP, na.rm = na.rm) }) str(z) stopifnot(all.equal(z, z0)) } } } # for (mode ...) } # for (y ...) } # for (x ...) matrixStats/tests/rowIQRs_subset.R0000644000176200001440000000354314074054377017035 0ustar liggesuserslibrary("matrixStats") rowIQRs_R <- function(x, na.rm = FALSE, ..., useNames = NA) { quantile_na <- function(x, ..., na.rm = FALSE) { if (!na.rm && anyMissing(x)) return(c(NA_real_, NA_real_)) quantile(x, ..., na.rm = na.rm) } q <- apply(x, MARGIN = 1L, FUN = quantile_na, probs = c(0.25, 0.75), na.rm = na.rm) rownames(q) <- NULL # Not needed anymore # Preserve names attribute dim(q) <- c(2L, nrow(x)) names <- rownames(x) if (isTRUE(useNames) && !is.null(names)) colnames(q) <- names q[2L, , drop = TRUE] - q[1L, , drop = TRUE] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) for (idxs in index_cases) { for (na.rm in c(TRUE, FALSE)) { validateIndicesTestVector(x, idxs, ftest = iqr, fsure = function(x, na.rm) { dim(x) <- c(1L, length(x)) rowIQRs_R(x, na.rm = na.rm) }, na.rm = na.rm) } } x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowIQRs, fsure = rowIQRs_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colIQRs, fsure = rowIQRs_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/weightedVar_etal.R0000644000176200001440000000376014063411362017346 0ustar liggesuserslibrary("matrixStats") fcns <- list( weightedVar = weightedVar, weightedSd = weightedSd, weightedMad = weightedMad ) for (name in names(fcns)) { cat(sprintf("%s()...\n", name)) fcn <- fcns[[name]] for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") n <- 15L x <- runif(n, min = -5, max = 5) storage.mode(x) <- mode str(x) for (add_na in c(FALSE, TRUE)) { cat("add_na: ", add_na, "\n", sep = "") if (add_na) { x[c(5, 7)] <- NA } str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm: ", na.rm, "\n", sep = "") cat("Weights are not specified (all are 1)\n") m1 <- fcn(x, na.rm = na.rm) str(list(m1 = m1)) cat("All weights are 1\n") w <- rep(1, times = n) m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("First weight is 5\n") # Pull the mean towards zero w[1] <- 5 str(w) m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("All weights are 0\n") # All weights set to zero w <- rep(0, times = n) m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("First weight is 8.5\n") # Put even more weight on the zero w[1] <- 8.5 m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("First weight is Inf\n") # All weight on the first value w[1] <- Inf m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("Last weight is Inf\n") # All weight on the last value w[1] <- 1 w[n] <- Inf m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) cat("Last weight is NA\n") # All weight on the last value w[1] <- 1 w[n] <- NA_real_ m1 <- fcn(x, w, na.rm = na.rm) str(list(m1 = m1)) } # for (na.rm ...) } # for (add_na ...) } # for (mode ...) cat(sprintf("%s()...DONE\n", name)) } # for (name ...) matrixStats/tests/binMeans,binCounts_subset.R0000644000176200001440000000315714063411362021152 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Naive R implementation of binMeans() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binMeans0 <- function(y, x, bx, na.rm = TRUE, count = TRUE, right = FALSE) { n_smooth <- length(bx) - 1L res <- double(n_smooth) counts <- rep(NaN, times = n_smooth) if (na.rm) { keep <- !is.na(x) & !is.na(y) x <- x[keep] y <- y[keep] } # For each bin... for (kk in seq_len(n_smooth)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk + 1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk + 1L]) } y_kk <- y[idxs] res[kk] <- mean(y_kk) counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") y <- runif(6, min = -6, max = 6) x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" bx <- c(-6, 0, 3, 4, 10) for (idxs in index_cases) { for (na.rm in c(TRUE, FALSE)) { validateIndicesTestVector_w(y, x, idxs, ftest = binMeans, fsure = binMeans0, bx = bx, na.rm = na.rm, count = TRUE, right = FALSE) validateIndicesTestVector_w(y, x, idxs, ftest = binMeans, fsure = binMeans0, bx = bx, na.rm = na.rm, count = TRUE, right = TRUE) } } matrixStats/tests/rowProds.R0000644000176200001440000000670314074054377015722 0ustar liggesuserslibrary("matrixStats") rowProds_R <- function(x, FUN = prod, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = FUN, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } all.equal.na <- function(target, current, ...) { # Computations involving NaN may return NaN or NA, cf. ?is.nan current[is.nan(current)] <- NA_real_ target[is.nan(target)] <- NA_real_ all.equal(target, current, ...) } for (mode in c("integer", "double")) { # Missing values x <- matrix(c(1, NA, NaN, 1, 1, 0, 1, 0), nrow = 4, ncol = 2) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowProds_R(x, na.rm = TRUE, useNames = useNames) print(y0) y1 <- rowProds(x, na.rm = TRUE, useNames = useNames) print(y1) y2 <- colProds(t(x), na.rm = TRUE, useNames = useNames) print(y2) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y2, y1)) # Missing values y0 <- rowProds_R(x, na.rm = FALSE, useNames = useNames) print(y0) y1 <- rowProds(x, na.rm = FALSE, useNames = useNames) print(y1) y2 <- colProds(t(x), na.rm = FALSE, useNames = useNames) print(y2) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y2, y1)) # "Empty" rows y0 <- rowProds_R(x[integer(0), , drop = FALSE], na.rm = FALSE, useNames = useNames) print(y0) y1 <- rowProds(x[integer(0), , drop = FALSE], na.rm = FALSE, useNames = useNames) print(y1) y2 <- colProds(t(x[integer(0), , drop = FALSE]), na.rm = FALSE, useNames = useNames) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal(y2, y1)) stopifnot(length(y1) == 0L) # Using product() y1 <- rowProds(x, method = "expSumLog", na.rm = FALSE, useNames = useNames) print(y1) y2 <- colProds(t(x), method = "expSumLog", na.rm = FALSE, useNames = useNames) print(y2) stopifnot(all.equal(y2, y1)) } } } # for (mode ...) # Bug report 2012-06-25 x <- matrix(c(1, 1, 1, 1, 1, 0, 1, 0), nrow = 4, ncol = 2) y0 <- rowProds_R(x) print(y0) y1 <- rowProds(x) print(y1) y2 <- colProds(t(x)) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, x[, 1] * x[, 2])) stopifnot(all.equal.na(y2, y1)) # Check names attribute dimnames(x) <- dimnames y0 <- rowProds_R(x, useNames = TRUE) print(y0) y1 <- rowProds(x, useNames = TRUE) print(y1) y2 <- colProds(t(x), useNames = TRUE) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, x[, 1] * x[, 2])) stopifnot(all.equal.na(y2, y1)) # Bug report 2014-03-25 ("all rows contains a zero") x <- matrix(c(0, 1, 1, 0), nrow = 2, ncol = 2) # To check names attribute dimnames <- list(letters[1:2], LETTERS[1:2]) y0 <- rowProds_R(x) print(y0) y1 <- rowProds(x) print(y1) y2 <- colProds(t(x)) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, c(0, 0))) stopifnot(all.equal.na(y2, y1)) # Check names attribute dimnames(x) <- dimnames y0 <- rowProds_R(x, useNames = TRUE) print(y0) y1 <- rowProds(x, useNames = TRUE) print(y1) y2 <- colProds(t(x), useNames = TRUE) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y2, y1)) matrixStats/tests/sum2.R0000644000176200001440000001772114111772235014763 0ustar liggesuserslibrary("matrixStats") options(warn = 1) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) sum2_R <- function(x, na.rm = FALSE, idxs = NULL) { if (is.null(idxs)) { sum(x, na.rm = na.rm) } else { sum(x[idxs], na.rm = na.rm) } } # sum2_R() cat("Consistency checks:\n") for (kk in 1:20) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape n <- sample(1e3, size = 1L) x <- rnorm(n, sd = 100) # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1L) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE, FALSE), size = 1L) # Sum over all y0 <- sum2_R(x, na.rm = na.rm) y1 <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) # Sum over subset nidxs <- sample(n, size = 1L) idxs <- sample(n, size = nidxs) y0 <- sum2_R(x, na.rm = na.rm, idxs = idxs) y1 <- sum2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) if (storage.mode(x) == "integer") { storage.mode(x) <- "logical" y0 <- sum2_R(x, na.rm = na.rm) y1 <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y0 <- sum2_R(x, na.rm = na.rm, idxs = idxs) y1 <- sum2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (n in 0:2) { for (na.rm in c(FALSE, TRUE)) { x <- rep(NA_real_, times = n) y0 <- sum(x, na.rm = na.rm) y <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y, y0)) x <- rep(NA_integer_, times = n) y0 <- sum(x, na.rm = na.rm) y <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y, y0)) x <- rep(NA, times = n) y0 <- sum(x, na.rm = na.rm) y <- sum2(x, na.rm = na.rm) stopifnot(all.equal(y, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (na.rm in c(FALSE, TRUE)) { # Summing over zero elements (integers) x <- integer(0) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- 1:10 idxs <- integer(0) s1 <- sum(x[idxs], na.rm = na.rm) s2 <- sum2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Summing over NA_integer_:s x <- rep(NA_integer_, times = 10L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times = 10L) idxs <- 1:5 s1 <- sum(x[idxs], na.rm = na.rm) s2 <- sum2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Summing over zero elements (doubles) x <- double(0) s1 <- sum(x) s2 <- sum2(x) stopifnot( identical(s1, 0), identical(s1, s2) ) x <- as.double(1:10) idxs <- integer(0) s1 <- sum(x[idxs]) s2 <- sum2(x, idxs = idxs) stopifnot( identical(s1, 0), identical(s1, s2) ) # Summing over NA_real_:s x <- rep(NA_real_, times = 10L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( !na.rm || s1 == 0, identical(s1, s2) ) x <- rep(NA_real_, times = 10L) idxs <- 1:5 s1 <- sum(x[idxs], na.rm = na.rm) s2 <- sum2(x, idxs = idxs, na.rm = na.rm) stopifnot( !na.rm || s1 == 0, identical(s1, s2) ) # Summing over -Inf:s x <- rep(-Inf, times = 3L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( is.infinite(s1) && s1 < 0, identical(s1, s2) ) # Summing over +Inf:s x <- rep(+Inf, times = 3L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) stopifnot( is.infinite(s1) && s1 > 0, identical(s1, s2) ) # Summing over mix of -Inf:s and +Inf:s x <- rep(c(-Inf, +Inf), times = 3L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( is.nan(s1), identical(s1, s2) ) # Summing over mix of -Inf:s and +Inf:s and numerics x <- rep(c(-Inf, +Inf, 3.14), times = 2L) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) stopifnot( is.nan(s1), identical(s1, s2) ) # Summing over mix of NaN, NA, +Inf, and numerics x <- c(NaN, NA, +Inf, 3.14) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) if (na.rm) { stopifnot( is.infinite(s1) && s1 > 0, identical(s2, s1) ) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } # Summing over mix of NaN, NA, +Inf, and numerics x <- c(NA, NaN, +Inf, 3.14) s1 <- sum(x, na.rm = na.rm) s2 <- sum2(x, na.rm = na.rm) if (na.rm) { stopifnot( is.infinite(s1) && s1 > 0, identical(s2, s1) ) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of large integers # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- c(.Machine$integer.max, 1L, -.Machine$integer.max) # Total gives integer overflow s1 <- sum(x[1:2]) # NA_integer_ w/ warning s2 <- sum2(x[1:2]) # NA_integer_ w/ warning stopifnot( getRversion() >= "3.5.0" || identical(s1, NA_integer_), identical(s2, NA_integer_) ) ## Assert above warning res <- tryCatch({ s2 <- sum2(x[1:2]) }, warning = identity) stopifnot(inherits(res, "warning")) # Total gives integer overflow (coerce to numeric) s1 <- sum(as.numeric(x[1:2])) # 2147483648 s2 <- sum2(as.numeric(x[1:2])) # 2147483648 s3 <- sum2(x[1:2], mode = "double") # 2147483648 stopifnot( identical(s1, 2147483648), identical(s1, s2), identical(s1, s3) ) # Cumulative sum would give integer overflow but not the total s1 <- sum(x) # 1L s2 <- sum2(x) # 1L stopifnot( identical(s1, 1L), identical(s1, s2) ) # Input is double but coersing result to integer x <- c(1, 2, 3.1) s1 <- sum2(x) s2 <- sum2(x, mode = "integer") stopifnot( identical(as.integer(s1), s2) ) ## Assert above warning res <- tryCatch({ s2 <- sum2(x, mode = "integer") }, warning = identity) stopifnot(inherits(res, "warning")) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of large doubles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Double overflow x <- rep(.Machine$double.xmax, times = 2L) y0 <- sum(x) print(y0) y <- sum2(x) print(y) stopifnot( is.infinite(y) && y > 0, identical(y, y0) ) x <- rep(-.Machine$double.xmax, times = 2L) y0 <- sum(x) print(y0) y <- sum2(x) print(y) stopifnot( is.infinite(y) && y < 0, identical(y, y0) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 idxs_list <- list( integer = 1:5, double = as.double(1:5), logical = (x <= 5) ) for (idxs in idxs_list) { cat("idxs:\n") str(idxs) s1 <- sum(x[idxs], na.rm = TRUE) s2 <- sum2(x, idxs = idxs, na.rm = TRUE) stopifnot(identical(s1, s2)) } matrixStats/tests/rowQuantiles.R0000644000176200001440000002030214105674332016561 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowQuantiles_R <- function(x, probs = c(0, 0.25, 0.50, 0.75, 1), na.rm = FALSE, drop = TRUE, type = 7L, ..., useNames = NA) { q <- apply(x, MARGIN = 1L, FUN = function(x, probs, na.rm) { if (!na.rm && any(is.na(x))) { na_value <- NA_real_ if (type != 7L) storage.mode(na_value) <- storage.mode(x) rep(na_value, times = length(probs)) } else { as.vector(quantile(x, probs = probs, na.rm = na.rm, type = type, ...)) } }, probs = probs, na.rm = na.rm) if (!is.null(dim(q))) q <- t(q) else dim(q) <- c(nrow(x), length(probs)) digits <- max(2L, getOption("digits")) colnames(q) <- sprintf("%.*g%%", digits, 100 * probs) rownames(q) <- rownames(x) if (isFALSE(useNames)) rownames(q) <- NULL if (drop) q <- drop(q) q } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with multiple quantiles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(1:40 + 0.1, nrow = 8, ncol = 5) storage.mode(x) <- mode dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) str(x) probs <- c(0, 0.5, 1) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { q0 <- rowQuantiles_R(x, probs = probs, useNames = useNames) print(q0) q1 <- rowQuantiles(x, probs = probs, useNames = useNames) print(q1) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q1) <- storage.mode(q0) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs = probs, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q2) <- storage.mode(q0) stopifnot(all.equal(q2, q0)) } } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with a single quantile # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(1:40, nrow = 8, ncol = 5) storage.mode(x) <- mode dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) str(x) probs <- c(0.5) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { q0 <- rowQuantiles_R(x, probs = probs, useNames = useNames) print(q0) q1 <- rowQuantiles(x, probs = probs, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q1) <- storage.mode(q0) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs = probs, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical") storage.mode(q2) <- storage.mode(q0) stopifnot(all.equal(q2, q0)) } } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) probs <- seq(from = 0, to = 1, by = 0.25) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 6L else 24L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(20:60, size = 2L) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim dimnames <- lapply(dim(x), FUN = function(n) rep(letters, length.out = n)) # Add NAs? has_na <- ((kk %% 2) == 0L) if (has_na) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Logical, integer, or double? mode <- "numeric" if ((kk %% 6) %in% 1:2) { cat("Coercing to logical\n") mode <- "logical" } else if ((kk %% 6) %in% 3:4) { cat("Coercing to integers\n") mode <- "integer" } storage.mode(x) <- mode str(x) # rowQuantiles(): for (type in 1:9) { cat(sprintf("type=%d, has_na=%s:\n", type, has_na)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { q0 <- rowQuantiles_R(x, probs = probs, na.rm = has_na, type = type, useNames = useNames) q1 <- rowQuantiles(x, probs = probs, na.rm = has_na, type = type, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical" && !has_na && type == 7L) storage.mode(q1) <- storage.mode(q0) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs = probs, na.rm = has_na, type = type, useNames = useNames) ## FIXME: Workaround for R (< 3.0.0) if (getRversion() < "3.0.0" && mode == "logical" && !has_na && type == 7L) storage.mode(q2) <- storage.mode(q0) stopifnot(all.equal(q2, q0)) } } } } # for (kk ...) for (mode in c("logical", "integer", "double")) { naValue <- NA_real_ storage.mode(naValue) <- mode someValue <- 1 storage.mode(someValue) <- mode for (type in 1:9) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NA # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(naValue, nrow = 3L, ncol = 4L) dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { qr0 <- rowQuantiles_R(x, type = type, useNames = useNames) qr <- rowQuantiles(x, type = type, useNames = useNames) stopifnot(identical(qr, qr0)) # x <- matrix(naValue, nrow = 4L, ncol = 3L) qc <- colQuantiles(t(x), type = type, useNames = useNames) stopifnot(identical(qc, qr)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Empty matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - probs <- c(0, 0.25, 0.75, 1) x <- matrix(naValue, nrow = 0L, ncol = 0L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) q <- rowQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(nrow(x), length(probs)))) q <- colQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(ncol(x), length(probs)))) x <- matrix(naValue, nrow = 2L, ncol = 0L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) q <- rowQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(nrow(x), length(probs)))) x <- matrix(naValue, nrow = 0L, ncol = 2L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) q <- colQuantiles(x, probs = probs, type = type, useNames = TRUE) stopifnot(identical(dim(q), c(ncol(x), length(probs)))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Single column matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - probs <- c(0, 0.25, 0.75, 1) x <- matrix(someValue, nrow = 2L, ncol = 1L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) qr <- rowQuantiles(x, probs = probs, type = type, useNames = TRUE) print(qr) x <- matrix(someValue, nrow = 1L, ncol = 2L) dimnames(x) <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) qc <- colQuantiles(x, probs = probs, type = type, useNames = TRUE) print(qc) stopifnot(identical(qc, qr)) } } matrixStats/tests/varDiff_etal.R0000644000176200001440000000536014063411362016454 0ustar liggesuserslibrary("matrixStats") set.seed(1) x <- rnorm(1e4) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Variance estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sigma2_a <- var(x) cat(sprintf("var(x) = %g\n", sigma2_a)) sigma2_b <- varDiff(x) cat(sprintf("varDiff(x) = %g\n", sigma2_b)) d <- abs(sigma2_b - sigma2_a) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.02) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Standard deviation estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sigma_a <- sd(x) cat(sprintf("sd(x) = %g\n", sigma_a)) sigma_b <- sdDiff(x) cat(sprintf("sdDiff(x) = %g\n", sigma_b)) d <- abs(sigma_b - sigma_a) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.01) # Sanity checks stopifnot(abs(sigma2_a - sigma_a ^ 2) < 1e-9) stopifnot(abs(sigma2_b - sigma_b ^ 2) < 1e-9) sigma_a2 <- mad(x) cat(sprintf("mad(x) = %g\n", sigma_a2)) sigma_b2 <- madDiff(x) cat(sprintf("madDiff(x) = %g\n", sigma_b2)) d <- abs(sigma_b2 - sigma_a2) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.05) sigma_a3 <- IQR(x) cat(sprintf("IQR(x) = %g\n", sigma_a3)) sigma_b3 <- iqrDiff(x) cat(sprintf("iqrDiff(x) = %g\n", sigma_b3)) d <- abs(sigma_b3 - sigma_a3) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.05) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Trimmed estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- x outliers <- sample(length(x), size = 0.1 * length(x)) y[outliers] <- 100 * y[outliers] sigma_ao <- sd(y[-outliers]) cat(sprintf("sd(y) = %g\n", sigma_ao)) sigma_bo <- sdDiff(y[-outliers]) cat(sprintf("sdDiff(y) = %g\n", sigma_bo)) d <- abs(sigma_b - sigma_a) cat(sprintf("Absolute difference = %g\n", d)) stopifnot(d < 0.01) sigma_bot <- sdDiff(y, trim = 0.05) cat(sprintf("sdDiff(y, trim = 0.05) = %g\n", sigma_bot)) d <- abs(sigma_bot - sigma_a) cat(sprintf("Absolute difference = %g\n", d)) #stopifnot(d < 1e-3) sigma_cot <- madDiff(y, trim = 0.05) cat(sprintf("madDiff(y, trim = 0.05) = %g\n", sigma_cot)) sigma_dot <- iqrDiff(y, trim = 0.05) cat(sprintf("iqrDiff(y, trim = 0.05) = %g\n", sigma_dot)) fcns <- list( varDiff = varDiff, sdDiff = sdDiff, madDiff = madDiff, iqrDiff = iqrDiff ) for (name in names(fcns)) { cat(sprintf("%s()...\n", name)) fcn <- fcns[[name]] for (mode in c("integer", "double")) { cat("mode: ", mode, "", sep = "") for (n in 0:3) { x <- runif(n, min = -5, max = 5) storage.mode(x) <- mode str(x) y <- fcn(x) yt <- fcn(x, trim = 0.1) str(list("non-trimmed" = y, trimmed = yt)) } # for (mode ...) } cat(sprintf("%s()...DONE\n", name)) } # for (name ...) matrixStats/tests/rowCollapse_subset.R0000644000176200001440000000434014074054377017755 0ustar liggesuserslibrary("matrixStats") rowCollapse_R <- function(x, idxs, ..., useNames = NA) { ans <- c() storage.mode(ans) <- storage.mode(x) for (ii in seq_len(length(idxs))) { ans[ii] <- x[ii, idxs[ii]] } # Preserve names attribute if (isTRUE(useNames)) { names <- rownames(x) if (!is.null(names)) names(ans) <- names } ans } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (rows in index_cases) { if (is.null(rows)) rows <- seq_len(nrow(x)) for (idxs in list(2L, seq_len(6L))) { for (useNames in c(NA, TRUE, FALSE)) { suppressWarnings({ actual <- tryCatch(rowCollapse(x, idxs, rows = rows, useNames = useNames), error = function(c) "error") expect <- tryCatch({ idxs_0 <- rep(idxs, length.out = nrow(x))[rows] rowCollapse_R(x[rows, , drop = FALSE], idxs_0, useNames = useNames) }, error = function(c) "error") }) stopifnot(all.equal(actual, expect)) suppressWarnings({ actual <- tryCatch(colCollapse(t(x), idxs, cols = rows, useNames = useNames), error = function(c) "error") }) stopifnot(all.equal(actual, expect)) # Check names attribute dimnames(x) <- dimnames suppressWarnings({ actual <- tryCatch(rowCollapse(x, idxs, rows = rows, useNames = useNames), error = function(c) "error") expect <- tryCatch({ idxs_0 <- rep(idxs, length.out = nrow(x))[rows] rowCollapse_R(x[rows, , drop = FALSE], idxs_0, useNames = useNames) }, error = function(c) "error") }) stopifnot(all.equal(actual, expect)) suppressWarnings({ actual <- tryCatch(colCollapse(t(x), idxs, cols = rows, useNames = useNames), error = function(c) "error") }) stopifnot(all.equal(actual, expect)) dimnames(x) <- NULL } } } matrixStats/tests/rowAllAnys_subset.R0000644000176200001440000002423114074054377017557 0ustar liggesuserslibrary("matrixStats") rowAlls_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = all, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames <- dimnames(x) if (!is.null(dimnames)) dimnames(y) <- dimnames } res <- apply(y, MARGIN = 1L, FUN = all, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnys_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = any, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames <- dimnames(x) if (!is.null(dimnames)) dimnames(y) <- dimnames } res <- apply(y, MARGIN = 1L, FUN = any, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnyMissings_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = anyMissing) if (is.na(useNames) || !useNames) names(res) <- NULL res } all_R <- function(x, value = TRUE, ...) { if (is.na(value)) { all(is.na(x), ...) } else { all(x == value, ...) } } any_R <- function(x, value = TRUE, ...) { if (is.na(value)) { any(is.na(x), ...) } else { any(x == value, ...) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" x[2:3, ] <- NA_integer_ x[2, 1] <- 0L x[4:5, ] <- 0L x[4, 6] <- NA_integer_ # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = 0, na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = NA_integer_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) } } } } for (rr in seq_len(nrow(x))) { for (idxs in index_cases) { validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = 0, na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = 0, na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = NA_integer_) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = 0, na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = 0, na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = NA_integer_) } } storage.mode(x) <- "character" # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAlls, fsure = rowAlls_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAlls, fsure = rowAlls_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnys, fsure = rowAnys_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = "0", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = "0", na.rm = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnys, fsure = rowAnys_R, value = NA_character_, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colAnyMissings, fsure = rowAnyMissings_R, useNames = useNames) } } } } for (rr in seq_len(nrow(x))) { for (idxs in index_cases) { validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = "0", na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = "0", na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = allValue, fsure = all_R, value = NA_integer_) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = "0", na.rm = TRUE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = "0", na.rm = FALSE) validateIndicesTestVector(x[rr, ], idxs, ftest = anyValue, fsure = any_R, value = NA_integer_) } } matrixStats/tests/binMeans,binCounts.R0000644000176200001440000001037414111766323017570 0ustar liggesuserslibrary("matrixStats") library("stats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Naive R implementation of binMeans() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binMeans0 <- function(y, x, bx, na.rm = TRUE, count = TRUE, right = FALSE) { n_smooth <- length(bx) - 1L res <- double(n_smooth) counts <- rep(NaN, times = n_smooth) if (na.rm) { keep <- !is.na(x) & !is.na(y) x <- x[keep] y <- y[keep] } # For each bin... for (kk in seq_len(n_smooth)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk + 1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk + 1L]) } y_kk <- y[idxs] res[kk] <- mean(y_kk) counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Case #1 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:100 nx <- length(x) y <- double(nx) y[1:25] <- 5 y[51:75] <- -5 y <- y + rnorm(nx) # Bins bx <- c(0.5, 25.5, 50.5, 75.5, 100.5) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) stopifnot(all.equal(attr(y_smooth, "count"), n_smooth)) y_smooth0r <- rev(binMeans0(y, x = -x, bx = rev(-bx), count = FALSE, right = TRUE)) y_smoothr <- rev(binMeans(y, x = -x, bx = rev(-bx), count = FALSE, right = TRUE)) # Sanity check stopifnot(all.equal(y_smooth0r, y_smooth0, check.attributes = FALSE)) stopifnot(all.equal(y_smoothr, y_smooth0r)) # Integer input y <- as.integer(y) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(is.integer(y), all.equal(y_smooth, y_smooth0), all.equal(attr(y_smooth, "count"), n_smooth)) # Logical input y <- as.logical(y) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(is.logical(y), all.equal(y_smooth, y_smooth0), all.equal(attr(y_smooth, "count"), n_smooth)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Case #2 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nx <- 1e3 x <- runif(nx) y <- runif(nx) nb <- 10 bx <- do.call(seq, c(as.list(range(x)), length.out = nb)) bx1 <- c(bx[-1], bx[nb] + 1) y_smooth0 <- binMeans0(y, x = x, bx = bx1) y_smooth <- binMeans(y, x = x, bx = bx1) n_smooth <- binCounts(x, bx = bx1) y_smoothr <- rev(binMeans(y, x = -x, bx = rev(-bx1), right = TRUE)) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) stopifnot(all.equal(attr(y_smooth, "count"), n_smooth)) stopifnot(all.equal(y_smoothr, y_smooth, check.attributes = FALSE)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Empty bins # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- c(6:8, 16:19) nx <- length(x) y <- runif(nx) bx <- c(0, 5, 10, 15, 20, 25) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) n_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(attr(y_smooth, "count"), n_smooth)) stopifnot(all.equal(y_smooth, y_smooth0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:100 x[50] <- NA_integer_ nx <- length(x) y <- double(nx) y[1:25] <- 5 y[51:75] <- -5 y[82:92] <- NA_real_ y <- y + rnorm(nx) # Bins bx <- c(0.5, 25.5, 75.5, 82.5, 100.5) y_smooth0 <- binMeans0(y, x = x, bx = bx) y_smooth <- binMeans(y, x = x, bx = bx) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Exception handling # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Zero bin bounderies (invalid bin definition) bx <- double(0L) res <- try(y_smooth <- binMeans(x = 1:5, y = 1:5, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) # One bin boundery (invalid bin definition) bx <- double(1L) res <- try(y_smooth <- binMeans(x = 1:5, y = 1:5, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) matrixStats/tests/rowVars.R0000644000176200001440000002200714111772151015526 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") options(matrixStats.vars.formula.freq = Inf) ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as rowVars() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as colVars() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } rowVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) res <- rowVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } colVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) res <- colVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } rowVars_center_naive <- function(x, rows = NULL, cols = NULL, center = NULL, na.rm = FALSE, ..., useNames = NA) { x <- sweep(x, MARGIN = 1, STATS = as.array(center), FUN = "-") x[is.infinite(center), ] <- NaN res <- rowVars(x, rows = rows, cols = cols, center = rep(0, times = nrow(x)), na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } colVars_center_naive <- function(x, rows = NULL, cols = NULL, center = NULL, na.rm = FALSE, ..., useNames = NA) { x <- sweep(x, MARGIN = 2, STATS = as.array(center), FUN = "-") x[, is.infinite(center)] <- NaN res <- colVars(x, rows = rows, cols = cols, center = rep(0, times = ncol(x)), na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs or Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { if (mode == "integer") { specials <- c(0L, NA_integer_) delta <- 0L } else { specials <- c(0, NA_real_, Inf) delta <- 0.1 } for (special in specials) { cat("special = ", special, "\n", sep = "") x <- matrix(1:50 + delta, nrow = 10L, ncol = 5L) x[3:7, c(2, 4)] <- special cat("mode: ", mode, "\n", sep = "") str(x) stopifnot(storage.mode(x) == mode) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column variance for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") center <- rowMeans(x, na.rm = na.rm) r0 <- rowVars_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowVars(x, na.rm = na.rm, useNames = useNames) r1b <- rowVars_center(x, na.rm = na.rm, useNames = useNames) r1c <- rowVars(x, center = center, na.rm = na.rm, useNames = useNames) tx <- t(x) r2 <- colVars(tx, na.rm = na.rm, useNames = useNames) r2b <- colVars_center(tx, na.rm = na.rm, useNames = useNames) r2c <- colVars(tx, center = center, na.rm = na.rm, useNames = useNames) tx <- NULL stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r1b, r1c)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2b, r2c)) center <- colMeans(x, na.rm = na.rm) r3 <- colVars(x, center = center, na.rm = na.rm, useNames = useNames) r3b <- colVars_center_naive(x, center = center, na.rm = na.rm, useNames = useNames) r3c <- rowVars(t(x), center = center, na.rm = na.rm, useNames = useNames) r3d <- rowVars_center_naive(t(x), center = center, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r3b, r3)) stopifnot(all.equal(r3c, r3)) stopifnot(all.equal(r3d, r3)) stopifnot( !any(is.infinite(r0)), !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r3)), !any(is.infinite(r1b)), !any(is.infinite(r1c)), !any(is.infinite(r2b)), !any(is.infinite(r2c)), !any(is.infinite(r3b)), !any(is.infinite(r3c)), !any(is.infinite(r3d)) ) } } } } # for (special ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column variance for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowVars_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowVars(x, na.rm = na.rm, useNames = useNames) r1b <- rowVars_center(x, na.rm = na.rm, useNames = useNames) r2 <- colVars(t(x), na.rm = na.rm, useNames = useNames) r2b <- colVars_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r0)), !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column variance for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowVars_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowVars(x, na.rm = na.rm, useNames = useNames) r1b <- rowVars_center(x, na.rm = na.rm, useNames = useNames) r2 <- colVars(t(x), na.rm = na.rm, useNames = useNames) r2b <- colVars_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r0)), !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # center and .dim # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - m <- matrix(1:12, nrow = 3L, ncol = 4L) a <- array(m, dim = c(3, 2, 2)) stopifnot(prod(dim(a)) == prod(dim(m))) y0 <- rowVars(m, dim. = dim(m)) print(y0) y1 <- rowVars(a, dim. = dim(m)) print(y1) stopifnot(identical(y1, y0)) stopifnot( !any(is.infinite(y0)), !any(is.infinite(y1)) ) mu <- rowMeans(m) y0 <- rowVars(m, center = mu, dim. = dim(m)) print(y0) y1 <- rowVars(a, center = mu, dim. = dim(m)) print(y1) stopifnot(identical(y1, y0)) stopifnot( !any(is.infinite(y0)), !any(is.infinite(y1)) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # https://github.com/HenrikBengtsson/matrixStats/issues/195 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(c(1,2,3,4), nrow = 2L, ncol = 2L) x[1,1] <- NA_real_ x[1,2] <- Inf center <- colMeans2(x, na.rm = TRUE) y <- colVars(x, center = center, na.rm = TRUE) stopifnot(!any(is.infinite(y))) x <- t(x) center <- rowMeans2(x, na.rm = TRUE) y <- rowVars(x, center = center, na.rm = TRUE) stopifnot(!any(is.infinite(y))) matrixStats/tests/rowSums2.R0000644000176200001440000002365114074054377015645 0ustar liggesuserslibrary("matrixStats") rowSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::rowSums() doesn't. What should rowSums2() do? ## apply(x, MARGIN = 1L, FUN = sum, na.rm = na.rm) res <- rowSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::colSums() doesn't. What should colSums2() do? ## apply(x, MARGIN = 2L, FUN = sum, na.rm = na.rm) res <- colSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } for (mode in c("integer", "logical", "double")) { x <- matrix(-4:4, nrow = 3, ncol = 3) storage.mode(x) <- mode if (mode == "double") x <- x + 0.1 # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(1, nrow = 1, ncol = 1) storage.mode(x) <- mode # To check names attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(integer(0), nrow = 0, ncol = 0) storage.mode(x) <- mode y0 <- rowSums2_R(x, na.rm = FALSE) y1 <- rowSums2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE) y1 <- colSums2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(NA_integer_, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- colSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = TRUE, useNames = useNames) y1 <- colSums2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf, +Inf), nrow = 4, ncol = 4) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: NaNs and NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(NaN, NA_real_), nrow = 4, ncol = 4) y0 <- rowSums(x, na.rm = FALSE) str(y0) stopifnot(all(is.na(y0)), length(unique(y0)) >= 1L) y1 <- rowSums2(x, na.rm = FALSE) str(y0) stopifnot(all(is.na(y1)), length(unique(y1)) >= 1L) stopifnot(all.equal(y1, y0)) y0 <- colSums(x, na.rm = FALSE) stopifnot(all(is.na(y0)), length(unique(y0)) == 1L) y1 <- colSums2(x, na.rm = FALSE) stopifnot(all(is.na(y1)), length(unique(y1)) == 1L) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that y1 is identical to y0. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colSums2_R(x, na.rm = FALSE, useNames = useNames) y1 <- colSums2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4L else 20L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(50:200, size = 2) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Mode? modes <- "double" if ((kk %% 4) %in% c(2, 0)) { modes <- c("integer", "logical") } for (mode in modes) { if (mode != "double") { cat(sprintf("Coercing from %s to %s\n", storage.mode(x), mode)) storage.mode(x) <- mode } na.rm <- sample(c(TRUE, FALSE), size = 1) # rowSums2(): y0 <- rowSums2_R(x, na.rm = na.rm) y1 <- rowSums2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- colSums2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) # colSums2(): y0 <- colSums2_R(x, na.rm = na.rm) y1 <- colSums2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- rowSums2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) } } # for (kk ...) matrixStats/tests/rowSums2_subset.R0000644000176200001440000000331614074054377017226 0ustar liggesuserslibrary("matrixStats") rowSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::rowSums() doesn't. What should rowSums2() do? ## apply(x, MARGIN = 1L, FUN = sum, na.rm = na.rm) res <- rowSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colSums2_R <- function(x, na.rm = FALSE, ..., useNames = NA) { ## FIXME: sum() may overflow for integers, whereas ## base::colSums() doesn't. What should colSums2() do? ## apply(x, MARGIN = 2L, FUN = sum, na.rm = na.rm) res <- colSums(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowSums2, fsure = rowSums2_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colSums2, fsure = rowSums2_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/signTabulate_subset.R0000644000176200001440000000165114063411362020074 0ustar liggesuserslibrary("matrixStats") signTabulate0 <- function(x, ...) { nneg <- sum(x < 0, na.rm = TRUE) nzero <- sum(x == 0, na.rm = TRUE) npos <- sum(x > 0, na.rm = TRUE) nna <- sum(is.na(x)) nneginf <- sum(is.infinite(x) & x < 0, na.rm = TRUE) nposinf <- sum(is.infinite(x) & x > 0, na.rm = TRUE) res <- c(nneg, nzero, npos, nna, nneginf, nposinf) res <- as.double(res) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf") if (is.integer(x)) res <- res[1:4] res } # signTabulate0() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) x[2:3, 4:5] <- +Inf x[4:5, 1:2] <- -Inf for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = signTabulate, fsure = signTabulate0) } matrixStats/tests/rowRanks_subset.R0000644000176200001440000000446214074054377017276 0ustar liggesuserslibrary("matrixStats") rowRanks_R <- function(x, ties.method = "average", ..., useNames = NA) { ans <- t(apply(x, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = ties.method)) # Preserve dimnames attribute? dim(ans) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(ans) <- dimnames ans } colRanks_R <- function(x, ties.method, preserveShape = FALSE, ..., useNames = NA) { ans <- t(apply(x, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = ties.method)) # Preserve dimnames attribute? tx <- t(x) dim(ans) <- dim(tx) dimnames <- dimnames(tx) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(ans) <- dimnames if (preserveShape) ans <- t(ans) ans } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) colRanks_R_t <- function(x, rows, cols, ..., useNames = NA) { t(colRanks(t(x), rows = cols, cols = rows, preserveShape = TRUE, ..., useNames = useNames)) } # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowRanks, fsure = rowRanks_R, ties.method = "average", useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = colRanks_R_t, fsure = rowRanks_R, ties.method = "average", useNames = useNames) for (perserveShape in c(TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = colRanks, fsure = colRanks_R, ties.method = "average", perserveShape = perserveShape, useNames = useNames) } } } } } matrixStats/tests/logSumExp.R0000644000176200001440000000620514063411362016010 0ustar liggesuserslibrary("matrixStats") library("stats") logSumExp_R <- function(lx, na.rm = FALSE) { log(sum(exp(lx), na.rm = na.rm)) } ## R-help thread \emph{'[R] Beyond double-precision?'} on May 9, 2009. for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") set.seed(1) x <- runif(20, min = 1.0, max = 3.0) storage.mode(x) <- mode str(x) ## The logarithm of the harmonic mean y0 <- log(1 / mean(1 / x)) print(y0) ## -1.600885 lx <- log(x) y1 <- log(length(x)) - logSumExp(-lx) print(y1) ## [1] -1.600885 # Sanity check stopifnot(all.equal(y1, y0)) y2 <- log(length(x)) - logSumExp_R(-lx) # Sanity check stopifnot(all.equal(y2, y0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## NA values lx <- c(1, 2, 3) lx[2] <- NA_real_ y0 <- logSumExp_R(lx, na.rm = FALSE) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(all.equal(y, y0)) ## NaN values lx <- c(1, 2, 3) lx[2] <- NaN y0 <- logSumExp_R(lx, na.rm = FALSE) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(all.equal(y, y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Zero-length vectors lx <- numeric(0L) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) ## Vectors of length one lx <- 1.0 y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, lx)) stopifnot(all.equal(y, y0)) lx <- NA_real_ y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) ## All missing values lx <- c(NA_real_, NA_real_) y0 <- logSumExp_R(lx, na.rm = TRUE) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) lx <- c(NA_real_, NA_real_) y0 <- logSumExp_R(lx, na.rm = FALSE) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) ## +Inf values lx <- c(1, 2, +Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, +Inf)) stopifnot(all.equal(y, y0)) ## First element is a missing value, cf. PR #33 lx <- c(NA_real_, 1) y0 <- logSumExp_R(lx) print(y0) y <- logSumExp(lx, na.rm = FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm = TRUE) print(y0) y <- logSumExp(lx, na.rm = TRUE) print(y) stopifnot(identical(y, 1)) stopifnot(all.equal(y, y0)) ## Multiple -Inf values, cf. issue #84 lx <- c(-Inf, -Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) lx <- c(-Inf, 5, -Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, 5)) stopifnot(all.equal(y, y0)) matrixStats/tests/rowOrderStats_subset.R0000644000176200001440000000335414111762651020302 0ustar liggesuserslibrary("matrixStats") rowOrderStats_R <- function(x, probs, ..., useNames = NA) { ans <- apply(x, MARGIN = 1L, FUN = quantile, probs = probs, type = 3L) # Remove Attributes if (is.na(useNames) || !useNames || length(ans) == 0L) attributes(ans) <- NULL ans } # rowOrderStats_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) probs <- 0.3 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { if (is.null(cols)) which <- round(probs * ncol(x)) else { xxrows <- rows suppressWarnings({ xx <- tryCatch(x[, cols, drop = FALSE], error = function(c) "error") if (identical(xx, "error")) which <- 0L else which <- round(probs * ncol(xx)) }) } if (which == 0L) next validateIndicesTestMatrix(x, rows, cols, ftest = rowOrderStats, fsure = rowOrderStats_R, which = which, probs = probs, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colOrderStats, fsure = rowOrderStats_R, which = which, probs = probs, useNames = useNames) } } } } matrixStats/tests/indexByRow.R0000644000176200001440000000412314063411362016154 0ustar liggesuserslibrary("matrixStats") indexByRow_R1 <- function(dim, idxs = NULL, ...) { n <- prod(dim) x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) if (!is.null(idxs)) x <- x[idxs] as.vector(x) } indexByRow_R2 <- function(dim, idxs = NULL, ...) { n <- prod(dim) if (is.null(idxs)) { x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) as.vector(x) } else { idxs <- idxs - 1L cols <- idxs %/% dim[2L] rows <- idxs %% dim[2L] cols + dim[1L] * rows + 1L } } dim <- c(5L, 4L) x <- matrix(NA_integer_, nrow = dim[1L], ncol = dim[2L]) y <- t(x) idxs_by_cols <- seq_along(x) # Assign by columns x[idxs_by_cols] <- idxs_by_cols print(x) # Truth y0 <- t(x) idxs_by_rows <- as.vector(y0) # Assert idxs <- indexByRow(dim) stopifnot(all.equal(idxs, idxs_by_rows)) y <- x y[idxs_by_rows] <- idxs print(y) stopifnot(all(as.vector(y) == as.vector(x))) idxs_R1 <- indexByRow_R1(dim) stopifnot(all.equal(idxs_R1, idxs_by_rows)) idxs_R2 <- indexByRow_R2(dim) stopifnot(all.equal(idxs_R2, idxs_by_rows)) # Assert idxs_by_cols <- seq(from = 1, to = length(x), by = 3L) idxs_by_rows <- as.vector(t(x)[idxs_by_cols]) idxs <- indexByRow(dim, idxs = idxs_by_cols) stopifnot(all(idxs == idxs_by_rows)) idxs_R1 <- indexByRow_R1(dim, idxs = idxs_by_cols) stopifnot(all(idxs_R1 == idxs_by_rows)) idxs_R2 <- indexByRow_R2(dim, idxs = idxs_by_cols) stopifnot(all(idxs_R2 == idxs_by_rows)) ## DEFUNCT: Backward compatibility res <- tryCatch({ idxs1 <- indexByRow(x) }, error = identity) stopifnot(inherits(res, "error")) ## Exceptions: ## Too large matrices are not supported, which happens ## when prod(dim) > .Machine$integer.max dim_too_large <- c(.Machine$integer.max, 2L) res <- tryCatch({ idxs <- indexByRow(dim_too_large, idxs = 1L) }, error = identity) stopifnot(inherits(res, "error")) ## Non-positive indices are not supported res <- tryCatch({ idxs <- indexByRow(c(1,1), idxs = 0L) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ idxs <- indexByRow(c(1,1), idxs = -1L) }, error = identity) stopifnot(inherits(res, "error")) matrixStats/tests/anyMissing_subset.R0000644000176200001440000000100614111765674017601 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(4, min = -3, max = 3) x[2] <- NA for (mode in c("integer", "numeric")) { storage.mode(x) <- mode for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = anyMissing, fsure = function(x, ...) { anyValue(x, value = NA) }) } } matrixStats/tests/rowDiffs.R0000644000176200001440000000757214111771213015656 0ustar liggesuserslibrary("matrixStats") rowDiffs_R <- function(x, lag = 1L, differences = 1L, ..., useNames = NA) { ncol2 <- ncol(x) - lag * differences if (ncol2 <= 0) { y <- matrix(x[integer(0L)], nrow = nrow(x), ncol = 0L) # Preserve names attribute if (isTRUE(useNames) && !is.null(rownames(x))) rownames(y) <- rownames(x) return(y) } suppressWarnings({ y <- apply(x, MARGIN = 1L, FUN = diff, lag = lag, differences = differences) }) y <- t(y) # Preserve dimnames attribute dim(y) <- c(nrow(x), ncol2) if (isTRUE(useNames) && !is.null(dimnames(x))) { colnames <- colnames(x) if (!is.null(colnames)) { len <- length(colnames) colnames <- colnames[(len - ncol2 + 1):len] } dimnames(y) <- list(rownames(x), colnames) } else dimnames(y) <- NULL y } set.seed(0x42) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(sample(10 * 8) + 0.1, nrow = 10L, ncol = 8L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } storage.mode(x) <- mode str(x) dimnames <- list(letters[1:10], LETTERS[1:8]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check dimnames attribute for (useNames in c(NA, TRUE, FALSE)) { for (lag in 1:4) { for (differences in 1:3) { cat(sprintf("mode: %s, lag = %d, differences = %d\n", mode, lag, differences)) # Row/column ranges r0 <- rowDiffs_R(x, lag = lag, differences = differences, useNames = useNames) r1 <- rowDiffs(x, lag = lag, differences = differences, useNames = useNames) r2 <- t(colDiffs(t(x), lag = lag, differences = differences, useNames = useNames)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(NA_real_, nrow = 10L, ncol = 5L) storage.mode(x) <- mode str(x) dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check dimnames attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowDiffs_R(x, useNames = useNames) r1 <- rowDiffs(x, useNames = useNames) r2 <- t(colDiffs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check dimnames attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowDiffs_R(x, useNames = useNames) r1 <- rowDiffs(x, useNames = useNames) r2 <- t(colDiffs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } # for (useNames ...) } # for (setDimnames ...) matrixStats/tests/varDiff_etal_subset.R0000644000176200001440000000207014063411362020034 0ustar liggesuserslibrary("matrixStats") fcns <- list( varDiff = varDiff, sdDiff = sdDiff, madDiff = madDiff, iqrDiff = iqrDiff ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (name in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", name)) fcn <- fcns[[name]] for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) storage.mode(x) <- mode trim <- runif(1, min = 0, max = 0.5) if (mode == "numeric") x[1] <- Inf for (diff in 1:2) { for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = fcn, fsure = fcn, na.rm = TRUE, diff = diff, trim = trim) validateIndicesTestVector(x, idxs, ftest = fcn, fsure = fcn, na.rm = FALSE, diff = diff, trim = trim) } } } cat(sprintf("%s()...DONE\n", name)) } matrixStats/tests/rowMeans2.R0000644000176200001440000002377314074054377015766 0ustar liggesuserslibrary("matrixStats") rowMeans_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- rowMeans(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMeans_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- colMeans(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } for (mode in c("integer", "logical", "double")) { x <- matrix(-4:4, nrow = 3, ncol = 3) storage.mode(x) <- mode if (mode == "double") x <- x + 0.1 # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(1, nrow = 1, ncol = 1) storage.mode(x) <- mode # To check names attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(integer(0), nrow = 0, ncol = 0) storage.mode(x) <- mode y0 <- rowMeans(x, na.rm = FALSE) y1 <- rowMeans2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) y0 <- colMeans(x, na.rm = FALSE) y1 <- colMeans2(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "logical", "double")) { x <- matrix(NA_integer_, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMeans2(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf, +Inf), nrow = 4, ncol = 4) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: NaNs and NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: NaNs and NAs:\n") x <- matrix(c(NaN, NA_real_), nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) str(y0) stopifnot(all(is.na(y0)), length(unique(y0)) >= 1L) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) str(y1) stopifnot(all(is.na(y1)), length(unique(y1)) >= 1L) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) stopifnot(all(is.na(y0)), length(unique(y0)) == 1L) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all(is.na(y1)), length(unique(y1)) == 1L) stopifnot(all.equal(y1, y0)) } } ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that y1 is identical to y0. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMeans_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMeans2(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4L else 20L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(50:200, size = 2) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Mode? modes <- "double" if ((kk %% 4) %in% c(2, 0)) { modes <- c("integer", "logical") } for (mode in modes) { if (mode != "double") { cat(sprintf("Coercing from %s to %s\n", storage.mode(x), mode)) storage.mode(x) <- mode } na.rm <- sample(c(TRUE, FALSE), size = 1) # rowMeans2(): y0 <- rowMeans(x, na.rm = na.rm) y1 <- rowMeans2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- colMeans2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) # colMeans2(): y0 <- colMeans(x, na.rm = na.rm) y1 <- colMeans2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- rowMeans2(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) } } # for (kk ...) matrixStats/tests/psortKM.R0000644000176200001440000000246014111767124015467 0ustar liggesuserslibrary("matrixStats") library("utils") ## utils::str # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - psortKM_R <- function(x, k, m) { x <- sort(x) x[(k - m + 1):k] } psortKM_R2 <- function(x, k, m) { partial <- (k - m + 1):k x <- sort.int(x, partial = partial) x[partial] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") x <- 1:30 x[18:20] <- 20 y <- sample(x) cat("x:\n") str(x) cat("sample(x):\n") str(y) for (k in c(1L, 2L, 20L, 21L, length(x))) { for (m in 1:min(5L, k)) { px0 <- psortKM_R(x, k = k, m = m) px0b <- psortKM_R2(x, k = k, m = m) stopifnot(identical(px0b, px0)) px1 <- matrixStats:::.psortKM(x, k = k, m = m) cat(sprintf(".psortKM(x, k = %d, m = %d):\n", k, m)) print(px1) stopifnot(identical(px1, px0)) py0 <- psortKM_R(y, k = k, m = m) py0b <- psortKM_R2(y, k = k, m = m) stopifnot(identical(py0b, py0)) py1 <- matrixStats:::.psortKM(y, k = k, m = m) cat(sprintf(".psortKM(y, k = %d, m = %d):\n", k, m)) print(py1) stopifnot(identical(py1, py0)) stopifnot(identical(py1, px1)) } # for (m ...) } # for (k ...) matrixStats/tests/rowRanks.R0000644000176200001440000001407114111772015015672 0ustar liggesuserslibrary("matrixStats") dense_rank <- function(x) match(x, table = sort(unique(x))) rowRanks_R <- function(x, ties.method, ..., useNames = NA) { if (ties.method == "dense") { res <- t(apply(x, MARGIN = 1L, FUN = dense_rank)) } else { res <- t(apply(x, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = ties.method)) } # Preserve dimnames attribute? dim(res) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(res) <- dimnames res } colRanks_R <- function(x, ties.method, preserveShape = FALSE, ..., useNames = NA) { if (ties.method == "dense") { res <- t(apply(x, MARGIN = 2L, FUN = dense_rank)) } else { res <- t(apply(x, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = ties.method)) } # Preserve dimnames attribute? tx <- t(x) dim(res) <- dim(tx) dimnames <- dimnames(tx) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(res) <- dimnames if (preserveShape) res <- t(res) res } set.seed(1) cat("Consistency checks:\n") xs <- vector("list", length = 4L) for (kk in 1:4) { # Simulate data in a matrix of any shape dim <- sample(40:80, size = 2L) n <- prod(dim) x <- rnorm(n, sd = 10) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1L) x[sample(length(x), size = nna)] <- NA_real_ } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } xs[[kk]] <- x } # for (kk ...) str(xs) for (kk in 1:4) { cat("Random test #", kk, "\n", sep = "") x <- xs[[kk]] tx <- t(x) for (ties in c("max", "min", "average", "first", "last", "dense")) { cat(sprintf("ties.method = %s\n", ties)) # rowRanks(): y1 <- matrixStats::rowRanks(x, ties.method = ties) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- rowRanks_R(x, ties.method = ties) stopifnot(identical(y1, y2)) } y3 <- matrixStats::colRanks(tx, ties.method = ties) stopifnot(identical(y1, y3)) # colRanks(): y1 <- matrixStats::colRanks(x, ties.method = ties) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- colRanks_R(x, ties.method = ties) stopifnot(identical(y1, y2)) } y3 <- matrixStats::rowRanks(tx, ties.method = ties) stopifnot(identical(y1, y3)) } } # for (kk ...) # Note, below we know ties.method %in% c("min", "max", "average") is correct cat("Consistency checks for random:\n") tolerance <- 0.1 nsamples <- 10000 for (kk in 1:4) { cat("Random test #", kk, "\n", sep = "") x <- xs[[kk]] tx <- t(x) for (ties in c("random")) { cat(sprintf("ties.method = %s\n", ties)) ## rowRanks(): y0 <- rowRanks_R(x, ties.method = ties) y1 <- matrixStats::rowRanks(x, ties.method = ties) ## Assert symmetric rank differences d <- y1 - y0 stopifnot(all(rowSums(d) == 0, na.rm = TRUE)) ## Assert within [min, max] y2min <- matrixStats::rowRanks(x, ties.method = "min") y2max <- matrixStats::rowRanks(x, ties.method = "max") stopifnot(all(y1 >= y2min, na.rm = TRUE) && all(y1 <= y2max, na.rm = TRUE)) ## Assert near average y1list <- replicate(nsamples, matrixStats::rowRanks(x, ties.method = ties), simplify = FALSE) y1mean <- Reduce(`+`, y1list) / nsamples y2avg <- matrixStats::rowRanks(x, ties.method = "average") stopifnot(all(abs(y1mean - y2avg) < tolerance, na.rm = TRUE)) ## colRanks(): y0 <- colRanks_R(x, ties.method = ties) y1 <- matrixStats::colRanks(x, ties.method = ties) ## Assert symmetric rank differences d <- y1 - y0 stopifnot(all(rowSums(d) == 0, na.rm = TRUE)) ## Assert within [min, max] y2min <- matrixStats::colRanks(x, ties.method = "min") y2max <- matrixStats::colRanks(x, ties.method = "max") stopifnot(all(y1 >= y2min, na.rm = TRUE) && all(y1 <= y2max, na.rm = TRUE)) y1list <- replicate(nsamples, matrixStats::colRanks(x, ties.method = ties), simplify = FALSE) y1mean <- Reduce(`+`, y1list) / nsamples ## Assert near average y2avg <- matrixStats::colRanks(x, ties.method = "average") stopifnot(all(abs(y1mean - y2avg) < tolerance, na.rm = TRUE)) } } # for (kk ...) ## Exception handling x <- matrix(1:12, nrow = 3L, ncol = 4L) y <- try(rowRanks(x, ties.method = "unknown"), silent = TRUE) stopifnot(inherits(y, "try-error")) y <- try(colRanks(x, ties.method = "unknown"), silent = TRUE) stopifnot(inherits(y, "try-error")) dimnames <- list(letters[1:3], LETTERS[1:4]) for (mode in c("integer", "double")){ storage.mode(x) <- mode # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { for (ties in c("max", "min", "average", "first", "last", "dense", "random")) { cat(sprintf("ties.method = %s\n", ties)) # rowRanks(): y1 <- matrixStats::rowRanks(x, ties.method = ties, useNames = useNames) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- rowRanks_R(x, ties.method = ties, useNames = useNames) stopifnot(identical(y1, y2)) } y3 <- matrixStats::colRanks(t(x), ties.method = ties, useNames = useNames) stopifnot(identical(y1, y3)) # colRanks(): y1 <- matrixStats::colRanks(x, ties.method = ties, useNames = useNames) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- colRanks_R(x, ties.method = ties, useNames = useNames) stopifnot(identical(y1, y2)) } y3 <- matrixStats::rowRanks(t(x), ties.method = ties, useNames = useNames) stopifnot(identical(y1, y3)) # Check preserveShape y1 <- matrixStats::colRanks(x, ties.method = ties, preserveShape = TRUE, useNames = useNames) if (ties != "last" || getRversion() >= "3.3.0") { y2 <- colRanks_R(x, ties.method = ties, preserveShape = TRUE, useNames = useNames) stopifnot(identical(y1, y2)) } } } } } matrixStats/tests/rowVarDiffs.R0000644000176200001440000000621214111772117016321 0ustar liggesuserslibrary("matrixStats") fcns <- list( rowVarDiffs = list(rowVarDiffs, colVarDiffs), rowSdDiffs = list(rowSdDiffs, colSdDiffs), rowMadDiffs = list(rowMadDiffs, colMadDiffs), rowIQRDiffs = list(rowIQRDiffs, colIQRDiffs) ) for (fcn in names(fcns)) { cat(sprintf("%s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1L]] col_fcn <- fcns[[fcn]][[2L]] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50 + 0.1, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r1 <- row_fcn(x, na.rm = na.rm, useNames = useNames) r2 <- col_fcn(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) } } } } # for (add_na ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r1 <- row_fcn(x, na.rm = na.rm, useNames = useNames) r2 <- col_fcn(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r1 <- row_fcn(x, na.rm = na.rm, useNames = useNames) r2 <- col_fcn(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) } } } cat(sprintf("%s()...DONE\n", fcn)) } # for (fcn ...) matrixStats/tests/product_subset.R0000644000176200001440000000107214063411362017127 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = product, fsure = prod, na.rm = TRUE) validateIndicesTestVector(x, idxs, ftest = product, fsure = prod, na.rm = FALSE) } matrixStats/tests/weightedMedian.R0000644000176200001440000000625014063411362017003 0ustar liggesuserslibrary("matrixStats") x <- 1:5 y <- weightedMedian(x) y <- weightedMedian(x, w = c(NA, Inf, NA, Inf, NA), na.rm = TRUE) print(y) y <- weightedMedian(x, w = c(NA, Inf, NA, Inf, NA), na.rm = FALSE) print(y) stopifnot(is.na(y)) x <- 1:10 n <- length(x) y1 <- median(x) # 5.5 y2 <- weightedMedian(x) # 5.5 stopifnot(all.equal(y1, y2)) w <- rep(1, times = n) y1 <- weightedMedian(x, w) # 5.5 (default) y2a <- weightedMedian(x, ties = "weighted") # 5.5 (default) y2b <- weightedMedian(x, ties = "min") # 5 y2c <- weightedMedian(x, ties = "max") # 6 stopifnot(all.equal(y2a, y1)) y3 <- weightedMedian(x, w) # 5.5 (default) # Pull the median towards zero w[1] <- 5 y1 <- weightedMedian(x, w) # 3.5 y <- c(rep(0, times = w[1]), x[-1]) # Only possible for integer weights y2 <- median(y) # 3.5 stopifnot(all.equal(y1, y2)) # Put even more weight on the zero w[1] <- 8.5 y <- weightedMedian(x, w) # 2 # All weight on the first value w[1] <- Inf y <- weightedMedian(x, w) # 1 # All weight on the last value w[1] <- 1 w[n] <- Inf y <- weightedMedian(x, w) # 10 # All weights set to zero w <- rep(0, times = n) y <- weightedMedian(x, w) # NA x <- 1:4 w <- rep(1, times = 4) for (mode in c("integer", "double")) { storage.mode(x) <- mode for (ties in c("weighted", "mean", "min", "max")) { cat(sprintf("ties = %s\n", ties)) y <- weightedMedian(x, w, ties = ties) } } set.seed(0x42) y <- weightedMedian(x = double(0L)) print(y) stopifnot(length(y) == 1L) stopifnot(is.na(y)) y <- weightedMedian(x = x[1]) print(y) stopifnot(length(y) == 1L) stopifnot(all.equal(y, x[1])) n <- 1e3 x <- runif(n) w <- runif(n, min = 0, max = 1) for (mode in c("integer", "double")) { storage.mode(x) <- mode for (ties in c("weighted", "mean", "min", "max")) { y <- weightedMedian(x, w, ties = ties) cat(sprintf("mode = %s, ties = %s, result = %g\n", mode, ties, y)) } } # A large vector n <- 1e5 x <- runif(n) w <- runif(n, min = 0, max = 1) y <- weightedMedian(x, w) y <- weightedMedian(x, w, ties = "min") # Single Number xs <- c(1, NA_integer_) ws <- c(1, NA_integer_) for (x in xs) { for (w in ws) { y <- weightedMedian(x = x, w = w) if (is.na(w)) z <- NA_real_ else z <- x[1] stopifnot(all.equal(y, z)) } } ## Logical x1 <- c(TRUE, FALSE, TRUE) w0 <- c(0, 0, 0) stopifnot(!is.finite(weightedMedian(x1, w0)), !is.infinite(weightedMedian(x1, w0))) w1 <- c(1, 1, 1) stopifnot(weightedMedian(x1, w1) == 1) w2 <- c(1, 2, 3) stopifnot(weightedMedian(x1, w2) == 1) ### NA stopifnot(is.na(weightedMedian(c(TRUE, FALSE, NA), c(1, 2, 3))), all.equal(weightedMedian(c(TRUE, FALSE, NA), c(1, 2, 3), na.rm = TRUE), weightedMedian(c(TRUE, FALSE), c(1, 2)))) ### Identical to as.integer() x <- rcauchy(100) w <- abs(rcauchy(100)) stopifnot(all.equal(weightedMedian(x > 0, w), weightedMedian(as.integer(x > 0), w))) matrixStats/tests/rowWeightedVars.R0000644000176200001440000002241414074054377017224 0ustar liggesuserslibrary("matrixStats") set.seed(1) x <- matrix(rnorm(20), nrow = 5L, ncol = 4L) print(x) # To check names attribute dimnames <- list(letters[1:5], LETTERS[1:4]) # Weighted row variances (uniform weights - all w = 1) # Non-weighted row variances x_est0 <- rowVars(x) w <- rep(1, times = ncol(x)) x_est1 <- rowWeightedVars(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x, useNames = TRUE) x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (uniform weights - all w = 3) x3 <- cbind(x, x, x) x_est0 <- rowVars(x3) w <- rep(3, times = ncol(x)) x_est1 <- rowWeightedVars(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x3 <- cbind(x, x, x) x_est0 <- rowVars(x3, useNames = TRUE) x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowVars(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedVars(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x[, (w == 1), drop = FALSE], useNames = TRUE) x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (excluding some columns) w <- c(0, 1, 0, 0) x_est0 <- rowVars(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedVars(x, w = w) #stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est1)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x[, (w == 1), drop = FALSE], useNames = TRUE) x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances (all zero weights) w <- c(0, 0, 0, 0) x_est0 <- rowVars(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedVars(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = w, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x[, (w == 1), drop = FALSE], useNames = TRUE) x_est1 <- rowWeightedVars(x, w = w, useNames = NA) x_est2 <- colWeightedVars(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedVars(x, w = w, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted variances by rows and columns w <- 1:4 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est1 <- rowWeightedVars(x, w = w, useNames = useNames) print(x_est1) x_est2 <- colWeightedVars(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est2, x_est1)) } } x[sample(length(x), size = 0.3 * length(x))] <- NA print(x) # Non-weighted row variances with missing values x_est0 <- rowVars(x, na.rm = TRUE) x_est1 <- rowWeightedVars(x, w = rep(1, times = ncol(x)), na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedVars(t(x), w = rep(1, times = ncol(x)), na.rm = TRUE) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedVars(x, w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = FALSE) x_est2 <- colWeightedVars(t(x), w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowVars(x, na.rm = TRUE, useNames = TRUE) x_est1 <- rowWeightedVars(x, w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = TRUE) x_est2 <- colWeightedVars(t(x), w = rep(1, times = ncol(x)), na.rm = TRUE, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row variances with missing values # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est1 <- rowWeightedVars(x, w = w, na.rm = TRUE, useNames = useNames) print(x_est1) x_est2 <- colWeightedVars(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est2, x_est1)) } } # Weighted variances by rows and columns w <- 1:4 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est1 <- rowWeightedVars(x, w = w, na.rm = TRUE, useNames = useNames) print(x_est1) x_est2 <- colWeightedVars(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est2, x_est1)) } } # Weighted row standard deviation (excluding some columns) w <- c(1, 1, 0, 1) ## FIXME: rowVars()/rowSds() needs na.rm = FALSE (wrong default) x_est0 <- rowSds(x[, (w == 1), drop = FALSE], na.rm = FALSE) x_est1 <- rowWeightedSds(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedSds(x, w = w, na.rm = FALSE, useNames = FALSE) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowSds(x[, (w == 1), drop = FALSE], na.rm = FALSE, useNames = TRUE) x_est1 <- rowWeightedSds(x, w = w, na.rm = FALSE, useNames = NA) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedSds(x, w = w, na.rm = FALSE, useNames = TRUE) x_est2 <- colWeightedSds(t(x), w = w, na.rm = FALSE, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row MADs (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowMads(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMads(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMads(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedMads(x, w = w, useNames = FALSE) x_est2 <- colWeightedMads(t(x), w = w, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowMads(x[, (w == 1), drop = FALSE], useNames = TRUE) x_est1 <- rowWeightedMads(x, w = w, useNames = NA) x_est2 <- colWeightedMads(t(x), w = w, useNames = NA) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est1 <- rowWeightedMads(x, w = w, useNames = TRUE) x_est2 <- colWeightedMads(t(x), w = w, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL matrixStats/tests/rowWeightedVars_subset.R0000644000176200001440000000324514074030335020576 0ustar liggesuserslibrary("matrixStats") fcns <- list( weightedVar = c(rowWeightedVars, colWeightedVars), weightedSd = c(rowWeightedSds, colWeightedSds), weightedMad = c(rowWeightedMads, colWeightedMads) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (fcn in names(fcns)) { cat(sprintf("subsetted tests on matrix %s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1]] col_fcn <- fcns[[fcn]][[2]] for (mode in c("numeric", "integer")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix_w(x, w, rows, cols, ftest = row_fcn, fsure = row_fcn, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix_w(x, w, rows, cols, fcoltest = col_fcn, fsure = row_fcn, na.rm = na.rm, useNames = useNames) } } } } } } cat(sprintf("%s()...DONE\n", fcn)) } matrixStats/tests/binCounts_subset.R0000644000176200001440000000174014063411362017415 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binCounts_hist <- function(x, bx, right = FALSE, ...) { n0 <- graphics::hist(x, breaks = bx, right = right, include.lowest = TRUE, plot = FALSE)$counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" bx <- c(-6, 0, 3, 4, 10) for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = binCounts, fsure = binCounts_hist, bx = bx, right = FALSE) validateIndicesTestVector(x, idxs, ftest = binCounts, fsure = binCounts_hist, bx = bx, right = TRUE) } matrixStats/tests/anyMissing.R0000644000176200001440000000477314111765646016231 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- data.frame( logical = c(TRUE, FALSE, TRUE), integer = 1:3, double = seq(from = 1.0, to = 3.0, by = 1.0), complex = seq(from = 1.0, to = 3.0, by = 1.0) + 1.0i, character = letters[1:3], stringsAsFactors = FALSE ) modes <- names(data) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("NULL...\n") stopifnot(identical(anyMissing(NULL), FALSE)) cat("NULL...done\n") cat("raw...\n") stopifnot(identical(anyMissing(as.raw(0:2)), FALSE)) cat("raw...done\n") cat("list(NULL)...\n") stopifnot(identical(anyMissing(list(NULL)), FALSE)) cat("list(NULL)...done\n") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Scalars, vectors, and matrices of various modes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in modes) { cat(sprintf("Mode: %s...\n", mode)) values <- data[[mode]] # Scalars cat(" scalar\n") x <- values[1L] print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x) <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) # Vectors cat(" vector\n") x <- values print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x)[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) # Matrices cat(" matrix\n") x <- matrix(c(values, values), ncol = 2L) print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x)[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) cat(sprintf("Mode: %s...done\n", mode)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data frames # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("data.frame...\n") x <- data stopifnot(identical(anyMissing(x), FALSE)) for (mode in modes) { x <- data is.na(x[[mode]])[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) } # for (mode ...) cat("data.frame...done\n") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Lists # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("list...\n") x <- as.list(data) stopifnot(identical(anyMissing(x), FALSE)) for (mode in modes) { x <- as.list(data) is.na(x[[mode]])[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) } # for (mode ...) cat("list...done\n") matrixStats/tests/validateIndices.R0000644000176200001440000000235614111754664017171 0ustar liggesuserslibrary(matrixStats) source("utils/validateIndicesFramework.R") oopts <- options(matrixStats.validateIndices = NULL) ftest <- function(x, idxs) validateIndices(idxs, length(x)) x <- 1:6 for (idxs in index_cases) { for (mode in c("integer", "numeric", "logical")) { if (!is.null(idxs)) { suppressWarnings({storage.mode(idxs) <- mode}) } validateIndicesTestVector(x, idxs, ftest = ftest, fsure = identity) } } ftest <- function(x, idxs) validateIndices(idxs, length(x)) for (idxs in index_error_cases) { validateIndicesTestVector(x, idxs, ftest = ftest, fsure = identity) } ftest <- function(x, rows, cols) { vr <- validateIndices(rows, dim(x)[1], FALSE) vc <- validateIndices(cols, dim(x)[2], FALSE) stopifnot(all((vr > 0 & vr <= dim(x)[1]) | is.na(vr))) stopifnot(all((vc > 0 & vc <= dim(x)[2]) | is.na(vc))) suppressWarnings(x <- x[vr, vc, drop = FALSE]) x } x <- matrix(1:36, nrow = 6, ncol = 6) for (rows in index_cases) { for (cols in index_cases) { validateIndicesTestMatrix(x, rows, cols, ftest = ftest, fsure = identity) } } # Testing for 64-bit builds (64 bits = 8 bytes) if (.Machine$sizeof.pointer == 8L) { validateIndices(c(1.25*2^40, 3, 1*2^38, 1, 1.4*2^39), maxIdx = 1*2^41) } options(oopts) matrixStats/tests/logSumExp_subset.R0000644000176200001440000000123314063411362017371 0ustar liggesuserslibrary("matrixStats") logSumExp_R <- function(lx, na.rm = FALSE) { log(sum(exp(lx), na.rm = na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = logSumExp, fsure = logSumExp_R, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = logSumExp, fsure = logSumExp_R, na.rm = TRUE) } matrixStats/tests/rowWeightedMedians.R0000644000176200001440000002032714111740760017660 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMedians_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMedians() if (!is.null(w)) { if (isFALSE(useNames)) names(res) <- NULL } else if (is.na(useNames) || !useNames) names(res) <- NULL res } colWeightedMedians_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = weightedMedian, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as colWeightedMedians() if (!is.null(w)) { if (isFALSE(useNames)) names(res) <- NULL } else if (is.na(useNames) || !useNames) names(res) <- NULL res } set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # To check names attribute dimnames <- list(letters[1:5], LETTERS[1:4]) # Non-weighted row medians x_est0 <- rowMedians(x) x_est1 <- rowWeightedMedians(x) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x)) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowMedians(x, useNames = useNames) x_est1 <- rowWeightedMedians(x, useNames = useNames) x_est2 <- colWeightedMedians(t(x), useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) } } # Weighted row medians (uniform weights) w <- rep(2.5, times = ncol(x)) x_est0 <- rowMedians(x) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowMedians(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians (excluding some columns) w <- c(0, 1, 0, 0) x_est0 <- rowMedians(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians (all zero weights) w <- c(0, 0, 0, 0) x_est0 <- rowMedians(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMedians(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted medians by rows and columns w <- 1:4 x_est1 <- rowWeightedMedians(x, w = w) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row medians with missing values x_est0 <- apply(x, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = TRUE) print(x_est0) x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted medians by rows and columns w <- 1:4 x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE) x_est2 <- colWeightedMedians(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Inf weight x <- matrix(1:2, nrow = 1, ncol = 2) w <- c(7, Inf) x_est1 <- rowWeightedMedians(x, w = w) x_est2 <- colWeightedMedians(t(x), w = w) stopifnot(identical(2, x_est1)) stopifnot(identical(2, x_est2)) # Test with and without dimnames on x dimnames <- list("a", LETTERS[1:2]) for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMedians_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMedians(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMedians_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMedians(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } matrixStats/tests/rowWeightedMedians_subset.R0000644000176200001440000000373414105674332021254 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMedians_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMedians() if (!is.null(w)) { if (isFALSE(useNames)) names(res) <- NULL } else if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (mode in c("numeric", "integer", "logical")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- if (mode == "logical") "integer" else mode if (mode == "numeric") w[1] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix_w(x, w, rows, cols, ftest = rowWeightedMedians, fsure = rowWeightedMedians_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix_w(x, w, rows, cols, fcoltest = colWeightedMedians, fsure = rowWeightedMedians_R, na.rm = na.rm, useNames = useNames) } } } } } } matrixStats/tests/rowMedians_subset.R0000644000176200001440000000270414074054377017575 0ustar liggesuserslibrary("matrixStats") rowMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowMedians, fsure = rowMedians_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMedians, fsure = rowMedians_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/rowQuantiles_subset.R0000644000176200001440000000376614105674332020165 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowQuantiles_R <- function(x, probs, na.rm = FALSE, drop = TRUE, ..., useNames = NA) { q <- apply(x, MARGIN = 1L, FUN = function(x, probs, na.rm) { if (!na.rm && any(is.na(x))) { na_value <- NA_real_ storage.mode(na_value) <- storage.mode(x) rep(na_value, times = length(probs)) } else { as.vector(quantile(x, probs = probs, na.rm = na.rm, ...)) } }, probs = probs, na.rm = na.rm) if (!is.null(dim(q))) q <- t(q) else dim(q) <- c(nrow(x), length(probs)) digits <- max(2L, getOption("digits")) colnames(q) <- sprintf("%.*g%%", digits, 100 * probs) rownames(q) <- rownames(x) if (isFALSE(useNames)) rownames(q) <- NULL if (drop) q <- drop(q) q } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) dimnames <- lapply(dim(x), FUN = function(n) letters[seq_len(n)]) probs <- c(0, 0.25, 0.75, 1) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowQuantiles, fsure = rowQuantiles_R, probs = probs, na.rm = na.rm, drop = FALSE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colQuantiles, fsure = rowQuantiles_R, probs = probs, na.rm = na.rm, drop = FALSE, useNames = useNames) } } } } } matrixStats/tests/rowSds.R0000644000176200001440000001337714111772044015357 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 1L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as rowSds() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } else if (isFALSE(useNames)) names(sigma) <- NULL sigma } colSds_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ sigma <- apply(x, MARGIN = 2L, FUN = sd, na.rm = na.rm) }) stopifnot(!any(is.infinite(sigma))) # Keep naming support consistency same as colSds() if (is.null(center) || nrow(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(sigma) <- NULL } if (isFALSE(useNames)) names(sigma) <- NULL sigma } rowSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) sigma <- rowSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } colSds_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) sigma <- colSds(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(sigma))) sigma } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50 + 0.1, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowSds_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowSds(x, na.rm = na.rm, useNames = useNames) r1b <- rowSds_center(x, na.rm = na.rm, useNames = useNames) r2 <- colSds(t(x), na.rm = na.rm, useNames = useNames) r2b <- colSds_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } # for (add_na ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowSds_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowSds(x, na.rm = na.rm, useNames = useNames) r1b <- rowSds_center(x, na.rm = na.rm, useNames = useNames) r2 <- colSds(t(x), na.rm = na.rm, useNames = useNames) r2b <- colSds_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") for (na.rm in c(FALSE, TRUE)) { cat("na.rm = ", na.rm, "\n", sep = "") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowSds_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowSds(x, na.rm = na.rm, useNames = useNames) r1b <- rowSds_center(x, na.rm = na.rm, useNames = useNames) r2 <- colSds(t(x), na.rm = na.rm, useNames = useNames) r2b <- colSds_center(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r2b)) stopifnot( !any(is.infinite(r1)), !any(is.infinite(r2)), !any(is.infinite(r1b)), !any(is.infinite(r2b)) ) } } } } matrixStats/tests/mean2.R0000644000176200001440000001241214111766664015100 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) mean2_R <- function(x, na.rm = FALSE, idxs = NULL) { if (is.null(idxs)) { mean(x, na.rm = na.rm) } else { mean(x[idxs], na.rm = na.rm) } } # mean2_R() cat("Consistency checks:\n") for (kk in 1:20) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape n <- sample(100L, size = 1L) x <- rnorm(n, sd = 100) # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1L) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE, FALSE), size = 1L) # Sum over all y0 <- mean2_R(x, na.rm = na.rm) y1 <- mean2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) # Sum over subset nidxs <- sample(n, size = 1L) idxs <- sample(n, size = nidxs) y0 <- mean2_R(x, na.rm = na.rm, idxs = idxs) y1 <- mean2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) if (storage.mode(x) == "integer") { storage.mode(x) <- "logical" y0 <- mean2_R(x, na.rm = na.rm) y1 <- mean2(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y0 <- mean2_R(x, na.rm = na.rm, idxs = idxs) y1 <- mean2(x, na.rm = na.rm, idxs = idxs) stopifnot(all.equal(y1, y0)) } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (na.rm in c(FALSE, TRUE)) { # Averaging over zero elements (integers) x <- integer(0) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- 1:5 idxs <- integer(0) s1 <- mean(x[idxs], na.rm = na.rm) s2 <- mean2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over NA_integer_:s x <- rep(NA_integer_, times = 5L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times = 5L) idxs <- 1:3 s1 <- mean(x[idxs], na.rm = na.rm) s2 <- mean2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over zero elements (doubles) x <- double(0) s1 <- mean(x) s2 <- mean2(x) stopifnot(identical(s1, s2)) x <- as.double(1:10) idxs <- integer(0) s1 <- mean(x[idxs]) s2 <- mean2(x, idxs = idxs) stopifnot(identical(s1, s2)) # Averaging over NA_real_:s x <- rep(NA_real_, times = 5L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) x <- rep(NA_real_, times = 5L) idxs <- 1:3 s1 <- mean(x[idxs], na.rm = na.rm) s2 <- mean2(x, idxs = idxs, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over -Inf:s x <- rep(-Inf, times = 3L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over +Inf:s x <- rep(+Inf, times = 3L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over mix of -Inf:s and +Inf:s x <- rep(c(-Inf, +Inf), times = 3L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over mix of -Inf:s and +Inf:s and numerics x <- rep(c(-Inf, +Inf, 3.14), times = 2L) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) stopifnot(identical(s1, s2)) # Averaging over mix of NaN, NA, +Inf, and numerics x <- c(NaN, NA, +Inf, 3.14) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) if (na.rm) { stopifnot(identical(s2, s1)) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } # Averaging over mix of NaN, NA_real_, +Inf, and numerics x <- c(NA_real_, NaN, +Inf, 3.14) s1 <- mean(x, na.rm = na.rm) s2 <- mean2(x, na.rm = na.rm) if (na.rm) { stopifnot(identical(s2, s1)) } else { stopifnot(is.na(s1), is.na(s2)) ## NOTE, due to compiler optimization, it is not guaranteed that NA is ## returned here (as one would expect). NaN might very well be returned, ## when both NA and NaN are involved. This is an accepted feature in R, ## which is documented in help("is.nan"). See also ## https://stat.ethz.ch/pipermail/r-devel/2017-April/074009.html. ## Thus, we cannot guarantee that s1 is identical to s0. } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:5 idxs_list <- list( integer = 1:3, double = as.double(1:3), logical = (x <= 3) ) for (idxs in idxs_list) { cat("idxs:\n") str(idxs) s1 <- mean(x[idxs], na.rm = TRUE) s2 <- mean2(x, idxs = idxs, na.rm = TRUE) stopifnot(identical(s1, s2)) } matrixStats/tests/rowProds_subset.R0000644000176200001440000000265014074054377017304 0ustar liggesuserslibrary("matrixStats") rowProds_R <- function(x, FUN = prod, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = FUN, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowProds, fsure = rowProds_R, method = "expSumLog", FUN = product, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colProds, fsure = rowProds_R, method = "expSumLog", FUN = product, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/count_subset.R0000644000176200001440000000166614063411362016610 0ustar liggesuserslibrary("matrixStats") count_R <- function(x, value = TRUE, na.rm = FALSE, ...) { if (is.na(value)) { counts <- sum(is.na(x)) } else { counts <- sum(x == value, na.rm = na.rm) } as.integer(counts) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -3, max = 3) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = count, fsure = count_R, value = 0, na.rm = TRUE) validateIndicesTestVector(x, idxs, ftest = count, fsure = count_R, value = 0, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = count, fsure = count_R, value = NA_integer_) } matrixStats/tests/benchmark.R0000644000176200001440000000075614117375107016032 0ustar liggesusers## 1. Don't test with valgrind ## 2. Test only R (>= 3.0.2) because of that's what knitr requires if (getRversion() >= "3.0.2" && Sys.getenv("_R_CHECK_USE_VALGRIND_") == "") { ## 3. Make sure all suggested packages are installed / can be loaded pkgs <- c("base64enc", "ggplot2", "knitr", "markdown", "microbenchmark", "R.devices", "R.rsp") if (all(unlist(lapply(pkgs, FUN = requireNamespace)))) { html <- matrixStats:::benchmark("binCounts") print(html) } rm(list = "pkgs") } matrixStats/tests/weightedVar.R0000644000176200001440000000475514063411362016346 0ustar liggesuserslibrary("matrixStats") weightedVar_R <- function(x, w) { mu <- weighted.mean(x, w = w) sum(w * (x - mu) ^ 2) / (sum(w) - 1) } n <- 10 x <- as.double(1:n) message("*** weightedVar() ...") message("- Zero elements") m0 <- var(integer(0)) m1 <- weightedVar(integer(0), w = integer(0)) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- One elements") m0 <- var(1) m1 <- weightedVar(1) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights (all w = 1)") m0 <- var(x) w <- rep(1, times = n) m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights (all w = 3)") m0 <- var(rep(x, each = 3)) w <- rep(3, times = n) m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights on the first five elements") idxs <- 1:5 m0 <- var(x[1:5]) w <- rep(0, times = n) w[idxs] <- 1 m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Uniform weights on every second elements") idxs <- seq(from = 1, to = n, by = 2) m0 <- var(x[idxs]) w <- rep(0, times = n) w[idxs] <- 1 m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- All weights are zero") idxs <- integer(0L) m0 <- var(x[idxs]) w <- rep(0, times = n) w[idxs] <- 1 m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Infinite weight on first element") idxs <- 1L m0 <- var(x[idxs]) w <- rep(0, times = n) w[idxs] <- Inf m1 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("- Missing-value weight on first element") idxs <- 1L w <- rep(1, times = n) w[idxs] <- NA_real_ m1 <- weightedVar(x, w = w) str(list(m1 = m1)) stopifnot(identical(m1, NA_real_)) message("- Frequency weights") ## From https://en.wikipedia.org/wiki/Weighted_arithmetic_mean y <- c(2, 2, 4, 5, 5, 5) x <- unique(y) w <- table(y) stopifnot(names(w) == x) m0 <- weightedVar(x, w = w) m1 <- var(y) stopifnot(all.equal(m1, m0)) m2 <- weightedVar(x, w = w) str(list(m0 = m0, m1 = m1, m2 = m2)) stopifnot(all.equal(m2, m0)) ## From https://github.com/HenrikBengtsson/matrixStats/issues/72 large <- c(21, 8, 26, 1, 15, 33, 12, 25, 0, 84) years <- c(41706, 9301, 33678, 3082, 27040, 44188, 10049, 30591, 2275, 109831) m0 <- weightedVar(large, w = years) m1 <- weightedVar(large, w = years) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) message("*** weightedVar() ... DONE") matrixStats/tests/rowCounts_subset.R0000644000176200001440000000636714074054377017501 0ustar liggesuserslibrary("matrixStats") rowCounts_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(is.na(x)) ) } else { counts <- apply(x, MARGIN = 1L, FUN = function(x) sum(x == value, na.rm = na.rm) ) } # Preserve names attribute names <- names(counts) counts <- as.integer(counts) if (isTRUE(useNames) && !is.null(names)) names(counts) <- names counts } # rowCounts_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) x[2:3, 3:4] <- NA_real_ storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = 0, na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = 0, na.rm = TRUE, useNames = useNames) for (value in c(0, NA_integer_)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = value, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = value, useNames = useNames) } } } } } x <- matrix(rep(letters, length.out = 6 * 6), nrow = 6, ncol = 6) x[2:3, 3:4] <- NA_character_ # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = "g", na.rm = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = "g", na.rm = TRUE, useNames = useNames) for (value in c("g", NA_character_)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCounts, fsure = rowCounts_R, value = value, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colCounts, fsure = rowCounts_R, value = value, useNames = useNames) } } } } } matrixStats/tests/utils/0000755000176200001440000000000014111754602015100 5ustar liggesusersmatrixStats/tests/utils/validateIndicesFramework.R0000644000176200001440000001302314111754602022170 0ustar liggesuserslibrary("matrixStats") validateIndicesTestVector <- function(x, idxs, ftest, fsure, debug = FALSE, ...) { if (debug) cat(sprintf("idxs=%s, type=%s\n", toString(idxs), toString(typeof(idxs)))) suppressWarnings({ actual <- tryCatch(ftest(x, idxs = idxs, ...), error = function(c) "error") expect <- tryCatch({ if (!is.null(idxs)) x <- x[idxs] fsure(x, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } validateIndicesTestVector_w <- function(x, w, idxs, ftest, fsure, debug = FALSE, ...) { if (debug) cat(sprintf("idxs=%s, type=%s\n", toString(idxs), toString(typeof(idxs)))) suppressWarnings({ actual <- tryCatch(ftest(x, w, idxs = idxs, ...), error = function(c) "error") expect <- tryCatch({ if (!is.null(idxs)) { x <- x[idxs] w <- w[idxs] } fsure(x, w, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } validateIndicesTestMatrix <- function(x, rows, cols, ftest, fcoltest, fsure, debug = FALSE, ...) { if (debug) { cat(sprintf("rows=%s; type=%s\n", toString(rows), toString(typeof(rows)))) cat(sprintf("cols=%s; type=%s\n", toString(cols), toString(typeof(cols)))) } suppressWarnings({ if (missing(fcoltest)) { actual <- tryCatch(ftest(x, rows = rows, cols = cols, ...), error = function(c) "error") } else { actual <- tryCatch(fcoltest(t(x), rows = cols, cols = rows, ...), error = function(c) "error") } expect <- tryCatch({ if (!is.null(rows) && !is.null(cols)) { x <- x[rows, cols, drop = FALSE] } else if (!is.null(rows)) { x <- x[rows, , drop = FALSE] } else if (!is.null(cols)) { x <- x[, cols, drop = FALSE] } fsure(x, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } validateIndicesTestMatrix_w <- function(x, w, rows, cols, ftest, fcoltest, fsure, debug = FALSE, ...) { if (debug) { cat(sprintf("rows=%s; type=%s\n", toString(rows), toString(typeof(rows)))) cat(sprintf("cols=%s; type=%s\n", toString(cols), toString(typeof(cols)))) } suppressWarnings({ if (missing(fcoltest)) { actual <- tryCatch(ftest(x, w, rows = rows, cols = cols, ...), error = function(c) "error") } else { actual <- tryCatch(fcoltest(t(x), w, rows = cols, cols = rows, ...), error = function(c) "error") } expect <- tryCatch({ if (!is.null(rows) && !is.null(cols)) { x <- x[rows, cols, drop = FALSE] w <- w[cols] } else if (!is.null(rows)) { x <- x[rows, , drop = FALSE] } else if (!is.null(cols)) { x <- x[, cols, drop = FALSE] w <- w[cols] } fsure(x, w, ...) }, error = function(c) "error") }) if (debug) cat(sprintf("actual=%s\nexpect=%s\n", toString(actual), toString(expect))) stopifnot(all.equal(actual, expect)) } index_cases <- list() # negative indices with duplicates index_cases[[length(index_cases) + 1]] <- c(-4, 0, 0, -3, -1, -3, -1) # positive indices index_cases[[length(index_cases) + 1]] <- c(3, 5, 1) # positive indices with duplicates index_cases[[length(index_cases) + 1]] <- c(3, 0, 0, 5, 1, 5, 5) # positive indices out of ranges index_cases[[length(index_cases) + 1]] <- 4:9 # negative out of ranges: just ignore index_cases[[length(index_cases) + 1]] <- c(-5, 0, -3, -1, -9) # negative indices exclude all index_cases[[length(index_cases) + 1]] <- -1:-6 # idxs is single number index_cases[[length(index_cases) + 1]] <- 4 index_cases[[length(index_cases) + 1]] <- -4 index_cases[[length(index_cases) + 1]] <- 0 # idxs is empty index_cases[[length(index_cases) + 1]] <- integer() # NA in idxs index_cases[[length(index_cases) + 1]] <- c(NA_real_, 0, 2) # Inf in idxs index_cases[[length(index_cases) + 1]] <- c(-Inf, -1) index_cases[[length(index_cases) + 1]] <- c(NA_real_, 0, 2, Inf) # single logical index_cases[[length(index_cases) + 1]] <- NA index_cases[[length(index_cases) + 1]] <- TRUE index_cases[[length(index_cases) + 1]] <- FALSE # full logical idxs index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, FALSE, TRUE, TRUE, FALSE) # too many logical idxs index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE) # insufficient idxs index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE) index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, NA) index_cases[[length(index_cases) + 1]] <- c(FALSE, TRUE, NA, FALSE) # NULL index_cases[length(index_cases) + 1] <- list(NULL) index_error_cases <- list() # mixed positive and negative indices index_error_cases[[length(index_cases) + 1]] <- 1:-1 # mixed positive, negative and zero indices index_error_cases[[length(index_cases) + 1]] <- c(-4, 0, 0, 1) # NA in idxs index_error_cases[[length(index_cases) + 1]] <- c(NA_real_, -2) matrixStats/tests/weightedMean.R0000644000176200001440000000447014063411362016470 0ustar liggesuserslibrary("matrixStats") for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "", sep = "") n <- 2L x <- runif(n, min = -5, max = 5) if (mode == "logical") { x <- x > 0 } storage.mode(x) <- mode str(x) cat("All weights are 1\n") w <- rep(1, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("First weight is 5\n") # Pull the mean towards zero w[1] <- 5 str(w) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("All weights are 0\n") # All weights set to zero w <- rep(0, times = n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("First weight is 8.5\n") # Put even more weight on the zero w[1] <- 8.5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("First weight is Inf\n") # All weight on the first value w[1] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) cat("Last weight is Inf\n") # All weight on the last value w[1] <- 1 w[n] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0 = m0, m1 = m1)) stopifnot(identical(m1, m0)) } # for (mode ...) message("*** Testing for missing values") # NA tests xs <- list(A = c(1, 2, 3), B = c(NA, 2, 3), C = c(NA, 2, 3)) ws <- list(A = c(1, 1, 1), B = c(NA, 1, 1), C = c(1, NA, 1)) ## NOTE: The (B, B) case with na.rm = TRUE is interesting because ## even if NAs in 'w' are not dropped by na.rm = TRUE, this one ## is because 'x' is dropped and therefore that first element ## is skipped in the computation. It basically does ## keep <- !is.na(x); x <- x[keep]; w <- w[keep] ## without looking at 'w'. for (x in xs) { for (mode in c("logical", "integer", "double")) { storage.mode(x) <- mode for (w in ws) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("mode: %s, na.rm = %s\n", mode, na.rm)) str(list(x = x, w = w)) m0 <- weighted.mean(x, w, na.rm = na.rm) m1 <- weightedMean(x, w, na.rm = na.rm) str(list(m0 = m0, m1 = m1)) stopifnot(all.equal(m1, m0)) } } } } matrixStats/tests/sum2_subset.R0000644000176200001440000000106214063411362016334 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = sum2, fsure = sum, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = sum2, fsure = sum, na.rm = TRUE) } matrixStats/tests/rowAvgsPerColSet_subset.R0000644000176200001440000000266114063411362020665 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) #W <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) for (rows in index_cases) { for (cols in index_cases) { if (is.null(rows)) { rows <- seq_len(nrow(x)) rows_finite <- rows } else { rows_finite <- rows[is.finite(rows)] } if (is.null(cols)) { cols <- seq_len(ncol(x)) cols_finite <- cols } else { cols_finite <- cols[is.finite(cols)] } suppressWarnings({ actual <- tryCatch({ rowAvgsPerColSet(x, rows = rows, S = matrix(cols, ncol = 1), FUN = rowMeans) }, error = function(c) "error") expect <- tryCatch({ rowMeans(x[rows, cols_finite, drop = FALSE], na.rm = TRUE) }, error = function(c) "error") }) stopifnot(all.equal(as.vector(actual), expect)) suppressWarnings({ actual <- tryCatch({ colAvgsPerRowSet(x, cols = cols, S = matrix(rows, ncol = 1), FUN = colMeans) }, error = function(c) "error") expect <- tryCatch({ colMeans(x[rows_finite, cols, drop = FALSE], na.rm = TRUE) }, error = function(c) "error") }) stopifnot(all.equal(as.vector(actual), expect)) } } matrixStats/tests/rowMads.R0000644000176200001440000002341114111771475015507 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") rowMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } rowMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowMedians(x, cols = cols, na.rm = na.rm, useNames = FALSE) rowMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } colMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colMedians(x, rows = rows, na.rm = na.rm, useNames = FALSE) colMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 3x3 matrix (no ties) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1, 2, 3, 2, 3, 4, 3, 4, 5) + 0.1, nrow = 3, ncol = 3) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check name attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("rowMads():\n") center <- rowMedians(x, na.rm = TRUE, useNames = useNames) r0 <- rowMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- rowMads(x, na.rm = TRUE, useNames = useNames) r1b <- rowMads_center(x, na.rm = TRUE, useNames = useNames) r1c <- rowMads(x, center = center, na.rm = TRUE, useNames = useNames) r2 <- colMads(tx, na.rm = TRUE, useNames = useNames) r2b <- colMads_center(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r1c, r1)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2b, r2)) cat("colMads():\n") center <- colMedians(x, na.rm = TRUE, useNames = useNames) r0 <- colMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- colMads(x, na.rm = TRUE, useNames = useNames) r1b <- colMads_center(x, na.rm = TRUE, useNames = useNames) r1c <- colMads(x, center = center, na.rm = TRUE, useNames = useNames) r2 <- rowMads(tx, na.rm = TRUE, useNames = useNames) r2b <- rowMads_center(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r1c, r1)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2b, r2)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Ties: a 4x4 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1:4, 2:5, 3:6, 4:7) + 0.1, nrow = 4, ncol = 4) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) tx <- t(x) # To check name attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("rowMads():\n") r0 <- rowMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- rowMads(x, na.rm = TRUE, useNames = useNames) r2 <- colMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) cat("colMads():\n") r0 <- colMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- colMads(x, na.rm = TRUE, useNames = useNames) r2 <- rowMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } } } tx <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # No ties: a 3x3 matrix with an NA value # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1, 2, 3, 2, 3, 4, 3, 4, 5) + 0.1, nrow = 3, ncol = 3) x[2, 2] <- NA_real_ cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) tx <- t(x) # To check name attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("rowMads():\n") r0 <- rowMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- rowMads(x, na.rm = TRUE, useNames = useNames) r2 <- colMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) cat("colMads():\n") r0 <- colMads_R(x, na.rm = TRUE, useNames = useNames) r1 <- colMads(x, na.rm = TRUE, useNames = useNames) r2 <- rowMads(tx, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } } } tx <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } # To check name attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") cat("rowMads():\n") center <- rowMedians(x, na.rm = na.rm, useNames = useNames) r0 <- rowMads_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowMads(x, na.rm = na.rm, useNames = useNames) r2 <- rowMads(x, center = center, na.rm = na.rm, useNames = useNames) r3 <- colMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2, r1)) stopifnot(all.equal(r3, r0)) stopifnot(all.equal(r3, r1)) stopifnot(all.equal(r3, r2)) cat("colMads():\n") center <- colMedians(x, na.rm = na.rm, useNames = useNames) r0 <- colMads_R(x, na.rm = na.rm, useNames = useNames) r1 <- colMads(x, na.rm = na.rm, useNames = useNames) r2 <- colMads(x, center = center, na.rm = na.rm, useNames = useNames) r3 <- rowMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r2, r1)) stopifnot(all.equal(r3, r0)) stopifnot(all.equal(r3, r1)) stopifnot(all.equal(r3, r2)) } } } tx <- NULL } # for (add_na ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(NA_real_, nrow = 10L, ncol = 5L) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowMads_R(x, na.rm = na.rm, useNames = useNames) if (na.rm) r0[is.na(r0)] <- NaN r1 <- rowMads(x, na.rm = na.rm, useNames = useNames) r2 <- colMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } tx <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 1L, ncol = 1L) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL tx <- t(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowMads_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowMads(x, na.rm = na.rm, useNames = useNames) r2 <- colMads(tx, na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 0x0 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(double(0), nrow = 0, ncol = 0) tx <- t(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowMads_R(x, na.rm = na.rm) r1 <- rowMads(x, na.rm = na.rm) r2 <- colMads(tx, na.rm = na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } matrixStats/tests/signTabulate.R0000644000176200001440000000203614111772202016502 0ustar liggesuserslibrary("matrixStats") signTabulate0 <- function(x, ...) { nneg <- sum(x < 0, na.rm = TRUE) nzero <- sum(x == 0, na.rm = TRUE) npos <- sum(x > 0, na.rm = TRUE) nna <- sum(is.na(x)) nneginf <- sum(is.infinite(x) & x < 0, na.rm = TRUE) nposinf <- sum(is.infinite(x) & x > 0, na.rm = TRUE) res <- c(nneg, nzero, npos, nna, nneginf, nposinf) res <- as.double(res) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf") if (is.integer(x)) res <- res[1:4] res } # signTabulate0() # Simulate data set.seed(0xBEEF) n <- 100L x <- runif(n) x[sample(n, size = 0.1 * n)] <- 0 x[sample(n, size = 0.1 * n)] <- NA_real_ x[sample(n, size = 0.1 * n)] <- -Inf x[sample(n, size = 0.1 * n)] <- +Inf # Doubles message("Doubles:") counts0 <- signTabulate0(x) print(counts0) counts1 <- signTabulate(x) print(counts1) stopifnot(identical(counts1, counts0)) # Integers message("Integers:") x <- suppressWarnings(as.integer(x)) counts0 <- signTabulate0(x) print(counts0) counts1 <- signTabulate(x) print(counts1) stopifnot(identical(counts1, counts0)) matrixStats/tests/rowCollapse.R0000644000176200001440000000515314074054377016373 0ustar liggesuserslibrary("matrixStats") x <- matrix(1:27, ncol = 3) # To check names attribute dimnames <- list(letters[1:9], LETTERS[1:3]) rowCollapse_R <- function(x, idxs, ..., useNames = NA) { res <- x[, idxs] # Preserve names attribute? if (is.na(useNames) || !useNames) names(res) <- NULL res } idxs <- 1L # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } idxs <- 2L # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } rowCollapse_R <- function(x, idxs, ..., useNames = NA) { res <- c(x[1:5, 1], x[6:9, 3]) # Preserve names attribute? if (is.na(useNames) || !useNames) names(res) <- NULL res } idxs <- c(1, 1, 1, 1, 1, 3, 3, 3, 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } rowCollapse_R <- function(x, idxs, ..., useNames = NA) { res <- c(x[1, 1], x[2, 2], x[3, 3], x[4, 1], x[5, 2], x[6, 3], x[7, 1], x[8, 2], x[9, 3]) # Preserve names attribute? if (isTRUE(useNames)) { names <- rownames(x) if (!is.null(names)) names(res) <- names } res } idxs <- 1:3 # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y_truth <- rowCollapse_R(x, idxs, useNames = useNames) y <- rowCollapse(x, idxs, useNames = useNames) stopifnot(identical(y, y_truth)) y2 <- colCollapse(t(x), idxs, useNames = useNames) stopifnot(identical(y2, y)) } } matrixStats/tests/rowAllAnys.R0000644000176200001440000002503614111775134016167 0ustar liggesuserslibrary("matrixStats") rowAlls_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = all, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames <- dimnames(x) if (!is.null(dimnames)) dimnames(y) <- dimnames } res <- apply(y, MARGIN = 1L, FUN = all, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnys_R <- function(x, value = TRUE, na.rm = FALSE, ..., useNames = NA) { if (is.na(value)) { res <- apply(is.na(x), MARGIN = 1L, FUN = any, na.rm = na.rm) } else { y <- x == value # Preserve dimnames attribute dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!isTRUE(all.equal(dim(y), dim))) { dim(y) <- dim dimnames <- dimnames(x) if (!is.null(dimnames)) dimnames(y) <- dimnames } res <- apply(y, MARGIN = 1L, FUN = any, na.rm = na.rm) } if (is.na(useNames) || !useNames) names(res) <- NULL res } rowAnyMissings_R <- function(x, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = anyMissing) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(FALSE, nrow = 10L, ncol = 4L) x[7:8, 2:3] <- TRUE x[1:3, ] <- TRUE x[, 1] <- TRUE x[4, ] <- FALSE x[, 4] <- FALSE x[2, ] <- FALSE x[3, ] <- TRUE # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:4]) for (kk in 1:3) { if (kk == 2) { x[2, 2] <- NA } else if (kk == 3) { x[, 2] <- NA x[2, ] <- NA } # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { m0 <- rowAlls_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowAlls(x, na.rm = na.rm, useNames = useNames) m2 <- colAlls(t(x), na.rm = na.rm, useNames = useNames) str(list("all()", m0 = m0, m1 = m1, m2 = m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) m0 <- rowAnys_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowAnys(x, na.rm = na.rm, useNames = useNames) m2 <- colAnys(t(x), na.rm = na.rm, useNames = useNames) str(list("any()", m0 = m0, m1 = m1, m2 = m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) m0 <- rowAnyMissings_R(x, useNames = useNames) m1 <- rowAnyMissings(x, useNames = useNames) m2 <- colAnyMissings(t(x), useNames = useNames) str(list("anyMissing()", m0 = m0, m1 = m1, m2 = m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) } } } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(1:6, length.out = 4 * 5), nrow = 4L, ncol = 5L) x[2, ] <- 7L x[3, 1] <- 7L x[2:3, 3:4] <- NA_integer_ # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:5]) # Row/column counts value <- 7L # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowAlls_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAlls(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAlls(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c <- allValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) c <- allValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) } } r0 <- rowAnys_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAnys(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAnys(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c <- anyValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) c <- anyValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) } } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # rowAlls(x) et al. on numeric 'x' with logical 'value' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 4L, ncol = 5L) x[2:4, 2] <- (1:3) / 4 x[2, 2:4] <- (1:3) / 4 x[3:4, 3] <- (3:4) / 4 x[3, 3:4] <- (3:4) / 4 x[1:4, 5] <- (1:4) / 5 x[4, 4] <- NA_real_ # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:5]) for (value in c(TRUE, FALSE)) { for (na.rm in c(FALSE, TRUE)) { y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- rowAnys(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) # Check names attribute dimnames(x) <- dimnames y <- rowAnys(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- rowAnys(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- colAnys(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) # Check names attribute dimnames(x) <- dimnames y <- colAnys(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) any(as.logical(e) == value, na.rm = na.rm))) y <- colAnys(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- rowAlls(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) # Check names attribute dimnames(x) <- dimnames y <- rowAlls(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 1L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- rowAlls(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- colAlls(x, na.rm = na.rm, value = value) stopifnot(identical(y, y0)) print(y0) # Check names attribute dimnames(x) <- dimnames y <- colAlls(x, na.rm = na.rm, value = value, useNames = FALSE) stopifnot(all.equal(y, y0)) y0 <- suppressWarnings(apply(x, MARGIN = 2L, FUN = function(e) all(as.logical(e) == value, na.rm = na.rm))) y <- colAlls(x, na.rm = na.rm, value = value, useNames = TRUE) stopifnot(all.equal(y, y0)) dimnames(x) <- NULL } ## for (na.rm ...) } ## for(value ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: character (not sure if this should be supported) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - all_R <- function(x, value = TRUE, ...) { if (is.na(value)) { all(is.na(x), ...) } else { all(x == value, ...) } } any_R <- function(x, value = TRUE, ...) { if (is.na(value)) { any(is.na(x), ...) } else { any(x == value, ...) } } x <- matrix(rep(letters, length.out = 10 * 5), nrow = 10L, ncol = 5L) x[2, ] <- "g" x[2:4, 3:4] <- NA_character_ # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL # Row/column counts for (value in c("g", NA_character_)) { for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowAlls_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAlls(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAlls(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c0 <- all_R(x[rr, ], value, na.rm = na.rm) c <- allValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, r1[rr])) stopifnot(identical(c, c0)) } } r0 <- rowAnys_R(x, value = value, na.rm = na.rm, useNames = useNames) r1 <- rowAnys(x, value = value, na.rm = na.rm, useNames = useNames) r2 <- colAnys(t(x), value = value, na.rm = na.rm, useNames = useNames) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) if (!is.na(useNames) && !useNames && !setDimnames) { for (rr in seq_len(nrow(x))) { c0 <- any_R(x[rr, ], value, na.rm = na.rm) c <- anyValue(x[rr, ], value = value, na.rm = na.rm) stopifnot(identical(c, c0)) stopifnot(identical(c, r1[rr])) } } } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # NA 0 test # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow = 3L, ncol = 3L) x[1, ] <- c(NA_real_, NA_real_, 0) x[3, ] <- c(1, 0, 1) dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { dimnames(x) <- if (setDimnames) dimnames else NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowAnys_R(x, value = 0, useNames = useNames) r1 <- rowAnys(x, value = 0, useNames = useNames) stopifnot(identical(r0, r1)) } } matrixStats/tests/rowWeightedMeans.R0000644000176200001440000002362014105674332017346 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (na.rm) na.rm <- anyMissing(x) if ((!is.null(w) && nw == 0L) || isFALSE(na.rm)) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as colWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (!is.null(w) && nw == 0L) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) print(x) # To check names attribute dimnames <- list(letters[1:5], LETTERS[1:4]) # Non-weighted row averages x_est0 <- rowMeans(x) x_est1 <- rowWeightedMeans(x) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x)) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedMeans(x, useNames = FALSE) x_est2 <- colWeightedMeans(t(x), useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowMeans(x) x_est1 <- rowWeightedMeans(x, useNames = TRUE) x_est2 <- colWeightedMeans(t(x), useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row averages (uniform weights) w <- rep(2.5, times = ncol(x)) x_est0 <- rowMeans(x) x_est1 <- rowWeightedMeans(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row averages (excluding some columns) w <- c(1, 1, 0, 1) x_est0 <- rowMeans(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMeans(x, w = w) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row averages (excluding some columns) w <- c(0, 1, 0, 0) x_est0 <- rowMeans(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMeans(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted row averages (all zero weights) w <- c(0, 0, 0, 0) x_est0 <- rowMeans(x[, (w == 1), drop = FALSE]) x_est1 <- rowWeightedMeans(x, w = w) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted averages by rows and columns w <- 1:4 x_est1 <- rowWeightedMeans(x, w = w) print(x_est1) x_est2 <- colWeightedMeans(t(x), w = w) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } x[sample(length(x), size = 0.3 * length(x))] <- NA print(x) # Non-weighted row averages with missing values x_est0 <- rowMeans(x, na.rm = TRUE) x_est1 <- rowWeightedMeans(x, na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), na.rm = TRUE) stopifnot(all.equal(x_est2, x_est0)) # Check names attribute dimnames(x) <- dimnames x_est1 <- rowWeightedMeans(x, na.rm = TRUE, useNames = FALSE) x_est2 <- colWeightedMeans(t(x), na.rm = TRUE, useNames = FALSE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) x_est0 <- rowMeans(x, na.rm = TRUE) x_est1 <- rowWeightedMeans(x, na.rm = TRUE, useNames = TRUE) x_est2 <- colWeightedMeans(t(x), na.rm = TRUE, useNames = TRUE) stopifnot(all.equal(x_est1, x_est0)) stopifnot(all.equal(x_est2, x_est0)) dimnames(x) <- NULL # Weighted row averages with missing values x_est0 <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = TRUE) print(x_est0) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE) print(x_est1) stopifnot(all.equal(x_est1, x_est0)) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est0)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # Weighted averages by rows and columns w <- 1:4 x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } # w contains missing value w[1] <- NA_integer_ x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = TRUE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = TRUE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = TRUE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } x_est1 <- rowWeightedMeans(x, w = w, na.rm = FALSE) x_est2 <- colWeightedMeans(t(x), w = w, na.rm = FALSE) stopifnot(all.equal(x_est2, x_est1)) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { x_est0 <- rowWeightedMeans_R(x, w = w, na.rm = FALSE, useNames = useNames) x_est1 <- rowWeightedMeans(x, w = w, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) x_est0 <- colWeightedMeans_R(t(x), w = w, na.rm = FALSE, useNames = useNames) x_est1 <- colWeightedMeans(t(x), w = w, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(x_est1, x_est0)) } } matrixStats/tests/rowRanges_subset.R0000644000176200001440000000525314074054377017436 0ustar liggesuserslibrary("matrixStats") rowMins_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = min, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMins_R() rowMaxs_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = max, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMaxs_R() rowRanges_R <- function(x, ..., useNames = NA) { suppressWarnings({ ans <- t(apply(x, MARGIN = 1L, FUN = range, ...)) }) # Preserve rownames attribute dim <- c(dim(x)[1], 2L) if (!isTRUE(all.equal(dim(ans), dim))) { dim(ans) <- dim rownames <- rownames(x) if (!is.null(dimnames)) rownames(ans) <- rownames } if (is.na(useNames) || !useNames) dimnames(ans) <- NULL ans } # rowRanges_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check rownames/names attributes dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowRanges, fsure = rowRanges_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowMins, fsure = rowMins_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowMaxs, fsure = rowMaxs_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colRanges, fsure = rowRanges_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMins, fsure = rowMins_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMaxs, fsure = rowMaxs_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/rowAvgsPerColSet.R0000644000176200001440000001055114111770310017270 0ustar liggesuserslibrary("matrixStats") X <- matrix(rnorm(20 * 6), nrow = 20, ncol = 6) rownames(X) <- LETTERS[1:nrow(X)] colnames(X) <- letters[1:ncol(X)] print(X) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply rowMeans() for 3 sets of 2 columns # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 3L S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S) print(Z) # Validation Z0 <- cbind(s1 = rowMeans(X[, 1:2]), s2 = rowMeans(X[, 3:4]), s3 = rowMeans(X[, 5:6])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply colMeans() for 5 sets of 4 rows # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 5L S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S) print(Z) # Validation Z0 <- rbind(s1 = colMeans(X[1:4, ]), s2 = colMeans(X[5:8, ]), s3 = colMeans(X[9:12, ]), s4 = colMeans(X[13:16, ]), s5 = colMeans(X[17:20, ])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # When there is only one "complete" set # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 1L S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- rowAvgsPerColSet(X, S = S, FUN = rowMeans) print(Z) Z0 <- rowMeans(X) stopifnot(identical(drop(Z), Z0)) nbr_of_sets <- 1L S <- matrix(1:nrow(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) Z <- colAvgsPerRowSet(X, S = S, FUN = colMeans) print(Z) Z0 <- colMeans(X) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Use weights # - - - - - - - - - - - - - - - - - - - - - - - - - - nbr_of_sets <- 3L S <- matrix(1:ncol(X), ncol = nbr_of_sets) colnames(S) <- sprintf("s%d", 1:nbr_of_sets) print(S) W <- matrix(runif(length(X)), nrow = nrow(X), ncol = ncol(X)) Z1 <- rowAvgsPerColSet(X, W = W, S = S, FUN = rowWeightedMeans) print(Z1) Z2 <- colAvgsPerRowSet(X, W = W, S = S, FUN = colWeightedMeans) print(Z2) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Result should always be a matrix, including when nrow(X) <= 1 # (https://github.com/HenrikBengtsson/matrixStats/issues/108) # - - - - - - - - - - - - - - - - - - - - - - - - - - X <- matrix(1:3, nrow = 1L, ncol = 3L) S <- matrix(1, nrow = 1L, ncol = 1L) Z1 <- rowAvgsPerColSet(X, S = S) stopifnot(is.matrix(Z1)) Z2 <- rowAvgsPerColSet(X, S = S, rows = 0) stopifnot(is.matrix(Z2)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Works with many, one or zero columns / rows # (https://github.com/HenrikBengtsson/matrixStats/issues/172) # - - - - - - - - - - - - - - - - - - - - - - - - - - S <- cbind(1:2, 3:4, 5:6) X <- matrix(rnorm(2 * 6), nrow = 6, ncol = 2) Z2 <- colAvgsPerRowSet(X, S = S, FUN = colSums2) Z2_ref <- rbind(colSums2(X[S[,1], ,drop=FALSE]), colSums2(X[S[,2], ,drop=FALSE]), colSums2(X[S[,3], ,drop=FALSE])) stopifnot(identical(Z2, Z2_ref)) X <- matrix(rnorm(6), nrow = 6, ncol = 1) Z1 <- colAvgsPerRowSet(X, S = S, FUN = colSums2) Z1_ref <- rbind(colSums2(X[S[,1], ,drop=FALSE]), colSums2(X[S[,2], ,drop=FALSE]), colSums2(X[S[,3], ,drop=FALSE])) stopifnot(identical(Z1, Z1_ref)) X <- matrix(numeric(0), nrow = 6, ncol = 0) Z0 <- colAvgsPerRowSet(X, S = S, FUN = colSums2) Z0_ref <- matrix(numeric(0), nrow = ncol(S), ncol = 0) stopifnot(identical(Z0, unname(Z0_ref))) S <- rbind(1:4, 5:8) X <- matrix(rnorm(n = 2 * 8), nrow = 2, ncol = 8) Z2 <- rowAvgsPerColSet(X, S = S, FUN = rowMeans2) Z2_ref <- cbind(rowMeans2(X[,S[,1],drop=FALSE]), rowMeans2(X[,S[,2],drop=FALSE]), rowMeans2(X[,S[,3],drop=FALSE]), rowMeans2(X[,S[,4],drop=FALSE])) stopifnot(identical(Z2, Z2_ref)) X <- matrix(rnorm(n = 8), nrow = 1, ncol = 8) Z1 <- rowAvgsPerColSet(X, S = S, FUN = rowMeans2) Z1_ref <- cbind(rowMeans2(X[,S[,1],drop=FALSE]), rowMeans2(X[,S[,2],drop=FALSE]), rowMeans2(X[,S[,3],drop=FALSE]), rowMeans2(X[,S[,4],drop=FALSE])) stopifnot(identical(Z1, Z1_ref)) X <- matrix(numeric(0), nrow = 0, ncol = 8) Z0 <- rowAvgsPerColSet(X, S = S, FUN = rowMeans2) Z0_ref <- matrix(numeric(0), nrow = 0, ncol = ncol(S)) stopifnot(identical(Z0, Z0_ref)) matrixStats/tests/weightedMean_subset.R0000644000176200001440000000146114063411362020052 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf for (idxs in index_cases) { validateIndicesTestVector_w(x, w, idxs, ftest = weightedMean, fsure = weighted.mean, na.rm = TRUE, refine = TRUE) validateIndicesTestVector_w(x, w, idxs, ftest = weightedMean, fsure = weighted.mean, na.rm = FALSE, refine = TRUE) } } matrixStats/tests/rowRanges.R0000644000176200001440000001653214111771777016055 0ustar liggesuserslibrary("matrixStats") rowMins_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = min, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMins_R() rowMaxs_R <- function(x, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = max, ...) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } # rowMaxs_R() rowRanges_R <- function(x, ..., useNames = NA) { suppressWarnings({ ans <- t(apply(x, MARGIN = 1L, FUN = range, ...)) }) # Preserve rownames attribute dim <- c(dim(x)[1], 2L) if (!isTRUE(all.equal(dim(ans), dim))) { dim(ans) <- dim rownames <- rownames(x) if (!is.null(dimnames)) rownames(ans) <- rownames } if (is.na(useNames) || !useNames) dimnames(ans) <- NULL ans } # rowRanges_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50 + 0.1, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Row/column extremes for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") # Ranges cat("range:\n") r0 <- rowRanges_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowRanges(x, na.rm = na.rm, useNames = useNames) r2 <- colRanges(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) # Min cat("min:\n") m0 <- rowMins_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowMins(x, na.rm = na.rm, useNames = useNames) m2 <- colMins(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(m1, m2)) stopifnot(all.equal(m1, m0)) # Max cat("max:\n") m0 <- rowMaxs_R(x, na.rm = na.rm, useNames = useNames) m1 <- rowMaxs(x, na.rm = na.rm, useNames = useNames) m2 <- colMaxs(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(m1, m2)) stopifnot(all.equal(m1, m0)) } } } } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(NA_real_, nrow = 10L, ncol = 5L) storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { cat("na.rm = ", na.rm, "\n", sep = "") r0 <- rowRanges_R(x, na.rm = na.rm, useNames = useNames) r1 <- rowRanges(x, na.rm = na.rm, useNames = useNames) r2 <- colRanges(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) } } } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Nx0 matrix x <- matrix(double(0L), nrow = 5L, ncol = 0L) r0 <- rowRanges_R(x) #r1 <- rowRanges(x) #r_truth <- matrix(c(Inf, -Inf), nrow = nrow(x), ncol = 2L, byrow = TRUE) #stopifnot(all.equal(r1, r_truth)) # 0xN matrix x <- t(x) #r1 <- colRanges(x) #stopifnot(all.equal(r1, r_truth)) # Nx1 matrix x <- matrix(1:5, nrow = 5L, ncol = 1L) # To check names attribute dimnames <- list(letters[1:5], "A") r1 <- rowRanges(x) r_truth <- matrix(1:5, nrow = nrow(x), ncol = 2L, byrow = FALSE) stopifnot(all.equal(r1, r_truth)) # Check names attribute dimnames(x) <- dimnames r0 <- rowRanges_R(x, useNames = TRUE) r1 <- rowRanges(x, useNames = TRUE) stopifnot(all.equal(r1, r0)) dimnames(x) <- NULL # 1xN matrix x <- t(x) r1 <- colRanges(x) stopifnot(all.equal(r1, r_truth)) # Check names attribute dimnames(x) <- list("a", LETTERS[1:5]) r1 <- colRanges(x, useNames = TRUE) stopifnot(identical(rownames(r1), colnames(x))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Additional tests with NA_integer_, NA_real, NaN, -Inf, +Inf # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:12, nrow = 4L, ncol = 3L) na_list <- list( "integer" = matrix(1:12, nrow = 4L, ncol = 3L), "integer w/ NA" = matrix(NA_integer_, nrow = 4L, ncol = 3L), "real" = matrix(as.double(1:12), nrow = 4L, ncol = 3L), "real w/ NA" = matrix(NA_real_, nrow = 4L, ncol = 3L) ) na <- na_list[["real"]] na[2, 2] <- NA na_list[["real + NA cell"]] <- na na <- na_list[["real"]] na[2, ] <- NA na_list[["real + NA row"]] <- na na <- na_list[["real"]] na[2, ] <- NaN na_list[["real + NaN row"]] <- na na <- na_list[["real"]] na[2, 2] <- Inf na_list[["real + Inf cell"]] <- na na <- na_list[["real"]] na[2, ] <- Inf na_list[["real + Inf row"]] <- na na <- na_list[["real"]] na[2, 2] <- NaN na_list[["real + NaN cell"]] <- na na <- na_list[["real w/ NA"]] na[2, 2] <- NaN na_list[["real w/ NA + NaN cell"]] <- na na <- na_list[["real w/ NA"]] na[2, ] <- NaN na_list[["real w/ NA + NaN row"]] <- na # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { for (name in names(na_list)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { na <- na_list[[name]] cat(sprintf("%s (%s) w/ na.rm = %s:\n", name, typeof(na), na.rm)) print(na) cat(" min:\n") y0 <- rowMins_R(na, na.rm = na.rm, useNames = useNames) str(y0) y1 <- rowMins(na, na.rm = na.rm, useNames = useNames) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colMins(t(na), na.rm = na.rm, useNames = useNames) str(y1c) stopifnot(all.equal(y1c, y1)) cat(" max:\n") y0 <- rowMaxs_R(na, na.rm = na.rm, useNames = useNames) str(y0) y1 <- rowMaxs(na, na.rm = na.rm, useNames = useNames) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colMaxs(t(na), na.rm = na.rm, useNames = useNames) str(y1c) stopifnot(all.equal(y1c, y1)) cat(" range:\n") y0 <- rowRanges_R(na, na.rm = na.rm, useNames = useNames) str(y0) y1 <- rowRanges(na, na.rm = na.rm, useNames = useNames) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colRanges(t(na), na.rm = na.rm, useNames = useNames) str(y1c) stopifnot(all.equal(y1c, y1)) } } # for (name ...) } # for (na.rm ...) } matrixStats/tests/rowCumprods_subset.R0000644000176200001440000000256014074054377020011 0ustar liggesuserslibrary("matrixStats") rowCumprods_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumprod)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(y) <- dimnames y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCumprods, fsure = rowCumprods_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCumprods(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCumprods_R, useNames = useNames) } } } } matrixStats/tests/rowTabulates.R0000644000176200001440000000755114111774641016554 0ustar liggesuserslibrary("matrixStats") nrow <- 6L ncol <- 5L data <- matrix(0:4, nrow = nrow, ncol = ncol) # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:5]) modes <- c("integer", "logical", "raw") for (mode in modes) { cat(sprintf("Mode: %s...\n", mode)) x <- data if (mode == "logical") x <- x - 2L if (mode != "raw") x[c(2,5,7)] <- NA_integer_ storage.mode(x) <- mode print(x) unique_values <- unique(as.vector(x)) nbr_of_unique_values <- length(unique_values) y <- rowTabulates(x) print(y) stopifnot( identical(dim(y), c(nrow, nbr_of_unique_values)), all(y >= 0) ) if (mode != "raw") { y0 <- t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]) stopifnot(all(y == y0)) } # Check names attribute dimnames(x) <- dimnames y1 <- rowTabulates(x, useNames = FALSE) y2 <- rowTabulates(x, useNames = NA) stopifnot(all.equal(y1, y)) stopifnot(all.equal(y2, y)) y <- rowTabulates(x, useNames = TRUE) stopifnot(identical(rownames(y), rownames(x))) dimnames(x) <- NULL y <- colTabulates(x) print(y) stopifnot( identical(dim(y), c(ncol, nbr_of_unique_values)), all(y >= 0) ) if (mode != "raw") { y0 <- t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]) stopifnot(all(y == y0)) } # Check names attribute dimnames(x) <- dimnames y1 <- colTabulates(x, useNames = FALSE) y2 <- colTabulates(x, useNames = NA) stopifnot(all.equal(y1, y)) stopifnot(all.equal(y2, y)) y <- colTabulates(x, useNames = TRUE) stopifnot(identical(rownames(y), colnames(x))) dimnames(x) <- NULL # Count only certain values if (mode == "integer") { subset <- c(0:2, NA_integer_) } else if (mode == "logical") { subset <- c(TRUE, FALSE, NA) } else { subset <- c(0:2) } y <- rowTabulates(x, values = subset) print(y) stopifnot(identical(dim(y), c(nrow, length(subset)))) # Check names attribute dimnames(x) <- dimnames y1 <- rowTabulates(x, values = subset, useNames = FALSE) y2 <- rowTabulates(x, values = subset, useNames = NA) stopifnot(all.equal(y1, y)) stopifnot(all.equal(y2, y)) y <- rowTabulates(x, values = subset, useNames = TRUE) stopifnot(identical(rownames(y), rownames(x))) dimnames(x) <- NULL y <- colTabulates(x, values = subset) print(y) stopifnot(identical(dim(y), c(ncol, length(subset)))) # Check names attribute dimnames(x) <- dimnames y1 <- colTabulates(x, values = subset, useNames = FALSE) y2 <- colTabulates(x, values = subset, useNames = NA) stopifnot(all.equal(y1, y)) stopifnot(all.equal(y2, y)) y <- colTabulates(x, values = subset, useNames = TRUE) stopifnot(identical(rownames(y), colnames(x))) dimnames(x) <- NULL # Raw if (mode %in% c("integer", "raw")) { subset <- c(0:2) y <- rowTabulates(x, values = as.raw(subset)) print(y) stopifnot(identical(dim(y), c(nrow, length(subset)))) # Check names attribute dimnames(x) <- dimnames y1 <- rowTabulates(x, values = as.raw(subset), useNames = FALSE) y2 <- rowTabulates(x, values = as.raw(subset), useNames = NA) stopifnot(all.equal(y1, y)) stopifnot(all.equal(y2, y)) y3 <- rowTabulates(x, values = as.raw(subset), useNames = TRUE) stopifnot(identical(rownames(y3), rownames(x))) dimnames(x) <- NULL y2 <- colTabulates(t(x), values = as.raw(subset)) print(y2) stopifnot( identical(dim(y2), c(nrow, length(subset))), identical(y2, y) ) # Check names attribute dimnames(x) <- dimnames y1 <- colTabulates(t(x), values = as.raw(subset), useNames = FALSE) y2 <- colTabulates(t(x), values = as.raw(subset), useNames = NA) stopifnot(all.equal(y1, y)) stopifnot(all.equal(y2, y)) y <- colTabulates(t(x), values = as.raw(subset), useNames = TRUE) stopifnot(identical(rownames(y), colnames(t(x)))) dimnames(x) <- NULL } cat(sprintf("Mode: %s...done\n", mode)) } # for (mode ...) matrixStats/tests/rowMedians.R0000644000176200001440000002311414074054377016206 0ustar liggesuserslibrary("matrixStats") rowMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMedians_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 2L, FUN = median, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Non-ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Non-ties:\n") for (mode in c("integer", "double")) { x <- matrix(1:9 + 0.1, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Ties:\n") for (mode in c("integer", "double")) { x <- matrix(1:16 + 0.1, nrow = 4, ncol = 4) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "double")) { x <- matrix(1, nrow = 1, ncol = 1) storage.mode(x) <- mode # To check names attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "double")) { x <- matrix(integer(0), nrow = 0, ncol = 0) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm = FALSE) y1 <- rowMedians(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE) y1 <- colMedians(x, na.rm = FALSE) stopifnot(all.equal(y1, y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "double")) { x <- matrix(NA_integer_, nrow = 3, ncol = 3) storage.mode(x) <- mode # To check names attribute dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- rowMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = TRUE, useNames = useNames) y1 <- colMedians(x, na.rm = TRUE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow = 3, ncol = 3) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf, +Inf), nrow = 4, ncol = 4) # To check names attribute dimnames <- list(letters[1:4], LETTERS[1:4]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow = 4, ncol = 4) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- rowMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) y0 <- colMedians_R(x, na.rm = FALSE, useNames = useNames) y1 <- colMedians(x, na.rm = FALSE, useNames = useNames) stopifnot(all.equal(y1, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") n_sims <- if (Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4L else 20L for (kk in seq_len(n_sims)) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape dim <- sample(50:200, size = 2) n <- prod(dim) x <- rnorm(n, sd = 100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3, 0)) { cat("Adding NAs\n") nna <- sample(n, size = 1) na_values <- c(NA_real_, NaN) t <- sample(na_values, size = nna, replace = TRUE) x[sample(length(x), size = nna)] <- t } # Integer or double? if ((kk %% 4) %in% c(2, 0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE, FALSE), size = 1) # rowMedians(): y0 <- rowMedians_R(x, na.rm = na.rm) y1 <- rowMedians(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- colMedians(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) # colMedians(): y0 <- colMedians_R(x, na.rm = na.rm) y1 <- colMedians(x, na.rm = na.rm) stopifnot(all.equal(y1, y0)) y2 <- rowMedians(t(x), na.rm = na.rm) stopifnot(all.equal(y2, y0)) } # for (kk ...) matrixStats/tests/diff2.R0000644000176200001440000000112214111766461015057 0ustar liggesuserslibrary("matrixStats") set.seed(0x42) for (mode in c("integer", "double")) { x <- rnorm(10, sd = 5) storage.mode(x) <- mode str(x) for (has_na in c(FALSE, TRUE)) { if (has_na) { x[sample(1:10, size = 3)] <- NA } for (l in 1:3) { for (d in 1:4) { cat(sprintf("%s: NAs = %s, lag = %d, differences = %d\n", mode, has_na, l, d)) y0 <- diff(x, lag = l, differences = d) str(y0) y1 <- diff2(x, lag = l, differences = d) str(y1) stopifnot(identical(y1, y0)) } } } # for (has_na ...) } matrixStats/tests/rowCumsums_subset.R0000644000176200001440000000257414074054377017656 0ustar liggesuserslibrary("matrixStats") rowCumsums_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumsum)) }) # Preserve dimnames attribute? dim(y) <- dim(x) if (isTRUE(useNames)) { dimnames <- dimnames(x) if (!is.null(dimnames)) dimnames(y) <- dimnames } y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCumsums, fsure = rowCumsums_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCumsums(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCumsums_R, useNames = useNames) } } } } matrixStats/tests/rowVarDiffs_var,sd_subset.R0000644000176200001440000000331414074030335021156 0ustar liggesuserslibrary("matrixStats") fcns <- list( varDiff = c(rowVarDiffs, colVarDiffs), sdDiff = c(rowSdDiffs, colSdDiffs) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) trim <- runif(1, min = 0, max = 0.5) for (fcn in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1L]] col_fcn <- fcns[[fcn]][[2L]] for (mode in c("numeric", "integer")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) storage.mode(x) <- mode if (mode == "numeric") x[1:2, 3:4] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (diff in 1:2) { for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = row_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = col_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) } } } } } } } cat(sprintf("%s()...DONE\n", fcn)) } matrixStats/tests/rowVarDiffs_mad,iqr_subset.R0000644000176200001440000000331214074030335021312 0ustar liggesuserslibrary("matrixStats") fcns <- list( madDiff = c(rowMadDiffs, colMadDiffs), iqrDiff = c(rowIQRDiffs, colIQRDiffs) ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) trim <- runif(1, min = 0, max = 0.5) for (fcn in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", fcn)) row_fcn <- fcns[[fcn]][[1L]] col_fcn <- fcns[[fcn]][[2L]] for (mode in c("numeric", "integer")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) storage.mode(x) <- mode if (mode == "numeric") x[1:2, 3:4] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (diff in 1:2) { for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = row_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = col_fcn, fsure = row_fcn, na.rm = na.rm, diff = diff, trim = trim, useNames = useNames) } } } } } } } cat(sprintf("%s()...DONE\n", fcn)) } matrixStats/tests/rowTabulates_subset.R0000644000176200001440000000254514074030335020130 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowTabulates, fsure = rowTabulates, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowTabulates, fsure = rowTabulates, values = 1:3, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = colTabulates, fsure = colTabulates, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = colTabulates, fsure = colTabulates, values = 1:3, useNames = useNames) } } } } matrixStats/tests/rowCumMinMaxs_subset.R0000644000176200001440000000431314074054377020234 0ustar liggesuserslibrary("matrixStats") rowCummins_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummin)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(y) <- dimnames y } rowCummaxs_R <- function(x, ..., useNames = NA) { mode <- storage.mode(x) # Change mode because a bug is detected on cummax for integer in R-3.2.0 storage.mode(x) <- "numeric" suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummax)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(y) <- dimnames storage.mode(y) <- mode y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowCummins, fsure = rowCummins_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCummins(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCummins_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowCummaxs, fsure = rowCummaxs_R, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colCummaxs(t(x), rows = cols, cols = rows, useNames = useNames)) }, fsure = rowCummaxs_R, useNames = useNames) } } } } matrixStats/tests/rowCumMinMaxs.R0000644000176200001440000001533014111770671016642 0ustar liggesuserslibrary("matrixStats") rowCummins_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummin)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(y) <- dimnames y } rowCummaxs_R <- function(x, ..., useNames = NA) { mode <- storage.mode(x) # Change mode because a bug is detected on cummax for integer in R-3.2.0 storage.mode(x) <- "numeric" suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cummax)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(y) <- dimnames storage.mode(y) <- mode y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check dimnames attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(0, nrow = 1L, ncol = 1L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check dimnames attribute dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") value <- 0 storage.mode(value) <- mode # A 0x0 matrix x <- matrix(value, nrow = 0L, ncol = 0L) str(x) r0 <- matrix(value, nrow = nrow(x), ncol = ncol(x)) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow = 0L, ncol = 5L) str(x) colnames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) colnames(x) <- colnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) # A Nx0 matrix x <- matrix(value, nrow = 5L, ncol = 0L) str(x) rownames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) rownames(x) <- rownames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCummins_R(x, useNames = useNames) r1 <- rowCummins(x, useNames = useNames) r2 <- t(colCummins(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x, useNames = useNames) r1 <- rowCummaxs(x, useNames = useNames) r2 <- t(colCummaxs(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) matrixStats/tests/rowCumprods.R0000644000176200001440000001414114111770761016414 0ustar liggesuserslibrary("matrixStats") rowCumprods_R <- function(x, ..., useNames = NA) { suppressWarnings({ y <- t(apply(x, MARGIN = 1L, FUN = cumprod)) }) # Preserve dimnames attribute? dim(y) <- dim(x) dimnames <- dimnames(x) if (isTRUE(useNames) && !is.null(dimnames)) dimnames(y) <- dimnames y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { for (add_na in c(FALSE, TRUE)) { cat("add_na = ", add_na, "\n", sep = "") x <- matrix(1:50, nrow = 10L, ncol = 5L) if (add_na) { x[3:7, c(2, 4)] <- NA_real_ } cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # To check dimnames attribute dimnames <- list(letters[1:10], LETTERS[1:5]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 10L, ncol = 5L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(0, nrow = 1L, ncol = 1L) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) dimnames <- list("a", "A") # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # BUG FIX TEST: Assert zeros don't trump NAs in integer matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { x <- matrix(NA_real_, nrow = 3L, ncol = 2L) x[1, 2] <- 0 x[2, 2] <- 1 x[3, 1] <- 0 storage.mode(x) <- mode cat("mode: ", mode, "\n", sep = "") str(x) dimnames <- list(letters[1:3], LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { # Row/column ranges r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(colCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("logical", "integer", "double")) { cat("mode: ", mode, "\n", sep = "") value <- 0 storage.mode(value) <- mode if (mode == "logical") value2 <- 0L # A 0x0 matrix x <- matrix(value, nrow = 0L, ncol = 0L) str(x) r0 <- matrix(value2, nrow = nrow(x), ncol = ncol(x)) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow = 0L, ncol = 5L) str(x) colnames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) colnames(x) <- colnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(rowCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) # A Nx0 matrix x <- matrix(value, nrow = 5L, ncol = 0L) str(x) rownames <- LETTERS[1:5] # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) rownames(x) <- rownames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { r0 <- rowCumprods_R(x, useNames = useNames) r1 <- rowCumprods(x, useNames = useNames) r2 <- t(rowCumprods(t(x), useNames = useNames)) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (useNames ...) } # for (setDimnames ...) } # for (mode ...) matrixStats/tests/allocMatrix.R0000644000176200001440000000116314111765506016350 0ustar liggesuserslibrary("matrixStats") allocMatrix_R <- function(nrow, ncol, value = NA) { matrix(data = value, nrow = nrow, ncol = ncol) } values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) nrow <- 2L ncol <- 3L for (value in values) { x0 <- allocMatrix_R(nrow, ncol, value = value) x <- allocMatrix(nrow, ncol, value = value) if (!identical(x, x0)) { str(list(nrow = nrow, ncol = ncol, value = value, x = x, x0 = x0)) stopifnot(identical(x, x0)) } } matrixStats/tests/rowDiffs_subset.R0000644000176200001440000000421314074054377017245 0ustar liggesuserslibrary("matrixStats") rowDiffs_R <- function(x, lag = 1L, differences = 1L, ..., useNames = NA) { ncol2 <- ncol(x) - lag * differences if (ncol2 <= 0) { y <- matrix(x[integer(0L)], nrow = nrow(x), ncol = 0L) # Preserve names attribute if (isTRUE(useNames) && !is.null(rownames(x))) rownames(y) <- rownames(x) return(y) } suppressWarnings({ y <- apply(x, MARGIN = 1L, FUN = diff, lag = lag, differences = differences) }) y <- t(y) # Preserve dimnames attribute dim(y) <- c(nrow(x), ncol2) if (isTRUE(useNames) && !is.null(dimnames(x))) { colnames <- colnames(x) if (!is.null(colnames)) { len <- length(colnames) colnames <- colnames[(len - ncol2 + 1):len] } dimnames(y) <- list(rownames(x), colnames) } else dimnames(y) <- NULL y } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check dimnames attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (lag in 1:2) { for (differences in 1:3) { # Check dimnames attribute for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowDiffs, fsure = rowDiffs_R, lag = lag, differences = differences, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = function(x, rows, cols, ..., useNames) { t(colDiffs(t(x), rows = cols, cols = rows, ..., useNames = useNames)) }, fsure = rowDiffs_R, lag = lag, differences = differences, useNames = useNames) } } } } } } matrixStats/tests/rowMeans2_subset.R0000644000176200001440000000234614074054377017344 0ustar liggesuserslibrary("matrixStats") rowMeans_R <- function(x, na.rm = FALSE, ..., useNames = NA) { res <- rowMeans(x, na.rm = na.rm) if (is.na(useNames) || !useNames) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -3, max = 3), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowMeans2, fsure = rowMeans_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMeans2, fsure = rowMeans_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/weightedVar_etal_subset.R0000644000176200001440000000200214063411362020717 0ustar liggesuserslibrary("matrixStats") fcns <- list( weightedVar = weightedVar, weightedSd = weightedSd, weightedMad = weightedMad ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (name in names(fcns)) { cat(sprintf("subsetted tests on %s()...\n", name)) fcn <- fcns[[name]] for (mode in c("numeric", "integer")) { x <- runif(6, min = -6, max = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- mode if (mode == "numeric") w[1] <- Inf for (idxs in index_cases) { validateIndicesTestVector_w(x, w, idxs, ftest = fcn, fsure = fcn, na.rm = TRUE) validateIndicesTestVector_w(x, w, idxs, ftest = fcn, fsure = fcn, na.rm = FALSE) } } cat(sprintf("%s()...DONE\n", name)) } matrixStats/tests/zzz.package-unload.R0000644000176200001440000000227414063411362017576 0ustar liggesusers## These tests need to be last of all tests, otherwise ## covr::package_coverage() gives an error. cat("1. Loading package\n") loadNamespace("matrixStats") stopifnot("matrixStats" %in% loadedNamespaces()) cat("2. Unloading package\n") unloadNamespace("matrixStats") stopifnot(!"matrixStats" %in% loadedNamespaces()) if (FALSE) { ## 'covr' gives "Error in library("matrixStats") : ## there is no package called 'matrixStats'" here, cf. ## https://travis-ci.org/HenrikBengtsson/matrixStats/builds/48015577 cat("3. Attaching package\n") library("matrixStats") stopifnot("package:matrixStats" %in% search()) cat("4. Detaching package\n") detach("package:matrixStats") stopifnot(!"package:matrixStats" %in% search()) stopifnot("matrixStats" %in% loadedNamespaces()) cat("5. Unloading package\n") unloadNamespace("matrixStats") stopifnot(!"matrixStats" %in% loadedNamespaces()) cat("6. Attaching package (again)\n") library("matrixStats") stopifnot("package:matrixStats" %in% search()) cat("7. Detaching package (again)\n") detach("package:matrixStats") stopifnot(!"package:matrixStats" %in% search()) stopifnot("matrixStats" %in% loadedNamespaces()) } cat("7. DONE\n") matrixStats/tests/rowVars_subset.R0000644000176200001440000000634314105674332017125 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as rowVars() if (is.null(center) || ncol(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colVars_R <- function(x, na.rm = FALSE, center = NULL, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = var, na.rm = na.rm) }) stopifnot(!any(is.infinite(res))) # Keep naming support consistency same as colVars() if (is.null(center) || nrow(x) <= 1L) { if (is.na(useNames) || isFALSE(useNames)) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } rowVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowWeightedMeans(x, cols = cols, na.rm = na.rm, useNames = FALSE) res <- rowVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } colVars_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colWeightedMeans(x, rows = rows, na.rm = na.rm, useNames = FALSE) res <- colVars(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) stopifnot(!any(is.infinite(res))) res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowVars, fsure = rowVars_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowVars_center, fsure = rowVars_R, na.rm = na.rm, center = TRUE, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colVars, fsure = rowVars_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colVars_center, fsure = rowVars_R, na.rm = na.rm, center = TRUE, useNames = useNames) } } } } } matrixStats/tests/rowIQRs.R0000644000176200001440000000575514074054377015457 0ustar liggesuserslibrary("matrixStats") rowIQRs_R <- function(x, na.rm = FALSE, ..., useNames = NA) { quantile_na <- function(x, ..., na.rm = FALSE) { if (!na.rm && anyMissing(x)) return(c(NA_real_, NA_real_)) quantile(x, ..., na.rm = na.rm) } q <- apply(x, MARGIN = 1L, FUN = quantile_na, probs = c(0.25, 0.75), na.rm = na.rm) rownames(q) <- NULL # Not needed anymore # Preserve names attribute dim(q) <- c(2L, nrow(x)) names <- rownames(x) if (isTRUE(useNames) && !is.null(names)) colnames(q) <- names q[2L, , drop = TRUE] - q[1L, , drop = TRUE] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with multiple quantiles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") x <- matrix(1:100 + 0.1, nrow = 10, ncol = 10) storage.mode(x) <- mode str(x) # To check names attribute dimnames <- list(letters[1:10], LETTERS[1:10]) for (add_na in c(FALSE, TRUE)) { if (add_na) { x[3:5, 6:9] <- NA } # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (na.rm in c(FALSE, TRUE)) { # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { probs <- c(0, 0.5, 1) q0 <- rowIQRs_R(x, na.rm = na.rm, useNames = useNames) print(q0) q1 <- rowIQRs(x, na.rm = na.rm, useNames = useNames) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colIQRs(t(x), na.rm = na.rm, useNames = useNames) stopifnot(all.equal(q2, q0)) q <- iqr(x[3, ], na.rm = na.rm) print(q) } # for (useNames ...) } # for (na.rm ...) } # for (setDimnames ...) } # for (add_na ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") # Empty vectors x <- integer(0L) storage.mode(x) <- mode str(x) q <- iqr(x) print(q) stopifnot(identical(q, NA_real_)) # Scalar x <- 1L storage.mode(x) <- mode str(x) q <- iqr(x) str(q) stopifnot(identical(q, 0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Single row matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1, nrow = 1L, ncol = 2L) dimnames <- list("a", LETTERS[1:2]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { q0 <- rowIQRs_R(x, useNames = useNames) q1 <- rowIQRs(x, useNames = useNames) q2 <- colIQRs(t(x), useNames = useNames) stopifnot(all.equal(q0, q1)) stopifnot(all.equal(q0, q2)) } } x <- matrix(1, nrow = 2L, ncol = 1L) q <- colIQRs(x) stopifnot(identical(q, 0)) matrixStats/tests/rowMads_subset.R0000644000176200001440000000502514074054377017100 0ustar liggesuserslibrary("matrixStats") ## Always allow testing of the 'center' argument (as long as it's not defunct) options(matrixStats.center.onUse = "ignore") rowMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 1L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } colMads_R <- function(x, na.rm = FALSE, ..., useNames = NA) { suppressWarnings({ res <- apply(x, MARGIN = 2L, FUN = mad, na.rm = na.rm) }) if (is.na(useNames) || !useNames) names(res) <- NULL res } rowMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- rowMedians(x, cols = cols, na.rm = na.rm, useNames = FALSE) rowMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } colMads_center <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { center <- colMedians(x, rows = rows, na.rm = na.rm, useNames = FALSE) colMads(x, rows = rows, cols = cols, center = center, na.rm = na.rm, useNames = useNames) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) storage.mode(x) <- "integer" # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix(x, rows, cols, ftest = rowMads, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, ftest = rowMads_center, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMads, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix(x, rows, cols, fcoltest = colMads_center, fsure = rowMads_R, na.rm = na.rm, useNames = useNames) } } } } } matrixStats/tests/allocVector.R0000644000176200001440000000117114111765476016353 0ustar liggesuserslibrary("matrixStats") allocVector_R <- function(length, value = NA) { x <- vector(mode = typeof(value), length = length) if (!is.finite(value) || value != 0) x[] <- value x } values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) n <- 10L for (value in values) { x0 <- allocVector_R(n, value = value) x <- allocVector(n, value = value) if (!identical(x, x0)) { str(list(n = n, value = value, x = x, x0 = x0)) stopifnot(identical(x, x0)) } } matrixStats/tests/rowOrderStats.R0000644000176200001440000000444014111771650016711 0ustar liggesuserslibrary("matrixStats") library("stats") asWhich <- function(probs, max) { idx <- as.integer(round(probs * max)) if (idx < 1L) { idx <- 1L } else if (idx > max) { idx <- max } idx } # asWhich() rowOrderStats_R <- function(x, probs, ..., useNames = NA) { ans <- apply(x, MARGIN = 1L, FUN = quantile, probs = probs, type = 3L) # Remove Attributes if (is.na(useNames) || !useNames || length(ans) == 0L) attributes(ans) <- NULL ans } # rowOrderStats_R() set.seed(1) # Simulate data in a matrix of any shape nrow <- 60L ncol <- 30L x <- rnorm(nrow * ncol) dim(x) <- c(nrow, ncol) probs <- 0.3 which <- asWhich(probs, max = ncol) y0 <- rowOrderStats_R(x, probs = probs) y1 <- rowOrderStats(x, which = which) stopifnot(all.equal(y1, y0)) y2 <- colOrderStats(t(x), which = which) stopifnot(all.equal(y2, y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("Consistency checks without NAs:\n") for (kk in 1:3) { cat("Random test #", kk, "\n", sep = "") # Simulate data in a matrix of any shape nrow <- sample(20L, size = 1L) ncol <- sample(20L, size = 1L) x <- rnorm(nrow * ncol) dim(x) <- c(nrow, ncol) cat("mode: ", mode, "\n", sep = "") storage.mode(x) <- mode str(x) probs <- runif(1) which <- asWhich(probs, max = ncol) y0 <- rowOrderStats_R(x, probs = probs) y1 <- rowOrderStats(x, which = which) stopifnot(all.equal(y1, y0)) y2 <- colOrderStats(t(x), which = which) stopifnot(all.equal(y2, y0)) } # for (kk in ...) } # for (mode ...) # Check names attribute x <- matrix(1:9 + 0.1, nrow = 3L, ncol = 3L) probs <- runif(1) which <- asWhich(probs, max = ncol(x)) dimnames <- list(letters[1:3], LETTERS[1:3]) # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL # Check names attribute for (useNames in c(NA, TRUE, FALSE)) { y0 <- rowOrderStats_R(x, probs = probs, useNames = useNames) y1 <- rowOrderStats(x, which = which, useNames = useNames) stopifnot(all.equal(y1, y0)) y2 <- colOrderStats(t(x), which = which, useNames = useNames) stopifnot(all.equal(y2, y0)) } } matrixStats/tests/x_OP_y_subset.R0000644000176200001440000000474314063411362016654 0ustar liggesuserslibrary("matrixStats") x_OP_y_R <- function(x, y, OP, na.rm = FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() t_tx_OP_y_R <- function(x, y, OP, na.rm = FALSE) { t(x_OP_y_R(x = t(x), y = y, OP = OP, na.rm = na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") for (OP in c("+", "-", "*", "/")) { for (mode in c("numeric", "integer", "logical")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6L, ncol = 6L) y <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(y) <- mode if (mode == "numeric") y[1] <- Inf for (xrows in index_cases) { for (xcols in index_cases) { if (is.null(xrows)) xrows <- seq_len(nrow(x)) if (is.null(xcols)) xcols <- seq_len(ncol(x)) for (yidxs in list(xrows, xcols)) { for (na.rm in c(TRUE, FALSE)) { suppressWarnings({ actual <- tryCatch( x_OP_y(x, y, OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = na.rm), error = function(c) "error" ) expect <- tryCatch( x_OP_y_R(x[xrows, xcols, drop = FALSE], y[yidxs], OP, na.rm = na.rm), error = function(c) "error" ) }) stopifnot(all.equal(as.vector(actual), as.vector(expect))) suppressWarnings({ actual <- tryCatch( t_tx_OP_y(x, y, OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = na.rm), error = function(c) "error" ) expect <- tryCatch( t_tx_OP_y_R(x[xrows, xcols, drop = FALSE], y[yidxs], OP, na.rm = na.rm), error = function(c) "error" ) }) stopifnot(all.equal(as.vector(actual), as.vector(expect))) } } } } } } matrixStats/tests/allocArray.R0000644000176200001440000000110214111765363016154 0ustar liggesuserslibrary("matrixStats") allocArray_R <- function(nrow, ncol, value = NA) { array(data = value, dim = dim) } values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) dim <- c(2L, 4L, 3L) for (value in values) { x0 <- allocArray_R(dim, value = value) x <- allocArray(dim, value = value) if (!identical(x, x0)) { str(list(dim = dim, value = value, x = x, x0 = x0)) stopifnot(identical(x, x0)) } } matrixStats/tests/binCounts.R0000644000176200001440000000512014111766007016027 0ustar liggesuserslibrary("matrixStats") library("stats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binCounts_hist <- function(x, bx, right = FALSE, ...) { n0 <- graphics::hist(x, breaks = bx, right = right, include.lowest = TRUE, plot = FALSE)$counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Non-sorted and sorted positions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nx <- 1000L # Number of data points nb <- 200L # Number of bins # Uniformely distributed bins bx <- seq(from = 0, to = 1, length.out = nb + 1L) bx <- c(-1, bx, 2) # Sample data points set.seed(0xBEEF) x <- runif(nx, min = 0, max = 1) for (kk in 1:2) { n0 <- binCounts_hist(x, bx = bx) n1 <- binCounts(x, bx = bx) # Sanity check stopifnot(identical(n1, n0)) # Check reversed n1r <- rev(binCounts(-x, bx = rev(-bx), right = TRUE)) stopifnot(identical(n1r, n1)) # Retry with a sorted vector x <- sort(x) } # for (kk in 1:2) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:40 x[20] <- NA_integer_ nx <- length(x) # Bins bx <- c(0.5, 50.5, 100.5, 150.5, 200.5) y_smooth0 <- binCounts_hist(x, bx = bx) y_smooth <- binCounts(x, bx = bx) # Sanity check stopifnot(all.equal(y_smooth, y_smooth0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Border cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 bx <- min(x) - c(10, 1) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, 0L)) bx <- range(x) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, length(x) - 1L)) bx <- max(x) + c(1, 10) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, 0L)) # Every second empty x <- 1:10 bx <- rep(x, each = 2L) y_smooth <- binCounts(x, bx = bx) stopifnot(all.equal(y_smooth, rep(c(0L, 1L), length.out = length(bx) - 1L))) ## NOTE: binCounts_hist() does not give the same last bin count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Exception handling # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Zero bin bounderies (invalid bin definition) bx <- double(0L) res <- try(y_smooth <- binCounts(1:10, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) # One bin boundery (invalid bin definition) bx <- double(1L) res <- try(y_smooth <- binCounts(1:10, bx = bx), silent = TRUE) stopifnot(inherits(res, "try-error")) matrixStats/tests/rowWeightedMeans_subset.R0000644000176200001440000000465214105674332020737 0ustar liggesuserslibrary("matrixStats") ## Create isFALSE() if running on an old version of R if (!exists("isFALSE", mode="function")) { isFALSE <- function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x } rowWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as rowWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (na.rm) na.rm <- anyMissing(x) if ((!is.null(w) && nw == 0L) || isFALSE(na.rm)) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } colWeightedMeans_R <- function(x, w, na.rm = FALSE, ..., useNames = NA) { res <- apply(x, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = na.rm, ...) # Keep naming support consistency same as colWeightedMeans() idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (!is.null(w) && nw == 0L) { if (is.na(useNames) || !useNames) names(res) <- NULL } else if (isFALSE(useNames)) names(res) <- NULL res } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") # To check names attribute dimnames <- list(letters[1:6], LETTERS[1:6]) for (mode in c("numeric", "integer", "logical")) { x <- matrix(runif(6 * 6, min = -6, max = 6), nrow = 6, ncol = 6) w <- runif(6, min = 0, max = 6) storage.mode(x) <- mode storage.mode(w) <- if (mode == "logical") "integer" else mode if (mode == "numeric") w[1] <- Inf # Test with and without dimnames on x for (setDimnames in c(TRUE, FALSE)) { if (setDimnames) dimnames(x) <- dimnames else dimnames(x) <- NULL for (rows in index_cases) { for (cols in index_cases) { for (na.rm in c(TRUE, FALSE)) { for (useNames in c(NA, TRUE, FALSE)) { validateIndicesTestMatrix_w(x, w, rows, cols, ftest = rowWeightedMeans, fsure = rowWeightedMeans_R, na.rm = na.rm, useNames = useNames) validateIndicesTestMatrix_w(x, w, rows, cols, fcoltest = colWeightedMeans, fsure = colWeightedMeans_R, na.rm = na.rm, useNames = useNames) } } } } } } matrixStats/tests/mean2_subset.R0000644000176200001440000000106614063411362016454 0ustar liggesuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subsetted tests # - - - - - - - - - - - - - - - - - - - - - - - - - - - - source("utils/validateIndicesFramework.R") x <- runif(6, min = -6, max = 6) storage.mode(x) <- "integer" for (idxs in index_cases) { validateIndicesTestVector(x, idxs, ftest = mean2, fsure = mean, na.rm = FALSE) validateIndicesTestVector(x, idxs, ftest = mean2, fsure = mean, na.rm = TRUE) } matrixStats/tests/product.R0000644000176200001440000000220214063411362015536 0ustar liggesuserslibrary("matrixStats") for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep = "") # Empty vector x <- 0 storage.mode(x) <- mode y <- prod(x, na.rm = TRUE) print(y) z <- product(x, na.rm = TRUE) print(z) stopifnot(all.equal(z, y)) # Test negative values x <- c(1, -4, 2) storage.mode(x) <- mode y <- prod(x, na.rm = TRUE) print(y) z <- product(x, na.rm = TRUE) print(z) stopifnot(all.equal(z, y)) # Test missing values x <- c(1, NA, NaN, 2) storage.mode(x) <- mode y <- prod(x, na.rm = TRUE) print(y) z <- product(x, na.rm = TRUE) print(z) stopifnot(all.equal(z, y)) x <- c(1, NA, NaN, 2) storage.mode(x) <- mode y <- prod(x, na.rm = FALSE) print(y) z <- product(x, na.rm = FALSE) print(z) stopifnot(all(is.na(z), is.na(y))) x <- c(1, NaN, 2) storage.mode(x) <- mode y <- prod(x, na.rm = FALSE) print(y) stopifnot(is.na(y)) z <- product(x, na.rm = FALSE) print(z) stopifnot(is.na(z)) } # for (mode ...) # NAs following 0s x <- c(0L, NA_integer_) y <- prod(x, na.rm = FALSE) print(y) z <- product(x, na.rm = FALSE) print(z) stopifnot(identical(z, y)) matrixStats/src/0000755000176200001440000000000014120430675013366 5ustar liggesusersmatrixStats/src/rowSums2_lowlevel_template.h0000644000176200001440000000462714111740760021114 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowSums2_(ARGUMENTS_LIST) Copyright: Henrik Bengtsson, 2017 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowSums2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, idx; R_xlen_t *colOffset; X_C_TYPE value; /* Use long double (if available) for higher precision */ /* NOTE: SIMD does not long doubles - in case we ever go there */ LDOUBLE sum; /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } for (ii=0; ii < nrows; ii++) { R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? ii : R_INDEX_OP(ii, *, ncol); } else { rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol); } sum = 0.0; for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow); else idx = R_INDEX_OP(rowIdx, +, jj); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj]); } value = R_INDEX_GET(x, idx, X_NA); #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; if (jj % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; } #endif } /* for (jj ...) */ if (sum > DOUBLE_XMAX) { ans[ii] = R_PosInf; } else if (sum < -DOUBLE_XMAX) { ans[ii] = R_NegInf; } else { ans[ii] = (double)sum; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } matrixStats/src/signTabulate.c0000644000176200001440000000232114111740760016151 0ustar liggesusers/*************************************************************************** Public methods: SEXP signTabulate(SEXP x, SEXP idxs) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "signTabulate_lowlevel.h" SEXP signTabulate(SEXP x, SEXP idxs) { SEXP ans = NILSXP; R_xlen_t nx; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, 6)); signTabulate_dbl(REAL(x), nx, cidxs, nidxs, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(REALSXP, 4)); signTabulate_int(INTEGER(x), nx, cidxs, nidxs, REAL(ans)); UNPROTECT(1); } return(ans); } // signTabulate() /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/rowMads_lowlevel.h0000644000176200001440000000131314111740760017061 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowMads_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, double scale, int narm, int hasna, int byrow, double *ans) void rowMads_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, double scale, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowMads_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowMads_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/diff2.c0000644000176200001440000000340014111740760014520 0ustar liggesusers/*************************************************************************** Public methods: SEXP diff2(SEXP x, SEXP idxs, SEXP lag, SEXP differences) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include #include "000.types.h" #include "diff2_lowlevel.h" SEXP diff2(SEXP x, SEXP idxs, SEXP lag, SEXP differences) { SEXP ans = NILSXP; R_xlen_t nx, nans, lagg, diff; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'lag': */ lagg = asInteger(lag); if (lagg < 1) { error("Argument 'lag' must be a positive integer."); } /* Argument 'differences': */ diff = asInteger(differences); if (diff < 1) { error("Argument 'differences' must be a positive integer."); } /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Length of result vector */ nans = (R_xlen_t)((double)nidxs - ((double)diff*(double)lagg)); if (nans < 0) nans = 0; /* Dispatch to low-level C function */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, nans)); diff2_dbl(REAL(x), nx, cidxs, nidxs, lagg, diff, REAL(ans), nans); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, nans)); diff2_int(INTEGER(x), nx, cidxs, nidxs, lagg, diff, INTEGER(ans), nans); UNPROTECT(1); } else { error("Argument 'x' must be numeric."); } return ans; } // diff2() /*************************************************************************** HISTORY: 2015-06-14 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/validateIndices.c0000644000176200001440000001514614111740760016630 0ustar liggesusers/*************************************************************************** Public methods: SEXP validate(SEXP idxs, SEXP maxIdx, SEXP allowOutOfBound) **************************************************************************/ #include #include "validateIndices_lowlevel.h" /** idxs must not be NULL, which should be checked before calling this function. **/ R_xlen_t* validateIndices_lgl(int *idxs, R_xlen_t nidxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) { R_xlen_t ii, jj, kk; R_xlen_t count1 = 0, count2 = 0; // set default as no NA. *hasna = FALSE; if (nidxs == 0) { *ansNidxs = 0; return NULL; } if (nidxs > maxIdx) { if (!allowOutOfBound) { error("logical subscript too long"); } *hasna = TRUE; // out-of-bound index is NA // count how many idx items for (ii = 0; ii < maxIdx; ++ ii) { if (idxs[ii]) { // TRUE or NA ++ count1; } } for (; ii < nidxs; ++ ii) { if (idxs[ii]) { // TRUE or NA ++ count2; } } *ansNidxs = count1 + count2; R_xlen_t *ans = (R_xlen_t *) R_alloc(*ansNidxs, sizeof(R_xlen_t)); FILL_VALIDATED_ANS(maxIdx, idxs[ii], idxs[ii] == NA_LOGICAL ? NA_R_XLEN_T : ii); for (ii = count1; ii < *ansNidxs; ++ ii) { ans[ii] = NA_R_XLEN_T; } return ans; } // nidxs <= maxIdx R_xlen_t naCount = 0; R_xlen_t lastPartNum = maxIdx % nidxs; for (ii = 0; ii < lastPartNum; ++ ii) { if (idxs[ii]) { // TRUE or NA if (idxs[ii] == NA_LOGICAL) ++ naCount; ++ count1; } } for (; ii < nidxs; ++ ii) { if (idxs[ii]) { // TRUE or NA if (idxs[ii] == NA_LOGICAL) ++ naCount; ++ count2; } } R_xlen_t count = count1 + count2; if (naCount == 0 && count == nidxs) { // All True *ansNidxs = maxIdx; return NULL; } if (naCount) *hasna = TRUE; *ansNidxs = maxIdx / nidxs * count + count1; R_xlen_t *ans = (R_xlen_t *) R_alloc(*ansNidxs, sizeof(R_xlen_t)); FILL_VALIDATED_ANS(nidxs, idxs[ii], idxs[ii] == NA_LOGICAL ? NA_R_XLEN_T : ii); for (ii = count, kk = nidxs; kk+nidxs <= maxIdx; kk += nidxs, ii += count) { for (jj = 0; jj < count; ++ jj) { ans[ii+jj] = ans[jj] == NA_R_XLEN_T ? NA_R_XLEN_T : ans[jj] + kk; } } for (jj = 0; jj < count1; ++ jj) { ans[ii+jj] = ans[jj] == NA_R_XLEN_T ? NA_R_XLEN_T : ans[jj] + kk; } return ans; } /************************************************************* * The most important function which is widely called. * If `idxs` is NULL, NULL will be returned, which indicates selecting. * the whole to-be-computed vector(matrix). * `maxIdx` is the to-be-computed vector(matrix)'s length (rows/cols). * `allowOutOfBound` indicates whether to allow positve out of bound indexing. * `ansNidxs` is used for returning the new idxs array's length. * `subsettedType` is used for returning the new idxs array's datatype. * `hasna` is TRUE, if NA is included in returned result. ************************************************************/ R_xlen_t *validateIndices(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs) { int hasna; return validateIndicesCheckNA(idxs, maxIdx, allowOutOfBound, ansNidxs, &hasna); } R_xlen_t *validateIndicesCheckNA(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) { R_xlen_t nidxs = xlength(idxs); int mode = TYPEOF(idxs); // Set no NA as default. *hasna = FALSE; switch (mode) { case INTSXP: return validateIndices_int(INTEGER(idxs), nidxs, maxIdx, allowOutOfBound, ansNidxs, hasna); case REALSXP: return validateIndices_dbl(REAL(idxs), nidxs, maxIdx, allowOutOfBound, ansNidxs, hasna); case LGLSXP: return validateIndices_lgl(LOGICAL(idxs), nidxs, maxIdx, allowOutOfBound, ansNidxs, hasna); case NILSXP: *ansNidxs = maxIdx; return NULL; default: error("idxs can only be integer, numeric, or logical."); } return NULL; // useless sentence. won't be executed. } /************************************************************* * This function can be called by R. * If `idxs` is NULL, NULL will be returned, which indicates selecting. * the whole to-be-computed vector(matrix). * `maxIdx` is the to-be-computed vector(matrix)'s length (rows/cols). * `allowOutOfBound` indicates whether to allow positve out of bound indexing. ************************************************************/ SEXP validate(SEXP idxs, SEXP maxIdx, SEXP allowOutOfBound) { SEXP ans; R_xlen_t ansNidxs; Rboolean need_double = FALSE; R_xlen_t cmaxIdx = asR_xlen_t(maxIdx, 0); R_xlen_t nidxs = xlength(idxs); int callowOutOfBound = asLogicalNoNA(allowOutOfBound, "allowOutOfBound"); R_xlen_t *cidxs; // Set no NA as default. int hasna = FALSE; int mode = TYPEOF(idxs); switch (mode) { case INTSXP: cidxs = validateIndices_int(INTEGER(idxs), nidxs, cmaxIdx, callowOutOfBound, &ansNidxs, &hasna); break; case REALSXP: cidxs = validateIndices_dbl(REAL(idxs), nidxs, cmaxIdx, callowOutOfBound, &ansNidxs, &hasna); break; case LGLSXP: cidxs = validateIndices_lgl(LOGICAL(idxs), nidxs, cmaxIdx, callowOutOfBound, &ansNidxs, &hasna); break; case NILSXP: return R_NilValue; default: error("idxs can only be integer, numeric, or logical."); } if (cidxs != NULL) { for (R_xlen_t i = 0; i < ansNidxs; i++) { if (cidxs[i] > INT_MAX){ need_double = TRUE; break; } } } else { if (Rf_length(idxs) > 0) { // "Pick all indices" case, if all(idxs > 0) switch (mode) { case INTSXP: if (INTEGER(idxs)[0] > 0) return R_NilValue; break; case REALSXP: if (REAL(idxs)[0] > 0) return R_NilValue; break; case LGLSXP: if (LOGICAL(idxs)[0] == TRUE) return R_NilValue; break; } // else, "Pick an empty subset of indices" } } if (!need_double) { ans = PROTECT(allocVector(INTSXP, ansNidxs)); // Copy from cidxs to ans and coerce to int if (cidxs && ansNidxs > 0) { int *ans_ptr = INTEGER(ans); for (R_xlen_t i = 0; i < ansNidxs; i++) { ans_ptr[i] = cidxs[i] == NA_R_XLEN_T ? NA_INTEGER : (int)cidxs[i] + 1; } } UNPROTECT(1); return ans; } else { ans = PROTECT(allocVector(REALSXP, ansNidxs)); // Copy from cidxs to ans and coerce to double if (cidxs && ansNidxs > 0) { double *ans_ptr = REAL(ans); for (R_xlen_t i = 0; i < ansNidxs; i++){ ans_ptr[i] = cidxs[i] == NA_R_XLEN_T ? NA_REAL : (double)cidxs[i] + 1; } } UNPROTECT(1); return ans; } } matrixStats/src/rowMeans2.c0000644000176200001440000000452214111740760015411 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowMeans2(SEXP x, SEXP naRm, SEXP hasNA) SEXP colMeans2(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2017 **************************************************************************/ #include #include "000.types.h" #include "rowMeans2_lowlevel.h" #include "naming.h" SEXP rowMeans2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMeans2_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x) || isLogical(x)) { rowMeans2_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } matrixStats/src/productExpSumLog_lowlevel_template.h0000644000176200001440000000536614111740760022640 0ustar liggesusers/*********************************************************************** TEMPLATE: double productExpSumLog_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" double CONCAT_MACROS(productExpSumLog, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna) { LDOUBLE y = 0.0, t; R_xlen_t ii; int isneg = 0; int hasZero = 0; /* Calculate sum(log(abs(x))) */ for (ii = 0 ; ii < nidxs; ii++) { t = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); /* Missing values? */ if (narm) { if (X_ISNAN(t)) continue; } #if X_TYPE == 'i' /* Early stopping? */ if (X_ISNAN(t)) { y = NA_REAL; break; } else if (t < 0) { isneg = !isneg; t = -t; } else if (t == 0) { hasZero = 1; /* Early stopping? */ if (narm) break; } #elif X_TYPE == 'r' if (t < 0) { isneg = !isneg; t = -t; } #endif t = log(t); y += t; /* Rprintf("#%d: x=%g, is.nan(x)=%d, abs(x)=%g, is.nan(abs(x))=%d, log(abs(x))=%g, is.nan(log(abs(x)))=%d, sum=%g, is.nan(sum)=%d\n", ii, x[ii], R_IsNaN(x[ii]), X_ABS(x[ii]), R_IsNaN(abs(x[ii])), t, R_IsNaN(y), y, R_IsNaN(y)); */ #if X_TYPE == 'r' /* Early stopping? Special for long LDOUBLE vectors */ if (ii % 1048576 == 0 && ISNAN(y)) break; #endif } if (ISNAN(y)) { /* If there where NA and/or NaN elements, then 'y' will at this point be NaN. The information on an NA value is lost when calculating fabs(NA), which returns NaN. For consistency with integers, we return NA in all cases. */ y = NA_REAL; } else if (hasZero) { /* no NA in 'x' and 'x' contains zero */ y = 0; } else { y = exp(y); /* Update sign */ if (isneg) { y = -y; } /* 2flow or underflow? */ if (y > DOUBLE_XMAX) { y = R_PosInf; } else if (y < -DOUBLE_XMAX) { y = R_NegInf; } } return (double)y; } /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/rowCummaxs_lowlevel.h0000644000176200001440000000131214111740760017611 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCummaxs_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) void rowCummaxs_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) */ #define METHOD rowCummaxs #define COMP '>' #define X_TYPE 'i' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #undef COMP matrixStats/src/diff2_lowlevel_template.h0000644000176200001440000000552114111740760020337 0ustar liggesusers/*********************************************************************** TEMPLATE: void diff2_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include #undef X_DIFF #if X_TYPE == 'i' #ifndef diff_int static R_INLINE int diff_int(int a, int b) { if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER); return a-b; } #define diff_int diff_int #endif #define X_DIFF diff_int #elif X_TYPE == 'r' #define X_DIFF(a,b) a-b #endif void CONCAT_MACROS(diff2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans) { R_xlen_t ii, tt, uu; X_C_TYPE xvalue1, xvalue2; X_C_TYPE *tmp = NULL; /* Nothing to do? */ if (nans <= 0) return; /* Special case (difference == 1) */ if (differences == 1) { for (ii=0; ii < nans; ii++) { xvalue1 = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); xvalue2 = R_INDEX_GET(x, ((idxs == NULL) ? (ii+lag) : idxs[ii+lag]), X_NA); ans[ii] = X_DIFF(xvalue2, xvalue1); } } else { /* Allocate temporary work vector (to hold intermediate differences) */ tmp = Calloc(nidxs - lag, X_C_TYPE); /* (a) First order of differences */ for (ii=0; ii < nidxs-lag; ii++) { xvalue1 = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); xvalue2 = R_INDEX_GET(x, ((idxs == NULL) ? (ii+lag) : idxs[ii+lag]), X_NA); tmp[ii] = X_DIFF(xvalue2, xvalue1); } nidxs -= lag; /* (b) All other orders of differences but the last */ while (--differences > 1) { uu = lag; tt = 0; for (ii=0; ii < nidxs-lag; ii++) { tmp[ii] = X_DIFF(tmp[uu++], tmp[tt++]); } nidxs -= lag; } /* Sanity check */ /* if (nidxs-lag != nans) error("nidxs != nans: %d != %d\n", nidxs, nans); */ /* (c) Last order of differences */ uu = lag; tt = 0; for (ii=0; ii < nans; ii++) { ans[ii] = X_DIFF(tmp[uu++], tmp[tt++]); } /* Deallocate temorary work vector */ Free(tmp); } /* if (differences ...) */ } /*************************************************************************** HISTORY: 2015-06-14 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/sum2.c0000644000176200001440000000435414111740760014425 0ustar liggesusers/*************************************************************************** Public methods: SEXP sum2(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include #include "000.types.h" #include "sum2_lowlevel.h" SEXP sum2(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) { SEXP ans = NILSXP; R_xlen_t nx; int narm, mode2; double sum = NA_REAL; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nx = xlength(x); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'mode': */ if (!isInteger(mode)) { error("Argument 'mode' must be a single integer."); } mode2 = asInteger(mode); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Dispatch to low-level C function */ if (isReal(x)) { sum = sum2_dbl(REAL(x), nx, cidxs, nidxs, narm); } else if (isInteger(x) || isLogical(x)) { sum = sum2_int(INTEGER(x), nx, cidxs, nidxs, narm); } /* Return results */ switch (mode2) { case 1: /* integer */ PROTECT(ans = allocVector(INTSXP, 1)); if (ISNAN(sum)) { INTEGER(ans)[0] = NA_INTEGER; } else if (sum > R_INT_MAX || sum < R_INT_MIN) { Rf_warning("Integer overflow. Use sum2(..., mode = \"double\") to avoid this."); INTEGER(ans)[0] = NA_INTEGER; } else { INTEGER(ans)[0] = (int)sum; } UNPROTECT(1); break; case 2: /* numeric */ PROTECT(ans = allocVector(REALSXP, 1)); if (sum > DOUBLE_XMAX) { REAL(ans)[0] = R_PosInf; } else if (sum < -DOUBLE_XMAX) { REAL(ans)[0] = R_NegInf; } else { REAL(ans)[0] = sum; } UNPROTECT(1); break; default: /* To please compiler */ ans = NILSXP; break; } return(ans); } // sum2() /*************************************************************************** HISTORY: 2015-07-11 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o Moved validation of arguments and construction of return object to this function. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/x_OP_y.c0000644000176200001440000001326514111740760014735 0ustar liggesusers#include #include "000.types.h" #include "x_OP_y_lowlevel.h" SEXP x_OP_y(SEXP x, SEXP y, SEXP dim, SEXP operator, SEXP xrows, SEXP xcols, SEXP yidxs, SEXP commute, SEXP naRm, SEXP hasNA, SEXP byRow) { SEXP ans = NILSXP; int narm, hasna, byrow, commute2; int op; R_xlen_t nrow, ncol, ny; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'y': */ assertArgVector(y, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "y"); ny = xlength(y); /* Argument 'byRow': */ byrow = asLogicalNoNA(byRow, "byrow"); /* Argument 'commute2': */ commute2 = asLogicalNoNA(commute, "commute"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'xrows', 'xcols' and 'yidxs': */ R_xlen_t nxrows, nxcols, nyidxs; R_xlen_t *cxrows = validateIndices(xrows, nrow, 0, &nxrows); R_xlen_t *cxcols = validateIndices(xcols, ncol, 0, &nxcols); R_xlen_t *cyidxs = validateIndices(yidxs, ny, 1, &nyidxs); /* Argument 'operator': */ op = asInteger(operator); if (op == 1) { /* Addition */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Add_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Add_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Add_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nxrows, nxcols)); x_OP_y_Add_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 2) { /* Subtraction */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Sub_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Sub_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Sub_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nxrows, nxcols)); x_OP_y_Sub_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 3) { /* Multiplication */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Mul_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Mul_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Mul_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nxrows, nxcols)); x_OP_y_Mul_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 4) { /* Division */ PROTECT(ans = allocMatrix(REALSXP, nxrows, nxcols)); if (isReal(x) && isReal(y)) { x_OP_y_Div_dbl_dbl( REAL(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && (isInteger(y) || isLogical(y))) { x_OP_y_Div_dbl_int( REAL(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && isReal(y)) { x_OP_y_Div_int_dbl( INTEGER(x), nrow, ncol, REAL(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if ((isInteger(x) || isLogical(x)) && (isInteger(y) || isLogical(y))) { x_OP_y_Div_int_int( INTEGER(x), nrow, ncol, INTEGER(y), ny, cxrows, nxrows, cxcols, nxcols, cyidxs, nyidxs, byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { error("Unknown value on argument 'OP': %dL", op); } return(ans); } /* x_OP_y() */ matrixStats/src/logSumExp_lowlevel_template.h0000644000176200001440000001221314111740760021264 0ustar liggesusers/*********************************************************************** TEMPLATE: double logSumExp_double(ARGUMENTS_LIST) ARGUMENTS_LIST: double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, int by, double *xx ***********************************************************************/ #include #include #include "000.types.h" #include "000.templates-types.h" /* logSumExp_double(x, by=0, xx=NULL): 1. Scans for the maximum value of x=(x[0], x[1], ..., x[n-1]) 2. Computes result from 'x'. NOTE: The above sweeps the "contiguous" 'x' vector twice. --- logSumExp_double(x, by=by, xx=xx): 1. Scans for the maximum value of x=(x[0], x[by], ..., x[(n-1)*by]) and copies the values to xx = (xx[0], xx[1], xx[2], ..., xx[n-1]), which *must* be preallocated. 2. Computes result from 'xx'. NOTE: The above sweeps the "scattered" 'x' vector only once, and then the "contigous" 'xx' vector once. This is more likely to create cache hits. */ double logSumExp_double(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, R_xlen_t by, double *xx) { R_xlen_t ii, iMax, idx; double xii, xMax; LDOUBLE sum; int hasna2 = FALSE; /* Indicates whether NAs where detected or not */ int xMaxIsNA; /* Quick return? */ if (nidxs == 0) { return(R_NegInf); } /* Find the maximum value */ iMax = 0; if (by) { idx = R_INDEX_OP(((idxs == NULL) ? (0) : idxs[0]), *, by); xMax = R_INDEX_GET(x, idx, NA_REAL); } else { xMax = R_INDEX_GET(x, ((idxs == NULL) ? (0) : idxs[0]), NA_REAL); } xMaxIsNA = ISNAN(xMax); if (nidxs == 1) { if (narm && xMaxIsNA) { return(R_NegInf); } else { return(xMax); } } if (xMaxIsNA) hasna2 = TRUE; if (by) { /* To increase the chances for cache hits below, which sweeps through the data twice, we copy data into a temporary contigous vector while scanning for the maximum value. */ xx[0] = xMax; for (ii=1; ii < nidxs; ii++) { /* Get the ii:th value */ idx = R_INDEX_OP(((idxs == NULL) ? (ii) : idxs[ii]), *, by); xii = R_INDEX_GET(x, idx, NA_REAL); /* Copy */ xx[ii] = xii; if (hasna && ISNAN(xii)) { if (narm) { hasna2 = TRUE; continue; } else { return(R_NaReal); } } if (xii > xMax || (narm && xMaxIsNA)) { iMax = ii; xMax = xii; xMaxIsNA = ISNAN(xMax); } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=1; ii < nidxs; ii++) { /* Get the ii:th value */ xii = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), NA_REAL); if (hasna && ISNAN(xii)) { if (narm) { hasna2 = TRUE; continue; } else { return(R_NaReal); } } if (xii > xMax || (narm && xMaxIsNA)) { iMax = ii; xMax = xii; xMaxIsNA = ISNAN(xMax); } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* by */ /* Early stopping? */ if (xMaxIsNA) { /* Found only missing values? */ return narm ? R_NegInf : R_NaReal; } else if (xMax == R_PosInf) { /* Found +Inf? */ return(R_PosInf); } else if (xMax == R_NegInf) { /* all values are -Inf */ return(R_NegInf); } /* Sum differences */ sum = 0.0; if (by) { for (ii=0; ii < nidxs; ii++) { if (ii == iMax) { continue; } /* Get the ii:th value */ xii = xx[ii]; if (!hasna2 || !ISNAN(xii)) { sum += exp(xii - xMax); } /* Early LDOUBLE stopping on -Inf/+Inf and user interrupt? */ if (ii % 1048576 == 0) { if (!R_FINITE(sum)) break; R_CheckUserInterrupt(); } } /* for (ii ...) */ } else { for (ii=0; ii < nidxs; ii++) { if (ii == iMax) { continue; } /* Get the ii:th value */ xii = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), NA_REAL); if (!hasna2 || !ISNAN(xii)) { sum += exp(xii - xMax); } /* Early LDOUBLE stopping on -Inf/+Inf and user interrupt? */ if (ii % 1048576 == 0) { if (!R_FINITE(sum)) break; R_CheckUserInterrupt(); } } /* for (ii ...) */ } /* if (by) */ sum = xMax + log1p(sum); return(sum); } /*************************************************************************** HISTORY: 2015-06-11 [DJ] o Supported subsetted computation. 2015-06-10 [DJ] o Merge 'logSumExp_double_by' to 'logSumExp_double' 2015-01-26 [HB] o SPEEDUP: Now step 2 ("summing") only checks where NAs if NAs were detected in step 1 ("max value"), which should be noticibly faster since testing for NA is expensive for double values. o SPEEDUP: Now function returns early after step 1 ("max value") if the maximum value found is +Inf, or if all values where NAs. o BUG FIX: Now logSumExp(, na.rm=TRUE) also returns -Inf. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/colRanges_lowlevel.h0000644000176200001440000000132114111740760017361 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void colRanges_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int what, int narm, int hasna, int *ans, int *is_counted) void colRanges_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int what, int narm, int hasna, int *ans, int *is_counted) */ #define X_TYPE 'i' #include "colRanges_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "colRanges_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowRanksWithTies.c0000644000176200001440000004004614111740760017024 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowRanksWithTies(SEXP x, SEXP rows, SEXP cols, SEXP tiesMethod, SEXP byRow) Authors: Hector Corrada Bravo, Peter Langfelder and Henrik Bengtsson TO DO: Add support for missing values. **************************************************************************/ #include #include "rowRanksWithTies_lowlevel.h" #include "naming.h" /* Peter Langfelder's modifications: * byrow: 0 => rank columns, !0 => rank rows * tiesMethod: 1: maximum, 2: average, 3:minimum * The returned rank is a REAL matrix to accomodate average ranks */ /* Brian Montgomery's modifications: * added tiesMethods first, last, random, and dense * reordered to match base::ranks * tiesMethod: 1: average, 2: first, 3: last, 5: random, 5: maximum, 6:minimum, 7:dense */ // Arrange the elements from i to j of array in random order. // Used in tiesMethod "random". void SHUFFLE_INT(int *array, size_t i, size_t j) { if (j > i) { for (size_t k = i; k < j; k++) { size_t l = k + (size_t) (unif_rand() * (j - k + 1)); SWAP(int, array[l], array[k]); } } } SEXP rowRanksWithTies(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP tiesMethod, SEXP byRow, SEXP useNames) { int tiesmethod, byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'tiesMethod': */ tiesmethod = asInteger(tiesMethod); if (tiesmethod < 1 || tiesmethod > 7) { error("Argument 'tiesMethod' is out of range [1,7]: %d", tiesmethod); } /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { if (byrow) { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowRanksWithTies_Average_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_First_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Last_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); rowRanksWithTies_Random_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Max_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Min_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Dense_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } else { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); colRanksWithTies_Average_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_First_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Last_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); colRanksWithTies_Random_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Max_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Min_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Dense_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } } else if (isInteger(x)) { if (byrow) { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowRanksWithTies_Average_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_First_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Last_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); rowRanksWithTies_Random_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Max_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Min_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowRanksWithTies_Dense_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } else { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); colRanksWithTies_Average_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_First_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Last_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 4: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); GetRNGstate(); colRanksWithTies_Random_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); PutRNGstate(); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 5: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Max_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 6: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Min_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; case 7: PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); colRanksWithTies_Dense_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); break; } /* switch */ } } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // rowRanksWithTies() /*************************************************************************** HISTORY: 2019-4-23 [BKM] o Added more tiesMethods: first, last, random, and dense 2015-06-12 [DJ] o Supported subsetted computation. 2013-01-13 [HB] o Added argument 'tiesMethod' to rowRanks(). **************************************************************************/ matrixStats/src/validateIndices_lowlevel.h0000644000176200001440000000202014111740760020531 0ustar liggesusers#include #include "000.utils.h" #include "000.macros.h" #define RETURN_VALIDATED_ANS(type, n, cond, item, poststmt) \ type *ans = (type*) R_alloc(count, sizeof(type)); \ jj = 0; \ for (ii = 0; ii < n; ++ ii) { \ if (cond) ans[jj ++] = item; \ } \ poststmt \ return ans #define FILL_VALIDATED_ANS(n, cond, item) \ jj = 0; \ for (ii = 0; ii < n; ++ ii) { \ if (cond) ans[jj ++] = item; \ } #define X_TYPE 'i' #define SUBSETTED_DEFAULT SUBSETTED_INTEGER #include "validateIndices_lowlevel_template.h" #include "000.templates-types_undef.h" #undef SUBSETTED_DEFAULT #define X_TYPE 'r' #define SUBSETTED_DEFAULT SUBSETTED_REAL #include "validateIndices_lowlevel_template.h" #include "000.templates-types_undef.h" #undef SUBSETTED_DEFAULT matrixStats/src/rowCummins_lowlevel.h0000644000176200001440000000133214111740760017611 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCummins_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) void rowCummins_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) */ #define METHOD rowCummins #define COMP '<' #define X_TYPE 'i' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumMinMaxs_lowlevel_template.h" #include "000.templates-types_undef.h" #undef COMP #undef METHOD matrixStats/src/colCounts_lowlevel.h0000644000176200001440000000174514111740760017427 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void colCounts_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int value, int what, int narm, int hasna, double *ans) void colCounts_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int value, int what, int narm, int hasna, double *ans) void colCounts_lgl(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int value, int what, int narm, int hasna, double *ans) */ #define X_TYPE 'i' #include "colCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "colCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'l' #include "colCounts_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowRanksWithTies_lowlevel_template.h0000644000176200001440000001655314111740760022643 0ustar liggesusers/*********************************************************************** TEMPLATE: Ranks_dbl_ties(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - MARGIN: 'r' (rows) or 'c' (columns). - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' - TIESMETHOD: 'a' (average), 'f' (first), 'l' (last), 'r' (random), '0' (min), '1' (max), 'd' (dense) Authors: Hector Corrada Bravo [HCB] Peter Langfelder [PL] Henrik Bengtsson [HB] Brian Montgomery [BKM] ***********************************************************************/ #include #undef RANK #if TIESMETHOD == 'a' /* average */ #define ANS_TYPE 'r' #define RANK(firstTie, aboveTie) ((double) (firstTie + aboveTie + 1))/2 #elif TIESMETHOD == '0' /* min */ #define ANS_TYPE 'i' #define RANK(firstTie, aboveTie) firstTie + 1 #elif TIESMETHOD == '1' /* max */ #define ANS_TYPE 'i' #define RANK(firstTie, aboveTie) aboveTie #else #define ANS_TYPE 'i' /* dense and other(RANK not used) */ #define RANK(firstTie, aboveTie) firstTie + 1 #endif /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void SHUFFLE_INT(int *array, size_t i, size_t j); /* prototype for use with "random" */ /* Indexing formula to compute the vector index of element j of vector i. Should take arguments element, vector, nElements, nVectors. */ #undef ANS_INDEX_OF #if MARGIN == 'r' /* rows */ #define ANS_INDEX_OF(element, vector, nRows) \ vector + element*nRows #elif MARGIN == 'c' /* columns */ #define ANS_INDEX_OF(element, vector, nRows) \ element + vector*nRows #else #error "MARGIN can only be 'r' or 'c'" #endif void CONCAT_MACROS(METHOD, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, ANS_C_TYPE *ans) { ANS_C_TYPE rank; X_C_TYPE *values, current, tmp; R_xlen_t *colOffset; R_xlen_t ii, jj, kk, rowIdx; int *I; int lastFinite, firstTie, aboveTie, dense_rank_adj; int nvalues, nVec; #if MARGIN == 'r' nvalues = ncols; nVec = nrows; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (cols == NULL) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(jj, *, nrow); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow); } #elif MARGIN == 'c' nvalues = nrows; nVec = ncols; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(nrows, sizeof(R_xlen_t)); if (rows == NULL) { for (jj=0; jj < nrows; jj++) colOffset[jj] = jj; } else { for (jj=0; jj < nrows; jj++) colOffset[jj] = rows[jj]; } #endif values = (X_C_TYPE *) R_alloc(nvalues, sizeof(X_C_TYPE)); I = (int *) R_alloc(nvalues, sizeof(int)); for (ii=0; ii < nVec; ii++) { #if MARGIN == 'r' rowIdx = ((rows == NULL) ? (ii) : rows[ii]); #elif MARGIN == 'c' rowIdx = R_INDEX_OP(((cols == NULL) ? (ii) : cols[ii]), *, nrow); #endif lastFinite = nvalues-1; /* Put the NA/NaN elements at the end of the vector and update the index vector appropriately. This may be a bit faster since it only uses one loop over the length of the vector, plus it shortens the sort in case there are missing values. /PL (2012-12-14) */ for (jj = 0; jj <= lastFinite; jj++) { tmp = R_INDEX_GET(x, R_INDEX_OP(rowIdx,+,colOffset[jj]), X_NA); if (X_ISNAN(tmp)) { while (lastFinite > jj && X_ISNAN(R_INDEX_GET(x, R_INDEX_OP(rowIdx,+,colOffset[lastFinite]), X_NA))) { I[lastFinite] = lastFinite; lastFinite--; } I[lastFinite] = jj; I[jj] = lastFinite; values[ jj ] = R_INDEX_GET(x, R_INDEX_OP(rowIdx,+,colOffset[lastFinite]), X_NA); values[ lastFinite ] = tmp; lastFinite--; } else { I[jj] = jj; values[ jj ] = tmp; } } /* for (jj ...) */ // Diagnostic print-outs /* Rprintf("Swapped vector:\n"); for (jj=0; jj < nvalues; jj++) { Rprintf(" %8.4f,", values[jj]); if (((jj+1) % 5==0) || (jj==nvalues-1)) Rprintf("\n"); } Rprintf("Index vector:\n"); for (jj=0; jj 0) X_QSORT_I(values, I, 1, lastFinite + 1); // Calculate the ranks. firstTie = 0; aboveTie = 1; dense_rank_adj = 0; for (jj=0; jj <= lastFinite;) { if (TIESMETHOD == 'd') { dense_rank_adj += (aboveTie - firstTie - 1); firstTie = jj - dense_rank_adj; } else { firstTie = jj; } current = values[jj]; while ((jj <= lastFinite) && (values[jj] == current)) jj++; if (TIESMETHOD == 'd') { aboveTie = jj - dense_rank_adj; } else { aboveTie = jj; } // X_QSORT_I is not stable - ties can be permuted. // This restores the original order. // It might be more efficient to use a stable sort to begin with. if (TIESMETHOD == 'f' || TIESMETHOD == 'l') { R_qsort_int(I, firstTie + 1, aboveTie); /* Function is 1-based */ // SHUFFLE_INT randomizes the order. } else if (TIESMETHOD == 'r') { SHUFFLE_INT(I, firstTie, aboveTie - 1); } else { // Get appropriate rank for average, min, max, or dense rank = RANK(firstTie, aboveTie); } for (kk=firstTie; kk < aboveTie; kk++) { if (TIESMETHOD == 'f' || TIESMETHOD == 'r') { ans[ ANS_INDEX_OF(I[kk], ii, nrows) ] = kk + 1; } else if (TIESMETHOD == 'l') { ans[ ANS_INDEX_OF(I[kk], ii, nrows) ] = aboveTie - (kk - firstTie); } else if (TIESMETHOD == 'd') { ans[ ANS_INDEX_OF(I[kk + dense_rank_adj], ii, nrows) ] = rank; } else { ans[ ANS_INDEX_OF(I[kk], ii, nrows) ] = rank; } } } // At this point jj = lastFinite + 1, no need to re-initialize again. for (; jj < nvalues; jj++) { ans[ ANS_INDEX_OF(I[jj], ii, nrows) ] = ANS_NA; } // Rprintf("\n"); } } /*************************************************************************** HISTORY: 2019-4-23 [BKM] o Added new tiesMethods: first, last, random, and dense. 2015-06-12 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-04-23 [HB] o BUG FIX: Ranks did not work for integers with NAs; now using X_ISNAN(). 2013-01-13 [HB] o Template cleanup. Extened tempate to integer matrices. o Added argument 'tiesMethod' to rowRanks(). 2012-12-14 [PL] o Added internal support for "min", "max" and "average" ties. Using template to generate the various versions of the functions. 2013-01-13 [HCB] o Created. Using "max" ties. **************************************************************************/ matrixStats/src/colRanges.c0000644000176200001440000001250214111740760015446 0ustar liggesusers/*************************************************************************** Public methods: SEXP colRanges(SEXP x, ...) Authors: Henrik Bengtsson. Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "colRanges_lowlevel.h" #include "naming.h" SEXP colRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans = NILSXP, ans2 = NILSXP; int *mins, *maxs; double *mins2, *maxs2; int *is_counted, all_counted = 0; int what2, narm, hasna, usenames; R_xlen_t nrow, ncol, jj; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'what': */ if (length(what) != 1) error("Argument 'what' must be a single number."); if (!isNumeric(what)) error("Argument 'what' must be a numeric number."); what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("Invalid value of 'what': %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'useNames': */ usenames = asLogical(useNames); is_counted = (int *) R_alloc(ncols, sizeof(int)); if (isReal(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(REALSXP, ncols, 2)); } else { PROTECT(ans = allocVector(REALSXP, ncols)); } colRanges_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, what2, narm, hasna, REAL(ans), is_counted); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (ncols != 0) { /* colRanges() returns a numeric Kx2 matrix, reverse dimnames */ setDimnames(ans, dimnames, ncols, ccols, 0, crows, TRUE); } /* (else) Zero-length colnames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } } UNPROTECT(1); } else if (isInteger(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(INTSXP, ncols, 2)); } else { PROTECT(ans = allocVector(INTSXP, ncols)); } colRanges_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, what2, narm, hasna, INTEGER(ans), is_counted); /* Any entries with zero non-missing values? */ all_counted = 1; for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { all_counted = 0; break; } } if (!all_counted) { /* Handle zero non-missing values */ /* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */ if (what2 == 0) { PROTECT(ans2 = allocVector(REALSXP, ncols)); mins = INTEGER(ans); mins2 = REAL(ans2); for (jj=0; jj < ncols; jj++) { if (is_counted[jj]) { mins2[jj] = (double)mins[jj]; } else { mins2[jj] = R_PosInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 1) { PROTECT(ans2 = allocVector(REALSXP, ncols)); maxs = INTEGER(ans); maxs2 = REAL(ans2); for (jj=0; jj < ncols; jj++) { if (is_counted[jj]) { maxs2[jj] = (double)maxs[jj]; } else { maxs2[jj] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 2) { PROTECT(ans2 = allocMatrix(REALSXP, ncols, 2)); mins = INTEGER(ans); maxs = &INTEGER(ans)[ncols]; mins2 = REAL(ans2); maxs2 = &REAL(ans2)[ncols]; for (jj=0; jj < ncols; jj++) { if (is_counted[jj]) { mins2[jj] = (double)mins[jj]; maxs2[jj] = (double)maxs[jj]; } else { mins2[jj] = R_PosInf; maxs2[jj] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } ans = ans2; } if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (ncols != 0) { /* colRanges() returns a numeric Kx2 matrix, reverse dimnames */ setDimnames(ans, dimnames, ncols, ccols, 0, crows, TRUE); } /* (else) Zero-length colnames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } } UNPROTECT(1); /* ans */ } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // colRanges() /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/rowRanges_lowlevel.h0000644000176200001440000000132114111740760017413 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowRanges_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int what, int narm, int hasna, int *ans, int *is_counted) void rowRanges_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int what, int narm, int hasna, int *ans, int *is_counted) */ #define X_TYPE 'i' #include "rowRanges_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowRanges_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/colOrderStats.c0000644000176200001440000000615214111764426016332 0ustar liggesusers/*************************************************************************** Public methods: SEXP colOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) Authors: Henrik Bengtsson To do: Add support for missing values. Copyright Henrik Bengtsson, 2007-2014 **************************************************************************/ #include #include "000.types.h" #include "colOrderStats_lowlevel.h" #include "naming.h" SEXP colOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) { SEXP ans = NILSXP; R_xlen_t nrow, ncol, qq; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'which': */ if (length(which) != 1) error("Argument 'which' must be a single number."); if (!isNumeric(which)) error("Argument 'which' must be a numeric number."); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasna, colsHasna; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasna); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasna); /* Argument 'useNames': */ int usenames = asLogical(useNames); // Check missing rows if (rowsHasna && ncols > 0) { error("Argument 'rows' must not contain missing value"); } // Check missing cols if (colsHasna && nrows > 0) { error("Argument 'cols' must not contain missing value"); } /* Subtract one here, since rPsort does zero based addressing */ qq = asInteger(which) - 1; /* Assert that 'qq' is a valid index */ if (qq < 0 || qq >= nrows) { error("Argument 'which' is out of range: %d", qq + 1); } /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, ncols)); colOrderStats_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, qq, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, ncols)); colOrderStats_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, qq, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // colOrderStats() /*************************************************************************** HISTORY: 2015-07-08 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created from rowOrderStats.c. **************************************************************************/ matrixStats/src/rowCumsums_lowlevel.h0000644000176200001440000000121314111740760017630 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCumsums_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) void rowCumsums_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) */ #define X_TYPE 'i' #include "rowCumsums_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumsums_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowCumsums_lowlevel_template.h0000644000176200001440000001055014111740760021527 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCumsums_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowCumsums, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev, idx; R_xlen_t colBegin; X_C_TYPE xvalue; LDOUBLE value; #if ANS_TYPE == 'i' double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; /* OK, i.e. no integer overflow yet? */ int warn = 0, ok, *oks = NULL; #endif if (ncols == 0 || nrows == 0) return; if (byrow) { #if ANS_TYPE == 'i' oks = (int *) R_alloc(nrows, sizeof(int)); #endif colBegin = R_INDEX_OP(((cols == NULL) ? (0) : cols[0]), *, nrow); for (kk=0; kk < nrows; kk++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (kk) : rows[kk])); xvalue = R_INDEX_GET(x, idx, X_NA); ans[kk] = (ANS_C_TYPE) xvalue; #if ANS_TYPE == 'i' oks[kk] = !X_ISNA(xvalue); #endif } kk_prev = 0; for (jj=1; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); #if ANS_TYPE == 'i' if (oks[ii]) { /* Missing value? */ if (X_ISNA(xvalue)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { value = (LDOUBLE) ans[kk_prev] + (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { oks[ii] = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] + (LDOUBLE) xvalue); #endif kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); value = 0; #if ANS_TYPE == 'i' ok = 1; #endif for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); #if ANS_TYPE == 'i' if (ok) { /* Missing value? */ if (X_ISNA(xvalue)) { ok = 0; ans[kk] = ANS_NA; } else { value += (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { ok = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else value += xvalue; ans[kk] = (ANS_C_TYPE) value; #endif kk++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (warn) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/000.templates-types.h0000644000176200001440000000672614111740760017207 0ustar liggesusers#include #include "000.macros.h" #undef X_C_TYPE #undef X_C_SIGNATURE #undef X_IN_C #undef X_ISNAN #undef X_ISNA #undef X_ABS #undef X_PSORT #undef X_QSORT_I #undef X_NA #undef Y_C_TYPE #undef Y_C_SIGNATURE #undef Y_IN_C #undef Y_ISNAN #undef Y_ISNA #undef Y_ABS #undef Y_PSORT #undef Y_QSORT_I #undef Y_NA #undef X_C_Y_C_SIGNATURE #undef ANS_SXP #undef ANS_NA #undef ANS_ISNAN #undef ANS_ISNA #undef ANS_C_TYPE #undef ANS_IN_C /* Data type macros for argument 'x' */ #ifdef X_TYPE #if X_TYPE == 'i' #define X_C_TYPE int #define X_C_SIGNATURE int #define X_IN_C INTEGER #define X_ISNAN(x) (x == NA_INTEGER) #define X_ISNA(x) (x == NA_INTEGER) #define X_ABS(x) abs(x) #define X_PSORT iPsort #define X_QSORT_I R_qsort_int_I #define X_NA NA_INTEGER #elif X_TYPE == 'r' #define X_C_TYPE double #define X_C_SIGNATURE dbl #define X_IN_C REAL #define X_ISNAN(x) ISNAN(x) /* True for R's NA and IEEE NaN */ #define X_ISNA(x) ISNA(x) /* True for R's NA */ #define X_ABS(x) fabs(x) #define X_PSORT rPsort #define X_QSORT_I R_qsort_I #define X_NA NA_REAL #elif X_TYPE == 'l' #define X_C_TYPE int #define X_C_SIGNATURE lgl #define X_IN_C LOGICAL #define X_ISNAN(x) (x == NA_LOGICAL) #define X_NA NA_LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro X_C_TYPE etc.: Unknown X_TYPE" #endif #endif /* Data type macros for argument 'y' */ #ifdef Y_TYPE #if Y_TYPE == 'i' #define Y_C_TYPE int #define Y_C_SIGNATURE int #define Y_IN_C INTEGER #define Y_ISNAN(x) (x == NA_INTEGER) #define Y_ISNA(x) (x == NA_INTEGER) #define Y_ABS(x) abs(x) #define Y_PSORT iPsort #define Y_QSORT_I R_qsort_int_I #define Y_NA NA_INTEGER #elif Y_TYPE == 'r' #define Y_C_TYPE double #define Y_C_SIGNATURE dbl #define Y_IN_C REAL #define Y_ISNAN(x) ISNAN(x) /* NA or NaN */ #define Y_ISNA(x) ISNA(x) /* NA only */ #define Y_ABS(x) fabs(x) #define Y_PSORT rPsort #define Y_QSORT_I R_qsort_I #define Y_NA NA_REAL #elif Y_TYPE == 'l' #define Y_C_TYPE int #define Y_C_SIGNATURE lgl #define Y_IN_C LOGICAL #define Y_ISNAN(x) (x == NA_LOGICAL) #define Y_NA NA_LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro Y_C_TYPE etc.: Unknown Y_TYPE" #endif #endif /* Data type macros for result ('ans') */ #ifndef ANS_TYPE #ifdef X_TYPE /* Default to same as 'x' */ #define ANS_TYPE X_TYPE #endif #endif #ifdef ANS_TYPE #if ANS_TYPE == 'i' #define ANS_SXP INTSXP #define ANS_NA NA_INTEGER #define ANS_ISNAN(x) (x == NA_INTEGER) #define ANS_ISNA(x) (x == NA_INTEGER) #define ANS_C_TYPE int #define ANS_IN_C INTEGER #elif ANS_TYPE == 'r' #define ANS_SXP REALSXP #define ANS_NA NA_REAL #define ANS_ISNAN(x) ISNAN(x) /* NA or NaN */ #define ANS_ISNA(x) ISNA(x) /* NA only */ #define ANS_C_TYPE double #define ANS_IN_C REAL #elif ANS_TYPE == 'l' #define ANS_SXP LGLSXP #define ANS_NA NA_LOGICAL #define ANS_ISNAN(x) (x == NA_LOGICAL) #define ANS_C_TYPE int #define ANS_IN_C LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro ANS_C_TYPE: Unknown ANS_TYPE" #endif #endif /* Subsetted indexing: whether to check NA according to indexing */ #undef R_INDEX_OP #undef R_INDEX_GET #define R_INDEX_OP(a, OP, b) (a == NA_R_XLEN_T || b == NA_R_XLEN_T ? NA_R_XLEN_T : a OP b) #define R_INDEX_GET(x, i, NA) (i == NA_R_XLEN_T ? NA : x[i]) matrixStats/src/logSumExp.c0000644000176200001440000000257614111740760015466 0ustar liggesusers/*************************************************************************** Public methods: SEXP logSumExp(SEXP lx, SEXP idxs, SEXP naRm, SEXP hasNA) Arguments: lx : numeric vector idxs : subsetting indices naRm : a logical scalar hasNA: a logical scalar Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013 **************************************************************************/ #include #include #include "logSumExp_lowlevel.h" SEXP logSumExp(SEXP lx, SEXP idxs, SEXP naRm, SEXP hasNA) { int narm, hasna; /* Argument 'lx': */ assertArgVector(lx, (R_TYPE_REAL), "lx"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, xlength(lx), 1, &nidxs); return(Rf_ScalarReal(logSumExp_double(REAL(lx), cidxs, nidxs, narm, hasna, 0, NULL))); } /* logSumExp() */ /*************************************************************************** HISTORY: 2015-06-11 [DJ] o Supported subsetted computation. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/sum2_lowlevel.h0000644000176200001440000000102014111740760016326 0ustar liggesusers#include #include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double sum2_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm) double sum2_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm) */ #define X_TYPE 'i' #include "sum2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "sum2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowOrderStats_lowlevel.h0000644000176200001440000000123314111740760020270 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowOrderStats_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) void rowOrderStats_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) */ #define X_TYPE 'i' #include "rowOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/binCounts_lowlevel.h0000644000176200001440000000066014063411362017414 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include /* Native API (dynamically generated via macros): void binCounts_L(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) void binCounts_R(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) */ #define BIN_BY 'L' #include "binCounts_lowlevel_template.h" #define BIN_BY 'R' #include "binCounts_lowlevel_template.h" matrixStats/src/rowVars_lowlevel_template.h0000644000176200001440000000665614111740760021022 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowVars_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, int byrow, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowVars, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, kk, idx; R_xlen_t *colOffset; X_C_TYPE *values, value; double value_d, mu_d, sigma2_d; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? ii : R_INDEX_OP(ii, *, ncol); } else { rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol); } kk = 0; for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow); else idx = R_INDEX_OP(rowIdx, +, jj); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj]); } value = R_INDEX_GET(x, idx, X_NA); //HJ if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* for (jj ...) */ /* Note that 'values' will never contain NA/NaNs */ if (kk <= 1) { ans[ii] = NA_REAL; } else { /* (a) Calculate mu = sum(x)/length(x) */ mu_d = 0; for (jj=0; jj < kk; jj++) { mu_d += (double)values[jj]; } mu_d /= (double)kk; /* (b) Calculate sigma^2 */ sigma2_d = 0; for (jj=0; jj < kk; jj++) { value_d = ((double)values[jj] - mu_d); value_d *= value_d; sigma2_d += value_d; } sigma2_d /= (double)(kk-1); ans[ii] = sigma2_d; } /* if (kk <= 1) */ R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-18 [HB] o Created from rowMads_TYPE-template.h. **************************************************************************/ matrixStats/src/000.templates-gen-matrix-vector.h0000644000176200001440000000534613755623603021424 0ustar liggesusers#include "000.macros.h" #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define IDXS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #undef IDXS_TYPE #define IDXS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #undef IDXS_TYPE RETURN_TYPE (*METHOD_NAME[3][3][3])(ARGUMENTS_LIST) = {{ {METHOD_NAME_arows_acols_aidxs, METHOD_NAME_arows_acols_iidxs, METHOD_NAME_arows_acols_didxs}, {METHOD_NAME_arows_icols_aidxs, METHOD_NAME_arows_icols_iidxs, METHOD_NAME_arows_icols_didxs}, {METHOD_NAME_arows_dcols_aidxs, METHOD_NAME_arows_dcols_iidxs, METHOD_NAME_arows_dcols_didxs}, }, { {METHOD_NAME_irows_acols_aidxs, METHOD_NAME_irows_acols_iidxs, METHOD_NAME_irows_acols_didxs}, {METHOD_NAME_irows_icols_aidxs, METHOD_NAME_irows_icols_iidxs, METHOD_NAME_irows_icols_didxs}, {METHOD_NAME_irows_dcols_aidxs, METHOD_NAME_irows_dcols_iidxs, METHOD_NAME_irows_dcols_didxs}, }, { {METHOD_NAME_drows_acols_aidxs, METHOD_NAME_drows_acols_iidxs, METHOD_NAME_drows_acols_didxs}, {METHOD_NAME_drows_icols_aidxs, METHOD_NAME_drows_icols_iidxs, METHOD_NAME_drows_icols_didxs}, {METHOD_NAME_drows_dcols_aidxs, METHOD_NAME_drows_dcols_iidxs, METHOD_NAME_drows_dcols_didxs}, } }; #include "000.templates-types_undef.h" matrixStats/src/rowCounts.c0000644000176200001440000000517214111740760015541 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCounts(SEXP x, ...) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCounts_lowlevel.h" #include "naming.h" SEXP rowCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans; int narm, hasna, what2, usenames; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' & 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric or a logical value."); /* Argument 'what': */ what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("INTERNAL ERROR: Unknown value of 'what' for rowCounts: %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* R allocate a double vector of length 'nrow' */ PROTECT(ans = allocVector(INTSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowCounts_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, asReal(value), what2, narm, hasna, INTEGER(ans)); } else if (isInteger(x)) { rowCounts_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, asInteger(value), what2, narm, hasna, INTEGER(ans)); } else if (isLogical(x)) { rowCounts_lgl(LOGICAL(x), nrow, ncol, crows, nrows, ccols, ncols, asLogical(value), what2, narm, hasna, INTEGER(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } UNPROTECT(2); return(ans); } // rowCounts() /*************************************************************************** HISTORY: 2015-04-13 [DJ] o Supported subsetted computation. 2014-06-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowSums2.c0000644000176200001440000000451414111740760015276 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowSums2(SEXP x, SEXP naRm, SEXP hasNA) SEXP colSums2(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2017 **************************************************************************/ #include #include "000.types.h" #include "rowSums2_lowlevel.h" #include "naming.h" SEXP rowSums2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowSums2_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x) || isLogical(x)) { rowSums2_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } matrixStats/src/mean2_lowlevel_template.h0000644000176200001440000000451414111740760020350 0ustar liggesusers/*********************************************************************** TEMPLATE: double mean2_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014-2017 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(mean2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine) { X_C_TYPE value; R_xlen_t ii; LDOUBLE sum = 0, avg = R_NaN; #if X_TYPE == 'r' LDOUBLE rsum = 0; #endif R_xlen_t count = 0; for (ii=0; ii < nidxs; ++ii) { value = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; ++count; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; ++count; /* Early stopping if sum is NA_real_ (but not NaN, -Inf, or +Inf) */ if (ii % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; ++count; } #endif } /* for (i ...) */ if (sum > DOUBLE_XMAX) { avg = R_PosInf; } else if (sum < -DOUBLE_XMAX) { avg = R_NegInf; } else { avg = sum / count; /* Extra precision by summing over residuals? */ #if X_TYPE == 'r' if (refine && R_FINITE(avg)) { for (ii=0; ii < nidxs; ++ii) { value = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); if (!narm || !ISNAN(value)) { rsum += (LDOUBLE)(value - avg); } } avg += (rsum / count); } #endif } return (double)avg; } /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Now mean2_() uses only basic C types. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowCumprods_lowlevel.h0000644000176200001440000000121714111740760017774 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCumprods_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) void rowCumprods_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, int *ans) */ #define X_TYPE 'i' #include "rowCumprods_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCumprods_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/naming.h0000644000176200001440000000125014111740760015005 0ustar liggesusers#ifndef NAMING_H #define NAMING_H #include #include #include "000.types.h" void setNames(SEXP vec/* Answer vector*/, SEXP namesVec, R_xlen_t length, R_xlen_t *subscripts); void setDimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols, Rboolean reverseDimnames); void set_rowDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t ncol_ans, R_xlen_t *ccols); void set_colDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t nrow_ans, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols); #endif /* NAMING_H */ matrixStats/src/naming.c0000644000176200001440000002162614111740760015011 0ustar liggesusers#include "naming.h" void setNames(SEXP vec/*Answer vector*/, SEXP namesVec, R_xlen_t length, R_xlen_t *subscripts) { if (length == 0) { /* Zero-length names attribute? Keep behavior same as base R function */ return; } if (subscripts == NULL) { namesgets(vec, namesVec); } else { SEXP ansNames = PROTECT(allocVector(STRSXP, length)); R_xlen_t thisIdx; for (R_xlen_t i = 0; i < length; i++) { thisIdx = subscripts[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansNames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(namesVec, thisIdx); SET_STRING_ELT(ansNames, i, eltElement); } } namesgets(vec, ansNames); UNPROTECT(1); } } void setDimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols, Rboolean reverseDimnames) { if (crows == NULL && ccols == NULL && nrows > 0 && ncols > 0){ dimnamesgets(mat, dimnames); return; } SEXP rownames = VECTOR_ELT(dimnames, reverseDimnames ? 1 : 0); SEXP colnames = VECTOR_ELT(dimnames, reverseDimnames ? 0 : 1); SEXP ansDimnames = PROTECT(allocVector(VECSXP, 2)); if (nrows == 0 || rownames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 0, R_NilValue); } else if (crows == NULL) { SET_VECTOR_ELT(ansDimnames, 0, rownames); } else { SEXP ansRownames = PROTECT(allocVector(STRSXP, nrows)); R_xlen_t thisIdx; for (R_xlen_t i = 0; i < nrows; i++) { thisIdx = crows[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansRownames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(rownames, thisIdx); SET_STRING_ELT(ansRownames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 0, ansRownames); UNPROTECT(1); } if (ncols == 0 || colnames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 1, R_NilValue); } else if (ccols == NULL) { SET_VECTOR_ELT(ansDimnames, 1, colnames); } else { if (colnames != R_NilValue) { SEXP ansColnames = PROTECT(allocVector(STRSXP, ncols)); R_xlen_t thisIdx; for (R_xlen_t i = 0; i < ncols; i++) { thisIdx = ccols[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansColnames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(colnames, thisIdx); SET_STRING_ELT(ansColnames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 1, ansColnames); UNPROTECT(1); } } dimnamesgets(mat, ansDimnames); UNPROTECT(1); } void set_rowDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t ncol_ans, R_xlen_t *ccols) { if (nrows == 0 && ncol_ans == 0) { /* Zero-length attributes? Keep behavior same as base R function */ return; } SEXP rownames = VECTOR_ELT(dimnames, 0); SEXP colnames = VECTOR_ELT(dimnames, 1); SEXP ansDimnames = PROTECT(allocVector(VECSXP, 2)); if (nrows == 0 || rownames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 0, R_NilValue); } else if (crows == NULL) { SET_VECTOR_ELT(ansDimnames, 0, rownames); } else { SEXP ansRownames = PROTECT(allocVector(STRSXP, nrows)); R_xlen_t thisIdx; for (R_xlen_t i = 0; i < nrows; i++) { thisIdx = crows[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansRownames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(rownames, thisIdx); SET_STRING_ELT(ansRownames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 0, ansRownames); UNPROTECT(1); } if (ncol_ans == 0 || colnames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 1, R_NilValue); } else { SEXP ansColnames = PROTECT(allocVector(STRSXP, ncol_ans)); R_xlen_t j = 0; if (ccols == NULL) { for (R_xlen_t i = (ncols - ncol_ans); i < ncols; i++) { SEXP eltElement = STRING_ELT(colnames, i); SET_STRING_ELT(ansColnames, j++, eltElement); } } else { R_xlen_t thisIdx; for (R_xlen_t i = (ncols - ncol_ans); i < ncols; i++) { thisIdx = ccols[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansColnames, j++, NA_STRING); } else { SEXP eltElement = STRING_ELT(colnames, thisIdx); SET_STRING_ELT(ansColnames, j++, eltElement); } } } SET_VECTOR_ELT(ansDimnames, 1, ansColnames); UNPROTECT(1); } dimnamesgets(mat, ansDimnames); UNPROTECT(1); } void set_colDiffs_Dimnames(SEXP mat/*Answer matrix*/, SEXP dimnames, R_xlen_t nrows, R_xlen_t nrow_ans, R_xlen_t *crows, R_xlen_t ncols, R_xlen_t *ccols) { if (nrow_ans == 0 && ncols == 0) { /* Zero-length attributes? Keep behavior same as base R function */ return; } SEXP rownames = VECTOR_ELT(dimnames, 0); SEXP colnames = VECTOR_ELT(dimnames, 1); SEXP ansDimnames = PROTECT(allocVector(VECSXP, 2)); if (nrow_ans == 0 || rownames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 0, R_NilValue); } else { SEXP ansRownames = PROTECT(allocVector(STRSXP, nrow_ans)); R_xlen_t j = 0; if (crows == NULL) { for (R_xlen_t i = (nrows - nrow_ans); i < nrows; i++) { SEXP eltElement = STRING_ELT(rownames, i); SET_STRING_ELT(ansRownames, j++, eltElement); } } else { R_xlen_t thisIdx; for (R_xlen_t i = (nrows - nrow_ans); i < nrows; i++) { thisIdx = crows[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansRownames, j++, NA_STRING); } else { SEXP eltElement = STRING_ELT(rownames, thisIdx); SET_STRING_ELT(ansRownames, j++, eltElement); } } } SET_VECTOR_ELT(ansDimnames, 0, ansRownames); UNPROTECT(1); } if (ncols == 0 || colnames == R_NilValue) { SET_VECTOR_ELT(ansDimnames, 1, R_NilValue); } else if (ccols == NULL) { SET_VECTOR_ELT(ansDimnames, 1, colnames); } else { SEXP ansColnames = PROTECT(allocVector(STRSXP, ncols)); R_xlen_t thisIdx; for (R_xlen_t i = 0; i < ncols; i++) { thisIdx = ccols[i]; if (thisIdx == NA_R_XLEN_T) { SET_STRING_ELT(ansColnames, i, NA_STRING); } else { SEXP eltElement = STRING_ELT(colnames, thisIdx); SET_STRING_ELT(ansColnames, i, eltElement); } } SET_VECTOR_ELT(ansDimnames, 1, ansColnames); UNPROTECT(1); } dimnamesgets(mat, ansDimnames); UNPROTECT(1); } matrixStats/src/000.templates-gen-vector.h0000644000176200001440000000052213755623603020111 0ustar liggesusers#include "000.macros.h" #include METHOD_TEMPLATE_H #define IDXS_TYPE 'i' #include METHOD_TEMPLATE_H #undef IDXS_TYPE #define IDXS_TYPE 'r' #include METHOD_TEMPLATE_H #undef IDXS_TYPE RETURN_TYPE (*METHOD_NAME[3])(ARGUMENTS_LIST) = { METHOD_NAME_aidxs, METHOD_NAME_iidxs, METHOD_NAME_didxs }; #include "000.templates-types_undef.h" matrixStats/src/productExpSumLog_lowlevel.h0000644000176200001440000000107114111740760020732 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double productExpSumLog_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna) double productExpSumLog_dbl(bouble *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna) */ #define X_TYPE 'i' #include "productExpSumLog_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "productExpSumLog_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/mean2.c0000644000176200001440000000264214111740760014537 0ustar liggesusers/*************************************************************************** Public methods: SEXP mean2(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "mean2_lowlevel.h" SEXP mean2(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) { SEXP ans; R_xlen_t nx; int narm, refine2; double avg = NA_REAL; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL | R_TYPE_LGL), "x"); nx = xlength(x); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine2 = asLogicalNoNA(refine, "refine"); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Double matrices are more common to use. */ if (isReal(x)) { avg = mean2_dbl(REAL(x), nx, cidxs, nidxs, narm, refine2); } else if (isInteger(x) || isLogical(x)) { avg = mean2_int(INTEGER(x), nx, cidxs, nidxs, narm, refine2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = avg; UNPROTECT(1); return(ans); } // mean2() /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/colOrderStats_lowlevel.h0000644000176200001440000000123314111740760020236 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void colOrderStats_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) void colOrderStats_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, R_xlen_t qq, int *ans) */ #define X_TYPE 'i' #include "colOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "colOrderStats_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowMedians_lowlevel.h0000644000176200001440000000127314111740760017562 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowMedians_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) void rowMedians_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowMedians_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowMedians_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/diff2_lowlevel.h0000644000176200001440000000116614111740760016445 0ustar liggesusers#include #include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void diff2_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nans) void diff2_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nans) */ #define X_TYPE 'i' #include "diff2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "diff2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowCumprods_lowlevel_template.h0000644000176200001440000001045314111740760021671 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCumprods_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowCumprods, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev, idx; R_xlen_t colBegin; X_C_TYPE xvalue; LDOUBLE value; #if ANS_TYPE == 'i' double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; /* OK, i.e. no integer overflow yet? */ int warn = 0, ok, *oks = NULL; #endif if (ncols == 0 || nrows == 0) return; if (byrow) { #if ANS_TYPE == 'i' oks = (int *) R_alloc(nrows, sizeof(int)); #endif colBegin = R_INDEX_OP(((cols == NULL) ? (0) : cols[0]), *, nrow); for (kk=0; kk < nrows; kk++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (kk) : rows[kk])); xvalue = R_INDEX_GET(x, idx, X_NA); ans[kk] = (ANS_C_TYPE) xvalue; #if ANS_TYPE == 'i' oks[kk] = !X_ISNA(xvalue); #endif } kk_prev = 0; for (jj=1; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); #if ANS_TYPE == 'i' if (oks[ii]) { if (X_ISNA(xvalue)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { value = (LDOUBLE) ans[kk_prev] * (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { oks[ii] = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] * (LDOUBLE) xvalue); #endif kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); value = 1; #if ANS_TYPE == 'i' ok = 1; #endif for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); #if ANS_TYPE == 'i' if (ok) { if (X_ISNA(xvalue)) { ok = 0; ans[kk] = ANS_NA; } else { value *= (LDOUBLE) xvalue; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { ok = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else value *= xvalue; ans[kk] = (ANS_C_TYPE) value; #endif kk++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (warn) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/000.templates-types_undef.h0000644000176200001440000000013314063411362020351 0ustar liggesusers#undef METHOD_NAME #undef X_TYPE #undef Y_TYPE #undef ANS_TYPE #undef MARGIN #undef OP matrixStats/src/rowCummins.c0000644000176200001440000000441614111740760015701 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCummins(SEXP x, ...) SEXP colCummins(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCummins_lowlevel.h" #include "naming.h" SEXP rowCummins(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCummins_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCummins_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCummins() */ #undef COMP #undef METHOD /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/rowRanksWithTies_lowlevel.h0000644000176200001440000001567214111740760020751 0ustar liggesusers#include #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowRanksWithTies_Min_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Min_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Max_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Max_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Min_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Average_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Min_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) void rowRanksWithTies_Average_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int *ans) */ /***************************************************************** * ties.method = "average" *****************************************************************/ #define TIESMETHOD 'a' /* average */ #define METHOD rowRanksWithTies_Average #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Average #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "first" *****************************************************************/ #define TIESMETHOD 'f' /* first */ #define METHOD rowRanksWithTies_First #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_First #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "last" *****************************************************************/ #define TIESMETHOD 'l' /* last */ #define METHOD rowRanksWithTies_Last #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Last #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "random" *****************************************************************/ #define TIESMETHOD 'r' /* random */ #define METHOD rowRanksWithTies_Random #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Random #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "min" *****************************************************************/ #define TIESMETHOD '0' /* min */ #define METHOD rowRanksWithTies_Min #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Min #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "max" *****************************************************************/ #define TIESMETHOD '1' /* max */ #define METHOD rowRanksWithTies_Max #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Max #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD /***************************************************************** * ties.method = "dense" *****************************************************************/ #define TIESMETHOD 'd' /* dense */ #define METHOD rowRanksWithTies_Dense #define MARGIN 'r' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'r' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #define METHOD colRanksWithTies_Dense #define MARGIN 'c' #define X_TYPE 'r' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #define MARGIN 'c' #define X_TYPE 'i' #include "rowRanksWithTies_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD #undef TIESMETHOD matrixStats/src/colOrderStats_lowlevel_template.h0000644000176200001440000000501714111740760022135 0ustar liggesusers/*********************************************************************** TEMPLATE: void colOrderStats_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Adopted from ditto for rows. Copyright: Henrik Bengtsson, 2007-2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(colOrderStats, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans) { R_xlen_t ii, jj; R_xlen_t offset; X_C_TYPE *values; // Check missing rows if (rows != NULL) { for (ii=0; ii < nrows; ++ii) { if (rows[ii] == NA_R_XLEN_T) break; } if (ii < nrows && ncols > 0) { error("Argument 'rows' must not contain missing value"); } } // Check missing cols if (cols != NULL) { for (jj=0; jj < ncols; ++jj) { if (cols[jj] == NA_R_XLEN_T) break; } if (jj < ncols && nrows > 0) { error("Argument 'cols' must not contain missing value"); } } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(nrows, sizeof(X_C_TYPE)); for (jj=0; jj < ncols; jj++) { offset = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) values[ii] = x[((rows == NULL) ? (ii) : rows[ii]) + offset]; /* Sort vector of length 'nrows' up to position 'qq'. "...partial sorting: they permute x so that x[qq] is in the correct place with smaller values to the left, larger ones to the right." */ X_PSORT(values, nrows, qq); ans[jj] = values[qq]; } } /*************************************************************************** HISTORY: 2015-07-08 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created from rowOrderStats() ditto. **************************************************************************/ matrixStats/src/productExpSumLog.c0000644000176200001440000000270314111740760017017 0ustar liggesusers/*************************************************************************** Public methods: SEXP productExpSumLog(SEXP x, SEXP idxs, SEXP naRm, SEXP hasNA) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "productExpSumLog_lowlevel.h" SEXP productExpSumLog(SEXP x, SEXP idxs, SEXP naRm, SEXP hasNA) { SEXP ans = NILSXP; R_xlen_t nx; double res = NA_REAL; int narm, hasna; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Double matrices are more common to use. */ if (isReal(x)) { res = productExpSumLog_dbl(REAL(x), nx, cidxs, nidxs, narm, hasna); } else if (isInteger(x)) { res = productExpSumLog_int(INTEGER(x), nx, cidxs, nidxs, narm, hasna); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = res; UNPROTECT(1); return(ans); } // productExpSumLog() /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/rowLogSumExp_lowlevel.h0000644000176200001440000000054414111740760020065 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" /* Native API (dynamically generated via macros): void rowLogSumExps_double(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, R_xlen_t byrow, double *ans) */ #include "rowLogSumExp_lowlevel_template.h" matrixStats/src/rowDiffs_lowlevel.h0000644000176200001440000000142714111740760017236 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowDiffs_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) void rowDiffs_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int byrow, R_xlen_t lag, R_xlen_t differences, int *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) */ #define X_TYPE 'i' #include "rowDiffs_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowDiffs_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowVars_lowlevel.h0000644000176200001440000000125714111740760017117 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowVars_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) void rowVars_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowVars_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowVars_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowVars.c0000644000176200001440000000510114111740760015171 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowVars(SEXP x, SEXP naRm, SEXP hasNA) SEXP colVars(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowVars_lowlevel.h" #include "naming.h" SEXP rowVars(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowVars_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowVars_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } /* rowVars() */ /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-18 [HB] o Created from rowMads.c. **************************************************************************/ matrixStats/src/rowCummaxs.c0000644000176200001440000000440014111740760015674 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCummaxs(SEXP x, ...) SEXP colCummaxs(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCummaxs_lowlevel.h" #include "naming.h" SEXP rowCummaxs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCummaxs_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCummaxs_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCummaxs() */ #undef COMP /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/signTabulate_lowlevel_template.h0000644000176200001440000000333314111740760021766 0ustar liggesusers/*********************************************************************** TEMPLATE: void signTabulate_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(signTabulate, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, double *ans) { X_C_TYPE xi; R_xlen_t ii; R_xlen_t nNeg = 0, nZero = 0, nPos = 0, nNA=0; #if X_TYPE == 'r' R_xlen_t nPosInf=0, nNegInf=0; #endif for (ii = 0; ii < nidxs; ii++) { xi = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); if (X_ISNAN(xi)) { nNA++; } else if (xi > 0) { nPos++; #if X_TYPE == 'r' if (xi == R_PosInf) nPosInf++; #endif } else if (xi < 0) { nNeg++; #if X_TYPE == 'r' if (xi == R_NegInf) nNegInf++; #endif } else if (xi == 0) { nZero++; } } ans[0] = nNeg; ans[1] = nZero; ans[2] = nPos; ans[3] = nNA; #if X_TYPE == 'r' ans[4] = nNegInf; ans[5] = nPosInf; #endif } /*************************************************************************** HISTORY: 2015-07-04 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/x_OP_y_lowlevel_template.h0000644000176200001440000002333614111740760020546 0ustar liggesusers#include "000.types.h" #include "000.templates-types.h" #define X_C_Y_C_SIGNATURE CONCAT_MACROS(X_C_SIGNATURE, Y_C_SIGNATURE) #define METHOD_NAME CONCAT_MACROS(METHOD, X_C_Y_C_SIGNATURE) #if OP == '+' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x + (double)y; } #define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME) static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) { if (X_ISNAN(x)) { return (double)y; } else if (Y_ISNAN(y)) { return (double)x; } else { return (double)x + (double)y; } } #elif OP == '-' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x - (double)y; } #define FUN_narm FUN_no_NA #elif OP == '*' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x * (double)y; } #define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME) static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) { if (X_ISNAN(x)) { return (double)y; } else if (Y_ISNAN(y)) { return (double)x; } else { return (double)x * (double)y; } } #elif OP == '/' #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x / (double)y; } #define FUN_narm FUN_no_NA #else #error "INTERNAL ERROR: Failed to set C inline function FUN(x, y): Unknown OP" #endif void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, Y_C_TYPE *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, ANS_C_TYPE *ans, R_xlen_t n) { R_xlen_t ii, jj, kk, idx, colBegin; R_xlen_t txi, yi; X_C_TYPE xvalue; Y_C_TYPE yvalue; double value; #if ANS_TYPE == 'i' int ok = 1; /* OK, i.e. no integer overflow yet? */ double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; #endif yi = 0; kk = 0; if (byrow) { if (commute) { if (narm) { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); txi = jj; for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (txi%nyidxs) : yidxs[txi%nyidxs]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_narm(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } else { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); txi = jj; for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (txi%nyidxs) : yidxs[txi%nyidxs]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_no_NA(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } } else { if (narm) { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); txi = jj; for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (txi%nyidxs) : yidxs[txi%nyidxs]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_narm(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } else { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); txi = jj; for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (txi%nyidxs) : yidxs[txi%nyidxs]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_no_NA(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif txi += nxcols; /* txi = ii * nxcols + jj; */ } } } } } else { if (commute) { if (narm) { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (yi) : yidxs[yi]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_narm(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } else { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (yi) : yidxs[yi]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_no_NA(yvalue, xvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } } else { if (narm) { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (yi) : yidxs[yi]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_narm(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } else { for (jj=0; jj < nxcols; ++jj) { colBegin = R_INDEX_OP(((xcols == NULL) ? (jj) : xcols[jj]), *, nrow); for (ii=0; ii < nxrows; ++ii) { idx = R_INDEX_OP(colBegin, +, ((xrows == NULL) ? (ii) : xrows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); idx = ((yidxs == NULL) ? (yi) : yidxs[yi]); yvalue = R_INDEX_GET(y, idx, Y_NA); value = FUN_no_NA(xvalue, yvalue); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk ++] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk ++] = (ANS_C_TYPE) value; #endif if (++ yi >= nyidxs) yi = 0; } } } } } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (!ok) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } #undef FUN #undef FUN_narm matrixStats/src/indexByRow.c0000644000176200001440000000515614063411362015631 0ustar liggesusers/*************************************************************************** Public methods: SEXP indexByRow(SEXP dim, SEXP idxs) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" void indexByRow_i(int nrow, int ncol, int *idxs_ptr, R_xlen_t nidxs, int *ans_ptr) { R_xlen_t i, idx, n_max; int col, row; if (idxs_ptr == NULL) { row = 1; col = 0; for (i = 0; i < nidxs; i++) { ans_ptr[i] = row + col * nrow; col++; if (col == ncol) { row++; col = 0; } } } else { n_max = (R_xlen_t)nrow * (R_xlen_t)ncol; for (i = 0; i < nidxs; i++) { idx = idxs_ptr[i] - 1; if (idx < 0) { error("Argument 'idxs' may only contain positive indices: %d", idx + 1); } if (idx >= n_max) { error("Argument 'idxs' contains indices larger than %d: %d", n_max, idx + 1); } col = idx / ncol; row = idx % ncol; idx = col + nrow * row + 1; ans_ptr[i] = idx; } } } // indexByRow_i() SEXP indexByRow(SEXP dim, SEXP idxs) { SEXP ans; int d, i; R_xlen_t nidxs; double n_max; int *idxs_ptr; /* Argument 'dim': */ if (!isInteger(dim) || xlength(dim) != 2) { error("Argument 'dim' must be an integer vector of length two."); } n_max = 1.0; for (i = 0; i < xlength(dim); i++) { d = INTEGER(dim)[i]; if (d < 0) { error("Argument 'dim' specifies a negative value: %d", d); } n_max *= d; #ifndef LONG_VECTOR_SUPPORT if (n_max > R_INT_MAX) { error("Argument 'dim' (%d,%d) specifies a matrix that has more than 2^31-1 elements: %d", INTEGER(dim)[0], INTEGER(dim)[1], n_max); } #endif if (n_max > R_INT_MAX) { error("Argument 'dim' (%d,%d) specifies a matrix that has more than 2^31-1 elements: %d", INTEGER(dim)[0], INTEGER(dim)[1], n_max); } } /* Argument 'idxs': */ if (isNull(idxs)) { idxs_ptr = NULL; nidxs = (R_xlen_t)n_max; } else if (isVectorAtomic(idxs)) { idxs_ptr = INTEGER(idxs); nidxs = xlength(idxs); } else { /* To please compiler */ idxs_ptr = NULL; nidxs = 0; error("Argument 'idxs' must be NULL or a vector."); } PROTECT(ans = allocVector(INTSXP, nidxs)); indexByRow_i(INTEGER(dim)[0], INTEGER(dim)[1], idxs_ptr, nidxs, INTEGER(ans)); UNPROTECT(1); return(ans); } // indexByRow() /*************************************************************************** HISTORY: 2014-11-09 [HB] o Created. **************************************************************************/ matrixStats/src/rowOrderStats_lowlevel_template.h0000644000176200001440000000672314111740760022174 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowOrderStats_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Adopted from rowQ() by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(rowOrderStats, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, R_xlen_t qq, X_C_TYPE *ans) { R_xlen_t ii, jj; R_xlen_t *colOffset, rowIdx; X_C_TYPE *values; // Check missing rows if (rows != NULL) { for (ii=0; ii < nrows; ++ii) { if (rows[ii] == NA_R_XLEN_T) break; } if (ii < nrows && ncols > 0) { error("Argument 'rows' must not contain missing value"); } } // Check missing cols if (cols != NULL) { for (jj=0; jj < ncols; ++jj) { if (cols[jj] == NA_R_XLEN_T) break; } if (jj < ncols && nrows > 0) { error("Argument 'cols' must not contain missing value"); } } /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj] * nrow; } for (ii=0; ii < nrows; ii++) { rowIdx = ((rows == NULL) ? (ii) : rows[ii]); for (jj=0; jj < ncols; jj++) values[jj] = x[rowIdx + ((colOffset == NULL) ? (jj*nrow) : colOffset[jj])]; /* Sort vector of length 'ncol' up to position 'qq'. "...partial sorting: they permute x so that x[qq] is in the correct place with smaller values to the left, larger ones to the right." */ X_PSORT(values, ncols, qq); ans[ii] = values[qq]; } } /*************************************************************************** HISTORY: 2015-07-08 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-01-13 [HB] o Merged rowOrderStatsReal() and rowOrderStatsInteger() into one rowOrderStats_() templated function. 2009-02-04 [HB] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. 2008-03-25 [HB] o Renamed from 'rowQuantiles' to 'rowOrderStats'. 2007-08-10 [HB] o Removed arguments for NAs since rowOrderStats() still don't support it. 2005-11-24 [HB] o Cool, it works and compiles nicely. o Preallocate colOffset to speed up things even more. o Added more comments and error checking. o Adopted from rowQ() in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/binCounts.c0000644000176200001440000000344314063411362015500 0ustar liggesusers/*************************************************************************** Public methods: binCounts(SEXP x, SEXP bx, SEXP right) Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include #include "000.types.h" #include #include "binCounts_lowlevel.h" SEXP binCounts(SEXP x, SEXP bx, SEXP right) { SEXP counts = NILSXP; R_xlen_t nbins; int closedRight; /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); /* Argument 'bx': */ assertArgVector(bx, (R_TYPE_REAL), "bx"); nbins = xlength(bx)-1; if (nbins <= 0) { error("Argument 'bx' must specify at least two bin boundaries (= one bin): %d", xlength(bx)); } /* Argument 'right': */ closedRight = asLogicalNoNA(right, "right"); PROTECT(counts = allocVector(INTSXP, nbins)); if (closedRight) { binCounts_R(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts)); } else { binCounts_L(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts)); } UNPROTECT(1); return(counts); } // binCounts() /*************************************************************************** HISTORY: 2015-05-30 [HB] o Added protected against 'bx' too short. 2014-06-03 [HB] o Dropped unused variable 'count'. 2013-10-08 [HB] o Now binCounts() calls binCounts_(). 2013-05-10 [HB] o SPEEDUP: binCounts() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binCounts() would return random/garbage counts for bins that were beyond the last data point. o BUG FIX: In some cases binCounts() could try to go past the last bin. 2012-10-03 [HB] o Created. **************************************************************************/ matrixStats/src/binMeans.c0000644000176200001440000000535314063411362015272 0ustar liggesusers/*************************************************************************** Public methods: binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include #include "000.types.h" #include #include "binMeans_lowlevel.h" SEXP binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) { SEXP ans = NILSXP, count = NILSXP; R_xlen_t nx, ny, nbins; int closedRight, retcount; int *count_ptr = NULL; /* Argument 'y': */ assertArgVector(y, (R_TYPE_REAL), "y"); ny = xlength(y); /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); nx = xlength(x); if (nx != ny) { error("Argument 'y' and 'x' are of different lengths: %d != %d", ny, nx); } /* Argument 'bx': */ assertArgVector(bx, (R_TYPE_REAL), "bx"); nbins = xlength(bx)-1; if (nbins <= 0) { error("Argument 'bx' must specify at least two bin boundaries (= one bin): %d", xlength(bx)); } /* Argument 'right': */ closedRight = asLogicalNoNA(right, "right"); /* Argument 'retCount': */ retcount = asLogicalNoNA(retCount, "retCount"); PROTECT(ans = allocVector(REALSXP, nbins)); if (retcount) { PROTECT(count = allocVector(INTSXP, nbins)); count_ptr = INTEGER(count); } if (closedRight) { binMeans_R(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr); } else { binMeans_L(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr); } if (retcount) { setAttrib(ans, install("count"), count); UNPROTECT(1); // 'count' } UNPROTECT(1); // 'ans' return ans; return(ans); } // binMeans() /*************************************************************************** HISTORY: 2015-05-30 [HB] o Added protected against 'bx' too short. 2014-10-06 [HB] o CLEANUP: All argument validation is now done by the high-level C API. 2014-06-02 [HB] o CLEANUP: Removed unused variable in binMeans(). 2013-10-08 [HB] o Now binCounts() calls binCounts_(). 2013-05-10 [HB] o SPEEDUP: binMeans() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binMeans() would return random/garbage means/counts for bins that were beyond the last data point. o BUG FIX: In some cases binMeans() could try to go past the last bin. 2012-10-03 [HB] o Created binMeans(), which was adopted from from code proposed by Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as a reply to HB's R-devel thread 'Fastest non-overlapping binning mean function out there?' on Oct 3, 2012. **************************************************************************/ matrixStats/src/rowLogSumExp_lowlevel_template.h0000644000176200001440000000401614111740760021756 0ustar liggesusers/*********************************************************************** TEMPLATE: double rowLogSumExp_double(ARGUMENTS_LIST) ARGUMENTS_LIST: double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, R_xlen_t byrow, double *ans ***********************************************************************/ #include "000.types.h" #include "000.templates-types.h" /* extern 1-D function 'logSumExp' */ extern double logSumExp_double(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, R_xlen_t by, double *xx); void rowLogSumExps_double(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, R_xlen_t byrow, double *ans) { R_xlen_t ii, idx; double navalue; // double (*logsumexp)(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, R_xlen_t by, double *xx); if (byrow) { /* R allocate memory for row-vector 'xx' of length 'ncol'. This will be taken care of by the R garbage collector later on. */ double *xx = (double *) R_alloc(ncols, sizeof(double)); navalue = (narm || ncols == 0) ? R_NegInf : NA_REAL; for (ii=0; ii < nrows; ++ii) { idx = ((rows == NULL) ? (ii) : rows[ii]); if (idx == NA_R_XLEN_T) { ans[ii] = navalue; } else { ans[ii] = logSumExp_double(x+idx, cols, ncols, narm, hasna, nrow, xx); } } } else { navalue = (narm || nrows == 0) ? R_NegInf : NA_REAL; for (ii=0; ii < ncols; ++ii) { idx = R_INDEX_OP(((cols == NULL) ? (ii) : cols[ii]), *, nrow); if (idx == NA_R_XLEN_T) { ans[ii] = navalue; } else { ans[ii] = logSumExp_double(x+idx, rows, nrows, narm, hasna, 0, NULL); } } } /* if (byrow) */ } /*************************************************************************** HISTORY: 2013-06-12 [DH] o Created. **************************************************************************/ matrixStats/src/rowCounts_lowlevel.h0000644000176200001440000000173414111740760017457 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowCounts_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int value, int what, int narm, int hasna, int *ans) void rowCounts_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int value, int what, int narm, int hasna, int *ans) void rowCounts_lgl(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int value, int what, int narm, int hasna, int *ans) */ #define X_TYPE 'i' #include "rowCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowCounts_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'l' #include "rowCounts_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowMeans2_lowlevel_template.h0000644000176200001440000000461414111740760021224 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowMeans2_(ARGUMENTS_LIST) Copyright: Henrik Bengtsson, 2017 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowMeans2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, idx; R_xlen_t *colOffset; X_C_TYPE value; LDOUBLE sum, avg; R_xlen_t count; /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } } for (ii=0; ii < nrows; ii++) { R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? ii : R_INDEX_OP(ii, *, ncol); } else { rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol); } sum = 0.0; count = 0; for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow); else idx = R_INDEX_OP(rowIdx, +, jj); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj]); } value = R_INDEX_GET(x, idx, X_NA); #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; ++count; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; ++count; if (jj % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; ++count; } #endif } /* for (jj ...) */ if (sum > DOUBLE_XMAX) { avg = R_PosInf; } else if (sum < -DOUBLE_XMAX) { avg = R_NegInf; } else { avg = sum / count; } ans[ii] = (double)avg; R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } matrixStats/src/anyMissing_lowlevel_template.h0000644000176200001440000000406114111740760021464 0ustar liggesusers/*********************************************************************** TEMPLATE: int anyMissing_internal(ARGUMENTS_LIST) ARGUMENTS_LIST: SEXP x, R_xlen_t *idxs, R_xlen_t nidxs ***********************************************************************/ #include #include "000.types.h" #include "000.templates-types.h" #ifndef CHECK_MISSING #define CHECK_MISSING(cond) \ for (ii=0; ii < nidxs; ++ii) { \ if (cond) return 1; \ } #endif int anyMissing_internal(SEXP x, R_xlen_t *idxs, R_xlen_t nidxs) { R_xlen_t ii; double *xdp; int *xip, *xlp; Rcomplex *xcp; switch (TYPEOF(x)) { case REALSXP: xdp = REAL(x); CHECK_MISSING(ISNAN(R_INDEX_GET(xdp, ((idxs == NULL) ? (ii) : idxs[ii]), NA_REAL))); break; case INTSXP: xip = INTEGER(x); CHECK_MISSING(R_INDEX_GET(xip, ((idxs == NULL) ? (ii) : idxs[ii]), NA_INTEGER) == NA_INTEGER); break; case LGLSXP: xlp = LOGICAL(x); CHECK_MISSING(R_INDEX_GET(xlp, ((idxs == NULL) ? (ii) : idxs[ii]), NA_LOGICAL) == NA_LOGICAL); break; case CPLXSXP: xcp = COMPLEX(x); #ifdef IDXS_TYPE CHECK_MISSING(((idxs == NULL) ? (ii) : idxs[ii]) == NA_R_XLEN_T || ISNAN(xcp[((idxs == NULL) ? (ii) : idxs[ii])].r) || ISNAN(xcp[((idxs == NULL) ? (ii) : idxs[ii])].i)); #else CHECK_MISSING(ISNAN(xcp[ii].r) || ISNAN(xcp[ii].i)); #endif break; case STRSXP: #ifdef IDXS_TYPE CHECK_MISSING(((idxs == NULL) ? (ii) : idxs[ii]) == NA_R_XLEN_T || STRING_ELT(x, ((idxs == NULL) ? (ii) : idxs[ii])) == NA_STRING); #else CHECK_MISSING(STRING_ELT(x, ii) == NA_STRING); #endif break; case RAWSXP: /* no such thing as a raw NA; always FALSE */ break; default: break; } /* switch() */ return 0; } // anyMissing() /*************************************************************************** HISTORY: 2015-07-15 [DJ] o Avoid 'embedding a directive within macro arguments'. 2015-06-15 [DJ] o Created. **************************************************************************/ matrixStats/src/rowSums2_lowlevel.h0000644000176200001440000000126314111740760017212 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowSums2_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) void rowSums2_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowSums2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowSums2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/binCounts_lowlevel_template.h0000644000176200001440000000640314063411362021310 0ustar liggesusers/*************************************************************************** TEMPLATE: void binCounts_(...) GENERATES: void binCounts_L(double *x, int nx, double *bx, int nbins, int *count) void binCounts_R(double *x, int nx, double *bx, int nbins, int *count) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - BIN_BY: 'L' or 'R' Copyright Henrik Bengtsson, 2012-2014 **************************************************************************/ #include #if BIN_BY == 'L' /* [u,v) */ #define METHOD_NAME binCounts_L #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1) #elif BIN_BY == 'R' /* (u,v] */ #define METHOD_NAME binCounts_R #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1) #endif void METHOD_NAME(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) { R_xlen_t ii = 0, jj = 0, iStart = 0; int n = 0; int warn = 0; // Count? if (nbins > 0) { // Skip to the first bin while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) { ++iStart; } // For each x... for (ii = iStart; ii < nx; ++ii) { // Skip to a new bin? while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) { count[jj++] = n; // No more bins? if (jj >= nbins) { ii = nx; // Cause outer for-loop to exit break; } n = 0; } /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n == R_INT_MAX) { warn = 1; // No point in keep counting for this bin break; } // Count ++n; } // Update count of the last bin? if (jj < nbins) { count[jj] = n; // Assign the remaining bins to zero counts while (++jj < nbins) { count[jj] = 0; } } } // if (nbins > 0) if (warn) { warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX); } } /* Undo template macros */ #undef BIN_BY #undef IS_PART_OF_FIRST_BIN #undef IS_PART_OF_NEXT_BIN #include "000.templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-07 [HB] o ROBUSTNESS: Added protection for integer overflow in bin counts. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-10-08 [HB] o Created template for binCounts_() to create functions that bin either by [u,v) or (u,v]. 2013-05-10 [HB] o SPEEDUP: binCounts() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binCounts() would return random/garbage counts for bins that were beyond the last data point. o BUG FIX: In some cases binCounts() could try to go past the last bin. 2012-10-03 [HB] o Created. **************************************************************************/ matrixStats/src/rowMads_lowlevel_template.h0000644000176200001440000001756414111740760020773 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowMads_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, double scale, int narm, int hasna, int byrow, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "000.types.h" #include /* abs() and fabs() */ /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowMads, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, double scale, int narm, int hasna, int byrow, double *ans) { int isOdd; R_xlen_t ii, jj, kk, qq, idx; R_xlen_t *colOffset; X_C_TYPE *values, value, mu; double *values_d, value_d, mu_d; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); values_d = (double *) R_alloc(ncols, sizeof(double)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* When narm == FALSE, isOdd and qq are the same for all rows */ if (narm == FALSE) { isOdd = (ncols % 2 == 1); qq = (R_xlen_t)(ncols/2) - 1; } else { isOdd = FALSE; qq = 0; } value = 0; /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); // HJ begin if (byrow) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } // HJ end } hasna = TRUE; if (hasna == TRUE) { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? (ii) : R_INDEX_OP(ii, *, ncol); } else { rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol); } kk = 0; /* The index of the last non-NA value detected */ for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow); else idx = R_INDEX_OP(rowIdx, +, jj); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj]); } value = R_INDEX_GET(x, idx, X_NA); //HJ if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* for (jj ...) */ /* Note that 'values' will never contain NA/NaNs */ if (kk == 0) { ans[ii] = NA_REAL; } else if (kk == 1) { ans[ii] = 0; } else if (kk == -1) { ans[ii] = R_NaReal; } else { /* When narm == TRUE, isOdd and qq may change with row */ if (narm == TRUE) { isOdd = (kk % 2 == 1); qq = (R_xlen_t)(kk/2) - 1; } /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; /* Calculate mu and sigma */ if (isOdd == TRUE) { /* Since there are an odd number of values, then we also know that 'mu' is one of the values in 'x', which in turn mean we don't have to coerce integers to doubles, if 'x' is an integer. Simple benchmarking shows that it significantly faster to avoid coercion. */ mu = value; /* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */ for (jj=0; jj < kk; jj++) { value = (values[jj] - mu); values[jj] = X_ABS(value); } /* (b) Calculate median of |x-mu| */ /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; ans[ii] = scale * (double)value; } else { /* Here we have to coerce to doubles since 'mu' is an average. */ /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); #if X_TYPE == 'i' /* If the difference between two integers is an even number, then their means is also an integer, and then we can avoid coercion to double also here. This should happen roughly half the time we end up here which is worth optimizing for. Simple benchmarking show a significant difference in speed, particular for the column-based version. */ if ((values[qq] - value) % 2 == 0) { /* No need to coerce */ mu = (values[qq] + value)/2; /* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */ for (jj=0; jj < kk; jj++) { value = (values[jj] - mu); values[jj] = X_ABS(value); } /* (b) Calculate median of |x-mu| */ /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); X_PSORT(values, qq+1, qq); ans[ii] = scale * ((double)values[qq] + (double)values[qq+1])/2; /* Done, continue to next vector */ continue; } #endif mu_d = ((double)values[qq] + (double)value)/2; /* (a) Subtract mu and square, i.e. x <- (x-mu)^2 */ for (jj=0; jj < kk; jj++) { value_d = ((double)values[jj] - mu_d); values_d[jj] = fabs(value_d); } /* (b) Calculate median */ /* Permute x[0:kk-1] so that x[qq-1] and x[qq] are in the correct places with smaller values to the left, ... */ rPsort(values_d, kk, qq+1); rPsort(values_d, qq+1, qq); ans[ii] = scale * (values_d[qq] + values_d[qq+1])/2; } } /* if (kk == 0) */ R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? (ii) : (ii)*ncol; } else { rowIdx = byrow ? rows[ii] : rows[ii]*ncol; } for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) values[jj] = x[rowIdx+(jj)*nrow]; else values[jj] = x[rowIdx+(jj)]; } else { values[jj] = x[rowIdx+colOffset[jj]]; } } //HJ /* Permute x[0:ncols-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, ncols, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + value)/2; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* if (hasna ...) */ } /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-17 [HB] o Created from rowMedians_TYPE-template.h. **************************************************************************/ matrixStats/src/mean2_lowlevel.h0000644000176200001440000000101714111740760016450 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double mean2_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine) double mean2_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine) */ #define X_TYPE 'i' #include "mean2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "mean2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/000.utils.h0000644000176200001440000000777114111740760015210 0ustar liggesusers#include #include "000.types.h" #define R_TYPE_LGL 1 /* 0b0001 */ #define R_TYPE_INT 2 /* 0b0010 */ #define R_TYPE_REAL 4 /* 0b0100 */ static R_INLINE void assertArgVector(SEXP x, int type, char *xlabel) { /* Argument 'x': */ if (!isVectorAtomic(x)) { error("Argument '%s' must be a matrix or a vector.", xlabel); } switch (TYPEOF(x)) { case LGLSXP: if (!(type & R_TYPE_LGL)) error("Argument '%s' cannot be logical.", xlabel); break; case INTSXP: if (!(type & R_TYPE_INT)) error("Argument '%s' cannot be integer.", xlabel); break; case REALSXP: if (!(type & R_TYPE_REAL)) error("Argument '%s' cannot be numeric.", xlabel); break; default: error("Argument '%s' must be of type logical, integer or numeric, not '%s'.", xlabel, type2char(TYPEOF(x))); } /* switch */ } /* assertArgVector() */ static R_INLINE void assertArgDim(SEXP dim, double max, char *maxlabel) { double nrow, ncol; const char *dimlabel = "dim."; /* Argument 'dim': */ if (!isVectorAtomic(dim) || xlength(dim) != 2 || !isInteger(dim)) { error("Argument '%s' must be an integer vector of length two.", dimlabel); } nrow = (double)INTEGER(dim)[0]; ncol = (double)INTEGER(dim)[1]; if (nrow < 0) { error("Argument '%s' specifies a negative number of rows (%s[1]): %g", dimlabel, dimlabel, nrow); } else if (ncol < 0) { error("Argument '%s' specifies a negative number of columns (%s[2]): %g", dimlabel, dimlabel, ncol); } else if (nrow * ncol != max) { error("Argument '%s' does not match length of argument '%s': %g * %g != %g", dimlabel, maxlabel, nrow, ncol, max); } } /* assertArgDim() */ static R_INLINE void assertArgMatrix(SEXP x, SEXP dim, int type, char *xlabel) { /* Argument 'x': */ if (isMatrix(x)) { } else if (isVectorAtomic(x)) { } else { error("Argument '%s' must be a matrix or a vector.", xlabel); } switch (TYPEOF(x)) { case LGLSXP: if (!(type & R_TYPE_LGL)) error("Argument '%s' cannot be logical.", xlabel); break; case INTSXP: if (!(type & R_TYPE_INT)) error("Argument '%s' cannot be integer.", xlabel); break; case REALSXP: if (!(type & R_TYPE_REAL)) error("Argument '%s' cannot be numeric.", xlabel); break; default: error("Argument '%s' must be of type logical, integer or numeric, not '%s'.", xlabel, type2char(TYPEOF(x))); } /* switch */ /* Argument 'dim': */ assertArgDim(dim, xlength(x), "x"); } /* assertArgMatrix() */ static R_INLINE int asLogicalNoNA(SEXP x, char *xlabel) { int value = 0; if (length(x) != 1) error("Argument '%s' must be a single value.", xlabel); if (isLogical(x)) { value = asLogical(x); } else if (isInteger(x)) { value = asInteger(x); } else { error("Argument '%s' must be a logical.", xlabel); } if (value != TRUE && value != FALSE) error("Argument '%s' must be either TRUE or FALSE.", xlabel); return value; } /* asLogicalNoNA() */ /* Retrieve the 'i'th element of 'x' as R_xlen_t */ static R_INLINE R_xlen_t asR_xlen_t(SEXP x, R_xlen_t i) { int mode = TYPEOF(x); switch (mode) { case INTSXP: return INTEGER(x)[i]; case REALSXP: return REAL(x)[i]; default: error("only integer and numeric are supported, not '%s'.", type2char(TYPEOF(x))); } return 0; } /* asR_xlen_t() */ /* Specified in validateIndices.c */ R_xlen_t *validateIndicesCheckNA(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna); R_xlen_t *validateIndices(SEXP idxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs); static R_INLINE int int_from_dbl(double x) { if (ISNAN(x)) return NA_INTEGER; if (x > INT_MAX || x <= INT_MIN) return NA_INTEGER; return x; } /* int_from_dbl() */ static R_INLINE double dbl_from_int(int x) { if (x == NA_INTEGER) return NA_REAL; return x; } /* dbl_from_int() */ #define SWAP(type, x, y) { \ type tmp = x; \ x = y; \ y = tmp; \ } matrixStats/src/rowCounts_lowlevel_template.h0000644000176200001440000001370514111740760021353 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCounts_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, X_C_TYPE value, int what, int narm, int hasna, int *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r', or 'l' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(rowCounts, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, X_C_TYPE value, int what, int narm, int hasna, int *ans) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; int count; X_C_TYPE xvalue; if (what == 0) { /* all */ for (ii=0; ii < nrows; ii++) ans[ii] = 1; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { /* Skip? */ if (ans[ii]) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (!X_ISNAN(xvalue)) { ans[ii] = 0; /* Found another value! Skip from now on */ } } } } } else { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { /* Skip? */ if (ans[ii]) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (xvalue == value) { } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is not 'value' later, then we know for sure that all = FALSE regardless of missing values. In other words, at this point the answer can be either NA or FALSE.*/ ans[ii] = NA_INTEGER; } else { /* Found another value! Skip from now on */ ans[ii] = 0; } } } /* for (ii ...) */ } /* for (jj ...) */ } } else if (what == 1) { /* any */ for (ii=0; ii < nrows; ii++) ans[ii] = 0; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { /* Skip? */ if (!ans[ii]) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(xvalue)) { ans[ii] = 1; /* Found value! Skip from now on */ } } } } } else { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { /* Skip? */ if (ans[ii] == 0 || ans[ii] == NA_INTEGER) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (xvalue == value) { /* Found value! Skip from now on */ ans[ii] = 1; } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is 'value' later, then we know for sure that any = TRUE regardless of missing values. In other words, at this point the answer can be either NA or TRUE.*/ ans[ii] = NA_INTEGER; } } } /* for (ii ...) */ } /* for (jj ...) */ } } else if (what == 2) { /* count */ for (ii=0; ii < nrows; ii++) ans[ii] = 0; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(xvalue)) ans[ii] = ans[ii] + 1; } } } else { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { count = ans[ii]; /* Nothing more to do on this row? */ if (count == NA_INTEGER) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (xvalue == value) { ans[ii] = count + 1; } else { if (!narm && X_ISNAN(xvalue)) { ans[ii] = NA_INTEGER; continue; } } } /* for (ii ...) */ } /* for (jj ...) */ } } /* if (what) */ } /*************************************************************************** HISTORY: 2015-04-13 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-11-01 [HB] o SPEEDUP: Now using ansp = INTEGER(ans) once and then querying/assigning 'ansp[i]' instead of INTEGER(ans)[i]. 2014-06-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowMedians_lowlevel_template.h0000644000176200001440000001477014111740760021463 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowMedians_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, int byrow, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" void CONCAT_MACROS(rowMedians, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int narm, int hasna, int byrow, double *ans) { int isOdd; R_xlen_t ii, jj, kk, qq, idx; R_xlen_t *colOffset; X_C_TYPE *values, value; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncols, sizeof(X_C_TYPE)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* When narm == FALSE, isOdd and qq are the same for all rows */ if (narm == FALSE) { isOdd = (ncols % 2 == 1); qq = (R_xlen_t)(ncols/2) - 1; } else { isOdd = FALSE; qq = 0; } value = 0; /* Pre-calculate the column offsets */ if (cols == NULL) { colOffset = NULL; } else { colOffset = (R_xlen_t *) R_alloc(ncols, sizeof(R_xlen_t)); // HJ begin if (byrow) { for (jj=0; jj < ncols; jj++) colOffset[jj] = R_INDEX_OP(cols[jj], *, nrow); } else { for (jj=0; jj < ncols; jj++) colOffset[jj] = cols[jj]; } // HJ end } if (hasna == TRUE) { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? ii : R_INDEX_OP(ii, *, ncol); } else { rowIdx = byrow ? rows[ii] : R_INDEX_OP(rows[ii], *, ncol); } kk = 0; /* The index of the last non-NA value detected */ for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) idx = R_INDEX_OP(rowIdx, +, jj*nrow); else idx = R_INDEX_OP(rowIdx, +, jj); } else { idx = R_INDEX_OP(rowIdx, +, colOffset[jj]); } value = R_INDEX_GET(x, idx, X_NA); //HJ if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* Note that 'values' will never contain NA/NaNs */ if (kk == 0) { ans[ii] = R_NaN; } else if (kk == -1) { ans[ii] = R_NaReal; } else { /* When narm == TRUE, isOdd and qq may change with row */ if (narm == TRUE) { isOdd = (kk % 2 == 1); qq = (R_xlen_t)(kk/2) - 1; } /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + (double)value)/2; } } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=0; ii < nrows; ii++) { //HJ R_xlen_t rowIdx; if (rows == NULL) { rowIdx = byrow ? ii : ii*ncol; } else { rowIdx = byrow ? rows[ii] : rows[ii]*ncol; } for (jj=0; jj < ncols; jj++) { if (colOffset == NULL) { if (byrow) values[jj] = x[rowIdx+(jj)*nrow]; else values[jj] = x[rowIdx+(jj)]; } else { values[jj] = x[rowIdx+colOffset[jj]]; } } //HJ /* Permute x[0:ncols-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, ncols, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + (double)value)/2; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* if (hasna ...) */ } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-11-01 [HB] o SPEEDUP: Now using 'ansp = REAL(ans)' once and then assigning to 'ansp' instead of to 'REAL(ans)'. 2013-04-23 [HB] o BUG FIX: The integer template of rowMedians_() would not handle ties properly. This was because ties were calculated as '(double)((rowData[qq] + value)/2)' instead of '((double)(rowData[qq] + value))/2'. 2013-01-13 [HB] o Merged rowMedians_int() and rowMedians_dbl() into template rowMedians_(). 2013-01-13 [HB] o Using internal arguments 'by_row' instead of 'by_column'. 2011-12-11 [HB] o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s. Note that NaN:s does not exist for integers. 2011-10-12 [HJ] o Added colMedians(). o Now rowMediansInteger/Real() can operate also by columns, cf. argument 'by_column'. 2007-08-14 [HB] o Added checks for user interrupts every 1000 line. o Added argument 'hasNA' to rowMedians(). 2005-12-07 [HB] o BUG FIX: When calculating the median of an even number (non-NA) values, the length of the second sort was one element too short, which made the method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq). 2005-11-24 [HB] o By implementing a special version for integers, there is no need to coerce to double in R, which would take up twice the amount of memory. o rowMedians() now handles NAs too. o Adopted from rowQuantiles.c in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/psortKM.c0000644000176200001440000000534214063411362015133 0ustar liggesusers/*************************************************************************** Public methods: SEXP psortKM(SEXP x, SEXP k, SEXP nk) Arguments: x: numeric vector k: integer scalar in [1,length(x)] m: integer scalar in [1,k] and not too large if k is large. Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2012 **************************************************************************/ #include #include #include "000.types.h" #include "000.utils.h" void psortKM_C(double *x, R_xlen_t nx, R_xlen_t k, R_xlen_t m, double *ans) { R_xlen_t ii, ll; double *xx; /* R allocate memory for the 'xx'. This will be taken care of by the R garbage collector later on. */ xx = (double *) R_alloc(nx, sizeof(double)); /* Create a local copy 'xx' of 'x'. */ for (ii=0; ii < nx; ii++) { xx[ii] = x[ii]; } /* Permute xx[0:partial] so that xx[partial+1] is in the correct place with smaller values to the left, ... Example: psortKM(x, k=50, m=2) with length(x) = 1000 rPsort(xx, 1000, 50); We know x[50] and that x[1:49] <= x[50] rPsort(xx, 50, 49); x[49] and that x[1:48] <= x[49] rPsort(xx, 49, 48); x[48] and that x[1:47] <= x[48] */ ll = nx; for (ii=0; ii < m; ii++) { rPsort(xx, ll, k-1-ii); ll = (k-1)-ii; } for (ii=0; ii < m; ii++) { ans[ii] = xx[(k-m)+ii]; } } /* psortKM_C() */ SEXP psortKM(SEXP x, SEXP k, SEXP m) { SEXP ans; R_xlen_t nx, kk, mm; /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); nx = xlength(x); if (nx == 0) { error("Argument 'x' must not be empty."); } /* Argument 'k': */ if (!isInteger(k)) { error("Argument 'k' must be an integer."); } if (length(k) != 1) { error("Argument 'k' must be a single integer."); } kk = asInteger(k); if (kk <= 0) { error("Argument 'k' must be a positive integer."); } if (kk > nx) { error("Argument 'k' must not be greater than number of elements in 'x'."); } /* Argument 'm': */ if (!isInteger(m)) { error("Argument 'm' must be an integer."); } if (length(m) != 1) { error("Argument 'm' must be a single integer."); } mm = asInteger(m); if (mm <= 0) { error("Argument 'm' must be a positive integer."); } else if (mm > kk) { error("Argument 'm' must not be greater than argument 'k'."); } /* R allocate a double vector of length 'partial' */ PROTECT(ans = allocVector(REALSXP, mm)); psortKM_C(REAL(x), nx, kk, mm, REAL(ans)); UNPROTECT(1); return(ans); } /* psortKM() */ /*************************************************************************** HISTORY: 2012-09-10 [HB] o Added psortKM(). o Created. **************************************************************************/ matrixStats/src/sum2_lowlevel_template.h0000644000176200001440000000324514111740760020234 0ustar liggesusers/*********************************************************************** TEMPLATE: double sum2_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014-2018 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(sum2, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, int narm) { X_C_TYPE value; R_xlen_t ii; LDOUBLE sum = 0; for (ii=0; ii < nidxs; ++ii) { value = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)value; /* Early stopping if sum is NA_real_ (but not NaN, -Inf, or +Inf) */ if (ii % 1048576 == 0 && ISNA(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)value; } #endif } /* for (ii ...) */ return (double)sum; } /*************************************************************************** HISTORY: 2015-07-11 [DJ] o Supported subsetted computation. 2014-11-06 [HB] o CLEANUP: Now sum2_() uses only basic C types. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/colCounts.c0000644000176200001440000001137414111740760015510 0ustar liggesusers/*************************************************************************** Public methods: SEXP colCounts(SEXP x, ...) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "colCounts_lowlevel.h" #include "naming.h" SEXP colCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans; int narm, hasna, what2, usenames; R_xlen_t ii, nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric or a logical value."); /* Argument 'what': */ what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("INTERNAL ERROR: Unknown value of 'what' for rowCounts: %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* R allocate an integer vector of length 'ncol' */ /* R allocate memory for vector 'count' of length 'ncols'. This will be taken care of by the R garbage collector later on. */ double *count = (double *) R_alloc(ncols, sizeof(double)); if (isReal(x)) { colCounts_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, asReal(value), what2, narm, hasna, count); } else if (isInteger(x)) { colCounts_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, asInteger(value), what2, narm, hasna, count); } else if (isLogical(x)) { colCounts_lgl(LOGICAL(x), nrow, ncol, crows, nrows, ccols, ncols, asLogical(value), what2, narm, hasna, count); } /* Coerce counts from double to integer. This is needed because colCount_nnn() returns double counts, which is in turn is needed because count() may need to return > INT_MAX. */ PROTECT(ans = allocVector(INTSXP, ncols)); int *ans_ptr = INTEGER(ans); for (ii = 0; ii < ncols; ii++) { if (count[ii] == (double)NA_R_XLEN_T) { ans_ptr[ii] = NA_INTEGER; } else { ans_ptr[ii] = (int)count[ii]; } } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } UNPROTECT(2); return(ans); } // colCounts() SEXP count(SEXP x, SEXP idxs, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans; int narm, hasna, what2; R_xlen_t nx; double count = 0.0; /* Argument 'x' and 'dim': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric or a logical value."); /* Argument 'what': */ what2 = asInteger(what); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'idxs': */ R_xlen_t nrows, ncols = 1; R_xlen_t *crows = validateIndices(idxs, nx, 1, &nrows); R_xlen_t *ccols = NULL; if (isReal(x)) { colCounts_dbl(REAL(x), nx, 1, crows, nrows, ccols, ncols, asReal(value), what2, narm, hasna, &count); } else if (isInteger(x)) { colCounts_int(INTEGER(x), nx, 1, crows, nrows, ccols, ncols, asInteger(value), what2, narm, hasna, &count); } else if (isLogical(x)) { colCounts_lgl(LOGICAL(x), nx, 1, crows, nrows, ccols, ncols, asLogical(value), what2, narm, hasna, &count); } /* R allocate a scalar */ if (count > (double)INT_MAX && count != (double)NA_R_XLEN_T) { PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = count; UNPROTECT(1); } else { PROTECT(ans = allocVector(INTSXP, 1)); if (count == (double)NA_R_XLEN_T) { INTEGER(ans)[0] = NA_INTEGER; } else { INTEGER(ans)[0] = (int)count; } UNPROTECT(1); } return(ans); } // count() /*************************************************************************** HISTORY: 2015-04-21 [DJ] o Supported subsetted computation. 2014-11-14 [HB] o Created from rowCounts.c. **************************************************************************/ matrixStats/src/logSumExp_lowlevel.h0000644000176200001440000000042414111740760017372 0ustar liggesusers#include #include #include "000.utils.h" /* Native API (dynamically generated via macros): double logSumExp_double(double *x, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int hasna, R_xlen_t by, double *xx) */ #include "logSumExp_lowlevel_template.h" matrixStats/src/rowCumprods.c0000644000176200001440000000436714111740760016067 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCumprods(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCumprods_lowlevel.h" #include "naming.h" SEXP rowCumprods(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCumprods_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x) | isLogical(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCumprods_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCumprods() */ /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/weightedMedian_lowlevel_template.h0000644000176200001440000002117114111740760022262 0ustar liggesusers/*********************************************************************** TEMPLATE: double weightedMedian_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int interpolate, int ties Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(weightedMedian, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int interpolate, int ties) { X_C_TYPE value; X_C_TYPE *xtmp; double weight, res; double dx, dy, Dy; double *wtmp, *wcum, wtotal, wlow, whigh, tmp_d, tmp_d2; R_xlen_t nxt, ii, jj, half; int *idxs_int; int equalweights = 0; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Weights */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ wtmp = Calloc(nidxs, double); /* Check for missing, negative, and infite weights */ nxt = 0; for (ii=0; ii < nidxs; ii++) { /* Assume negative or missing weight by default or that the signals is missing and should be dropped */ wtmp[ii] = 0; weight = R_INDEX_GET(w, ((idxs == NULL) ? (ii) : idxs[ii]), NA_REAL); if (ISNAN(weight)) { if (!narm) { Free(wtmp); return NA_REAL; } } else if (weight <= 0) { /* Drop non-positive weights */ } else if (isinf(weight)) { /* Detected a +Inf. From now on, treat all +Inf weights equal and drop everything else */ nxt = 0; for (jj=0; jj < nidxs; jj++) { /* Assume non-infinite weight by default */ wtmp[jj] = 0; weight = R_INDEX_GET(w, ((idxs == NULL) ? (jj) : idxs[jj]), NA_REAL); if (isinf(weight)) { value = R_INDEX_GET(x, ((idxs == NULL) ? (jj) : idxs[jj]), X_NA); if (X_ISNAN(value)) { if (!narm) { Free(wtmp); return NA_REAL; } } else { /* Infinite weight, i.e. use data point */ wtmp[jj] = 1; nxt++; } } else if (ISNAN(weight)) { if (!narm) { Free(wtmp); return NA_REAL; } } } equalweights = 1; break; } else { /* A data points with a finite positive weight */ value = R_INDEX_GET(x, ((idxs == NULL) ? (ii) : idxs[ii]), X_NA); if (X_ISNAN(value)) { if (!narm) { Free(wtmp); return NA_REAL; } } else { /* A data point with a non-missing value */ wtmp[ii] = weight; nxt++; } } } /* printf("nx=%d, nxt=%d\n", nx, nxt); for (ii=0; ii < nx; ii++) printf("w[%d]=%g, wtmp[%d]=%g\n", (int)ii, (double)w[ii], (int)ii, wtmp[ii]); */ /* Nothing to do? */ if (nxt == 0) { Free(wtmp); return NA_REAL; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Copy (x,w) to work with and calculate total weight */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ xtmp = Calloc(nxt, X_C_TYPE); jj = 0; wtotal = 0; for (ii=0; ii < nidxs; ii++) { if (wtmp[ii] > 0) { /* printf("ii=%d, jj=%d, wtmp[%d]=%g\n", (int)ii, (int)jj, (int)ii, wtmp[ii]); */ xtmp[jj] = x[((idxs == NULL) ? (ii) : idxs[ii])]; // sure that xvalue is not NA wtmp[jj] = wtmp[ii]; wtotal += wtmp[jj]; jj++; } } x = xtmp; w = wtmp; nx = nxt; /* for (ii=0; ii < nx; ii++) printf("x[%d]=%g, w[%d]=%g\n", (int)ii, (double)x[ii], (int)ii, w[ii]); */ /* Early stopping? */ if (nx == 1) { res = (double)x[0]; Free(xtmp); Free(wtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* All weights equal? Happens if +Inf were detected. */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (equalweights) { half = (nx+1) / 2; /* printf("half=%d\n", (int)half); */ X_PSORT(x, nx, half); /* for (ii=0; ii < nx; ii++) printf("x[%d]=%g\n", (int)ii, (double)x[ii]); */ /* FIXME: Add support for ties here too */ if (nx % 2 == 1) { res = (double)x[half-1]; } else { X_PSORT(x, half, half-1); res = ((double)x[half-1] + (double)x[half]) / 2; } Free(xtmp); Free(wtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Sort x and calculated the cumulative sum of weights (normalize to */ /* one) according to the reordered vector. */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* (a) Sort x */ idxs_int = Calloc(nx, int); for (ii = 0; ii < nx; ii++) idxs_int[ii] = ii; X_QSORT_I(x, idxs_int, 1, nx); /* (b) Normalized cumulative weights */ wcum = Calloc(nx, double); tmp_d2 = 0; /* Index where cumulative weight passed 1/2 */ half = nx+1; /* Default is last */ if (interpolate) { /* Adjust */ for (ii = 0; ii < nx; ii++) { tmp_d = w[idxs_int[ii]] / wtotal; tmp_d2 += tmp_d; wcum[ii] = tmp_d2 - (tmp_d/2); if (wcum[ii] >= 0.5) { half = ii; /* Early stopping - no need to continue */ break; } } } else { for (ii = 0; ii < nx; ii++) { tmp_d2 += w[idxs_int[ii]] / wtotal; wcum[ii] = tmp_d2; if (tmp_d2 > 0.5) { half = ii; /* Early stopping - no need to continue */ break; } } } Free(wtmp); Free(idxs_int); /* Two special cases where more than half of the total weight is at a) the first, or b) the last value */ if (half == 0 || half == nx) { res = (double)x[half]; Free(wcum); Free(xtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Alt 1: Linearly interpolated weighted median */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (interpolate) { /* The width and the height of the "rectangle". */ dx = (double)(x[half] - x[half-1]); Dy = wcum[half] - wcum[half-1]; /* printf("dx=%g, Dy=%g\n", dx, Dy); */ /* The width and the height of the triangle which upper corner touches the level where the cumulative sum of weights *equals* half the total weight. */ dy = 0.5 - wcum[half]; dx = (dy/Dy) * dx; /* printf("dx=%g, dy=%g\n", dx, dy); */ /* The corresponding x value */ res = dx + x[half]; Free(wcum); Free(xtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Alt 2: Classical weighted median (tied or not) */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* At this point we know that: 1) at most half the total weight is in the set x[1:half], 2) that the set x[(half+2):n] contains less than half the total weight The question is whether x[(half+1):n] contains *more* than half the total weight (try x=c(1,2,3), w=c(1,1,1)). If it is then we can be sure that x[half+1] is the weighted median we are looking for, otherwise it is any function of x[k:(half+1)]. */ wlow = wcum[half-1]; whigh = 1 - wlow; /* printf("half=%d, wtotal=%g, wlow=%g, whigh=%g, ties=%d\n", half, (double)wtotal, (double)wlow, (double)whigh, ties); printf("x[half+(-1:1)]=c(%g, %g, %g)\n", x[half-1-1], x[half-1], x[half-1+1]); */ if (whigh > 0.5) { /* printf("matrixStats2: Not a tie!\n"); */ /* Not a tie */ res = x[half]; } else { /* printf("matrixStats2: A tie!\n"); */ /* A tie! */ if (ties == 1) { /* weighted */ /* printf("ties=%d, half=%d, wlow*x[half]=%g, whigh*x[half+1]=%g\n", ties, half, wlow*x[half-1], whigh*x[half]); */ res = wlow*(double)x[half-1] + whigh*(double)x[half]; } else if (ties == 2) { /* min */ res = (double)x[half-1]; } else if (ties == 4) { /* max */ res = (double)x[half]; } else if (ties == 8) { /* mean */ res = ((double)x[half-1] + (double)x[half]) / 2; } else { error("Unknown value of argument 'ties': %d", ties); } } Free(wcum); Free(xtmp); return res; } /*************************************************************************** HISTORY: 2015-07-09 [DJ] o Supported subsetted computation. 2015-01-01 [HB] o Created. **************************************************************************/ matrixStats/src/rowDiffs.c0000644000176200001440000000626314111740760015323 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowDiffs(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowDiffs_lowlevel.h" #include "naming.h" SEXP rowDiffs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP lag, SEXP differences, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t lagg, diff; R_xlen_t nrow, ncol; R_xlen_t nrow_ans, ncol_ans; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'lag': */ lagg = asInteger(lag); if (lagg < 1) { error("Argument 'lag' must be a positive integer."); } /* Argument 'differences': */ diff = asInteger(differences); if (diff < 1) { error("Argument 'differences' must be a positive integer."); } /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Dimension of result matrix */ if (byrow) { nrow_ans = nrows; ncol_ans = (R_xlen_t)((double)ncols - ((double)diff*(double)lagg)); if (ncol_ans < 0) ncol_ans = 0; } else { nrow_ans = (R_xlen_t)((double)nrows - ((double)diff*(double)lagg)); if (nrow_ans < 0) nrow_ans = 0; ncol_ans = ncols; } if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow_ans, ncol_ans)); rowDiffs_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, lagg, diff, REAL(ans), nrow_ans, ncol_ans); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { set_rowDiffs_Dimnames(ans, dimnames, nrows, crows, ncols, ncol_ans, ccols); } else { set_colDiffs_Dimnames(ans, dimnames, nrows, nrow_ans, crows, ncols, ccols); } } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow_ans, ncol_ans)); rowDiffs_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, lagg, diff, INTEGER(ans), nrow_ans, ncol_ans); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { set_rowDiffs_Dimnames(ans, dimnames, nrows, crows, ncols, ncol_ans, ccols); } else { set_colDiffs_Dimnames(ans, dimnames, nrows, nrow_ans, crows, ncols, ccols); } } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowDiffs() */ /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/x_OP_y_lowlevel.h0000644000176200001440000001535514111740760016655 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void x_OP_y_Add_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Add_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Add_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Add_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Sub_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Mul_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_int_int(int *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_int_dbl(int *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_dbl_int(double *x, R_xlen_t nrow, R_xlen_t ncol, int *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) void x_OP_y_Div_dbl_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, double *y, R_xlen_t ny, R_xlen_t *xrows, R_xlen_t nxrows, R_xlen_t *xcols, R_xlen_t nxcols, R_xlen_t *yidxs, R_xlen_t nyidxs, int byrow, int commute, int narm, int hasna, int *ans, R_xlen_t n) */ /* Addition */ #define METHOD x_OP_y_Add #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD /* Subtraction */ #define METHOD x_OP_y_Sub #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD /* Multiplication */ #define METHOD x_OP_y_Mul #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD /* Division */ #define METHOD x_OP_y_Div #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_lowlevel_template.h" #include "000.templates-types_undef.h" #undef METHOD matrixStats/src/binMeans_lowlevel_template.h0000644000176200001440000001211414063411362021074 0ustar liggesusers/*************************************************************************** TEMPLATE: binMeans_(...) GENERATES: void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - BIN_BY: 'L' or 'R' Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include "000.types.h" #if BIN_BY == 'L' /* [u,v) */ #define METHOD_NAME binMeans_L #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1) #elif BIN_BY == 'R' /* (u,v] */ #define METHOD_NAME binMeans_R #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1) #endif void METHOD_NAME(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) { R_xlen_t ii = 0, jj = 0, iStart=0; R_xlen_t n = 0; LDOUBLE sum = 0.0; int warn = 0; // Count? if (nbins > 0) { // Skip to the first bin while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) { ++iStart; } // For each x... for (ii = iStart; ii < nx; ++ii) { // Skip to a new bin? while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) { // Update statistic of current bin? if (count) { /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n > R_INT_MAX) { warn = 1; count[jj] = R_INT_MAX; } else { count[jj] = n; } } ans[jj] = n > 0 ? sum / n : R_NaN; sum = 0.0; n = 0; // ...and move to next ++jj; // No more bins? if (jj >= nbins) { // Make the outer for-loop to exit... ii = nx - 1; // ...but correct for the fact that the y[nx-1] point will // be incorrectly added to the sum. Doing the correction // here avoids an if (ii < nx) sum += y[ii] below. sum -= y[ii]; break; } } // Sum and count sum += y[ii]; ++n; /* Early LDOUBLE stopping? */ if (n % 1048576 == 0 && !R_FINITE(sum)) break; } // Update statistic of the last bin? if (jj < nbins) { if (count) { /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n > R_INT_MAX) { warn= 1; count[jj] = R_INT_MAX; } else { count[jj] = n; } } ans[jj] = n > 0 ? sum / n : R_NaN; // Assign the remaining bins to zero counts and missing mean values while (++jj < nbins) { ans[jj] = R_NaN; if (count) count[jj] = 0; } } } // if (nbins > 0) if (warn) { warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX); } } /* Undo template macros */ #undef BIN_BY #undef IS_PART_OF_FIRST_BIN #undef IS_PART_OF_NEXT_BIN #include "000.templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-07 [HB] o ROBUSTNESS: Added protection for integer overflow in bin counts. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-10-01 [HB] o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins. 2014-04-04 [HB] o BUG FIX: The native code of binMeans(x, bx) would try to access an out-of-bounds value of argument 'y' iff 'x' contained elements that are left of all bins in 'bx'. This bug had no impact on the results and since no assignment was done it should also not crash/ core dump R. This was discovered thanks to new memtests (ASAN and valgrind) provided by CRAN. 2013-10-08 [HB] o Created template for binMeans_() to create functions that bin either by [u,v) or (u,v]. 2013-05-10 [HB] o SPEEDUP: binMeans() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binMeans() would return random/garbage means/counts for bins that were beyond the last data point. o BUG FIX: In some cases binMeans() could try to go past the last bin. 2012-10-03 [HB] o Created binMeans(), which was adopted from from code proposed by Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as a reply to HB's R-devel thread 'Fastest non-overlapping binning mean function out there?' on Oct 3, 2012. **************************************************************************/ matrixStats/src/weightedMedian_lowlevel.h0000644000176200001440000000120014111740760020356 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include #include "000.macros.h" /* Native API (dynamically generated via macros): double weightedMedian_int(int *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int interpolate, int ties) double weightedMedian_dbl(double *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int interpolate, int ties) */ #define X_TYPE 'i' #include "weightedMedian_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "weightedMedian_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/allocMatrix2.c0000644000176200001440000001062114063411362016071 0ustar liggesusers#include #include "000.types.h" #include /* Checks whether setting bytes of an int/double to all zeroes corresponds to assigning a zero value. Note that the bit representation of int's and double's may not be the same on all architectures. */ int memset_zero_ok_int() { int t = 1; memset(&t, 0, sizeof(t)); return (t == 0); } int memset_zero_ok_double() { double t = 1; memset(&t, 0, sizeof(t)); return (t == 0); } /* For debugging purposes */ /* SEXP memsetZeroable() { SEXP ans; PROTECT(ans = allocVector(LGLSXP, 2)); LOGICAL(ans)[1] = memset_zero_ok_int(); LOGICAL(ans)[2] = memset_zero_ok_double(); UNPROTECT(1); return(ans); } */ void fillWithValue(SEXP ans, SEXP value) { R_xlen_t i, n; SEXPTYPE type; double *ans_ptr_d, value_d; int *ans_ptr_i, value_i; int *ans_ptr_l, value_l; /* Argument 'ans': */ if (!isVectorAtomic(ans)) { error("Argument 'ans' must be a vector."); } n = xlength(ans); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); switch (type) { case INTSXP: value_i = asInteger(value); ans_ptr_i = INTEGER(ans); if (value_i == 0 && memset_zero_ok_int()) { memset(ans_ptr_i, 0, n*sizeof(value_i)); } else { for (i=0; i < n; i++) ans_ptr_i[i] = value_i; } break; case REALSXP: value_d = asReal(value); ans_ptr_d = REAL(ans); if (value_d == 0 && memset_zero_ok_double()) { memset(ans_ptr_d, 0, n*sizeof(value_d)); } else { for (i=0; i < n; i++) ans_ptr_d[i] = value_d; } break; case LGLSXP: value_l = asLogical(value); ans_ptr_l = LOGICAL(ans); if (value_l == 0 && memset_zero_ok_int()) { memset(ans_ptr_l, 0, n*sizeof(value_l)); } else { for (i=0; i < n; i++) ans_ptr_l[i] = value_l; } break; default: error("Argument 'value' must be either of type integer, numeric or logical."); break; } } /* fillWithValue() */ SEXP allocVector2(SEXP length, SEXP value) { SEXP ans; SEXPTYPE type; R_xlen_t n = 0; /* Argument 'length': */ if (isInteger(length) && xlength(length) == 1) { n = (R_xlen_t)asInteger(length); } else if (isReal(length) && xlength(length) == 1) { n = (R_xlen_t)asReal(length); } else { error("Argument 'length' must be a single numeric."); } if (n < 0) error("Argument 'length' is negative."); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(ans = allocVector(type, n)); fillWithValue(ans, value); UNPROTECT(1); return(ans); } /* allocVector2() */ SEXP allocMatrix2(SEXP nrow, SEXP ncol, SEXP value) { SEXP ans; SEXPTYPE type; int nc, nr; /* Argument 'nrow' & 'ncol': */ if (!isInteger(nrow) || xlength(nrow) != 1) { error("Argument 'nrow' must be a single integer."); } if (!isInteger(ncol) || xlength(ncol) != 1) { error("Argument 'ncol' must be a single integer."); } nr = asInteger(nrow); nc = asInteger(ncol); if (nr < 0) error("Argument 'nrow' is negative."); if (nr < 0) error("Argument 'ncol' is negative."); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(ans = allocMatrix(type, nr, nc)); fillWithValue(ans, value); UNPROTECT(1); return(ans); } /* allocMatrix2() */ SEXP allocArray2(SEXP dim, SEXP value) { SEXP ans; SEXPTYPE type; int i, d; double nd = 1.0; R_xlen_t n; /* Argument 'dim': */ if (!isInteger(dim) || xlength(dim) == 0) { error("Argument 'dim' must be an integer vector of at least length one."); } for (i = 0; i < xlength(dim); i++) { d = INTEGER(dim)[i]; nd *= d; #ifndef LONG_VECTOR_SUPPORT if (nd > R_INT_MAX) { error("Argument 'dim' specifies too many elements: %.g > %d", nd, R_INT_MAX); } #endif } n = (R_xlen_t)nd; /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(dim = duplicate(dim)); PROTECT(ans = allocVector(type, n)); fillWithValue(ans, value); setAttrib(ans, R_DimSymbol, dim); UNPROTECT(2); return(ans); } /* allocArray2() */ matrixStats/src/rowMads.c0000644000176200001440000000530314111740760015146 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowMads(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowMads_lowlevel.h" #include "naming.h" SEXP rowMads(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP constant, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; double scale; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'constant': */ if (!isNumeric(constant)) error("Argument 'constant' must be a numeric scale."); scale = asReal(constant); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMads_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, scale, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowMads_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, scale, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } /* rowMads() */ /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-11-17 [HB] o Created from rowMedians.c. **************************************************************************/ matrixStats/src/000.init.c0000644000176200001440000000237514111740760015001 0ustar liggesusers#include #include #include "000.api.h" #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n} static R_CallMethodDef callMethods[] = { CALLDEF(allocArray2, 2), CALLDEF(allocMatrix2, 3), CALLDEF(allocVector2, 2), CALLDEF(anyMissing, 2), CALLDEF(binCounts, 3), CALLDEF(binMeans, 5), CALLDEF(colCounts, 9), CALLDEF(colOrderStats, 6), CALLDEF(colRanges, 8), CALLDEF(count, 6), CALLDEF(diff2, 4), CALLDEF(indexByRow, 2), CALLDEF(logSumExp, 4), CALLDEF(mean2, 4), CALLDEF(productExpSumLog, 4), CALLDEF(psortKM, 3), CALLDEF(rowCounts, 9), CALLDEF(rowCummaxs, 6), CALLDEF(rowCummins, 6), CALLDEF(rowCumprods, 6), CALLDEF(rowCumsums, 6), CALLDEF(rowDiffs, 8), CALLDEF(rowLogSumExps, 8), CALLDEF(rowMads, 9), CALLDEF(rowMeans2, 8), CALLDEF(rowMedians, 8), CALLDEF(rowOrderStats, 6), CALLDEF(rowRanges, 8), CALLDEF(rowRanksWithTies, 7), CALLDEF(rowSums2, 8), CALLDEF(rowVars, 8), CALLDEF(signTabulate, 2), CALLDEF(sum2, 4), CALLDEF(validate, 3), CALLDEF(weightedMean, 5), CALLDEF(weightedMedian, 6), CALLDEF(x_OP_y, 11), {NULL, NULL, 0} }; void R_init_matrixStats(DllInfo *info) { R_registerRoutines(info, NULL, callMethods, NULL, NULL); R_useDynamicSymbols(info, FALSE); } matrixStats/src/weightedMean.c0000644000176200001440000000330214111740760016130 0ustar liggesusers/*************************************************************************** Public methods: SEXP weightedMean(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP refine) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include #include "weightedMean_lowlevel.h" SEXP weightedMean(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP refine) { SEXP ans; int narm, refine2; double avg = NA_REAL; R_xlen_t nx, nw; /* Argument 'x': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'x': */ assertArgVector(w, (R_TYPE_REAL), "w"); nw = xlength(w); if (nx != nw) { error("Argument 'x' and 'w' are of different lengths: %d != %d", nx, nw); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine2 = asLogicalNoNA(refine, "refine"); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Double matrices are more common to use. */ if (isReal(x)) { avg = weightedMean_dbl(REAL(x), nx, REAL(w), cidxs, nidxs, narm, refine2); } else if (isInteger(x) | isLogical(x)) { avg = weightedMean_int(INTEGER(x), nx, REAL(w), cidxs, nidxs, narm, refine2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = avg; UNPROTECT(1); return(ans); } // weightedMean() /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-12-08 [HB] o Created. **************************************************************************/ matrixStats/src/000.types.h0000644000176200001440000000207314063411362015201 0ustar liggesusers#include /* R_xlen_t, ... */ #ifndef R_INT_MIN #define R_INT_MIN -INT_MAX #endif #ifndef R_INT_MAX #define R_INT_MAX INT_MAX #endif /* inf */ #ifndef IS_INF #define IS_INF(x) (x == R_PosInf || x == R_NegInf) #endif /* Subsetting index mode */ #ifndef SUBSETTED_MODE_INDEX #define SUBSETTED_MODE_INDEX #define SUBSETTED_ALL 0 #define SUBSETTED_INTEGER 1 #define SUBSETTED_REAL 2 #endif /* As in /src/include/Defn.h */ #ifdef HAVE_LONG_DOUBLE #define LDOUBLE long double #else #define LDOUBLE double #endif /* Backward compatibility with R (< 3.0.0) As in /src/include/Rinternals.h */ #ifndef R_XLEN_T_MAX typedef int R_xlen_t; #define R_XLEN_T_MAX R_LEN_T_MAX #ifndef xlength #define xlength length #endif #endif /* define NA_R_XLEN_T */ #ifdef LONG_VECTOR_SUPPORT #define R_XLEN_T_MIN -R_XLEN_T_MAX-1 #define NA_R_XLEN_T R_XLEN_T_MIN #else #define NA_R_XLEN_T NA_INTEGER #endif /* Macro to check for user interrupts every 2^20 iteration */ #define R_CHECK_USER_INTERRUPT(i) if (i % 1048576 == 0) R_CheckUserInterrupt() matrixStats/src/rowCumMinMaxs_lowlevel_template.h0000644000176200001440000000751314111740760022121 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowCummins_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN) */ #include "000.templates-types.h" #if COMP == '<' #define OP < #elif COMP == '>' #define OP > #endif void CONCAT_MACROS(METHOD, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev, idx; R_xlen_t colBegin; ANS_C_TYPE value; int ok; int *oks = NULL; if (ncols == 0 || nrows == 0) return; if (byrow) { oks = (int *) R_alloc(nrows, sizeof(int)); colBegin = R_INDEX_OP(((cols == NULL) ? (0) : cols[0]), *, nrow); for (kk=0; kk < nrows; kk++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (kk) : rows[kk])); value = (ANS_C_TYPE) R_INDEX_GET(x, idx, X_NA); if (ANS_ISNAN(value)) { oks[kk] = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { oks[kk] = 1; ans[kk] = value; } } kk_prev = 0; for (jj=1; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = (ANS_C_TYPE) R_INDEX_GET(x, idx, X_NA); if (oks[ii]) { if (ANS_ISNAN(value)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { if (value OP ans[kk_prev]) { ans[kk] = value; } else { ans[kk] = (ANS_C_TYPE) ans[kk_prev]; } } } else { ans[kk] = ANS_NA; } kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (0) : rows[0])); value = (ANS_C_TYPE) R_INDEX_GET(x, idx, X_NA); if (ANS_ISNAN(value)) { ok = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { ok = 1; ans[kk] = value; } kk_prev = kk; kk++; for (ii=1; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = (ANS_C_TYPE) R_INDEX_GET(x, idx, X_NA); if (ok) { if (ANS_ISNAN(value)) { ok = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { if (value OP ans[kk_prev]) { ans[kk] = value; } else { ans[kk] = (ANS_C_TYPE) ans[kk_prev]; } } kk++; kk_prev++; } else { ans[kk] = ANS_NA; kk++; } R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ } #undef OP /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/rowMedians.c0000644000176200001440000000706014111740760015644 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowMedians(SEXP x, ...) Authors: Adopted from rowQuantiles.c by R. Gentleman. Copyright Henrik Bengtsson, 2007 **************************************************************************/ #include #include "000.types.h" #include "rowMedians_lowlevel.h" #include "naming.h" SEXP rowMedians(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { int narm, hasna, byrow, usenames; SEXP ans; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Get dimensions of 'x'. */ nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); if (!byrow) { SWAP(R_xlen_t, nrow, ncol); SWAP(R_xlen_t*, crows, ccols); SWAP(R_xlen_t, nrows, ncols); } /* R allocate a double vector of length 'nrows' Note that 'nrows' means 'ncols' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrows)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMedians_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowMedians_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, byrow, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(2); return(ans); } /* rowMedians() */ /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2013-01-13 [HB] o Added argument 'byRow' to rowMedians() and dropped colMedians(). o Using internal arguments 'by_row' instead of 'by_column'. 2011-12-11 [HB] o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s. Note that NaN:s does not exist for integers. 2011-10-12 [HJ] o Added colMedians(). o Now rowMediansInteger/Real() can operate also by columns, cf. argument 'by_column'. 2007-08-14 [HB] o Added checks for user interrupts every 1000 line. o Added argument 'hasNA' to rowMedians(). 2005-12-07 [HB] o BUG FIX: When calculating the median of an even number (non-NA) values, the length of the second sort was one element too short, which made the method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq). 2005-11-24 [HB] o By implementing a special version for integers, there is no need to coerce to double in R, which would take up twice the amount of memory. o rowMedians() now handles NAs too. o Adopted from rowQuantiles.c in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/weightedMean_lowlevel.h0000644000176200001440000000110114111740760020041 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): double weightedMean_int(int *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine) double weightedMean_dbl(double *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine) */ #define X_TYPE 'i' #include "weightedMean_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "weightedMean_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowRanges_lowlevel_template.h0000644000176200001440000001534514111740760021321 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowRanges_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(rowRanges, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; X_C_TYPE value, *mins = NULL, *maxs = NULL; int *skip = NULL; /* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */ /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (hasna) { skip = (int *) R_alloc(nrows, sizeof(int)); for (ii=0; ii < nrows; ii++) { is_counted[ii] = 0; skip[ii] = 0; } /* Missing values */ if (what == 0) { /* rowMins() */ mins = ans; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { if (!narm && skip[ii]) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(value)) { if (!narm) { mins[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { mins[ii] = value; is_counted[ii] = 1; } else if (value < mins[ii]) { mins[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { mins[ii] = R_PosInf; } } #endif } else if (what == 1) { /* rowMaxs() */ maxs = ans; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { if (!narm && skip[ii]) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(value)) { if (!narm) { maxs[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { maxs[ii] = value; is_counted[ii] = 1; } else if (value > maxs[ii]) { maxs[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { maxs[ii] = R_NegInf; } } #endif } else if (what == 2) { /* rowRanges() */ mins = ans; maxs = &ans[nrows]; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { if (!narm && skip[ii]) continue; idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(value)) { if (!narm) { mins[ii] = value; maxs[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { mins[ii] = value; maxs[ii] = value; is_counted[ii] = 1; } else if (value < mins[ii]) { mins[ii] = value; } else if (value > maxs[ii]) { maxs[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { mins[ii] = R_PosInf; maxs[ii] = R_NegInf; } } #endif } /* if (what ...) */ } else { /* No missing values */ if (what == 0) { /* rowMins() */ mins = ans; /* Initiate results */ for (ii=0; ii < nrows; ii++) { mins[ii] = x[ii]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value < mins[ii]) mins[ii] = value; } } } else if (what == 1) { /* rowMax() */ maxs = ans; /* Initiate results */ for (ii=0; ii < nrows; ii++) { maxs[ii] = x[ii]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value > maxs[ii]) maxs[ii] = value; } } } else if (what == 2) { /* rowRanges()*/ mins = ans; maxs = &ans[nrows]; /* Initiate results */ for (ii=0; ii < nrows; ii++) { mins[ii] = x[ii]; maxs[ii] = x[ii]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value < mins[ii]) { mins[ii] = value; } else if (value > maxs[ii]) { maxs[ii] = value; } } } } /* if (what ...) */ } /* if (narm) */ } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/colCounts_lowlevel_template.h0000644000176200001440000001310014111740760021306 0ustar liggesusers/*********************************************************************** TEMPLATE: void colCounts_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, X_C_TYPE value, int what, int narm, int hasna, double *ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r', or 'l' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(colCounts, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, X_C_TYPE value, int what, int narm, int hasna, double *ans) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; R_xlen_t count; X_C_TYPE xvalue; if (what == 0L) { /* all */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); count = 1; for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); if (!X_ISNAN(R_INDEX_GET(x, idx, X_NA))) { count = 0; /* Found another value! Early stopping */ break; } } ans[jj] = (double)count; } } else { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); count = 1; for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (xvalue == value) { } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is not 'value' later, then we know for sure that all = FALSE regardless of missing values. In other words, at this point the answer can be either NA or FALSE.*/ count = NA_R_XLEN_T; } else { count = 0; /* Found another value! Early stopping */ break; } } /* for (ii ...) */ ans[jj] = (double)count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else if (what == 1L) { /* any */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); count = 0; for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); if (X_ISNAN(R_INDEX_GET(x, idx, X_NA))) { count = 1; /* Found value! Early stopping */ break; } } ans[jj] = (double)count; } } else { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); count = 0; for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (xvalue == value) { count = 1; /* Found value! Early stopping */ break; } else if (narm && X_ISNAN(xvalue)) { /* Skipping */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is 'value' later, then we know for sure that any = TRUE regardless of missing values. In other words, at this point the answer can be either NA or TRUE.*/ count = NA_R_XLEN_T; } } /* for (ii ...) */ ans[jj] = (double)count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else if (what == 2L) { /* count */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); count = 0; for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); if (X_ISNAN(R_INDEX_GET(x, idx, X_NA))) { ++count; } } ans[jj] = (double)count; } } else { for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); count = 0; for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue = R_INDEX_GET(x, idx, X_NA); if (xvalue == value) { ++count; } else if (!narm && X_ISNAN(xvalue)) { count = NA_R_XLEN_T; /* Early stopping */ break; } } /* for (ii ...) */ ans[jj] = (double)count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } /* if (what) */ } /*************************************************************************** HISTORY: 2015-04-18 [DJ] o Supported subsetted computation. 2014-11-14 [HB] o Created colCounts() templates from rowCounts() templates. **************************************************************************/ matrixStats/src/rowCumsums.c0000644000176200001440000000436114111740760015721 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowCumsums(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowCumsums_lowlevel.h" #include "naming.h" SEXP rowCumsums(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames) { int byrow, usenames; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Argument 'useNames': */ usenames = asLogical(useNames); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrows, ncols)); rowCumsums_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } else if (isInteger(x) | isLogical(x)) { PROTECT(ans = allocMatrix(INTSXP, nrows, ncols)); rowCumsums_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, byrow, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { setDimnames(ans, dimnames, nrows, crows, ncols, ccols, FALSE); } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } /* rowCumsums() */ /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/weightedMedian.c0000644000176200001440000000352714111740760016456 0ustar liggesusers/*************************************************************************** Public methods: SEXP weightedMedian(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP interpolate, SEXP ties) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include #include "weightedMedian_lowlevel.h" SEXP weightedMedian(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP interpolate, SEXP ties) { SEXP ans; int narm, interpolate2, ties2; double mu = NA_REAL; R_xlen_t nx, nw; /* Argument 'x': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'x': */ assertArgVector(w, (R_TYPE_REAL), "w"); nw = xlength(w); if (nx != nw) { error("Argument 'x' and 'w' are of different lengths: %d != %d", nx, nw); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'interpolate': */ interpolate2 = asLogicalNoNA(interpolate, "interpolate"); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); /* Argument 'ties': */ ties2 = asInteger(ties); /* Double matrices are more common to use. */ if (isReal(x)) { mu = weightedMedian_dbl(REAL(x), nx, REAL(w), cidxs, nidxs, narm, interpolate2, ties2); } else if (isInteger(x) | isLogical(x)) { mu = weightedMedian_int(INTEGER(x), nx, REAL(w), cidxs, nidxs, narm, interpolate2, ties2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = mu; UNPROTECT(1); return(ans); } // weightedMedian() /*************************************************************************** HISTORY: 2015-07-09 [DJ] o Supported subsetted computation. 2015-01-01 [HB] o Created. **************************************************************************/ matrixStats/src/000.macros.h0000644000176200001440000000031314111740760015315 0ustar liggesusers#ifndef _MACROS_H_ #define _MACROS_H_ #define CONCAT(x,y) x ##_## y #define CONCAT_MACROS(x,y) CONCAT(x,y) #define QUOTE(str) #str #define QUOTE_MACROS(str) QUOTE(str) #endif /* END OF _MACROS_H_ */ matrixStats/src/rowOrderStats.c0000644000176200001440000000712314111764441016360 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) Authors: Henrik Bengtsson. Adopted from rowQ() by R. Gentleman. To do: Add support for missing values. Copyright Henrik Bengtsson, 2007-2014 **************************************************************************/ #include #include "000.types.h" #include "rowOrderStats_lowlevel.h" #include "naming.h" SEXP rowOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames) { SEXP ans = NILSXP; R_xlen_t nrow, ncol, qq; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'which': */ if (length(which) != 1) error("Argument 'which' must be a single number."); if (!isNumeric(which)) error("Argument 'which' must be a numeric number."); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; int rowsHasna, colsHasna; R_xlen_t *crows = validateIndicesCheckNA(rows, nrow, 0, &nrows, &rowsHasna); R_xlen_t *ccols = validateIndicesCheckNA(cols, ncol, 0, &ncols, &colsHasna); /* Argument 'useNames': */ int usenames = asLogical(useNames); // Check missing rows if (rowsHasna && ncols > 0) { error("Argument 'rows' must not contain missing value"); } // Check missing cols if (colsHasna && nrows > 0) { error("Argument 'cols' must not contain missing value"); } /* Subtract one here, since rPsort does zero based addressing */ qq = asInteger(which) - 1; /* Assert that 'qq' is a valid index */ if (qq < 0 || qq >= ncols) { error("Argument 'which' is out of range: %d", qq + 1); } /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, nrows)); rowOrderStats_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, qq, REAL(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, nrows)); rowOrderStats_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, qq, INTEGER(ans)); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } UNPROTECT(1); } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // rowOrderStats() /*************************************************************************** HISTORY: 2015-07-11 [DJ] o Supported subsetted computation. 2009-02-04 [HB] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. 2008-03-25 [HB] o Renamed from 'rowQuantiles' to 'rowOrderStats'. 2007-08-10 [HB] o Removed arguments for NAs since rowOrderStats() still don't support it. 2005-11-24 [HB] o Cool, it works and compiles nicely. o Preallocate colOffset to speed up things even more. o Added more comments and error checking. o Adopted from rowQ() in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/rowLogSumExp.c0000644000176200001440000000517514111740760016154 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013-2014 **************************************************************************/ #include #include "000.types.h" #include "rowLogSumExp_lowlevel.h" #include "naming.h" SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames) { SEXP ans; int narm, hasna, byrow, usenames; R_xlen_t nrow, ncol; /* Coercion moved down to C */ PROTECT(lx = coerceVector(lx, REALSXP)); PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'lx' and 'dim': */ assertArgMatrix(lx, dim, (R_TYPE_REAL), "lx"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'byRow': */ byrow = asLogical(byRow); if (byrow) { ans = PROTECT(allocVector(REALSXP, nrows)); rowLogSumExps_double(REAL(lx), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, 1, REAL(ans)); } else { ans = PROTECT(allocVector(REALSXP, ncols)); rowLogSumExps_double(REAL(lx), nrow, ncol, crows, nrows, ccols, ncols, narm, hasna, 0, REAL(ans)); } /* Argument 'useNames': */ usenames = asLogical(useNames); if (usenames == NA_LOGICAL || usenames){ SEXP dimnames = getAttrib(lx, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (byrow) { SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } else { SEXP namesVec = VECTOR_ELT(dimnames, 1); if (namesVec != R_NilValue) { setNames(ans, namesVec, ncols, ccols); } } } } UNPROTECT(3); /* ans = PROTECT(...), PROTECT(lx = ...), PROTECT(dim = ...) */ return(ans); } /* rowLogSumExps() */ /*************************************************************************** HISTORY: 2015-06-12 [DJ] o Supported subsetted computation. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/rowRanges.c0000644000176200001440000001225214111740760015502 0ustar liggesusers/*************************************************************************** Public methods: SEXP rowRanges(SEXP x, ...) Authors: Henrik Bengtsson. Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "000.types.h" #include "rowRanges_lowlevel.h" #include "naming.h" SEXP rowRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames) { SEXP ans = NILSXP, ans2 = NILSXP; int *mins, *maxs; double *mins2, *maxs2; int *is_counted, all_counted = 0; int what2, narm, hasna, usenames; R_xlen_t nrow, ncol, ii; /* Coercion moved down to C */ PROTECT(dim = coerceVector(dim, INTSXP)); /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = asR_xlen_t(dim, 0); ncol = asR_xlen_t(dim, 1); /* Argument 'what': */ if (length(what) != 1) error("Argument 'what' must be a single number."); if (!isNumeric(what)) error("Argument 'what' must be a numeric number."); what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("Invalid value of 'what': %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'rows' and 'cols': */ R_xlen_t nrows, ncols; R_xlen_t *crows = validateIndices(rows, nrow, 0, &nrows); R_xlen_t *ccols = validateIndices(cols, ncol, 0, &ncols); /* Argument 'useNames': */ usenames = asLogical(useNames); is_counted = (int *) R_alloc(nrows, sizeof(int)); if (isReal(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(REALSXP, nrows, 2)); } else { PROTECT(ans = allocVector(REALSXP, nrows)); } rowRanges_dbl(REAL(x), nrow, ncol, crows, nrows, ccols, ncols, what2, narm, hasna, REAL(ans), is_counted); if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (nrows != 0) { setDimnames(ans, dimnames, nrows, crows, 0, ccols, FALSE); } /* (else) Zero-length rownames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(1); } else if (isInteger(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(INTSXP, nrows, 2)); } else { PROTECT(ans = allocVector(INTSXP, nrows)); } rowRanges_int(INTEGER(x), nrow, ncol, crows, nrows, ccols, ncols, what2, narm, hasna, INTEGER(ans), is_counted); /* Any entries with zero non-missing values? */ all_counted = 1; for (ii=0; ii < nrows; ii++) { if (!is_counted[ii]) { all_counted = 0; break; } } if (!all_counted) { /* Handle zero non-missing values */ /* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */ if (what2 == 0) { PROTECT(ans2 = allocVector(REALSXP, nrows)); mins = INTEGER(ans); mins2 = REAL(ans2); for (ii=0; ii < nrows; ii++) { if (is_counted[ii]) { mins2[ii] = (double)mins[ii]; } else { mins2[ii] = R_PosInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 1) { PROTECT(ans2 = allocVector(REALSXP, nrows)); maxs = INTEGER(ans); maxs2 = REAL(ans2); for (ii=0; ii < nrows; ii++) { if (is_counted[ii]) { maxs2[ii] = (double)maxs[ii]; } else { maxs2[ii] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 2) { PROTECT(ans2 = allocMatrix(REALSXP, nrows, 2)); mins = INTEGER(ans); maxs = &INTEGER(ans)[nrows]; mins2 = REAL(ans2); maxs2 = &REAL(ans2)[nrows]; for (ii=0; ii < nrows; ii++) { if (is_counted[ii]) { mins2[ii] = (double)mins[ii]; maxs2[ii] = (double)maxs[ii]; } else { mins2[ii] = R_PosInf; maxs2[ii] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } ans = ans2; } if (usenames != NA_LOGICAL && usenames){ SEXP dimnames = getAttrib(x, R_DimNamesSymbol); if (dimnames != R_NilValue) { if (what2 == 2) { if (nrows != 0) { setDimnames(ans, dimnames, nrows, crows, 0, ccols, FALSE); } /* (else) Zero-length rownames attribute? Keep behavior same as base R function */ } else{ SEXP namesVec = VECTOR_ELT(dimnames, 0); if (namesVec != R_NilValue) { setNames(ans, namesVec, nrows, crows); } } } } UNPROTECT(1); /* ans */ } UNPROTECT(1); /* PROTECT(dim = ...) */ return(ans); } // rowRanges() /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/anyMissing_lowlevel.h0000644000176200001440000000024214111740760017566 0ustar liggesusers/* Native API (dynamically generated via macros): int anyMissing_internal(SEXP x, R_xlen_t *idxs, R_xlen_t nidxs) */ #include "anyMissing_lowlevel_template.h" matrixStats/src/000.templates-gen-matrix.h0000644000176200001440000000162613755623603020121 0ustar liggesusers#include "000.macros.h" #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define ROWS_TYPE 'i' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE #define ROWS_TYPE 'r' #include METHOD_TEMPLATE_H #define COLS_TYPE 'i' #include METHOD_TEMPLATE_H #undef COLS_TYPE #define COLS_TYPE 'r' #include METHOD_TEMPLATE_H #undef COLS_TYPE #undef ROWS_TYPE RETURN_TYPE (*METHOD_NAME[3][3])(ARGUMENTS_LIST) = { {METHOD_NAME_arows_acols, METHOD_NAME_arows_icols, METHOD_NAME_arows_dcols}, {METHOD_NAME_irows_acols, METHOD_NAME_irows_icols, METHOD_NAME_irows_dcols}, {METHOD_NAME_drows_acols, METHOD_NAME_drows_icols, METHOD_NAME_drows_dcols}, }; #include "000.templates-types_undef.h" matrixStats/src/rowMeans2_lowlevel.h0000644000176200001440000000126714111740760017332 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void rowMeans2_int(int *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) void rowMeans2_dbl(double *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t Rf_nrows, R_xlen_t *cols, R_xlen_t Rf_ncols, int narm, int hasna, int byrow, double *ans) */ #define X_TYPE 'i' #include "rowMeans2_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "rowMeans2_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/anyMissing.c0000644000176200001440000000224314111740760015653 0ustar liggesusers/*************************************************************************** Public methods: anyMissing(SEXP x, SEXP idxs) TO DO: Support list():s too. Copyright Henrik Bengtsson, 2007 **************************************************************************/ #include #include "000.types.h" #include "000.utils.h" #include "anyMissing_lowlevel.h" SEXP anyMissing(SEXP x, SEXP idxs) { R_xlen_t nx; nx = xlength(x); /* anyMissing() on zero-length objects should always return FALSE, just like any(double(0)). */ if (nx == 0) return(ScalarLogical(FALSE)); /* Argument 'idxs': */ R_xlen_t nidxs; R_xlen_t *cidxs = validateIndices(idxs, nx, 1, &nidxs); if (nidxs == 0) return(ScalarLogical(FALSE)); if (anyMissing_internal(x, cidxs, nidxs)) { return(ScalarLogical(TRUE)); } return(ScalarLogical(FALSE)); } // anyMissing() /*************************************************************************** HISTORY: 2015-06-14 [DJ] o Supported subsetted computation. 2007-08-14 [HB] o Created using do_isna() in src/main/coerce.c as a template. **************************************************************************/ matrixStats/src/binMeans_lowlevel.h0000644000176200001440000000076614063411362017213 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include /* Native API (dynamically generated via macros): void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) */ #define BIN_BY 'L' #include "binMeans_lowlevel_template.h" #define BIN_BY 'R' #include "binMeans_lowlevel_template.h" matrixStats/src/000.api.h0000644000176200001440000000565414111740760014617 0ustar liggesusers/* C-level API that is called from R */ SEXP allocArray2(SEXP dim, SEXP value); SEXP allocMatrix2(SEXP nrow, SEXP ncol, SEXP value); SEXP allocVector2(SEXP length, SEXP value); SEXP anyMissing(SEXP x, SEXP idxs); SEXP binCounts(SEXP x, SEXP bx, SEXP right); SEXP binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right); SEXP colCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP colOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames); SEXP colRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP count(SEXP x, SEXP idxs, SEXP value, SEXP what, SEXP naRm, SEXP hasNA); SEXP diff2(SEXP x, SEXP idxs, SEXP lag, SEXP differences); SEXP indexByRow(SEXP dim, SEXP idxs); SEXP logSumExp(SEXP lx, SEXP idxs, SEXP naRm, SEXP hasNA); SEXP mean2(SEXP x, SEXP idxs, SEXP naRm, SEXP refine); SEXP productExpSumLog(SEXP x, SEXP idxs, SEXP naRm, SEXP hasNA); SEXP psortKM(SEXP x, SEXP k, SEXP m); SEXP rowCounts(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP value, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP rowCummaxs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowCummins(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowCumprods(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowCumsums(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP byRow, SEXP useNames); SEXP rowDiffs(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP lag, SEXP differences, SEXP byRow, SEXP useNames); SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowMads(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP constant, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowMeans2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowMedians(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowOrderStats(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP which, SEXP useNames); SEXP rowRanges(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP what, SEXP naRm, SEXP hasNA, SEXP useNames); SEXP rowRanksWithTies(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP tiesMethod, SEXP byRow, SEXP useNames); SEXP rowSums2(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP rowVars(SEXP x, SEXP dim, SEXP rows, SEXP cols, SEXP naRm, SEXP hasNA, SEXP byRow, SEXP useNames); SEXP signTabulate(SEXP x, SEXP idxs); SEXP sum2(SEXP x, SEXP idxs, SEXP naRm, SEXP mode); SEXP validate(SEXP idxs, SEXP maxIdx, SEXP allowOutOfBound); SEXP weightedMean(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP refine); SEXP weightedMedian(SEXP x, SEXP w, SEXP idxs, SEXP naRm, SEXP interpolate, SEXP ties); SEXP x_OP_y(SEXP x, SEXP y, SEXP dim, SEXP operator, SEXP xrows, SEXP xcols, SEXP yidxs, SEXP commute, SEXP naRm, SEXP hasNA, SEXP byRow); matrixStats/src/weightedMean_lowlevel_template.h0000644000176200001440000000545314111740760021752 0ustar liggesusers/*********************************************************************** TEMPLATE: double weightedMean_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include double CONCAT_MACROS(weightedMean, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t *idxs, R_xlen_t nidxs, int narm, int refine) { X_C_TYPE value; double weight; R_xlen_t i; LDOUBLE sum = 0, wtotal = 0; LDOUBLE avg = R_NaN; for (i=0; i < nidxs; i++) { weight = R_INDEX_GET(w, ((idxs == NULL) ? (i) : idxs[i]), NA_REAL); /* Skip or early stopping? */ if (weight == 0) { continue; } value = R_INDEX_GET(x, ((idxs == NULL) ? (i) : idxs[i]), X_NA); #if X_TYPE == 'i' if (X_ISNAN(value)) { /* Skip or early stopping? */ if (narm) { continue; } else { sum = R_NaReal; break; } } else { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; } #elif X_TYPE == 'r' if (!narm) { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; /* Early stopping? Special for long LDOUBLE vectors */ if (i % 1048576 == 0 && ISNAN(sum)) break; } else if (!X_ISNAN(value)) { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; } #endif } /* for (i ...) */ if (wtotal > DOUBLE_XMAX || wtotal < -DOUBLE_XMAX) { avg = R_NaN; } else if (sum > DOUBLE_XMAX) { avg = R_PosInf; } else if (sum < -DOUBLE_XMAX) { avg = R_NegInf; } else { avg = sum / wtotal; #if X_TYPE == 'r' /* Extra precision by summing over residuals? */ if (refine && R_FINITE(avg)) { sum = 0; for (i=0; i < nidxs; i++) { weight = R_INDEX_GET(w, ((idxs == NULL) ? (i) : idxs[i]), NA_REAL); /* Skip? */ if (weight == 0) { continue; } value = R_INDEX_GET(x, ((idxs == NULL) ? (i) : idxs[i]), X_NA); if (!narm) { sum += (LDOUBLE)weight * (value - avg); /* Early stopping? Special for long LDOUBLE vectors */ if (i % 1048576 == 0 && ISNAN(sum)) break; } else if (!ISNAN(value)) { sum += (LDOUBLE)weight * (value - avg); } } avg += (sum / wtotal); } #endif } return (double)avg; } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-12-08 [HB] o Created. **************************************************************************/ matrixStats/src/colRanges_lowlevel_template.h0000644000176200001440000001472114111740760021264 0ustar liggesusers/*********************************************************************** TEMPLATE: void colRanges_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "000.templates-types.h" void CONCAT_MACROS(colRanges, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) { R_xlen_t ii, jj; R_xlen_t colBegin, idx; X_C_TYPE value, *mins = NULL, *maxs = NULL; /* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */ /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (hasna) { for (jj=0; jj < ncols; jj++) is_counted[jj] = 0; /* Missing values */ if (what == 0) { /* colMins() */ mins = ans; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(value)) { if (!narm) { mins[jj] = value; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(value)) break; #endif } } else if (!is_counted[jj]) { mins[jj] = value; is_counted[jj] = 1; } else if (value < mins[jj]) { mins[jj] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { mins[jj] = R_PosInf; } } #endif } else if (what == 1) { /* colMaxs() */ maxs = ans; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(value)) { if (!narm) { maxs[jj] = value; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(value)) break; #endif } } else if (!is_counted[jj]) { maxs[jj] = value; is_counted[jj] = 1; } else if (value > maxs[jj]) { maxs[jj] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { maxs[jj] = R_NegInf; } } #endif } else if (what == 2) { /* colRanges() */ mins = ans; maxs = &ans[ncols]; for (jj=0; jj < ncols; jj++) { colBegin = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrows; ii++) { idx = R_INDEX_OP(colBegin, +, ((rows == NULL) ? (ii) : rows[ii])); value = R_INDEX_GET(x, idx, X_NA); if (X_ISNAN(value)) { if (!narm) { mins[jj] = value; maxs[jj] = value; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(value)) break; #endif } } else if (!is_counted[jj]) { mins[jj] = value; maxs[jj] = value; is_counted[jj] = 1; } else if (value < mins[jj]) { mins[jj] = value; } else if (value > maxs[jj]) { maxs[jj] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncols; jj++) { if (!is_counted[jj]) { mins[jj] = R_PosInf; maxs[jj] = R_NegInf; } } #endif } /* if (what ...) */ } else { /* No missing values */ if (what == 0) { /* colMins() */ mins = ans; /* Initiate results */ for (jj=0; jj < ncols; jj++) { mins[jj] = x[jj]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value < mins[jj]) mins[jj] = value; } } } else if (what == 1) { /* colMax() */ maxs = ans; /* Initiate results */ for (jj=0; jj < ncols; jj++) { maxs[jj] = x[jj]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value > maxs[jj]) maxs[jj] = value; } } } else if (what == 2) { /* colRanges()*/ mins = ans; maxs = &ans[ncols]; /* Initiate results */ for (jj=0; jj < ncols; jj++) { mins[jj] = x[jj]; maxs[jj] = x[jj]; } for (jj=1; jj < ncols; jj++) { colBegin = ((cols == NULL) ? (jj) : cols[jj]) * nrow; for (ii=0; ii < nrows; ii++) { value = x[((rows == NULL) ? (ii) : rows[ii])+colBegin]; if (value < mins[jj]) { mins[jj] = value; } else if (value > maxs[jj]) { maxs[jj] = value; } } } } /* if (what ...) */ } /* if (narm) */ } /*************************************************************************** HISTORY: 2015-06-07 [DJ] o Supported subsetted computation. 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/signTabulate_lowlevel.h0000644000176200001440000000102514111740760020067 0ustar liggesusers#include #include "000.types.h" #include "000.utils.h" #include "000.macros.h" /* Native API (dynamically generated via macros): void signTabulate_int(int *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, double *ans) void signTabulate_dbl(double *x, R_xlen_t nx, R_xlen_t *idxs, R_xlen_t nidxs, double *ans) */ #define X_TYPE 'i' #include "signTabulate_lowlevel_template.h" #include "000.templates-types_undef.h" #define X_TYPE 'r' #include "signTabulate_lowlevel_template.h" #include "000.templates-types_undef.h" matrixStats/src/rowDiffs_lowlevel_template.h0000644000176200001440000001302614111740760021127 0ustar liggesusers/*********************************************************************** TEMPLATE: void rowDiffs_(ARGUMENTS_LIST) ARGUMENTS_LIST: X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C) */ #include "000.templates-types.h" #include #undef X_DIFF #undef DIFF_X_MATRIX #if X_TYPE == 'i' #ifndef diff_int static R_INLINE int diff_int(int a, int b) { if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER); return a-b; } #define diff_int diff_int #endif #define X_DIFF diff_int #define DIFF_X_MATRIX diff_matrix_int #elif X_TYPE == 'r' #define X_DIFF(a,b) a-b #define DIFF_X_MATRIX diff_matrix_double #endif #if (X_TYPE == 'i' && !defined(diff_matrix_int)) || (X_TYPE == 'r' && !defined(diff_matrix_double)) static R_INLINE void DIFF_X_MATRIX(X_C_TYPE *x, R_xlen_t nrow_x, R_xlen_t ncol_x, int byrow, R_xlen_t lag, X_C_TYPE *y, R_xlen_t nrow_y, R_xlen_t ncol_y) { R_xlen_t ii, jj, ss, tt, uu; if (byrow) { uu = lag * nrow_x; tt = 0; ss = 0; for (jj=0; jj < ncol_y; jj++) { for (ii=0; ii < nrow_y; ii++) { y[ss++] = X_DIFF(x[uu++], x[tt++]); } } } else { uu = lag; tt = 0; ss = 0; for (jj=0; jj < ncol_y; jj++) { for (ii=0; ii < nrow_y; ii++) { /* Rprintf("y[%d] = x[%d] - x[%d] = %g - %g = %g\n", ss, uu, tt, (double)x[uu], (double)x[tt], (double)X_DIFF(x[uu], x[tt])); */ y[ss++] = X_DIFF(x[uu++], x[tt++]); } tt += lag; uu += lag; } } } #if X_TYPE == 'i' #define diff_matrix_int diff_matrix_int #elif X_TYPE == 'r' #define diff_matrix_double diff_matrix_double #endif #endif #undef DIFF_X_MATRIX_TYPE #if X_TYPE == 'i' #define DIFF_X_MATRIX_TYPE CONCAT_MACROS(DIFF_X_MATRIX_TYPE, int) #elif X_TYPE == 'r' #define DIFF_X_MATRIX_TYPE CONCAT_MACROS(DIFF_X_MATRIX_TYPE, double) #endif static R_INLINE void DIFF_X_MATRIX_TYPE(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, R_xlen_t lag, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) { R_xlen_t ii, jj, ss; R_xlen_t idx, colBegin1, colBegin2; X_C_TYPE xvalue1, xvalue2; ss = 0; if (byrow) { for (jj=0; jj < ncol_ans; jj++) { colBegin1 = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); colBegin2 = R_INDEX_OP(((cols == NULL) ? (jj+lag) : cols[jj+lag]), *, nrow); for (ii=0; ii < nrow_ans; ii++) { idx = R_INDEX_OP(colBegin1, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue1 = R_INDEX_GET(x, idx, X_NA); idx = R_INDEX_OP(colBegin2, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue2 = R_INDEX_GET(x, idx, X_NA); ans[ss++] = X_DIFF(xvalue2, xvalue1); } } } else { for (jj=0; jj < ncol_ans; jj++) { colBegin1 = R_INDEX_OP(((cols == NULL) ? (jj) : cols[jj]), *, nrow); for (ii=0; ii < nrow_ans; ii++) { idx = R_INDEX_OP(colBegin1, +, ((rows == NULL) ? (ii) : rows[ii])); xvalue1 = R_INDEX_GET(x, idx, X_NA); idx = R_INDEX_OP(colBegin1, +, ((rows == NULL) ? (ii+lag) : rows[ii+lag])); xvalue2 = R_INDEX_GET(x, idx, X_NA); ans[ss++] = X_DIFF(xvalue2, xvalue1); } } } } void CONCAT_MACROS(rowDiffs, X_C_SIGNATURE)(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t *rows, R_xlen_t nrows, R_xlen_t *cols, R_xlen_t ncols, int byrow, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) { R_xlen_t nrow_tmp, ncol_tmp; X_C_TYPE *tmp = NULL; /* Nothing to do? */ if (ncol_ans <= 0 || nrow_ans <= 0) return; /* Special case (difference == 1) */ if (differences == 1) { DIFF_X_MATRIX_TYPE(x, nrow, rows, nrows, cols, ncols, byrow, lag, ans, nrow_ans, ncol_ans); } else { /* Allocate temporary work matrix (to hold intermediate differences) */ if (byrow) { nrow_tmp = nrows; ncol_tmp = ncols - lag; } else { nrow_tmp = nrows - lag; ncol_tmp = ncols; } tmp = Calloc(nrow_tmp*ncol_tmp, X_C_TYPE); /* (a) First order of differences */ DIFF_X_MATRIX_TYPE(x, nrow, rows, nrows, cols, ncols, byrow, lag, tmp, nrow_tmp, ncol_tmp); if (byrow) { ncol_tmp = ncol_tmp - lag; } else { nrow_tmp = nrow_tmp - lag; } /* (a) Intermediate orders of differences */ while (--differences > 1) { DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, tmp, nrow_tmp, ncol_tmp); if (byrow) { ncol_tmp = ncol_tmp - lag; } else { nrow_tmp = nrow_tmp - lag; } } /* (c) Last order of differences */ DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, ans, nrow_ans, ncol_ans); /* Deallocate temporary work matrix */ Free(tmp); } /* if (differences ...) */ } /*************************************************************************** HISTORY: 2015-06-13 [DJ] o Supported subsetted computation. 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/validateIndices_lowlevel_template.h0000644000176200001440000000725314111740760022441 0ustar liggesusers/*********************************************************************** TEMPLATE: void validateIndices_(X_C_TYPE *idxs, R_xlen_t nidxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r' ***********************************************************************/ #include #include "000.types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "000.templates-types.h" #undef R_xlen_t_from_idx_TYPE #if X_TYPE == 'i' #define R_xlen_t_from_idx_TYPE CONCAT_MACROS(R_xlen_t_from_idx, int) #elif X_TYPE == 'r' #define R_xlen_t_from_idx_TYPE CONCAT_MACROS(R_xlen_t_from_idx, dbl) #endif static R_INLINE R_xlen_t R_xlen_t_from_idx_TYPE(X_C_TYPE x, R_xlen_t maxIdx) { if (X_ISNAN(x)) return NA_R_XLEN_T; #if X_TYPE == 'r' if (IS_INF(x)) return NA_R_XLEN_T; #endif if (x > maxIdx) return NA_R_XLEN_T; return x - 1; } /** idxs must not be NULL, which should be checked before calling this function. **/ R_xlen_t* CONCAT_MACROS(validateIndices, X_C_SIGNATURE)(X_C_TYPE *idxs, R_xlen_t nidxs, R_xlen_t maxIdx, int allowOutOfBound, R_xlen_t *ansNidxs, int *hasna) { // set default as no NA. *hasna = FALSE; R_xlen_t ii, jj; int state = 0; R_xlen_t count = 0; Rboolean needReAlloc = FALSE; // figure out whether idxs are all positive or all negative. for (ii = 0; ii < nidxs; ++ ii) { X_C_TYPE idx = idxs[ii]; if (idx > 0 || X_ISNAN(idx) #if X_TYPE == 'r' || IS_INF(idx) #endif ) { if (state < 0) error("only 0's may be mixed with negative subscripts"); #if X_TYPE == 'r' if (IS_INF(idx)) { needReAlloc = TRUE; // need to realloc indices array to set inf to NA } else #endif if (!X_ISNAN(idx)) { if (idx > maxIdx) { if (!allowOutOfBound) error("subscript out of bounds"); *hasna = TRUE; // out-of-bound index is NA needReAlloc = TRUE; } } else { *hasna = TRUE; } state = 1; ++ count; } else if (idx < 0) { if (state > 0) error("only 0's may be mixed with negative subscripts"); state = -1; needReAlloc = TRUE; } else { // idx == 0, need to realloc indices array needReAlloc = TRUE; } } if (state >= 0) *ansNidxs = count; if (!needReAlloc) { // must have: state >= 0 // return idxs; RETURN_VALIDATED_ANS(R_xlen_t, nidxs, idxs[ii], R_xlen_t_from_idx_TYPE(idxs[ii],maxIdx),); } // fill positive idxs into ans if (state >= 0) { // NOTE: braces is needed here, because of macro-defined function RETURN_VALIDATED_ANS(R_xlen_t, nidxs, idxs[ii], R_xlen_t_from_idx_TYPE(idxs[ii],maxIdx),); } // state < 0 // use filter as bitset to find out all required idxs Rboolean *filter = Calloc(maxIdx, Rboolean); count = maxIdx; memset(filter, 0, maxIdx*sizeof(Rboolean)); // set to FALSE for (ii = 0; ii < nidxs; ++ ii) { R_xlen_t idx = -idxs[ii]; if (idx > 0 && idx <= maxIdx) { if (filter[idx-1] == 0) { -- count; filter[idx-1] = TRUE; } } } *ansNidxs = count; if (count == 0) { Free(filter); return NULL; } // find the biggest number 'upperBound' R_xlen_t upperBound; for (upperBound = maxIdx-1; upperBound >= 0; -- upperBound) { if (!filter[upperBound]) break; } ++ upperBound; // fill required idxs into ans // NOTE: braces is needed here, because of macro-defined function RETURN_VALIDATED_ANS(R_xlen_t, upperBound, !filter[ii], ii, Free(filter);); } #include "000.templates-types_undef.h" matrixStats/vignettes/0000755000176200001440000000000014120430675014607 5ustar liggesusersmatrixStats/vignettes/matrixStats-methods.md.rsp0000644000176200001440000002132413615621102021715 0ustar liggesusers<%@meta language="R-vignette" content="-------------------------------- DIRECTIVES FOR R: %\VignetteIndexEntry{matrixStats: Summary of functions} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{matrix} %\VignetteKeyword{vector} %\VignetteKeyword{apply} %\VignetteKeyword{rows} %\VignetteKeyword{columns} %\VignetteKeyword{memory} %\VignetteKeyword{speed} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> <% pkgName <- "matrixStats" library(pkgName, character.only=TRUE) ns <- getNamespace(pkgName) env <- as.environment(sprintf("package:%s", pkgName)) R.utils::use("R.utils") kable <- function(df, ...) { fcns <- as.character(df$Functions) fcns <- strsplit(fcns, split=",") fcns <- sapply(fcns, FUN=function(names) { names <- trim(names) ok <- sapply(names, FUN=exists, envir=ns, mode="function") names[ok] <- sprintf("%s()", names[ok]) names[!ok] <- sprintf("~~%s()~~", names[!ok]) names <- paste(names, collapse=", ") }) df$Functions <- fcns df$Example <- sprintf("`%s`", df$Example) print(knitr::kable(df, ..., format="markdown")) } # Find all functions all <- ls(envir=env) keep <- sapply(all, FUN=function(name) { is.function(get(name, envir=env)) }) all <- all[keep] keep <- !grepl("[.]([^.]*)$", all) all <- all[keep] # Hidden functions skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet") skip <- c(skip, "allocArray", "allocMatrix", "allocVector") all <- setdiff(all, skip) # Column and row functions crfcns <- grep("^(col|row)", all, value=TRUE) # Vector functions vfcns <- setdiff(all, crfcns) %> # <%@meta name="title"%> <% pkg <- R.oo::Package(pkgName) %> <%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%> <% fcns <- crfcns base <- gsub("^(col|row)", "", fcns) groups <- tapply(fcns, base, FUN=list) stopifnot(all(sapply(groups, FUN=length) == 2L)) groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L) %> <%--- ## Functions that apply to column and rows of matrices ``` <% print(fcns) %> ``` ---%> <% fcns <- vfcns %> <%--- ## Functions that apply to vectors ``` <% print(fcns) %> ``` ---%> ## Location and scale estimators <% tbl <- NULL row <- data.frame( "Estimator" = "Weighted sample mean", "Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans", "Example" = "weightedMean(x, w); rowWeightedMeans(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Mean", "Functions" = "mean2, colMeans2, rowMeans2", "Example" = "mean2(x); rowMeans2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median", "Functions" = "median, colMedians, rowMedians", "Example" = "median(x); rowMedians(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median", "Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians", "Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance", "Functions" = "var, colVars, rowVars", "Example" = "var(x); rowVars(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample variance", "Functions" = "weightedVar, colWeightedVars, rowWeightedVars", "Example" = "weightedVar(x, w), rowWeightedVars(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance by n-order differences", "Functions" = "varDiff, colVarDiffs, rowVarDiffs", "Example" = "varDiff(x); rowVarDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation", "Functions" = "sd, colSds, rowSds", "Example" = "sd(x); rowSds(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample deviation", "Functions" = "weightedSd, colWeightedSds, rowWeightedSds", "Example" = "weightedSd(x, w), rowWeightedSds(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation by n-order differences", "Functions" = "sdDiff, colSdDiffs, rowSdDiffs", "Example" = "sdDiff(x); rowSdDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD)", "Functions" = "mad, colMads, rowMads", "Example" = "mad(x); rowMads(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median absolute deviation (MAD)", "Functions" = "weightedMad, colWeightedMads, rowWeightedMads", "Example" = "weightedMad(x, w), rowWeightedMads(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD) by n-order differences", "Functions" = "madDiff, colMadDiffs, rowMadDiffs", "Example" = "madDiff(x); rowMadDiffs()" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Quantile", "Functions" = "quantile, colQuantiles, rowQuantiles", "Example" = "quantile(x, probs); rowQuantiles(x, probs)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR)", "Functions" = "iqr, colIQRs, rowIQRs", "Example" = "iqr(x); rowIQRs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR) by n-order differences", "Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs", "Example" = "iqrDiff(x); rowIQRDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Range", "Functions" = "range, colRanges, rowRanges", "Example" = "range(x); rowRanges(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Minimum", "Functions" = "min, colMins, rowMins", "Example" = "min(x); rowMins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Maximum", "Functions" = "max, colMaxs, rowMaxs", "Example" = "max(x); rowMaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Testing for and counting values <% tbl <- NULL row <- data.frame( "Operator" = "Are there any missing values?", "Functions" = "anyMissing, colAnyMissings, rowAnyMissings", "Example" = "anyMissing(x); rowAnyMissings(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does TRUE exists?", "Functions" = "any, colAnys, rowAnys", "Example" = "any(x); rowAnys(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Are all values TRUE?", "Functions" = "all, colAlls, rowAlls", "Example" = "all(x); rowAlls(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does value exists?", "Functions" = "anyValue, colAnys, rowAnys", "Example" = "anyValue(x, value); rowAnys(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Do all elements have a given value?", "Functions" = "allValue, colAlls, rowAlls", "Example" = "allValue(x, value); rowAlls(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Number of occurrences of a value?", "Functions" = "count, colCounts, rowCounts", "Example" = "count(x, value); rowCounts(x, value)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Cumulative functions <% tbl <- NULL row <- data.frame( "Operator" = "Cumulative sum", "Functions" = "cumsum, colCumsums, rowCumsums", "Example" = "cumsum(x); rowCumsums(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative product", "Functions" = "cumprod, colCumprods, rowCumprods", "Example" = "cumprod(x); rowCumprods(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative minimum", "Functions" = "cummin, colCummins, rowCummins", "Example" = "cummin(x); rowCummins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative maximum", "Functions" = "cummax, colCummaxs, rowCummaxs", "Example" = "cummax(x); rowCummaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Binning <% tbl <- NULL row <- data.frame( "Estimator" = "Counts in disjoint bins", "Functions" = "binCounts", "Example" = "binCounts(x, bx)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample means (and counts) in disjoint bins", "Functions" = "binMeans", "Example" = "binMeans(y, x, bx)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Miscellaneous <% tbl <- NULL row <- data.frame( "Operation" = "Sum", "Functions" = "sum2, colSums2, rowSums2", "Example" = "sum2(x); rowSums2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operation" = "Lagged differences", "Functions" = c("diff2, colDiffs, rowDiffs"), "Example" = "diff2(x), rowDiffs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ------------------------------------------------------------- <%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](https://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>). matrixStats/NEWS0000644000176200001440000014126514120430646013305 0ustar liggesusersPackage: matrixStats ==================== Version: 0.61.0 [2021-09-12] NEW FEATURES: * When argument 'which' for colOrderStats() and rowOrderStats() is out of range, the error message now reports on the value of 'which'. Similarly, when argument 'probs' for colQuantiles() and rowQuantiles() is out of range, the error message reports on its value too. DEPRECATED AND DEFUNCT: * validateIndices() is deprecated and will eventually be removed from the package API. MISCELLANEOUS: * The package test for benchmark reports failed because the 'markdown' package was not declared as a suggested package. Version: 0.60.1 [2021-08-22] PERFORMANCE: * Handling of the 'useNames' argument is now done in the native code. * Passing 'idxs', 'rows', and 'cols' arguments of type integer is now less efficient than it used to, because the new code re-design (see below) requires an internal allocation of an equally long 'R_xlen_t' vector that is populated by indices coerced from 'R_len_t' to 'R_xlen_t' integers. CODE DESIGN: * No longer using native-code implementations that are specific to the type of index that is passed for subsetting of vectors, rows, and columns. This was done to avoid the complex use of macros that was cumbersome to maintain and added an extra threshold for new contributors to overcome. Another advantage is that faster compilation time when built from source and a smaller size of compiled library. In previous version 'R CMD check' would produce a NOTE on the package installation size being large, which no longer is the case. The downside is that extra overhead when passing integer indices (see above comment). BUG FIXES: * Contrary to other functions which gained new argument useNames=NA in the previous release, colQuantiles() and rowQuantiles() got useNames=TRUE. Version: 0.60.0 [2021-07-26] NEW FEATURES: * Add row and column names support to all row and column functions. To return row and column names, set argument useNames=TRUE. To drop them, set useNames=FALSE. To preserve the current, inconsistent behavior, set useNames=NA, which, for backward compatibility reasons, remains the default for now. Version: 0.59.0 [2021-05-31] MISCELLANEOUS: * Harmonized error messages. BUG FIXES: * Some of the examples and package tests would allocated matrices with dimensions that did not match the number of elements in the input data. DEPRECATED AND DEFUNCT: * Dropped meanOver() and sumOver(), and argument 'method' from weightedVar(), that have been defunct since January 2018. Version: 0.58.0 [2021-01-26] SIGNIFICANT CHANGES: * colVars() and rowVars() with argument 'center' now calculates the sample variance using the n/(n-1)*avg((x-center)^2) formula rather than the n/(n-1)*(avg(x^2)-center^2) formula that was used in the past. Both give the same result when 'center' is the correct sample mean estimate. The main reason for this change is that, if an incorrect 'center' is provided, in contrast to the old approach, the new approach is guaranteed to give at least non-negative results, despite being incorrect. BACKWARD COMPATIBILITY: Out of all 314 reverse dependencies on CRAN and Bioconductor, only four called these functions with argument 'center'. All of them pass their package checks also after this update. To further protect against a negative impact in existing user scripts, colVars() and rowVars() will calculate both versions and assert that the result is the same. If not, an informative error is produced. To limit the performance impact, this validation is run only once every 50:th call, a frequency that can be controlled by R option 'matrixStats.vars.formula.freq'. Setting it to 0 or NULL will disable the validation. The default can also be controlled by environment variable 'R_MATRIXSTATS_VARS_FORMULA_FREQ'. This validation framework will be removed in a future version of the package after it has been established that this change has no negative impact. NEW FEATURES: * Now colWeightedMads() and rowWeightedMads() accept 'center' of the same length as the number of columns and rows, respectively. * colAvgsPerRowSet() and rowAvgsPerRowSet() gained argument 'na.rm'. * Now weightedMean() and weightedMedian() and the corresponding row- and column-based functions accept logical 'x', where FALSE is treated as integer 0 and TRUE as 1. * Now x_OP_y() and t_tx_OP_y() accept logical 'x' and 'y', where FALSE is treated as integer 0 and TRUE as 1. BUG FIXES: * colQuantiles() and rowQuantiles() on a logical matrix should return a numeric vector for 'type = 7'. However, when there were only missing values (= NA) in the matrix, then it would return a "logical" vector instead. * colAvgsPerRowSet() on a single-column matrix would produce an error on non-matching dimensions. Analogously, for rowAvgsPerRowSet() and single- row matrices. * colVars(x) and rowVars(x) with 'x' being an array would give the wrong value if both argument 'dim.' and 'center' would be specified. * The documentation was unclear on what the 'center' argument should be. They would not detect when an incorrect specification was used, notably when the length of 'center' did not match the matrix dimensions. Now these functions give an informative error message when 'center' is of the incorrect length. DEPRECATED AND DEFUNCT: * Using a scalar value for argument 'center' of colSds(), rowSds(), colVars(), rowVars(), colMads(), rowMads(), colWeightedMads(), and rowWeightedMads() is now deprecated. Version: 0.57.0 [2020-09-25] NEW FEATURES: * colCumprods() and rowCumprods() now support also logical input. Thanks to Constantin Ahlmann-Eltze at EMBL Heidelberg for the patch. BUG FIXES: * colCollapse() and rowCollapse() did not expand 'idxs' argument before subsetting by 'cols' and 'rows', respectively. Thanks to Constantin Ahlmann-Eltze for reporting on this. * colAnys(), rowAnys(), anyValue(), colAlls(), rowAlls(), and allValue() with value=FALSE and *numeric* input would incorrectly consider all values different from one as FALSE. Now it is only values that are zero that are considered FALSE. Thanks to Constantin Ahlmann-Eltze for the bug fix. Version: 0.56.0 [2020-03-12] SIGNIFICANT CHANGES: * colQuantiles() and rowQuantiles() now supports only integer, numeric and logical input. Previously, it was also possible to pass, for instance, 'character' input, but that was a mistake. The restriction on input allows for further optimization of these functions. * The returned type of colQuantiles() and rowQuantiles() is now the same as for stats::quantile(), which depends on argument 'type'. PERFORMANCE: * colQuantiles() and rowQuantiles() with the default type=7L and when there are no missing values are now significantly faster and use significantly fewer memory allocations. BUG FIXES: * colDiffs() and rowDiffs() gave an error if argument 'dim.' was of type numeric rather than type integer. * varDiff(), sdDiff(), madDiff(), iqrDiff(), and the corresponding row- and column functions silently treated a 'diff' less than zero as diff = 0. Now an error is produced. * Error messages on argument 'dim.' referred to non-existing argument 'dim'. * Error messages on negative values in argument 'dim.' reported a garbage value instead of the negative value. * The Markdown reports produced by the internal benchmark report generator did not add a line between tables and the following text (a figure caption) causing the following text to be included in a cell on an extra row in the table (at least when rendered on GitHub Wiki pages). Version: 0.55.0 [2019-09-05] SIGNIFICANT CHANGES: * weightedVar(), weightedSd(), weightedMad(), and their row- and column- specific counter parts now return a missing value if there are missing values in any of the weights 'w' after possibly dropping (x, w) elements with missing values in 'x' (na.rm = TRUE). Previously, na.rm=TRUE would also drop (x, w) elements where 'w' was missing. With this change, we now have that for all functions in this package, na.rm=TRUE never applies to weights - only 'x' values. NEW FEATURES: * colRanks() and rowRanks() now supports the same set of 'ties.method' as base::rank() plus "dense" as defined by data.table::frank(). For backward compatible reasons, the default 'ties.method' remains the same as in previous versions. Thank to Brian Montgomery for contributing this. * colCumsums() and rowCumsums() now support also logical input. BUG FIXES: * weightedVar(), weightedSd(), weightedMad(), and their row- and column- specific counter parts would produce an error instead of returning a missing value when one of the weights is a missing value. DEPRECATED AND DEFUNCT: * Calling indexByRow(x) where 'x' is a matrix is now defunct. Use indexByRow(dim(x)) instead. Version: 0.54.0 [2018-07-23] PERFORMANCE: * SPEEDUP: No longer using stopifnot() for internal validation, because it comes with a great overhead. This was only used in weightedMad(), col-, and rowWeightedMads(), as well as col- and rowAvgsPerColSet(). BUG FIXES: * Despite being an unlikely use case, colLogSumExps(lx) / rowLogSumExps(lx) now also accepts integer 'lx' values. * The error produced when using indexByRow(dim) with prod(dim) >= 2^31 would report garbage dimensions instead of 'dim'. DEPRECATED AND DEFUNCT: * Calling indexByRow(x) where 'x' is a matrix is deprecated. Use indexByRow(dim(x)) instead. Version: 0.53.1 [2018-02-10] CODE REFACTORING: * Now col-/rowSds() explicitly replicate all arguments that are passed to col-/rowVars(). DOCUMENTATION: * Added details on how weightedMedian(x, interpolate = TRUE) works. BUG FIXES: * colLogSumExps(lx, cols) / rowLogSumExps(lx, rows) gave an error if 'lx' has rownames / colnames. * col-/rowQuantiles() would lose rownames of output in certain cases. Version: 0.53.0 [2018-01-23] NEW FEATURES: * Functions sum2(x) and means2(x) now accept also logical input 'x', which corresponds to using as.integer(x) but without the need for neither coercion nor internal extra copies. With sum2(x, mode = "double") it is possible to count number of TRUE elements beyond 2^31-1, which base::sum() does not support. * Functions col-/rowSums2() and col-/rowMeans2() now accept also logical input 'x'. * Function binMeans(y, x, bx) now accepts logical 'y', which corresponds to to using as.integer(y) but without the need for coercion to integer. * Functions col-/rowTabulates(x) now support logical input 'x'. * Now count() can count beyond 2^31-1. * allocVector() can now allocate long vectors (longer than 2^31-1). * Now sum2(x, mode = "integer") generates a warning if typeof(x) == "double" asking if as.integer(sum2(x)) was intended. * Inspired by Hmisc::wtd.var(), when sum(w) <= 1, now weightedVar(x, w) produces an informative warning that the estimate is invalid. CODE REFACTORING: * Harmonized the ordering of the arguments of colAvgsPerColSet() with that of rowAvgsPerColSet(). BUG FIXES: * col-/rowLogSumExp() could core dump R for "large" number of columns/rows. Thanks Brandon Stewart at Princeton University for reporting on this. * count() beyond 2^31-1 would return invalid results. * Functions col-/rowTabulates(x) did not count missing values. * indexByRow(dim, idxs) would give nonsense results if 'idxs' had indices greater than prod(dim) or non-positive indices; now it gives an error. * indexByRow(dim) would give nonsense results when prod(dim) >= 2^31; now it gives an informative error. * col-/rowAvgsPerColSet() would return vector rather than matrix if nrow(X) <= 1. Thanks to Peter Hickey (Johns Hopkins University) for troubleshooting and providing a fix. DEPRECATED AND DEFUNCT: * Previously deprecated meanOver() and sumOver() are defunct. Use mean2() and sum2() instead. * Previously deprecated weightedVar(x, w, method = "0.14.2") is defunct. * Dropped previously defunct weightedMedian(..., ties = "both"). * Dropped previously defunct argument 'centers' for col-/rowMads(). Use 'center' instead. * Dropped previously defunct argument 'flavor' of colRanks() and rowRanks(). Version: 0.52.2 [2017-04-13] BUG FIXES: * Several of the row- and column-based functions would core dump R if the matrix was of a data type other than logical, integer, or numeric, e.g. character or complex. This is now detected and an informative error is produced instead. Similarly, some vector-based functions could potentially core dump R or silently return a nonsense result. Thank you Hervé Pagès, Bioconductor Core, for the report. DEPRECATED AND DEFUNCT: * rowVars(..., method = "0.14.2") that was added for very unlikely needs of backward compatibility of an invalid degree-of-freedom term is deprecated. Version: 0.52.1 [2017-04-04] BUG FIXES: * The package test on matrixStats:::benchmark() tried to run even if not all suggested packages were available. Version: 0.52.0 [2017-04-03] SIGNIFICANT CHANGES: * Since anyNA() is a built-in function since R (>= 3.1.0), please use that instead of anyMissing() part of this package. The latter will eventually be deprecated. For consistency with the anyNA() name, colAnyNAs() and rowAnyNAs() are now also available replacing the identically colAnyMissings() and rowAnyMissings() functions, which will also be deprecated in a future release. * meanOver() was renamed to mean2() and sumOver() was renamed to sum2(). NEW FEATURES: * Added colSums2() and rowSums2() which work like colSums() and rowSums() of the base package but also supports efficient subsetting via optional arguments 'rows' and 'cols'. * Added colMeans2() and rowMeans2() which work like colMeans() and rowMeans() of the base package but also supports efficient subsetting via optional arguments 'rows' and 'cols'. * Functions colDiffs() and rowDiffs() gained argument 'dim.'. * Functions colWeightedMads() and rowWeightedMads() gained arguments 'constant' and 'center'. The current implementation only support scalars for these arguments, which means that the same values are applied to all columns and rows, respectively. In previous version a hard-to-understand error would be produced if 'center' was of length greater than one; now an more informative error message is given. * Package is now silent when loaded; it no longer displays a startup message. SOFTWARE QUALITY: * Continuous-integration testing is now also done on macOS, in addition to Linux and Windows. * ROBUSTNESS: Package now registers the native API using also R_useDynamicSymbols(). CODE REFACTORING: * Cleaned up native low-level API and renamed native source code files to make it easier to navigate the native API. * Now using roxygen for help and NAMESPACE (was R.oo::Rdoc). BUG FIXES: * rowAnys(x) on numeric matrices 'x' would return rowAnys(x == 1) and not rowAnys(x != 0). Same for colAnys(), rowAlls(), and colAlls(). Thanks Richard Cotton for reporting on this. * sumOver(x) and meanOver(x) would incorrectly return -Inf or +Inf if the intermediate sum would have that value, even if one of the following elements would turn the intermediate sum into NaN or NA, e.g. with 'x' as c(-Inf, NaN), c(-Inf, +Inf), or c(+Inf, NA). * WORKAROUND: Benchmark reports generated by matrixStats:::benchmark() would use any custom R prompt that is currently set in the R session, which may not render very well. Now it forces the prompt to be the built-in "> " one. DEPRECATED AND DEFUNCT: * The package API is only intended for matrices and vectors of type numeric, integer and logical. However, a few functions would still return if called with a data.frame. This was never intended to work and is now an error. Specifically, functions colAlls(), colAnys(), colProds(), colQuantiles(), colIQRs(), colWeightedMeans(), colWeightedMedians(), and colCollapse() now produce warnings if called with a data.frame. Same for the corresponding row- functions. The use of a data.frame will be produce an error in future releases. * meanOver() and sumOver() are deprecated because they were renamed to mean2() and sum2(), respectively. * Previously deprecated (and ignored) argument 'flavor' of colRanks() and rowRanks() is now defunct. * Previously deprecated support for passing non-vector, non-matrix objects to rowAlls(), rowAnys(), rowCollapse(), and the corresponding column-based versions are now defunct. Likewise, rowProds(), rowQuantiles(), rowWeightedMeans(), rowWeightedMedians(), and the corresponding column-based versions are also defunct. The rationale for this is to tighten up the identity of the matrixStats package and what types of input it accepts. This will also help optimize the code further. Version: 0.51.0 [2016-10-08] PERFORMANCE AND MEMORY: * SPEEDUP / CLEANUP: rowMedians() and colMedians() are now plain functions. They were previously S4 methods (due to a Bioconductor legacy). The package no longer imports the methods package. * SPEEDUP: Now native API is formally registered allowing for faster lookup of routines from R. Version: 0.50.2 [2016-04-24] BUG FIXES: * Package now installs on R (>= 2.12.0) as claimed. Thanks to Mikko Korpela at Aalto University School of Science, Finland, for troubleshooting and providing a fix. * logSumExp(c(-Inf, -Inf, ...)) would return NaN rather than -Inf. Thanks to Jason Xu (University of Washington) for reporting and Brennan Vincent for troubleshooting and contributing a fix. Version: 0.50.1 [2015-12-14] BUG FIXES: * The Undefined Behavior Sanitizer (UBsan) reported on a memcall(src, dest, 0) call when dest == null. Thanks to Brian Ripley and the CRAN check tools for catching this. We could reproduce this with gcc 5.1.1 but not with gcc 4.9.2. Version: 0.50.0 [2015-12-13] NEW FEATURES: * MAJOR FEATURE UPDATE: Subsetting arguments 'idxs', 'rows' and 'cols' were added to all functions such that the calculations are performed on the requested subset while avoiding creating a subsetted copy, i.e. rowVars(x, cols = 4:6) is a much faster and more memory efficient version than rowVars(x[, 4:6]) and even yet more efficient than apply(x, MARGIN = 1L, FUN = var). These features were added by Dongcan Jiang, Peking University, with support from the Google Summer of Code program. A great thank you to Dongcan and to Google for making this possible. Version: 0.15.0 [2015-10-26] NEW FEATURES: * CONSISTENCY: Now all weight arguments ('w' and 'W') default to NULL, which corresponds to uniform weights. CODE REFACTORING: * ROBUSTNESS: Importing 'stats' functions in namespace. BUG FIXES: * weightedVar(x, w) used the wrong bias correction factor resulting in an estimate that was tau too large, where tau = ((sum(w) - 1) / sum(w)) / ((length(w) - 1) / length(w)). Thanks to Wolfgang Abele for reporting and troubleshooting on this. * weightedVar(x) with length(x) = 1 returned 0 no NA. Same for weightedSd(). * weightedMedian(x, w = NA_real_) returned 'x' rather than NA_real_. This only happened for length(w) = 1. * allocArray(dim) failed for prod(dim) >= .Machine$integer.max. DEPRECATED AND DEFUNCT: * CLEANUP: Defunct argument 'centers' for col-/rowMads(); use 'center'. * weightedVar(x, w, method = "0.14.2") is deprecated. Version: 0.14.2 [2015-06-23] BUG FIXES: * x_OP_y() and t_tx_OP_y() would return garbage on Solaris SPARC (and possibly other architectures as well) when input was integer and had missing values. Version: 0.14.1 [2015-06-17] BUG FIXES: * product(x, na.rm = FALSE) for integer 'x' with both zeros and NAs returned zero rather than NA. * weightedMean(x, w, na.rm = TRUE) did not handle missing values in 'x' properly, if it was an integer. It would also return NaN if there were weights 'w' with missing values, whereas stats::weighted.mean() would skip such data points. Now weightedMean() does the same. * (col|row)WeightedMedians() did not handle infinite weights as weightedMedian() does. * x_OP_y(x, y, OP, na.rm = FALSE) returned garbage iff 'x' or 'y' had missing values of type integer. * rowQuantiles() and rowIQRs() did not work for single-row matrices. Analogously for the corresponding column functions. * rowCumsums(), rowCumprods() rowCummins(), and rowCummaxs(), accessed out-of-bound elements for Nx0 matrices where N > 0. The corresponding column methods has similar memory errors for 0xK matrices where K > 0. * anyMissing(list(NULL)) returned NULL; now FALSE. * rowCounts() resulted in garbage if a previous column had NAs (because it forgot to update index kk in such cases). * rowCumprods(x) handled missing values and zeros incorrectly for integer 'x (not double); a zero would trump an existing missing value causing the following cumulative products to become zero. It was only a zero that trumped NAs; any other integer would work as expected. Note, this bug was not in colCumprods(). * rowAnys(x, value, na.rm = FALSE) did not handle missing values in a numeric 'x' properly. Similarly, for non-numeric and non-logical 'x', row- and colAnys(), row- and colAlls(), anyValue() and allValue() did not handle when 'value' was a missing value. * All of the above bugs were identified and fixed by Dongcan Jiang (Peking University, China), who also added corresponding unit tests. Version: 0.14.0 [2015-02-13] SIGNIFICANT CHANGES: * CLEANUP: anyMissing() is no longer an S4 generic. This was done as part of the migration of making all functions of matrixStats plain R functions, which minimizes calling overhead and it will also allow us to drop 'methods' from the package dependencies. I've scanned all CRAN and Bioconductor packages depending on matrixStats and none of them relied on anyMissing() dispatching on class, so hopefully this move has little impact. The only remaining S4 methods are now colMedians() and rowMedians(). NEW FEATURES: * CONSISTENCY: Renamed argument 'centers' of col-/rowMads() to 'center'. This is consistent with col-/rowVars(). * CONSISTENCY: col-/rowVars() now use na.rm = FALSE as the default (na.rm = TRUE was mistakenly introduced as the default in v0.9.7). PERFORMANCE AND MEMORY: * SPEEDUP: The check for user interrupts at the C level is now done less frequently of the functions. It does every k:th iteration, where k = 2^20, which is tested for using (iter % k == 0). It turns out, at least with the default compiler optimization settings that I use, that this test is 3 times faster if k = 2^n where n is an integer. The following functions checks for user interrupts: logSumExp(), (col|row)LogSumExps(), (col|row)Medians(),, (col|row)Mads(), (col|row)Vars(), and (col|row)Cum(Min|Max|prod|sum)s(). * SPEEDUP: logSumExp(x) is now faster if 'x' does not contain any missing values. It is also faster if all values are missing or the maximum value is +Inf - in both cases it can skip the actual summation step. SOFTWARE QUALITY: * ROBUSTNESS/TESTS: Package tests cover 96% of the code (was 91%). CODE REFACTORING: * CLEANUP: Package no longer depends on R.methodsS3. BUG FIXES: * all() and any() flavored methods on non-numeric and non-logical (e.g. character) vectors and matrices with na.rm = FALSE did not give results consistent with all() and any() if there were missing values. For example, with x <- c("a", NA, "b") we have all(x == "a") == FALSE and any(x == "a") == TRUE whereas our corresponding methods would return NA in those cases. The methods fixed are allValue(), anyValue(), col-/rowAlls(), and col-/rowAnys(). Added more package tests to cover these cases. * logSumExp(x, na.rm = TRUE) would return NA if all values were NA and length(x) > 1. Now it returns -Inf for all length(x):s. Version: 0.13.1 [2015-01-21] BUG FIXES: * diff2() with differences >= 3 would *read* spurious values beyond the allocated memory. This error, introduced in 0.13.0, was harmless in the sense that the returned value was unaffected and still correct. Thanks to Brian Ripley and the CRAN check tools for catching this. I could reproduce it locally with 'valgrind'. Version: 0.13.0 [2015-01-20] SIGNIFICANT CHANGES: * SPEEDUP/CLEANUP: Turned several S3 and S4 methods into plain R functions, which decreases the overhead of calling the functions. After this there are no longer any S3 methods. Remaining S4 methods are anyMissing() and rowMedians(). NEW FEATURES: * Added weightedMean(), which is ~10 times faster than stats::weighted.mean(). * Added count(x, value) which is a notably faster than sum(x == value). This can also be used to count missing values etc. * Added allValue() and anyValue() for all(x == value) and any(x == value). * Added diff2(), which is notably faster than base::diff() for vectors, which it is designed for. * Added iqrDiff() and (col|row)IqrDiffs(). * CONSISTENCY: Now rowQuantiles(x, na.rm = TRUE) returns all NAs for rows with missing values. Analogously for colQuantiles(), colIQRs(), rowIQRs() and iqr(). Previously, all these functions gave an error saying missing values are not allowed. * COMPLETENESS: Added corresponding "missing" vector functions for already existing column and row functions. Similarly, added "missing" column and row functions for already existing vector functions, e.g. added iqr() and count() to complement already existing (col|row)IQRs() and (col|row)Counts() functions. * ROBUSTNESS: Now column and row methods give slightly more informative error messages if a data.frame is passed instead of a matrix. DOCUMENTATION: * Added vignette summarizing available functions. PERFORMANCE AND MEMORY: * SPEEDUP: (col|row)Diffs() are now implemented in native code and notably faster than diff() for matrices. * SPEEDUP: Made binCounts() and binMeans() a bit faster. * SPEEDUP: Implemented weightedMedian() in native code, which made it ~3-10 times faster. Dropped support for ties = "both", because it would have to return two values in case of ties, which made the API unnecessarily complicated. If really needed, then call the function twice with ties = "min" and ties = "max". * SPEEDUP: (col|row)Anys() and (col|row)Alls() is now notably faster compared to previous versions. CODE REFACTORING: * CLEANUP: In the effort of migrating anyMissing() into a plain R function, the specific anyMissing() implementations for data.frame:s and and list:s were dropped and is now handled by anyMissing() for "ANY", which is the only S4 method remaining now. In a near future release, this remaining "ANY" method will turned into a plain R function and the current S4 generic will be dropped. We know of know CRAN and Bioconductor packages that relies on it being a generic function. Note also that since R (>= 3.1.0) there is a base::anyNA() function that does the exact same thing making anyMissing() obsolete. BUG FIXES: * weightedMedian(..., ties = "both") would give an error if there was a tie. Added package test for this case. DEPRECATED AND DEFUNCT: * weightedMedian(..., ties = "both") is now defunct. Version: 0.12.2 [2014-12-07] BUG FIXES: * CODE FIX: The native code for product() on integer vector incorrectly used C-level abs() on intermediate values despite those being doubles requiring fabs(). Despite this, the calculated product would still be correct (at least when validated on several local setups as well as on the CRAN servers). Again, thanks to Brian Ripley for pointing out another invalid integer-double coercion at the C level. DEPRECATED AND DEFUNCT: * weightedMedian(..., interpolate = FALSE, ties = "both") is defunct. Version: 0.12.1 [2014-12-06] SOFTWARE QUALITY: * ROBUSTNESS: Updated package tests to check methods in more scenarios, especially with both integer and numeric input data. BUG FIXES: * (col|row)Cumsums(x) where 'x' is integer would return garbage for columns (rows) containing missing values. * rowMads(x) where 'x' is numeric (not integer) would give incorrect results for rows that had an *odd* number of values (no ties). Analogously issues with colMads(). Added package tests for such cases too. Thanks to Brian Ripley and the CRAN check tools for (yet again) catching another coding mistake. Details: This was because the C-level calculation of the absolute value of residuals toward the median would use integer-based abs() rather than double-based fabs(). Now it fabs() is used when the values are double and abs() when they are integers. Version: 0.12.0 [2014-12-05] * Submitted to CRAN. Version: 0.11.9 [2014-11-26] NEW FEATURES: * Added (col|row)Cumsums(), (col|row)Cumprods(), (col|row)Cummins(), and (col|row)Cummaxs(). BUG FIXES: * (col|row)WeightedMeans() with all zero weights gave mean estimates with values 0 instead of NaN. Version: 0.11.8 [2014-11-25] PERFORMANCE AND MEMORY: * SPEEDUP: Implemented (col|row)Mads(), (col|row)Sds() and (col|row)Vars() in native code. * SPEEDUP: Made (col|row)Quantiles(x) faster for 'x' without missing values (and default type = 7L quantiles). It should still be implemented in native code though. * SPEEDUP: Made rowWeightedMeans() faster. BUG FIXES: * (col|row)Medians(x) when 'x' is integer would give invalid median values in case (a) it was calculated as the mean of two values ("ties"), and (b) the sum of those values where greater than .Machine$integer.max. Now such ties are calculated using floating point precision. Add lots of package tests. Version: 0.11.6 [2014-11-16] PERFORMANCE AND MEMORY: * SPEEDUP: Now (col|row)Mins(), (col|row)Maxs() and (col|row)Ranges() are implemented in native code providing a significant speedup. * SPEEDUP: Now colOrderStats() also is implemented in native code, which indirectly makes colMins(), colMaxs() and colRanges() faster. * SPEEDUP: colTabulates(x) no longer uses rowTabulates(t(x)). * SPEEDUP: colQuantiles(x) no longer uses rowQuantiles(t(x)). DEPRECATED AND DEFUNCT: * CLEANUP: Argument 'flavor' of (col|row)Ranks() is now ignored. Version: 0.11.5 [2014-11-15] SIGNIFICANT CHANGES: * (col|row)Prods() now uses default method = "direct" (was "expSumLog"). PERFORMANCE AND MEMORY: * SPEEDUP: Now colCollapse(x) no longer utilizes rowCollapse(t(x)). Added package tests for (col|row)Collapse(). * SPEEDUP: Now colDiffs(x) no longer uses rowDiffs(t(x)). Added package tests for (col|row)Diffs(). * SPEEDUP: Package no longer utilizes match.arg() due to its overhead; methods sumOver(), (col|row)Prods() and (col|row)Ranks() were updated. Version: 0.11.4 [2014-11-14] NEW FEATURES: * Added support for vector input to several of the row- and column methods as long as the "intended" matrix dimension is specified via argument 'dim'. For instance, rowCounts(x, dim = c(nrow, ncol)) is the same as rowCounts(matrix(x, nrow, ncol)), but more efficient since it avoids creating/allocating a temporary matrix. PERFORMANCE AND MEMORY: * SPEEDUP: Now colCounts() is implemented in native code. Moreover, (col|row)Counts() are now also implemented in native code for logical input (previously only for integer and double input). Added more package tests and benchmarks for these functions. Version: 0.11.3 [2014-11-11] SIGNIFICANT CHANGES: * Turned sdDiff(), madDiff(), varDiff(), weightedSd(), weightedVar() and weightedMad() into plain functions (were generic functions). CODE REFACTORING: * Removed unnecessary usage of '::'. Version: 0.11.2 [2014-11-09] SIGNIFICANT CHANGES: * SPEEDUP: Implemented indexByRow() in native code and it is no longer a generic function, but a regular function, which is also faster to call. The first argument of indexByRow() has been changed to 'dim' such that one should use indexByRow(dim(X)) instead of indexByRow(X) as in the past. The latter form is still supported, but deprecated. NEW FEATURES: * Added allocVector(), allocMatrix() and allocArray() for faster allocation numeric vectors, matrices and arrays, particularly when filled with non-missing values. DEPRECATED AND DEFUNCT: * Calling indexByRow(X) with a matrix 'X' is deprecatated. Instead call it with indexByRow(dim(X)). Version: 0.11.1 [2014-11-07] NEW FEATURES: * Better support for long vectors. * PRECISION: Using greater floating-point precision in more internal intermediate calculations, where possible. SOFTWARE QUALITY: * ROBUSTNESS: Although unlikely, with long vectors support for binCounts() and binMeans() it is possible that a bin gets a higher count than what can be represented by an R integer (.Machine$integer.max = 2^31-1). If that happens, an informative warning is generated and the bin count is set to .Machine$integer.max. If this happens for binMeans(), the corresponding mean is still properly calculated and valid. CODE REFACTORING: * CLEANUP: Cleanup and harmonized the internal C API such there are two well defined API levels. The high-level API is called by R via .Call() and takes care of most of the argument validation and construction of the return value. This function dispatch to functions in the low-level API based on data type(s) and other arguments. The low-level API is written to work with basic C data types only. BUG FIXES: * Package incorrectly redefined R_xlen_t on R (>= 3.0.0) systems where LONG_VECTOR_SUPPORT is not supported. Version: 0.11.0 [2014-11-02] NEW FEATURES: * Added sumOver() and meanOver(), which are notably faster versions of sum(x[idxs]) and mean(x[idxs]). Moreover, instead of having to do sum(as.numeric(x)) to avoid integer overflow when 'x' is an integer vector, one can do sumOver(x, mode = "numeric"), which avoids the extra copy created when coercing to numeric (this numeric copy is also twice as large as the integer vector). Added package tests and benchmark reports for these functions. Version: 0.10.4 [2014-11-01] PERFORMANCE AND MEMORY: * SPEEDUP: Made anyMissing(), logSumExp(), (col|row)Medians(), (col|row)Counts() slightly faster by making the native code assign the results directly to the native vector instead of to the R vector, e.g. ansp[i] = v where ansp = REAL(ans) instead of REAL(ans)[i] = v. * Added benchmark reports for anyMissing() and logSumExp(). Version: 0.10.3 [2014-10-01] BUG FIXES: * binMeans() returned 0.0 instead of NA_real_ for empty bins. Version: 0.10.2 [2014-09-01] BUG FIXES: * On some systems, the package failed to build on R (<= 2.15.3) with compilation error: "redefinition of typedef 'R_xlen_t'". Version: 0.10.1 [2014-06-09] PERFORMANCE AND MEMORY: * Added benchmark reports for also non-matrixStats functions col-/rowSums() and col-/rowMeans(). * Now all colNnn() and rowNnn() methods are benchmarked in a combined report making it possible to also compare colNnn(x) with rowNnn(t(x)). Version: 0.10.0 [2014-06-07] SOFTWARE QUALITY: * Relaxed some packages tests such that they assert numerical correctness via all.equal() rather than identical(). * Submitted to CRAN. BUG FIXES: * The package tests for product() incorrectly assumed that the value of prod(c(NaN, NA)) is uniquely defined. However, as documented in help("is.nan"), it may be NA or NaN depending on R system/platform. Version: 0.9.7 [2014-06-05] BUG FIXES: * Introduced a bug in v0.9.5 causing col-/rowVars() and hence also col-/rowSds() to return garbage. Add package tests for these now. * Submitted to CRAN. Version: 0.9.6 [2014-06-04] NEW FEATURES: * Added signTabulate() for tabulating the number of negatives, zeros, positives and missing values. For doubles, the number of negative and positive infinite values are also counted. PERFORMANCE AND MEMORY: * SPEEDUP: Now col-/rowProds() utilizes new product() function. * SPEEDUP: Added product() for calculating the product of a numeric vector via the logarithm. Version: 0.9.5 [2014-06-04] SIGNIFICANT CHANGES: * SPEEDUP: Made weightedMedian() a plain function (was an S3 method). * CLEANUP: Now only exporting plain functions and generic functions. * SPEEDUP: Turned more S4 methods into S3 methods, e.g. rowCounts(), rowAlls(), rowAnys(), rowTabulates() and rowCollapse(). NEW FEATURES: * Added argument 'method' to col-/rowProds() for controlling how the product is calculated. PERFORMANCE AND MEMORY: * SPEEDUP: Package is now byte compiled. * SPEEDUP: Made rowProds() and rowTabulates() notably faster. * SPEEDUP: Now rowCounts(), rowAnys(), rowAlls() and corresponding column methods can search for any value in addition to the default TRUE. The search for a matching integer or double value is done in native code, which is notably faster (and more memory efficient because it avoids creating any new objects). * SPEEDUP: Made colVars() and colSds() notably faster and rowVars() and rowSds() a slightly bit faster. * Added benchmark reports, e.g. matrixStats:::benchmark('colMins'). Version: 0.9.4 [2014-05-23] SIGNIFICANT CHANGES: * SPEEDUP: Turned several S4 methods into S3 methods, e.g. indexByRow(), madDiff(), sdDiff() and varDiff(). Version: 0.9.3 [2014-04-26] NEW FEATURES: * Added argument 'trim' to madDiff(), sdDiff() and varDiff(). Version: 0.9.2 [2014-04-04] BUG FIXES: * The native code of binMeans(x, bx) would try to access an out-of-bounds value of argument 'y' iff 'x' contained elements that are left of all bins in 'bx'. This bug had no impact on the results and since no assignment was done it should also not crash/core dump R. This was discovered thanks to new memtests (ASAN and valgrind) provided by CRAN. Version: 0.9.1 [2014-03-31] BUG FIXES: * rowProds() would throw "Error in rowSums(isNeg) : 'x' must be an array of at least two dimensions" on matrices where all rows contained at least one zero. Thanks to Roel Verbelen at KU Leuven for the report. Version: 0.9.0 [2014-03-26] NEW FEATURES: * Added weighedVar() and weightedSd(). Version: 0.8.14 [2013-11-23] PERFORMANCE AND MEMORY: * MEMORY: Updated all functions to do a better job of cleaning out temporarily allocated objects as soon as possible such that the garbage collector can remove them sooner, iff wanted. This increase the chance for a smaller memory footprint. * Submitted to CRAN. Version: 0.8.13 [2013-10-08] NEW FEATURES: * Added argument 'right' to binCounts() and binMeans() to specify whether binning should be done by (u,v] or [u,v). Added system tests validating the correctness of the two cases. CODE REFACTORING: * Bumped up package dependencies. Version: 0.8.12 [2013-09-26] PERFORMANCE AND MEMORY: * SPEEDUP: Now utilizing anyMissing() everywhere possible. Version: 0.8.11 [2013-09-21] SOFTWARE QUALITY: * ROBUSTNESS: Now importing 'loadMethod' from 'methods' package such that 'matrixStats' S4-based methods also work when 'methods' is not loaded, e.g. when 'Rscript' is used, cf. Section 'Default packages' in 'R Installation and Administration'. * ROBUSTNESS: Updates package system tests such that the can run with only the 'base' package loaded. Version: 0.8.10 [2013-09-15] CODE REFACTORING: * CLEANUP: Now only importing two functions from the 'methods' package. * Bumped up package dependencies. Version: 0.8.9 [2013-08-29] NEW FEATURES: * CLEANUP: Now the package startup message acknowledges argument 'quietly' of library()/require(). Version: 0.8.8 [2013-07-29] DOCUMENTATION: * The dimension of the return value was swapped in help("rowQuantiles"). Version: 0.8.7 [2013-07-28] PERFORMANCE AND MEMORY: * SPEEDUP: Made (col|row)Mins() and (col|row)Maxs() much faster. BUG FIXES: * rowRanges(x) on an Nx0 matrix would give an error. Same for colRanges(x) on an 0xN matrix. Added system tests for these and other special cases. Version: 0.8.6 [2013-07-20] CODE REFACTORING: * Bumped up package dependencies. BUG FIXES: * Forgot to declare S3 methods (col|row)WeightedMedians(). Version: 0.8.5 [2013-05-25] PERFORMANCE AND MEMORY: * Minor speedup of (col|row)Tabulates() by replacing rm() calls with NULL assignments. Version: 0.8.4 [2013-05-20] DOCUMENTATION: * CRAN POLICY: Now all Rd \usage{} lines are at most 90 characters long. Version: 0.8.3 [2013-05-10] PERFORMANCE AND MEMORY: * SPEEDUP: binCounts() and binMeans() now uses Hoare's Quicksort for presorting 'x' before counting/averaging. They also no longer test in every iteration (== for every data point) whether the last bin has been reached or not, but only after completing a bin. Version: 0.8.2 [2013-05-02] DOCUMENTATION: * Minor corrections and updates to help pages. Version: 0.8.1 [2013-05-02] BUG FIXES: * Native code of logSumExp() used an invalid check for missing value of an integer argument. Detected by Brian Ripley upon CRAN submission. Version: 0.8.0 [2013-05-01] NEW FEATURES: * Added logSumExp(lx) and (col|row)LogSumExps(lx) for accurately computing of log(sum(exp(lx))) for standalone vectors, and row and column vectors of matrices. Thanks to Nakayama (Japan) for the suggestion and contributing a draft in R. Version: 0.7.1 [2013-04-23] NEW FEATURES: * Added argument 'preserveShape' to colRanks(). For backward compatibility the default is preserveShape = FALSE, but it may change in the future. BUG FIXES: * Since v0.6.4, (col|row)Ranks() gave the incorrect results for integer matrices with missing values. * Since v0.6.4, (col|row)Medians() for integers would calculate ties as floor(tieAvg). Version: 0.7.0 [2013-01-14] NEW FEATURES: * Now (col|row)Ranks() support "max" (default), "min" and "average" for argument 'ties.method'. Added system tests validation these cases. Thanks Peter Langfelder (UCLA) for contributing this. Version: 0.6.4 [2013-01-13] NEW FEATURES: * Added argument 'ties.method' to rowRanks() and colRanks(), but still only support for "max" (as before). CODE REFACTORING: * ROBUSTNESS: Lots of cleanup of the internal/native code. Native code for integer and double cases have been harmonized and are now generated from a common code template. This was inspired by code contributions from Peter Langfelder (UCLA). Version: 0.6.3 [2013-01-13] NEW FEATURES: * Added anyMissing() for data type 'raw', which always returns FALSE. SOFTWARE QUALITY: * ROBUSTNESS: Added system test for anyMissing(). * ROBUSTNESS: Now S3 methods are declared in the namespace. Version: 0.6.2 [2012-11-15] SOFTWARE QUALITY: * CRAN POLICY: Made example(weightedMedian) faster. Version: 0.6.1 [2012-10-10] BUG FIXES: * In some cases binCounts() and binMeans() could try to go past the last bin resulting a core dump. * binCounts() and binMeans() would return random/garbage values for bins that were beyond the last data point. Version: 0.6.0 [2012-10-04] NEW FEATURES: * Added binMeans() for fast sample-mean calculation in bins. Thanks to Martin Morgan at the Fred Hutchinson Cancer Research Center, Seattle, for contributing the core code for this. * Added binCounts() for fast element counting in bins. Version: 0.5.3 [2012-09-10] SOFTWARE QUALITY: * CRAN POLICY: Replaced the .Internal(psort(...)) call with a call to a new internal partial sorting function, which utilizes the native rPsort() part of the R internals. Version: 0.5.2 [2012-07-02] CODE REFACTORING: * Updated package dependencies to match CRAN. Version: 0.5.1 [2012-06-25] NEW FEATURES: * GENERALIZATION: Now (col|row)Prods() handle missing values. CODE REFACTORING: * Package now only imports the 'methods' package. BUG FIXES: * In certain cases, (col|row)Prods() would return NA instead of 0 for some elements. Added a redundancy test for the case. Thanks Brenton Kenkel at University of Rochester for reporting on this. Version: 0.5.0 [2012-04-16] NEW FEATURES: * Added weightedMad() from aroma.core v2.5.0. * Added weightedMedian() from aroma.light v1.25.2. CODE REFACTORING: * This package no longer depends on the aroma.light package for any of its functions. * Now this package only imports R.methodsS3, meaning it no longer loads R.methodsS3 when it is loaded. Version: 0.4.5 [2012-03-19] NEW FEATURES: * Updated the default argument 'centers' of rowMads()/colMads() to explicitly be (col|row)Medians(x,...). The default behavior has not changed. Version: 0.4.4 [2012-03-05] SOFTWARE QUALITY: * ROBUSTNESS: Added system/redundancy tests for rowMads()/colMads(). * CRAN: Made the system tests "lighter" by default, but full tests can still be run, cf. tests/*.R scripts. BUG FIXES: * colMads() would return the incorrect estimates. This bug was introduced in matrixStats v0.4.0 (2011-11-11). Version: 0.4.3 [2011-12-11] BUG FIXES: * rowMedians(..., na.rm = TRUE) did not handle NaN (only NA). The reason for this was the the native code used ISNA() to test for NA and NaN, but it should have been ISNAN(), which is opposite to how is.na() and is.nan() at the R level work. Added system tests for this case. Version: 0.4.2 [2011-11-29] NEW FEATURES: * Added rowAvgsPerColSet() and colAvgsPerRowSet(). Version: 0.4.1 [2011-11-25] DOCUMENTATION: * Added help pages with an example to rowIQRs() and colIQRs(). * Added example to rowQuantiles(). BUG FIXES: * rowIQRs() and colIQRs() would return the 25% and the 75% quantiles, not the difference between them. Thanks Pierre Neuvial at CNRS, Evry, France for the report. Version: 0.4.0 [2011-11-11] SIGNIFICANT CHANGES: * Dropped the previously introduced expansion of 'center' in rowMads() and colMads(). It added unnecessary overhead if not needed. NEW FEATURES: * Added rowRanks() and colRanks(). Thanks Hector Corrada Bravo (University of Maryland) and Harris Jaffee (John Hopkins). Version: 0.3.0 [2011-10-13] PERFORMANCE AND MEMORY: * SPEEDUP/LESS MEMORY: colMedians(x) no longer uses rowMedians(t(x)); instead there is now an optimized native-code implementation. Also, colMads() utilizes the new colMedians() directly. This improvement was kindly contributed by Harris Jaffee at Biostatistics of John Hopkins, USA. SOFTWARE QUALITY: * Added additional unit tests for colMedians() and rowMedians(). Version: 0.2.2 [2010-10-06] NEW FEATURES: * Now the result of (col|row)Quantiles() contains column names. Version: 0.2.1 [2010-04-05] NEW FEATURES: * Added a startup message when package is loaded. CODE REFACTORING: * CLEANUP: Removed obsolete internal .First.lib() and .Last.lib(). Version: 0.2.0 [2010-03-30] * DOCUMENTATION: Fixed some incorrect cross references. Version: 0.1.9 [2010-02-03] BUG FIXES: * (col|row)WeightedMeans(..., na.rm = TRUE) would incorrectly treat missing values as zeros. Added corresponding redundancy tests (also for the median case). Thanks Pierre Neuvial for reporting this. Version: 0.1.8 [2009-11-13] BUG FIXES: * colRanges(x) would return a matrix of wrong dimension if 'x' did not have any missing values. This would affect all functions relying on colRanges(), e.g. colMins() and colMaxs(). Added a redundancy test for this case. Thanks Pierre Neuvial at UC Berkeley for reporting this. * (col|row)Ranges() return a matrix with dimension names. Version: 0.1.7 [2009-06-20] BUG FIXES: * WORKAROUND: Cannot use "%#x" in rowTabulates() when creating the column names of the result matrix. It gave an error OSX with R v2.9.0 devel (2009-01-13 r47593b) current the OSX server at R-forge. Version: 0.1.6 [2009-06-17] DOCUMENTATION: * Updated the help example for rowWeightedMedians() to run conditionally on aroma.light, which is only a suggested package - not a required one. This in order to prevent R CMD check to fail on CRAN, which prevents it for building binaries (as it currently happens on their OSX servers). Version: 0.1.5 [2009-02-04] BUG FIXES: * For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. Version: 0.1.4 [2009-02-02] NEW FEATURES: * Added methods (col|row)Weighted(Mean|Median)s() for weighted averaging. DOCUMENTATION: * Added help to more functions. SOFTWARE QUALITY: * Package passes R CMD check flawlessly. Version: 0.1.3 [2008-07-30] NEW FEATURES: * Added (col|row)Tabulates() for integer and raw matrices. BUG FIXES: * rowCollapse(x) was broken and returned the wrong elements. Version: 0.1.2 [2008-04-13] NEW FEATURES: * Added (col|row)Collapse(). * Added varDiff(), sdDiff() and madDiff(). * Added indexByRow(). Version: 0.1.1 [2008-03-25] NEW FEATURES: * Added (col|row)OrderStats(). * Added (col|row)Ranges() and (col|row)(Min|Max)s(). * Added colMedians(). * Now anyMissing() support most data types as structures. Version: 0.1.0 [2007-11-26] NEW FEATURES: * Imported the rowNnn() methods from Biobase. * Created. matrixStats/R/0000755000176200001440000000000014111764040012774 5ustar liggesusersmatrixStats/R/rowCounts.R0000644000176200001440000001403414111740760015127 0ustar liggesusers#' Counts the number of occurrences of a specific value #' #' The row- and column-wise functions take either a matrix or a vector as #' input. If a vector, then argument \code{dim.} must be specified and fulfill #' \code{prod(dim.) == length(x)}. The result will be identical to the results #' obtained when passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, #' but avoids having to temporarily create/allocate a matrix, if only such is #' needed only for these calculations. #' #' @inheritParams rowAlls #' #' @param value A value to search for. #' #' @return \code{rowCounts()} (\code{colCounts()}) returns an #' \code{\link[base]{integer}} \code{\link[base]{vector}} of length N (K). #' \code{count()} returns a scalar of type \code{\link[base]{integer}} if #' the count is less than 2^31-1 (= \code{.Machine$integer.max}) otherwise #' a scalar of type \code{\link[base]{double}}. #' #' @example incl/rowCounts.R #' #' @author Henrik Bengtsson #' @seealso rowAlls #' @keywords array logic iteration univar #' @export rowCounts <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { # Argument 'x': if (is.matrix(x)) { } else if (is.vector(x)) { } else { stop(sprintf("Argument '%s' is not a matrix or a vector: %s", "x", mode(x)[1L])) } # Argument 'value': if (length(value) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "value", length(value))) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { # Preserve rownames names <- rownames(x) has_nas <- TRUE counts <- .Call(C_rowCounts, x, dim., rows, cols, value, 2L, na.rm, has_nas, useNames) # Preserve names attribute names <- names(counts) counts <- as.integer(counts) names(counts) <- names } else { # Preserve rownames names <- rownames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { counts <- apply(x, MARGIN = 1L, FUN = function(x) { sum(is.na(x)) }) } else { counts <- apply(x, MARGIN = 1L, FUN = function(x) { sum(x == value, na.rm = na.rm) }) } counts <- as.integer(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(counts) <- names } } else { names(counts) <- NULL } } } counts } #' @rdname rowCounts #' @export colCounts <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { # Argument 'x': if (is.matrix(x)) { } else if (is.vector(x)) { } else { stop(sprintf("Argument '%s' is not a matrix or a vector: %s", "x", mode(x)[1L])) } # Argument 'value': if (length(value) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "value", length(value))) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { # Preserve colnames names <- colnames(x) has_nas <- TRUE counts <- .Call(C_colCounts, x, dim., rows, cols, value, 2L, na.rm, has_nas, useNames) # Preserve names attribute names <- names(counts) counts <- as.integer(counts) names(counts) <- names } else { # Preserve colnames names <- colnames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { counts <- apply(x, MARGIN = 2L, FUN = function(x) sum(is.na(x)) ) } else { counts <- apply(x, MARGIN = 2L, FUN = function(x) { sum(x == value, na.rm = na.rm) }) } counts <- as.integer(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(counts) <- names } } else { names(counts) <- NULL } } } counts } #' @rdname rowCounts #' @export count <- function(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) { # Argument 'x': if (!is.vector(x)) { stop(sprintf("Argument '%s' is not a vector: %s", "x", mode(x)[1L])) } # Argument 'value': if (length(value) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "value", length(value))) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_count, x, idxs, value, 2L, na.rm, has_nas) } else { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (is.na(value)) { counts <- sum2(is.na(x)) } else { counts <- sum2(x == value, na.rm = na.rm) } } counts } matrixStats/R/rowCumsums.R0000644000176200001440000000415114111740760015307 0ustar liggesusers#' Cumulative sums, products, minima and maxima for each row (column) in a #' matrix #' #' Cumulative sums, products, minima and maxima for each row (column) in a #' matrix. #' #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} #' of the same mode as \code{x}, except when \code{x} is of mode #' \code{\link[base]{logical}}, then the return type is #' \code{\link[base]{integer}}. #' #' @example incl/rowCumsums.R #' #' @author Henrik Bengtsson #' #' @seealso See \code{\link[base]{cumsum}}(), \code{\link[base]{cumprod}}(), #' \code{\link[base]{cummin}}(), and \code{\link[base]{cummax}}(). #' #' @keywords array iteration univar #' @export rowCumsums <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCumsums, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCumsums <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCumsums, x, dim., rows, cols, FALSE, useNames) } #' @rdname rowCumsums #' @export rowCumprods <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCumprods, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCumprods <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCumprods, x, dim., rows, cols, FALSE, useNames) } #' @rdname rowCumsums #' @export rowCummins <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCummins, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCummins <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCummins, x, dim., rows, cols, FALSE, useNames) } #' @rdname rowCumsums #' @export rowCummaxs <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCummaxs, x, dim., rows, cols, TRUE, useNames) } #' @rdname rowCumsums #' @export colCummaxs <- function(x, rows = NULL, cols = NULL, dim. = dim(x), ..., useNames = NA) { .Call(C_rowCummaxs, x, dim., rows, cols, FALSE, useNames) } matrixStats/R/rowLogSumExps.R0000644000176200001440000000336314111740760015725 0ustar liggesusers#' Accurately computes the logarithm of the sum of exponentials across rows or #' columns #' #' Accurately computes the logarithm of the sum of exponentials across rows or #' columns. #' #' @inheritParams rowAlls #' @inheritParams logSumExp #' #' @param lx A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. #' Typically \code{lx} are \eqn{log(x)} values. #' #' @param rows,cols A \code{\link[base]{vector}} indicating subset of rows #' (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting #' is done. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, any missing values are #' ignored, otherwise not. #' #' @param dim. An \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length two specifying the dimension of \code{x}, also when not a #' \code{\link[base]{matrix}}. #' #' @param ... Not used. #' #' @return A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N #' (K). #' #' @section Benchmarking: #' These methods are implemented in native code and have been optimized for #' speed and memory. #' #' @author Native implementation by Henrik Bengtsson. Original R code by #' Nakayama ??? (Japan). #' #' @seealso To calculate the same on vectors, \code{\link{logSumExp}}(). #' #' @keywords array #' @export rowLogSumExps <- function(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = NA) { has_na <- TRUE .Call(C_rowLogSumExps, lx, dim., rows, cols, na.rm, has_na, TRUE, useNames) } #' @rdname rowLogSumExps #' @export colLogSumExps <- function(lx, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(lx), ..., useNames = NA) { has_na <- TRUE .Call(C_rowLogSumExps, lx, dim., rows, cols, na.rm, has_na, FALSE, useNames) } matrixStats/R/x_OP_y.R0000644000176200001440000000526514074030335014325 0ustar liggesusers#' Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)' #' #' Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)', where OP can be #' +, -, *, and /. For + and *, na.rm = TRUE will drop missing values first. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} or \code{\link[base]{logical}} #' NxK \code{\link[base]{matrix}}. #' #' @param y A \code{\link[base]{numeric}} or \code{\link[base]{logical}} #' \code{\link[base]{vector}} of length L. #' #' @param OP A \code{\link[base]{character}} specifying which operator to use. #' #' @param xrows,xcols A \code{\link[base]{vector}} indicating subset of rows #' (and/or columns) to operate over 'x'. If \code{\link[base]{NULL}}, no #' subsetting is done. #' #' @param idxs A \code{\link[base]{vector}} indicating subset of elements to #' operate over 'y'. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param commute If \code{\link[base:logical]{TRUE}}, 'y OP x' ('t(y OP #' t(x))') is calculated, otherwise 'x OP y' ('t(t(x) OP y)'). #' #' @return Returns a \code{\link[base]{numeric}} NxK #' \code{\link[base]{matrix}}. #' #' @example incl/x_OP_y.R #' #' @section Missing values: If \code{na.rm = TRUE}, then missing values are #' "dropped" before applying the operator to each pair of values. For #' instance, if \code{x[1, 1]} is a missing value, then the result of #' \code{x[1, 1] + y[1]} equals \code{y[1]}. If also \code{y[1]} is a missing #' value, then the result is a missing value. This only applies to additions #' and multiplications. For subtractions and divisions, argument \code{na.rm} #' is ignored. #' #' @author Henrik Bengtsson #' #' @keywords internal #' @export x_OP_y <- function(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) { commute <- as.logical(commute) na.rm <- as.logical(na.rm) if (is.character(OP)) { op <- charmatch(OP, c("+", "-", "*", "/"), nomatch = 0L) if (op == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "OP", sQuote(OP))) } } else { op <- as.integer(OP) } .Call(C_x_OP_y, x, y, dim(x), op, xrows, xcols, yidxs, commute, na.rm, TRUE, FALSE) } #' @rdname x_OP_y #' @export t_tx_OP_y <- function(x, y, OP, xrows = NULL, xcols = NULL, yidxs = NULL, commute = FALSE, na.rm = FALSE) { commute <- as.logical(commute) na.rm <- as.logical(na.rm) if (is.character(OP)) { op <- charmatch(OP, c("+", "-", "*", "/"), nomatch = 0L) if (op == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "OP", sQuote(OP))) } } else { op <- as.integer(OP) } .Call(C_x_OP_y, x, y, dim(x), op, xrows, xcols, yidxs, commute, na.rm, TRUE, TRUE) } matrixStats/R/binMeans.R0000644000176200001440000001102214074030335014650 0ustar liggesusers#' Fast mean calculations in non-overlapping bins #' #' Computes the sample means in non-overlapping bins #' #' \code{binMeans(x, bx, right = TRUE)} gives equivalent results as #' \code{rev(binMeans(-x, bx = sort(-bx), right = FALSE))}, but is faster. #' #' @inheritParams rowAlls #' #' @param y A \code{\link[base]{numeric}} or \code{\link[base]{logical}} #' \code{\link[base]{vector}} of K values to calculate means on. #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K #' positions for to be binned. #' #' @param bx A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 #' ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, #' \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, missing values in \code{y} #' are dropped before calculating the mean, otherwise not. #' #' @param count If \code{\link[base:logical]{TRUE}}, the number of data points #' in each bins is returned as attribute \code{count}, which is an #' \code{\link[base]{integer}} \code{\link[base]{vector}} of length B. #' #' @param right If \code{\link[base:logical]{TRUE}}, the bins are right-closed #' (left open), otherwise left-closed (right open). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length B. #' #' @section Missing and non-finite values: #' Data points where either of \code{y} and \code{x} is missing are dropped #' (and therefore are also not counted). Non-finite values in \code{y} are #' not allowed and gives an error. Missing values in \code{bx} are not allowed #' and gives an error. #' #' @example incl/binMeans.R #' #' @author Henrik Bengtsson with initial code contributions by #' Martin Morgan [1]. #' #' @seealso \code{\link{binCounts}}(). \code{\link[stats]{aggregate}} and #' \code{\link[base]{mean}}(). #' #' @references [1] R-devel thread \emph{Fastest non-overlapping binning mean #' function out there?} on Oct 3, 2012\cr #' #' @keywords univar #' @export binMeans <- function(y, x, idxs = NULL, bx, na.rm = TRUE, count = TRUE, right = FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'y': if (!is.numeric(y) && !is.logical(y)) { stop(sprintf("Argument '%s' is not numeric or logical: %s", "y", mode(y))) } if (is.numeric(y) && !is.integer(y) && any(is.infinite(y))) { stop(sprintf("Argument '%s' must not contain infinite values", "y")) } n <- length(y) # Argument 'x': if (!is.numeric(x)) { stop(sprintf("Argument '%s' is not numeric: %s", "x", mode(x))) } if (length(x) != n) { stop(sprintf("Argument '%s' and '%s' have different lengths: %.0f != %.0f", "y", "x", length(y), length(x))) } # Argument 'bx': if (!is.numeric(bx)) { stop(sprintf("Argument '%s' is not numeric: %s", "bx", mode(bx))) } if (any(is.infinite(bx))) { stop(sprintf("Argument '%s' must not contain infinite values", "bx")) } if (is.unsorted(bx)) { stop(sprintf("Argument '%s' is not ordered", "bx")) } # Argument 'na.rm': if (!is.logical(na.rm)) { stop(sprintf("Argument '%s' is not logical: %s", "na.rm", mode(na.rm))) } # Argument 'count': if (!is.logical(count)) { stop(sprintf("Argument '%s' is not logical: %s", "count", mode(count))) } # Apply subset if (!is.null(idxs)) { x <- x[idxs] y <- y[idxs] } # Argument 'right': right <- as.logical(right) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Preprocessing of (x, y) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values in 'x' keep <- which(!is.na(x)) if (length(keep) < n) { x <- x[keep] y <- y[keep] n <- length(y) } keep <- NULL # Not needed anymore # Drop missing values in 'y'? if (na.rm) { keep <- which(!is.na(y)) if (length(keep) < n) { x <- x[keep] y <- y[keep] } keep <- NULL # Not needed anymore } # Order (x, y) by increasing x. # If 'x' is already sorted, the overhead of (re)sorting is # relatively small. x <- sort.int(x, method = "quick", index.return = TRUE) y <- y[x$ix] x <- x$x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bin # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- as.numeric(y) x <- as.numeric(x) bx <- as.numeric(bx) count <- as.logical(count) .Call(C_binMeans, y, x, bx, count, right) } matrixStats/R/999.package.R0000644000176200001440000000154614063411361015052 0ustar liggesusers#' Package matrixStats #' #' High-performing functions operating on rows and columns of matrices, e.g. #' col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions #' optimized per data type and for subsetted calculations such that both memory #' usage and processing time is minimized. There are also optimized #' vector-based methods, e.g. binMeans(), madDiff() and weightedMedian(). #' #' @section How to cite this package: #' Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and #' Columns of Matrices (and to Vectors). R package version 0.52.2. #' https://github.com/HenrikBengtsson/matrixStats #' #' @author Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola #' Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder #' #' @keywords package #' #' @name matrixStats-package #' @aliases matrixStats #' @docType package NULL matrixStats/R/rowProds.R0000644000176200001440000000666414074054377014767 0ustar liggesusers#' Calculates the product for each row (column) in a matrix #' #' Calculates the product for each row (column) in a matrix. #' #' If \code{method = "expSumLog"}, then then \code{\link{product}}() function is #' used, which calculates the product via the logarithmic transform (treating #' negative values specially). This improves the precision and lowers the risk #' for numeric overflow. If \code{method = "direct"}, the direct product is #' calculated via the \code{\link[base]{prod}}() function. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param method A \code{\link[base]{character}} string specifying how each #' product is calculated. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Missing values: #' Note, if \code{method = "expSumLog"}, \code{na.rm = FALSE}, and \code{x} #' contains missing values (\code{\link[base]{NA}} or #' \code{\link[base:is.finite]{NaN}}), then the calculated value is also #' missing value. Note that it depends on platform whether #' \code{\link[base:is.finite]{NaN}} or \code{\link[base]{NA}} is returned #' when an \code{\link[base:is.finite]{NaN}} exists, cf. #' \code{\link[base]{is.nan}}(). #' #' @author Henrik Bengtsson #' #' @keywords array iteration robust univar #' @export rowProds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = NA) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preallocate result (zero:ed by default) n <- nrow(x) y <- double(length = n) # Nothing todo? if (n == 0L) return(y) # Argument 'method': method <- method[1L] # How to calculate product? if (method == "expSumLog") { prod <- product } else if (method == "direct") { } else { stop(sprintf("Unknown value of argument '%s': %s", "method", method)) } for (ii in seq_len(n)) { y[ii] <- prod(x[ii, , drop = TRUE], na.rm = na.rm) } # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) names(y) <- names } else { names(y) <- NULL } } y } #' @rdname rowProds #' @export colProds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, method = c("direct", "expSumLog"), ..., useNames = NA) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preallocate result (zero:ed by default) n <- ncol(x) y <- double(length = n) # Nothing todo? if (n == 0L) return(y) # Argument 'method': method <- method[1L] # How to calculate product? if (method == "expSumLog") { prod <- product } else if (method == "direct") { } else { stop(sprintf("Unknown value of argument '%s': %s", "method", method)) } for (ii in seq_len(n)) { y[ii] <- prod(x[, ii, drop = TRUE], na.rm = na.rm) } # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) names(y) <- names } else { names(y) <- NULL } } y } matrixStats/R/sum2.R0000644000176200001440000000611714074030335014013 0ustar liggesusers#' Fast sum over subset of vector elements #' #' Computes the sum of all or a subset of values. #' #' \code{sum2(x, idxs)} gives equivalent results as \code{sum(x[idxs])}, but #' is faster and more memory efficient since it avoids the actual subsetting #' which requires copying of elements and garbage collection thereof. #' #' Furthermore, \code{sum2(x, mode = "double")} is equivalent to #' \code{sum(as.numeric(x))} and may therefore be used to avoid integer #' overflow(*), but at the same time is much more memory efficient that #' the regular \code{sum()} function when \code{x} is an #' \code{\link[base]{integer}} vector. #' #' (*) \emph{In R (>= 3.5.0), \code{sum(x)} will no longer integer overflow #' and return \code{NA_integer_}. Instead it will return the correct sum in #' form of a double value.} #' #' @inheritParams rowAlls #' @inheritParams weightedMad #' #' @param mode A \code{\link[base]{character}} string specifying the data type #' of the return value. Default is to use the same mode as argument \code{x}, #' unless it is logical when it defaults to \code{"integer"}. #' #' @return Returns a scalar of the data type specified by argument \code{mode}. #' If \code{mode = "integer"}, then integer overflow occurs if the \emph{sum} #' is outside the range of defined integer values. #' Note that the intermediate sum (\code{sum(x[1:n])}) is internally #' represented as a floating point value and will therefore never be outside of #' the range. #' If \code{mode = "integer"} and \code{typeof{x} == "double"}, then a warning #' is generated. #' #' @example incl/sum2.R #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{sum}}(). #' To efficiently average over a subset, see \code{\link{mean2}}(). #' #' @keywords univar internal #' @export sum2 <- function(x, idxs = NULL, na.rm = FALSE, mode = typeof(x), ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': x_mode <- typeof(x) x_logical <- (x_mode == "logical") if (!is.numeric(x) && !x_logical) { stop(sprintf("Argument '%s' is not numeric or logical: %s", "x", x_mode)) } # Argument 'na.rm': if (!is.logical(na.rm)) { stop(sprintf("Argument '%s' is not logical: %s", "na.rm", mode(na.rm))) } # Argument 'mode': mode <- mode[1L] if (x_logical && mode == "logical") { ## SPECIAL CASE: If `x` is logical, default mode should be `integer` mode_idx <- 1L } else if (mode == "integer") { mode_idx <- 1L ## Coercing results from double to integer is likely a mistake if (x_mode == "double") { warning('sum2(x, mode = "integer") called with typeof(x) == "double"; did you mean to use as.integer(sum2(x))?') } } else if (mode == "double") { mode_idx <- 2L } else { stop(sprintf("Unknown value of argument '%s': %s", "mode", mode)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Call(C_sum2, x, idxs, na.rm, mode_idx) } matrixStats/R/rowQuantiles.R0000644000176200001440000002026514111764040015621 0ustar liggesusers#' Estimates quantiles for each row (column) in a matrix #' #' Estimates quantiles for each row (column) in a matrix. #' #' @inheritParams rowAlls #' #' @param x An \code{\link[base]{integer}}, \code{\link[base]{numeric}} or #' \code{\link[base]{logical}} NxK \code{\link[base]{matrix}} with N >= 0. #' #' @param probs A \code{\link[base]{numeric}} \code{\link[base]{vector}} of J #' probabilities in [0, 1]. #' #' @param type An \code{\link[base]{integer}} specify the type of estimator. #' See \code{\link[stats]{quantile}} for more details. #' #' @param ... Additional arguments passed to \code{\link[stats]{quantile}}. #' #' @param drop If TRUE, singleton dimensions in the result are dropped, #' otherwise not. #' #' @return Returns a NxJ (KxJ) \code{\link[base]{matrix}}, where N (K) is the #' number of rows (columns) for which the J quantiles are calculated. #' The return type is either integer or numeric depending on \code{type}. #' #' @example incl/rowQuantiles.R #' #' @author Henrik Bengtsson #' @seealso \code{\link[stats]{quantile}}. #' @keywords array iteration robust univar #' #' @importFrom stats quantile #' @export rowQuantiles <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, ..., useNames = NA, drop = TRUE) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) if (!is.numeric(x) && !is.integer(x) && !is.logical(x)) { .Defunct(msg = sprintf("Argument 'x' is of type %s. Only 'integer', 'numeric', and 'logical' is supported.", sQuote(storage.mode(x)))) #nolint } # Argument 'probs': if (anyMissing(probs)) { stop(sprintf("Argument '%s' must not contain missing values", "probs")) } eps <- 100 * .Machine$double.eps if (any((probs < -eps | probs > 1 + eps))) { stop(sprintf("Argument '%s' is out of range [0-eps, 1+eps]: %g", "probs", probs)) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Coerce? # if (is.logical(x)) { # storage.mode(x) <- "integer" # } # Argument 'x': nrow <- nrow(x) ncol <- ncol(x) # Allocate result na_value <- NA_real_ if (type != 7L) storage.mode(na_value) <- storage.mode(x) q <- matrix(na_value, nrow = nrow, ncol = length(probs)) if (nrow > 0L && ncol > 0L) { na_rows <- rowAnyMissings(x) has_na <- any(na_rows) if (!has_na) na.rm <- FALSE if (!has_na && type == 7L) { n <- ncol idxs <- 1 + (n - 1) * probs idxs_lo <- floor(idxs) idxs_hi <- ceiling(idxs) partial <- sort.int(unique(c(idxs_lo, idxs_hi))) # Adjust? idxs_adj <- which(idxs > idxs_lo) adj <- (length(idxs_adj) > 0L) # Adjust if (adj) { idxs_hi <- idxs_hi[idxs_adj] w <- (idxs - idxs_lo)[idxs_adj] q_lo <- matrix(na_value, nrow = length(idxs_adj), ncol = nrow) q_hi <- matrix(na_value, nrow = length(idxs_adj), ncol = nrow) cols <- seq_len(ncol) for (rr in seq_len(nrow)) { x_rr <- .subset(x, rr, cols, drop = TRUE) xp_rr <- sort.int(x_rr, partial = partial) q_rr <- .subset(xp_rr, idxs_lo) q[rr,] <- q_rr q_hi[,rr] <- .subset(xp_rr, idxs_hi) q_lo[,rr] <- .subset(q_rr, idxs_adj) # Not needed anymore x_rr <- xp_rr <- NULL } q_adj <- (1 - w) * q_lo + w * q_hi for (cc in seq_along(idxs_adj)) { q[, idxs_adj[cc]] <- q_adj[cc, , drop = TRUE] } # Not needed anymore q_adj <- q_lo <- q_hi <- NULL } else { cols <- seq_len(ncol) for (rr in seq_len(nrow)) { x_rr <- .subset(x, rr, cols, drop = TRUE) xp_rr <- sort.int(x_rr, partial = partial) q[rr,] <- .subset(xp_rr, idxs_lo) # Not needed anymore x_rr <- xp_rr <- NULL } } storage.mode(q) <- "numeric" } else { # For each row... rows <- seq_len(nrow) # Rows with NAs should return all NAs (so skip those) if (has_na && !na.rm) rows <- rows[!na_rows] for (kk in rows) { xkk <- x[kk, ] if (na.rm) xkk <- xkk[!is.na(xkk)] q[kk, ] <- quantile(xkk, probs = probs, na.rm = FALSE, type = type, ...) } } # if (type ...) } # Add dim names digits <- max(2L, getOption("digits")) colnames(q) <- sprintf("%.*g%%", digits, 100 * probs) # Preserve names attribute? if (is.na(useNames) || useNames) { rownames(q) <- rownames(x) } else { rownames(q) <- NULL } # Drop singleton dimensions? if (drop) { q <- drop(q) } q } #' @importFrom stats quantile #' @rdname rowQuantiles #' @export colQuantiles <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, ..., useNames = NA, drop = TRUE) { # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) if (!is.numeric(x) && !is.integer(x) && !is.logical(x)) { .Defunct(msg = sprintf("Argument 'x' is of type %s. Only 'integer', 'numeric', and 'logical' is supported.", sQuote(storage.mode(x)))) #nolint } # Argument 'probs': if (anyMissing(probs)) { stop(sprintf("Argument '%s' must not contain missing values", "probs")) } eps <- 100 * .Machine$double.eps if (any((probs < -eps | probs > 1 + eps))) { stop(sprintf("Argument '%s' is out of range [0-eps, 1+eps]: %g", "probs", probs)) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Coerce? # if (is.logical(x)) { # storage.mode(x) <- "integer" # } # Argument 'x': nrow <- nrow(x) ncol <- ncol(x) # Allocate result na_value <- NA_real_ if (type != 7L) storage.mode(na_value) <- storage.mode(x) q <- matrix(na_value, nrow = ncol, ncol = length(probs)) if (nrow > 0L && ncol > 0L) { na_cols <- colAnyMissings(x) has_na <- any(na_cols) if (!has_na) na.rm <- FALSE if (!has_na && type == 7L) { n <- nrow idxs <- 1 + (n - 1) * probs idxs_lo <- floor(idxs) idxs_hi <- ceiling(idxs) partial <- sort.int(unique(c(idxs_lo, idxs_hi))) # Adjust? idxs_adj <- which(idxs > idxs_lo) adj <- (length(idxs_adj) > 0L) if (adj) { idxs_hi <- idxs_hi[idxs_adj] w <- (idxs - idxs_lo)[idxs_adj] q_lo <- matrix(na_value, nrow = length(idxs_adj), ncol = ncol) q_hi <- matrix(na_value, nrow = length(idxs_adj), ncol = ncol) rows <- seq_len(nrow) for (cc in seq_len(ncol)) { x_cc <- .subset(x, rows, cc, drop = TRUE) xp_cc <- sort.int(x_cc, partial = partial) q_cc <- .subset(xp_cc, idxs_lo) q[cc,] <- q_cc q_hi[,cc] <- .subset(xp_cc, idxs_hi) q_lo[,cc] <- .subset(q_cc, idxs_adj) # Not needed anymore x_cc <- xp_cc <- NULL } q_adj <- (1 - w) * q_lo + w * q_hi for (cc in seq_along(idxs_adj)) { q[, idxs_adj[cc]] <- q_adj[cc, , drop = TRUE] } # Not needed anymore q_adj <- q_lo <- q_hi <- NULL } else { rows <- seq_len(nrow) for (cc in seq_len(ncol)) { x_cc <- .subset(x, rows, cc, drop = TRUE) xp_cc <- sort.int(x_cc, partial = partial) q[cc,] <- .subset(xp_cc, idxs_lo) # Not needed anymore x_cc <- xp_cc <- NULL } } } else { # For each column... cols <- seq_len(ncol) # Columns with NAs should return all NAs (so skip those) if (has_na && !na.rm) cols <- cols[!na_cols] for (kk in cols) { xkk <- x[, kk] if (na.rm) xkk <- xkk[!is.na(xkk)] q[kk, ] <- quantile(xkk, probs = probs, na.rm = FALSE, type = type, ...) } } # if (type ...) } # Add dim names digits <- max(2L, getOption("digits")) colnames(q) <- sprintf("%.*g%%", digits, 100 * probs) # Preserve names attribute? if (is.na(useNames) || useNames) { rownames(q) <- colnames(x) } else { rownames(q) <- NULL } # Drop singleton dimensions? if (drop) { q <- drop(q) } q } matrixStats/R/zzz.R0000644000176200001440000000137214111755254013766 0ustar liggesusers.onLoad <- function(libname, pkgname) { ## https://github.com/HenrikBengtsson/matrixStats/issues/183 onLoadSetCenterOnScalar() ## https://github.com/HenrikBengtsson/matrixStats/issues/187 onLoadSetCenterOnUse() ## https://github.com/HenrikBengtsson/matrixStats/issues/16 onLoadSetVarsFormulaFreq() onLoadSetVarsFormulaOnMistake() ## Deprecate validateIndices() /HB 2021-08-26 action <- Sys.getenv("R_MATRIXSTATS_VALIDATEINDICES", NA_character_) if (!is.na(action)) { action <- match.arg(action, choices = c("deprecated", "defunct")) options(matrixStats.validateIndices = action) } } #' @useDynLib "matrixStats", .registration = TRUE, .fixes = "C_" .onUnload <- function(libpath) { library.dynam.unload("matrixStats", libpath) } matrixStats/R/rowVars.R0000644000176200001440000002231214111740760014565 0ustar liggesusers#' Variance estimates for each row (column) in a matrix #' #' Variance estimates for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param center (optional; a vector or length N (K)) If the row (column) #' means are already estimated, they can be pre-specified using this argument. #' This avoid re-estimating them again. (*Warning: If biased estimated are #' given, the estimate of the spread will also be biased.*) #' If NULL (default), the row/column means are estimated internally. #' #' @param ... Additional arguments passed to \code{rowMeans()} and #' \code{rowSums()}. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @example incl/rowMethods.R #' #' @author Henrik Bengtsson #' #' @seealso See \code{rowMeans()} and \code{rowSums()} in #' \code{\link[base]{colSums}}(). #' @keywords array iteration robust univar #' @export rowVars <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) { if (is.null(center)) { has_nas <- TRUE sigma2 <- .Call(C_rowVars, x, dim., rows, cols, na.rm, has_nas, TRUE, useNames) return(sigma2) } ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("rowVars") ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats names <- rownames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != nrow(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(rows)) { validateScalarCenter(center, nrow(x), "rows") center <- rep(center, times = nrow(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "rows", "x", length(center), nrow(x))) } } if (!is.null(rows)) center <- center[rows] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) ncol <- ncol(x) # Nothing to do? if (ncol <= 1L) { x <- rep(NA_real_, times = nrow(x)) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } } return(x) } if (na.rm) { # Count number of missing values in each row na_counts <- rowCounts(x, value = NA_real_, na.rm = FALSE) # Number of non-missing values n <- ncol - na_counts has_na <- any(na_counts > 0L) if (has_na) { # Set NA estimates for rows with less than two observations n[n <= 1L] <- NA_integer_ } else { # No need to check for missing values below na.rm <- FALSE } } else { # Assuming no missing values n <- ncol } validate <- validateVarsCenterFormula() if (!validate) { ## The primary formula for estimating the sample variance x <- (x - center)^2 ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN x <- rowMeans(x, na.rm = na.rm) } else { ## The alternative formula for estimating the sample variance x2 <- x * x x2 <- rowMeans(x2, na.rm = na.rm) x2 <- (x2 - center^2) ## The primary formula for estimating the sample variance x <- (x - center)^2 x <- rowMeans(x, na.rm = na.rm) ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN equal <- all.equal(x, x2, check.attribute = FALSE) x2 <- NULL if (!isTRUE(equal)) { fcn <- getOption("matrixStats.vars.formula.onMistake", "deprecated") fcn <- switch(fcn, deprecated = .Deprecated, .Defunct) fcn(msg = sprintf("rowVars() was called with a 'center' argument that does not meet the assumption that estimating the variance using the 'primary' or the 'alternative' formula does not matter as they should give the same results. This suggests a misunderstanding on what argument 'center' should be. The reason was: %s", equal)) } } x <- x * (n / (n - 1)) # Preserve names attribute? if (is.na(useNames) || useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } x } #' @rdname rowVars #' @export colVars <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) { if (is.null(center)) { has_nas <- TRUE sigma2 <- .Call(C_rowVars, x, dim., rows, cols, na.rm, has_nas, FALSE, useNames) return(sigma2) } ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("colVars") ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats names <- colnames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != ncol(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(cols)) { validateScalarCenter(center, ncol(x), "columns") center <- rep(center, times = ncol(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "columns", "x", length(center), ncol(x))) } } if (!is.null(cols)) center <- center[cols] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) nrow <- nrow(x) # Nothing to do? if (nrow <= 1L) { x <- rep(NA_real_, times = ncol(x)) # Update names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } } return(x) } if (na.rm) { # Count number of missing values in each column na_counts <- colCounts(x, value = NA_real_, na.rm = FALSE) # Number of non-missing values n <- nrow - na_counts has_na <- any(na_counts > 0L) if (has_na) { # Set NA estimates for rows with less than two observations n[n <= 1L] <- NA_integer_ } else { # No need to check for missing values below na.rm <- FALSE } } else { # Assuming no missing values n <- nrow } validate <- validateVarsCenterFormula() if (!validate) { ## The primary formula for estimating the sample variance for (cc in seq_len(ncol(x))) { x[, cc] <- (x[, cc] - center[cc])^2 } x <- colMeans(x, na.rm = na.rm) ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN } else { ## The alternative formula for estimating the sample variance x2 <- x * x x2 <- colMeans(x2, na.rm = na.rm) x2 <- (x2 - center^2) ## The primary formula for estimating the sample variance for (cc in seq_len(ncol(x))) { x[, cc] <- (x[, cc] - center[cc])^2 } x <- colMeans(x, na.rm = na.rm) ## SPECIAL: The variance estimate when the mean estimate is infinite should be NaN ## just like for stats::var() - not Inf, e.g. var(c(0,Inf)) == NaN x[is.infinite(center)] <- NaN equal <- all.equal(x, x2) x2 <- NULL if (!isTRUE(equal)) { fcn <- getOption("matrixStats.vars.formula.onMistake", "deprecated") fcn <- switch(fcn, deprecated = .Deprecated, .Defunct) fcn(sprintf("colVars() was called with a 'center' argument that does not meet the assumption that estimating the variance using the 'primary' or the 'alternative' formula does not matter as they should give the same results. This suggests a misunderstanding on what argument 'center' should be. The reason was: %s", equal)) } } x <- x * (n / (n - 1)) # Preserve names attribute? if (is.na(useNames) || useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(x) <- names } } else { names(x) <- NULL } x } matrixStats/R/rowSums2.R0000644000176200001440000000162014111740760014662 0ustar liggesusers#' Calculates the sum for each row (column) in a matrix #' #' Calculates the sum for each row (column) in a matrix. #' #' The implementation of \code{rowSums2()} and \code{colSums2()} is #' optimized for both speed and memory. #' #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson #' #' @keywords array iteration robust univar #' @export rowSums2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { has_nas <- TRUE .Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, TRUE, useNames) } #' @rdname rowSums2 #' @export colSums2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { has_nas <- TRUE .Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, FALSE, useNames) } matrixStats/R/logSumExp.R0000644000176200001440000000470014074030335015044 0ustar liggesusers#' Accurately computes the logarithm of the sum of exponentials #' #' Accurately computes the logarithm of the sum of exponentials, that is, #' \eqn{log(sum(exp(lx)))}. If \eqn{lx = log(x)}, then this is equivalently to #' calculating \eqn{log(sum(x))}. #' #' This function, which avoid numerical underflow, is often used when computing #' the logarithm of the sum of small numbers (\eqn{|x| << 1}) such as #' probabilities. #' #' This is function is more accurate than \code{log(sum(exp(lx)))} when the #' values of \eqn{x = exp(lx)} are \eqn{|x| << 1}. The implementation of this #' function is based on the observation that \deqn{ log(a + b) = [ la = log(a), #' lb = log(b) ] = log( exp(la) + exp(lb) ) = la + log ( 1 + exp(lb - la) ) } #' Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is less likely #' that the computation of \eqn{1 + exp(lb - la)} will not underflow/overflow #' numerically. Because of this, the overall result from this function should #' be more accurate. Analogously to this, the implementation of this function #' finds the maximum value of \code{lx} and subtracts it from the remaining #' values in \code{lx}. #' #' @inheritParams rowAlls #' #' @param lx A \code{\link[base]{numeric}} \code{\link[base]{vector}}. #' Typically \code{lx} are \eqn{log(x)} values. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @section Benchmarking: This method is optimized for correctness, that #' avoiding underflowing. It is implemented in native code that is optimized #' for speed and memory. #' #' @example incl/logSumExp.R #' #' @author Henrik Bengtsson #' #' @seealso To compute this function on rows or columns of a matrix, see #' \code{\link{rowLogSumExps}}(). #' #' For adding \emph{two} double values in native code, R provides the C #' function \code{logspace_add()} [1]. For properties of the #' log-sum-exponential function, see [2]. #' #' @references #' [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr #' [2] Laurent El Ghaoui, \emph{Hyper-Textbook: Optimization Models #' and Applications}, University of California at Berkeley, August 2012. #' (Chapter 'Log-Sum-Exp (LSE) Function and Properties') \cr #' [3] R-help thread \emph{logsumexp function in R}, 2011-02-17. #' \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr #' #' @export logSumExp <- function(lx, idxs = NULL, na.rm = FALSE, ...) { has_na <- TRUE .Call(C_logSumExp, as.numeric(lx), idxs, as.logical(na.rm), has_na) } matrixStats/R/indexByRow.R0000644000176200001440000000221014074030335015205 0ustar liggesusers#' Translates matrix indices by rows into indices by columns #' #' Translates matrix indices by rows into indices by columns. #' #' @inheritParams rowAlls #' #' @param dim A \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length two specifying the length of the "template" matrix. #' #' @return Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of #' indices. #' #' @section Known limitations: #' The current implementation does not support long-vector indices, #' because both input and output indices are of type integers. #' This means that the indices in argument \code{idxs} can only be in #' range [1,2^31-1]. Using a greater value will be coerced to #' \code{NA_integer_}. Moreover, returned indices can only be in the #' same range [1,2^31-1]. #' #' @example incl/indexByRow.R #' #' @author Henrik Bengtsson #' @keywords iteration logic #' @export indexByRow <- function(dim, idxs = NULL, ...) { if (is.matrix(dim)) { .Defunct(msg = "indexByRow(x) where 'x' is a matrix is defunct. Use indexByRow(dim(x)) instead.") } if (!is.null(idxs)) idxs <- as.integer(idxs) .Call(C_indexByRow, as.integer(dim), idxs) } matrixStats/R/rowDiffs.R0000644000176200001440000000160314111740760014705 0ustar liggesusers#' Calculates difference for each row (column) in a matrix #' #' Calculates difference for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams diff2 #' #' @return Returns a \code{\link[base]{numeric}} Nx(K-1) or (N-1)xK #' \code{\link[base]{matrix}}. #' #' @example incl/rowDiffs.R #' #' @author Henrik Bengtsson #' #' @seealso See also \code{\link{diff2}}(). #' @keywords array iteration robust univar #' @export rowDiffs <- function(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = NA) { .Call(C_rowDiffs, x, dim., rows, cols, lag, differences, TRUE, useNames) } #' @rdname rowDiffs #' @export colDiffs <- function(x, rows = NULL, cols = NULL, lag = 1L, differences = 1L, dim. = dim(x), ..., useNames = NA) { .Call(C_rowDiffs, x, dim., rows, cols, lag, differences, FALSE, useNames) } matrixStats/R/rowMeans2.R0000644000176200001440000000163114111740760015000 0ustar liggesusers#' Calculates the mean for each row (column) in a matrix #' #' Calculates the mean for each row (column) in a matrix. #' #' The implementation of \code{rowMeans2()} and \code{colMeans2()} is #' optimized for both speed and memory. #' #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson #' #' @keywords array iteration robust univar #' @export rowMeans2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { has_nas <- TRUE .Call(C_rowMeans2, x, dim., rows, cols, na.rm, has_nas, TRUE, useNames) } #' @rdname rowMeans2 #' @export colMeans2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { has_nas <- TRUE .Call(C_rowMeans2, x, dim., rows, cols, na.rm, has_nas, FALSE, useNames) } matrixStats/R/psortKM.R0000644000176200001440000000020014063411361014507 0ustar liggesusers.psortKM <- function(x, k = length(x), m = 1L, ...) { .Call(C_psortKM, as.numeric(x), k = as.integer(k), m = as.integer(m)) } matrixStats/R/rowRanks.R0000644000176200001440000001316114111740760014732 0ustar liggesusers#' Gets the rank of the elements in each row (column) of a matrix #' #' Gets the rank of the elements in each row (column) of a matrix. #' #' @inheritParams rowAlls #' #' @param ties.method A \code{\link[base]{character}} string specifying how #' ties are treated. For details, see below. #' #' @param preserveShape A \code{\link[base]{logical}} specifying whether the #' \code{\link[base]{matrix}} returned should preserve the input shape of #' \code{x}, or not. #' #' @return A \code{\link[base]{matrix}} of type \code{\link[base]{integer}} is #' returned, unless \code{ties.method = "average"} when it is of type #' \code{\link[base]{numeric}}. #' #' The \code{rowRanks()} function always returns an NxK #' \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) #' whose ranks are calculated. #' #' The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if #' \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}. #' #' Any \code{\link[base]{names}} of \code{x} are ignored and absent in the #' result. #' #' @details #' These functions rank values and treats missing values the same way as #' \code{\link[base]{rank}}(). #' For equal values ("ties"), argument \code{ties.method} determines how these #' are ranked among each other. More precisely, for the following values of #' \code{ties.method}, each index set of ties consists of: #' \itemize{ #' \item{\code{"first"} - increasing values that are all unique} #' \item{\code{"last"} - decreasing values that are all unique} #' \item{\code{"min"} - identical values equaling the minimum of #' their original ranks} #' \item{\code{"max"} - identical values equaling the maximum of #' their original ranks} #' \item{\code{"average"} - identical values that equal the sample mean of #' their original ranks. Because the average is calculated, the returned #' ranks may be non-integer values} #' \item{\code{"random"} - randomly shuffled values of their original ranks.} #' \item{\code{"dense"} - increasing values that are all unique and, #' contrary to \code{"first"}, never contain any gaps} #' } #' For more information on \code{ties.method = "dense"}, see \code{frank()} of #' the \pkg{data.table} package. #' For more information on the other alternatives, see \code{\link[base]{rank}}(). #' #' Note that, due to different randomization strategies, the shuffling order #' produced by these functions when using \code{ties.method = "random"} does #' not reproduce that of \code{\link[base]{rank}}(). #' #' \emph{WARNING: For backward-compatibility reasons, the default is #' \code{ties.method = "max"}, which differs from \code{\link[base]{rank}}() #' which uses \code{ties.method = "average"} by default. #' Since we plan to change the default behavior in a future version, we recommend #' to explicitly specify the intended value of argument \code{ties.method}.} #' #' @section Missing values: #' Missing values are ranked as \code{NA_integer_}, as with \code{na.last = "keep"} #' in the \code{\link[base]{rank}}() function. #' #' @section Performance: #' The implementation is optimized for both speed and memory. To avoid #' coercing to \code{\link[base]{double}}s (and hence memory allocation), #' there is a unique implementation for \code{\link[base]{integer}} matrices. #' Furthermore, it is more memory efficient to do #' \code{colRanks(x, preserveShape = TRUE)} than #' \code{t(colRanks(x, preserveShape = FALSE))}. #' #' @author Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding #' 'ties.method' support. Brian Montgomery for adding more 'ties.method's. #' Henrik Bengtsson adapted the original native #' implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in #' the \pkg{Biobase} package. #' #' @seealso #' For developers, see also Section Utility functions' in #' 'Writing R Extensions manual', particularly the #' native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}. #' @keywords array iteration robust univar #' #' @export rowRanks <- function(x, rows = NULL, cols = NULL, # max is listed twice so that it remains the default for now ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), ..., useNames = NA) { # Argument 'ties.method': ties.method <- ties.method[1L] ties_method <- charmatch(ties.method, c("average", "first", "last", "random", "max", "min", "dense"), nomatch = 0L) if (ties_method == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "ties.method", ties.method)) } # byrow = TRUE .Call(C_rowRanksWithTies, x, dim., rows, cols, ties_method, TRUE, useNames) } #' @rdname rowRanks #' @export colRanks <- function(x, rows = NULL, cols = NULL, # max is listed twice so that it remains the default for now ties.method = c("max", "average", "first", "last", "random", "max", "min", "dense"), dim. = dim(x), preserveShape = FALSE, ..., useNames = NA) { # Argument 'ties.method': ties.method <- ties.method[1L] # Argument 'preserveShape' preserveShape <- as.logical(preserveShape) ties_method <- charmatch(ties.method, c("average", "first", "last", "random", "max", "min", "dense"), nomatch = 0L) if (ties_method == 0L) { stop(sprintf("Unknown value of argument '%s': %s", "ties.method", ties.method)) } # byrow = FALSE y <- .Call(C_rowRanksWithTies, x, dim., rows, cols, ties_method, FALSE, useNames) if (!preserveShape) y <- t(y) y } matrixStats/R/weightedMedian.R0000644000176200001440000001026014074030335016035 0ustar liggesusers#' Weighted Median Value #' #' Computes a weighted median of a numeric vector. #' #' @inheritParams weightedMad #' #' @param na.rm a logical value indicating whether \code{\link[base]{NA}} #' values in \code{x} should be stripped before the computation proceeds, or #' not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s #' is done. #' #' @param interpolate If \code{\link[base:logical]{TRUE}}, linear interpolation #' is used to get a consistent estimate of the weighted median. #' #' @param ties If \code{interpolate == FALSE}, a character string specifying #' how to solve ties between two \code{x}'s that are satisfying the weighted #' median criteria. Note that at most two values can satisfy the criteria. #' When \code{ties} is \code{"min"} ("lower weighted median"), the smaller #' value of the two is returned and when it is \code{"max"} ("upper weighted #' median"), the larger value is returned. If \code{ties} #' is \code{"mean"}, the mean of the two values is returned. Finally, if #' \code{ties} is \code{"weighted"} (or \code{\link[base]{NULL}}) a weighted #' average of the two are returned, where the weights are weights of all values #' \code{x[i] <= x[k]} and \code{x[i] >= x[k]}, respectively. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive #' weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S}, the #' \emph{weighted median} is defined as the element \code{x[k]} for which the #' total weight of all elements \code{x[i] < x[k]} is less or equal to #' \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]} #' is less or equal to \code{S/2} (c.f. [1]). #' #' When using linear interpolation, the weighted mean of \code{x[k-1]} and #' \code{x[k]} with weights \code{S[k-1]} and \code{S[k]} corresponding to the #' cumulative weights of those two elements is used as an estimate. #' #' If \code{w} is missing then all elements of \code{x} are given the same #' positive weight. If all weights are zero, \code{\link[base:NA]{NA_real_}} is #' returned. #' #' If one or more weights are \code{Inf}, it is the same as these weights have #' the same weight and the others have zero. This makes things easier for cases #' where the weights are result of a division with zero. #' #' If there are missing values in \code{w} that are part of the calculation #' (after subsetting and dropping missing values in \code{x}), then the final #' result is always \code{NA} of the same type as \code{x}. #' #' The weighted median solves the following optimization problem: #' #' \deqn{\alpha^* = \arg_\alpha \min \sum_{i = 1}^{n} w_i |x_i-\alpha|} where #' \eqn{x = (x_1, x_2, \ldots, x_n)} are scalars and #' \eqn{w = (w_1, w_2, \ldots, w_n)} are the corresponding "weights" for each #' individual \eqn{x} value. #' #' @example incl/weightedMedian.R #' #' @author Henrik Bengtsson and Ola Hossjer, Centre for Mathematical Sciences, #' Lund University. Thanks to Roger Koenker, Econometrics, University of #' Illinois, for the initial ideas. #' #' @seealso \code{\link[stats]{median}}, \code{\link[base]{mean}}() and #' \code{\link{weightedMean}}(). #' #' @references [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to #' Algorithms, The MIT Press, Massachusetts Institute of Technology, 1989. #' #' @keywords univar robust #' @export weightedMedian <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, interpolate = is.null(ties), ties = NULL, ...) { # Argument 'w': if (is.null(w)) { w <- rep(1, times = length(x)) } else { w <- as.double(w) } # Argument 'na.rm': na.rm <- as.logical(na.rm) if (is.na(na.rm)) na.rm <- FALSE # Argument 'interpolate': interpolate <- as.logical(interpolate) # Argument 'ties': if (is.null(ties)) { ties_id <- 1L } else { if (ties == "weighted") { ties_id <- 1L } else if (ties == "min") { ties_id <- 2L } else if (ties == "max") { ties_id <- 4L } else if (ties == "mean") { ties_id <- 8L } else { stop(sprintf("Unknown value of argument '%s': %s", "ties", ties)) } } .Call(C_weightedMedian, x, w, idxs, na.rm, interpolate, ties_id) } matrixStats/R/000.DEPRECATION.R0000644000176200001440000001022714111754002015151 0ustar liggesusersdefunctShouldBeMatrixOrDim <- function(x) { x_class <- sQuote(class(x)[1]) x_name <- sQuote(as.character(substitute(x))) .Defunct(msg = sprintf("Argument %s is of class %s, but should be a matrix or 'dim.' should specify one. The use of a %s is not supported, the correctness of the result is not guaranteed. Please update your code accordingly.", x_name, x_class, x_class)) #nolint } defunctShouldBeMatrix <- function(x) { x_class <- sQuote(class(x)[1]) x_name <- sQuote(as.character(substitute(x))) .Defunct(msg = sprintf("Argument %s is of class %s, but should be a matrix. The use of a %s is not supported, the correctness of the result is not guaranteed. Please update your code accordingly.", x_name, x_class, x_class)) #nolint } defunctShouldBeMatrixOrVector <- function(x) { x_class <- sQuote(class(x)[1]) x_name <- sQuote(as.character(substitute(x))) .Defunct(msg = sprintf("Argument %s is of class %s, but should be a matrix or a vector. The use of a %s is not supported, the correctness of the result is not guaranteed. Please update your code accordingly.", x_name, x_class, x_class)) #nolint } validateScalarCenter <- function(center, n, dimname) { onScalar <- getOption("matrixStats.center.onScalar", NULL) ## Nothing to do? if (is.null(onScalar)) return() fcn <- switch(onScalar, deprecated = .Deprecated, defunct = .Defunct, NULL) ## Nothing to do? if (is.null(fcn)) return() msg <- sprintf("Argument '%s' should be of the same length as number of %s of '%s'. Use of a scalar value is deprecated: %s != %s", "center", dimname, "x", length(center), n) fcn(msg = msg, package = .packageName) } validateVarsCenterFormula <- local({ .curr <- 1 .next <- 1 function() { freq <- getOption("matrixStats.vars.formula.freq", NULL) ## Nothing to do? if (is.null(freq)) return(FALSE) ## never? if (freq <= 0) return(FALSE) ## always? if (is.infinite(freq)) return(TRUE) ## each time? if (freq == 1) return(TRUE) ## once in a while? .curr <<- .curr + 1 if (.curr <= .next) return(FALSE) .curr <<- 1 ## reset .next <<- freq TRUE } }) centerOnUse <- function(fcnname, calls = sys.calls(), msg = NULL) { value <- getOption("matrixStats.center.onUse") if (is.null(value) || identical(value, "ignore")) return() value <- match.arg(value, c("deprecated", "defunct")) fcn <- switch(value, deprecated = .Deprecated, defunct = .Defunct) if (is.null(msg)) { msg <- sprintf("Argument '%s' of %s::%s() is %s: %s", "center", .packageName, fcnname, value, deparse(calls[[1]])[1]) } fcn(msg = msg, package = .packageName) } onLoadSetVarsFormulaFreq <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.vars.formula.freq", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_VARS_FORMULA_FREQ", "50") if (nzchar(value)) { value <- as.numeric(value) } else { value <- NULL } options(matrixStats.vars.formula.freq = value) } onLoadSetVarsFormulaOnMistake <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.vars.formula.onMistake", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_VARS_FORMULA_MISTAKE", "defunct") if (is.na(value)) return() value <- match.arg(value, c("deprecated", "defunct")) options(matrixStats.vars.formula.onMistake = value) } onLoadSetCenterOnUse <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.center.onUse", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_CENTER_ONUSE", NA_character_) if (is.na(value)) return() value <- match.arg(value, c("ignore", "deprecated", "defunct")) options(matrixStats.center.onUse = value) } onLoadSetCenterOnScalar <- function() { ## Option is already set? if (!is.null(getOption("matrixStats.center.onScalar", NULL))) return() ## Is environment variable set? value <- Sys.getenv("R_MATRIXSTATS_CENTER_ONSCALAR", "deprecated") if (is.na(value)) return() value <- match.arg(value, c("ignore", "deprecated", "defunct")) options(matrixStats.center.onScalar = value) } matrixStats/R/anyMissing.R0000644000176200001440000000421314074054377015255 0ustar liggesusers#' Checks if there are any missing values in an object or not #' #' Checks if there are any missing values in an object or not. #' \emph{Please use \code{base::anyNA()} instead of \code{anyMissing()}, #' \code{colAnyNAs()} instead of \code{colAnyMissings()}, and #' \code{rowAnyNAs()} instead of \code{rowAnyMissings()}.} #' #' The implementation of this method is optimized for both speed and memory. #' The method will return \code{\link[base:logical]{TRUE}} as soon as a missing #' value is detected. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a #' \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or #' \code{\link[base]{NULL}}. #' #' @return Returns \code{\link[base:logical]{TRUE}} if a missing value was #' detected, otherwise \code{\link[base:logical]{FALSE}}. #' #' @examples #' x <- rnorm(n = 1000) #' x[seq(300, length(x), by = 100)] <- NA #' stopifnot(anyMissing(x) == any(is.na(x))) #' #' @author Henrik Bengtsson #' #' @seealso Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base}, #' which provides the same functionality as \code{anyMissing()}. #' #' @keywords iteration logic #' @export anyMissing <- function(x, idxs = NULL, ...) { ## All list or a data.frame? if (is.list(x)) { for (kk in seq_along(x)) { if (.Call(C_anyMissing, x[[kk]], idxs)) return(TRUE) } return(FALSE) } else { ## All other data types .Call(C_anyMissing, x, idxs) } } #' @rdname anyMissing #' @export colAnyMissings <- function(x, rows = NULL, cols = NULL, ..., useNames = NA) { colAnys(x, rows, cols, value = NA, ..., useNames = useNames) } #' @rdname anyMissing #' @export rowAnyMissings <- function(x, rows = NULL, cols = NULL, ..., useNames = NA) { rowAnys(x, rows, cols, value = NA, ..., useNames = useNames) } #' @rdname anyMissing #' @export colAnyNAs <- function(x, rows = NULL, cols = NULL, ..., useNames = NA) { colAnys(x, rows, cols, value = NA, ..., useNames = useNames) } #' @rdname anyMissing #' @export rowAnyNAs <- function(x, rows = NULL, cols = NULL, ..., useNames = NA) { rowAnys(x, rows, cols, value = NA, ..., useNames = useNames) } matrixStats/R/validateIndices.R0000644000176200001440000000170514111754710016215 0ustar liggesusers#' Validate indices #' #' Computes validated positive indices from given indices. #' #' @inheritParams rowAlls #' #' @param maxIdx The possible max index. #' #' @param allowOutOfBound Allow positive out of bound to indicate #' \code{\link[base]{NA}}. #' #' @return Returns a validated integers list indicating the indices. #' If some of the indices cannot be represented as an integer, the #' indices are returned as doubles. #' #' @example incl/validateIndices.R #' #' @keywords internal #' @export validateIndices <- function(idxs = NULL, maxIdx, allowOutOfBound = TRUE) { action <- getOption("matrixStats.validateIndices", "deprecated") if (!is.null(action)) { fcn <- switch(action, deprecated = .Deprecated, defunct = .Defunct) fcn(msg = sprintf("validateIndices() is %s and will eventually be removed from %s", action, .packageName)) } ans <- .Call(C_validate, idxs, maxIdx, allowOutOfBound) if (is.null(ans)) ans <- seq_len(maxIdx) ans } matrixStats/R/rowWeightedMedians.R0000644000176200001440000001002314074054377016721 0ustar liggesusers#' Calculates the weighted medians for each row (column) in a matrix #' #' Calculates the weighted medians for each row (column) in a matrix. #' #' The implementations of these methods are optimized for both speed and #' memory. If no weights are given, the corresponding #' \code{\link{rowMedians}}()/\code{colMedians()} is used. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' @inheritParams rowWeightedMeans #' #' @param ... Additional arguments passed to \code{\link{weightedMedian}}(). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @example incl/rowWeightedMedians.R #' #' @author Henrik Bengtsson #' #' @seealso Internally, \code{\link{weightedMedian}}() is used. #' See \code{\link{rowMedians}}() and \code{colMedians()} for non-weighted #' medians. #' #' @keywords array iteration robust univar #' @export rowWeightedMedians <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- ncol(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "columns", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(cols)) w <- w[cols] if (has_weights) { # Allocate results m <- nrow(x) if (m == 0L) return(double(0L)) res <- apply(x, MARGIN = 1L, FUN = function(x) { weightedMedian(x, w = w, na.rm = na.rm, ...) }) # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } w <- NULL # Not needed anymore } else { res <- rowMedians(x, na.rm = na.rm, useNames = useNames) } res } #' @rdname rowWeightedMedians #' @export colWeightedMedians <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- nrow(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "rows", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(rows)) w <- w[rows] if (has_weights) { # Allocate results m <- ncol(x) if (m == 0L) return(double(0L)) res <- apply(x, MARGIN = 2L, FUN = function(x) { weightedMedian(x, w = w, na.rm = na.rm, ...) }) # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } w <- NULL # Not needed anymore } else { res <- colMedians(x, na.rm = na.rm, useNames = useNames) } res } matrixStats/R/rowAlls.R0000644000176200001440000002557614111740760014564 0ustar liggesusers#' Checks if a value exists / does not exist in each row (column) of a matrix #' #' Checks if a value exists / does not exist in each row (column) of a matrix. #' #' These functions takes either a matrix or a vector as input. If a vector, #' then argument \code{dim.} must be specified and fulfill \code{prod(dim.) == #' length(x)}. The result will be identical to the results obtained when #' passing \code{matrix(x, nrow = dim.[1L], ncol = dim.[2L])}, but avoids #' having to temporarily create/allocate a matrix, if only such is needed #' only for these calculations. #' #' @param x An NxK \code{\link[base]{matrix}} or, if \code{dim.} is specified, #' an N * K \code{\link[base]{vector}}. #' #' @param idxs A \code{\link[base]{vector}} indicating subset of elements to #' operate over. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param rows A \code{\link[base]{vector}} indicating subset of rows to #' operate over. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param cols A \code{\link[base]{vector}} indicating subset of columns to #' operate over. If \code{\link[base]{NULL}}, no subsetting is done. #' #' @param value A value to search for. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, missing values are #' excluded. #' #' @param dim. An \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length two specifying the dimension of \code{x}, also when not a #' \code{\link[base]{matrix}}. \emph{Comment:} The reason for this argument #' being named with a period at the end is purely technical (we get a run-time #' error if we try to name it \code{dim}). #' #' @param ... Not used. #' #' @param useNames If \code{\link[base]{NA}}, the default behavior of the #' function about naming support is remained. If \code{\link[base:logical]{FALSE}}, #' no naming support is done. Else if \code{\link[base:logical]{TRUE}}, names #' attributes of result are set. #' #' @return \code{rowAlls()} (\code{colAlls()}) returns an #' \code{\link[base]{logical}} \code{\link[base]{vector}} of length N (K). #' Analogously for \code{rowAnys()} (\code{rowAlls()}). #' #' @section Logical \code{value}: #' When \code{value} is logical, the result is as if the function is applied #' on \code{as.logical(x)}. More specifically, if \code{x} is numeric, then #' all zeros are treated as \code{FALSE}, non-zero values as \code{TRUE}, #' and all missing values as \code{NA}. #' #' @example incl/rowAlls.R #' #' @author Henrik Bengtsson #' @seealso rowCounts #' @keywords array logic iteration univar #' @export rowAlls <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_rowCounts, x, dim., rows, cols, value, 0L, na.rm, has_nas, FALSE) res <- as.logical(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(res) <- names } } else { names(res) <- NULL } } } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(rowAlls(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(rowAlls(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export colAlls <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_colCounts, x, dim., rows, cols, value, 0L, na.rm, has_nas, FALSE) res <- as.logical(counts) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } names(res) <- names } } else { names(res) <- NULL } } } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(colAlls(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(colAlls(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export allValue <- function(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_count, x, idxs, FALSE, 1L, na.rm, has_nas) (counts == 0L) } else { counts <- .Call(C_count, x, idxs, FALSE, 0L, na.rm, has_nas) (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_count, x, idxs, value, 0L, na.rm, has_nas) as.logical(counts) } else { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (is.na(value)) { allValue(is.na(x), na.rm = na.rm, ...) } else { allValue(x == value, na.rm = na.rm, ...) } } } #' @rdname rowAlls #' @export rowAnys <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_rowCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_rowCounts, x, dim., rows, cols, value, 1L, na.rm, has_nas, useNames) res <- as.logical(counts) # Preserve names attribute names <- names(counts) res <- as.logical(counts) names(res) <- names } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(rowAnys(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(rowAnys(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export colAnys <- function(x, rows = NULL, cols = NULL, value = TRUE, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 0L, na.rm, has_nas, useNames) res <- (counts == 0L) } else { counts <- .Call(C_colCounts, x, dim., rows, cols, FALSE, 1L, na.rm, has_nas, useNames) res <- (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_colCounts, x, dim., rows, cols, value, 1L, na.rm, has_nas, useNames) # Preserve names attribute names <- names(counts) res <- as.logical(counts) names(res) <- names } else { if (!identical(dim(x), dim.)) dim(x) <- dim. if (!is.matrix(x)) defunctShouldBeMatrixOrDim(x) # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) if (is.na(value)) { return(colAnys(is.na(x), na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } else { z <- (x == value) dim <- dim(x) # for 0xN and Mx0 cases; needed in R (< 3.4.0) if (!identical(dim(z), as.integer(dim))) dim(z) <- dim if (isTRUE(useNames)) dimnames(z) <- dimnames(x) return(colAnys(z, na.rm = na.rm, dim. = dim., ..., useNames = useNames)) } } res } #' @rdname rowAlls #' @export anyValue <- function(x, idxs = NULL, value = TRUE, na.rm = FALSE, ...) { if (is.numeric(x) && is.logical(value) && !is.na(value)) { has_nas <- TRUE if (isTRUE(value)) { counts <- .Call(C_count, x, idxs, FALSE, 0L, na.rm, has_nas) (counts == 0L) } else { counts <- .Call(C_count, x, idxs, FALSE, 1L, na.rm, has_nas) (counts == 1L) } } else if (is.numeric(x) || is.logical(x)) { has_nas <- TRUE counts <- .Call(C_count, x, idxs, value, 1L, na.rm, has_nas) as.logical(counts) } else { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (is.na(value)) { anyValue(is.na(x), na.rm = na.rm, ...) } else { anyValue(x == value, na.rm = na.rm, ...) } } } matrixStats/R/rowSds.R0000644000176200001440000000240714074054377014420 0ustar liggesusers#' Standard deviation estimates for each row (column) in a matrix #' #' Standard deviation estimates for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param center (optional) The center, defaults to the row means for the #' SD estimators and row medians for the MAD estimators. #' #' @param ... Additional arguments passed to \code{rowMeans()} and #' \code{rowSums()}. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and #' \code{\link[stats:cor]{var}}. \code{\link{rowIQRs}}(). #' @keywords array iteration robust univar #' #' @export rowSds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) { x <- rowVars(x, rows = rows, cols = cols, na.rm = na.rm, center = center, dim. = dim., useNames = useNames, ...) sqrt(x) } #' @rdname rowSds #' @export colSds <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, dim. = dim(x), ..., useNames = NA) { x <- colVars(x, rows = rows, cols = cols, na.rm = na.rm, center = center, dim. = dim., useNames = useNames, ...) sqrt(x) } matrixStats/R/mean2.R0000644000176200001440000000425514074030335014130 0ustar liggesusers#' Fast averaging over subset of vector elements #' #' Computes the sample mean of all or a subset of values. #' #' \code{mean2(x, idxs)} gives equivalent results as \code{mean(x[idxs])}, #' but is faster and more memory efficient since it avoids the actual #' subsetting which requires copying of elements and garbage collection #' thereof. #' #' If \code{x} is \code{\link[base]{numeric}} and \code{refine = TRUE}, then a #' two-pass scan is used to calculate the average. The first scan calculates #' the total sum and divides by the number of (non-missing) values. In the #' second scan, this average is refined by adding the residuals towards the #' first average. The \code{\link[base]{mean}}() uses this approach. #' \code{mean2(..., refine = FALSE)} is almost twice as fast as #' \code{mean2(..., refine = TRUE)}. #' #' @inheritParams rowAlls #' @inheritParams weightedMad #' #' @param refine If \code{\link[base:logical]{TRUE}} and \code{x} is #' \code{\link[base]{numeric}}, then extra effort is used to calculate the #' average with greater numerical precision, otherwise not. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @example incl/mean2.R #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{mean}}(). #' To efficiently sum over a subset, see \code{\link{sum2}}(). #' @keywords univar internal #' @export mean2 <- function(x, idxs = NULL, na.rm = FALSE, refine = TRUE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x) && !is.logical(x)) { stop(sprintf("Argument '%s' is not numeric or logical: %s", "x", mode(x))) } # Argument 'na.rm': if (!is.logical(na.rm)) { stop(sprintf("Argument '%s' is not logical: %s", "na.rm", mode(na.rm))) } # Argument 'refine': if (!is.logical(refine)) { stop(sprintf("Argument '%s' is not logical: %s", "refine", mode(refine))) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Averaging # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Call(C_mean2, x, idxs, na.rm, refine) } matrixStats/R/weightedMad.R0000644000176200001440000001666714074054377015376 0ustar liggesusers#' Weighted Median Absolute Deviation (MAD) #' #' Computes a weighted MAD of a numeric vector. #' #' @inheritParams rowAlls #' #' @param x \code{\link[base]{vector}} of type \code{\link[base]{integer}}, #' \code{\link[base]{numeric}}, or \code{\link[base]{logical}}. #' #' @param w a vector of weights the same length as \code{x} giving the weights #' to use for each element of \code{x}. Negative weights are treated as zero #' weights. Default value is equal weight to all values. #' #' @param constant A \code{\link[base]{numeric}} scale factor, cf. #' \code{\link[stats]{mad}}. #' #' @param center Optional \code{\link[base]{numeric}} scalar specifying the #' center location of the data. If \code{\link[base]{NULL}}, it is estimated #' from data. #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @example incl/weightedMad.R #' #' @section Missing values: Missing values are dropped at the very beginning, #' if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not. #' #' @author Henrik Bengtsson #' #' @seealso For the non-weighted MAD, see \code{\link[stats]{mad}}. Internally #' \code{\link{weightedMedian}}() is used to calculate the weighted median. #' #' @importFrom stats mad median #' @keywords univar robust #' @export weightedMad <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ...) { # No weights? Fall back to non-weighted method. if (is.null(w)) { if (is.null(center)) center <- median(x, na.rm = na.rm) return(mad(x, center = center, constant = constant, na.rm = na.rm, ...)) } # Argument 'x': n <- length(x) # Argument 'w': if (length(w) != n) { stop(sprintf("The number of elements in arguments '%s' and '%s' does not match: %.0f != %.0f", "w", "x", length(w), n)) } else if (!is.null(idxs)) { # Apply subset on w w <- w[idxs] } # Argument 'constant': if (length(constant) != 1L) stop(sprintf("Argument '%s' is not a scalar: %.0f", "constant", length(constant))) if (!is.numeric(constant)) stop(sprintf("Argument '%s' is not numeric: %s", "constant", mode(constant))) # Argument 'center': if (!is.null(center) && length(center) != 1L) stop(sprintf("Argument '%s' is not a scalar or NULL: %.0f", "center", length(center))) # Apply subset on x if (!is.null(idxs)) { x <- x[idxs] n <- length(x) } na_value <- NA storage.mode(na_value) <- storage.mode(x) # Remove values with zero (and negative) weight. This will: # 1) take care of the case when all weights are zero, # 2) it will most likely speed up the sorting. tmp <- (is.na(w) | w > 0) if (!all(tmp)) { x <- .subset(x, tmp) w <- .subset(w, tmp) n <- length(x) } tmp <- NULL # Not needed anymore # Drop missing values? if (na.rm) { keep <- which(!is.na(x)) x <- .subset(x, keep) w <- .subset(w, keep) n <- length(x) keep <- NULL # Not needed anymore } else if (anyMissing(x)) { return(na_value) } # Missing values in 'w'? if (anyMissing(w)) return(na_value) # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. tmp <- is.infinite(w) if (any(tmp)) { keep <- tmp x <- .subset(x, keep) n <- length(x) w <- rep(1, times = n) keep <- NULL # Not needed anymore } tmp <- NULL # Not needed anymore # Are there any values left to calculate the weighted median of? # This is consistent with how stats::mad() works. if (n == 0L) { return(na_value) } else if (n == 1L) { zero_value <- 0 storage.mode(zero_value) <- storage.mode(x) return(zero_value) } # Estimate the mean? if (is.null(center)) { center <- weightedMedian(x, w = w, na.rm = NA) } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("weightedMad") } # Estimate the standard deviation x <- abs(x - center) sigma <- weightedMedian(x, w = w, na.rm = NA) x <- w <- NULL # Not needed anymore # Rescale for normal distributions sigma <- constant * sigma sigma } #' @rdname weightedMad #' @export rowWeightedMads <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = NA) { # Argument 'constant': if (length(constant) != 1L) stop(sprintf("Argument '%s' is not a scalar: %.0f", "constant", length(constant))) if (!is.numeric(constant)) stop(sprintf("Argument '%s' is not numeric: %s", "constant", mode(constant))) # Apply subset on 'center'? if (!is.null(center)) { if (length(center) != nrow(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(rows)) { validateScalarCenter(center, nrow(x), "rows") center <- rep(center, times = nrow(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "rows", "x", length(center), nrow(x))) } } if (!is.null(rows)) center <- center[rows] } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(cols)) w <- w[cols] y <- numeric(nrow(x)) for (kk in seq_along(y)) { y[kk] <- weightedMad(x[kk, , drop = TRUE], w = w, na.rm = na.rm, constant = constant, center = center[kk], ...) } ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats if (is.na(useNames) || useNames) { names(y) <- rownames(x) } else { names(y) <- NULL } y } #' @rdname weightedMad #' @export colWeightedMads <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, constant = 1.4826, center = NULL, ..., useNames = NA) { # Argument 'constant': if (length(constant) != 1L) stop(sprintf("Argument '%s' is not a scalar: %.0f", "constant", length(constant))) if (!is.numeric(constant)) stop(sprintf("Argument '%s' is not numeric: %s", "constant", mode(constant))) # Argument 'center': # Apply subset on 'center'? if (!is.null(center)) { if (length(center) != ncol(x)) { ## Scalar 'center'? if (length(center) == 1L && is.null(cols)) { validateScalarCenter(center, ncol(x), "cols") center <- rep(center, times = ncol(x)) } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "columns", "x", length(center), ncol(x))) } } if (!is.null(cols)) center <- center[cols] } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(rows)) w <- w[rows] y <- numeric(ncol(x)) for (kk in seq_along(y)) { y[kk] <- weightedMad(x[, kk, drop = TRUE], w = w, na.rm = na.rm, constant = constant, center = center[kk], ...) } ## BACKWARD COMPATIBILITY: matrixStats (<= 0.57.0) returns names ## when !is.null(center), which is tested by DelayedMatrixStats ## and sparseMatrixStats if (is.na(useNames) || useNames) { names(y) <- colnames(x) } else { names(y) <- NULL } y } matrixStats/R/benchmark.R0000644000176200001440000000117614074030335015057 0ustar liggesusersbenchmark <- function(fcn, tags = NULL, path = NULL, workdir = "reports", envir = parent.frame(), ...) { requireNamespace("R.rsp") || stop(sprintf("Package %s is not installed", "R.rsp")) if (is.function(fcn)) { fcn <- deparse(substitute(fcn)) } if (is.null(path)) { path <- system.file("benchmarking", package = "matrixStats") } fullname <- paste(c(fcn, tags), collapse = ", ") filename <- sprintf("%s.md.rsp", fullname) pathname <- file.path(path, filename) oopts <- options("prompt" = "> ") on.exit(options(oopts)) R.rsp::rfile(pathname, workdir = workdir, envir = envir, ...) } matrixStats/R/weightedVar.R0000644000176200001440000001276114074054377015414 0ustar liggesusers#' Weighted variance and weighted standard deviation #' #' Computes a weighted variance / standard deviation of a numeric vector or #' across rows or columns of a matrix. #' #' @inheritParams weightedMad #' @inheritParams rowAlls #' #' @return Returns a \code{\link[base]{numeric}} scalar. #' #' @section Missing values: #' This function handles missing values consistently with #' \code{\link{weightedMean}}(). #' More precisely, if \code{na.rm = FALSE}, then any missing values in either #' \code{x} or \code{w} will give result \code{NA_real_}. #' If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which #' \code{x} is missing are skipped. Note that if both \code{x} and \code{w} #' are missing for a data points, then it is also skipped (by the same rule). #' However, if only \code{w} is missing, then the final results will always #' be \code{NA_real_} regardless of \code{na.rm}. #' #' @author Henrik Bengtsson #' #' @details #' The estimator used here is the same as the one used by the "unbiased" #' estimator of the \bold{Hmisc} package. More specifically, #' \code{weightedVar(x, w = w) == Hmisc::wtd.var(x, weights = w)}, #' #' @seealso For the non-weighted variance, see \code{\link[stats]{var}}. #' #' @keywords univar robust #' @export weightedVar <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, center = NULL, ...) { # Argument 'x': n <- length(x) # Argument 'w': if (is.null(w)) { w <- rep(1, times = n) } else if (length(w) != n) { stop(sprintf("The number of elements in arguments '%s' and '%s' does not match: %.0f != %.0f", "w", "x", length(w), n)) } else if (!is.null(idxs)) { # Apply subset on 'w' w <- w[idxs] } # Apply subset on 'x' if (!is.null(idxs)) { x <- x[idxs] n <- length(x) } na_value <- NA storage.mode(na_value) <- storage.mode(x) # Remove values with zero (and negative) weight. This will: # 1) take care of the case when all weights are zero, # 2) it will most likely speed up the sorting. tmp <- (is.na(w) | w > 0) if (!all(tmp)) { x <- .subset(x, tmp) w <- .subset(w, tmp) n <- length(x) } tmp <- NULL # Not needed anymore # Drop missing values? if (na.rm) { keep <- which(!is.na(x)) x <- .subset(x, keep) w <- .subset(w, keep) n <- length(x) keep <- NULL # Not needed anymore } else if (anyMissing(x)) { return(na_value) } # Missing values in 'w'? if (anyMissing(w)) return(na_value) # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. tmp <- is.infinite(w) if (any(tmp)) { keep <- tmp x <- .subset(x, keep) n <- length(x) w <- rep(1, times = n) keep <- NULL # Not needed anymore } tmp <- NULL # Not needed anymore # Are there any values left to calculate the weighted variance of? # This is consistent with how stats::var() works. if (n <= 1L) return(na_value) # Standardize weights to sum to one wsum <- sum(w) # Estimate the mean? if (is.null(center)) { center <- sum(w * x) / wsum } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("weightedVar") } # Estimate the variance x <- x - center # Residuals x <- x^2 # Squared residuals ## Correction factor lambda <- 1 / (wsum - 1) sigma2 <- lambda * sum(w * x) x <- w <- NULL # Not needed anymore ## Undefined estimate? (adopted from Hmisc::wtd.var()) if (wsum <= 1) { warning(sprintf("Produced invalid variance estimate, because the weights suggest at most one effective observation (sum(w) <= 1): %g (wsum = %g)", sigma2, wsum)) } sigma2 } #' @rdname weightedVar #' @export weightedSd <- function(...) { sqrt(weightedVar(...)) } #' @rdname weightedVar #' @export rowWeightedVars <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on 'w' if (!is.null(w) && !is.null(cols)) w <- w[cols] # Preserve names attributes? if (!(is.na(useNames) || useNames)) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = weightedVar, w = w, na.rm = na.rm, ...) } #' @rdname weightedVar #' @export colWeightedVars <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on 'w' if (!is.null(w) && !is.null(rows)) w <- w[rows] # Preserve names attributes? if (!(is.na(useNames) || useNames)) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = weightedVar, w = w, na.rm = na.rm, ...) } #' @rdname weightedVar #' @export rowWeightedSds <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { sqrt(rowWeightedVars(x = x, w = w, rows = rows, cols = cols, na.rm = na.rm, useNames = useNames, ...)) } #' @rdname weightedVar #' @export colWeightedSds <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { sqrt(colWeightedVars(x = x, w = w, rows = rows, cols = cols, na.rm = na.rm, useNames = useNames, ...)) } matrixStats/R/varDiff.R0000644000176200001440000002526114074054377014523 0ustar liggesusers#' Estimation of scale based on sequential-order differences #' #' Estimation of scale based on sequential-order differences, corresponding to #' the scale estimates provided by \code{\link[stats]{var}}, #' \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and #' \code{\link[stats]{IQR}}. #' #' Note that n-order difference MAD estimates, just like the ordinary MAD #' estimate by \code{\link[stats]{mad}}, apply a correction factor such that #' the estimates are consistent with the standard deviation under Gaussian #' distributions. #' #' The interquartile range (IQR) estimates does \emph{not} apply such a #' correction factor. If asymptotically normal consistency is wanted, the #' correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))}, which is #' half of that used for MAD estimates, which is \code{1 / qnorm(3/4)}. This #' correction factor needs to be applied manually, i.e. there is no #' \code{constant} argument for the IQR functions. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length #' N or a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. #' #' @param diff The positional distance of elements for which the difference #' should be calculated. #' #' @param trim A \code{\link[base]{double}} in [0,1/2] specifying the fraction #' of observations to be trimmed from each end of (sorted) \code{x} before #' estimation. #' #' @param constant A scale factor adjusting for asymptotically normal #' consistency. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length 1, length N, or length K. #' #' @author Henrik Bengtsson #' #' @seealso For the corresponding non-differentiated estimates, see #' \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} #' and \code{\link[stats]{IQR}}. Internally, \code{\link{diff2}}() is used #' which is a faster version of \code{\link[base]{diff}}(). #' #' @references [1] J. von Neumann et al., \emph{The mean square successive #' difference}. Annals of Mathematical Statistics, 1941, 12, 153-162.\cr #' #' @keywords iteration robust univar #' @export varDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 1L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate var <- var(x, na.rm = FALSE) x <- NULL # Not needed anymore # Correction for the differentiation var / (2^diff) } #' @rdname varDiff #' @export sdDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 1L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate sd <- sd(x, na.rm = FALSE) x <- NULL # Not needed anymore # Correction for the differentiation sd / (sqrt(2) ^ diff) } #' @importFrom stats mad #' @rdname varDiff #' @export madDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, constant = 1.4826, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 0L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate sd <- mad(x, na.rm = FALSE, constant = constant, ...) x <- NULL # Not needed anymore # Correction for the differentiation sd / (sqrt(2) ^ diff) } #' @importFrom stats quantile #' @rdname varDiff #' @export iqrDiff <- function(x, idxs = NULL, na.rm = FALSE, diff = 1L, trim = 0, ...) { if (length(diff) != 1L) stop(sprintf("Argument '%s' is not scalar: %d", "diff", length(diff))) if (diff < 0L) stop(sprintf("Argument '%s' must not be negative: %d", "diff", diff)) # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) { x <- x[!is.na(x)] } else if (anyMissing(x)) { return(NA_real_) } # At this point, there should be no missing values # Nothing to do? n <- length(x) if (n == 0L) { return(NA_real_) } else if (n == 1L) { return(0) } # Calculate differences? if (diff > 0L) { x <- diff2(x, differences = diff) # Nothing to do? n <- length(x) if (n == 1L) return(0) } # Trim? if (trim > 0 && n > 0L) { lo <- floor(n * trim) + 1 hi <- (n + 1) - lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial = partial) x <- x[lo:hi] } # Estimate qs <- quantile(x, probs = c(0.25, 0.75), na.rm = FALSE, names = FALSE, ...) x <- NULL # Not needed anymore iqr <- (qs[2L] - qs[1L]) # Correction for the differentiation iqr / (sqrt(2) ^ diff) } #' @rdname varDiff #' @export rowVarDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = varDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colVarDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = varDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export rowSdDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = sdDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colSdDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = sdDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export rowMadDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = madDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colMadDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = madDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export rowIQRDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { rownames(x) <- NULL } apply(x, MARGIN = 1L, FUN = iqrDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } #' @rdname varDiff #' @export colIQRDiffs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, diff = 1L, trim = 0, ..., useNames = NA) { # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Preserve names attribute? if (!(is.na(useNames) || useNames)) { colnames(x) <- NULL } apply(x, MARGIN = 2L, FUN = iqrDiff, na.rm = na.rm, diff = diff, trim = trim, ...) } matrixStats/R/weightedMean.R0000644000176200001440000000355514074030335015531 0ustar liggesusers#' Weighted Arithmetic Mean #' #' Computes the weighted sample mean of a numeric vector. #' #' @inheritParams rowAlls #' @inheritParams weightedMad #' @inheritParams mean2 #' #' @param w a vector of weights the same length as \code{x} giving the weights #' to use for each element of \code{x}. Negative weights are treated as zero #' weights. Default value is equal weight to all values. #' If a missing-value weight exists, the result is always a missing value. #' #' @return Returns a \code{\link[base]{numeric}} scalar. If \code{x} is of #' zero length, then \code{NaN} is returned, which is consistent with #' \code{\link[base]{mean}}(). #' #' @example incl/weightedMean.R #' #' @section Missing values: #' This function handles missing values consistently with #' \code{\link[stats]{weighted.mean}}. More precisely, if \code{na.rm = FALSE}, #' then any missing values in either \code{x} or \code{w} will give result #' \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points #' for which \code{x} is missing are skipped. Note that if both \code{x} and #' \code{w} are missing for a data points, then it is also skipped (by the same #' rule). However, if only \code{w} is missing, then the final results will #' always be \code{NA_real_} regardless of \code{na.rm}. #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{mean}}() and \code{\link[stats]{weighted.mean}}. #' @keywords univar robust #' @export weightedMean <- function(x, w = NULL, idxs = NULL, na.rm = FALSE, refine = FALSE, ...) { # Argument 'refine': refine <- as.logical(refine) # Argument 'w': if (is.null(w)) { ## We won't fall back to stats::mean(), because it's has some overhead ## and it doesn't support refine = FALSE. w <- rep(1, times = length(x)) } else { w <- as.numeric(w) } .Call(C_weightedMean, x, w, idxs, na.rm, refine) } matrixStats/R/rowMads.R0000644000176200001440000000726114111740760014544 0ustar liggesusers#' @param constant A scale factor. See \code{\link[stats]{mad}} for details. #' #' @rdname rowSds #' @export rowMads <- function(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { if (is.null(center)) { constant <- as.numeric(constant) has_nas <- TRUE x <- .Call(C_rowMads, x, dim., rows, cols, constant, na.rm, has_nas, TRUE, useNames) } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("rowMads") # Preserve names names <- rownames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != nrow(x)) { if (length(center) == 1L && is.null(rows)) { validateScalarCenter(center, nrow(x), "rows") } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "rows", "x", length(center), nrow(x))) } } if (!is.null(rows)) center <- center[rows] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) x <- x - center if (is.null(dim(x))) { dim(x) <- dim. # prevent from dim dropping # Preserve names attribute? if (!is.na(useNames) && useNames) { if (!is.null(names)) { if (!is.null(rows)) { names <- names[rows] } rownames(x) <- names } } } x <- abs(x) x <- rowMedians(x, na.rm = na.rm, ..., useNames = useNames) x <- constant * x } x } #' @rdname rowSds #' @export colMads <- function(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { if (is.null(center)) { constant <- as.numeric(constant) has_nas <- TRUE x <- .Call(C_rowMads, x, dim., rows, cols, constant, na.rm, has_nas, FALSE, useNames) } else { ## https://github.com/HenrikBengtsson/matrixStats/issues/187 centerOnUse("colMads") # Preserve names names <- colnames(x) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset on 'center' if (length(center) != ncol(x)) { if (length(center) == 1L && is.null(cols)) { validateScalarCenter(center, ncol(x), "columns") } else { stop(sprintf("Argument '%s' should be of the same length as number of %s of '%s': %d != %d", "center", "columns", "x", length(center), ncol(x))) } } if (!is.null(cols)) center <- center[cols] # Apply subset on 'x' if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] dim. <- dim(x) ## SLOW: # for (cc in seq_len(ncol(x))) { # x[, cc] <- x[, cc] - center[cc] # } ## FAST: x <- t_tx_OP_y(x, center, OP = "-", na.rm = FALSE) # Preserve names attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(names)) { if (!is.null(cols)) { names <- names[cols] # Zero-length attribute? Keep behavior same as base R function if (length(names) == 0L) names <- NULL } colnames(x) <- names } } else { colnames(x) <- NULL } } x <- abs(x) x <- colMedians(x, na.rm = na.rm, ..., useNames = useNames) x <- constant * x } x } matrixStats/R/signTabulate.R0000644000176200001440000000142414074030335015543 0ustar liggesusers#' Calculates the number of negative, zero, positive and missing values #' #' Calculates the number of negative, zero, positive and missing values in a #' \code{\link[base]{numeric}} vector. For \code{\link[base]{double}} vectors, #' the number of negative and positive infinite values are also counted. #' #' @inheritParams rowAlls #' #' @param x a \code{\link[base]{numeric}} \code{\link[base]{vector}}. #' #' @return Returns a \code{\link[base]{name}}d \code{\link[base]{numeric}} #' \code{\link[base]{vector}}. #' #' @author Henrik Bengtsson #' @seealso \code{\link[base]{sign}}(). #' @keywords internal #' @export signTabulate <- function(x, idxs = NULL, ...) { res <- .Call(C_signTabulate, x, idxs) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf")[1:length(res)] res } matrixStats/R/rowCollapse.R0000644000176200001440000000447414074054377015437 0ustar liggesusers#' Extracts one cell per row (column) from a matrix #' #' Extracts one cell per row (column) from a matrix. The implementation is #' optimized for memory and speed. #' #' @inheritParams rowAlls #' #' @param idxs An index \code{\link[base]{vector}} of (maximum) length N (K) #' specifying the columns (rows) to be extracted. #' #' @return Returns a \code{\link[base]{vector}} of length N (K). #' #' @example incl/rowCollapse.R #' #' @author Henrik Bengtsson #' #' @seealso \emph{Matrix indexing} to index elements in matrices and arrays, #' cf. \code{\link[base]{[}}(). #' @keywords utilities #' @export rowCollapse <- function(x, idxs, rows = NULL, dim. = dim(x), ..., useNames = NA) { # Argument 'x': if (!is.matrix(x) && !is.vector(x)) defunctShouldBeMatrixOrVector(x) # Argument 'idxs': idxs <- rep(idxs, length.out = dim.[1L]) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(rows)) { x <- x[rows, , drop = FALSE] idxs <- idxs[rows] } dim. <- dim(x) # Columns of interest cols <- 0:(dim.[2L] - 1L) cols <- cols[idxs] # Calculate column-based indices idxs <- dim.[1L] * cols + seq_len(dim.[1L]) cols <- NULL # Not needed anymore # Update names attribute? res <- x[idxs] if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } res } #' @rdname rowCollapse #' @export colCollapse <- function(x, idxs, cols = NULL, dim. = dim(x), ..., useNames = NA) { # Argument 'x': if (!is.matrix(x) && !is.vector(x)) defunctShouldBeMatrixOrVector(x) # Argument 'idxs': idxs <- rep(idxs, length.out = dim.[2L]) # Apply new dimensions if (!identical(dim(x), dim.)) dim(x) <- dim. # Apply subset if (!is.null(cols)) { x <- x[, cols, drop = FALSE] idxs <- idxs[cols] } dim. <- dim(x) # Rows of interest rows <- seq_len(dim.[1L]) rows <- rows[idxs] # Calculate column-based indices idxs <- dim.[1L] * 0:(dim.[2L] - 1L) + rows rows <- NULL # Not needed anymore # Update names attribute? res <- x[idxs] if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } res } matrixStats/R/rowWeightedMeans.R0000644000176200001440000001616714074054377016423 0ustar liggesusers#' Calculates the weighted means for each row (column) in a matrix #' #' Calculates the weighted means for each row (column) in a matrix. #' #' The implementations of these methods are optimized for both speed and #' memory. If no weights are given, the corresponding #' \code{rowMeans()}/\code{colMeans()} is used. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param w A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length #' K (N). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @example incl/rowWeightedMeans.R #' #' @author Henrik Bengtsson #' #' @seealso See \code{rowMeans()} and \code{colMeans()} in #' \code{\link[base]{colSums}}() for non-weighted means. See also #' \code{\link[stats]{weighted.mean}}. #' #' @keywords array iteration robust univar #' @export rowWeightedMeans <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- ncol(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "columns", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(cols)) w <- w[cols] if (has_weights) { # Allocate results m <- nrow(x) if (m == 0L) return(double(0L)) # Drop entries with zero weight? ...but keep NAs idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (nw == 0L) { res <- rep(NaN, times = m) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- rownames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } return(res) } else if (nw < n) { w <- w[idxs] x <- x[, idxs, drop = FALSE] } idxs <- NULL # Not needed anymore # Has missing values? if (na.rm) { # Really? na.rm <- anyMissing(x) } if (na.rm) { # Indices of missing values nas <- which(is.na(x)) # Weight matrix W <- matrix(w, nrow = nrow(x), ncol = ncol(x), byrow = TRUE) w <- NULL # Not needed anymore W[nas] <- NA wS <- rowSums(W, na.rm = TRUE) # Standarized weights summing to one w/out missing values W[nas] <- 0 W <- W / wS x[nas] <- 0 nas <- NULL # Not needed anymore x <- W * x # Preserve dimnames attribute? if (!(is.na(useNames) || useNames)) { dimnames(x) <- NULL } W <- NULL # Not needed anymore } else { wS <- sum(w) # Standardize weights summing to one. w <- w / wS # Preserve dimnames attribute dimnames <- dimnames(x) # Weighted values ## SLOW: for (rr in 1:m) x[rr, ] <- w * x[rr, , drop = TRUE] ## FAST: x <- t_tx_OP_y(x, w, OP = "*", na.rm = FALSE) # Update dimnames attribute? if (!is.na(useNames)) { if (useNames) { if (!is.null(dimnames)) { dimnames(x) <- dimnames } } else { dimnames(x) <- NULL } } w <- NULL # Not needed anymore } # Here we know there are no missing value in the new 'x' res <- rowSums(x, na.rm = FALSE) } else { res <- rowMeans(x, na.rm = na.rm) # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } } res } #' @rdname rowWeightedMeans #' @export colWeightedMeans <- function(x, w = NULL, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.matrix(x)) defunctShouldBeMatrix(x) # Argument 'w': has_weights <- !is.null(w) if (has_weights) { n <- nrow(x) if (length(w) != n) { stop(sprintf("The length of argument '%s' does not match the number of %s in '%s': %d != %d", "w", "rows", "x", length(w), n)) #nolint } if (!is.numeric(w)) { stop(sprintf("Argument '%s' is not numeric: %s", "w", mode(w))) } if (any(!is.na(w) & w < 0)) { stop(sprintf("Argument '%s' must not contain negative values", "w")) } } # Apply subset on x if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Apply subset on w if (!is.null(w) && !is.null(rows)) w <- w[rows] if (has_weights) { # Allocate results m <- ncol(x) if (m == 0L) return(double(0L)) # Drop entries with zero weight? ...but keep NAs idxs <- which(is.na(w) | w != 0) nw <- length(idxs) if (nw == 0L) { res <- rep(NaN, times = m) # Update names attribute? if (!is.na(useNames)) { if (useNames) { names <- colnames(x) if (!is.null(names)) { names(res) <- names } } else { names(res) <- NULL } } return(res) } else if (nw < n) { w <- w[idxs] x <- x[idxs, , drop = FALSE] } idxs <- NULL # Not needed anymore # Has missing values? if (na.rm) { # Really? na.rm <- anyMissing(x) } if (na.rm) { # Indices of missing values nas <- which(is.na(x)) # Weight matrix W <- matrix(w, nrow = nrow(x), ncol = ncol(x), byrow = FALSE) w <- NULL # Not needed anymore W[nas] <- NA wS <- colSums(W, na.rm = TRUE) # Standarized weights summing to one w/out missing values W[nas] <- 0 for (cc in 1:m) { W[, cc] <- W[, cc, drop = TRUE] / wS[cc] } x[nas] <- 0 nas <- NULL # Not needed anymore x <- W * x W <- NULL # Not needed anymore } else { wS <- sum(w) # Standardize weights summing to one. w <- w / wS # Weighted values x <- w * x ## SLIGHTLY SLOWER: x <- x_OP_y(x, w, OP = "*") w <- NULL # Not needed anymore } # Here we know there are no missing value in the new 'x' res <- colSums(x, na.rm = FALSE) } else { res <- colMeans(x, na.rm = na.rm) } # Preserve names attribute? if (!(is.na(useNames) || useNames)) { names(res) <- NULL } res } matrixStats/R/rowAvgsPerColSet.R0000644000176200001440000001426114074030335016335 0ustar liggesusers#' Applies a row-by-row (column-by-column) averaging function to equally-sized #' subsets of matrix columns (rows) #' #' Applies a row-by-row (column-by-column) averaging function to equally-sized #' subsets of matrix columns (rows). Each subset is averaged independently of #' the others. #' #' If argument \code{S} is a single column vector with indices \code{1:N}, then #' \code{rowAvgsPerColSet(X, S = S, FUN = rowMeans)} gives the same result as #' \code{rowMeans(X)}. Analogously, for \code{colAvgsPerRowSet()}. #' #' @inheritParams rowAlls #' #' @param X A \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}}. #' #' @param W An optional \code{\link[base]{numeric}} NxM #' \code{\link[base]{matrix}} of weights. #' #' @param S An \code{\link[base]{integer}} KxJ \code{\link[base]{matrix}} #' specifying the J subsets. Each column holds K column (row) indices for the #' corresponding subset. #' #' @param FUN The row-by-row (column-by-column) \code{\link[base]{function}} #' used to average over each subset of \code{X}. This function must accept a #' \code{\link[base]{numeric}} NxK (KxM) \code{\link[base]{matrix}} and the #' \code{\link[base]{logical}} argument \code{na.rm}, and return a #' \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (M). #' #' @param ... Additional arguments passed to then \code{FUN} #' \code{\link[base]{function}}. #' #' @param na.rm (logical) Argument passed to \code{FUN()} as #' \code{na.rm = na.rm}. If \code{\link[base:logical]{NA}} (default), then #' \code{na.rm = TRUE} is used if \code{X} or \code{S} holds missing values, #' otherwise \code{na.rm = FALSE}. #' #' @param tFUN If \code{\link[base:logical]{TRUE}}, the NxK (KxM) #' \code{\link[base]{matrix}} passed to \code{FUN()} is transposed first. #' #' @return Returns a \code{\link[base]{numeric}} JxN (MxJ) #' \code{\link[base]{matrix}}, where row names equal \code{rownames(X)} #' (\code{colnames(S)}) and column names \code{colnames(S)} #' (\code{colnames(X)}). #' #' @example incl/rowAvgsPerColSet.R #' #' @author Henrik Bengtsson #' @keywords internal utilities #' @export rowAvgsPerColSet <- function(X, W = NULL, rows = NULL, S, FUN = rowMeans, ..., na.rm = NA, tFUN = FALSE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'X': if (!is.matrix(X)) { stop(sprintf("Argument '%s' is not a matrix: %s", "X", class(X)[1L])) } dimX <- dim(X) # Argument 'W': hasW <- !is.null(W) if (hasW) { if (!is.matrix(W)) { stop(sprintf("Argument '%s' is not a matrix: %s", "W", class(W)[1L])) } if (any(dim(W) != dimX)) { stop(sprintf("Argument '%s' and '%s' have different dimensions: %s != %s", "W", "X", paste(dim(W), collapse = "x"), paste(dimX, collapse = "x"))) } if (!is.numeric(W)) { stop(sprintf("Argument '%s' is not numeric: %s", "W", mode(W))) } } # Argument 'S': if (!is.matrix(S)) { stop(sprintf("Argument '%s' is not a matrix: %s", "S", class(S)[1L])) } nbrOfSets <- ncol(S) setNames <- colnames(S) # Argument 'FUN': if (!is.function(FUN)) { stop(sprintf("Argument '%s' is not a function: %s", "FUN", mode(S))) } # Apply subset if (!is.null(rows)) { X <- X[rows, , drop = FALSE] if (hasW) W <- W[rows, , drop = FALSE] dimX <- dim(X) } # Argument 'tFUN': tFUN <- as.logical(tFUN) # Check if missing values have to be excluded while averaging if (is.na(na.rm)) na.rm <- (anyMissing(X) || anyMissing(S)) # Record names of dimension rownamesX <- rownames(X) dimnames(X) <- NULL # Average in sets of columns of X. Z <- apply(S, MARGIN = 2L, FUN = function(jj) { # Extract set of columns from X jj <- jj[is.finite(jj)] Zjj <- X[, jj, drop = FALSE] jj <- NULL # Not needed anymore if (tFUN) { Zjj <- t(Zjj) } # Average by weights if (hasW) { Wjj <- W[, jj, drop = FALSE] Zjj <- FUN(Zjj, W = Wjj, ..., na.rm = na.rm) Wjj <- NULL # Not needed anymore } else { Zjj <- FUN(Zjj, ..., na.rm = na.rm) } # Sanity check if (length(Zjj) != dimX[1L]) stop("Internal error: length(Zjj) != dimX[1L]") # Return set average Zjj }) # apply() drops 2nd dimension if nrow(X) <= 1 (and FUN returns a vector of # length nrow(X) as it should), cf. ?apply if (!is.matrix(Z)) { if (dimX[1] > 1L) stop("Internal error: dimX[1] > 1L") dim(Z) <- c(dimX[1L], nbrOfSets) } # Sanity check if (any(dim(Z) != c(dimX[1L], nbrOfSets))) stop("Internal error: dim(Z) != c(dimX[1L], nbrOfSets)") # Set names rownames(Z) <- rownamesX colnames(Z) <- setNames Z } #' @rdname rowAvgsPerColSet #' @export colAvgsPerRowSet <- function(X, W = NULL, cols = NULL, S, FUN = colMeans, ..., na.rm = NA, tFUN = FALSE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'X': if (!is.matrix(X)) { stop(sprintf("Argument '%s' is not a matrix: %s", "X", class(X)[1L])) } # Argument 'W': # Argument 'S': if (!is.matrix(S)) { stop(sprintf("Argument '%s' is not a matrix: %s", "S", class(S)[1L])) } # Argument 'FUN': if (!is.function(FUN)) { stop(sprintf("Argument '%s' is not a function: %s", "FUN", mode(S))) } # Apply subset if (!is.null(cols)) { X <- X[, cols, drop = FALSE] if (is.null(W)) W <- W[, cols, drop = FALSE] } # Argument 'tFUN': tFUN <- as.logical(tFUN) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Transpose # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tX <- t(X) if (is.null(W)) { tW <- NULL } else { tW <- t(W) } # ... tZ <- rowAvgsPerColSet(X = tX, W = tW, S = S, FUN = FUN, ..., na.rm = na.rm, tFUN = !tFUN) tX <- tW <- NULL # Not needed anymore # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Transpose back # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Z <- t(tZ) tZ <- NULL # Not needed anymore Z } matrixStats/R/rowRanges.R0000644000176200001440000000400014111740760015063 0ustar liggesusers#' Gets the range of values in each row (column) of a matrix #' #' Gets the range of values in each row (column) of a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @return \code{rowRanges()} (\code{colRanges()}) returns a #' \code{\link[base]{numeric}} Nx2 (Kx2) \code{\link[base]{matrix}}, where N #' (K) is the number of rows (columns) for which the ranges are calculated. #' #' \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a #' \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). #' #' @author Henrik Bengtsson #' #' @seealso \code{\link{rowOrderStats}}() and \code{\link[base]{pmin.int}}(). #' #' @keywords array iteration robust univar #' #' @export rowRanges <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { .Call(C_rowRanges, x, dim., rows, cols, 2L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export rowMins <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { .Call(C_rowRanges, x, dim., rows, cols, 0L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export rowMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { .Call(C_rowRanges, x, dim., rows, cols, 1L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export colRanges <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { .Call(C_colRanges, x, dim., rows, cols, 2L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export colMins <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { .Call(C_colRanges, x, dim., rows, cols, 0L, na.rm, TRUE, useNames) } #' @rdname rowRanges #' @export colMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { .Call(C_colRanges, x, dim., rows, cols, 1L, na.rm, TRUE, useNames) } matrixStats/R/rowTabulates.R0000644000176200001440000001211114074054377015604 0ustar liggesusers#' Tabulates the values in a matrix by row (column). #' #' @inheritParams rowAlls #' #' @param x An \code{\link[base]{integer}}, a \code{\link[base]{logical}}, or #' a \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}. #' #' @param values An \code{\link[base]{vector}} of J values of count. If #' \code{\link[base]{NULL}}, all (unique) values are counted. #' #' @return Returns a NxJ (KxJ) \code{\link[base]{matrix}} where N (K) is the #' number of row (column) \code{\link[base]{vector}}s tabulated and J is the #' number of values counted. #' #' @details #' An alternative to these functions, is to use \code{table(x, row(x))} #' and \code{table(x, col(x))}, with the exception that the latter do not #' support the \code{\link[base]{raw}} data type. #' When there are no missing values in \code{x}, we have that #' \code{all(rowTabulates(x) == t(table(x, row(x))))} and #' \code{all(colTabulates(x) == t(table(x, col(x))))}. #' When there are missing values, we have that #' \code{all(rowTabulates(x) == t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]))} and #' \code{all(colTabulates(x) == t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]))}. #' #' @example incl/rowTabulates.R #' #' @author Henrik Bengtsson #' @keywords utilities #' @export rowTabulates <- function(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (is.integer(x)) { } else if (is.logical(x)) { } else if (is.raw(x)) { } else { stop(sprintf("Argument '%s' is not integer, logical, or raw: %s", "x", class(x)[1])) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Argument 'values': if (is.null(values)) { values <- as.vector(x) values <- unique(values) if (is.raw(values)) { values <- as.integer(values) values <- sort.int(values) # WORKAROUND: Cannot use "%#x" because it gives an error OSX with # R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20 names <- sprintf("%x", values) names <- paste("0x", names, sep = "") values <- as.raw(values) } else { values <- sort.int(values, na.last = TRUE) names <- as.character(values) } } else { if (is.raw(values)) { names <- sprintf("%x", as.integer(values)) names <- paste("0x", names, sep = "") } else { names <- as.character(values) } } nbr_of_values <- length(values) counts <- matrix(0L, nrow = nrow(x), ncol = nbr_of_values) colnames(counts) <- names na.rm <- anyMissing(x) for (kk in seq_len(nbr_of_values)) { counts[, kk] <- rowCounts(x, value = values[kk], na.rm = na.rm) } # Update rownames attribute? if (!is.na(useNames)) { if (useNames) { rownames <- rownames(x) if (!is.null(rownames)) rownames(counts) <- rownames } else { rownames(counts) <- NULL } } counts } #' @rdname rowTabulates #' @export colTabulates <- function(x, rows = NULL, cols = NULL, values = NULL, ..., useNames = NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (is.integer(x)) { } else if (is.logical(x)) { } else if (is.raw(x)) { } else { stop(sprintf("Argument '%s' is not integer, logical, or raw: %s", "x", class(x)[1])) } # Apply subset if (!is.null(rows) && !is.null(cols)) x <- x[rows, cols, drop = FALSE] else if (!is.null(rows)) x <- x[rows, , drop = FALSE] else if (!is.null(cols)) x <- x[, cols, drop = FALSE] # Argument 'values': if (is.null(values)) { values <- as.vector(x) values <- unique(values) if (is.raw(values)) { values <- as.integer(values) values <- sort.int(values) # WORKAROUND: Cannot use "%#x" because it gives an error OSX with # R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20 names <- sprintf("%x", values) names <- paste("0x", names, sep = "") values <- as.raw(values) } else { values <- sort.int(values, na.last = TRUE) names <- as.character(values) } } else { if (is.raw(values)) { names <- sprintf("%x", as.integer(values)) names <- paste("0x", names, sep = "") } else { names <- as.character(values) } } transpose <- FALSE if (!transpose) { nbr_of_values <- length(values) counts <- matrix(0L, nrow = ncol(x), ncol = nbr_of_values) colnames(counts) <- names na.rm <- anyMissing(x) for (kk in seq_len(nbr_of_values)) { counts[, kk] <- colCounts(x, value = values[kk], na.rm = na.rm) } } # Update rownames attribute? if (!is.na(useNames)) { if (useNames) { colnames <- colnames(x) if (!is.null(colnames)) rownames(counts) <- colnames } else { rownames(counts) <- NULL } } counts } matrixStats/R/rowMedians.R0000644000176200001440000000422214111740760015232 0ustar liggesusers#' Calculates the median for each row (column) in a matrix #' #' Calculates the median for each row (column) in a matrix. #' #' The implementation of \code{rowMedians()} and \code{colMedians()} is #' optimized for both speed and memory. To avoid coercing to #' \code{\link[base]{double}}s (and hence memory allocation), there is a #' special implementation for \code{\link[base]{integer}} matrices. That is, #' if \code{x} is an \code{\link[base]{integer}} \code{\link[base]{matrix}}, #' then \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))}) would #' require three times the memory of \code{rowMedians(x)} #' (\code{colMedians(x)}), but all this is avoided. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param rows,cols A \code{\link[base]{vector}} indicating subset of rows #' (and/or columns) to operate over. If \code{\link[base]{NULL}}, no subsetting #' is done. #' #' @param na.rm If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s #' are excluded first, otherwise not. #' #' @param dim. An \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length two specifying the dimension of \code{x}, also when not a #' \code{\link[base]{matrix}}. #' #' @param ... Not used. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @author Henrik Bengtsson, Harris Jaffee #' #' @seealso See \code{\link{rowWeightedMedians}()} and #' \code{colWeightedMedians()} for weighted medians. #' For mean estimates, see \code{\link{rowMeans2}()} and #' \code{\link[base:colSums]{rowMeans}()}. #' #' @keywords array iteration robust univar #' @export rowMedians <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { has_nas <- TRUE # Add as an argument? /2007-08-24 .Call(C_rowMedians, x, dim., rows, cols, na.rm, has_nas, TRUE, useNames) } #' @rdname rowMedians #' @export colMedians <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ..., useNames = NA) { has_nas <- TRUE # Add as an argument? /2007-08-24 .Call(C_rowMedians, x, dim., rows, cols, na.rm, has_nas, FALSE, useNames) } matrixStats/R/diff2.R0000644000176200001440000000236614074030335014121 0ustar liggesusers#' Fast lagged differences #' #' Computes the lagged and iterated differences. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length #' N. #' #' @param lag An \code{\link[base]{integer}} specifying the lag. #' #' @param differences An \code{\link[base]{integer}} specifying the order of #' difference. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N - \code{differences}. #' #' @examples #' diff2(1:10) #' #' @author Henrik Bengtsson #' #' @seealso \code{\link[base]{diff}}(). #' @keywords univar internal #' #' @export diff2 <- function(x, idxs = NULL, lag = 1L, differences = 1L, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'lag': if (length(lag) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "lag", length(lag))) } # Argument 'differences': if (length(differences) != 1L) { stop(sprintf("Argument '%s' is not a scalar: %.0f", "differences", length(differences))) } lag <- as.integer(lag) differences <- as.integer(differences) .Call(C_diff2, x, idxs, lag, differences) } matrixStats/R/allocMatrix.R0000644000176200001440000000234714074030335015405 0ustar liggesusers#' Allocates an empty vector, matrix or array #' #' Allocates an empty vector, matrix or array faster than the corresponding #' function in R. #' #' @param value A \code{\link[base]{numeric}} scalar that all elements will #' have as value. #' #' @param length,nrow,ncol,dim \code{\link[base]{numeric}}s specifying the #' dimension of the created \code{\link[base]{vector}}, #' \code{\link[base]{matrix}} or \code{\link[base]{array}}. #' #' @return Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and #' \code{\link[base]{array}} respectively of the same data type as #' \code{value}. #' #' @author Henrik Bengtsson #' #' @seealso See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and #' \code{\link[base]{array}}. #' #' @keywords internal programming #' #' @export allocMatrix <- function(nrow, ncol, value = 0.0, ...) { nrow <- as.integer(nrow) ncol <- as.integer(ncol) .Call(C_allocMatrix2, nrow, ncol, value) } #' @rdname allocMatrix #' @export allocVector <- function(length, value = 0.0, ...) { length <- as.numeric(length) .Call(C_allocVector2, length, value) } #' @rdname allocMatrix #' @export allocArray <- function(dim, value = 0.0, ...) { dim <- as.integer(dim) .Call(C_allocArray2, dim, value) } matrixStats/R/rowIQRs.R0000644000176200001440000000432514074054377014506 0ustar liggesusers#' Estimates of the interquartile range for each row (column) in a matrix #' #' Estimates of the interquartile range for each row (column) in a matrix. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param ... Additional arguments passed to \code{\link{rowQuantiles}}() #' (\code{colQuantiles()}). #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Missing values: Contrary to \code{\link[stats]{IQR}}, which gives #' an error if there are missing values and \code{na.rm = FALSE}, \code{iqr()} #' and its corresponding row and column-specific functions return #' \code{\link[base]{NA}}_real_. #' #' @example incl/rowIQRs.R #' #' @author Henrik Bengtsson #' @seealso See \code{\link[stats]{IQR}}. See \code{\link{rowSds}}(). #' @keywords array iteration robust univar #' #' @importFrom stats quantile #' @export rowIQRs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { Q <- rowQuantiles(x, rows = rows, cols = cols, probs = c(0.25, 0.75), na.rm = na.rm, useNames = useNames, drop = FALSE, ...) colnames(Q) <- NULL # Not needed anymore ans <- Q[, 2L, drop = TRUE] - Q[, 1L, drop = TRUE] # Remove attributes if (is.na(useNames)) { attributes(ans) <- NULL } ans } #' @rdname rowIQRs #' @export colIQRs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = NA) { Q <- colQuantiles(x, rows = rows, cols = cols, probs = c(0.25, 0.75), na.rm = na.rm, useNames = useNames, drop = FALSE, ...) colnames(Q) <- NULL # Not needed anymore ans <- Q[, 2L, drop = TRUE] - Q[, 1L, drop = TRUE] # Remove attributes if (is.na(useNames)) { attributes(ans) <- NULL } ans } #' @rdname rowIQRs #' @export iqr <- function(x, idxs = NULL, na.rm = FALSE, ...) { # Apply subset if (!is.null(idxs)) x <- x[idxs] if (na.rm) { x <- x[!is.na(x)] } else if (anyMissing(x)) { return(NA_real_) } # At this point, there should be no missing values # Nothing to do? n <- length(x) if (n == 0L) { return(NA_real_) } else if (n == 1L) { return(0) } q <- quantile(x, probs = c(0.25, 0.75), names = FALSE, na.rm = FALSE, ...) q[2L] - q[1L] } matrixStats/R/rowOrderStats.R0000644000176200001440000000344114111740760015746 0ustar liggesusers#' Gets an order statistic for each row (column) in a matrix #' #' Gets an order statistic for each row (column) in a matrix. #' #' The implementation of \code{rowOrderStats()} is optimized for both speed and #' memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory #' allocation), there is a unique implementation for #' \code{\link[base]{integer}} matrices. #' #' @inheritParams rowAlls #' @inheritParams rowDiffs #' #' @param which An \code{\link[base]{integer}} index in [1,K] ([1,N]) #' indicating which order statistic to be returned. #' #' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of #' length N (K). #' #' @section Missing values: This method does \emph{not} handle missing values, #' that is, the result corresponds to having \code{na.rm = FALSE} (if such an #' argument would be available). #' #' @author The native implementation of \code{rowOrderStats()} was adopted by #' Henrik Bengtsson from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} #' package. #' #' @seealso See \code{rowMeans()} in \code{\link[base]{colSums}}(). #' #' @keywords array iteration robust univar #' @export rowOrderStats <- function(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = NA) { # Check missing values if (anyMissing(x)) { stop(sprintf("Argument '%s' must not contain missing values", "x")) } .Call(C_rowOrderStats, x, dim., rows, cols, which, useNames) } #' @rdname rowOrderStats #' @export colOrderStats <- function(x, rows = NULL, cols = NULL, which, dim. = dim(x), ..., useNames = NA) { # Check missing values if (anyMissing(x)) { stop(sprintf("Argument '%s' must not contain missing values", "x")) } .Call(C_colOrderStats, x, dim., rows, cols, which, useNames) } matrixStats/R/binCounts.R0000644000176200001440000000615214074030335015070 0ustar liggesusers#' Fast element counting in non-overlapping bins #' #' Counts the number of elements in non-overlapping bins #' #' \code{binCounts(x, bx, right = TRUE)} gives equivalent results as #' \code{rev(binCounts(-x, bx = rev(-bx), right = FALSE))}, but is faster #' and more memory efficient. #' #' @inheritParams rowAlls #' #' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K #' positions for to be binned and counted. #' #' @param bx A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1 #' ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])}, #' \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}. #' #' @param right If \code{\link[base:logical]{TRUE}}, the bins are right-closed #' (left open), otherwise left-closed (right open). #' #' @return Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of #' length B with non-negative integers. #' #' @section Missing and non-finite values: #' Missing values in \code{x} are ignored/dropped. Missing values in \code{bx} #' are not allowed and gives an error. #' #' @author Henrik Bengtsson #' #' @seealso An alternative for counting occurrences within bins is #' \code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks = bx, #' plot = FALSE)$counts}. That approach is ~30-60\% slower than #' \code{binCounts(..., right = TRUE)}. #' #' To count occurrences of indices \code{x} (positive #' \code{\link[base]{integer}}s) in \code{[1, B]}, use \code{tabulate(x, #' nbins = B)}, where \code{x} does \emph{not} have to be sorted first. For #' details, see \code{\link[base]{tabulate}}(). #' #' To average values within bins, see \code{\link{binMeans}}(). #' #' @keywords univar #' @export binCounts <- function(x, idxs = NULL, bx, right = FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x)) { stop(sprintf("Argument '%s' is not numeric: %s", "x", mode(x))) } # Argument 'bx': if (!is.numeric(bx)) { stop(sprintf("Argument '%s' is not numeric: %s", "bx", mode(bx))) } if (any(is.infinite(bx))) { stop(sprintf("Argument '%s' must not contain infinite values", "bx")) } if (is.unsorted(bx)) { stop(sprintf("Argument '%s' is not ordered", "bx")) } # Apply subset if (!is.null(idxs)) x <- x[idxs] # Argument 'right': right <- as.logical(right) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Preprocessing of x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values keep <- which(!is.na(x)) if (length(keep) < length(x)) { x <- x[keep] } keep <- NULL # Not needed anymore # Order x (by increasing x). # If 'x' is already sorted, the overhead of (re)sorting is # relatively small. x <- sort.int(x, method = "quick") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bin # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- as.numeric(x) bx <- as.numeric(bx) .Call(C_binCounts, x, bx, right) } matrixStats/R/product.R0000644000176200001440000000023114063411361014574 0ustar liggesusers#' @rdname rowProds #' @export product <- function(x, idxs = NULL, na.rm = FALSE, ...) { .Call(C_productExpSumLog, x, idxs, as.logical(na.rm), TRUE) } matrixStats/MD50000644000176200001440000005422714121073612013114 0ustar liggesusersc5849977d8015840e88824d9724190a5 *DESCRIPTION 27bd90e8e917d72efbffe86ec0c70adf *NAMESPACE a20eaad11a6811bbdf19c1d679ba465d *NEWS 70319a2f175b64e4ff690389b5093a81 *R/000.DEPRECATION.R bb69907c26b811fcf67fddc0e9cd5b77 *R/999.package.R 22692aefe0cfed876cf940f760dfe1a8 *R/allocMatrix.R 18b4372040729ff34c6018f6ef974421 *R/anyMissing.R 029a1ceab8c720d15f9c8887bf580b18 *R/benchmark.R e1ab7f274317f2c22753ea86d5c59619 *R/binCounts.R d949de2d5a1007e9b53ab269a50dbe01 *R/binMeans.R 1ace0949c010003b716530dbbc9ba400 *R/diff2.R 347eadcaf24f2a10ab26e8142aeba913 *R/indexByRow.R 8953d8ff68c0269108adeb3bf473041e *R/logSumExp.R cb915487b0215f1ea4468a8ab9a12b21 *R/mean2.R 53451895bb9b2462bc6f658234faa87f *R/product.R 9da47eff39df6e33896014f79961fd5c *R/psortKM.R 96034b2205821c36b95b621201ad02c0 *R/rowAlls.R 87d4f7fb3c618784a640d617fe6ce077 *R/rowAvgsPerColSet.R 44c64b1ff90ada9af2a9de0d041c5bcf *R/rowCollapse.R 83cbd39e4c3169f30822625fb5890633 *R/rowCounts.R e645e43862b24d1ec86e8fe9795b6ad2 *R/rowCumsums.R 857d574f86352e60601575fb56585dbb *R/rowDiffs.R 426febb91d707973d70944eef694c0ec *R/rowIQRs.R 9ff8d243833d84cbd9d9d6dc63e99195 *R/rowLogSumExps.R a5cecdbb37aba55367d871a01ca75304 *R/rowMads.R dd0ce0646b2fede8cb6580c3447d676f *R/rowMeans2.R cfbad91fc59b4e748b1298934c5b2d44 *R/rowMedians.R 988d1d52d6f6bae3ce2c959b4191e7c0 *R/rowOrderStats.R 36f14956d08b330247184000fd2547d0 *R/rowProds.R c9484013e58d2bb3dd72bf06dc33f586 *R/rowQuantiles.R 2b883ebd286f2539e2dffef629d3b470 *R/rowRanges.R 8e0ce7553b71537604d0b9e56bbd5ba5 *R/rowRanks.R 520ad02eab1061babc643d78195be0f2 *R/rowSds.R 187ab997046c128c15ccd74f52660abc *R/rowSums2.R a360b119801ff6af30d476b3f45fd78c *R/rowTabulates.R 2ec2a88dd9feae40a45281e084556c3a *R/rowVars.R 649ba3bef570b03541b407d832c14212 *R/rowWeightedMeans.R 08a54ba7dda574f27a1d71615ca00b3b *R/rowWeightedMedians.R 797df1655bc7c5236c01e136c7b04b79 *R/signTabulate.R 7fbde9f738ca8e0378cdbe4a87bdbdbc *R/sum2.R d9295cede2b984c2ea6ff0a574986b67 *R/validateIndices.R 76c00469327cbde063a72f9a2346ff68 *R/varDiff.R 6194931ef15614d179a1ed5f5274e545 *R/weightedMad.R c250fd34f40c5fc816f505eeff9e4839 *R/weightedMean.R 707dcc1630616260447f26a2feb85721 *R/weightedMedian.R 5eb180345a8cd2cf989e9487bd5ee444 *R/weightedVar.R 2b996ad05b5cbb581e4af567a5f703cb *R/x_OP_y.R e0794de31f9d5ea53d722ed4ca435b40 *R/zzz.R 41f12faff92bb89daa64032a60a7770c *build/vignette.rds a2a17725f2a942b072007006713212f5 *inst/WORDLIST ca773a01fc2df607bef2bf6ee72f01fb *inst/benchmarking/R/random-matrices.R 68d9d75e6b36f6bd61569553be4cb133 *inst/benchmarking/R/random-vectors.R e00cf911d14b0c6bc13bb17b3dd62f1e *inst/benchmarking/allocMatrix.md.rsp 81d8eb603b80c27bed9570bd49ae03f0 *inst/benchmarking/allocVector.md.rsp 87c66c03ab530a473a7cfab448e30664 *inst/benchmarking/anyMissing.md.rsp f97927a62ed926d0092677057692449f *inst/benchmarking/anyMissing_subset.md.rsp 86bfbd81cb938e1caa98d01bfc32849c *inst/benchmarking/binCounts.md.rsp 17aa0aa0a6be8f9359f1cb0f09286db2 *inst/benchmarking/binCounts_subset.md.rsp 4e7fff2c598adc6b0982245a5542dfd9 *inst/benchmarking/binMeans.md.rsp 3797dd738454272a3431c4979b39c0c9 *inst/benchmarking/binMeans_subset.md.rsp 3a3911d3e2b396d9f55015baef8f04f5 *inst/benchmarking/colRowAlls.md.rsp ffe602c8b8a9e9ce2aab09ad6b3e37d8 *inst/benchmarking/colRowAlls_subset.md.rsp 23f7cd533e7631799278786bca210bf9 *inst/benchmarking/colRowAnyMissings.md.rsp 69ab6c0d6c537a30b5550ad5c4a231b2 *inst/benchmarking/colRowAnyMissings_subset.md.rsp 73f26ffb9340049cd3818c63b622a549 *inst/benchmarking/colRowAnys.md.rsp 0098b1085ada124e68e8dfa7f7d3620f *inst/benchmarking/colRowAnys_subset.md.rsp 40d79f5b1e5f42e33318a5bcd36a554d *inst/benchmarking/colRowCounts.md.rsp 313513c280d51f3638c2cd6eebf650d5 *inst/benchmarking/colRowCounts_subset.md.rsp 2b1c4e6a925ee6443499c01c846ea04b *inst/benchmarking/colRowCummins.md.rsp 5261ab39cdf0673af2d3589071f00fb2 *inst/benchmarking/colRowCummins_subset.md.rsp e619768942c419a9e0628d0ef324a914 *inst/benchmarking/colRowCumprods.md.rsp 6f6045c415f73cdceef948a6978f5707 *inst/benchmarking/colRowCumprods_subset.md.rsp a8697a83212a1d59887f547e097104b2 *inst/benchmarking/colRowCumsums.md.rsp efba595e480cbd813311cc298d5e2c88 *inst/benchmarking/colRowCumsums_subset.md.rsp 927737181751c970fae9963e4e4bc370 *inst/benchmarking/colRowDiffs.md.rsp 4e74122ee4454982b84331ef7d7951e9 *inst/benchmarking/colRowDiffs_subset.md.rsp c20505dc8b1d26b04899a0a627ec7bd3 *inst/benchmarking/colRowLogSumExps.md.rsp 505c90416b1dedd80ce017b7ebae6d9b *inst/benchmarking/colRowLogSumExps_subset.md.rsp 24d973f08bca1b563257c1dc1a10544a *inst/benchmarking/colRowMads.md.rsp 54cb05bb7c246cca315156a83cd8087d *inst/benchmarking/colRowMads_subset.md.rsp e4362ddc6881e551ad8609d15dfa5430 *inst/benchmarking/colRowMeans2.md.rsp 1625e937116e331749caaf0b6f3f23c7 *inst/benchmarking/colRowMeans2_subset.md.rsp 10cc2739e1485f274478af45cc535386 *inst/benchmarking/colRowMedians.md.rsp 2149f29d22e6f6a5ca0b3952ea4b9521 *inst/benchmarking/colRowMedians_subset.md.rsp fdc8aea9103c8f029188621562fbd985 *inst/benchmarking/colRowMins.md.rsp f8e85f562710a8ab093c7c64c8b88597 *inst/benchmarking/colRowMins_subset.md.rsp eb98ef33bed1925407cfcf6a2b133b5d *inst/benchmarking/colRowOrderStats.md.rsp ae91cc26681c8784f56d078a6983feb9 *inst/benchmarking/colRowOrderStats_subset.md.rsp 3d8008f33266df3de8da082280f93fa1 *inst/benchmarking/colRowProds.md.rsp 66699301fd6260e59762b488bf8c13fe *inst/benchmarking/colRowProds_subset.md.rsp c8bd43b47ecb77a45536af8f9ad39949 *inst/benchmarking/colRowQuantiles.md.rsp 7f84b255abdcf984e7d845a999662587 *inst/benchmarking/colRowQuantiles_subset.md.rsp 88c7df3b546ffb3b96dd82d7d7b584c1 *inst/benchmarking/colRowRanges.md.rsp 3513026d04a7983083fb4e8f06b99362 *inst/benchmarking/colRowRanges_subset.md.rsp f4f381ce25f6718d064cb9cffa89db68 *inst/benchmarking/colRowRanks.md.rsp 11291514300345f2ff5428b5a216d498 *inst/benchmarking/colRowRanks_subset.md.rsp ad61d0827bffb9751d6d2836b85387db *inst/benchmarking/colRowSums2.md.rsp 0b3508d6488f5f133dcdd3b98753d961 *inst/benchmarking/colRowSums2_subset.md.rsp 50b104a7f39a08ea581f0964907333e6 *inst/benchmarking/colRowTabulates.md.rsp 113d098fd3a2b8ccfa2657dc67c50d1b *inst/benchmarking/colRowTabulates_subset.md.rsp b7029ee5fb5966559b19a3ed574546f7 *inst/benchmarking/colRowVars.md.rsp fd0f2f8afa805375950d79ea1fd7757e *inst/benchmarking/colRowVars_subset.md.rsp 02b139cf5d9e2eb4abe496b26a745d76 *inst/benchmarking/colRowWeightedMeans.md.rsp 5643a1fd128eb081a40fa632404d87ee *inst/benchmarking/colRowWeightedMeans_subset.md.rsp bbaedf22f44a0a1d038d2e3b89826f37 *inst/benchmarking/colRowWeightedMedians.md.rsp 9545c448dd47ea1314617936fbf5fc81 *inst/benchmarking/colRowWeightedMedians_subset.md.rsp ca9f0ec0b3fdba6f089b42f284eafbae *inst/benchmarking/count.md.rsp 58aaf19b7253bb01ccca1de1719b9d9e *inst/benchmarking/count_subset.md.rsp ab9f5049c780d39eca80e084f60cf68e *inst/benchmarking/includes/appendix.md.rsp 75a4dbe1cebc11442ce3f8626cb2f786 *inst/benchmarking/includes/footer.md.rsp 80fec8731611547e148434c4a8af80a7 *inst/benchmarking/includes/header.md.rsp 126e65a1ededbfcac666238fd271a0dd *inst/benchmarking/includes/references.md.rsp e4f6f67e64fa5e2dc270cd443add0053 *inst/benchmarking/includes/results.md.rsp f2400acb9800a049f23305a37cac9db3 *inst/benchmarking/includes/setup.md.rsp b02cc2f4ed76e14478c85429397ac129 *inst/benchmarking/index.md.rsp b9c2a2f843b653034c1654bc12d48fe5 *inst/benchmarking/indexByRow.md.rsp e55144f01e221ec010b701b273e164cc *inst/benchmarking/logSumExp.md.rsp 0c1b2fa6b417e39be2da23c774ff663b *inst/benchmarking/logSumExp_subset.md.rsp 1a6af4356e4e9edb2a59e9632e6d4615 *inst/benchmarking/madDiff.md.rsp 07ad9f539a5cee89cd0bdad8175990cf *inst/benchmarking/madDiff_subset.md.rsp a4dbef70e5817836b15dcf085deb426d *inst/benchmarking/mean2.md.rsp 9f87d271ce0a2ec1387c304ccdb128d9 *inst/benchmarking/mean2_subset.md.rsp 1402880c69595363a1e3c3c985bc013f *inst/benchmarking/product.md.rsp 62d982e249e0d8cfee8729d35af302ef *inst/benchmarking/product_subset.md.rsp 9c94919d6a73a46b27b97673f771afe1 *inst/benchmarking/sum2.md.rsp 18bdbfc55c0bff412054f683c4f26342 *inst/benchmarking/sum2_subset.md.rsp b744777ba0af2a8a50e5d1c9207e7177 *inst/benchmarking/t_tx_OP_y.md.rsp 03905f14b790eb96b401d934d953cfbd *inst/benchmarking/t_tx_OP_y_subset.md.rsp e8c65eec84796fa104ebd81eaf06baee *inst/benchmarking/varDiff.md.rsp 7673948c3ce870b5a2f9d777e1e94a16 *inst/benchmarking/varDiff_subset.md.rsp 4056928e8440de8793c172c949b1a64d *inst/benchmarking/weightedMean.md.rsp 414e2188ce3206a01b280c3786d187b2 *inst/benchmarking/weightedMean_subset.md.rsp 0d6e67649cafa21f08f3405456d0365e *inst/benchmarking/weightedMedian.md.rsp fa6e9a4a08145bce053c6e4d774e81a0 *inst/benchmarking/weightedMedian_subset.md.rsp e68f83709396b851d7c53e36561ed6de *inst/benchmarking/x_OP_y.md.rsp 2dd5fa9d134fc7d05ded89f7d458b1eb *inst/benchmarking/x_OP_y_subset.md.rsp 8d81256a2cd25783c2af1dcd0d5b2efd *inst/doc/matrixStats-methods.html b5d58ec253e5089b3b1ca4887eab8707 *inst/doc/matrixStats-methods.md.rsp c15095e33173fb3d53787d941d897b38 *man/allocMatrix.Rd b929244cc9392f8ba41496f625dbcd1c *man/anyMissing.Rd 1597c2bff07ae712ad7f8ae2f4463bda *man/binCounts.Rd 1cc55791e600ccdb92fa85755afd1bd4 *man/binMeans.Rd 002193bcf08da4d3d4203baa818c5fc3 *man/diff2.Rd dfec8268244c7ba31e5f05259de042fa *man/indexByRow.Rd 54b2a88dd66b4375d970bb60494a0d0b *man/logSumExp.Rd d43c64f9b7c712624975ec95c1f35da8 *man/matrixStats-package.Rd 5931291ee15bb5e5db7dab9444422ccb *man/mean2.Rd d0792dacb6bbb00034580bdeccde3949 *man/rowAlls.Rd 023d7d19d51a9a5864586bf3ef5f2849 *man/rowAvgsPerColSet.Rd 99d3dd491afaf5fd052944a5bbf54a6b *man/rowCollapse.Rd 53ebac5e38102cece8e2a2b356aaf693 *man/rowCounts.Rd c8d80702a090f29a63590a3e9868d478 *man/rowCumsums.Rd 478375183e979a1422b70feda6100a87 *man/rowDiffs.Rd a63202e4868a762403be46351b409944 *man/rowIQRs.Rd 995f299c71d4537b35fbc8090c91fc93 *man/rowLogSumExps.Rd dcbf9ac516da470c7e23a639ea9056cd *man/rowMeans2.Rd 31808c4b19adefce2723bf140e00428e *man/rowMedians.Rd 6aa302100ecc4523141001c92f4c0af3 *man/rowOrderStats.Rd be8df3998d1314149298d750a2f83636 *man/rowProds.Rd 68be94a7113a6b53638ca3e6594a8087 *man/rowQuantiles.Rd 1d0658d294548925152327afa355fb9e *man/rowRanges.Rd 8c4ad0d73389472eacf64d535720f25e *man/rowRanks.Rd edf5d46a27f95115326b470da79f478a *man/rowSds.Rd 86e7c07feeb1b2b48fa425db6f578546 *man/rowSums2.Rd e99860dd4285c92dcceeae89e17b48f7 *man/rowTabulates.Rd 1113583a954837a11e7cb0ead4822f47 *man/rowVars.Rd 0e12968c98f58490658d92ac37850302 *man/rowWeightedMeans.Rd c1a50d54c3cdb14a2b17180217cd8dfc *man/rowWeightedMedians.Rd aa7b600a019f2582cd7428cc7f0398f4 *man/signTabulate.Rd 85f0956a77d87513e695be0c869fa769 *man/sum2.Rd 66638fde38a3708c7e8b14d498ba511d *man/validateIndices.Rd f712c88b690b9d109c55f43d5601ea2c *man/varDiff.Rd 5f371b9beb5bba1b942dbeb735cf0a71 *man/weightedMad.Rd bd94bd87f6d2a394d3c498fd2cb3227d *man/weightedMean.Rd 4baf1676a824eacb67ec185ab56d295e *man/weightedMedian.Rd ef18451192fecc7f1bae85fa94c19e58 *man/weightedVar.Rd 1431beb726a0bf4176c81eb79d0e3ca1 *man/x_OP_y.Rd 991fff984e618b4d6114077355f4b777 *src/000.api.h a9424c37b9a983226be0bd904fa70fc4 *src/000.init.c 868a4fcddfaaac11ab9baa14ad540ff7 *src/000.macros.h 3591d2ca94a33b65f00d043d8336c7d1 *src/000.templates-gen-matrix-vector.h 707b5b0c13bd4613ba117a0d4f04a80a *src/000.templates-gen-matrix.h 16aca8455b398420b74fc43d6726d0cc *src/000.templates-gen-vector.h e7ee8d95addf2b97b42dd8358b140cd1 *src/000.templates-types.h 78bbf931f308f8cf910cf70f646d4597 *src/000.templates-types_undef.h 1fe96f23b52c292f78943b4980eb0f33 *src/000.types.h ad0eb460a823c99bc125e5538abed0af *src/000.utils.h 4a1670c8cdf053e2aa559aac01864474 *src/allocMatrix2.c 6586ee6a479679fc15d50915cb456169 *src/anyMissing.c 179d034f589910fb1e5d9bcf56949470 *src/anyMissing_lowlevel.h 4e2981a7c579a838aa7ffe59c4602bac *src/anyMissing_lowlevel_template.h ec1fbc1b0025fafa1689bf21ed262ecd *src/binCounts.c f026702ce73cfca5e4c129595a871b8c *src/binCounts_lowlevel.h d972d21de1f4216d249fe461d3b87a53 *src/binCounts_lowlevel_template.h 7f24eaca486646513f0a6d7125362217 *src/binMeans.c a50741688f74c650961f648bd4c8bba6 *src/binMeans_lowlevel.h c58cc3cf0939d9f3346396493f36ad46 *src/binMeans_lowlevel_template.h f06365b8a24043e02e26dc2af205a752 *src/colCounts.c 6bff1d3ee1a2f6fc261f20fbe11a19c6 *src/colCounts_lowlevel.h 6b1754905f2cf8b6f0489e01b4368e5c *src/colCounts_lowlevel_template.h c11c024d1e28420f203a13c4ad95277a *src/colOrderStats.c d0492740e45af2e94f5e2b0ebbf840e6 *src/colOrderStats_lowlevel.h 953656060629b45b8ba473643eaba4a4 *src/colOrderStats_lowlevel_template.h 0356ce934de0ff52e833631dcffad230 *src/colRanges.c ddedbfd1ad65ed1f982f4817b6604107 *src/colRanges_lowlevel.h b1fae896fe8819fceccf26e34f6f956b *src/colRanges_lowlevel_template.h ff89a633c13c749090f907d8fa4269fb *src/diff2.c 4e80408ba0396c2fc20ca6377bee5e89 *src/diff2_lowlevel.h 6c00b603f8ea15cc00451fb92a1ee802 *src/diff2_lowlevel_template.h c15a708813c2abb534d54842047d9117 *src/indexByRow.c 8734d97e9cb707425267bbdd365c5dc6 *src/logSumExp.c 18b041e72633c98970002a39b667cd37 *src/logSumExp_lowlevel.h fdd0460e4a39074997d8f39b2cf8ae0f *src/logSumExp_lowlevel_template.h c4c6e8602caf9a985dc44c8e04808d80 *src/mean2.c 9b1bd3a848b5358e443a360bcb0de665 *src/mean2_lowlevel.h e22185d6de65d4cd08950061ba07863a *src/mean2_lowlevel_template.h ad1086404f725578bff53cec8caa5378 *src/naming.c 94d1aa5a1b9e874dd4dcbfddab971ca1 *src/naming.h 2014527d876dd291424fa0409d0ccc71 *src/productExpSumLog.c 854cc2afd4f6d7dbe81da1e5430f45ea *src/productExpSumLog_lowlevel.h 190075c907554d72dd9804c8e5b1ec75 *src/productExpSumLog_lowlevel_template.h a2e008bbd2037b65edec7e0d65724354 *src/psortKM.c 13e0f8028dddf264a40a4e3a94a354de *src/rowCounts.c f6eed3167155220e914d2d2f6243d323 *src/rowCounts_lowlevel.h df29c82d23b5944232029a6d0a46cda3 *src/rowCounts_lowlevel_template.h d9b999ce8d9705b79944875f1db11222 *src/rowCumMinMaxs_lowlevel_template.h 3d03ddf00f517d9c81998e1844bca4cb *src/rowCummaxs.c a519936ab68ec6edcaa8371ad9a9e89e *src/rowCummaxs_lowlevel.h a6e6d70de6797e4ba43e2c3bd265531f *src/rowCummins.c 9ab3f48106ed47948ce7726193d5f76d *src/rowCummins_lowlevel.h 495e55f9f2da6c4be888eb256b994cb7 *src/rowCumprods.c ffb9b064dd5e48dbc72c406e6f3f5068 *src/rowCumprods_lowlevel.h 06699368f3a4711001e370514acba2ee *src/rowCumprods_lowlevel_template.h f6760efe302019b65361550274d0e8ae *src/rowCumsums.c 3002113bff53509640c8f1d9577cf36f *src/rowCumsums_lowlevel.h 47c33f07f70c3d39801fb3e47ec792a7 *src/rowCumsums_lowlevel_template.h e54ac96137a0f3099e70c2b2e8d86c95 *src/rowDiffs.c d613aac5a99b495b7dddadfd9f072e76 *src/rowDiffs_lowlevel.h 142ec3726d55d5aa3abe6b4518d58359 *src/rowDiffs_lowlevel_template.h 11e331db48924ba3ce43da49eceff9dd *src/rowLogSumExp.c 64e4deabb7163ff1b12d0127d19ea960 *src/rowLogSumExp_lowlevel.h 3bae1884403042ed50f37ceed61fdeab *src/rowLogSumExp_lowlevel_template.h 6c5a949d976c22934e7d371459e74d35 *src/rowMads.c 5daeff7f63f5dab9cfaa5039f7e7a1e5 *src/rowMads_lowlevel.h 178c41ad54d5e612b89b21c157c30868 *src/rowMads_lowlevel_template.h daf82d4223bcb3a742d96e29e95e77b7 *src/rowMeans2.c 81ddd1ebb34b946f50b6e5cb5f87c490 *src/rowMeans2_lowlevel.h ac570e4fbe67a58cdecd7532a96cd52f *src/rowMeans2_lowlevel_template.h 37fd7d5981ec1454440ddb5152f84262 *src/rowMedians.c 2cf1b5439f7ecc541c7c2eeed4cde256 *src/rowMedians_lowlevel.h 58d7a2f6328d21f3582e5317a2f3f7f8 *src/rowMedians_lowlevel_template.h d791d77269651623e53b2a0c004e302e *src/rowOrderStats.c 8652eb4cd0030805139cb455984774ee *src/rowOrderStats_lowlevel.h 082b44d5aa35a6163b9705b377673f5e *src/rowOrderStats_lowlevel_template.h 594de4f560beb14edf5841519d2e6907 *src/rowRanges.c bf83e397419a9b046abacc61021bbf4d *src/rowRanges_lowlevel.h c1a970553068570726946f104a277f86 *src/rowRanges_lowlevel_template.h a3f62623cc5f6d3beec5910e58317ba0 *src/rowRanksWithTies.c 8bd7e83a6234fe6ce792316ce0842429 *src/rowRanksWithTies_lowlevel.h eba764fae48a0bd53f5b83c64d1e19f5 *src/rowRanksWithTies_lowlevel_template.h 8785d5f30312d1030ca70ff08e86a5ef *src/rowSums2.c d504136f492c5a48bb7b178ce9cbc4c4 *src/rowSums2_lowlevel.h 418cafbe3e93aa408e8dabec1a0b83e5 *src/rowSums2_lowlevel_template.h f6df020b0b64ffd9e512b889af5b5d1f *src/rowVars.c 9a98b18e3f3e4e9fd14dd45838b1d788 *src/rowVars_lowlevel.h 33315a062a273dd7bb1f1dc195352b37 *src/rowVars_lowlevel_template.h 849457441c8054b481a17c160939c22e *src/signTabulate.c dd3283b767cc8cf24dd806b789301773 *src/signTabulate_lowlevel.h 1919f4ac9bec204a8cd51c84155d7fe1 *src/signTabulate_lowlevel_template.h 5faa0d92ca915d92fecae07a4f9fda58 *src/sum2.c 48faa1f503b20ef2a19051ada554f6b4 *src/sum2_lowlevel.h 9aa68c01b897689e6da1466b79860278 *src/sum2_lowlevel_template.h 3742047a844af58b6f0aaa69e4f3ccc8 *src/validateIndices.c 673e1caf745e7aa9613f5e62dd292cea *src/validateIndices_lowlevel.h ea4a7b5c1eaa0c5625be4d386e397d1e *src/validateIndices_lowlevel_template.h d0f607c9ff05772847371090b0bbf95c *src/weightedMean.c 666b4aca5cc045ba4d6e66bc9a600f44 *src/weightedMean_lowlevel.h b1cbfcdda3c3a7f39acdd2d58cbc25ba *src/weightedMean_lowlevel_template.h 87661f30c66339cda3a72f31280d886b *src/weightedMedian.c 104f07df36a5cb01d9bae487420b6d5a *src/weightedMedian_lowlevel.h aebbd3dce8131c0277ab67550d63de90 *src/weightedMedian_lowlevel_template.h fafeb0e021133180556c7adee244a8aa *src/x_OP_y.c 2fcbecfc78f80e9a7adc872b2b2dbe4e *src/x_OP_y_lowlevel.h 26f59ffbe6ae3fa9e8f1d18ac55b11d4 *src/x_OP_y_lowlevel_template.h 631bd7ebf66662a7a83d3695cdd98149 *tests/allocArray.R 26b24f578ea01425c6575469f4858b05 *tests/allocMatrix.R 1c7839cfd44c26855ea1aa654105748d *tests/allocVector.R bd72ad4ad55040aaff979fdfbe2e593e *tests/anyMissing.R 67fb9105a0b3ace0bc15046f90bbd5da *tests/anyMissing_subset.R a1e2e2c6a832f0e0ce4fd3c55a7e2f0e *tests/benchmark.R feb57eb54180362c581b6fe53b500b29 *tests/binCounts.R 099a8ddd1b5e46d307a624d87fc12169 *tests/binCounts_subset.R c7f7cea187264d3fb7152ef5bb3e94d1 *tests/binMeans,binCounts.R e0b1c88a59eaf59571a6af74d26b55d8 *tests/binMeans,binCounts_subset.R 14f1b37ac91da690e225bad78a1a632b *tests/count.R 880706ea6c5cfc1d213df9ee5b758211 *tests/count_subset.R 5ed1b8bee9180c536393aa76f41d6914 *tests/diff2.R 1e614439e55c47e345637000600b32e3 *tests/diff2_subset.R 02750ad0adf3675733e3f1006a703a3b *tests/indexByRow.R 3367c853594ca5ff2cb145a7b4ba9670 *tests/logSumExp.R 4c911012825f2d15f5d8abd1abc411fb *tests/logSumExp_subset.R bc615a404d2aa4ac9ff678afbc2a8e67 *tests/mean2.R 028eee10d3e9da4822795bf96e47b1f4 *tests/mean2_subset.R d5c028c3973250bbfd46fde0f6201609 *tests/product.R e0bb296810a798ae8d481f2402e70846 *tests/product_subset.R 16228ebb15d78d1d8a80b9342e4c88d3 *tests/psortKM.R 641b838d3e550b221b031673ab76bed4 *tests/rowAllAnys.R a1070af32af420c9a9b5e5233e3d0779 *tests/rowAllAnys_subset.R df340cb87673d51c7f7912f4e42dddc6 *tests/rowAvgsPerColSet.R 0689281bc5dcadfb63cb6e4a8b9ca719 *tests/rowAvgsPerColSet_subset.R 287c0850b4cabfc4f4f3b026e5151002 *tests/rowCollapse.R 8cdb04910dc36151da77e7f863fcb75c *tests/rowCollapse_subset.R 5a789da826c23c27c4dd6ac2e757246e *tests/rowCounts.R 9041e5e028f0ad9f51cff5ef7a58f3ef *tests/rowCounts_subset.R 0005e4907c42f0fcbab3ff2963f2d7bd *tests/rowCumMinMaxs.R deb20df5f517177ede3bfb1457cb8125 *tests/rowCumMinMaxs_subset.R 6d782d8d8e8c5c12a15b2dc537f43500 *tests/rowCumprods.R 1798a9fbedb7ab4904fe64e2d0e04ed8 *tests/rowCumprods_subset.R cd4344231ca5ade8e08c26ea9a694e71 *tests/rowCumsums.R 9ef91ab7d05f6c0f4037a1afa707061c *tests/rowCumsums_subset.R c915e032e5b413456a6101d2ee10b045 *tests/rowDiffs.R c6522b849bb9e86ae53ca17e373732f0 *tests/rowDiffs_subset.R c16f0b9f75ba563d117e5815a30871af *tests/rowIQRs.R 5a82980886d9b7e98e5aaeb6cf8c94c1 *tests/rowIQRs_subset.R 81ecce1b63456567b996466f9ce4aa79 *tests/rowLogSumExps.R 63edde5a61ded37653a7524dfa1d0ae0 *tests/rowLogSumExps_subset.R 3b149ef3e3ead6a5ef25b9509d0910e8 *tests/rowMads.R dcb66846cdc9c61d169180181ed614d6 *tests/rowMads_subset.R 3397f5940474d1b921fcfef2d710d61e *tests/rowMeans2.R 15c7742aa9dcb1f5d96b499e041610df *tests/rowMeans2_subset.R c6c4f0629941f82d69c05fcbaf69ceee *tests/rowMedians.R a0e4e35498ba833f450cd54a11a9c7e8 *tests/rowMedians_subset.R 38cdc4fb138189d63091420ed661b31d *tests/rowOrderStats.R 9fcb13dd97c45643009cdc22a5a530c9 *tests/rowOrderStats_subset.R 204fae1b3585ee52f322eab58e3a4630 *tests/rowProds.R d8af498d32c126021015efc022cbb279 *tests/rowProds_subset.R 5aa4d27ac311a70533e23ad96f038dea *tests/rowQuantiles.R 52750c5afabc1e8575f5ea25a24e5268 *tests/rowQuantiles_subset.R c75fe4d1bb1b888b348cc591818172b6 *tests/rowRanges.R 2195f65bae9f3ac3a4a5baf5f2d88de5 *tests/rowRanges_subset.R d9d16aba677d6b1c48ddbec77f64ae18 *tests/rowRanks.R 3d345e7c598851edc7c736b127bd5bc9 *tests/rowRanks_subset.R bfcd1d137f69831f899e2d41eb98a084 *tests/rowSds.R 51fd0dd622c1064434e28127fcaf40ce *tests/rowSds_subset.R 750cbe2ca2c73f31fdae39e17cbe872c *tests/rowSums2.R 9a54734f107c1be66ce0f66f596f5250 *tests/rowSums2_subset.R d0f5b51b4de58e5823f50a9e115cc79b *tests/rowTabulates.R 33970467c1c4c2760e96a39f3b3c7c74 *tests/rowTabulates_subset.R a60a94b25db18dd33090aa943e0eb8e3 *tests/rowVarDiffs.R 5bc7e500c5233f593e1454b68f588924 *tests/rowVarDiffs_mad,iqr_subset.R 65f98c2b6054df4e9ed47b1ab886c360 *tests/rowVarDiffs_var,sd_subset.R 08f7efd73dfa195d4ae495c5c601589c *tests/rowVars.R feb9af881788e0fc9f596915b22e72c8 *tests/rowVars_subset.R cdba67911728c1ef60924f683fa52bcf *tests/rowWeightedMeans.R 543393ef080a5f9a9dd1b3c1ce58c789 *tests/rowWeightedMeans_subset.R 334a0c42ac21bd6cee60ee4952cd7d7f *tests/rowWeightedMedians.R c2f5a538f47e96334807a77682d55163 *tests/rowWeightedMedians_subset.R 05e47f121daabb16021a310716cb9658 *tests/rowWeightedVars.R ba7ad0ab872f300be6b5306b25fd56d1 *tests/rowWeightedVars_subset.R d3d529257f5e212fe44e4d7edec07f28 *tests/signTabulate.R fb3a48080d0a12e86997c1b96a9cf68c *tests/signTabulate_subset.R a45c9c4723df1e16ccc9849bf407b6a9 *tests/sum2.R 5b542504aabd2cf782b11d5987ca6b93 *tests/sum2_subset.R e0044a74c4d6585bd2071a8097128fbe *tests/utils/validateIndicesFramework.R cee5312ed8283da2ed42494dccdfb172 *tests/validateIndices.R 62dff0f6f8cd327315ab719bc2b4abdc *tests/varDiff_etal.R 776e5c2aac0f37ee8e9ee0bfd5625bf6 *tests/varDiff_etal_subset.R 7ae3041de11cbc9b7d7d990357ecb829 *tests/weightedMean.R f45aabcd7da19289147c9fe12dd41566 *tests/weightedMean_subset.R e5785aced77e842fe7c1d35781976bab *tests/weightedMedian.R 31af6e65986eed830fa17824e002b9bc *tests/weightedMedian_subset.R be4415d793eb8bd7a3d1bf5906abccb8 *tests/weightedVar.R 66f915b07f523fd74bc54cb2139c34b8 *tests/weightedVar_etal.R 115fd03bc3822bf1be54f36dc2cca212 *tests/weightedVar_etal_subset.R 53ebd8c7218cea409ff3e4a81bf812ae *tests/x_OP_y.R a32e4d6d831823488d07ae2b5e9117e3 *tests/x_OP_y_subset.R ae02684a77e833bf48a7a1e4b3b10dca *tests/zzz.package-unload.R b5d58ec253e5089b3b1ca4887eab8707 *vignettes/matrixStats-methods.md.rsp matrixStats/inst/0000755000176200001440000000000014120430675013554 5ustar liggesusersmatrixStats/inst/doc/0000755000176200001440000000000014120430675014321 5ustar liggesusersmatrixStats/inst/doc/matrixStats-methods.html0000644000176200001440000002243114120430674021174 0ustar liggesusers matrixStats: Summary of functions

matrixStats: Summary of functions

Henrik Bengtsson on NA

Location and scale estimators

Estimator Functions Example
Weighted sample mean weightedMean(), colWeightedMeans(), rowWeightedMeans() weightedMean(x, w); rowWeightedMeans(x, w)
Mean mean2(), colMeans2(), rowMeans2() mean2(x); rowMeans2(x)
Median median(), colMedians(), rowMedians() median(x); rowMedians(x)
Weighted median weightedMedian(), colWeightedMedians(), rowWeightedMedians() weightedMedian(x, w); rowWeightedMedians(x, w)
Sample variance var(), colVars(), rowVars() var(x); rowVars(x)
Weighted sample variance weightedVar(), colWeightedVars(), rowWeightedVars() weightedVar(x, w), rowWeightedVars(x, w)
Sample variance by n-order differences varDiff(), colVarDiffs(), rowVarDiffs() varDiff(x); rowVarDiffs(x)
Sample standard deviation sd(), colSds(), rowSds() sd(x); rowSds(x)
Weighted sample deviation weightedSd(), colWeightedSds(), rowWeightedSds() weightedSd(x, w), rowWeightedSds(x, w)
Sample standard deviation by n-order differences sdDiff(), colSdDiffs(), rowSdDiffs() sdDiff(x); rowSdDiffs(x)
Median absolute deviation (MAD) mad(), colMads(), rowMads() mad(x); rowMads(x)
Weighted median absolute deviation (MAD) weightedMad(), colWeightedMads(), rowWeightedMads() weightedMad(x, w), rowWeightedMads(x, w)
Median absolute deviation (MAD) by n-order differences madDiff(), colMadDiffs(), rowMadDiffs() madDiff(x); rowMadDiffs()
Quantile quantile(), colQuantiles(), rowQuantiles() quantile(x, probs); rowQuantiles(x, probs)
Interquartile range (IQR) iqr(), colIQRs(), rowIQRs() iqr(x); rowIQRs(x)
Interquartile range (IQR) by n-order differences iqrDiff(), colIQRDiffs(), rowIQRDiffs() iqrDiff(x); rowIQRDiffs(x)
Range range(), colRanges(), rowRanges() range(x); rowRanges(x)
Minimum min(), colMins(), rowMins() min(x); rowMins(x)
Maximum max(), colMaxs(), rowMaxs() max(x); rowMaxs(x)

Testing for and counting values

Operator Functions Example
Are there any missing values? anyMissing(), colAnyMissings(), rowAnyMissings() anyMissing(x); rowAnyMissings(x)
Does TRUE exists? any(), colAnys(), rowAnys() any(x); rowAnys(x)
Are all values TRUE? all(), colAlls(), rowAlls() all(x); rowAlls(x)
Does value exists? anyValue(), colAnys(), rowAnys() anyValue(x, value); rowAnys(x, value)
Do all elements have a given value? allValue(), colAlls(), rowAlls() allValue(x, value); rowAlls(x, value)
Number of occurrences of a value? count(), colCounts(), rowCounts() count(x, value); rowCounts(x, value)

Cumulative functions

Operator Functions Example
Cumulative sum cumsum(), colCumsums(), rowCumsums() cumsum(x); rowCumsums(x)
Cumulative product cumprod(), colCumprods(), rowCumprods() cumprod(x); rowCumprods(x)
Cumulative minimum cummin(), colCummins(), rowCummins() cummin(x); rowCummins(x)
Cumulative maximum cummax(), colCummaxs(), rowCummaxs() cummax(x); rowCummaxs(x)

Binning

Estimator Functions Example
Counts in disjoint bins binCounts() binCounts(x, bx)
Sample means (and counts) in disjoint bins binMeans() binMeans(y, x, bx)

Miscellaneous

Operation Functions Example
Sum sum2(), colSums2(), rowSums2() sum2(x); rowSums2(x)
Lagged differences diff2(), colDiffs(), rowDiffs() diff2(x), rowDiffs(x)

matrixStats v0.61.0. Release: CRAN, Development: GitHub.

matrixStats/inst/doc/matrixStats-methods.md.rsp0000644000176200001440000002132413615621102021427 0ustar liggesusers<%@meta language="R-vignette" content="-------------------------------- DIRECTIVES FOR R: %\VignetteIndexEntry{matrixStats: Summary of functions} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{matrix} %\VignetteKeyword{vector} %\VignetteKeyword{apply} %\VignetteKeyword{rows} %\VignetteKeyword{columns} %\VignetteKeyword{memory} %\VignetteKeyword{speed} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> <% pkgName <- "matrixStats" library(pkgName, character.only=TRUE) ns <- getNamespace(pkgName) env <- as.environment(sprintf("package:%s", pkgName)) R.utils::use("R.utils") kable <- function(df, ...) { fcns <- as.character(df$Functions) fcns <- strsplit(fcns, split=",") fcns <- sapply(fcns, FUN=function(names) { names <- trim(names) ok <- sapply(names, FUN=exists, envir=ns, mode="function") names[ok] <- sprintf("%s()", names[ok]) names[!ok] <- sprintf("~~%s()~~", names[!ok]) names <- paste(names, collapse=", ") }) df$Functions <- fcns df$Example <- sprintf("`%s`", df$Example) print(knitr::kable(df, ..., format="markdown")) } # Find all functions all <- ls(envir=env) keep <- sapply(all, FUN=function(name) { is.function(get(name, envir=env)) }) all <- all[keep] keep <- !grepl("[.]([^.]*)$", all) all <- all[keep] # Hidden functions skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet") skip <- c(skip, "allocArray", "allocMatrix", "allocVector") all <- setdiff(all, skip) # Column and row functions crfcns <- grep("^(col|row)", all, value=TRUE) # Vector functions vfcns <- setdiff(all, crfcns) %> # <%@meta name="title"%> <% pkg <- R.oo::Package(pkgName) %> <%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%> <% fcns <- crfcns base <- gsub("^(col|row)", "", fcns) groups <- tapply(fcns, base, FUN=list) stopifnot(all(sapply(groups, FUN=length) == 2L)) groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L) %> <%--- ## Functions that apply to column and rows of matrices ``` <% print(fcns) %> ``` ---%> <% fcns <- vfcns %> <%--- ## Functions that apply to vectors ``` <% print(fcns) %> ``` ---%> ## Location and scale estimators <% tbl <- NULL row <- data.frame( "Estimator" = "Weighted sample mean", "Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans", "Example" = "weightedMean(x, w); rowWeightedMeans(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Mean", "Functions" = "mean2, colMeans2, rowMeans2", "Example" = "mean2(x); rowMeans2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median", "Functions" = "median, colMedians, rowMedians", "Example" = "median(x); rowMedians(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median", "Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians", "Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance", "Functions" = "var, colVars, rowVars", "Example" = "var(x); rowVars(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample variance", "Functions" = "weightedVar, colWeightedVars, rowWeightedVars", "Example" = "weightedVar(x, w), rowWeightedVars(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance by n-order differences", "Functions" = "varDiff, colVarDiffs, rowVarDiffs", "Example" = "varDiff(x); rowVarDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation", "Functions" = "sd, colSds, rowSds", "Example" = "sd(x); rowSds(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample deviation", "Functions" = "weightedSd, colWeightedSds, rowWeightedSds", "Example" = "weightedSd(x, w), rowWeightedSds(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation by n-order differences", "Functions" = "sdDiff, colSdDiffs, rowSdDiffs", "Example" = "sdDiff(x); rowSdDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD)", "Functions" = "mad, colMads, rowMads", "Example" = "mad(x); rowMads(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median absolute deviation (MAD)", "Functions" = "weightedMad, colWeightedMads, rowWeightedMads", "Example" = "weightedMad(x, w), rowWeightedMads(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD) by n-order differences", "Functions" = "madDiff, colMadDiffs, rowMadDiffs", "Example" = "madDiff(x); rowMadDiffs()" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Quantile", "Functions" = "quantile, colQuantiles, rowQuantiles", "Example" = "quantile(x, probs); rowQuantiles(x, probs)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR)", "Functions" = "iqr, colIQRs, rowIQRs", "Example" = "iqr(x); rowIQRs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR) by n-order differences", "Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs", "Example" = "iqrDiff(x); rowIQRDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Range", "Functions" = "range, colRanges, rowRanges", "Example" = "range(x); rowRanges(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Minimum", "Functions" = "min, colMins, rowMins", "Example" = "min(x); rowMins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Maximum", "Functions" = "max, colMaxs, rowMaxs", "Example" = "max(x); rowMaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Testing for and counting values <% tbl <- NULL row <- data.frame( "Operator" = "Are there any missing values?", "Functions" = "anyMissing, colAnyMissings, rowAnyMissings", "Example" = "anyMissing(x); rowAnyMissings(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does TRUE exists?", "Functions" = "any, colAnys, rowAnys", "Example" = "any(x); rowAnys(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Are all values TRUE?", "Functions" = "all, colAlls, rowAlls", "Example" = "all(x); rowAlls(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does value exists?", "Functions" = "anyValue, colAnys, rowAnys", "Example" = "anyValue(x, value); rowAnys(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Do all elements have a given value?", "Functions" = "allValue, colAlls, rowAlls", "Example" = "allValue(x, value); rowAlls(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Number of occurrences of a value?", "Functions" = "count, colCounts, rowCounts", "Example" = "count(x, value); rowCounts(x, value)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Cumulative functions <% tbl <- NULL row <- data.frame( "Operator" = "Cumulative sum", "Functions" = "cumsum, colCumsums, rowCumsums", "Example" = "cumsum(x); rowCumsums(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative product", "Functions" = "cumprod, colCumprods, rowCumprods", "Example" = "cumprod(x); rowCumprods(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative minimum", "Functions" = "cummin, colCummins, rowCummins", "Example" = "cummin(x); rowCummins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative maximum", "Functions" = "cummax, colCummaxs, rowCummaxs", "Example" = "cummax(x); rowCummaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Binning <% tbl <- NULL row <- data.frame( "Estimator" = "Counts in disjoint bins", "Functions" = "binCounts", "Example" = "binCounts(x, bx)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample means (and counts) in disjoint bins", "Functions" = "binMeans", "Example" = "binMeans(y, x, bx)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Miscellaneous <% tbl <- NULL row <- data.frame( "Operation" = "Sum", "Functions" = "sum2, colSums2, rowSums2", "Example" = "sum2(x); rowSums2(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operation" = "Lagged differences", "Functions" = c("diff2, colDiffs, rowDiffs"), "Example" = "diff2(x), rowDiffs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ------------------------------------------------------------- <%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](https://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>). matrixStats/inst/benchmarking/0000755000176200001440000000000014005425634016205 5ustar liggesusersmatrixStats/inst/benchmarking/colRowMedians.md.rsp0000644000176200001440000000274713615621101022103 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMedians"%> <%@string rowname="rowMedians"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + median() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMedians = colMedians(X, na.rm = FALSE), "apply+median" = apply(X, MARGIN = 2L, FUN = median, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMedians = rowMedians(X, na.rm = FALSE), "apply+median" = apply(X, MARGIN = 1L, FUN = median, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowLogSumExps_subset.md.rsp0000644000176200001440000000336313615621101024151 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colLogSumExps"%> <%@string rowname="rowLogSumExps"%> <%@string fcnname="colRowLogSumExps_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colLogSumExps_X_S" = colLogSumExps(X_S, na.rm = FALSE), "colLogSumExps(X, rows, cols)" = colLogSumExps(X, rows = rows, cols = cols, na.rm = FALSE), "colLogSumExps(X[rows, cols])" = colLogSumExps(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowLogSumExps_X_S" = rowLogSumExps(X_S, na.rm = FALSE), "rowLogSumExps(X, cols, rows)" = rowLogSumExps(X, rows = cols, cols = rows, na.rm = FALSE), "rowLogSumExps(X[cols, rows])" = rowLogSumExps(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/mean2_subset.md.rsp0000644000176200001440000000310013615621101021704 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="mean2_subset"%> <%@string subname="mean2"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "mean2_x_S" = mean2(x_S, refine = TRUE), "mean2_x_S_no_refine" = mean2(x_S, refine = FALSE), "mean2(x, idxs)" = mean2(x, idxs = idxs, refine = TRUE), "mean2_no_refine(x, idxs)" = mean2(x, idxs = idxs, refine = FALSE), "mean2(x[idxs])" = mean2(x[idxs], refine = TRUE), "mean2_no_refine(x[idxs])" = mean2(x[idxs], refine = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMeans.md.rsp0000644000176200001440000000272613615621101023244 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMeans"%> <%@string rowname="rowWeightedMeans"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + weighted.mean() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] w <- runif(nrow(X)) gc() colStats <- microbenchmark( colWeightedMeans = colWeightedMeans(X, w = w, na.rm = FALSE), "apply+weigthed.mean" = apply(X, MARGIN = 2L, FUN = weighted.mean, w = w, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowWeightedMeans = rowWeightedMeans(X, w = w, na.rm = FALSE), "apply+weigthed.mean" = apply(X, MARGIN = 1L, FUN = weighted.mean, w = w, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowOrderStats.md.rsp0000644000176200001440000000346413615621101022612 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colOrderStats"%> <%@string rowname="rowOrderStats"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> <% use("Biobase", how = "load") rowQ <- Biobase::rowQ %> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + quantile(..., type = 3L) * Biobase::rowQ() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() probs <- 0.3 which <- round(probs*nrow(X)) colStats <- microbenchmark( colOrderStats = colOrderStats(X, which = which, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 2L, FUN = quantile, probs = probs, na.rm = FALSE, type = 3L), "rowQ(t(X))" = rowQ(t(X), which = which), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowOrderStats = rowOrderStats(X, which = which, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 1L, FUN = quantile, probs = probs, na.rm = FALSE, type = 3L), rowQ = rowQ(X, which = which), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnyMissings_subset.md.rsp0000644000176200001440000000344413615621101024347 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnyMissings"%> <%@string rowname="rowAnyMissings"%> <%@string fcnname="colRowAnyMissings_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colAnyMissings_X_S" = colAnyMissings(X_S), "colAnyMissings(X, rows, cols)" = colAnyMissings(X, rows = rows, cols = cols), "colAnyMissings(X[rows, cols])" = colAnyMissings(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowAnyMissings_X_S" = rowAnyMissings(X_S), "rowAnyMissings(X, cols, rows)" = rowAnyMissings(X, rows = cols, cols = rows), "rowAnyMissings(X[cols, rows])" = rowAnyMissings(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMins.md.rsp0000644000176200001440000000357713615621101021433 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMins"%> <%@string rowname="rowMins"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + min() * lapply() + pmin() * lapply() + pmin.int() See also [StackOverflow:colMins?]. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMins = colMins(X, na.rm = FALSE), "apply+min" = apply(X, MARGIN = 2L, FUN = min, na.rm = FALSE), "lapply+pmin" = do.call(pmin, lapply(seq_len(nrow(X)), function(i) X[i, ])), "lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(nrow(X)), function(i) X[i, ])), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMins = rowMins(X, na.rm = FALSE), "apply+min" = apply(X, MARGIN = 1L, FUN = min, na.rm = FALSE), "lapply+pmin" = do.call(pmin, lapply(seq_len(ncol(X)), function(i) X[, i])), "lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(ncol(X)), function(i) X[, i])), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumsums.md.rsp0000644000176200001440000000265313615621101022153 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumsums"%> <%@string rowname="rowCumsums"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cumsum() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCumsums = colCumsums(X), "apply+cumsum" = apply(X, MARGIN = 2L, FUN = cumsum), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCumsums = rowCumsums(X), "apply+cumsum" = apply(X, MARGIN = 1L, FUN = cumsum), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMean.md.rsp0000644000176200001440000000312013632742260021731 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMean"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-09"%> <%@include file="${header}"%> <% weighted.mean <- stats:::weighted.mean weighted.mean.default <- stats:::weighted.mean.default %> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * stats::weighted.mean() * stats:::weighted.mean.default() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] w <- runif(length(x)) gc() stats <- microbenchmark( "weightedMean" = weightedMean(x, w = w, na.rm = FALSE), "stats::weighted.mean" = weighted.mean(x, w = w, na.rm = FALSE), "stats:::weighted.mean.default" = weighted.mean.default(x, w = w, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-12-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowProds_subset.md.rsp0000644000176200001440000000504313615621101023167 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colProds"%> <%@string rowname="rowProds"%> <%@string fcnname="colRowProds_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] })%> <% gc() %> <%=withCapture({ colStats <- microbenchmark( "colProds_X_S w/ direct" = colProds(X_S, method = "direct", na.rm = FALSE), "colProds_X_S w/ expSumLog" = colProds(X_S, method = "expSumLog", na.rm = FALSE), "colProds(X, rows, cols) w/ direct" = colProds(X, rows = rows, cols = cols, method = "direct", na.rm = FALSE), "colProds(X, rows, cols) w/ expSumLog" = colProds(X, rows = rows, cols = cols, method = "expSumLog", na.rm = FALSE), "colProds(X[rows, cols]) w/ direct" = colProds(X[rows, cols], method = "direct", na.rm = FALSE), "colProds(X[rows, cols]) w/ expSumLog" = colProds(X[rows, cols], method = "expSumLog", na.rm = FALSE), unit = "ms" ) })%> <%=withCapture({ X <- t(X) X_S <- t(X_S) })%> <% gc() %> <%=withCapture({ rowStats <- microbenchmark( "rowProds_X_S w/ direct" = rowProds(X_S, method = "direct", na.rm = FALSE), "rowProds_X_S w/ expSumLog" = rowProds(X_S, method = "expSumLog", na.rm = FALSE), "rowProds(X, cols, rows) w/ direct" = rowProds(X, rows = cols, cols = rows, method = "direct", na.rm = FALSE), "rowProds(X, cols, rows) w/ expSumLog" = rowProds(X, rows = cols, cols = rows, method = "expSumLog", na.rm = FALSE), "rowProds(X[cols, rows]) w/ direct" = rowProds(X[cols, rows], method = "direct", na.rm = FALSE), "rowProds(X[cols, rows]) w/ expSumLog" = rowProds(X[cols, rows], method = "expSumLog", na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/sum2_subset.md.rsp0000644000176200001440000000251313615621101021577 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="sum2_subset"%> <%@string subname="sum2"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "sum2_x_S" = sum2(x_S), "sum2(x, idxs)" = sum2(x, idxs = idxs), "sum2(x[idxs])" = sum2(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binCounts_subset.md.rsp0000644000176200001440000000354613615621101022664 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binCounts_subset"%> <%@string subname="binCounts"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Non-sorted simulated data ```r <%=withCapture({ set.seed(0xBEEF) nx <- 100e3 # Number of data points xmax <- 0.01*nx x <- runif(nx, min = 0, max = xmax) storage.mode(x) <- mode str(x) # Uniformely distributed bins nb <- 10e3 # Number of bins bx <- seq(from = 0, to = xmax, length.out = nb+1L) bx <- c(-1, bx, xmax+1) # indices for subsetting idxs <- sample.int(length(x), size = length(x)*0.7) })%> ``` ### Results <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% mprintf("%s: %s\n", mode, dataLabel) %> ```r <%=withCapture({ x_S <- x[idxs] gc() stats <- microbenchmark( "binCounts_x_S" = binCounts(x_S, bx = bx), "binCounts(x, idxs)" = binCounts(x, idxs = idxs, bx = bx), "binCounts(x[idxs])" = binCounts(x[idxs], bx = bx), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% # Sanity checks n1 <- binCounts(x, idxs = idxs, bx = bx) n1r <- rev(binCounts(-x, idxs = idxs, bx = rev(-bx), right = TRUE)) stopifnot(identical(n1r, n1)) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) idxs <- sort(idxs) })%> ``` <% benchmark() %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-04 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/mean2.md.rsp0000644000176200001440000000436413615621101020334 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="mean2"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-02"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * mean() + [() * mean.default() + [() - avoids method dispatching as below ```r <%=withCapture({ mean2_R_v1 <- function(x, na.rm = FALSE, idxs) { mean(x[idxs], na.rm = na.rm) } })%> ``` and ```r <%=withCapture({ mean2_R_v2 <- function(x, na.rm = FALSE, idxs) { mean.default(x[idxs], na.rm = na.rm) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "mean2" = mean2(x, refine = TRUE), "mean2_no_refine" = mean2(x, refine = FALSE), "mean" = mean(x), "mean.default" = mean.default(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, "all")) %> <% for (subset in c(0.2, 0.4, 0.8)) { %> #### A <%=sprintf("%g", 100*subset)%>% subset ```r <%=withCapture({ x <- data[[.dataLabel.]] subset idxs <- sort(sample(length(x), size = subset*length(x), replace = FALSE)) gc() stats <- microbenchmark( "mean2" = mean2(x, idxs = idxs, refine = TRUE), "mean2_no_refine" = mean2(x, idxs = idxs, refine = FALSE), "mean+[()" = mean2_R_v1(x, idxs = idxs), "mean.default+[()" = mean2_R_v2(x, idxs = idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %> <% } # for (subset in ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnyMissings.md.rsp0000644000176200001440000000352113615621101022756 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnyMissings"%> <%@string rowname="rowAnyMissings"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * colAnyMissings() and rowAnyMissings() * apply() + anyMissing() * colSums() + is.na() and rowSums() + is.na() where ```r <%=withCapture({ colAnyMissings <- function(x, ...) colAnys(x, value = NA) })%> ``` and ```r <%=withCapture({ rowAnyMissings <- function(x, ...) rowAnys(x, value = NA) })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAnyMissings = colAnyMissings(X), "apply+anyMissing" = apply(X, MARGIN = 2L, FUN = anyMissing), colSums = is.na(colSums(X, na.rm = FALSE)), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAnyMissings = rowAnyMissings(X), "apply+anyMissing" = apply(X, MARGIN = 1L, FUN = anyMissing), rowSums = is.na(rowSums(X, na.rm = FALSE)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowQuantiles.md.rsp0000644000176200001440000000273613615621101022466 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colQuantiles"%> <%@string rowname="rowQuantiles"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + quantile() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() probs <- seq(from = 0, to = 1, by = 0.25) colStats <- microbenchmark( colQuantiles = colQuantiles(X, probs = probs, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 2L, FUN = quantile, probs = probs, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowQuantiles = rowQuantiles(X, probs = probs, na.rm = FALSE), "apply+quantile" = apply(X, MARGIN = 1L, FUN = quantile, probs = probs, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/x_OP_y.md.rsp0000644000176200001440000000404013615621101020516 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="x_OP_y"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * x_OP_y_R() as below ```r <%=withCapture({ x_OP_y_R <- function(x, y, OP, na.rm = FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "x_OP_y" = x_OP_y(x, y, OP = OP, na.rm = FALSE), "x_OP_y_R" = x_OP_y_R(x, y, OP = OP, na.rm = FALSE), unit = "ms" ) gc() })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumsums_subset.md.rsp0000644000176200001440000000336213615621101023536 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumsums"%> <%@string rowname="rowCumsums"%> <%@string fcnname="colRowCumsums_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colCumsums_X_S" = colCumsums(X_S), "colCumsums(X, rows, cols)" = colCumsums(X, rows = rows, cols = cols), "colCummins(X[rows, cols])" = colCumsums(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowCumsums_X_S" = rowCumsums(X_S), "rowCumsums(X, cols, rows)" = rowCumsums(X, rows = cols, cols = rows), "rowCumsums(X[cols, rows])" = rowCumsums(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumprods.md.rsp0000644000176200001440000000271013615621101022305 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumprods"%> <%@string rowname="rowCumprods"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cumprod() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode, range = c(-1, 1)) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCumprods = colCumprods(X), "apply+cumprod" = apply(X, MARGIN = 2L, FUN = cumprod), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCumprods = rowCumprods(X), "apply+cumprod" = apply(X, MARGIN = 1L, FUN = cumprod), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCounts_subset.md.rsp0000644000176200001440000000377513615621101023365 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCounts"%> <%@string rowname="rowCounts"%> <%@string fcnname="colRowCounts_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-04-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("logical", "integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> #### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] value <- 42 })%> ``` <% gc() %> ```r <%=withCapture({ colStats <- microbenchmark( "colCounts_X_S" = colCounts(X_S, value = value, na.rm = FALSE), "colCounts(X, rows, cols)" = colCounts(X, value = value, na.rm = FALSE, rows = rows, cols = cols), "colCounts(X[rows, cols])" = colCounts(X[rows, cols], value = value, na.rm = FALSE), unit = "ms" ) })%> ``` ```r <%=withCapture({ X <- t(X) X_S <- t(X_S) })%> ``` <% gc() %> ```r <%=withCapture({ rowStats <- microbenchmark( "rowCounts_X_S" = rowCounts(X_S, value = value, na.rm = FALSE), "rowCounts(X, cols, rows)" = rowCounts(X, value = value, na.rm = FALSE, rows = cols, cols = rows), "rowCounts(X[cols, rows])" = rowCounts(X[cols, rows], value = value, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-04-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowTabulates.md.rsp0000644000176200001440000000234713615621101022443 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colTabulates"%> <%@string rowname="rowTabulates"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * ??? ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "integer", range = c(-10, 10)) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colTabulates = colTabulates(X, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowTabulates = rowTabulates(X, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMedians_subset.md.rsp0000644000176200001440000000371513615621101025145 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMedians"%> <%@string rowname="rowWeightedMedians"%> <%@string fcnname="colRowWeightedMedians_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%> on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] w <- runif(nrow(X)) w_S <- w[rows] gc() colStats <- microbenchmark( "colWeightedMedians_X_w_S" = colWeightedMedians(X_S, w = w_S, na.rm = FALSE), "colWeightedMedians(X, w, rows, cols)" = colWeightedMedians(X, w = w, rows = rows, cols = cols, na.rm = FALSE), "colWeightedMedians(X[rows, cols], w[rows])" = colWeightedMedians(X[rows, cols], w = w[rows], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowWeightedMedians_X_w_S" = rowWeightedMedians(X_S, w = w_S, na.rm = FALSE), "rowWeightedMedians(X, w, cols, rows)" = rowWeightedMedians(X, w = w, rows = cols, cols = rows, na.rm = FALSE), "rowWeightedMedians(X[cols, rows], w[rows])" = rowWeightedMedians(X[cols, rows], w = w[rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/madDiff_subset.md.rsp0000644000176200001440000000246413615621101022250 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="madDiff_subset"%> <%@string subname="madDiff"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "madDiff_x_S" = madDiff(x_S), "madDiff(x, idxs)" = madDiff(x, idxs = idxs), "madDiff(x[idxs])" = madDiff(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanks.md.rsp0000644000176200001440000000300413615621101021564 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanks"%> <%@string rowname="rowRanks"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + rank() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colRanks = colRanks(X, na.rm = FALSE), "apply+rank" = apply(X, MARGIN = 2L, FUN = rank, na.last = "keep", ties.method = "max"), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowRanks = rowRanks(X, na.rm = FALSE), "apply+rank" = apply(X, MARGIN = 1L, FUN = rank, na.last = "keep", ties.method = "max"), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/x_OP_y_subset.md.rsp0000644000176200001440000000340613615621101022110 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="x_OP_y_subset"%> <%@string subname="x_OP_y"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] xrows <- sample.int(nrow(x), size = nrow(x)*0.7) xcols <- sample.int(ncol(x), size = ncol(x)*0.7) x_S <- x[xrows, xcols] yidxs <- xrows y_S <- y[yidxs] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "x_OP_y_x_y_S" = x_OP_y(x_S, y_S, OP = OP, na.rm = FALSE), "x_OP_y(x, y, OP, xrows, xcols, yidxs)" = x_OP_y(x, y, OP = OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = FALSE), "x_OP_y(x[xrows, xcols], y[yidxs], OP)" = x_OP_y(x[xrows, xcols], y[yidxs], OP = OP, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/allocMatrix.md.rsp0000644000176200001440000000410013615621101021575 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="allocMatrix"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * matrix() * matrix() special trick for NA where ```r <%=withCapture({ allocMatrix_R <- function(nrow, ncol, value = NA) { if (is.na(value) && !is.nan(value)) { matrix(data = value[c()], nrow = nrow, ncol = ncol) } else { matrix(data = value, nrow = nrow, ncol = ncol) } } # allocMatrix_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) values <- list(zero = 0, one = 1, "NA" = NA_real_) if (mode != "double") values <- lapply(values, FUN = function(x) { storage.mode(x) <- mode; x }) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> matrix <% for (value in values) { %> <% valueLabel <- as.character(value) mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel) %> ```r <%=withCapture({ dim <- dim(data[[.dataLabel.]]) nrow <- dim[1L] ncol <- dim[2L] str(value) })%> ``` <% gc() %> ```r <%=withCapture({ stats <- microbenchmark( "allocMatrix" = allocMatrix(nrow = nrow, ncol = ncol, value = value), "matrix" = matrix(data = value, nrow = nrow, ncol = ncol), "allocMatrix_R" = allocMatrix_R(nrow = nrow, ncol = ncol, value = value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %> <% } # for (value in values) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/product_subset.md.rsp0000644000176200001440000000237013615621101022372 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="product_subset"%> <%@string subname="product"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "product_x_S" = product(x_S, na.rm = FALSE), "product(x, idxs)" = product(x, idxs = idxs, na.rm = FALSE), "product(x[idxs])" = product(x[idxs], na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMedian_subset.md.rsp0000644000176200001440000000275413615621101023636 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMedian_subset"%> <%@string subname="weightedMedian"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> <% weightedMedian <- matrixStats::weightedMedian %> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:3] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] w <- runif(length(x)) w_S <- x[idxs] gc() stats <- microbenchmark( "weightedMedian_x_w_S" = weightedMedian(x_S, w = w_S, ties = "mean", na.rm = FALSE), "weightedMedian(x, w, idxs)" = weightedMedian(x, w = w, idxs = idxs, ties = "mean", na.rm = FALSE), "weightedMedian(x[idxs], w[idxs])" = weightedMedian(x[idxs], w = w[idxs], ties = "mean", na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCummins_subset.md.rsp0000644000176200001440000000336213615621101023515 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCummins"%> <%@string rowname="rowCummins"%> <%@string fcnname="colRowCummins_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colCummins_X_S" = colCummins(X_S), "colCummins(X, rows, cols)" = colCummins(X, rows = rows, cols = cols), "colCummins(X[rows, cols])" = colCummins(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowCummins_X_S" = rowCummins(X_S), "rowCummins(X, cols, rows)" = rowCummins(X, rows = cols, cols = rows), "rowCummins(X[cols, rows])" = rowCummins(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMeans2.md.rsp0000644000176200001440000000306613615621101021643 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMeans2"%> <%@string rowname="rowMeans2"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + mean() * .colMeans() and .rowMeans() * colMeans() and rowMeans() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMeans2 = colMeans2(X, na.rm = FALSE), .colMeans = .colMeans(X, m = nrow(X), n = ncol(X), na.rm = FALSE), colMeans = colMeans(X, na.rm = FALSE), "apply+mean" = apply(X, MARGIN = 2L, FUN = mean, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMeans2 = rowMeans2(X, na.rm = FALSE), .rowMeans = .rowMeans(X, m = nrow(X), n = ncol(X), na.rm = FALSE), rowMeans = rowMeans(X, na.rm = FALSE), "apply+mean" = apply(X, MARGIN = 1L, FUN = mean, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/madDiff.md.rsp0000644000176200001440000000243613615621101020662 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="madDiff"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-10"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * N/A <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] stats <- microbenchmark( "madDiff" = madDiff(x), "mad" = mad(x), "diff" = diff(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-10 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binMeans_subset.md.rsp0000644000176200001440000000323013615621101022442 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binMeans"%> <%@string subname="binMeans"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2014-06-05"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Results ### Non-sorted simulated data ```r <%=withCapture({ nx <- 100e3 # Number of data points set.seed(0xBEEF) x <- runif(nx, min = 0, max = 1) y <- runif(nx, min = 0, max = 1) # Uniformely distributed bins nb <- 1e3 # Number of bins bx <- seq(from = 0, to = 1, length.out = nb+1L) bx <- c(-1, bx, 2) # indices for subsetting idxs <- sample.int(length(x), size = length(x)*0.7) })%> ``` <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% message(dataLabel) %> ```r <%=withCapture({ x_S <- x[idxs] y_S <- y[idxs] gc() stats <- microbenchmark( "binMeans_x_y_S" = binMeans(x = x_S, y = y_S, bx = bx, count = TRUE), "binMeans(x, y, idxs)" = binMeans(x = x, y = y, idxs = idxs, bx = bx, count = TRUE), "binMeans(x[idxs], y[idxs])" = binMeans(x = x[idxs], y = y[idxs], bx = bx, count = TRUE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) idxs <- sort(idxs) })%> ``` <% benchmark() %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-05 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/includes/0000755000176200001440000000000013632742260020015 5ustar liggesusersmatrixStats/inst/benchmarking/includes/setup.md.rsp0000644000176200001440000000236413615621101022276 0ustar liggesusers<%@string header="includes/header.md.rsp"%> <%@string footer="includes/footer.md.rsp"%> <%@string references="../includes/references.md.rsp"%> <%@string appendix="${appendix}" default="true"%> <%@string colname=""%> <%@string rowname=""%> <%@string fcnname=""%> <%@string fcntags=""%> <%@meta author="Henrik Bengtsson"%> <%-------------------------------------------------------------- RSP specific --------------------------------------------------------------%> <% R.utils::use("R.utils, R.devices (>= 2.12.0), knitr, ggplot2") devOptions("png", width=390) options("withCapture/newline"=FALSE) options(deparse.cutoff=100) kable <- function(...) { t <- knitr::kable(..., format="markdown") print(t) } %> <%-------------------------------------------------------------- Report/package specific --------------------------------------------------------------%> <% use("matrixStats") use("microbenchmark") %> <%@include file="results.md.rsp"%> <%-------------------------------------------------------------- Macros --------------------------------------------------------------%> <%-------------------------------------------------------------- Timing --------------------------------------------------------------%> <% rspStartTime <- Sys.time() %> matrixStats/inst/benchmarking/includes/references.md.rsp0000644000176200001440000000121713632742260023264 0ustar liggesusers<%--------------------------------------------------------------- REFERENCES ---------------------------------------------------------------%> [RSP]: https://cran.r-project.org/package=R.rsp [matrixStats]: https://cran.r-project.org/package=matrixStats [StackOverflow:colMins?]: https://stackoverflow.com/questions/13676878 "Stack Overflow: fastest way to get Min from every column in a matrix?" [StackOverflow:colSds?]: https://stackoverflow.com/questions/17549762 "Stack Overflow: Is there such 'colsd' in R?" [StackOverflow:rowProds?]: https://stackoverflow.com/questions/20198801/ "Stack Overflow: Row product of matrix and column sum of matrix" matrixStats/inst/benchmarking/includes/results.md.rsp0000644000176200001440000001242613632742260022650 0ustar liggesusers<%-------------------------------------------------------------- BENCHMARK RESULTS --------------------------------------------------------------%> <%-------------------------------------------------------------- Local functions --------------------------------------------------------------%> <% toImage <- function(stats, name=levels(stats$expr)[1L], tags=NULL, ylim="auto", col=NULL, alpha=NULL, ...) { %> <% # Replace spaces in name with hypen, e.g. ' w/ direct' -> '-w/-direct' name <- gsub(" ", "-", name, fixed=TRUE) # Drop any forward slashes in name, e.g. ' w/ ' -> ' w ' name <- gsub("/", "", name, fixed=TRUE) # Drop any spaces in tags, e.g. 'n = 1000' -> 'n=1000' tags <- gsub(" ", "", tags, fixed=TRUE) cat("\n") %> ![](<%=toPNG(name, tags=c(tags, "benchmark"), aspectRatio=2/3, { if (identical(ylim, "auto")) { y <- stats$time/1e6 ymax <- max(y, na.rm=TRUE) y75 <- quantile(y, probs=0.75, na.rm=TRUE) yupper <- min(c(1.5*y75, ymax), na.rm=TRUE) ylim <- c(0, yupper) } if (!is.null(ylim)) { stats$outlier <- (stats$time > ylim[2]*1e6) stats$time[stats$outlier] <- ylim[2]*1e6 } gg <- ggplot(data=stats, aes(x=seq_along(time)/length(levels(expr)), y=time/1e6)) gg <- gg + geom_point(aes(colour=expr, shape=outlier)) gg <- gg + scale_shape_manual(values=c(16,4), guide="none") if (!is.null(col)) gg <- gg + scale_colour_manual(values=col) if (!is.null(alpha)) gg <- gg + scale_alpha_manual(values=alpha) gg <- gg + xlab("iteration") + ylab("time (ms)") if (!is.null(ylim)) gg <- gg + ylim(ylim) print(gg) })%>) <% } # toImage() %> <% toTable <- function(stats, tags=NULL, order="median", ...) { kable({ s <- summary(stats) s$neval <- NULL s$cld <- NULL s <- s[order(s[[order]]),] s }, row.names=TRUE) kable({ s <- summary(stats, unit="relative") s$neval <- NULL s$cld <- NULL s <- s[order(s[[order]]),] s }, row.names=TRUE) cat("\n") } %> <%-------------------------------------------------------------- Benchmark results for vector functions --------------------------------------------------------------%> <% benchmarkResults <- function(stats, tags=NULL, ...) { %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(stats, tags=tags) %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. Outliers are displayed as crosses. Times are in milliseconds._ <% toImage(stats, tags=tags) %> <% } # benchmarkResults() %> <%-------------------------------------------------------------- Benchmark results for col- and row-specific functions --------------------------------------------------------------%> <% crBenchmarkResults <- function(colStats, rowStats=NULL, tags=NULL, ...) { %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(colStats, tags=tags) %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._ <% if (!is.null(rowStats)) { toTable(rowStats, tags=tags) } %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data <% if (!is.null(rowStats)) { %> as well as <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on the same data transposed<% } # if (!is.null(rowStats)) %>. Outliers are displayed as crosses. Times are in milliseconds._ <% y <- c(colStats$time, rowStats$time)/1e6 ymax <- max(y, na.rm=TRUE) y75 <- quantile(y, probs=0.75, na.rm=TRUE) yupper <- min(c(1.5*y75, ymax), na.rm=TRUE) ylim <- c(0, yupper) %> <% toImage(colStats, tags=tags, ylim=ylim) %> <% if (!is.null(rowStats)) toImage(rowStats, tags=tags, ylim=ylim) %> <% if (!is.null(rowStats)) { %> <% # Compare performance or the column- and the row-specific methods # for the "main" function. stats <- list(colStats, rowStats) stats <- lapply(stats, FUN=function(x) { level <- levels(x$expr)[1] x <- subset(x, expr %in% level) x$expr <- factor(as.character(x$expr)) x }) stats <- Reduce(rbind, stats) odd <- seq(from=1L, to=nrow(stats), by=2L) top <- 1:(nrow(stats)/2) stats0 <- stats stats[ odd,] <- stats0[ top,] stats[-odd,] <- stats0[-top,] %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(stats, tags=tags) %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). Outliers are displayed as crosses. Times are in milliseconds._ <% toImage(stats, name=paste(levels(stats$expr), collapse="_vs_"), tags=tags, col=c("#000000", "#999999")) %> <% } # if (!is.null(rowStats)) %> <% } # crBenchmarkResults() %> matrixStats/inst/benchmarking/includes/appendix.md.rsp0000644000176200001440000000067513602473116022760 0ustar liggesusers## Appendix ### Session information ```r <% print(sessionInfo()) %> ``` Total processing time was <%=rspDuration <- round(Sys.time()-rspStartTime, digits=2)%> <%=attr(rspDuration, "units")%>. ### Reproducibility To reproduce this report, do: ```r <%@ifeq fcnname=""%><%@string fcnname="${colname}"%><%@endif%> html <- matrixStats:::benchmark('<%@string name="fcnname"%>'<%@ifneq fcntags=""%>, tags='<%@string name="fcntags"%>'<%@endif%>) ``` matrixStats/inst/benchmarking/includes/header.md.rsp0000644000176200001440000000011213322430442022354 0ustar liggesusers[matrixStats]: Benchmark report --------------------------------------- matrixStats/inst/benchmarking/includes/footer.md.rsp0000644000176200001440000000136313602473116022441 0ustar liggesusers<%--------------------------------------------------------------- Page footer ---------------------------------------------------------------%> <%@string appendix="${appendix}" default="false"%> <%@ifeq appendix="true"%> <%@include file="appendix.md.rsp"%> <%@endif%> <%@include file="${references}"%> --------------------------------------- Copyright <%@meta name="author"%>. Last updated on <%=format(Sys.time(), format="%Y-%m-%d %H:%M:%S (%z UTC)")%>. Powered by [RSP]. <%--------------------------------------------------------------- Dynamically insert an HTML favicon ---------------------------------------------------------------%> <%=toFavicon({ plot(1, col="blue", bg="yellow", pch=21, cex=4, lwd=4, axes=FALSE) }, force=FALSE)%> matrixStats/inst/benchmarking/anyMissing_subset.md.rsp0000644000176200001440000000251313615621101023032 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="anyMissing_subset"%> <%@string subname="anyMissing"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "anyMissing_x_S" = anyMissing(x_S), "anyMissing(x, idxs)" = anyMissing(x, idxs = idxs), "anyMissing(x[idxs])" = anyMissing(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-04 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAlls_subset.md.rsp0000644000176200001440000000310113615621101022764 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAlls"%> <%@string rowname="rowAlls"%> <%@string fcnname="colRowAlls_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colAlls_X_S" = colAlls(X_S), "colAlls(X, rows, cols)" = colAlls(X, rows = rows, cols = cols), "colAlls(X[rows, cols])" = colAlls(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowAlls_X_S" = rowAlls(X_S), "rowAlls(X, cols, rows)" = rowAlls(X, rows = cols, cols = rows), "rowAlls(X[cols, rows])" = rowAlls(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMeans_subset.md.rsp0000644000176200001440000000366113615621101024630 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMeans"%> <%@string rowname="rowWeightedMeans"%> <%@string fcnname="colRowWeightedMeans_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] w <- runif(nrow(X)) w_S <- w[rows] gc() colStats <- microbenchmark( "colWeightedMeans_X_w_S" = colWeightedMeans(X_S, w = w_S, na.rm = FALSE), "colWeightedMeans(X, w, rows, cols)" = colWeightedMeans(X, w = w, rows = rows, cols = cols, na.rm = FALSE), "colWeightedMeans(X[rows, cols], w[rows])" = colWeightedMeans(X[rows, cols], w = w[rows], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowWeightedMeans_X_w_S" = rowWeightedMeans(X_S, w = w_S, na.rm = FALSE), "rowWeightedMeans(X, w, cols, rows)" = rowWeightedMeans(X, w = w, rows = cols, cols = rows, na.rm = FALSE), "rowWeightedMeans(X[cols, rows], w[rows])" = rowWeightedMeans(X[cols, rows], w = w[rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/t_tx_OP_y_subset.md.rsp0000644000176200001440000000343613615621101022622 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="t_tx_OP_y_subset"%> <%@string subname="t_tx_OP_y"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] xrows <- sample.int(nrow(x), size = nrow(x)*0.7) xcols <- sample.int(ncol(x), size = ncol(x)*0.7) x_S <- x[xrows, xcols] yidxs <- xrows y_S <- y[yidxs] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "t_tx_OP_y_x_y_S" = t_tx_OP_y(x_S, y_S, OP = OP, na.rm = FALSE), "t_tx_OP_y(x, y, OP, xrows, xcols, yidxs)" = t_tx_OP_y(x, y, OP = OP, xrows = xrows, xcols = xcols, yidxs = yidxs, na.rm = FALSE), "t_tx_OP_y(x[xrows, xcols], y[yidxs], OP)" = t_tx_OP_y(x[xrows, xcols], y[yidxs], OP = OP, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/allocVector.md.rsp0000644000176200001440000000422213615621101021600 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="allocVector"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * vector() + assignment * rep() * matrix() + as.vector() where ```r <%=withCapture({ allocVector_R1 <- function(length, value = NA) { x <- vector(mode = typeof(value), length = length) if (!is.finite(value) || value != 0) x[] <- value x } # allocVector_R1() allocVector_R2 <- function(length, value = NA) { x <- matrix(data = value, nrow = length, ncol = 1L) as.vector(x) } # allocVector_R2() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) values <- list(zero = 0, one = 1, "NA" = NA_real_) if (mode != "double") values <- lapply(values, FUN = function(x) { storage.mode(x) <- mode; x }) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> matrix <% for (value in values) { %> <% valueLabel <- as.character(value) mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel) %> ```r <%=withCapture({ n <- length(data[[.dataLabel.]]) str(value) })%> ``` <% gc() %> ```r <%=withCapture({ stats <- microbenchmark( "allocVector" = allocVector(length = n, value = value), "rep" = rep(value, times = n), "allocVector_R1" = allocVector_R1(length = n, value = value), "allocVector_R2" = allocVector_R2(length = n, value = value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %> <% } # for (value in values) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnys.md.rsp0000644000176200001440000000256613615621101021434 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnys"%> <%@string rowname="rowAnys"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + any() * colSums() > 0 or rowSums() > 0 ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAnys = colAnys(X), "apply+any" = apply(X, MARGIN = 2L, FUN = any), "colSums > 0" = (colSums(X) > 0L), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAnys = rowAnys(X), "apply+any" = apply(X, MARGIN = 1L, FUN = any), "rowSums > 0" = (rowSums(X) > 0L), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowVars.md.rsp0000644000176200001440000000567614005425634021451 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colVars"%> <%@string rowname="rowVars"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2015-01-06"%> <%@include file="${header}"%> <%@string test_genefilter="TRUE"%> <%@ifeq test_genefilter="TRUE"%> <% use("genefilter", how = "load") genefilter_rowVars <- genefilter::rowVars genefilter_colVars <- function(x, ...) genefilter_rowVars(t(x), ...) %> <%@endif%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + var() * colVarColMeans() and rowVarColMeans() <%@ifeq test_genefilter="TRUE"%> * genefilter::rowVars(t(.)) and genefilter::rowVars() <%@endif%> where ```r <%=withCapture({ colVarColMeans <- function(x, na.rm = TRUE) { if (na.rm) { n <- colSums(!is.na(x)) } else { n <- nrow(x) } var <- colMeans(x*x, na.rm = na.rm) - (colMeans(x, na.rm = na.rm))^2 var * n/(n-1) } })%> ``` and ```r <%=withCapture({ rowVarRowMeans <- function(x, na.rm = TRUE) { if (na.rm) { n <- rowSums(!is.na(x)) } else { n <- ncol(x) } mu <- rowMeans(x, na.rm = na.rm) var <- rowMeans(x*x, na.rm = na.rm) - mu^2 var * (n/(n-1)) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colVars = colVars(X, na.rm = FALSE), colVarsCenter = colVars(X, center = colMeans(X, na.rm = FALSE), na.rm = FALSE), colVarColMeans = colVarColMeans(X, na.rm = FALSE), "apply+var" = apply(X, MARGIN = 2L, FUN = var, na.rm = FALSE), <%@ifeq test_genefilter="TRUE"%> "genefilter::rowVars(t(.))" = genefilter_colVars(X, na.rm = FALSE), <%@endif%> unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowVars = rowVars(X, na.rm = FALSE), rowVarsCenter = rowVars(X, center = rowMeans(X, na.rm = FALSE), na.rm = FALSE), rowVarRowMeans = rowVarRowMeans(X, na.rm = FALSE), "apply+var" = apply(X, MARGIN = 1L, FUN = var, na.rm = FALSE), <%@ifeq test_genefilter="TRUE"%> "genefilter::rowVars" = genefilter_rowVars(X, na.rm = FALSE), <%@endif%> unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-01-06 o Now benchmarking 'genefilter' functions too. 2014-11-23 o Now benchmarking rowVars() instead of rowSds() since the latter uses the former. 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCounts.md.rsp0000644000176200001440000000340213615621101021763 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCounts"%> <%@string rowname="rowCounts"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * colSums() and rowSums() * apply() + sum() <% for (mode in c("logical", "integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> #### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] value <- 42 })%> ``` <% gc() %> ```r <%=withCapture({ colStats <- microbenchmark( colCounts = colCounts(X, value = value, na.rm = FALSE), colSums = colSums(X == value, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 2L, FUN = function(x) sum(x == value, na.rm = FALSE)), unit = "ms" ) })%> ``` ```r <%=withCapture({ X <- t(X) })%> ``` <% gc() %> ```r <%=withCapture({ rowStats <- microbenchmark( rowCounts = rowCounts(X, value = value, na.rm = FALSE), rowSums = rowSums(X == value, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 1L, FUN = function(x) sum(x == value, na.rm = FALSE)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMean_subset.md.rsp0000644000176200001440000000276513615621101023323 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMean_subset"%> <%@string subname="weightedMean"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] w <- runif(length(x)) w_S <- w[idxs] gc() stats <- microbenchmark( "weightedMean_x_w_S" = weightedMean(x_S, w = w_S, na.rm = FALSE), "weightedMean(x, w, idxs)" = weightedMean(x, w = w, idxs = idxs, na.rm = FALSE), "weightedMean(x[idxs], w[idxs])" = weightedMean(x[idxs], w = w[idxs], na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/varDiff_subset.md.rsp0000644000176200001440000000252513615621101022275 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="varDiff_subset"%> <%@string subname="varDiff"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "varDiff_x_S" = varDiff(x_S), "varDiff(x, idxs)" = varDiff(x, idxs = idxs), "varDiff(x[idxs])" = varDiff(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMeans2_subset.md.rsp0000644000176200001440000000320513615621101023223 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMeans2"%> <%@string rowname="rowMeans2"%> <%@string fcnname="colRowMeans2_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMeans2_X_S" = colMeans2(X_S, na.rm = FALSE), "colMeans2(X, rows, cols)" = colMeans2(X, rows = rows, cols = cols, na.rm = FALSE), "colMeans2(X[rows, cols])" = colMeans2(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMeans2_X_S" = rowMeans2(X_S, na.rm = FALSE), "rowMeans2(X, cols, rows)" = rowMeans2(X, rows = cols, cols = rows, na.rm = FALSE), "rowMeans2(X[cols, rows])" = rowMeans2(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/colRowCumprods_subset.md.rsp0000644000176200001440000000342413615621101023675 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumprods"%> <%@string rowname="rowCumprods"%> <%@string fcnname="colRowCumprods_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode, range = c(-1, 1)) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colCumprods_X_S" = colCumprods(X_S), "colCumprods(X, rows, cols)" = colCumprods(X, rows = rows, cols = cols), "colCumprods(X[rows, cols])" = colCumprods(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowCumprods_X_S" = rowCumprods(X_S), "rowCumprods(X, cols, rows)" = rowCumprods(X, rows = cols, cols = rows), "rowCumprods(X[cols, rows])" = rowCumprods(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowTabulates_subset.md.rsp0000644000176200001440000000337113615621101024026 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colTabulates"%> <%@string rowname="rowTabulates"%> <%@string fcnname="colRowTabulates_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "integer", range = c(-10, 10)) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colTabulates_X_S" = colTabulates(X_S, na.rm = FALSE), "colTabulates(X, rows, cols)" = colTabulates(X, rows = rows, cols = cols, na.rm = FALSE), "colTabulates(X[rows, cols])" = colTabulates(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowTabulates_X_S" = rowTabulates(X_S, na.rm = FALSE), "rowTabulates(X, cols, rows)" = rowTabulates(X, rows = cols, cols = rows, na.rm = FALSE), "rowTabulates(X[cols, rows])" = rowTabulates(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCummins.md.rsp0000644000176200001440000000265313615621101022132 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCummins"%> <%@string rowname="rowCummins"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cummin() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCummins = colCummins(X), "apply+cummin" = apply(X, MARGIN = 2L, FUN = cummin), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCummins = rowCummins(X), "apply+cummin" = apply(X, MARGIN = 1L, FUN = cummin), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMads_subset.md.rsp0000644000176200001440000000343713615621101022771 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMads"%> <%@string rowname="rowMads"%> <%@string fcnname="colRowMads_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMads_X_S" = colMads(X_S, na.rm = FALSE), "colMads(X, rows, cols)" = colMads(X, rows = rows, cols = cols, na.rm = FALSE), "colMads(X[rows, cols])" = colMads(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMads_X_S" = rowMads(X_S, na.rm = FALSE), "rowMads(X, cols, rows)" = rowMads(X, rows = cols, cols = rows, na.rm = FALSE), "rowMads(X[cols, rows])" = rowMads(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAlls.md.rsp0000644000176200001440000000257413615621101021414 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAlls"%> <%@string rowname="rowAlls"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + all() * colSums() == n or rowSums() == n ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAlls = colAlls(X), "apply+all" = apply(X, MARGIN = 2L, FUN = all), "colSums==n" = (colSums(X) == nrow(X)), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAlls = rowAlls(X), "apply+all" = apply(X, MARGIN = 1L, FUN = all), "rowSums==n" = (rowSums(X) == ncol(X)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowDiffs_subset.md.rsp0000644000176200001440000000332513615621101023134 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colDiffs"%> <%@string rowname="rowDiffs"%> <%@string fcnname="colRowDiffs_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colDiffs_X_S" = colDiffs(X_S), "colDiffs(X, rows, cols)" = colDiffs(X, rows = rows, cols = cols), "colDiffs(X[rows, cols])" = colDiffs(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowDiffs_X_S" = rowDiffs(X_S), "rowDiffs(X, cols, rows)" = rowDiffs(X, rows = cols, cols = rows), "rowDiffs(X[cols, rows])" = rowDiffs(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/anyMissing.md.rsp0000644000176200001440000000254613615621101021453 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="anyMissing"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-01"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * anyNA() * any() + is.na() as below ```r <%=withCapture({ any_is.na <- function(x) { any(is.na(x)) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "anyMissing" = anyMissing(x), "anyNA" = anyNA(x), "any_is.na" = any_is.na(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMads.md.rsp0000644000176200001440000000422313615621101021376 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMads"%> <%@string rowname="rowMads"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + mad() * colMads2() and rowMads2() where `rowMads2()` and `colMads2()` are: ```r <%=withCapture({ rowMads2 <- function(x, const = 1.4826, na.rm = FALSE) { mu <- rowMedians(x, na.rm = na.rm) x <- abs(x - mu) mad <- rowMedians(x, na.rm = FALSE) const * mad } colMads2 <- function(x, const = 1.4826, na.rm = FALSE) { mu <- colMedians(x, na.rm = na.rm) x <- abs(x - mu) mad <- colMedians(x, na.rm = FALSE) const * mad } })%> ``` <% rowMads_R <- function(x, na.rm = FALSE) { apply(x, MARGIN = 1L, FUN = mad, na.rm = na.rm) } colMads_R <- function(x, na.rm = FALSE) { apply(x, MARGIN = 2L, FUN = mad, na.rm = na.rm) } %> <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMads = colMads(X, na.rm = FALSE), colMads2 = colMads2(X, na.rm = FALSE), "apply+mad" = apply(X, MARGIN = 2L, FUN = mad, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMads = rowMads(X, na.rm = FALSE), rowMads2 = rowMads2(X, na.rm = FALSE), "apply+mad" = apply(X, MARGIN = 1L, FUN = mad, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-17 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowOrderStats_subset.md.rsp0000644000176200001440000000400113615621101024163 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colOrderStats"%> <%@string rowname="rowOrderStats"%> <%@string fcnname="colRowOrderStats_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() probs <- 0.3 which <- round(probs*nrow(X)) colStats <- microbenchmark( "colOrderStats_X_S" = colOrderStats(X_S, which = which, na.rm = FALSE), "colOrderStats(X, rows, cols)" = colOrderStats(X, rows = rows, cols = cols, which = which, na.rm = FALSE), "colOrderStats(X[rows, cols])" = colOrderStats(X[rows, cols], which = which, na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowOrderStats_X_S" = rowOrderStats(X_S, which = which, na.rm = FALSE), "rowOrderStats(X, cols, rows)" = rowOrderStats(X, rows = cols, cols = rows, which = which, na.rm = FALSE), "rowOrderStats(X[cols, rows])" = rowOrderStats(X[cols, rows], which = which, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/product.md.rsp0000644000176200001440000000323313615621101021004 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="product"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * product_R() * prod() where ```r <%=withCapture({ product_R <- function(x, na.rm = FALSE, ...) { # Nothing todo? if (length(x) == 0L) return(0); # Any missing values? if (na.rm) { x <- x[!is.na(x)]; } # Any zeros? if (is.integer(x) && any(x == 0)) return(0); # Calculate product via logarithmic sum sign <- if (sum(x < 0) %% 2 == 0) +1 else -1; x <- abs(x); x <- log(x); x <- sum(x, na.rm = FALSE); x <- exp(x); y <- sign*x; y; } # product_R() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( product = product(x, na.rm = FALSE), product_R = product_R(x, na.rm = FALSE), prod = prod(x, na.rm = FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMedians_subset.md.rsp0000644000176200001440000000351413615621101023461 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMedians"%> <%@string rowname="rowMedians"%> <%@string fcnname="colRowMedians_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMedians_X_S" = colMedians(X_S, na.rm = FALSE), "colMedians(X, rows, cols)" = colMedians(X, rows = rows, cols = cols, na.rm = FALSE), "colMedians(X[rows, cols])" = colMedians(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMedians_X_S" = rowMedians(X_S, na.rm = FALSE), "rowMedians(X, cols, rows)" = rowMedians(X, rows = cols, cols = rows, na.rm = FALSE), "rowMedians(X[cols, rows])" = rowMedians(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowProds.md.rsp0000644000176200001440000000356113615621101021605 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colProds"%> <%@string rowname="rowProds"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-15"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * <%=colname%>()/<%=rowname%>() using method="expSumLog" * apply() + prod() * apply() + product() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] })%> <% gc() %> <%=withCapture({ colStats <- microbenchmark( "colProds w/ direct" = colProds(X, method = "direct", na.rm = FALSE), "colProds w/ expSumLog" = colProds(X, method = "expSumLog", na.rm = FALSE), "apply+prod" = apply(X, MARGIN = 2L, FUN = prod, na.rm = FALSE), "apply+product" = apply(X, MARGIN = 2L, FUN = product, na.rm = FALSE), unit = "ms" ) })%> <%=withCapture({ X <- t(X) })%> <% gc() %> <%=withCapture({ rowStats <- microbenchmark( "rowProds w/ direct" = rowProds(X, method = "direct", na.rm = FALSE), "rowProds w/ expSumLog" = rowProds(X, method = "expSumLog", na.rm = FALSE), "apply+prod" = apply(X, MARGIN = 1L, FUN = prod, na.rm = FALSE), "apply+product" = apply(X, MARGIN = 1L, FUN = product, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowVars_subset.md.rsp0000644000176200001440000000344113632742260023024 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colVars"%> <%@string rowname="rowVars"%> <%@string fcnname="colRowVars_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colVars_X_S" = colVars(X_S, na.rm = FALSE), "colVars(X, rows, cols)" = colVars(X, rows = rows, cols = cols, na.rm = FALSE), "colVars(X[rows, cols])" = colVars(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowVars_X_S" = rowVars(X_S, na.rm = FALSE), "rowVars(X, cols, rows)" = rowVars(X, rows = cols, cols = rows, na.rm = FALSE), "rowVars(X[cols, rows])" = rowVars(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/logSumExp.md.rsp0000644000176200001440000000247413615621101021255 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="logSumExp"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-01"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * logSumExp_R() where ```r <%=withCapture({ logSumExp_R <- function(lx, ...) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp_R() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "logSumExp" = logSumExp(x), "logSumExp_R" = logSumExp_R(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowQuantiles_subset.md.rsp0000644000176200001440000000355113615621101024047 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colQuantiles"%> <%@string rowname="rowQuantiles"%> <%@string fcnname="colRowQuantiles_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() probs <- seq(from = 0, to = 1, by = 0.25) colStats <- microbenchmark( "colQuantiles_X_S" = colQuantiles(X_S, probs = probs, na.rm = FALSE), "colQuantiles(X, rows, cols)" = colQuantiles(X, rows = rows, cols = cols, probs = probs, na.rm = FALSE), "colQuantiles(X[rows, cols])" = colQuantiles(X[rows, cols], probs = probs, na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowQuantiles_X_S" = rowQuantiles(X_S, probs = probs, na.rm = FALSE), "rowQuantiles(X, cols, rows)" = rowQuantiles(X, rows = cols, cols = rows, probs = probs, na.rm = FALSE), "rowQuantiles(X[cols, rows])" = rowQuantiles(X[cols, rows], probs = probs, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/index.md.rsp0000644000176200001440000000413613632742260020447 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@meta title="Benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-05"%> <%@include file="${header}"%> <% use("matrixStats") use("R.utils (>= 1.34.0)") ## Assert that all third-party packages to be benchmarked ## are available and can be loaded/installed already here R.utils::use("cwhmisc, ergm, laeken, genefilter", how = "load") # Simple logging function lenter <- function(...) { s <- mprintf(...) s <- gsub("[.][.][.](\n)*", "...done\\1", s) lexit <<- function() mprintf(s) } %> # <%@meta name="title"%> List of benchmark report for some of the functions available in the [matrixStats] package. <% path <- cmdArg(path = system.file("benchmarking", package = "matrixStats")) path <- getAbsolutePath(path) message("Processing benchmark report directory: ", path) pattern <- "[.]md[.]rsp$" filenames <- list.files(path = path, pattern = pattern) filenames <- setdiff(filenames, "index.md.rsp") names <- gsub(pattern, "", filenames) # col- and rowAnyMissing() does not really exist names <- setdiff(names, c("colAnyMissing", "rowAnyMissing")) message("Number of reports found: ", length(names)) mprintf("Report #%d: %s\n", seq_along(names), names) %> <% for (ii in seq_along(names)) { %> <% name <- names[ii] if (regexpr("^colRow", name) != -1L) { label <- gsub("^colRow", "", name) label <- sprintf("col%s() and row%s()", label, label) } else { label <- sprintf("%s()", name) } %> * [<%=label%>](<%={ lenter("%d of %d. Benchmarking %s...\n", ii, length(names), label) html <- sprintf("%s.html", name) if (!file_test("-f", html)) { html <- matrixStats:::benchmark(name, path = path, workdir = ".", envir = new.env()) html <- getRelativePath(html) gc() } lexit() html }%>) <% } # for (ii ...) %> ## Appendix To reproduce this page and all of its reports, do: ```r path <- system.file("benchmarking", package = "matrixStats") R.rsp::rfile("index.md.rsp", path = path) ``` _Note: Each of the above reports takes up to several minutes to complete._ <%@string appendix="false"%> <%@include file="${footer}"%> <%@string appendix="true"%> matrixStats/inst/benchmarking/colRowLogSumExps.md.rsp0000644000176200001440000000335713615621101022567 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colLogSumExps"%> <%@string rowname="rowLogSumExps"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + matrixStats::logSumExp() * apply() + logSumExp0() where ```r <%=withCapture({ logSumExp0 <- function(lx, ...) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp0() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colLogSumExps = colLogSumExps(X, na.rm = FALSE), "apply+logSumExp" = apply(X, MARGIN = 2L, FUN = logSumExp, na.rm = FALSE), "apply+logSumExp0" = apply(X, MARGIN = 2L, FUN = logSumExp0, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowLogSumExps = rowLogSumExps(X, na.rm = FALSE), "apply+logSumExp" = apply(X, MARGIN = 1L, FUN = logSumExp, na.rm = FALSE), "apply+logSumExp0" = apply(X, MARGIN = 1L, FUN = logSumExp0, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanges_subset.md.rsp0000644000176200001440000000347613615621101023327 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanges"%> <%@string rowname="rowRanges"%> <%@string fcnname="colRowRanges_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colRanges_X_S" = colRanges(X_S, na.rm = FALSE), "colRanges(X, rows, cols)" = colRanges(X, rows = rows, cols = cols, na.rm = FALSE), "colRanges(X[rows, cols])" = colRanges(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowRanges_X_S" = rowRanges(X_S, na.rm = FALSE), "rowRanges(X, cols, rows)" = rowRanges(X, rows = cols, cols = rows, na.rm = FALSE), "rowRanges(X[cols, rows])" = rowRanges(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanks_subset.md.rsp0000644000176200001440000000345713615621101023165 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanks"%> <%@string rowname="rowRanks"%> <%@string fcnname="colRowRanks_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colRanks_X_S" = colRanks(X_S, na.rm = FALSE), "colRanks(X, rows, cols)" = colRanks(X, rows = rows, cols = cols, na.rm = FALSE), "colRanks(X[rows, cols])" = colRanks(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowRanks_X_S" = rowRanks(X_S, na.rm = FALSE), "rowRanks(X, cols, rows)" = rowRanks(X, rows = cols, cols = rows, na.rm = FALSE), "rowRanks(X[cols, rows])" = rowRanks(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/R/0000755000176200001440000000000013615621101016377 5ustar liggesusersmatrixStats/inst/benchmarking/R/random-matrices.R0000644000176200001440000000202513615621101021606 0ustar liggesusersrmatrix <- function(nrow, ncol, mode = c("logical", "double", "integer", "index"), range = c(-100, +100), na_prob = 0) { mode <- match.arg(mode) n <- nrow * ncol if (mode == "logical") { x <- sample(c(FALSE, TRUE), size = n, replace = TRUE) } else if (mode == "index") { x <- seq_len(n) mode <- "integer" } else { x <- runif(n, min = range[1], max = range[2]) } storage.mode(x) <- mode if (na_prob > 0) x[sample(n, size = na_prob * n)] <- NA dim(x) <- c(nrow, ncol) x } rmatrices <- function(scale = 10, seed = 1, ...) { set.seed(seed) data <- list() data[[1]] <- rmatrix(nrow = scale * 1, ncol = scale * 1, ...) data[[2]] <- rmatrix(nrow = scale * 10, ncol = scale * 10, ...) data[[3]] <- rmatrix(nrow = scale * 100, ncol = scale * 1, ...) data[[4]] <- t(data[[3]]) data[[5]] <- rmatrix(nrow = scale * 10, ncol = scale * 100, ...) data[[6]] <- t(data[[5]]) names(data) <- sapply(data, FUN = function(x) paste(dim(x), collapse = "x")) data } matrixStats/inst/benchmarking/R/random-vectors.R0000644000176200001440000000142013615621101021462 0ustar liggesusersrvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) { mode <- match.arg(mode) if (mode == "logical") { x <- sample(c(FALSE, TRUE), size = n, replace = TRUE) } else { x <- runif(n, min = range[1], max = range[2]) } storage.mode(x) <- mode if (na_prob > 0) x[sample(n, size = na_prob * n)] <- NA x } # rvector() rvectors <- function(scale = 10, seed = 1, ...) { set.seed(seed) data <- list() data[[1]] <- rvector(n = scale * 1e2, ...) data[[2]] <- rvector(n = scale * 1e3, ...) data[[3]] <- rvector(n = scale * 1e4, ...) data[[4]] <- rvector(n = scale * 1e5, ...) data[[5]] <- rvector(n = scale * 1e6, ...) names(data) <- sprintf("n = %d", sapply(data, FUN = length)) data } matrixStats/inst/benchmarking/colRowSums2_subset.md.rsp0000644000176200001440000000316613615621101023115 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colSums2"%> <%@string rowname="rowSums2"%> <%@string fcnname="colRowSums2_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colSums2_X_S" = colSums2(X_S, na.rm = FALSE), "colSums2(X, rows, cols)" = colSums2(X, rows = rows, cols = cols, na.rm = FALSE), "colSums2(X[rows, cols])" = colSums2(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowSums2_X_S" = rowSums2(X_S, na.rm = FALSE), "rowSums2(X, cols, rows)" = rowSums2(X, rows = cols, cols = rows, na.rm = FALSE), "rowSums2(X[cols, rows])" = rowSums2(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/colRowDiffs.md.rsp0000644000176200001440000000312713615621101021547 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colDiffs"%> <%@string rowname="rowDiffs"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-30"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + diff() * apply() + diff2() * diff() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colDiffs = colDiffs(X), "apply+diff" = apply(X, MARGIN = 2L, FUN = diff), "apply+diff2" = apply(X, MARGIN = 2L, FUN = diff2), diff = diff(X), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowDiffs = rowDiffs(X), "apply+diff" = apply(X, MARGIN = 1L, FUN = diff), "apply+diff2" = apply(X, MARGIN = 1L, FUN = diff2), "diff + t" = diff(t(X)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-17 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMedians.md.rsp0000644000176200001440000000274513615621101023562 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMedians"%> <%@string rowname="rowWeightedMedians"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + weightedMedian() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] w <- runif(nrow(X)) gc() colStats <- microbenchmark( colWeightedMedians = colWeightedMedians(X, w = w, na.rm = FALSE), "apply+weigthedMedian" = apply(X, MARGIN = 2L, FUN = weightedMedian, w = w, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowWeightedMedians = rowWeightedMedians(X, w = w, na.rm = FALSE), "apply+weigthedMedian" = apply(X, MARGIN = 1L, FUN = weightedMedian, w = w, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnys_subset.md.rsp0000644000176200001440000000310113615621101023003 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnys"%> <%@string rowname="rowAnys"%> <%@string fcnname="colRowAnys_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = "logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colAnys_X_S" = colAnys(X_S), "colAnys(X, rows, cols)" = colAnys(X, rows = rows, cols = cols), "colAnys(X[rows, cols])" = colAnys(X[rows, cols]), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowAnys_X_S" = rowAnys(X_S), "rowAnys(X, cols, rows)" = rowAnys(X, rows = cols, cols = rows), "rowAnys(X[cols, rows])" = rowAnys(X[cols, rows]), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-06 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binCounts.md.rsp0000644000176200001440000000367713615621101021304 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binCounts"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-05-25"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * hist() as below ```r <%=withCapture({ hist <- graphics::hist binCounts_hist <- function(x, bx, right = FALSE, ...) { hist(x, breaks = bx, right = right, include.lowest = TRUE, plot = FALSE)$counts } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Non-sorted simulated data ```r <%=withCapture({ set.seed(0xBEEF) nx <- 100e3 # Number of data points xmax <- 0.01*nx x <- runif(nx, min = 0, max = xmax) storage.mode(x) <- mode str(x) # Uniformely distributed bins nb <- 10e3 # Number of bins bx <- seq(from = 0, to = xmax, length.out = nb+1L) bx <- c(-1, bx, xmax+1) })%> ``` ### Results <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% mprintf("%s: %s\n", mode, dataLabel) %> ```r <%=withCapture({ gc() stats <- microbenchmark( binCounts = binCounts(x, bx = bx), hist = binCounts_hist(x, bx = bx), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% # Sanity checks n0 <- binCounts_hist(x, bx = bx) n1 <- binCounts(x, bx = bx) stopifnot(identical(n1, n0)) n1r <- rev(binCounts(-x, bx = rev(-bx), right = TRUE)) stopifnot(identical(n1r, n1)) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) })%> ``` <% benchmark() %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Restructured. 2014-05-25 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/logSumExp_subset.md.rsp0000644000176200001440000000233213615621101022633 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="logSumExp_subset"%> <%@string subname="logSumExp"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "logSumExp_x_S" = logSumExp(x_S), "logSumExp(x, idxs)" = logSumExp(x, idxs = idxs), "logSumExp(x[idxs])" = logSumExp(x[idxs]), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binMeans.md.rsp0000644000176200001440000000376213615621101021067 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binMeans"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * binMeans_R() which is defined as ```r <%=withCapture({ binMeans_R <- function(y, x, bx, na.rm = FALSE, count = TRUE, right = FALSE) { B <- length(bx)-1L res <- double(B) counts <- integer(B) # For each bin... for (kk in seq_len(B)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk+1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk+1L]) } yKK <- y[idxs] muKK <- mean(yKK) res[kk] <- muKK counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # binMeans_R() })%> ``` ## Results ### Non-sorted simulated data ```r <%=withCapture({ nx <- 10e3 # Number of data points set.seed(0xBEEF) x <- runif(nx, min = 0, max = 1) y <- runif(nx, min = 0, max = 1) # Uniformely distributed bins nb <- 1e3 # Number of bins bx <- seq(from = 0, to = 1, length.out = nb+1L) bx <- c(-1, bx, 2) })%> ``` <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% message(dataLabel) %> ```r <%=withCapture({ gc() stats <- microbenchmark( binMeans = binMeans(x = x, y = y, bx = bx, count = TRUE), binMeans_R = binMeans_R(x = x, y = y, bx = bx, count = TRUE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) })%> ``` <% benchmark() %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Restructured. 2014-05-25 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/count_subset.md.rsp0000644000176200001440000000250313615621101022040 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="count_subset"%> <%@string subname="count"%> <%@meta title="${subname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-07"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=subname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) value <- 42 %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] idxs <- sample.int(length(x), size = length(x)*0.7) x_S <- x[idxs] gc() stats <- microbenchmark( "count_x_S" = count(x_S, value), "count(x, idxs)" = count(x, idxs = idxs, value), "count(x[idxs])" = count(x[idxs], value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/count.md.rsp0000644000176200001440000000240513615621101020454 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="count"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-08"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * sum(x == value) <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] value <- 42 gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "count" = count(x, value), "sum(x == value)" = sum(x == value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanges.md.rsp0000644000176200001440000000275113615621101021735 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanges"%> <%@string rowname="rowRanges"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + range() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colRanges = colRanges(X, na.rm = FALSE), "apply+range" = apply(X, MARGIN = 2L, FUN = range, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowRanges = rowRanges(X, na.rm = FALSE), "apply+range" = apply(X, MARGIN = 1L, FUN = range, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/indexByRow.md.rsp0000644000176200001440000000557013615621101021424 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="indexByRow"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> <% lfun <- local({ locals <- list() function(txt = NULL) { if (is.null(txt)) return(locals) local <- list(txt) locals <<- c(locals, local) } }) %> # <%@meta name="title"%> This report benchmark the performance of `<%=fcnname%>()` against alternative methods: * `indexByRow_R1()` based in `matrix(..., byrow = TRUE)` * `indexByRow_R2()` is a modified version of `indexByRow_R1()` where `indexByRow_R1()` and `indexByRow_R2()` are defined as in the Appendix. <% lfun(withCapture({ indexByRow_R1 <- function(dim, idxs = NULL, ...) { n <- prod(dim) x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) if (!is.null(idxs)) x <- x[idxs] as.vector(x) } # indexByRow_R1() })) lfun(withCapture({ indexByRow_R2 <- function(dim, idxs = NULL, ...) { n <- prod(dim) if (is.null(idxs)) { x <- matrix(seq_len(n), nrow = dim[2L], ncol = dim[1L], byrow = TRUE) as.vector(x) } else { idxs <- idxs - 1 cols <- idxs %/% dim[2L] rows <- idxs %% dim[2L] cols + dim[1L]*rows + 1L } } # indexByRow_R2() })) %> ## Data <% lfun(withCapture({ <%@include file="R/random-matrices.R"%> })) %> ```r <%=withCapture({ data <- rmatrices(mode = "index") })%> ``` where `rmatrices()` is defined in the Appendix. <% # data <- data[1:2] %> ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] dim <- dim(X) idxsList <- list( 'all-by-NULL' = NULL, all = seq_len(prod(dim)), odd = seq(from = 1, to = prod(dim), by = 2L) ) str(idxsList) })%> ``` <% for (ii in seq_along(idxsList)) { %> #### Index set '<%=names(idxsList)[ii]%>' <% idxs <- idxsList[[ii]] idxsTag <- names(idxsList)[ii] # Validate correctness res <- list( indexByRow = indexByRow(dim, idxs = idxs), indexByRow_R1 = indexByRow_R1(dim, idxs = idxs), indexByRow_R2 = indexByRow_R2(dim, idxs = idxs) ) lapply(res, FUN = function(x) stopifnot(all.equal(x, res[[1]]))) gc() %> ```r <%=withCapture({ stats <- microbenchmark( indexByRow = indexByRow(dim, idxs = idxs), indexByRow_R1 = indexByRow_R1(dim, idxs = idxs), indexByRow_R2 = indexByRow_R2(dim, idxs = idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, idxsTag)) %> <% } # for (ii ...) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> ### Local functions ```r <%=lfun()[[1]]%> ``` ```r <%=lfun()[[2]]%> ``` ```r <%=lfun()[[3]]%> ``` <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/varDiff.md.rsp0000644000176200001440000000243613615621101020711 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="varDiff"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-10"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * N/A <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] stats <- microbenchmark( "varDiff" = varDiff(x), "var" = var(x), "diff" = diff(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-10 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/t_tx_OP_y.md.rsp0000644000176200001440000000411213615621101021225 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="t_tx_OP_y"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * t_tx_OP_y_R() as below ```r <%=withCapture({ t_tx_OP_y_R <- function(x, y, OP, na.rm = FALSE) { x <- t(x) if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } t(ans) } # t_tx_OP_y_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[, 1L] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+" = "add", "-" = "sub", "*" = "mul", "/" = "div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "t_tx_OP_y" = t_tx_OP_y(x, y, OP = OP, na.rm = FALSE), "t_tx_OP_y_R" = t_tx_OP_y_R(x, y, OP = OP, na.rm = FALSE), unit = "ms" ) gc() })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/sum2.md.rsp0000644000176200001440000000350413615621101020213 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="sum2"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-02"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * sum() + [() as below ```r <%=withCapture({ sum2_R <- function(x, na.rm = FALSE, idxs) { sum(x[idxs], na.rm = na.rm) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "sum2" = sum2(x), "sum" = sum(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, "all")) %> <% for (subset in c(0.2, 0.4, 0.8)) { %> #### A <%=sprintf("%g", 100*subset)%>% subset ```r <%=withCapture({ x <- data[[.dataLabel.]] subset idxs <- sort(sample(length(x), size = subset*length(x), replace = FALSE)) gc() stats <- microbenchmark( "sum2" = sum2(x, idxs = idxs), "sum+[()" = sum2_R(x, idxs = idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %> <% } # for (subset in ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMedian.md.rsp0000644000176200001440000000434713615621101022251 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMedian"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-03"%> <%@include file="${header}"%> <%--- 'ergm' could be hard to install, because it imports 'Rglpk', which requires GLPK library on the system. ---%> <%@string test_ergm="FALSE"%> <% use("limma", how = "load") limma_weighted.median <- limma::weighted.median use("cwhmisc", how = "load") cwhmisc_w.median <- cwhmisc::w.median use("laeken", how = "load") laeken_weightedMedian <- laeken::weightedMedian <%@ifeq test_ergm="TRUE"%> use("ergm", how = "load") ergm_wtd.median <- ergm::wtd.median <%@endif%> weightedMedian <- matrixStats::weightedMedian %> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * apply() + limma::weighted.median() * apply() + cwhmisc::w.median() * apply() + laeken::weightedMedian() <%@ifeq test_ergm="TRUE"%> * apply() + ergm::wtd.median() --%> <%@endif%> ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode = "double") data <- data[1:3] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] w <- runif(length(x)) gc() stats <- microbenchmark( "weightedMedian" = weightedMedian(x, w = w, ties = "mean", na.rm = FALSE), "limma::weighted.median" = limma_weighted.median(x, w = w, na.rm = FALSE), "cwhmisc::w.median" = cwhmisc_w.median(x, w = w), "laeken::weightedMedian" = laeken_weightedMedian(x, w = w), <%@ifeq test_ergm="TRUE"%> "ergm::wtd.median" = ergm_wtd.median(x, w = w), <%@endif%> unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-12-17 o Made 'ergm::wtd.median' optional. 2014-06-03 o Created using benchmark snippet in incl/weightedMedian.Rex. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowSums2.md.rsp0000644000176200001440000000303713615621101021525 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colSums2"%> <%@string rowname="rowSums2"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2017-03-31"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + sum() * colSums() and rowSums() * .colSums() and .rowSums() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colSums2 = colSums2(X, na.rm = FALSE), .colSums = .colSums(X, m = nrow(X), n = ncol(X), na.rm = FALSE), colSums = colSums(X, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 2L, FUN = sum, na.rm = FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowSums2 = rowSums2(X, na.rm = FALSE), .rowSums = .rowSums(X, m = nrow(X), n = ncol(X), na.rm = FALSE), rowSums = rowSums(X, na.rm = FALSE), "apply+sum" = apply(X, MARGIN = 1L, FUN = sum, na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> matrixStats/inst/benchmarking/colRowMins_subset.md.rsp0000644000176200001440000000344013615621101023005 0ustar liggesusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMins"%> <%@string rowname="rowMins"%> <%@string fcnname="colRowMins_subset"%> <%@meta title="${colname}() and ${rowname}() benchmarks on subsetted computation"%> <%@meta author="Dongcan Jiang"%> <%@meta date="2015-06-06"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() on subsetted computation. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode = mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] rows <- sample.int(nrow(X), size = nrow(X)*0.7) cols <- sample.int(ncol(X), size = ncol(X)*0.7) X_S <- X[rows, cols] gc() colStats <- microbenchmark( "colMins_X_S" = colMins(X_S, na.rm = FALSE), "colMins(X, rows, cols)" = colMins(X, rows = rows, cols = cols, na.rm = FALSE), "colMins(X[rows, cols])" = colMins(X[rows, cols], na.rm = FALSE), unit = "ms" ) X <- t(X) X_S <- t(X_S) gc() rowStats <- microbenchmark( "rowMins_X_S" = rowMins(X_S, na.rm = FALSE), "rowMins(X, cols, rows)" = rowMins(X, rows = cols, cols = rows, na.rm = FALSE), "rowMins(X[cols, rows])" = rowMins(X[cols, rows], na.rm = FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-06-07 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/WORDLIST0000644000176200001440000000446714077615560014772 0ustar liggesusersASAN Aalto Abele Ahlmann Alls Anys AppVeyor Benchmarking Bengtsson Biobase Biostatistics CMD CNRS Centre Constantin Cormen Corrada Cummaxs Cummins Cumprods Cumsums Dongcan EMBL El Eltze Evry Ghaoui HenrikBengtsson Hervé Hmisc Hoare's ISNA ISNAN IqrDiffs Jaffee Jiang JxN KU Kenkel Koenker Korpela Kx KxJ KxM KxN LSE Langfelder Leiserson Leuven LogSumExps Lund MATRIXSTATS Maxs Mikko Mins MxJ NaN Nakayama Neumann Neuvial Nx NxJ NxK NxM OSX OrderStats Pagès Pre Quicksort Rdoc Rivest Roel Rscript Rtools SPARC Sds UBsan UC Verbelen VignetteAuthor VignetteEngine VignetteIndexEntry VignetteKeyword VignetteTangle WeightedMeans WeightedMedians Xcode Xu al allValue allocArray allocMatrix allocVector ansp anyMissing anyNA anyValue arg benchmarked benchmarking binCounts binMeans colAlls colAnyMissings colAnyNAs colAnys colAvgsPerColSet colAvgsPerRowSet colCollapse colCounts colCummaxs colCummins colCumprods colCumsums colDiffs colIQRDiffs colIQRs colLogSumExps colMadDiffs colMads colMaxs colMeans colMedians colMins colNnn colOrderStats colProds colQuantiles colRanges colRanks colSdDiffs colSds colSums colTabulates colVarDiffs colVars colWeightedMads colWeightedMeans colWeightedMedians colWeightedSds colWeightedVars cran crfcns cummax cummin cumprod cumsum deprecatated dest df env envir et expSumLog exponentials fabs fcns gcc getNamespace getUrl getVersion github grepl gsub https idxs indexByRow iqr iqrDiff isNeg iter kable kk knitr loadMethod logSumExp logsumexp lx macOS madDiff matrixStats meanOver memcall memtests methodsS na nan ncol nrow ns oo pkgName pre preserveShape probs psort rPsort rowAlls rowAnyMissings rowAnyNAs rowAnys rowAvgsPerColSet rowAvgsPerRowSet rowCollapse rowCounts rowCummaxs rowCummins rowCumprods rowCumsums rowDiffs rowIQRDiffs rowIQRs rowLogSumExp rowLogSumExps rowMadDiffs rowMads rowMaxs rowMeans rowMedians rowMins rowNnn rowOrderStats rowProds rowQuantiles rowRanges rowRanks rowSdDiffs rowSds rowSums rowTabulates rowVarDiffs rowVars rowWeightedMads rowWeightedMeans rowWeightedMedians rowWeightedSds rowWeightedVars roxygen rsp sapply sd sdDiff setdiff signTabulate sprintf src stopifnot strsplit sumOver tapply tbl th tieAvg tx typedef typeof underflowing unlist useDynamicSymbols useNames valgrind varDiff vfcns von weighedVar weightedMad weightedMean weightedMedian weightedSd weightedVar wtd xK xN xlen md