future.apply/0000755000176200001440000000000014104474122012707 5ustar liggesusersfuture.apply/NAMESPACE0000644000176200001440000000161114104465634014135 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(future_by,data.frame) S3method(future_by,default) export(future_.mapply) export(future_Map) export(future_apply) export(future_by) export(future_eapply) export(future_lapply) export(future_mapply) export(future_replicate) export(future_sapply) export(future_tapply) export(future_vapply) importFrom(future,Future) importFrom(future,FutureError) importFrom(future,as.FutureGlobals) importFrom(future,future) importFrom(future,getGlobalsAndPackages) importFrom(future,nbrOfWorkers) importFrom(future,resolve) importFrom(future,value) importFrom(globals,findGlobals) importFrom(globals,globalsByName) importFrom(parallel,nextRNGStream) importFrom(parallel,nextRNGSubStream) importFrom(parallel,splitIndices) importFrom(utils,capture.output) importFrom(utils,globalVariables) importFrom(utils,head) importFrom(utils,packageVersion) importFrom(utils,str) future.apply/man/0000755000176200001440000000000014104217116013460 5ustar liggesusersfuture.apply/man/future_mapply.Rd0000644000176200001440000001570314104262315016652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/future_Map.R, R/future_mapply.R \name{future_Map} \alias{future_Map} \alias{future_mapply} \alias{future_.mapply} \title{Apply a Function to Multiple List or Vector Arguments} \usage{ future_Map( f, ..., future.envir = parent.frame(), future.label = "future_Map-\%d" ) future_mapply( FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE, future.envir = parent.frame(), future.stdout = TRUE, future.conditions = "condition", future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1, future.chunk.size = NULL, future.label = "future_mapply-\%d" ) future_.mapply(FUN, dots, MoreArgs, ..., future.label = "future_.mapply-\%d") } \arguments{ \item{f}{A function of the arity \eqn{k} if \code{future_Map()} is called with \eqn{k} arguments.} \item{future.envir}{An \link{environment} passed as argument \code{envir} to \code{\link[future:future]{future::future()}} as-is.} \item{future.label}{If a character string, then each future is assigned a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels are assigned.} \item{FUN}{A function to apply, found via \code{\link[base:match.fun]{base::match.fun()}}.} \item{MoreArgs}{A list of other arguments to \code{FUN}.} \item{SIMPLIFY}{A logical or character string; attempt to reduce the result to a vector, matrix or higher dimensional array; see the simplify argument of \code{\link[base:lapply]{base::sapply()}}.} \item{USE.NAMES}{A logical; use names if the first \verb{\\ldots} argument has names, or if it is a character vector, use that character vector as the names.} \item{future.stdout}{If \code{TRUE} (default), then the standard output of the underlying futures is captured, and re-outputted as soon as possible. If \code{FALSE}, any output is silenced (by sinking it to the null device as it is outputted). If \code{NA} (not recommended), output is \emph{not} intercepted.} \item{future.conditions}{A character string of conditions classes to be captured and relayed. The default is the same as the \code{condition} argument of \code{\link[future:Future-class]{future::Future()}}. To not intercept conditions, use \code{conditions = character(0L)}. Errors are always relayed.} \item{future.globals}{A logical, a character vector, or a named list for controlling how globals are handled. For details, see \code{\link[=future_lapply]{future_lapply()}}.} \item{future.packages}{(optional) a character vector specifying packages to be attached in the R environment evaluating the future.} \item{future.lazy}{Specifies whether the futures should be resolved lazily or eagerly (default).} \item{future.seed}{A logical or an integer (of length one or seven), or a list of \code{max(lengths(list(...)))} with pre-generated random seeds. For details, see \code{\link[=future_lapply]{future_lapply()}}.} \item{future.scheduling}{Average number of futures ("chunks") per worker. If \code{0.0}, then a single future is used to process all elements of \code{X}. If \code{1.0} or \code{TRUE}, then one future per worker is used. If \code{2.0}, then each worker will process two futures (if there are enough elements in \code{X}). If \code{Inf} or \code{FALSE}, then one future per element of \code{X} is used. Only used if \code{future.chunk.size} is \code{NULL}.} \item{future.chunk.size}{The average number of elements per future ("chunk"). If \code{Inf}, then all elements are processed in a single future. If \code{NULL}, then argument \code{future.scheduling} is used.} \item{dots}{A list of arguments to vectorize over (vectors or lists of strictly positive length, or all of zero length).} \item{\ldots}{Arguments to vectorize over (vectors or lists of strictly positive length, or all of zero length).} } \value{ \code{future_Map()} is a simple wrapper to \code{future_mapply()} which does not attempt to simplify the result. See \code{\link[base:funprog]{base::Map()}} for details. \code{future_mapply()} returns a list, or for \code{SIMPLIFY = TRUE}, a vector, array or list. See \code{\link[base:mapply]{base::mapply()}} for details. \code{future_.mapply()} returns a list. See \code{\link[base:base-internal]{base::.mapply()}} for details. } \description{ \code{future_mapply()} implements \code{\link[base:mapply]{base::mapply()}} using futures with perfect replication of results, regardless of future backend used. Analogously to \code{mapply()}, \code{future_mapply()} is a multivariate version of \code{future_sapply()}. It applies \code{FUN} to the first elements of each \verb{\\ldots} argument, the second elements, the third elements, and so on. Arguments are recycled if necessary. } \details{ Note that \code{\link[base:base-internal]{base::.mapply()}}, which \code{future_.mapply()} is modeled after is listed as an "internal" function in \R despite being exported. } \examples{ ## --------------------------------------------------------- ## mapply() ## --------------------------------------------------------- y0 <- mapply(rep, 1:4, 4:1) y1 <- future_mapply(rep, 1:4, 4:1) stopifnot(identical(y1, y0)) y0 <- mapply(rep, times = 1:4, x = 4:1) y1 <- future_mapply(rep, times = 1:4, x = 4:1) stopifnot(identical(y1, y0)) y0 <- mapply(rep, times = 1:4, MoreArgs = list(x = 42)) y1 <- future_mapply(rep, times = 1:4, MoreArgs = list(x = 42)) stopifnot(identical(y1, y0)) y0 <- mapply(function(x, y) seq_len(x) + y, c(a = 1, b = 2, c = 3), # names from first c(A = 10, B = 0, C = -10)) y1 <- future_mapply(function(x, y) seq_len(x) + y, c(a = 1, b = 2, c = 3), # names from first c(A = 10, B = 0, C = -10)) stopifnot(identical(y1, y0)) word <- function(C, k) paste(rep.int(C, k), collapse = "") y0 <- mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE) y1 <- future_mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE) stopifnot(identical(y1, y0)) ## --------------------------------------------------------- ## Parallel Random Number Generation ## --------------------------------------------------------- \donttest{ ## Regardless of the future plan, the number of workers, and ## where they are, the random numbers produced are identical plan(multisession) y1 <- future_mapply(stats::runif, n = 1:4, max = 2:5, MoreArgs = list(min = 1), future.seed = 0xBEEF) print(y1) plan(sequential) y2 <- future_mapply(stats::runif, n = 1:4, max = 2:5, MoreArgs = list(min = 1), future.seed = 0xBEEF) print(y2) stopifnot(all.equal(y1, y2)) } \dontshow{ ## R CMD check: make sure any open connections are closed afterward if (!inherits(plan(), "sequential")) plan(sequential) } } \author{ The implementations of \code{future_Map()} is adopted from the source code of the corresponding base \R function \code{Map()}, which is licensed under GPL (>= 2) with 'The R Core Team' as the copyright holder. } \keyword{iteration} \keyword{manip} \keyword{programming} future.apply/man/fold.Rd0000644000176200001440000000256414024036060014700 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fold.R \name{fold} \alias{fold} \title{Efficient Fold, Reduce, Accumulate, Combine of a Vector} \usage{ fold(x, f, left = TRUE, unname = TRUE, threshold = 1000L) } \arguments{ \item{x}{A vector.} \item{f}{A binary function, i.e. a function take takes two arguments.} \item{left}{If \code{TRUE}, vector is combined from the left (the first element), otherwise the right (the last element).} \item{unname}{If \code{TRUE}, function \code{f} is called as \code{f(unname(y), x[[ii]])}, otherwise as \code{f(y, x[[ii]])}, which may introduce name \code{"y"}.} \item{threshold}{An integer (>= 2) specifying the length where the recursive divide-and-conquer call will stop and incremental building of the partial value is performed. Using \code{threshold = +Inf} will disable recursive folding.} } \value{ A vector. } \description{ Efficient Fold, Reduce, Accumulate, Combine of a Vector } \details{ In order for recursive folding to give the same results as non-recursive folding, binary function \code{f} must be \emph{associative} with itself, i.e. \code{f(f(x[[1]], x[[2]]), x[[3]])} equals \code{f(x[[1]], f(x[[2]]), x[[3]])}. This function is a more efficient (memory and speed) of \code{\link[base:funprog]{Reduce(f, x, right = !left, accumulate = FALSE)}}, especially when \code{x} is long. } \keyword{internal} future.apply/man/makeChunks.Rd0000644000176200001440000000353014104216357016047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/makeChunks.R \name{makeChunks} \alias{makeChunks} \title{Create Chunks of Index Vectors} \usage{ makeChunks( nbrOfElements, nbrOfWorkers, future.scheduling = 1, future.chunk.size = NULL ) } \arguments{ \item{nbrOfElements}{(integer) Total number of elements to iterate over.} \item{nbrOfWorkers}{(integer) Number of workers available.} \item{future.scheduling}{(numeric) A strictly positive scalar. Only used if argument \code{future.chunk.size} is \code{NULL}.} \item{future.chunk.size}{(numeric) The maximum number of elements per chunk, or \code{NULL}. If \code{NULL}, then the chunk sizes are given by the \code{future.scheduling} argument.} } \value{ A list of chunks, where each chunk is an integer vector of unique indices \code{[1, nbrOfElements]}. The union of all chunks holds \code{nbrOfElements} elements and equals \code{1:nbrOfElements}. If \code{nbrOfElements == 0}, then an empty list is returned. } \description{ \emph{This is an internal function.} } \section{Control processing order of elements}{ Attribute \code{ordering} of \code{future.chunk.size} or \code{future.scheduling} can be used to control the ordering the elements are iterated over, which only affects the processing order \emph{not} the order values are returned. This attribute can take the following values: \itemize{ \item index vector - an numeric vector of length \code{nbrOfElements} specifying how elements are remapped \item function - an function taking one argument which is called as \code{ordering(nbrOfElements)} and which must return an index vector of length \code{nbrOfElements}, e.g. \code{function(n) rev(seq_len(n))} for reverse ordering. \item \code{"random"} - this will randomize the ordering via random index vector \code{sample.int(nbrOfElements)}. } } \keyword{internal} future.apply/man/future.apply.Rd0000644000176200001440000001132714024036060016407 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/future.apply-package.R \docType{package} \name{future.apply} \alias{future.apply} \alias{future.apply-package} \title{future.apply: Apply Function to Elements in Parallel using Futures} \description{ The \pkg{future.apply} packages provides parallel implementations of common "apply" functions provided by base \R. The parallel processing is performed via the \pkg{future} ecosystem, which provides a large number of parallel backends, e.g. on the local machine, a remote cluster, and a high-performance compute cluster. } \details{ Currently implemented functions are: \itemize{ \item \code{\link[=future_apply]{future_apply()}}: a parallel version of \link[base:apply]{apply()} \item \code{\link[=future_by]{future_by()}}: a parallel version of \link[base:by]{by()} \item \code{\link[=future_eapply]{future_eapply()}}: a parallel version of \link[base:lapply]{eapply()} \item \code{\link[=future_lapply]{future_lapply()}}: a parallel version of \link[base:lapply]{lapply()} \item \code{\link[=future_mapply]{future_mapply()}}: a parallel version of \link[base:mapply]{mapply()} \item \code{\link[=future_sapply]{future_sapply()}}: a parallel version of \link[base:lapply]{sapply()} \item \code{\link[=future_tapply]{future_tapply()}}: a parallel version of \link[base:tapply]{tapply()} \item \code{\link[=future_vapply]{future_vapply()}}: a parallel version of \link[base:lapply]{vapply()} \item \code{\link[=future_Map]{future_Map()}}: a parallel version of \link[base:funprog]{Map()} \item \code{\link[=future_replicate]{future_replicate()}}: a parallel version of \link[base:lapply]{replicate()} \item \code{\link[=future_.mapply]{future_.mapply()}}: a parallel version of \link[base:base-internal]{.mapply()} } Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. Since these \verb{future_*()} functions have the same arguments as the corresponding base \R function, start using them is often as simple as renaming the function in the code. For example, after attaching the package:\if{html}{\out{
}}\preformatted{library(future.apply) }\if{html}{\out{
}} code such as:\if{html}{\out{
}}\preformatted{x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE)) y <- lapply(x, quantile, probs = 1:3/4) }\if{html}{\out{
}} can be updated to:\if{html}{\out{
}}\preformatted{y <- future_lapply(x, quantile, probs = 1:3/4) }\if{html}{\out{
}} The default settings in the \pkg{future} framework is to process code \emph{sequentially}. To run the above in parallel on the local machine (on any operating system), use:\if{html}{\out{
}}\preformatted{plan(multisession) }\if{html}{\out{
}} first. That's it! To go back to sequential processing, use \code{plan(sequential)}. If you have access to multiple machines on your local network, use:\if{html}{\out{
}}\preformatted{plan(cluster, workers = c("n1", "n2", "n2", "n3")) }\if{html}{\out{
}} This will set up four workers, one on \code{n1} and \code{n3}, and two on \code{n2}. If you have SSH access to some remote machines, use:\if{html}{\out{
}}\preformatted{plan(cluster, workers = c("m1.myserver.org", "m2.myserver.org)) }\if{html}{\out{
}} See the \pkg{future} package and \code{\link[future:plan]{future::plan()}} for more examples. The \pkg{future.batchtools} package provides support for high-performance compute (HPC) cluster schedulers such as SGE, Slurm, and TORQUE / PBS. For example, \itemize{ \item \code{plan(batchtools_slurm)}: Process via a Slurm scheduler job queue. \item \code{plan(batchtools_torque)}: Process via a TORQUE / PBS scheduler job queue. } This builds on top of the queuing framework that the \pkg{batchtools} package provides. For more details on backend configuration, please see the \pkg{future.batchtools} and \pkg{batchtools} packages. These are just a few examples of parallel/distributed backend for the future ecosystem. For more alternatives, see the 'Reverse dependencies' section on the \href{https://cran.r-project.org/package=future}{future CRAN package page}. } \author{ Henrik Bengtsson, except for the implementations of \code{future_apply()}, \code{future_Map()}, \code{future_replicate()}, \code{future_sapply()}, and \code{future_tapply()}, which are adopted from the source code of the corresponding base \R functions, which are licensed under GPL (>= 2) with 'The R Core Team' as the copyright holder. Because of these dependencies, the license of this package is GPL (>= 2). } \keyword{iteration} \keyword{manip} \keyword{programming} future.apply/man/future_lapply.Rd0000644000176200001440000003067314104262315016654 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/future_eapply.R, R/future_lapply.R, % R/future_replicate.R, R/future_sapply.R, R/future_tapply.R, % R/future_vapply.R \name{future_eapply} \alias{future_eapply} \alias{future_lapply} \alias{future_replicate} \alias{future_sapply} \alias{future_tapply} \alias{future_vapply} \title{Apply a Function over a List or Vector via Futures} \usage{ future_eapply( env, FUN, ..., all.names = FALSE, USE.NAMES = TRUE, future.envir = parent.frame(), future.label = "future_eapply-\%d" ) future_lapply( X, FUN, ..., future.envir = parent.frame(), future.stdout = TRUE, future.conditions = "condition", future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1, future.chunk.size = NULL, future.label = "future_lapply-\%d" ) future_replicate( n, expr, simplify = "array", future.seed = TRUE, ..., future.envir = parent.frame(), future.label = "future_replicate-\%d" ) future_sapply( X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE, future.envir = parent.frame(), future.label = "future_sapply-\%d" ) future_tapply( X, INDEX, FUN = NULL, ..., default = NA, simplify = TRUE, future.envir = parent.frame(), future.label = "future_tapply-\%d" ) future_vapply( X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE, future.envir = parent.frame(), future.label = "future_vapply-\%d" ) } \arguments{ \item{env}{An \R environment.} \item{FUN}{A function taking at least one argument.} \item{all.names}{If \code{TRUE}, the function will also be applied to variables that start with a period (\code{.}), otherwise not. See \code{\link[base:eapply]{base::eapply()}} for details.} \item{USE.NAMES}{See \code{\link[base:lapply]{base::sapply()}}.} \item{future.envir}{An \link{environment} passed as argument \code{envir} to \code{\link[future:future]{future::future()}} as-is.} \item{future.label}{If a character string, then each future is assigned a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels are assigned.} \item{X}{A vector-like object to iterate over.} \item{future.stdout}{If \code{TRUE} (default), then the standard output of the underlying futures is captured, and re-outputted as soon as possible. If \code{FALSE}, any output is silenced (by sinking it to the null device as it is outputted). If \code{NA} (not recommended), output is \emph{not} intercepted.} \item{future.conditions}{A character string of conditions classes to be captured and relayed. The default is the same as the \code{condition} argument of \code{\link[future:Future-class]{future::Future()}}. To not intercept conditions, use \code{conditions = character(0L)}. Errors are always relayed.} \item{future.globals}{A logical, a character vector, or a named list for controlling how globals are handled. For details, see below section.} \item{future.packages}{(optional) a character vector specifying packages to be attached in the R environment evaluating the future.} \item{future.lazy}{Specifies whether the futures should be resolved lazily or eagerly (default).} \item{future.seed}{A logical or an integer (of length one or seven), or a list of \code{length(X)} with pre-generated random seeds. For details, see below section.} \item{future.scheduling}{Average number of futures ("chunks") per worker. If \code{0.0}, then a single future is used to process all elements of \code{X}. If \code{1.0} or \code{TRUE}, then one future per worker is used. If \code{2.0}, then each worker will process two futures (if there are enough elements in \code{X}). If \code{Inf} or \code{FALSE}, then one future per element of \code{X} is used. Only used if \code{future.chunk.size} is \code{NULL}.} \item{future.chunk.size}{The average number of elements per future ("chunk"). If \code{Inf}, then all elements are processed in a single future. If \code{NULL}, then argument \code{future.scheduling} is used.} \item{n}{The number of replicates.} \item{expr}{An \R expression to evaluate repeatedly.} \item{simplify}{See \code{\link[base:lapply]{base::sapply()}} and \code{\link[base:tapply]{base::tapply()}}, respectively.} \item{INDEX}{A list of one or more factors, each of same length as \code{X}. The elements are coerced to factors by \code{as.factor()}. See also \code{\link[base:tapply]{base::tapply()}}.} \item{default}{See \code{\link[base:tapply]{base::tapply()}}.} \item{FUN.VALUE}{A template for the required return value from each \code{FUN(X[ii], ...)}. Types may be promoted to a higher type within the ordering logical < integer < double < complex, but not demoted. See \code{\link[base:lapply]{base::vapply()}} for details.} \item{\ldots}{(optional) Additional arguments passed to \code{FUN()}. For \code{future_*apply()} functions and \code{replicate()}, any \verb{future.*} arguments part of \verb{\\ldots} are passed on to \code{future_lapply()} used internally.} } \value{ A named (unless \code{USE.NAMES = FALSE}) list. See \code{\link[base:eapply]{base::eapply()}} for details. For \code{future_lapply()}, a list with same length and names as \code{X}. See \code{\link[base:lapply]{base::lapply()}} for details. \code{future_replicate()} is a wrapper around \code{future_sapply()} and return simplified object according to the \code{simplify} argument. See \code{\link[base:lapply]{base::replicate()}} for details. Since \code{future_replicate()} usually involves random number generation (RNG), it uses \code{future.seed = TRUE} by default in order produce sound random numbers regardless of future backend and number of background workers used. For \code{future_sapply()}, a vector with same length and names as \code{X}. See \code{\link[base:lapply]{base::sapply()}} for details. \code{future_tapply()} returns an array with mode \code{"list"}, unless \code{simplify = TRUE} (default) \emph{and} \code{FUN} returns a scalar, in which case the mode of the array is the same as the returned scalars. See \code{\link[base:tapply]{base::tapply()}} for details. For \code{future_vapply()}, a vector with same length and names as \code{X}. See \code{\link[base:lapply]{base::vapply()}} for details. } \description{ \code{future_lapply()} implements \code{\link[base:lapply]{base::lapply()}} using futures with perfect replication of results, regardless of future backend used. Analogously, this is true for all the other \code{future_nnn()} functions. } \section{Global variables}{ Argument \code{future.globals} may be used to control how globals should be handled similarly how the \code{globals} argument is used with \code{future()}. Since all function calls use the same set of globals, this function can do any gathering of globals upfront (once), which is more efficient than if it would be done for each future independently. If \code{TRUE}, \code{NULL} or not is specified (default), then globals are automatically identified and gathered. If a character vector of names is specified, then those globals are gathered. If a named list, then those globals are used as is. In all cases, \code{FUN} and any \verb{\\ldots} arguments are automatically passed as globals to each future created as they are always needed. } \section{Reproducible random number generation (RNG)}{ Unless \code{future.seed = FALSE}, this function guarantees to generate the exact same sequence of random numbers \emph{given the same initial seed / RNG state} - this regardless of type of futures, scheduling ("chunking") strategy, and number of workers. RNG reproducibility is achieved by pregenerating the random seeds for all iterations (over \code{X}) by using L'Ecuyer-CMRG RNG streams. In each iteration, these seeds are set before calling \code{FUN(X[[ii]], ...)}. \emph{Note, for large \code{length(X)} this may introduce a large overhead.} As input (\code{future.seed}), a fixed seed (integer) may be given, either as a full L'Ecuyer-CMRG RNG seed (vector of 1+6 integers) or as a seed generating such a full L'Ecuyer-CMRG seed. If \code{future.seed = TRUE}, then \code{\link[base:Random]{.Random.seed}} is returned if it holds a L'Ecuyer-CMRG RNG seed, otherwise one is created randomly. If \code{future.seed = NA}, a L'Ecuyer-CMRG RNG seed is randomly created. If none of the function calls \code{FUN(X[[ii]], ...)} uses random number generation, then \code{future.seed = FALSE} may be used. In addition to the above, it is possible to specify a pre-generated sequence of RNG seeds as a list such that \code{length(future.seed) == length(X)} and where each element is an integer seed vector that can be assigned to \code{\link[base:Random]{.Random.seed}}. One approach to generate a set of valid RNG seeds based on fixed initial seed (here \code{42L}) is:\if{html}{\out{
}}\preformatted{seeds <- future_lapply(seq_along(X), FUN = function(x) .Random.seed, future.chunk.size = Inf, future.seed = 42L) }\if{html}{\out{
}} \strong{Note that \code{as.list(seq_along(X))} is \emph{not} a valid set of such \code{.Random.seed} values.} In all cases but \code{future.seed = FALSE}, the RNG state of the calling R processes after this function returns is guaranteed to be "forwarded one step" from the RNG state that was before the call and in the same way regardless of \code{future.seed}, \code{future.scheduling} and future strategy used. This is done in order to guarantee that an \R script calling \code{future_lapply()} multiple times should be numerically reproducible given the same initial seed. } \section{Control processing order of elements}{ Attribute \code{ordering} of \code{future.chunk.size} or \code{future.scheduling} can be used to control the ordering the elements are iterated over, which only affects the processing order and \emph{not} the order values are returned. This attribute can take the following values: \itemize{ \item index vector - an numeric vector of length \code{length(X)} \item function - an function taking one argument which is called as \code{ordering(length(X))} and which must return an index vector of length \code{length(X)}, e.g. \code{function(n) rev(seq_len(n))} for reverse ordering. \item \code{"random"} - this will randomize the ordering via random index vector \code{sample.int(length(X))}. For example, \code{future.scheduling = structure(TRUE, ordering = "random")}. \emph{Note}, when elements are processed out of order, then captured standard output and conditions are also relayed in that order, that is out of order. } } \examples{ ## --------------------------------------------------------- ## lapply(), sapply(), tapply() ## --------------------------------------------------------- x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE)) y0 <- lapply(x, FUN = quantile, probs = 1:3/4) y1 <- future_lapply(x, FUN = quantile, probs = 1:3/4) print(y1) stopifnot(all.equal(y1, y0)) y0 <- sapply(x, FUN = quantile) y1 <- future_sapply(x, FUN = quantile) print(y1) stopifnot(all.equal(y1, y0)) y0 <- vapply(x, FUN = quantile, FUN.VALUE = double(5L)) y1 <- future_vapply(x, FUN = quantile, FUN.VALUE = double(5L)) print(y1) stopifnot(all.equal(y1, y0)) ## --------------------------------------------------------- ## Parallel Random Number Generation ## --------------------------------------------------------- \donttest{ ## Regardless of the future plan, the number of workers, and ## where they are, the random numbers produced are identical plan(multisession) y1 <- future_lapply(1:5, FUN = rnorm, future.seed = 0xBEEF) str(y1) plan(sequential) y2 <- future_lapply(1:5, FUN = rnorm, future.seed = 0xBEEF) str(y2) stopifnot(all.equal(y1, y2)) } ## --------------------------------------------------------- ## Process chunks of data.frame rows in parallel ## --------------------------------------------------------- iris <- datasets::iris chunks <- split(iris, seq(1, nrow(iris), length.out = 3L)) y0 <- lapply(chunks, FUN = function(iris) sum(iris$Sepal.Length)) y0 <- do.call(sum, y0) y1 <- future_lapply(chunks, FUN = function(iris) sum(iris$Sepal.Length)) y1 <- do.call(sum, y1) print(y1) stopifnot(all.equal(y1, y0)) \dontshow{ ## R CMD check: make sure any open connections are closed afterward if (!inherits(plan(), "sequential")) plan(sequential) } } \author{ The implementations of \code{future_replicate()}, \code{future_sapply()}, and \code{future_tapply()} are adopted from the source code of the corresponding base \R functions, which are licensed under GPL (>= 2) with 'The R Core Team' as the copyright holder. } \keyword{iteration} \keyword{manip} \keyword{programming} future.apply/man/future_apply.Rd0000644000176200001440000001225614104262315016475 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/future_apply.R \name{future_apply} \alias{future_apply} \title{Apply Functions Over Array Margins via Futures} \usage{ future_apply( X, MARGIN, FUN, ..., simplify = TRUE, future.envir = parent.frame(), future.stdout = TRUE, future.conditions = "condition", future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1, future.chunk.size = NULL, future.label = "future_apply-\%d" ) } \arguments{ \item{X}{an array, including a matrix.} \item{MARGIN}{A vector giving the subscripts which the function will be applied over. For example, for a matrix \code{1} indicates rows, \code{2} indicates columns, \code{c(1, 2)} indicates rows and columns. Where \code{X} has named dimnames, it can be a character vector selecting dimension names.} \item{FUN}{A function taking at least one argument.} \item{simplify}{a logical indicating whether results should be simplified if possible.} \item{future.envir}{An \link{environment} passed as argument \code{envir} to \code{\link[future:future]{future::future()}} as-is.} \item{future.stdout}{If \code{TRUE} (default), then the standard output of the underlying futures is captured, and re-outputted as soon as possible. If \code{FALSE}, any output is silenced (by sinking it to the null device as it is outputted). If \code{NA} (not recommended), output is \emph{not} intercepted.} \item{future.conditions}{A character string of conditions classes to be captured and relayed. The default is the same as the \code{condition} argument of \code{\link[future:Future-class]{future::Future()}}. To not intercept conditions, use \code{conditions = character(0L)}. Errors are always relayed.} \item{future.globals}{A logical, a character vector, or a named list for controlling how globals are handled. For details, see below section.} \item{future.packages}{(optional) a character vector specifying packages to be attached in the R environment evaluating the future.} \item{future.lazy}{Specifies whether the futures should be resolved lazily or eagerly (default).} \item{future.seed}{A logical or an integer (of length one or seven), or a list of \code{length(X)} with pre-generated random seeds. For details, see below section.} \item{future.scheduling}{Average number of futures ("chunks") per worker. If \code{0.0}, then a single future is used to process all elements of \code{X}. If \code{1.0} or \code{TRUE}, then one future per worker is used. If \code{2.0}, then each worker will process two futures (if there are enough elements in \code{X}). If \code{Inf} or \code{FALSE}, then one future per element of \code{X} is used. Only used if \code{future.chunk.size} is \code{NULL}.} \item{future.chunk.size}{The average number of elements per future ("chunk"). If \code{Inf}, then all elements are processed in a single future. If \code{NULL}, then argument \code{future.scheduling} is used.} \item{future.label}{If a character string, then each future is assigned a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels are assigned.} \item{\ldots}{(optional) Additional arguments passed to \code{FUN()}, except \verb{future.*} arguments, which are passed on to \code{\link[=future_lapply]{future_lapply()}} used internally.} } \value{ Returns a vector or array or list of values obtained by applying a function to margins of an array or matrix. See \code{\link[base:apply]{base::apply()}} for details. } \description{ \code{future_apply()} implements \code{\link[base:apply]{base::apply()}} using future with perfect replication of results, regardless of future backend used. It returns a vector or array or list of values obtained by applying a function to margins of an array or matrix. } \examples{ ## --------------------------------------------------------- ## apply() ## --------------------------------------------------------- X <- matrix(c(1:4, 1, 6:8), nrow = 2L) Y0 <- apply(X, MARGIN = 1L, FUN = table) Y1 <- future_apply(X, MARGIN = 1L, FUN = table) print(Y1) stopifnot(all.equal(Y1, Y0, check.attributes = FALSE)) ## FIXME Y0 <- apply(X, MARGIN = 1L, FUN = stats::quantile) Y1 <- future_apply(X, MARGIN = 1L, FUN = stats::quantile) print(Y1) stopifnot(all.equal(Y1, Y0)) ## --------------------------------------------------------- ## Parallel Random Number Generation ## --------------------------------------------------------- \donttest{ ## Regardless of the future plan, the number of workers, and ## where they are, the random numbers produced are identical X <- matrix(c(1:4, 1, 6:8), nrow = 2L) plan(multisession) Y1 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF) print(Y1) plan(sequential) Y2 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF) print(Y2) stopifnot(all.equal(Y1, Y2)) } \dontshow{ ## R CMD check: make sure any open connections are closed afterward if (!inherits(plan(), "sequential")) plan(sequential) } } \author{ The implementations of \code{future_apply()} is adopted from the source code of the corresponding base \R function, which is licensed under GPL (>= 2) with 'The R Core Team' as the copyright holder. } future.apply/man/make_rng_seeds.Rd0000644000176200001440000000152614104217517016726 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rng.R \name{make_rng_seeds} \alias{make_rng_seeds} \title{Produce Reproducible Seeds for Parallel Random Number Generation} \usage{ make_rng_seeds(count, seed = FALSE, debug = NA) } \arguments{ \item{count}{The number of RNG seeds to produce.} \item{seed}{A logical specifying whether RNG seeds should be generated or not. (\code{seed = NULL} corresponds to \code{seed = FALSE}). If a list, then it should be of length \code{count} and each element should consist of a valid RNG seed.} \item{debug}{If \code{TRUE}, debug output is produced, otherwise not.} } \value{ Returns a non-named list of length \code{count}, or \code{NULL}. Any seed returned is a valid RNG seed. } \description{ Produce Reproducible Seeds for Parallel Random Number Generation } \keyword{internal} future.apply/man/future_by.Rd0000644000176200001440000000533314104262741015763 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/future_by.R \name{future_by} \alias{future_by} \title{Apply a Function to a Data Frame Split by Factors via Futures} \usage{ future_by( data, INDICES, FUN, ..., simplify = TRUE, future.envir = parent.frame() ) } \arguments{ \item{data}{An \R object, normally a data frame, possibly a matrix.} \item{INDICES}{A factor or a list of factors, each of length \code{nrow(data)}.} \item{FUN}{a function to be applied to (usually data-frame) subsets of \code{data}.} \item{simplify}{logical: see \link[base:tapply]{base::tapply}.} \item{future.envir}{An \link{environment} passed as argument \code{envir} to \code{\link[future:future]{future::future()}} as-is.} \item{\ldots}{Additional arguments pass to \code{\link[=future_lapply]{future_lapply()}} and then to \code{FUN()}.} } \value{ An object of class "by", giving the results for each subset. This is always a list if simplify is false, otherwise a list or array (see \link[base:tapply]{base::tapply}). See also \code{\link[base:by]{base::by()}} for details. } \description{ Apply a Function to a Data Frame Split by Factors via Futures } \details{ Internally, \code{data} is grouped by \code{INDICES} into a list of \code{data} subset elements which is then processed by \code{\link[=future_lapply]{future_lapply()}}. When the groups differ significantly in size, the processing time may differ significantly between the groups. To correct for processing-time imbalances, adjust the amount of chunking via arguments \code{future.scheduling} and \code{future.chunk.size}. } \section{Note on 'stringsAsFactors'}{ The \code{future_by()} is modeled as closely as possible to the behavior of \code{base::by()}. Both functions have "default" S3 methods that calls \code{data <- as.data.frame(data)} internally. This call may in turn call an S3 method for \code{as.data.frame()} that coerces strings to factors or not depending on whether it has a \code{stringsAsFactors} argument and what its default is. For example, the S3 method of \code{as.data.frame()} for lists changed its (effective) default from \code{stringsAsFactors = TRUE} to \code{stringsAsFactors = TRUE} in R 4.0.0. } \examples{ ## --------------------------------------------------------- ## by() ## --------------------------------------------------------- library(datasets) ## warpbreaks library(stats) ## lm() y0 <- by(warpbreaks, warpbreaks[,"tension"], function(x) lm(breaks ~ wool, data = x)) plan(multisession) y1 <- future_by(warpbreaks, warpbreaks[,"tension"], function(x) lm(breaks ~ wool, data = x)) plan(sequential) y2 <- future_by(warpbreaks, warpbreaks[,"tension"], function(x) lm(breaks ~ wool, data = x)) } future.apply/man/future.apply.options.Rd0000644000176200001440000000310714104217116020100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/options.R \name{future.apply.options} \alias{future.apply.options} \alias{future.apply.debug} \alias{R_FUTURE_APPLY_DEBUG} \title{Options used for future.apply} \description{ Below are the \R options and environment variables that are used by the \pkg{future.apply} package and packages enhancing it.\cr \cr \emph{WARNING: Note that the names and the default values of these options may change in future versions of the package. Please use with care until further notice.} } \details{ For settings specific to the \pkg{future} package, see \link[future:future.options]{future::future.options} page. } \section{Options for debugging future.apply}{ \describe{ \item{\option{future.apply.debug}:}{(logical) If \code{TRUE}, extensive debug messages are generated. (Default: \code{FALSE})} } } \section{Environment variables that set R options}{ All of the above \R \option{future.apply.*} options can be set by corresponding environment variable \env{R_FUTURE_APPLY_*} \emph{when the \pkg{future.apply} package is loaded}. For example, if \code{R_FUTURE_APPLY_DEBUG = "TRUE"}, then option \option{future.apply.debug} is set to \code{TRUE} (logical). } \examples{ options(future.apply.debug = TRUE) } \seealso{ To set \R options or environment variables when \R starts (even before the \pkg{future} package is loaded), see the \link[base]{Startup} help page. The \href{https://cran.r-project.org/package=startup}{\pkg{startup}} package provides a friendly mechanism for configurating \R's startup process. } \keyword{internal} future.apply/DESCRIPTION0000644000176200001440000000257714104474122014430 0ustar liggesusersPackage: future.apply Version: 1.8.1 Title: Apply Function to Elements in Parallel using Futures Depends: R (>= 3.2.0), future (>= 1.21.0) Imports: globals (>= 0.14.0), parallel, utils Suggests: datasets, stats, tools, listenv (>= 0.8.0), R.rsp, markdown VignetteBuilder: R.rsp Authors@R: c(person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"), email = "henrikb@braju.com"), person("R Core Team", role = c("cph", "ctb"))) Description: Implementations of apply(), by(), eapply(), lapply(), Map(), .mapply(), mapply(), replicate(), sapply(), tapply(), and vapply() that can be resolved using any future-supported backend, e.g. parallel on the local machine or distributed on a compute cluster. These future_*apply() functions come with the same pros and cons as the corresponding base-R *apply() functions but with the additional feature of being able to be processed via the future framework. License: GPL (>= 2) LazyLoad: TRUE URL: https://future.apply.futureverse.org, https://github.com/HenrikBengtsson/future.apply BugReports: https://github.com/HenrikBengtsson/future.apply/issues RoxygenNote: 7.1.1 NeedsCompilation: no Packaged: 2021-08-10 12:15:45 UTC; hb Author: Henrik Bengtsson [aut, cre, cph], R Core Team [cph, ctb] Maintainer: Henrik Bengtsson Repository: CRAN Date/Publication: 2021-08-10 13:00:02 UTC future.apply/build/0000755000176200001440000000000014104466760014017 5ustar liggesusersfuture.apply/build/vignette.rds0000644000176200001440000000047314104466760016362 0ustar liggesusersuQN0t4RWVdaOkؑ _NqQGXKr~wyBF$ L k87%6DdjZ*.AxL= 3.14)) foo <- function(x, abc) future_mapply(function(y) y, x) y <- foo(1:2, abc = (xyz >= 3.14)) stopifnot(identical(y, y0)) message("*** future_mapply() - false positives ... DONE") message("*** future_mapply() - too large ...") X <- replicate(10L, 1:100, simplify = FALSE) FUN <- function(x) { getOption("future.globals.maxSize") } y0 <- mapply(FUN = FUN, X) sizes <- unclass(c(FUN = object.size(FUN), X = object.size(X))) cat(sprintf("Baseline size of globals: %.2f KiB\n", sizes[["FUN"]] / 1024)) message("- true positive ...") oMaxSize <- getOption("future.globals.maxSize") options(future.globals.maxSize = 1L) res <- tryCatch({ y <- future_mapply(FUN = FUN, X) }, error = identity) stopifnot(inherits(res, "error")) res <- NULL options(future.globals.maxSize = oMaxSize) maxSize <- getOption("future.globals.maxSize") y <- future_mapply(FUN = FUN, X) str(y) stopifnot(all(sapply(y, FUN = identical, oMaxSize))) message("- approximately invariant to chunk size ...") maxSize <- sizes[["FUN"]] + sizes[["X"]] / length(X) options(future.globals.maxSize = maxSize) for (chunk.size in c(1L, 2L, 5L, structure(10L, ordering = "random"))) { y <- future_mapply(FUN = FUN, X, future.chunk.size = chunk.size) str(y) stopifnot(all(unlist(y) == maxSize)) cat(sprintf("maxSize = %g bytes\nfuture.globals.maxSize = %g bytes\n", maxSize, getOption("future.globals.maxSize"))) stopifnot(getOption("future.globals.maxSize") == maxSize) } y <- NULL options(future.globals.maxSize = oMaxSize) message("*** future_mapply() - too large ... DONE") message("*** future_mapply() - globals exceptions ...") res <- tryCatch({ y <- future_mapply(function(x) x, 1, future.globals = 42) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ y <- future_mapply(function(x) x, 1, future.globals = list(1)) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ y <- future_mapply(function(x) x, 1, future.globals = "...future.FUN") }, error = identity) stopifnot(inherits(res, "error")) ...future.elements_ii <- 42L X <- list(function() 2 * ...future.elements_ii) res <- tryCatch({ y <- future_mapply(FUN = function(f) f(), X) }, error = identity) stopifnot(inherits(res, "error")) message("*** future_mapply() - globals exceptions ... DONE") source("incl/end.R") future.apply/tests/future_tapply.R0000644000176200001440000001146014024036060017075 0ustar liggesuserssource("incl/start.R") library("datasets") ## warpbreaks options(future.debug = FALSE) message("*** future_tapply() ...") for (strategy in supportedStrategies()[1]) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) message("- From example(tapply) ...") message(" - Example #1") library("stats") ## rbinom() groups <- as.factor(stats::rbinom(32, n = 5, prob = 0.4)) t <- table(groups) print(t) y0 <- tapply(groups, INDEX = groups, FUN = length) print(y0) y1 <- future_tapply(groups, INDEX = groups, FUN = length) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #2") ## contingency table from data.frame : array with named dimnames y0 <- tapply(warpbreaks$breaks, INDEX = warpbreaks[,-1], FUN = sum) print(y0) y1 <- future_tapply(warpbreaks$breaks, INDEX = warpbreaks[,-1], FUN = sum) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #3") y0 <- tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum) print(y0) y1 <- future_tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #4") n <- 17 fac <- factor(rep_len(1:3, n), levels = 1:5) t <- table(fac) y0 <- tapply(1:n, fac, sum) print(y0) y1 <- future_tapply(1:n, fac, sum) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #5") if ("default" %in% names(formals(tapply))) { y0 <- tapply(1:n, fac, sum, default = 0) # maybe more desirable print(y0) y1 <- future_tapply(1:n, fac, sum, default = 0) # maybe more desirable print(y1) stopifnot(all.equal(y1, y0)) } message(" - Example #6") y0 <- tapply(1:n, fac, sum, simplify = FALSE) print(y0) y1 <- future_tapply(1:n, fac, sum, simplify = FALSE) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #7") y0 <- tapply(1:n, fac, range) print(y0) y1 <- future_tapply(1:n, fac, range) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #8") y0 <- tapply(1:n, fac, quantile) print(y0) y1 <- future_tapply(1:n, fac, quantile) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #9") y0 <- tapply(1:n, fac, length) ## NA's print(y0) y1 <- future_tapply(1:n, fac, length) ## NA's print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #10") if ("default" %in% names(formals(tapply))) { y0 <- tapply(1:n, fac, length, default = 0) # == table(fac) print(y0) y1 <- future_tapply(1:n, fac, length, default = 0) # == table(fac) print(y1) stopifnot(all.equal(y1, y0)) } message(" - Example #11") ## example of ... argument: find quarterly means y0 <- tapply(presidents, cycle(presidents), mean, na.rm = TRUE) print(y0) y1 <- future_tapply(presidents, cycle(presidents), mean, na.rm = TRUE) print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #12") ind <- list(c(1, 2, 2), c("A", "A", "B")) t <- table(ind) print(t) y0 <- tapply(1:3, ind) #-> the split vector print(y0) y1 <- future_tapply(1:3, ind) #-> the split vector print(y1) stopifnot(all.equal(y1, y0)) message(" - Example #13") y0 <- tapply(1:3, ind, sum) print(y0) y1 <- future_tapply(1:3, ind, sum) print(y1) stopifnot(all.equal(y1, y0)) ## Some assertions (not held by all patch propsals): message(" - Example #14") nq <- names(quantile(1:5)) y_truth <- c(1L, 2L, 4L) stopifnot(identical(tapply(1:3, ind), y_truth)) stopifnot(identical(future_tapply(1:3, ind), y_truth)) message(" - Example #15") y_truth <- matrix(c(1L, 2L, NA, 3L), nrow = 2L, dimnames = list(c("1", "2"), c("A", "B"))) stopifnot(identical(tapply(1:3, ind, sum), y_truth)) stopifnot(identical(future_tapply(1:3, ind, sum), y_truth)) message(" - Example #16") y_truth <- array(list( `2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq), `3` = structure(c(3, 6, 9, 12, 15), .Names = nq), `4` = NULL, `5` = NULL), dim = 4L, dimnames = list(as.character(2:5))) stopifnot(identical(tapply(1:n, fac, quantile)[-1], y_truth)) stopifnot(identical(future_tapply(1:n, fac, quantile)[-1], y_truth)) plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("*** exceptions ...") ## Error: 'INDEX' is of length zero res <- tryCatch({ y <- future_tapply(1L, INDEX = list()) }, error = identity) stopifnot(inherits(res, "error")) ## Error: total number of levels >= 2^31 res <- tryCatch({ y <- future_tapply(1:216, INDEX = rep(list(1:216), times = 4L)) }, error = identity) stopifnot(inherits(res, "error")) ## Error: arguments must have same length res <- tryCatch({ y <- future_tapply(1L, INDEX = list(1:2)) }, error = identity) stopifnot(inherits(res, "error")) message("*** future_tapply() ... DONE") source("incl/end.R") future.apply/tests/incl/0000755000176200001440000000000014104222505014772 5ustar liggesusersfuture.apply/tests/incl/start,load-only.R0000644000176200001440000000373114104222505020151 0ustar liggesusers## Record original state ovars <- ls() oenvs <- oenvs0 <- Sys.getenv() oopts0 <- options() covr_testing <- ("covr" %in% loadedNamespaces()) on_solaris <- grepl("^solaris", R.version$os) ## Default options oopts <- options( warn = 1L, mc.cores = 2L, future.debug = TRUE, ## Reset the following during testing in case ## they are set on the test system future.availableCores.system = NULL, future.availableCores.fallback = NULL ) ## Reset the following during testing in case ## they are set on the test system oenvs2 <- Sys.unsetenv(c( "R_FUTURE_AVAILABLECORES_SYSTEM", "R_FUTURE_AVAILABLECORES_FALLBACK", ## SGE "NSLOTS", "PE_HOSTFILE", ## Slurm "SLURM_CPUS_PER_TASK", ## TORQUE / PBS "PBS_NUM_PPN", "PBS_NODEFILE", "PBS_NP", "PBS_NUM_NODES" )) oplan <- future::plan() ## Use eager futures by default future::plan("sequential") ## Private future.apply functions fold <- future.apply:::fold hpaste <- future.apply:::hpaste mdebug <- future.apply:::mdebug mdebugf <- future.apply:::mdebugf import_from <- future.apply:::import_from get_random_seed <- future.apply:::get_random_seed set_random_seed <- future.apply:::set_random_seed as_lecyer_cmrg_seed <- future.apply:::as_lecyer_cmrg_seed is_lecyer_cmrg_seed <- future.apply:::is_lecyer_cmrg_seed make_rng_seeds <- future.apply:::make_rng_seeds ## Local functions for test scripts printf <- function(...) cat(sprintf(...)) mstr <- function(...) message(paste(capture.output(str(...)), collapse = "\n")) attachLocally <- function(x, envir = parent.frame()) { for (name in names(x)) { assign(name, value = x[[name]], envir = envir) } } supportedStrategies <- function(cores = 1L, excl = c("multiprocess", "cluster"), ...) { strategies <- future:::supportedStrategies(...) strategies <- setdiff(strategies, excl) if (cores > 1) { strategies <- setdiff(strategies, c("sequential", "uniprocess", "eager", "lazy")) } strategies } availCores <- min(2L, future::availableCores()) future.apply/tests/incl/start.R0000644000176200001440000000007114024036060016250 0ustar liggesuserslibrary("future.apply") source("incl/start,load-only.R") future.apply/tests/incl/end.R0000644000176200001440000000241114024036060015661 0ustar liggesusers## Undo future strategy future::plan(oplan) ## Undo options ## (a) Added added <- setdiff(names(options()), names(oopts0)) opts <- vector("list", length = length(added)) names(opts) <- added options(opts) ## (b) Modified options(oopts) ## (c) Assert that everything was undone stopifnot(all.equal(options(), oopts0)) ## Undo system environment variables ## (a) Added cenvs <- Sys.getenv() added <- setdiff(names(cenvs), names(oenvs0)) for (name in added) Sys.unsetenv(name) ## (b) Missing missing <- setdiff(names(oenvs0), names(cenvs)) if (length(missing) > 0) do.call(Sys.setenv, as.list(oenvs0[missing])) ## (c) Modified? for (name in intersect(names(cenvs), names(oenvs0))) { ## WORKAROUND: On Linux Wine, base::Sys.getenv() may ## return elements with empty names. /HB 2016-10-06 if (nchar(name) == 0) next if (!identical(cenvs[[name]], oenvs0[[name]])) { do.call(Sys.setenv, as.list(oenvs0[name])) } } ## (d) Assert that everything was undone stopifnot(identical(Sys.getenv(), oenvs0)) ## Undo variables rm(list = c(setdiff(ls(), ovars))) ## Travis CI specific: Explicit garbage collection because it ## looks like Travis CI might run out of memory during 'covr' ## testing and we now have so many tests. /HB 2017-01-11 if ("covr" %in% loadedNamespaces()) gc() future.apply/tests/utils.R0000644000176200001440000000557214024036060015341 0ustar liggesuserssource("incl/start,load-only.R") stop_if_not <- future.apply:::stop_if_not message("*** utils ...") message("*** hpaste() ...") # Some vectors x <- 1:6 y <- 10:1 z <- LETTERS[x] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Abbreviation of output vector # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - printf("x = %s.\n", hpaste(x)) ## x = 1, 2, 3, ..., 6. printf("x = %s.\n", hpaste(x, maxHead = 2)) ## x = 1, 2, ..., 6. printf("x = %s.\n", hpaste(x, maxHead = 3)) # Default ## x = 1, 2, 3, ..., 6. # It will never output 1, 2, 3, 4, ..., 6 printf("x = %s.\n", hpaste(x, maxHead = 4)) ## x = 1, 2, 3, 4, 5 and 6. # Showing the tail printf("x = %s.\n", hpaste(x, maxHead = 1, maxTail = 2)) ## x = 1, ..., 5, 6. # Turning off abbreviation printf("y = %s.\n", hpaste(y, maxHead = Inf)) ## y = 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ## ...or simply printf("y = %s.\n", paste(y, collapse = ", ")) ## y = 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 # Change last separator printf("x = %s.\n", hpaste(x, lastCollapse = " and ")) ## x = 1, 2, 3, 4, 5 and 6. # No collapse stopifnot(all(hpaste(x, collapse = NULL) == x)) # Empty input stopifnot(identical(hpaste(character(0)), character(0))) message("*** hpaste() ...") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # debug() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - message("*** mdebug() ...") mdebug("Hello #", 1) mdebugf("Hello #%d", 1) options(future.debug = TRUE) mdebug("Hello #", 2) mdebugf("Hello #%d", 2) options(future.debug = FALSE) mdebug("Hello #", 3) mdebugf("Hello #%d", 3) message("*** mdebug() ... DONE") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # import_from() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - message("*** import_from() ...") obj <- import_from("non-existing-fcn", default = NA, package = "future") stopifnot(identical(obj, NA)) res <- tryCatch({ obj <- import_from("non-existing-fcn", package = "future") }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) message("*** import_from() ... DONE") message("*** stop_if_not() ...") stop_if_not(TRUE) stop_if_not(TRUE, TRUE) res <- tryCatch({ stop_if_not(FALSE) }, error = identity) stopifnot(inherits(res, "simpleError")) res <- tryCatch({ stop_if_not(list(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)) }, error = identity) stopifnot(inherits(res, "simpleError")) message("*** stop_if_not() ... DONE") message("*** assert_values2() ...") assert_values2 <- future.apply:::assert_values2 assert_values2(nX = 2L, values2 = as.list(1:2)) res <- tryCatch({ assert_values2(nX = 1L, values = as.list(1:2), values2 = as.list(1:2), fcn = "tests", debug = TRUE) }, error = identity) stopifnot(inherits(res, "FutureError")) message("*** assert_values2() ... DONE") message("*** utils ... DONE") source("incl/end.R") future.apply/tests/future_eapply.R0000644000176200001440000000157614024036060017065 0ustar liggesuserssource("incl/start.R") message("*** future_eapply() ...") message("- From example(eapply) ...") for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) env <- new.env(hash = FALSE) env$a <- 1:10 env$beta <- exp(-3:3) env$logic <- c(TRUE, FALSE, FALSE, TRUE) y0 <- unlist(eapply(env, mean, USE.NAMES = FALSE)) y1 <- unlist(future_eapply(env, mean, USE.NAMES = FALSE)) stopifnot(all.equal(y1, y0)) y0 <- eapply(env, quantile, probs = 1:3/4) y1 <- future_eapply(env, quantile, probs = 1:3/4) stopifnot(all.equal(y1, y0)) y0 <- eapply(env, quantile) y1 <- future_eapply(env, quantile) stopifnot(all.equal(y1, y0)) plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("*** future_eapply() ... DONE") source("incl/end.R") future.apply/tests/future_mapply.R0000644000176200001440000001123114024036060017062 0ustar liggesuserssource("incl/start.R") message("*** future_mapply() ...") message("- Parallel RNG truth (for later)...") plan(sequential) y_rng_0 <- future_mapply(stats::runif, n = 1:4, max = 2:5, MoreArgs = list(min = 1), future.seed = 0xBEEF) print(y_rng_0) for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) message("- From example(mapply) ...") y0 <- mapply(rep, 1:4, 4:1) y1 <- future_mapply(rep, 1:4, 4:1) stopifnot(identical(y1, y0)) y0 <- mapply(rep, times = 1:4, x = 4:1) y1 <- future_mapply(rep, times = 1:4, x = 4:1) stopifnot(identical(y1, y0)) y0 <- mapply(rep, times = 1:4, MoreArgs = list(x = 42)) y1 <- future_mapply(rep, times = 1:4, MoreArgs = list(x = 42)) stopifnot(identical(y1, y0)) y0 <- mapply(function(x, y) seq_len(x) + y, c(a = 1, b = 2, c = 3), # names from first c(A = 10, B = 0, C = -10)) y1 <- future_mapply(function(x, y) seq_len(x) + y, c(a = 1, b = 2, c = 3), # names from first c(A = 10, B = 0, C = -10)) stopifnot(identical(y1, y0)) message("- future_.mapply()") dots <- list(c(a = 1, b = 2, c = 3), # names from first c(A = 10, B = 0, C = -10)) y2 <- .mapply(function(x, y) seq_len(x) + y, dots = dots, MoreArgs = list()) names(y0) <- NULL ## .mapply() don't set names stopifnot(all.equal(y2, y0)) y3 <- future_.mapply(function(x, y) seq_len(x) + y, dots = dots, MoreArgs = list()) stopifnot(all.equal(y3, y2)) word <- function(C, k) paste(rep.int(C, k), collapse = "") for (chunk.size in list(1L, structure(2L, ordering = "random"), structure(3L, ordering = 5:1))) { y0 <- mapply(word, LETTERS[1:5], 5:1, SIMPLIFY = FALSE) y1 <- future_mapply(word, LETTERS[1:5], 5:1, SIMPLIFY = FALSE, future.chunk.size = chunk.size) stopifnot(identical(y1, y0)) dots <- list(LETTERS[1:5], 5:1) MoreArgs <- list() y2 <- .mapply(word, dots = dots, MoreArgs = list()) names(y0) <- NULL ## .mapply() don't set names stopifnot(all.equal(y2, y0)) y3 <- future_.mapply(word, dots = dots, MoreArgs = list()) stopifnot(all.equal(y3, y2)) } message("- Subsetting (Issue #17) ...") X <- as.Date("2018-06-01") y0 <- mapply(FUN = identity, X, SIMPLIFY = FALSE) y1 <- future_mapply(FUN = identity, X, SIMPLIFY = FALSE) stopifnot(identical(y1, y0)) dots <- list(X) y2 <- .mapply(FUN = identity, dots = dots, MoreArgs = MoreArgs) stopifnot(identical(y2, y0)) y3 <- future_.mapply(FUN = identity, dots = dots, MoreArgs = MoreArgs) stopifnot(identical(y3, y2)) message("- Non-recycling of MoreArgs (Issue #51) ...") y0 <- base::mapply( function(x, y) y, x = 1:2, MoreArgs = list(y = 3:4) ) y1 <- future.apply::future_mapply( function(x, y) y, x = 1:2, MoreArgs = list(y = 3:4), future.seed = FALSE ) stopifnot(identical(y1, y0)) y2 <- future.apply::future_mapply( function(x, y) y, x = 1:2, MoreArgs = list(y = 3:4), future.seed = TRUE ) stopifnot(identical(y2, y0)) dots <- list(x = 1:2) MoreArgs <- list(y = 3:4) y3 <- .mapply(function(x, y) y, dots = dots, MoreArgs = MoreArgs) y4 <- future_.mapply(function(x, y) y, dots = dots, MoreArgs = MoreArgs) stopifnot(identical(y4, y3)) message("- Recycle arguments to same length ...") y0 <- mapply(rep, 1:4, 2:1) y1 <- future_mapply(rep, 1:4, 2:1) stopifnot(identical(y1, y0)) message("- Parallel RNG ...") y_rng_1 <- future_mapply(stats::runif, n = 1:4, max = 2:5, MoreArgs = list(min = 1), future.seed = 0xBEEF) print(y_rng_1) stopifnot(all.equal(y_rng_1, y_rng_0)) message("- future_Map() ...") xs <- replicate(5, stats::runif(10), simplify = FALSE) ws <- replicate(5, stats::rpois(10, 5) + 1, simplify = FALSE) y0 <- Map(weighted.mean, xs, ws) y1 <- future_Map(stats::weighted.mean, xs, ws) stopifnot(all.equal(y1, y0)) plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("- Empty input [non parallel] ...") y0 <- mapply(search) y1 <- future_mapply(search) stopifnot(identical(y1, y0)) y0 <- mapply(list, integer(0L)) y1 <- future_mapply(list, integer(0L)) stopifnot(identical(y1, y0)) message("*** future_mapply() - special cases ...") X <- list() names(X) <- character(0L) y <- future_mapply(FUN = identity, X) stopifnot(length(y) == 0L, !is.null(names(y)), identical(y, X)) y <- future_mapply(FUN = identity, X, X) stopifnot(length(y) == 0L, !is.null(names(y)), identical(y, X)) message("*** future_mapply() - special cases ... DONE") message("*** future_mapply() ... DONE") source("incl/end.R") future.apply/tests/globals,tricky_recursive.R0000644000176200001440000000402514024036060021205 0ustar liggesuserssource("incl/start.R") ## Test adopted from http://stackoverflow.com/questions/42561088/nested-do-call-within-a-foreach-dopar-environment-cant-find-function-passed-w options(future.debug = FALSE) message("*** Tricky globals requiring recursive search ...") my_add <- function(a, b) a + b call_my_add <- function(a, b) { do.call(my_add, args = list(a = a, b = b)) } call_my_add_caller <- function(a, b, FUN = call_my_add) { do.call(FUN, args = list(a = a, b = b)) } main_lapply <- function(x = 1:2, caller = call_my_add_caller, args = list(FUN = call_my_add)) { lapply(x, FUN = function(i) { do.call(caller, args = c(list(a = i, b = i+1L), args)) }) } main_lapply_no_FUN <- function(x = 1:2, caller = call_my_add_caller, args = list(FUN = call_my_add)) { lapply(x, FUN = function(i) { do.call(caller, args = list(a = i, b = i+1L)) }) } main_future_lapply <- function(x = 1:2, caller = call_my_add_caller, args = list(FUN = call_my_add)) { future_lapply(x, FUN = function(i) { do.call(caller, args = c(list(a = i, b = i + 1L), args)) }) } main_future_lapply_no_FUN <- function(x = 1:2, caller = call_my_add_caller, args = list(FUN = call_my_add)) { future_lapply(x, FUN = function(i) { do.call(caller, args = list(a = i, b = i + 1L)) }) } x0 <- y0 <- z0 <- NULL for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) z <- main_lapply() str(list(z = z)) if (is.null(z0)) z0 <- z stopifnot(identical(z, z0)) z2 <- main_lapply_no_FUN() str(list(z2 = z2)) stopifnot(identical(z2, z0)) z3 <- main_future_lapply() str(list(z3 = z3)) stopifnot(identical(z3, z0)) z4 <- main_future_lapply_no_FUN() str(list(z4 = z4)) stopifnot(identical(z4, z0)) message(sprintf("*** strategy = %s ... DONE", sQuote(strategy))) } message("*** Tricky globals requiring recursive search ... DONE") source("incl/end.R") future.apply/tests/stdout.R0000644000176200001440000000356314024036060015521 0ustar liggesuserssource("incl/start.R") message("*** future_*apply() and 'future.stdout' ...") options(future.debug = FALSE) truth <- list() out <- utils::capture.output({ y <- future_lapply(1:0, FUN = function(x) { print(x) }) }) truth[["lapply"]] <- list(value = y, stdout = out) out <- utils::capture.output({ y <- future_mapply(1:0, 0:1, FUN = function(x, y) { print(list(x = x, y = y)) }) }) truth[["mapply"]] <- list(value = y, stdout = out) for (cores in 1:availCores) { message(sprintf(" - Testing with %d cores ...", cores)) options(mc.cores = cores) for (strategy in supportedStrategies(cores)) { message(sprintf("* plan('%s') ...", strategy)) plan(strategy) for (fun in c("lapply", "mapply")) { for (stdout in c(FALSE, TRUE, NA)) { message(sprintf("* future_%s(x, ..., future.stdout = %s) ...", fun, stdout)) out <- utils::capture.output({ if (fun == "lapply") { y <- future_lapply(1:0, FUN = function(x) { Sys.sleep(x / 2) print(x) }, future.stdout = stdout) } else if (fun == "mapply") { y <- future_mapply(1:0, 0:1, FUN = function(x, y) { Sys.sleep(x / 2) print(list(x = x, y = y)) }, future.stdout = stdout) } }) stopifnot(identical(y, truth[[fun]]$value)) if (isTRUE(stdout)) { stopifnot(identical(out, truth[[fun]]$stdout)) } else if (is.na(stdout)) { } else { stopifnot(nchar(out) == 0) } message(sprintf("* future_%s(x, ..., future.stdout = %s) ... DONE", fun, stdout)) } ## for (stdout ...) } ## for (fun ...) message(sprintf("* plan('%s') ... DONE", strategy)) } message(sprintf(" - Testing with %d cores ... DONE", cores)) } message("*** future_*apply() and 'future.stdout' ... DONE") source("incl/end.R") future.apply/tests/future_replicate.R0000644000176200001440000000216714024036060017540 0ustar liggesuserssource("incl/start.R") message("*** future_replicate() ...") for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) y0 <- replicate(5L, sample(10L, size = 1L)) y1 <- future_replicate(5L, sample(10L, size = 1L)) stopifnot(length(y0) == length(y1)) set.seed(0xBEEF) y1 <- future_replicate(5L, sample(10L, size = 1L)) set.seed(0xBEEF) y2 <- future_replicate(5L, sample(10L, size = 1L)) stopifnot(all.equal(y2, y1)) y3 <- future_replicate(5L, sample(10L, size = 1L), future.seed = 0xBEEF) y4 <- future_replicate(5L, sample(10L, size = 1L), future.seed = 0xBEEF) stopifnot(all.equal(y4, y3)) message("- example(replicate) ...") foo <- function(x = 1, y = 2) c(x, y) bar0 <- function(n, x) replicate(n, foo(x = x)) y0 <- bar0(5, x = 3) bar1 <- function(n, x) future_replicate(n, foo(x = x)) y1 <- bar1(5, x = 3) stopifnot(all.equal(y1, y0)) plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("*** future_replicate() ... DONE") source("incl/end.R") future.apply/tests/future_sapply.R0000644000176200001440000000325714104320770017104 0ustar liggesuserssource("incl/start.R") message("*** future_sapply() ...") xs <- list( A = c(a = 1, b = 2, c = 3), B = c(a = 1:2, b = 2:3, c = 3:4), C = letters[1:3], D = structure(10 * 5:8, names = LETTERS[1:4]) ) FUNS <- list( a = identity, b = as.matrix, c = function(x, y = 2 * 1:5) outer(rep(x, length.out = 3L), y) ) for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) for (x in xs) { FUNS_x <- FUNS if (!is.numeric(x)) FUNS_x[["c"]] <- NULL for (USE.NAMES in list(FALSE, TRUE)) { for (simplify in list(FALSE, TRUE, "array")) { for (FUN in FUNS_x) { y0 <- sapply(x, FUN = FUN, USE.NAMES = USE.NAMES, simplify = simplify) y1 <- future_sapply(x, FUN = FUN, USE.NAMES = USE.NAMES, simplify = simplify) str(list(y0 = y0, y1 = y1)) stopifnot(identical(y1, y0)) if (identical(simplify, FALSE)) { y2 <- lapply(x, FUN = FUN) str(list(y0 = y0, y2 = y2)) stopifnot(identical(unname(y2), unname(y0))) } } } } } ## https://github.com/HenrikBengtsson/future.apply/issues/61 compute <- function(a, x_vec) a + x_vec call_compute <- function(..., x_vec = 1:2){ compute_with_dots <- function(x) compute(..., x_vec = x) future_sapply(x_vec, FUN = compute_with_dots) } y <- call_compute(0L) print(y) stopifnot(identical(y, 1:2)) plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("*** future_sapply() ... DONE") source("incl/end.R") future.apply/tests/future_apply.R0000644000176200001440000000746114024036060016717 0ustar liggesuserssource("incl/start.R") message("*** future_apply() ...") z0 <- NULL for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) message("- From example(apply) ...") X <- matrix(c(1:4, 1, 6:8), nrow = 2L) Y0 <- apply(X, MARGIN = 1L, FUN = table) Y1 <- future_apply(X, MARGIN = 1L, FUN = table) print(Y1) stopifnot(all.equal(Y1, Y0, check.attributes = FALSE)) ## FIXME Y0 <- apply(X, MARGIN = 1L, FUN = stats::quantile) Y1 <- future_apply(X, MARGIN = 1L, FUN = stats::quantile) print(Y1) stopifnot(all.equal(Y1, Y0)) x <- cbind(x1 = 3, x2 = c(4:1, 2:5)) names(dimnames(x)) <- c("row", "col") x3 <- array(x, dim = c(dim(x), 3), dimnames = c(dimnames(x), list(C = paste0("cop.", 1:3)))) y0 <- apply(x, MARGIN = 2L, FUN = identity) stopifnot(identical(y0, x)) y1 <- future_apply(x, MARGIN = 2L, FUN = identity) print(y1) stopifnot(identical(y1, y0)) y0 <- apply(x3, MARGIN = 2:3, FUN = identity) stopifnot(identical(y0, x3)) y1 <- future_apply(x3, MARGIN = 2:3, FUN = identity) print(y1) stopifnot(identical(y1, y0)) z <- array(1:24, dim = 2:4) y0 <- apply(z, MARGIN = 1:2, FUN = function(x) seq_len(max(x))) y1 <- future_apply(z, MARGIN = 1:2, FUN = function(x) seq_len(max(x))) print(y1) stopifnot(identical(y1, y0)) message("- apply(X, MARGIN = , ...) ...") X <- matrix(1:2, nrow = 2L, ncol = 1L, dimnames = list(rows = c("a", "b"))) y0 <- apply(X, MARGIN = "rows", FUN = identity) y1 <- future_apply(X, MARGIN = "rows", FUN = identity) print(y1) stopifnot(identical(y1, y0)) message("- apply(X, ...) - dim(X) > 2 ...") X <- array(1:12, dim = c(2, 2, 3)) y0 <- apply(X, MARGIN = 1L, FUN = identity) y1 <- future_apply(X, MARGIN = 1L, FUN = identity) print(y1) stopifnot(identical(y1, y0)) message("- apply(X, ...) - not all same names ...") FUN <- function(x) { if (x[1] == 1L) names(x) <- letters[seq_along(x)] x } X <- matrix(1:4, nrow = 2L, ncol = 2L) y0 <- apply(X, MARGIN = 1L, FUN = FUN) y1 <- future_apply(X, MARGIN = 1L, FUN = FUN) print(y1) stopifnot(identical(y1, y0)) message("- example(future_apply) - reproducible RNG ...") z1 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF, ## Test also all other 'future.*' arguments future.stdout = TRUE, future.conditions = NULL, future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.scheduling = 1.0, future.chunk.size = NULL, future.label = "future_apply-%d" ) print(z1) if (is.null(z0)) { z0 <- z1 } else { stopifnot(identical(z1, z0)) } plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("*** apply(X, ...) - prod(dim(X)) == 0 [non-parallel] ...") X <- matrix(nrow = 0L, ncol = 2L) y0 <- apply(X, MARGIN = 1L, FUN = identity) y1 <- future_apply(X, MARGIN = 1L, FUN = identity) print(y1) stopifnot(identical(y1, y0)) message("*** exceptions ...") ## Error: dim(X) must have a positive length res <- tryCatch({ y <- future_apply(1L, MARGIN = 1L, FUN = identity) }, error = identity) stopifnot(inherits(res, "error")) ## Error: 'X' must have named dimnames X <- matrix(1:2, nrow = 2L, ncol = 1L) res <- tryCatch({ y <- future_apply(X, MARGIN = "rows", FUN = identity) }, error = identity) stopifnot(inherits(res, "error")) ## Error: not all elements of 'MARGIN' are names of dimensions X <- matrix(1:2, nrow = 2L, ncol = 1L, dimnames = list(rows = c("a", "b"))) res <- tryCatch({ y <- future_apply(X, MARGIN = "cols", FUN = identity) }, error = identity) stopifnot(inherits(res, "error")) message("*** future_apply() ... DONE") source("incl/end.R") future.apply/tests/future_lapply,RNG.R0000644000176200001440000001337514024036060017517 0ustar liggesuserssource("incl/start.R") message("*** future_lapply() and RNGs ...") options(future.debug = FALSE) message("* future_lapply(x, ..., future.seed = ) ...") res <- tryCatch({ y <- future_lapply(1:3, FUN = identity, future.seed = as.list(1:2)) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) res <- tryCatch({ y <- future_lapply(1:3, FUN = identity, future.seed = list(1, 2, 3:4)) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) res <- tryCatch({ y <- future_lapply(1:3, FUN = identity, future.seed = as.list(1:3)) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) seeds <- lapply(1:3, FUN = as_lecyer_cmrg_seed) res <- tryCatch({ y <- future_lapply(1:3, FUN = identity, future.seed = lapply(seeds, FUN = as.numeric)) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) seeds[[1]][1] <- seeds[[1]][1] + 1L res <- tryCatch({ y <- future_lapply(1:3, FUN = identity, future.seed = seeds) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) message("* future_lapply(x, ..., future.seed = ) ... DONE") ## Iterate of the same set in all tests x <- 1:5 message("* future_lapply(x, ..., future.seed = FALSE) ...") y0 <- y0_nested <- seed00 <- NULL for (cores in 1:availCores) { message(sprintf(" - Testing with %d cores ...", cores)) options(mc.cores = cores) for (strategy in supportedStrategies(cores)) { message(sprintf("* plan('%s') ...", strategy)) plan(strategy) set.seed(0xBEEF) seed0 <- get_random_seed() y <- future_lapply(x, FUN = function(i) i, future.seed = FALSE) y <- unlist(y) seed <- get_random_seed() if (is.null(y0)) { y0 <- y seed00 <- seed } str(list(y = y)) stopifnot(identical(seed, seed0), identical(seed, seed00)) ## NOTE: We cannot guarantee the same random numbers, because ## future.seed = FALSE. message(sprintf("* plan('%s') ... DONE", strategy)) } ## for (strategy ...) message(sprintf(" - Testing with %d cores ... DONE", cores)) } ## for (core ...) message("* future_lapply(x, ..., future.seed = FALSE) ... DONE") seed_sets <- list( A = TRUE, ## B = NA, C = 42L, D = as_lecyer_cmrg_seed(42L), E = list(), F = vector("list", length = length(x)), G = NULL ) ## Generate sequence of seeds of the current RNGkind() ## NOTE: This is NOT a good way to generate random seeds!!! seeds <- lapply(seq_along(x), FUN = function(i) { set.seed(i) globalenv()$.Random.seed }) seed_sets$E <- seeds ## Generate sequence of L'Ecyer CMRG seeds seeds <- seed_sets$F seeds[[1]] <- seed_sets$D for (kk in 2:length(x)) seeds[[kk]] <- parallel::nextRNGStream(seeds[[kk - 1]]) seed_sets$F <- seeds seed_sets$G <- seed_sets$A rm(list = "seeds") for (name in names(seed_sets)) { future.seed <- seed_sets[[name]] if (is.list(future.seed)) { label <- sprintf("", length(future.seed), length(future.seed[[1]])) } else { label <- hpaste(future.seed) } message(sprintf("* future_lapply(x, ..., future.seed = %s) ...", label)) set.seed(0xBEEF) y0 <- seed00 <- NULL for (cores in 1:availCores) { message(sprintf(" - Testing with %d cores ...", cores)) options(mc.cores = cores) for (strategy in supportedStrategies(cores)) { message(sprintf("* plan('%s') ...", strategy)) plan(strategy) set.seed(0xBEEF) seed0 <- get_random_seed() y <- future_lapply(x, FUN = function(i) { rnorm(1L) }, future.seed = future.seed) y <- unlist(y) seed <- get_random_seed() if (is.null(y0)) { y0 <- y seed00 <- seed } str(list(y = y)) stopifnot(!identical(seed, seed0), identical(seed, seed00), identical(y, y0)) ## RNG-based results should also be identical regardless of ## load-balance scheduling. for (scheduling in list(FALSE, TRUE, 0, 0.5, 2.0, Inf)) { set.seed(0xBEEF) seed0 <- get_random_seed() y <- future_lapply(x, FUN = function(i) { rnorm(1L) }, future.seed = future.seed, future.scheduling = scheduling) seed <- get_random_seed() y <- unlist(y) str(list(y = y)) stopifnot(!identical(seed, seed0), identical(seed, seed00), identical(y, y0)) } ## Nested future_lapply():s for (scheduling in list(FALSE, TRUE)) { y <- future_lapply(x, FUN = function(i) { .seed <- globalenv()$.Random.seed z <- future_lapply(1:3, FUN = function(j) { list(j = j, seed = globalenv()$.Random.seed) }, future.seed = .seed) ## Assert that all future seeds are unique seeds <- lapply(z, FUN = function(x) x$seed) for (kk in 2:length(seeds)) stopifnot(!all(seeds[[kk]] == seeds[[1]])) list(i = i, seed = .seed, sample = rnorm(1L), z = z) }, future.seed = 42L, future.scheduling = scheduling) if (is.null(y0_nested)) y0_nested <- y str(list(y = y)) ## Assert that all future seeds (also nested ones) are unique seeds <- Reduce(c, lapply(y, FUN = function(x) { c(list(seed = x$seed), lapply(x$z, FUN = function(x) x$seed)) })) for (kk in 2:length(seeds)) stopifnot(!all(seeds[[kk]] == seeds[[1]])) stopifnot(identical(y, y0_nested)) } message(sprintf("* plan('%s') ... DONE", strategy)) } ## for (strategy ...) message(sprintf(" - Testing with %d cores ... DONE", cores)) } ## for (cores ...) message(sprintf("* future_lapply(x, ..., future.seed = %s) ... DONE", label)) } ## for (name ...) message("*** future_lapply() and RNGs ... DONE") source("incl/end.R") future.apply/tests/rng.R0000644000176200001440000000406014024036060014756 0ustar liggesuserssource("incl/start,load-only.R") message("*** RNG ...") set_random_seed(seed = NULL) seed <- get_random_seed() stopifnot(is.null(seed)) set_random_seed(seed = 42L) seed <- get_random_seed() stopifnot(identical(seed, 42L)) res <- tryCatch({ seed <- as_lecyer_cmrg_seed(seed = FALSE) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) seed <- as_lecyer_cmrg_seed(seed = 42L) str(seed) set_random_seed(seed = seed) stopifnot(identical(get_random_seed(), seed)) seed2 <- as_lecyer_cmrg_seed(seed = TRUE) str(seed2) stopifnot(identical(seed2, seed)) seed3 <- as_lecyer_cmrg_seed(seed = seed) str(seed3) stopifnot(identical(seed3, seed)) message(" - make_rng_seeds ...") seeds <- make_rng_seeds(2L, seed = FALSE) stopifnot(is.null(seeds)) seeds <- make_rng_seeds(0L, seed = 42L, debug = TRUE) stopifnot(length(seeds) == 0L, identical(seeds, list())) seeds <- make_rng_seeds(2L, seed = TRUE) stopifnot(length(seeds) == 2L, all(sapply(seeds, FUN = is_lecyer_cmrg_seed))) seeds <- make_rng_seeds(3L, seed = 42L) stopifnot(length(seeds) == 3L, all(sapply(seeds, FUN = is_lecyer_cmrg_seed))) seeds <- make_rng_seeds(1L, seed = 42L, debug = TRUE) stopifnot(length(seeds) == 1L, all(sapply(seeds, FUN = is_lecyer_cmrg_seed))) seeds0 <- lapply(1:3, FUN = as_lecyer_cmrg_seed) seeds <- make_rng_seeds(length(seeds0), seed = seeds0, debug = TRUE) stopifnot(length(seeds) == length(seeds0), all(sapply(seeds, FUN = is_lecyer_cmrg_seed))) message(" - exceptions ...") ## Invalid L'Ecuyer seed seed_invalid <- seed + 1L res <- tryCatch({ seed <- as_lecyer_cmrg_seed(seed = seed_invalid) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) ## Invalid seed res <- tryCatch({ seed <- as_lecyer_cmrg_seed(seed = 1:2) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) ## Invalid length seeds0 <- lapply(1:2, FUN = as_lecyer_cmrg_seed) res <- tryCatch({ seeds <- make_rng_seeds(1L, seed = seeds0) }, error = identity) print(res) stopifnot(inherits(res, "simpleError")) message("*** RNG ... DONE") source("incl/end.R") future.apply/tests/future_lapply.R0000644000176200001440000000710014024036060017061 0ustar liggesuserssource("incl/start.R") library("listenv") message("*** future_lapply() ...") x_a <- list(a = "integer", b = "numeric", c = "character", c = "list") str(list(x_a = x_a)) y_a <- lapply(x_a, FUN = base::vector, length = 2L) str(list(y_a = y_a)) x_b <- list(a = c("hello", b = 1:100)) str(list(x_b = x_b)) y_b <- lapply(x_b, FUN = future:::hpaste, collapse = "; ", maxHead = 3L) str(list(y_b = y_b)) x_c <- list() y_c <- listenv() y_c$A <- 3L x_c$a <- y_c y_c<- listenv() y_c$A <- 3L y_c$B <- c("hello", b = 1:100) x_c$b <- y_c print(x_c) y_c <- lapply(x_c, FUN = listenv::map) str(list(y_c = y_c)) for (cores in 1:availCores) { message(sprintf("Testing with %d cores ...", cores)) options(mc.cores = cores) strategies <- supportedStrategies(cores) for (strategy in supportedStrategies()) { message(sprintf("- plan('%s') ...", strategy)) plan(strategy) for (scheduling in list(FALSE, TRUE, structure(TRUE, ordering = "random"), structure(TRUE, ordering = function(n) rev(seq_len(n))))) { message("- future_lapply(x, FUN = vector, ...) ...") y <- future_lapply(x_a, FUN = vector, length = 2L, future.scheduling = scheduling) str(list(y = y)) stopifnot(identical(y, y_a)) message("- future_lapply(x, FUN = base::vector, ...) ...") y <- future_lapply(x_a, FUN = base::vector, length = 2L, future.scheduling = scheduling) str(list(y = y)) stopifnot(identical(y, y_a)) message("- future_lapply(x, FUN = future:::hpaste, ...) ...") y <- future_lapply(x_b, FUN = future:::hpaste, collapse = "; ", maxHead = 3L, future.scheduling = scheduling) str(list(y = y)) stopifnot(identical(y, y_b)) message("- future_lapply(x, FUN = listenv::listenv, ...) ...") y <- future_lapply(x_c, FUN = listenv::map, future.scheduling = scheduling) str(list(y = y)) stopifnot(identical(y, y_c)) } ## for (scheduling ...) message("- future_lapply(x, FUN, ...) for large length(x) ...") a <- 3.14 x_d <- 1:1e4 y <- future_lapply(x_d, FUN = function(z) sqrt(z + a)) y <- unlist(y, use.names = FALSE) stopifnot(all.equal(y, sqrt(x_d + a))) message("- future_lapply(x, FUN = table, ...) ...") x <- list(a = 1:4, b = 5:8) y0 <- lapply(x, FUN = table) y1 <- future_lapply(x, FUN = table) stopifnot(all.equal(y1, y0, check.attributes = FALSE)) ## FIXME message("- future_lapply(x, ...) where length(x) != length(as.list(x)) ...") x <- structure(list(a = 1, b = 2), class = "Foo") as.list.Foo <- function(x, ...) c(x, c = 3) y0 <- lapply(x, FUN = length) y1 <- future_lapply(x, FUN = length) stopifnot(identical(y1, y0)) } ## for (strategy ...) message(sprintf("Testing with %d cores ... DONE", cores)) } ## for (cores ...) message("*** future_lapply() - special cases ...") X <- list() names(X) <- character(0L) y <- future_lapply(X, FUN = identity) stopifnot(length(y) == 0L, !is.null(names(y)), identical(y, X)) X <- character(0L) y0 <- lapply(X, FUN = identity) y <- future_lapply(X, FUN = identity) stopifnot(identical(y, y0)) X <- character(0L) names(X) <- character(0L) y0 <- lapply(X, FUN = identity) y <- future_lapply(X, FUN = identity) stopifnot(identical(y, y0)) message("*** future_lapply() - special cases ... DONE") message("*** future_lapply() - exceptions ...") res <- tryCatch({ future_lapply(1:3, FUN = identity, future.chunk.size = structure(1L, ordering = "invalid")) }, error = identity) stopifnot(inherits(res, "error")) message("*** future_lapply() - exceptions ... DONE") message("*** future_lapply() ... DONE") source("incl/end.R") future.apply/tests/future_vapply.R0000644000176200001440000001156114024036060017101 0ustar liggesuserssource("incl/start.R") message("*** future_vapply() ...") for (strategy in supportedStrategies()) { message(sprintf("*** strategy = %s ...", sQuote(strategy))) plan(strategy) x <- NULL fun <- is.factor fun_value <- logical(1L) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) x <- list() fun <- is.numeric fun_value <- logical(1L) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) x <- integer() fun <- identity fun_value <- fun(integer(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) df <- data.frame(x = 1:10, y = letters[1:10], stringsAsFactors=FALSE) fun <- class fun_value <- character(1L) y0 <- vapply(df, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(df, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) x <- 1:10 fun <- function(x) double(0L) fun_value <- fun(double(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) fun <- function(x) integer(0L) fun_value <- fun(double(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) fun <- sqrt fun_value <- fun(double(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) fun <- function(x) c(x, x^2) fun_value <- fun(double(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) fun <- function(x) matrix(x, nrow = 2L, ncol = 2L) fun_value <- fun(integer(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) fun <- function(x) matrix(x, nrow = 2L, ncol = 2L) fun_value <- fun(double(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) ## Ditto with dimnames on FUN.VALUE fun <- function(x) { matrix(x, nrow = 2L, ncol = 2L, dimnames = list(c("a", "b"), c("A", "B"))) } fun_value <- fun(double(1L)) y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y0) y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value) str(y1) stopifnot(all.equal(y1, y0)) message("- From example(vapply) ...") x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE)) y0 <- vapply(x, FUN = quantile, FUN.VALUE = double(5L)) y1 <- future_vapply(x, FUN = quantile, FUN.VALUE = double(5L)) str(y1) stopifnot(all.equal(y1, y0)) i39 <- sapply(3:9, seq) ys0 <- sapply(i39, fivenum) ys1 <- future_sapply(i39, fivenum) stopifnot(all.equal(ys1, ys0)) yv0 <- vapply(i39, fivenum, c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0)) yv1 <- future_vapply(i39, fivenum, c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0)) str(yv1) stopifnot(all.equal(yv1, yv0)) v <- structure(10*(5:8), names = LETTERS[1:4]) f <- function(x, y) outer(rep(x, length.out = 3L), y) ys0 <- sapply(v, f, y = 2*(1:5), simplify = "array") ys1 <- future_sapply(v, f, y = 2*(1:5), simplify = "array") stopifnot(all.equal(ys1, ys0)) fv <- outer(1:3, 1:5) y <- 2*(1:5) yv0 <- vapply(v, f, fv, y = y) yv1 <- future_vapply(v, f, fv, y = y) str(yv1) stopifnot(all.equal(yv1, yv0)) y0 <- vapply(mtcars, FUN = is.numeric, FUN.VALUE = logical(1L)) y1 <- future_vapply(mtcars, FUN = is.numeric, FUN.VALUE = logical(1L)) str(y1) stopifnot(all.equal(y1, y0)) message("- future_vapply(x, ...) where length(x) != length(as.list(x)) ...") x <- structure(list(a = 1, b = 2), class = "Foo") as.list.Foo <- function(x, ...) c(x, c = 3) y0 <- vapply(x, FUN = length, FUN.VALUE = -1L) y1 <- future_vapply(x, FUN = length, FUN.VALUE = -1L) stopifnot(identical(y1, y0)) message("- exceptions ...") res <- tryCatch({ y0 <- vapply(1:3, FUN = identity, FUN.VALUE = c(3, 3)) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ y1 <- future_vapply(1:3, FUN = identity, FUN.VALUE = c(3, 3)) }, error = identity) stopifnot(inherits(res, "error")) plan(sequential) message(sprintf("*** strategy = %s ... done", sQuote(strategy))) } ## for (strategy in ...) message("*** future_vapply() ... DONE") source("incl/end.R") future.apply/tests/future_by.R0000644000176200001440000000526714024036060016206 0ustar liggesuserssource("incl/start.R") library("listenv") all_equal_but_call <- function(target, current, ...) { attr(target, "call") <- NULL attr(current, "call") <- NULL all.equal(target = target, current = current, ...) } message("*** future_by() ...") ## --------------------------------------------------------- ## by() ## --------------------------------------------------------- if (require("datasets") && require("stats")) { ## warpbreaks & lm() ## Use a local variable to test that it is properly exported, because ## 'warpbreaks' is available in all R sessions data <- warpbreaks y0 <- by(data[, 1:2], INDICES = data[,"tension"], FUN = summary) y1 <- by(data[, 1], INDICES = data[, -1], FUN = summary, digits = 2L) y2 <- by(data, INDICES = data[,"tension"], FUN = function(x, ...) { lm(breaks ~ wool, data = x, ...) }, singular.ok = FALSE) ## now suppose we want to extract the coefficients by group tmp <- with(data, by(data, INDICES = tension, FUN = function(x) { lm(breaks ~ wool, data = x) })) y3 <- sapply(tmp, coef) ## Source: {r-source}/tests/reg-tests-1d.R by2 <- function(data, INDICES, FUN) { by(data, INDICES = INDICES, FUN = FUN) } future_by2 <- function(data, INDICES, FUN) { future_by(data, INDICES = INDICES, FUN = FUN) } y4 <- by2(data, INDICES = data[,"tension"], FUN = summary) for (cores in 1:availCores) { message(sprintf("Testing with %d cores ...", cores)) options(mc.cores = cores) strategies <- supportedStrategies(cores) for (strategy in supportedStrategies()) { message(sprintf("- plan('%s') ...", strategy)) plan(strategy) y0f <- future_by(data[, 1:2], INDICES = data[,"tension"], FUN = summary) stopifnot(all_equal_but_call(y0f, y0, check.attributes = FALSE)) y1f <- future_by(data[, 1], INDICES = data[, -1], FUN = summary, digits = 2L) stopifnot(all_equal_but_call(y1f, y1)) y2f <- future_by(data, INDICES = data[,"tension"], FUN = function(x, ...) { lm(breaks ~ wool, data = x, ...) }, singular.ok = FALSE) stopifnot(all_equal_but_call(y2f, y2)) ## now suppose we want to extract the coefficients by group tmp <- with(data, future_by(data, INDICES = tension, FUN = function(x) { lm(breaks ~ wool, data = x) })) y3f <- sapply(tmp, coef) stopifnot(all_equal_but_call(y3f, y3)) y4f <- future_by2(data, INDICES = data[,"tension"], FUN = summary) stopifnot(all_equal_but_call(y4f, y4)) } ## for (strategy ...) message(sprintf("Testing with %d cores ... DONE", cores)) } ## for (cores ...) } ## if (require("stats")) message("*** future_by() ... DONE") source("incl/end.R") future.apply/tests/future_lapply,globals.R0000644000176200001440000001763714104320162020517 0ustar liggesuserssource("incl/start.R") library("tools") ## toTitleCase() options(future.debug = FALSE) options(future.apply.debug = TRUE) message("*** future_lapply() - globals ...") plan(cluster, workers = "localhost") a <- 1 b <- 2 globals_set <- list( A = FALSE, B = TRUE, C = c("a", "b"), D = list(a = 2, b = 3) ) x <- list(1) y_truth <- list(A = NULL, B = list(1), C = list(1), D = list(2)) str(y_truth) for (name in names(globals_set)) { globals <- globals_set[[name]] message("Globals set ", sQuote(name)) y <- tryCatch({ future_lapply(x, FUN = function(x) { median(c(x, a, b)) }, future.globals = globals, future.packages = "utils") }, error = identity) print(y) stopifnot((name == "A" && inherits(y, "error")) || identical(y, y_truth[[name]])) } message("*** future_lapply() - globals ... DONE") message("*** future_lapply() - manual globals ...") d <- 42 y <- future_lapply(1:2, FUN = function(x) { x * d }, future.globals = structure(FALSE, add = "d")) stopifnot(identical(y, list(42, 84))) e <- 42 res <- tryCatch({ future_lapply(1:2, FUN = function(x) { 2 * e }, future.globals = structure(TRUE, ignore = "e")) }, error = identity) stopifnot(inherits(res, "error")) message("*** future_lapply() - manual globals ... DONE") ## Test adopted from http://stackoverflow.com/questions/42561088/nested-do-call-within-a-foreach-dopar-environment-cant-find-function-passed-w message("*** future_lapply() - tricky globals ...") my_add <- function(a, b) a + b call_my_add <- function(a, b) { do.call(my_add, args = list(a = a, b = b)) } call_my_add_caller <- function(a, b, FUN = call_my_add) { do.call(FUN, args = list(a = a, b = b)) } main <- function(x = 1:2, caller = call_my_add_caller, args = list(FUN = call_my_add)) { results <- future_lapply(x, FUN = function(i) { do.call(caller, args = c(list(a = i, b = i + 1L), args)) }) results } x <- list(list(1:2)) z_length <- lapply(x, FUN = do.call, what = length) fun <- function(...) sum(...) z_fun <- lapply(x, FUN = do.call, what = fun) y0 <- NULL for (strategy in supportedStrategies()) { plan(strategy) y <- main(1:3) if (is.null(y0)) y0 <- y stopifnot(identical(y, y0)) message("- future_lapply(x, FUN = do.call, ...) ...") z <- future_lapply(x, FUN = do.call, what = length) stopifnot(identical(z, z_length)) z <- future_lapply(x, FUN = do.call, what = fun) stopifnot(identical(z, z_fun)) message("- future_lapply(x, ...) - passing arguments via '...' ...") ## typeof() == "list" obj <- data.frame(a = 1:2) stopifnot(typeof(obj) == "list") y <- future_lapply(1L, function(a, b) typeof(b), b = obj) stopifnot(identical(y[[1]], typeof(obj))) ## typeof() == "environment" obj <- new.env() stopifnot(typeof(obj) == "environment") y <- future_lapply(1L, function(a, b) typeof(b), b = obj) stopifnot(identical(y[[1]], typeof(obj))) ## typeof() == "S4" if (requireNamespace("methods")) { obj <- methods::getClass("MethodDefinition") stopifnot(typeof(obj) == "S4") y <- future_lapply(1L, function(a, b) typeof(b), b = obj) stopifnot(identical(y[[1]], typeof(obj))) } message("- future_lapply(X, ...) - 'X' containing globals ...") ## From https://github.com/HenrikBengtsson/future.apply/issues/12 a <- 42 b <- 21 X <- list( function(b) 2 * a, function() b / 2, function() a + b, function() nchar(toTitleCase("hello world")) ) z0 <- lapply(X, FUN = function(f) f()) str(z0) z1 <- future_lapply(X, FUN = function(f) f()) str(z1) stopifnot(identical(z1, z0)) message("- future_lapply(x, ...) - passing '...' as a global ...") ## https://github.com/HenrikBengtsson/future/issues/417 fcn0 <- function(...) { lapply(1, FUN = function(x) list(...)) } z0 <- fcn0(a = 1) str(list(z0 = z0)) stopifnot(identical(z0, list(list(a = 1)))) fcn <- function(...) { future_lapply(1, FUN = function(x) list(...)) } z1 <- fcn(a = 1) str(list(z1 = z1)) stopifnot(identical(z1, z0)) ## https://github.com/HenrikBengtsson/future.apply/issues/47 message("- future_lapply(X, ...) - '{ a <- a + 1; a }' ...") a <- 1 z0 <- lapply(1, function(ii) { a <- a + 1 a }) z1 <- future_lapply(1, function(ii) { a <- a + 1 a }) stopifnot(identical(z1, z0)) ## https://github.com/HenrikBengtsson/future.apply/issues/47 message("- future_lapply(X, ...) - '{ a; a <- a + 1 }' ...") z2 <- tryCatch(future_lapply(1, function(ii) { a a <- a + 1 }), error = identity) stopifnot(identical(z2, z0)) ## https://github.com/HenrikBengtsson/future.apply/issues/85 message("- future_lapply(..., future.globals = ) ...") a <- 0 y <- future_lapply(1, FUN = function(x) a, future.globals = list(a = 42)) str(y) if (packageVersion("future") <= "1.21.0" && strategy %in% c("sequential", "multicore")) { stopifnot(y[[1]] == 0) } else { stopifnot(y[[1]] == 42) } } ## for (strategy ...) message("*** future_lapply() - tricky globals ... DONE") message("*** future_lapply() - missing arguments ...") ## Here 'abc' becomes missing, i.e. missing(abc) is TRUE foo <- function(x, abc) future_lapply(x, FUN = function(y) y) y <- foo(1:2) stopifnot(identical(y, as.list(1:2))) message("*** future_lapply() - missing arguments ... DONE") message("*** future_lapply() - false positives ...") ## Here 'abc' becomes a promise, which fails to resolve ## iff 'xyz' does not exist. (Issue #161) suppressWarnings(rm(list = "xyz")) foo <- function(x, abc) future_lapply(x, FUN = function(y) y) y <- foo(1:2, abc = (xyz >= 3.14)) stopifnot(identical(y, as.list(1:2))) message("*** future_lapply() - false positives ... DONE") message("*** future_lapply() - too large ...") oMaxSize <- getOption("future.globals.maxSize") X <- replicate(10L, 1:100, simplify = FALSE) FUN <- function(x) { getOption("future.globals.maxSize") } y0 <- lapply(X, FUN = FUN) stopifnot(all(sapply(y0, FUN = identical, oMaxSize))) sizes <- unclass(c(FUN = object.size(FUN), X = object.size(X))) cat(sprintf("Baseline size of globals: %.2f KiB\n", sizes[["FUN"]] / 1024)) message("- true positive ...") options(future.globals.maxSize = 1L) res <- tryCatch({ y <- future_lapply(X, FUN = FUN) }, error = identity) stopifnot(inherits(res, "error")) res <- NULL options(future.globals.maxSize = oMaxSize) maxSize <- getOption("future.globals.maxSize") y <- future_lapply(X, FUN = FUN) str(y) stopifnot(all(sapply(y, FUN = identical, oMaxSize))) message("- approximately invariant to chunk size ...") maxSize <- sizes[["FUN"]] + sizes[["X"]] / length(X) options(future.globals.maxSize = maxSize) for (chunk.size in c(1L, 2L, 5L, 10L)) { y <- future_lapply(X, FUN = FUN, future.chunk.size = chunk.size) str(y) stopifnot(all(unlist(y) == maxSize)) cat(sprintf("maxSize = %g bytes\nfuture.globals.maxSize = %g bytes\n", maxSize, getOption("future.globals.maxSize"))) stopifnot(getOption("future.globals.maxSize") == maxSize) } y <- NULL options(future.globals.maxSize = oMaxSize) message("*** future_lapply() - too large ... DONE") message("*** future_lapply() - globals exceptions ...") res <- tryCatch({ y <- future_lapply(1, FUN = function(x) x, future.globals = 42) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ y <- future_lapply(1, FUN = function(x) x, future.globals = list(1)) }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ y <- future_lapply(1, FUN = function(x) x, future.globals = "...future.FUN") }, error = identity) stopifnot(inherits(res, "error")) res <- tryCatch({ y <- future_lapply(1, FUN = function(x) x, future.globals = "...future.FUN") }, error = identity) stopifnot(inherits(res, "error")) ...future.elements_ii <- 42L X <- list(function() 2 * ...future.elements_ii) res <- tryCatch({ y <- future_lapply(X, FUN = function(f) f()) }, error = identity) stopifnot(inherits(res, "error")) message("*** future_lapply() - globals exceptions ... DONE") source("incl/end.R") future.apply/vignettes/0000755000176200001440000000000014104466760014730 5ustar liggesusersfuture.apply/vignettes/future.apply-1-overview.md.rsp0000644000176200001440000001536714104216357022524 0ustar liggesusers<%@meta language="R-vignette" content="-------------------------------- %\VignetteIndexEntry{A Future for R: Apply Function to Elements in Parallel} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{R} %\VignetteKeyword{package} %\VignetteKeyword{vignette} %\VignetteKeyword{future} %\VignetteKeyword{lazy evaluation} %\VignetteKeyword{synchronous} %\VignetteKeyword{asynchronous} %\VignetteKeyword{parallel} %\VignetteKeyword{cluster} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> # A Future for R: Apply Function to Elements in Parallel ## Introduction The purpose of this package is to provide worry-free parallel alternatives to base-R "apply" functions, e.g. `apply()`, `lapply()`, and `vapply()`. The goal is that one should be able to replace any of these in the core with its futurized equivalent and things will just work. For example, instead of doing: ```r library("datasets") library("stats") y <- lapply(mtcars, FUN = mean, trim = 0.10) ``` one can do: ```r library("future.apply") plan(multisession) ## Run in parallel on local computer library("datasets") library("stats") y <- future_lapply(mtcars, FUN = mean, trim = 0.10) ``` Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. _To enable parallel RNG, use argument `future.seed = TRUE`._ ## Role Where does the [future.apply] package fit in the software stack? You can think of it as a sibling to [foreach], [furrr], [BiocParallel], [plyr], etc. Just as parallel provides `parLapply()`, foreach provides `foreach()`, BiocParallel provides `bplapply()`, and plyr provides `llply()`, future.apply provides `future_lapply()`. Below is a table summarizing this idea:
Package Functions Backends
future.apply

Future-versions of common goto *apply() functions available in base R (of the 'base' package):
future_apply(), future_by(), future_eapply(), future_lapply(), future_Map(), future_mapply(), future_.mapply(), future_replicate(), future_sapply(), future_tapply(), and future_vapply().
The following function is yet not implemented:
future_rapply()
All future backends
parallel mclapply(), mcmapply(), clusterMap(), parApply(), parLapply(), parSapply(), ... Built-in and conditional on operating system
foreach foreach(), times() All future backends via doFuture
furrr future_imap(), future_map(), future_pmap(), future_map2(), ... All future backends
BiocParallel Bioconductor's parallel mappers:
bpaggregate(), bpiterate(), bplapply(), and bpvec()
All future backends via doFuture (because it supports foreach) or via BiocParallel.FutureParam (direct BiocParallelParam support; prototype)
plyr **ply(..., .parallel = TRUE) functions:
aaply(), ddply(), dlply(), llply(), ...
All future backends via doFuture (because it uses foreach internally)
Note that, except for the built-in parallel package, none of these higher-level APIs implement their own parallel backends, but they rather enhance existing ones. The foreach framework leverages backends such as [doParallel], [doMC] and [doFuture], and the future.apply framework leverages the [future] ecosystem and therefore backends such as built-in parallel, [future.callr], and [future.batchtools]. By separating `future_lapply()` and friends from the [future] package, it helps clarifying the purpose of the future package, which is to define and provide the core Future API, which higher-level parallel APIs can build on and for which any futurized parallel backends can be plugged into. ## Roadmap 1. Implement `future_*apply()` versions for all common `*apply()` functions that exist in base R. This also involves writing a large set of package tests asserting the correctness and the same behavior as the corresponding `*apply()` functions. 2. Harmonize all `future_*apply()` functions with each other, e.g. the future-specific arguments. 3. Consider additional `future_*apply()` functions and features that fit in this package but don't necessarily have a corresponding function in base R. Examples of this may be "apply" functions that return futures rather than values, mechanisms for benchmarking, and richer control over load balancing. The API and identity of the future.apply package will be kept close to the `*apply()` functions in base R. In other words, it will _neither_ keep growing nor be expanded with new, more powerful apply-like functions beyond those core ones in base R. Such extended functionality should be part of a separate package. [batchtools]: https://cran.r-project.org/package=batchtools [BiocParallel]: https://bioconductor.org/packages/BiocParallel/ [doFuture]: https://cran.r-project.org/package=doFuture [doMC]: https://cran.r-project.org/package=doMC [doParallel]: https://cran.r-project.org/package=doParallel [foreach]: https://cran.r-project.org/package=foreach [future]: https://cran.r-project.org/package=future [future.apply]: https://cran.r-project.org/package=future.apply [future.batchtools]: https://cran.r-project.org/package=future.batchtools [future.callr]: https://cran.r-project.org/package=future.callr [furrr]: https://cran.r-project.org/package=furrr [plyr]: https://cran.r-project.org/package=plyr future.apply/NEWS0000644000176200001440000001575314104465563013432 0ustar liggesusersPackage: future.apply ===================== Version: 1.8.1 [2021-08-09] BUG FIX: * citEntry() in CITATION used argument 'notes' instead of 'note'. Version: 1.8.0 [2021-08-09] NEW FEATURES: * Add argument 'future.envir' to all future_nnn() functions, which is passed as argument 'envir' to future(). * Add option 'future.apply.debug' for debugging features specific to this package. It defaults to option 'future.debug'. PERFORMANCE: * Internal getGlobalsAndPackagesXApply() now avoids calculating the object size of '...' arguments if option 'future.globals.maxSize' is +Inf. BUG FIX: * f <- function(...) future_lapply(X, function(x) list(...)); f(a=1) would produce an error on 'unused argument (a = 1)" with the upcoming release of future 1.22.0. Version: 1.7.0 [2021-01-02] NEW FEATURES: * The automatic capturing of conditions can be disabled by specifying 'future.conditions = NULL'. * Warnings and errors on using the RNG without specifying 'future.seed' are now tailored to the 'future.apply' package. Version: 1.6.0 [2020-06-30] SIGNIFICANT CHANGES: * future_apply() gained argument 'simplify', which is added to R-devel (to become R 4.1.0). BUG FIXES: * future_apply(X, FUN, ...) would pass all 'future.*' arguments except 'future.globals', 'future.packages', and 'future.labels' to the 'FUN' function instead of processing them locally. This would often result in the 'FUN' producing an error on "unused argument". It also affected 'future.seed' not being applied, which means for some 'FUN' functions that did not produce this error, non-reproducible results could have been produced. Version: 1.5.0 [2020-04-16] NEW FEATURES: * Add future_.mapply() corresponding to .mapply() in the 'base' package. BUG FIXES: * future_mapply() would chunk up 'MoreArgs' when future.seed = TRUE. Version: 1.4.0 [2020-01-06] NEW FEATURES: * Now all future_nnn() functions set a label on each future that reflects the name of the future_nnn() function and the index of the chunk, e.g. 'future_lapply-3'. The format can be controlled by argument 'future.label'. PERFORMANCE: * The assertion of the maximum size of globals per chunk is now significantly faster for future_apply(). BUG FIXES: * future_lapply(X) and future_mapply(FUN, X) would drop 'names' argument of the returned empty list when length(X) == 0. * Package could set '.Random.seed' to NULL, instead of removing it, which in turn would produce a warning on "'.Random.seed' is not an integer vector but of type 'NULL', so ignored" when the next random number generated. Version: 1.3.0 [2019-06-17] NEW FEATURES: * Now 'future.conditions' defaults to the same as argument 'conditions' of future::future(). If the latter changes, this package will follow. * Debug messages are now prepended with a timestamp. BUG FIXES: * The error "sprintf(...) : 'fmt' length exceeds maximal format length 8192" could be produced when debugging tried to report on too many globals. Version: 1.2.0 [2019-03-06] NEW FEATURES: * Added future_by(). BUG FIXES: * Attributes 'add' and 'ignore' of argument 'future.globals' were ignored although support for them was added in future (>= 1.10.0). * Validation of L'Ecuyer-CMRG RNG seeds failed in recent R devel. Version: 1.1.0 [2019-01-16] SIGNIFICANT CHANGES: * Added argument 'future.stdout' and 'future.conditions' for controlling whether standard output and conditions (e.g. messages and warnings) produced during the evaluation of futures should be captured and relayed or not. Standard output is guaranteed to be relayed in the same order as it would when using sequential processing. Analogously for conditions. However, standard output is always relayed before conditions. Errors are always relayed. Relaying of non-error conditions requires future (>= 1.11.0). NEW FEATURES: * Elements can be processed in random order by setting attribute 'ordering' to "random" of argument 'future.chunk.size' or 'future.scheduling', e.g. future.chunk.size = structure(TRUE, ordering = "random"). This can help improve load balancing in cases where there is a correlation between processing time and ordering of the elements. Note that the order of the returned values is not affected when randomizing the processing order. * Swapped order of arguments 'future.lazy' and 'future.seed' to be consistent with ditto arguments of future::future(). Version: 1.0.1 [2018-08-26] DOCUMENTATION / LICENCE: * The license is GPL (>= 2). Previously it was documented as GPL (>= 2.1) but that is a non-existing GPL version. BUG FIXES: * For list objects 'X' where X != as.list(X), future_lapply(X) did not give the same result as lapply(X). Analogously for future_vapply(X). * future_mapply() could drop class attribute on elements iterated over, because .subset() was used internally instead of `[`(). For instance, iteration over Date objects were affected. Version: 1.0.0 [2018-06-19] SIGNIFICANT CHANGES: * License changed from LGPL (>= 2.1) to GPL (>= 2) to make sure it is compatible with the source code adopted from R base's apply(), Map(), replicate(), sapply(), and tapply(), which are all GPL (>= 2). NEW FEATURES: * Added future_apply(), future_mapply(), and future_Map(). * Added argument `future.chunk.size` as an alternative to argument `future.scheduling` for controlling the average number of elements processed per future ("chunk"). In R 3.5.0, the parallel package introduced argument 'chunk.size'. * The maximum total size of globals allowed (option 'future.globals.maxSize') per future ("chunk") is now scaled up by the number of elements processed by the future ("chunk") making the protection approximately invariant to the amount of chunking (arguments 'future.scheduling' and 'future.chunk.size'). BUG FIXES: * future_lapply(X, ...) did not search for globals in 'X'. * future_vapply() did not return the same dimension names as vapply() when FUN.VALUE had no names but FUN(X[[1]]) had. SOFTWARE QUALITY: * Test code coverage is 100%. Version: 0.2.0 [2018-05-01] NEW FEATURES: * Added future_eapply(), future_tapply(), future_vapply(), and future_replicate(). Version: 0.1.0 [2018-01-15] * Package submitted to CRAN. Version: 0.0.3 [2017-12-06] DOCUMENTATION: * Vignette now covers the basics of the package and describes its role in the R package ecosystem together with a road map going forward. SOFTWARE QUALITY: * Added more package tests. Code coverage is currently at 100%. Version: 0.0.2 [2017-12-06] PERFORMANCE: * future_lapply(x, ...) is now much faster and more memory efficient for large 'x' vectors because it uses internal fold() function that is more efficient (memory and speed) version of base::Reduce(f, x), especially when length(x) is large. Version: 0.0.0-9000 [2017-08-31] NEW FEATURES: * Added future_sapply(). * Added future_lapply() - originally from the future package. * Created package. future.apply/R/0000755000176200001440000000000014104451747013120 5ustar liggesusersfuture.apply/R/fold.R0000644000176200001440000000501114024036060014150 0ustar liggesusers#' Efficient Fold, Reduce, Accumulate, Combine of a Vector #' #' @param x A vector. #' #' @param f A binary function, i.e. a function take takes two arguments. #' #' @param left If `TRUE`, vector is combined from the left (the first element), #' otherwise the right (the last element). #' #' @param unname If `TRUE`, function `f` is called as #' \code{f(unname(y), x[[ii]])}, otherwise as \code{f(y, x[[ii]])}, #' which may introduce name `"y"`. #' #' @param threshold An integer (>= 2) specifying the length where the #' recursive divide-and-conquer call will stop and incremental building of #' the partial value is performed. Using `threshold = +Inf` will disable #' recursive folding. #' #' @return A vector. #' #' @details #' In order for recursive folding to give the same results as non-recursive #' folding, binary function `f` must be _associative_ with itself, i.e. #' \code{f(f(x[[1]], x[[2]]), x[[3]])} equals #' \code{f(x[[1]], f(x[[2]]), x[[3]])}. #' #' This function is a more efficient (memory and speed) of #' \code{\link[base:funprog]{Reduce(f, x, right = !left, accumulate = FALSE)}}, #' especially when `x` is long. #' #' @keywords internal fold <- function(x, f, left = TRUE, unname = TRUE, threshold = 1000L) { f <- match.fun(f) n <- length(x) if (n == 0L) return(NULL) if (!is.vector(x) || is.object(x)) x <- as.list(x) if (n == 1L) return(x[[1]]) stop_if_not(length(left) == 1, is.logical(left), !is.na(left)) stop_if_not(length(threshold) == 1, is.numeric(threshold), !is.na(threshold), threshold >= 2) if (n >= threshold) { ## Divide and conquer, i.e. split, build the two parts, and merge n_mid <- n %/% 2 y_left <- Recall(f = f, x = x[ 1:n_mid], left = left, unname = unname, threshold = threshold) y_right <- Recall(f = f, x = x[(n_mid+1L):n], left = left, unname = unname, threshold = threshold) y <- f(y_left, y_right) y_left <- y_right <- NULL } else { ## Incrementally build result vector if (left) { y <- x[[1L]] if (unname) { for (ii in 2:n) y <- forceAndCall(n = 2L, FUN = f, unname(y), x[[ii]]) } else { for (ii in 2:n) y <- forceAndCall(n = 2L, FUN = f, y, x[[ii]]) } } else { y <- x[[n]] if (unname) { for (ii in (n-1):1) y <- forceAndCall(n = 2L, FUN = f, x[[ii]], unname(y)) } else { for (ii in (n-1):1) y <- forceAndCall(n = 2L, FUN = f, x[[ii]], y) } } } y } future.apply/R/future_tapply.R0000644000176200001440000000422514104262260016137 0ustar liggesusers#' @inheritParams future_lapply #' #' @param INDEX A list of one or more factors, each of same length as `X`. #' The elements are coerced to factors by `as.factor()`. #' See also [base::tapply()]. #' #' @param default See [base::tapply()]. #' #' @return #' `future_tapply()` returns an array with mode `"list"`, unless #' `simplify = TRUE` (default) _and_ `FUN` returns a scalar, in which #' case the mode of the array is the same as the returned scalars. #' See [base::tapply()] for details. #' #' @rdname future_lapply #' @export future_tapply <- function(X, INDEX, FUN = NULL, ..., default = NA, simplify = TRUE, future.envir = parent.frame(), future.label = "future_tapply-%d") { FUN <- if (!is.null(FUN)) match.fun(FUN) if (!is.list(INDEX)) INDEX <- list(INDEX) INDEX <- lapply(INDEX, FUN = as.factor) nI <- length(INDEX) if (!nI) stop("'INDEX' is of length zero") if (!all(lengths(INDEX) == length(X))) stop("arguments must have same length") namelist <- lapply(INDEX, FUN = levels) extent <- lengths(namelist, use.names = FALSE) cumextent <- cumprod(extent) if (cumextent[nI] > .Machine$integer.max) stop("total number of levels >= 2^31") storage.mode(cumextent) <- "integer" ngroup <- cumextent[nI] group <- as.integer(INDEX[[1L]]) if (nI > 1L) { for (i in 2L:nI) { group <- group + cumextent[i - 1L] * (as.integer(INDEX[[i]]) - 1L) } } if (is.null(FUN)) return(group) levels(group) <- as.character(seq_len(ngroup)) class(group) <- "factor" ans <- split(X, f = group) names(ans) <- NULL index <- as.logical(lengths(ans)) ans <- future_lapply(X = ans[index], FUN = FUN, ..., future.envir = future.envir, future.label = future.label) ansmat <- array({ if (simplify && all(lengths(ans) == 1L)) { ans <- unlist(ans, recursive = FALSE, use.names = FALSE) if (!is.null(ans) && is.na(default) && is.atomic(ans)) vector(typeof(ans)) else default } else { vector("list", prod(extent)) } }, dim = extent, dimnames = namelist) if (length(ans) > 0L) ansmat[index] <- ans ansmat } future.apply/R/utils.R0000644000176200001440000001153014104220103014357 0ustar liggesusersisFALSE <- function(x) { is.logical(x) && length(x) == 1L && !is.na(x) && !x } isNA <- function(x) { is.logical(x) && length(x) == 1L && is.na(x) } stop_if_not <- function(...) { res <- list(...) for (ii in 1L:length(res)) { res_ii <- .subset2(res, ii) if (length(res_ii) != 1L || is.na(res_ii) || !res_ii) { mc <- match.call() call <- deparse(mc[[ii + 1]], width.cutoff = 60L) if (length(call) > 1L) call <- paste(call[1L], "....") stop(sprintf("%s is not TRUE", sQuote(call)), call. = FALSE, domain = NA) } } NULL } ## From R.utils 2.0.2 (2015-05-23) hpaste <- function(..., sep = "", collapse = ", ", lastCollapse = NULL, maxHead = if (missing(lastCollapse)) 3 else Inf, maxTail = if (is.finite(maxHead)) 1 else Inf, abbreviate = "...") { if (is.null(lastCollapse)) lastCollapse <- collapse # Build vector 'x' x <- paste(..., sep = sep) n <- length(x) # Nothing todo? if (n == 0) return(x) if (is.null(collapse)) return(x) # Abbreviate? if (n > maxHead + maxTail + 1) { head <- x[seq_len(maxHead)] tail <- rev(rev(x)[seq_len(maxTail)]) x <- c(head, abbreviate, tail) n <- length(x) } if (!is.null(collapse) && n > 1) { if (lastCollapse == collapse) { x <- paste(x, collapse = collapse) } else { xT <- paste(x[1:(n-1)], collapse = collapse) x <- paste(xT, x[n], sep = lastCollapse) } } x } # hpaste() trim <- function(s) sub("[\t\n\f\r ]+$", "", sub("^[\t\n\f\r ]+", "", s)) comma <- function(x, sep = ", ") paste(x, collapse = sep) commaq <- function(x, sep = ", ") paste(sQuote(x), collapse = sep) now <- function(x = Sys.time(), format = "[%H:%M:%OS3] ") { ## format(x, format = format) ## slower format(as.POSIXlt(x, tz = ""), format = format) } mdebug <- function(..., debug = NA) { if (is.na(debug)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (!debug) return() message(now(), ...) } mdebugf <- function(..., appendLF = TRUE, debug = NA) { if (is.na(debug)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (!debug) return() message(now(), sprintf(...), appendLF = appendLF) } #' @importFrom utils capture.output mprint <- function(..., appendLF = TRUE, debug = NA) { if (is.na(debug)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (!debug) return() message(paste(now(), capture.output(print(...)), sep = "", collapse = "\n"), appendLF = appendLF) } #' @importFrom utils capture.output mstr <- function(..., appendLF = TRUE, debug = NA) { if (is.na(debug)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (!debug) return() message(paste(now(), capture.output(str(...)), sep = "", collapse = "\n"), appendLF = appendLF) } ## When 'default' is specified, this is 30x faster than ## base::getOption(). The difference is that here we use ## use names(.Options) whereas in 'base' names(options()) ## is used. getOption <- local({ go <- base::getOption function(x, default = NULL) { if (missing(default) || match(x, table = names(.Options), nomatch = 0L) > 0L) go(x) else default } }) ## getOption() #' @importFrom future FutureError #' @importFrom utils capture.output head str assert_values2 <- function(nX, values, values2, fcn_name, debug = FALSE) { if (length(values2) != nX) { chunk_sizes <- sapply(values, FUN = length) chunk_sizes <- table(chunk_sizes) chunk_summary <- sprintf("%d chunks with %s elements", chunk_sizes, names(chunk_sizes)) chunk_summary <- paste(chunk_summary, collapse = ", ") msg <- sprintf("Unexpected error in %s(): After gathering and merging the values from %d chunks in to a list, the total number of elements (= %d) does not match the number of input elements in 'X' (= %d). There were in total %d chunks and %d elements (%s)", fcn_name, length(values), length(values2), nX, length(values), sum(chunk_sizes), chunk_summary) if (debug) { mdebug(msg) mprint(chunk_sizes) mdebug("Results before merge chunks:") mstr(values) mdebug("Results after merge chunks:") mstr(values2) } msg <- sprintf("%s. Example of the first few values: %s", msg, paste(capture.output(str(head(values2, 3L))), collapse = "\\n")) ex <- FutureError(msg) stop(ex) } } stealth_sample.int <- function(n, size = n, replace = FALSE, ...) { oseed <- .GlobalEnv$.Random.seed on.exit({ if (is.null(oseed)) { rm(list = ".Random.seed", envir = .GlobalEnv, inherits = FALSE) } else { .GlobalEnv$.Random.seed <- oseed } }) sample.int(n = n, size = size, replace = replace, ...) } #' @importFrom utils packageVersion future_version <- local({ ver <- NULL function() { if (is.null(ver)) ver <<- packageVersion("future") ver } }) future.apply/R/zzz.R0000644000176200001440000000115314104217706014073 0ustar liggesusers.package <- new.env() ## covr: skip=all .onLoad <- function(libname, pkgname) { .package[["version"]] <- utils::packageVersion(pkgname) update_package_option("future.apply.debug", mode = "logical") debug <- getOption("future.apply.debug", FALSE) if (debug) { envs <- Sys.getenv() envs <- envs[grep("R_FUTURE_APPLY_", names(envs), fixed = TRUE)] envs <- sprintf("- %s=%s", names(envs), sQuote(envs)) mdebug(paste(c("Rnvironment variables specific to future.apply:", envs), collapse = "\n")) } ## Set future options based on environment variables update_package_options(debug = debug) } future.apply/R/future_eapply.R0000644000176200001440000000140214104262155016115 0ustar liggesusers#' @inheritParams future_lapply #' #' @param env An \R environment. #' #' @param all.names If `TRUE`, the function will also be applied to variables #' that start with a period (`.`), otherwise not. #' See [base::eapply()] for details. #' #' @return #' A named (unless `USE.NAMES = FALSE`) list. #' See [base::eapply()] for details. #' #' @rdname future_lapply #' @export future_eapply <- function(env, FUN, ..., all.names = FALSE, USE.NAMES = TRUE, future.envir = parent.frame(), future.label = "future_eapply-%d") { names <- ls(envir = env, all.names = all.names, sorted = FALSE) X <- mget(names, envir = env, inherits = FALSE) if (!USE.NAMES) names(X) <- NULL future_lapply(X = X, FUN = FUN, ..., future.envir = future.envir, future.label = future.label) } future.apply/R/future_mapply.R0000644000176200001440000001450314104451341016130 0ustar liggesusers#' Apply a Function to Multiple List or Vector Arguments #' #' `future_mapply()` implements [base::mapply()] using futures with perfect #' replication of results, regardless of future backend used. #' Analogously to `mapply()`, `future_mapply()` is a multivariate version of #' `future_sapply()`. #' It applies `FUN` to the first elements of each `\ldots` argument, #' the second elements, the third elements, and so on. #' Arguments are recycled if necessary. #' #' @inheritParams future_lapply #' #' @param FUN A function to apply, found via [base::match.fun()]. #' #' @param \ldots Arguments to vectorize over (vectors or lists of strictly #' positive length, or all of zero length). #' #' @param MoreArgs A list of other arguments to `FUN`. #' #' @param SIMPLIFY A logical or character string; attempt to reduce the #' result to a vector, matrix or higher dimensional array; see the simplify #' argument of [base::sapply()]. #' #' @param USE.NAMES A logical; use names if the first `\ldots` argument has #' names, or if it is a character vector, use that character vector as the #' names. #' #' @param future.globals A logical, a character vector, or a named list for #' controlling how globals are handled. #' For details, see [future_lapply()]. #' #' @param future.seed A logical or an integer (of length one or seven), or #' a list of `max(lengths(list(...)))` with pre-generated random seeds. #' For details, see [future_lapply()]. #' #' @return #' `future_mapply()` returns a list, or for `SIMPLIFY = TRUE`, a vector, #' array or list. See [base::mapply()] for details. #' #' @example incl/future_mapply.R #' #' @keywords manip programming iteration #' #' @export future_mapply <- function(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE, future.envir = parent.frame(), future.stdout = TRUE, future.conditions = "condition", future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1.0, future.chunk.size = NULL, future.label = "future_mapply-%d") { fcn_name <- "future_mapply" args_name <- "..." FUN <- match.fun(FUN) dots <- list(...) ## Nothing to do? if (length(dots) == 0L) return(list()) ns <- lengths(dots) ## Nothing to do? if (all(ns == 0L)) { if (!USE.NAMES) return(list()) values <- list() first <- dots[[1]] names <- names(first) if (is.null(names) && is.character(first)) names <- first names(values) <- names return(values) } stop_if_not(all(ns > 0L)) ## Recycle? nX <- max(ns) stretch <- which(ns < nX) if (length(stretch) > 0L) { for (kk in stretch) dots[[kk]] <- rep(dots[[kk]], length.out = nX) ns <- lengths(dots) } stop_if_not(all(ns == nX)) stop_if_not(is.null(MoreArgs) || is.list(MoreArgs)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (debug) mdebugf("%s() ...", fcn_name) ## NOTE TO SELF: We'd ideally have a 'future.envir' argument also for ## this function, cf. future(). However, it's not yet clear to me how ## to do this, because we need to have globalsOf() to search for globals ## from the current environment in order to identify the globals of ## arguments 'FUN' and '...'. /HB 2017-03-10 envir <- future.envir envir <- environment() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Future expression ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ...future.FUN <- NULL ## To please R CMD check ## Set .Random.seed? if (is.null(future.seed) || isFALSE(future.seed) || isNA(future.seed)) { expr <- quote({ args <- c(list(FUN = ...future.FUN), ...future.elements_ii, MoreArgs = list(MoreArgs), SIMPLIFY = FALSE, USE.NAMES = FALSE) do.call(mapply, args = args) }) } else { expr <- quote({ ...future.FUN2 <- function(..., ...future.seeds_ii_jj) { assign(".Random.seed", ...future.seeds_ii_jj, envir = globalenv(), inherits = FALSE) ...future.FUN(...) } args <- c(list(FUN = ...future.FUN2), ...future.elements_ii, list(...future.seeds_ii_jj = ...future.seeds_ii), MoreArgs = list(MoreArgs), SIMPLIFY = FALSE, USE.NAMES = FALSE) do.call(mapply, args = args) }) } ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Process ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - values <- future_xapply( FUN = FUN, nX = nX, chunk_args = dots, MoreArgs = MoreArgs, get_chunk = function(X, chunk) lapply(X, FUN = `[`, chunk), expr = expr, envir = envir, future.envir = future.envir, future.globals = future.globals, future.packages = future.packages, future.scheduling = future.scheduling, future.chunk.size = future.chunk.size, future.stdout = future.stdout, future.conditions = future.conditions, future.seed = future.seed, future.lazy = future.lazy, future.label = future.label, fcn_name = fcn_name, args_name = args_name, debug = debug ) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Reduce ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (USE.NAMES && length(dots) > 0L) { if (is.null(names1 <- names(dots[[1L]])) && is.character(dots[[1L]])) { names(values) <- dots[[1L]] } else if (!is.null(names1)) { names(values) <- names1 } } if (!isFALSE(SIMPLIFY) && length(values) > 0L) { values <- simplify2array(values, higher = (SIMPLIFY == "array")) } if (debug) mdebugf("%s() ... DONE", fcn_name) values } #' @param dots A list of arguments to vectorize over (vectors or lists of #' strictly positive length, or all of zero length). #' #' @return #' `future_.mapply()` returns a list. See [base::.mapply()] for details. #' #' @details #' Note that [base::.mapply()], which `future_.mapply()` is modeled after #' is listed as an "internal" function in \R despite being exported. #' #' @rdname future_mapply #' @export future_.mapply <- function(FUN, dots, MoreArgs, ..., future.label = "future_.mapply-%d") { args <- c( list(FUN = FUN), dots, list( MoreArgs = MoreArgs, SIMPLIFY = FALSE, USE.NAMES = FALSE, ..., future.label = future.label ) ) do.call(future_mapply, args = args, envir = parent.frame()) } future.apply/R/001.bquote.R0000644000176200001440000000456514104216357015047 0ustar liggesusers#' @importFrom utils globalVariables globalVariables(c(".", "..")) ## bquote_compile() and bquote_apply() are only available in future (>= 1.22.0) bquote_compile <- import_future("bquote_compile", default = function(expr, substitute = TRUE) { if (substitute) expr <- substitute(expr) tmpl <- list() unquote <- function(e, at = integer(0L)) { n <- length(e) if (n == 0L) return() if (is.pairlist(e)) { for (kk in 1:n) unquote(e[[kk]], at = c(at, kk)) return() } if (!is.call(e)) return() ## .()? if (is.name(e[[1L]]) && as.character(e[[1]]) == ".") { ## Record location in expression tree entry <- list( expression = e[[2L]], at = at ) tmpl <<- c(tmpl, list(entry)) return() } ## `{`, `+`, ... for (kk in 1:n) unquote(e[[kk]], at = c(at, kk)) } dummy <- unquote(expr) attr(tmpl, "expression") <- expr tmpl }) bquote_apply <- import_future("bquote_apply", default = function(tmpl, envir = parent.frame()) { expr <- attr(tmpl, "expression") for (kk in seq_along(tmpl)) { entry <- tmpl[[kk]] value <- eval(entry$expression, envir = envir) at <- entry$at ## Special case: Result becomes just a value nat <- length(at) if (nat == 0) return(value) ## Inject a NULL (needs special care) or a regular value? if (is.null(value)) { head <- if (nat == 1L) NULL else at[-nat] e <- if (is.null(head)) expr else expr[[head]] if (is.call(e)) { f <- as.list(e) f[at[nat]] <- list(NULL) e <- as.call(f) } else if (is.pairlist(e)) { e[1] <- list(NULL) e <- as.pairlist(e) } else { stop("Unknown type of expression (please report to the maintainer): ", sQuote(paste(deparse(e), collapse = "\\n"))) } if (is.null(head)) { expr <- e } else { expr[[head]] <- e } } else { expr[[at]] <- value } } expr }) bquote2 <- import_future("bquote2", default = function(expr, where = parent.frame(), splice = FALSE, substitute = TRUE) { stop_if_not(!splice) if (substitute) expr <- substitute(expr) tmpl <- bquote_compile(expr, substitute = FALSE) bquote_apply(tmpl, envir = where) }) future.apply/R/000.import.R0000644000176200001440000000071414104216357015051 0ustar liggesusersimport_from <- function(name, default = NULL, package) { ns <- getNamespace(package) if (exists(name, mode = "function", envir = ns, inherits = FALSE)) { get(name, mode = "function", envir = ns, inherits = FALSE) } else if (!is.null(default)) { default } else { stop(sprintf("No such '%s' function: %s()", package, name)) } } import_future <- function(name, default = NULL) { import_from(name, default = default, package = "future") } future.apply/R/globals.R0000644000176200001440000001033014104225424014652 0ustar liggesusers#' @importFrom globals globalsByName #' @importFrom future as.FutureGlobals getGlobalsAndPackages resolve getGlobalsAndPackagesXApply <- function(FUN, args = NULL, MoreArgs = NULL, envir, future.globals = TRUE, future.packages = NULL, debug = NA) { use_args <- !is.null(args) if (is.na(debug)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (debug) { mdebug("getGlobalsAndPackagesXApply() ...") on.exit(mdebug("getGlobalsAndPackagesXApply() ... DONE"), add = TRUE) } packages <- NULL globals <- future.globals scanForGlobals <- FALSE if (is.logical(globals)) { if (debug) mdebugf(" - future.globals: %s", globals) ## Gather all globals? if (globals) { scanForGlobals <- TRUE expr <- do.call(call, args = c(list("FUN"), if (use_args) args else MoreArgs)) } else { expr <- NULL attr(globals, "add") <- c(attr(globals, "add"), c("FUN", if (use_args) "..." else "MoreArgs")) } gp <- getGlobalsAndPackages(expr, envir = envir, globals = globals) globals <- gp$globals packages <- gp$packages gp <- NULL if (debug) { mdebugf(" - globals found/used: [n=%d] %s", length(globals), commaq(names(globals))) mdebugf(" - needed namespaces: [n=%d] %s", length(packages), commaq(packages)) mdebug("Finding globals ... DONE") } } else if (is.character(globals)) { if (debug) mdebugf(" - future.globals: %s", commaq(globals)) globals <- unique(c(globals, "FUN", if (use_args) "..." else "MoreArgs")) globals <- globalsByName(globals, envir = envir, mustExist = FALSE) } else if (is.list(globals)) { names <- names(globals) if (debug) mdebugf(" - future.globals: with names %s", commaq(globals)) if (length(globals) > 0 && is.null(names)) { stop("Invalid argument 'future.globals'. All globals must be named.") } } else { stop("Invalid argument 'future.globals': ", mode(globals)) } globals <- as.FutureGlobals(globals) stop_if_not(inherits(globals, "FutureGlobals")) names <- names(globals) if (!is.element("FUN", names)) { globals <- c(globals, FUN = FUN) } if (use_args) { if (debug) mdebug(" - use_args: TRUE") if (!is.element("...", names)) { if (debug) mdebug(" - Getting '...' globals ...") dotdotdot <- globalsByName("...", envir = envir, mustExist = TRUE) dotdotdot <- as.FutureGlobals(dotdotdot) dotdotdot <- resolve(dotdotdot) if (debug) { mdebugf(" - '...' content: [n=%d] %s", length(dotdotdot[[1]]), commaq(names(dotdotdot[[1]]))) mstr(dotdotdot) } ## Recalculate the total size? maxSize <- getOption("future.globals.maxSize") if (is.null(maxSize) || is.finite(maxSize)) { objectSize <- import_future("objectSize") attr(dotdotdot, "total_size") <- objectSize(dotdotdot) } if (debug) mdebug(" - Getting '...' globals ... DONE") globals <- c(globals, dotdotdot) } } else if (!is.element("MoreArgs", names)) { globals <- c(globals, list(MoreArgs = MoreArgs)) } ## Assert there are no reserved variables names among globals reserved <- intersect(c("...future.FUN", "...future.elements_ii", "...future.seeds_ii"), names) if (length(reserved) > 0) { stop("Detected globals using reserved variables names: ", paste(sQuote(reserved), collapse = ", ")) } ## Avoid FUN() clash with mapply(..., FUN) below. names <- names(globals) names[names == "FUN"] <- "...future.FUN" names(globals) <- names if (debug) { mdebugf("Globals to be used in all futures (chunks): [n=%d] %s", length(globals), commaq(names(globals))) mstr(globals) } if (!is.null(future.packages)) { stop_if_not(is.character(future.packages)) future.packages <- unique(future.packages) stop_if_not(!anyNA(future.packages), all(nzchar(future.packages))) packages <- unique(c(packages, future.packages)) } if (debug) { mdebugf("Packages to be attached in all futures: [n=%d] %s", length(packages), commaq(packages)) } list(globals = globals, packages = packages, scanForGlobals = scanForGlobals) } ## findGlobalsStep1() future.apply/R/options.R0000644000176200001440000000772114104216762014741 0ustar liggesusers#' Options used for future.apply #' #' Below are the \R options and environment variables that are used by the #' \pkg{future.apply} package and packages enhancing it.\cr #' \cr #' _WARNING: Note that the names and the default values of these options may #' change in future versions of the package. Please use with care until #' further notice._ #' #' For settings specific to the \pkg{future} package, see #' [future::future.options] page. #' #' @section Options for debugging future.apply: #' \describe{ #' \item{\option{future.apply.debug}:}{(logical) If `TRUE`, extensive debug messages are generated. (Default: `FALSE`)} #' } #' #' #' @section Environment variables that set R options: #' All of the above \R \option{future.apply.*} options can be set by #' corresponding environment variable \env{R_FUTURE_APPLY_*} _when the #' \pkg{future.apply} package is loaded_. #' For example, if `R_FUTURE_APPLY_DEBUG = "TRUE"`, then option #' \option{future.apply.debug} is set to `TRUE` (logical). #' #' @examples #' options(future.apply.debug = TRUE) #' #' @seealso #' To set \R options or environment variables when \R starts (even before the \pkg{future} package is loaded), see the \link[base]{Startup} help page. The \href{https://cran.r-project.org/package=startup}{\pkg{startup}} package provides a friendly mechanism for configurating \R's startup process. #' #' @aliases #' future.apply.debug #' R_FUTURE_APPLY_DEBUG #' #' @keywords internal #' @name future.apply.options NULL # Set an R option from an environment variable update_package_option <- function(name, mode = "character", default = NULL, split = NULL, trim = TRUE, disallow = c("NA"), force = FALSE, debug = FALSE) { ## Nothing to do? value <- getOption(name, NULL) if (!force && !is.null(value)) return(getOption(name, default = default)) ## name="future.plan.disallow" => env="R_FUTURE_PLAN_DISALLOW" env <- gsub(".", "_", toupper(name), fixed = TRUE) env <- paste("R_", env, sep = "") env_value <- value <- Sys.getenv(env, unset = NA_character_) ## Nothing to do? if (is.na(value)) { if (debug) mdebugf("Environment variable %s not set", sQuote(env)) return(getOption(name, default = default)) } if (debug) mdebugf("%s=%s", env, sQuote(value)) ## Trim? if (trim) value <- trim(value) ## Nothing to do? if (!nzchar(value)) return(getOption(name, default = default)) ## Split? if (!is.null(split)) { value <- strsplit(value, split = split, fixed = TRUE) value <- unlist(value, use.names = FALSE) if (trim) value <- trim(value) } ## Coerce? mode0 <- storage.mode(value) if (mode0 != mode) { suppressWarnings({ storage.mode(value) <- mode }) if (debug) { mdebugf("Coercing from %s to %s: %s", mode0, mode, commaq(value)) } } if (length(disallow) > 0) { if ("NA" %in% disallow) { if (any(is.na(value))) { stop(sprintf("Coercing environment variable %s=%s to %s would result in missing values for option %s: %s", sQuote(env), sQuote(env_value), sQuote(mode), sQuote(name), commaq(value))) } } if (is.numeric(value)) { if ("non-positive" %in% disallow) { if (any(value <= 0, na.rm = TRUE)) { stop(sprintf("Environment variable %s=%s specifies a non-positive value for option %s: %s", sQuote(env), sQuote(env_value), sQuote(name), commaq(value))) } } if ("negative" %in% disallow) { if (any(value < 0, na.rm = TRUE)) { stop(sprintf("Environment variable %s=%s specifies a negative value for option %s: %s", sQuote(env), sQuote(env_value), sQuote(name), commaq(value))) } } } } if (debug) { mdebugf("=> options(%s = %s) [n=%d, mode=%s]", dQuote(name), commaq(value), length(value), storage.mode(value)) } do.call(options, args = structure(list(value), names = name)) getOption(name, default = default) } ## Set future options based on environment variables update_package_options <- function(debug = FALSE) { } future.apply/R/future_xapply.R0000644000176200001440000003033014104451514016141 0ustar liggesusers#' @importFrom future Future nbrOfWorkers future resolve value as.FutureGlobals getGlobalsAndPackages future_xapply <- local({ tmpl_expr_options <- bquote_compile({ ...future.globals.maxSize.org <- getOption("future.globals.maxSize") if (!identical(...future.globals.maxSize.org, ...future.globals.maxSize)) { oopts <- options(future.globals.maxSize = ...future.globals.maxSize) on.exit(options(oopts), add = TRUE) } .(expr) }) function(FUN, nX, chunk_args, args = NULL, MoreArgs = NULL, expr, envir = parent.frame(), future.envir, future.globals, future.packages, future.scheduling, future.chunk.size, future.stdout, future.conditions, future.seed, future.lazy, future.label, get_chunk, fcn_name, args_name, ..., debug) { stop_if_not(is.function(FUN)) stop_if_not(is.logical(future.stdout), length(future.stdout) == 1L) stop_if_not(is.logical(future.lazy), length(future.lazy) == 1L) stop_if_not(length(future.scheduling) == 1L, !is.na(future.scheduling), is.numeric(future.scheduling) || is.logical(future.scheduling)) stop_if_not(length(future.label) == 1L, !is.na(future.label), is.logical(future.label) || is.character(future.label)) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Reproducible RNG (for sequential and parallel processing) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - seeds <- make_rng_seeds(nX, seed = future.seed, debug = debug) ## Future expression (with or without setting the RNG state) and ## pass possibly tweaked 'future.seed' to future() if (is.null(seeds)) { stop_if_not(is.null(future.seed) || isFALSE(future.seed)) if (isFALSE(future.seed) && future_version() <= "1.15.1") { future.seed <- NULL } } else { ## If RNG seeds are used (given or generated), make sure to reset ## the RNG state afterward oseed <- next_random_seed() on.exit(set_random_seed(oseed)) ## As seed=FALSE but without the RNG check future.seed <- NULL } ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Load balancing ("chunking") ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - chunks <- makeChunks(nX, nbrOfWorkers = nbrOfWorkers(), future.scheduling = future.scheduling, future.chunk.size = future.chunk.size) if (debug) mdebugf("Number of chunks: %d", length(chunks)) ## Process elements in a custom order? ordering <- attr(chunks, "ordering") if (!is.null(ordering)) { if (debug) mdebugf("Index remapping (attribute 'ordering'): [n = %d] %s", length(ordering), hpaste(ordering)) chunks <- lapply(chunks, FUN = function(idxs) .subset(ordering, idxs)) } ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Globals and Packages ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - gp <- getGlobalsAndPackagesXApply(FUN = FUN, args = args, MoreArgs = MoreArgs, envir = envir, future.globals = future.globals, future.packages = future.packages, debug = debug) packages <- gp$packages globals <- gp$globals scanForGlobals <- gp$scanForGlobals gp <- NULL ## Add argument placeholders globals_extra <- as.FutureGlobals(list( ...future.elements_ii = NULL, ...future.seeds_ii = NULL, ...future.globals.maxSize = NULL )) attr(globals_extra, "resolved") <- TRUE attr(globals_extra, "total_size") <- 0 globals <- c(globals, globals_extra) ## At this point a globals should be resolved and we should know their total size ## stop_if_not(attr(globals, "resolved"), !is.na(attr(globals, "total_size"))) ## To please R CMD check ...future.FUN <- ...future.elements_ii <- ...future.seeds_ii <- ...future.globals.maxSize <- NULL globals.maxSize <- getOption("future.globals.maxSize") globals.maxSize.default <- globals.maxSize if (is.null(globals.maxSize.default)) globals.maxSize.default <- 500 * 1024^2 nchunks <- length(chunks) if (debug) mdebugf("Number of futures (= number of chunks): %d", nchunks) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Futures ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - expr <- bquote_apply(tmpl_expr_options) ## Create labels? if (isTRUE(future.label)) { future.label <- sprintf("%s-%%d", fcn_name) } if (is.character(future.label)) { labels <- sprintf(future.label, seq_len(nchunks)) stopifnot(length(labels) == nchunks) } else { labels <- NULL } if (debug) mdebugf("Launching %d futures (chunks) ...", nchunks) fs <- vector("list", length = nchunks) for (ii in seq_along(chunks)) { chunk <- chunks[[ii]] if (debug) mdebugf("Chunk #%d of %d ...", ii, length(chunks)) args_ii <- get_chunk(chunk_args, chunk) globals_ii <- globals ## Subsetting outside future is more efficient globals_ii[["...future.elements_ii"]] <- args_ii packages_ii <- packages if (scanForGlobals) { if (debug) mdebugf(" - Finding globals in '%s' for chunk #%d ...", args_name, ii) gp <- getGlobalsAndPackages(args_ii, envir = envir, globals = TRUE) globals_args <- gp$globals packages_args <- gp$packages gp <- NULL if (debug) { mdebugf(" + additional globals found: [n=%d] %s", length(globals_args), commaq(names(globals_args))) mdebugf(" + additional namespaces needed: [n=%d] %s", length(packages_args), commaq(packages_args)) } ## Export also globals found in arguments? if (length(globals_args) > 0L) { reserved <- intersect(c("...future.FUN", "...future.elements_ii", "...future.seeds_ii"), names(globals_args)) if (length(reserved) > 0) { stop("Detected globals in '%s' using reserved variables names: ", args_name, paste(sQuote(reserved), collapse = ", ")) } globals_args <- as.FutureGlobals(globals_args) globals_ii <- unique(c(globals_ii, globals_args)) ## Packages needed due to globals in arguments? if (length(packages_args) > 0L) packages_ii <- unique(c(packages_ii, packages_args)) } if (debug) mdebugf(" - Finding globals in '%s' for chunk #%d ... DONE", args_name, ii) } args_ii <- NULL ## stop_if_not(attr(globals_ii, "resolved")) ## Adjust option 'future.globals.maxSize' to account for the fact that more ## than one element is processed per future. The adjustment is done by ## scaling up the limit by the number of elements in the chunk. This is ## a "good enough" approach. ## (https://github.com/HenrikBengtsson/future.apply/issues/8). if (length(chunk) > 1L) { globals_ii["...future.globals.maxSize"] <- list(globals.maxSize) options(future.globals.maxSize = length(chunk) * globals.maxSize.default) if (debug) mdebugf(" - Adjusted option 'future.globals.maxSize': %g -> %d * %g = %g (bytes)", globals.maxSize.default, length(chunk), globals.maxSize.default, getOption("future.globals.maxSize")) on.exit(options(future.globals.maxSize = globals.maxSize), add = TRUE) } ## Using RNG seeds or not? if (is.null(seeds)) { if (debug) mdebug(" - seeds: ") } else { if (debug) mdebugf(" - seeds: [%d] ", length(chunk)) globals_ii[["...future.seeds_ii"]] <- seeds[chunk] } fs[[ii]] <- future( expr, substitute = FALSE, envir = future.envir, stdout = future.stdout, conditions = future.conditions, globals = globals_ii, packages = packages_ii, seed = future.seed, lazy = future.lazy, label = labels[ii] ) if (debug) { mdebug("Created future:") mprint(fs[[ii]]) } ## Not needed anymore rm(list = c("chunk", "globals_ii")) if (debug) mdebugf("Chunk #%d of %d ... DONE", ii, nchunks) } ## for (ii ...) if (debug) mdebugf("Launching %d futures (chunks) ... DONE", nchunks) ## 4. Resolving futures if (debug) mdebugf("Resolving %d futures (chunks) ...", nchunks) ## Check for RngFutureCondition:s when resolving futures? if (isFALSE(future.seed)) { withCallingHandlers({ values <- local({ oopts <- options(future.rng.onMisuse.keepFuture = FALSE) on.exit(options(oopts)) value(fs) }) }, RngFutureCondition = function(cond) { ## One of "our" futures? idx <- NULL ## Compare future UUIDs or whole futures? uuid <- attr(cond, "uuid") if (!is.null(uuid)) { ## (a) Future UUIDs are available for (kk in seq_along(fs)) { if (identical(fs[[kk]]$uuid, uuid)) idx <- kk } } else { ## (b) Future UUIDs are not available, use Future object? f <- attr(cond, "future") if (is.null(f)) return() ## Nothing to do? if (!isFALSE(f$seed)) return() ## shouldn't really happen for (kk in seq_along(fs)) { if (identical(fs[[kk]], f)) idx <- kk } } ## Nothing more to do, i.e. not one of our futures? if (is.null(idx)) return() ## Adjust message to give instructions relevant to this package f <- fs[[idx]] label <- f$label if (is.null(label)) label <- "" message <- sprintf("UNRELIABLE VALUE: One of the %s iterations (%s) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to \"ignore\".", sQuote(.packageName), sQuote(label)) cond$message <- message if (inherits(cond, "warning")) { warning(cond) invokeRestart("muffleWarning") } else if (inherits(cond, "error")) { stop(cond) } }) ## withCallingHandlers() } else { values <- value(fs) } ## Not needed anymore rm(list = "fs") if (debug) { mdebugf(" - Number of value chunks collected: %d", length(values)) mdebugf("Resolving %d futures (chunks) ... DONE", nchunks) } ## Sanity check stop_if_not(length(values) == nchunks) if (debug) mdebugf("Reducing values from %d chunks ...", nchunks) values2 <- do.call(c, args = values) if (debug) { mdebugf(" - Number of values collected after concatenation: %d", length(values2)) mdebugf(" - Number of values expected: %d", nX) } assert_values2(nX, values, values2, fcn_name = fcn_name, debug = debug) values <- values2 rm(list = "values2") ## Sanity check (this may happen if the future backend is broken) stop_if_not(length(values) == nX) ## Were elements processed in a custom order? if (length(values) > 1L && !is.null(ordering)) { invOrdering <- vector(mode(ordering), length = nX) idx <- 1:nX invOrdering[.subset(ordering, idx)] <- idx rm(list = c("ordering", "idx")) if (debug) mdebugf("Reverse index remapping (attribute 'ordering'): [n = %d] %s", length(invOrdering), hpaste(invOrdering)) values <- .subset(values, invOrdering) rm(list = c("invOrdering")) } if (debug) mdebugf("Reducing values from %d chunks ... DONE", nchunks) values } ## future_xapply() }) future.apply/R/future_replicate.R0000644000176200001440000000214014104262231016566 0ustar liggesusers#' @inheritParams future_lapply #' #' @param n The number of replicates. #' #' @param expr An \R expression to evaluate repeatedly. #' #' @return #' `future_replicate()` is a wrapper around `future_sapply()` and return #' simplified object according to the `simplify` argument. #' See [base::replicate()] for details. #' Since `future_replicate()` usually involves random number generation (RNG), #' it uses `future.seed = TRUE` by default in order produce sound random #' numbers regardless of future backend and number of background workers used. #' #' @export #' #' @rdname future_lapply future_replicate <- function(n, expr, simplify = "array", future.seed = TRUE, ..., future.envir = parent.frame(), future.label = "future_replicate-%d") future_sapply(X = integer(n), FUN = eval.parent(substitute(function(...)expr)), simplify = simplify, future.seed = future.seed, ..., future.envir = future.envir, future.label = future.label) future.apply/R/future_sapply.R0000644000176200001440000000203614104262246016140 0ustar liggesusers#' @inheritParams future_lapply #' #' @param simplify See [base::sapply()] and [base::tapply()], respectively. #' #' @param USE.NAMES See [base::sapply()]. #' #' @return #' For `future_sapply()`, a vector with same length and names as \code{X}. #' See [base::sapply()] for details. #' #' @export #' #' @author #' The implementations of `future_replicate()`, `future_sapply()`, and #' `future_tapply()` are adopted from the source code of the corresponding #' base \R functions, which are licensed under GPL (>= 2) with #' 'The R Core Team' as the copyright holder. #' #' @rdname future_lapply future_sapply <- function(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE, future.envir = parent.frame(), future.label = "future_sapply-%d") { answer <- future_lapply(X = X, FUN = FUN, ..., future.envir = future.envir, future.label = future.label) if (USE.NAMES && is.character(X) && is.null(names(answer))) names(answer) <- X if (!isFALSE(simplify) && length(answer)) simplify2array(answer, higher = (simplify == "array")) else answer } future.apply/R/future_apply.R0000644000176200001440000001555014104262510015754 0ustar liggesusers#' Apply Functions Over Array Margins via Futures #' #' `future_apply()` implements [base::apply()] using future with perfect #' replication of results, regardless of future backend used. #' It returns a vector or array or list of values obtained by applying a #' function to margins of an array or matrix. #' #' @inheritParams future_lapply #' #' @param X an array, including a matrix. #' #' @param MARGIN A vector giving the subscripts which the function will be #' applied over. For example, for a matrix `1` indicates rows, `2` indicates #' columns, `c(1, 2)` indicates rows and columns. #' Where `X` has named dimnames, it can be a character vector selecting #' dimension names. #' #' @param \ldots (optional) Additional arguments passed to `FUN()`, except #' `future.*` arguments, which are passed on to [future_lapply()] used #' internally. #' #' @param simplify a logical indicating whether results should be simplified #' if possible. #' #' @return #' Returns a vector or array or list of values obtained by applying a #' function to margins of an array or matrix. #' See [base::apply()] for details. #' #' @author #' The implementations of `future_apply()` is adopted from the source code #' of the corresponding base \R function, which is licensed under GPL (>= 2) #' with 'The R Core Team' as the copyright holder. #' #' @example incl/future_apply.R #' #' @importFrom future nbrOfWorkers #' @export future_apply <- function(X, MARGIN, FUN, ..., simplify = TRUE, future.envir = parent.frame(), future.stdout = TRUE, future.conditions = "condition", future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1.0, future.chunk.size = NULL, future.label = "future_apply-%d") { debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) FUN <- match.fun(FUN) simplify <- isTRUE(simplify) ## Ensure that X is an array object dl <- length(dim(X)) if(!dl) stop("dim(X) must have a positive length") if(is.object(X)) X <- if(dl == 2L) as.matrix(X) else as.array(X) ## now record dim as coercion can change it ## (e.g. when a data frame contains a matrix). d <- dim(X) dn <- dimnames(X) ds <- seq_len(dl) ## Extract the margins and associated dimnames if (is.character(MARGIN)) { if(is.null(dnn <- names(dn))) # names(NULL) is NULL stop("'X' must have named dimnames") MARGIN <- match(MARGIN, dnn) if (anyNA(MARGIN)) stop("not all elements of 'MARGIN' are names of dimensions") } s.call <- ds[-MARGIN] s.ans <- ds[MARGIN] d.call <- d[-MARGIN] d.ans <- d[MARGIN] dn.call <- dn[-MARGIN] dn.ans <- dn[MARGIN] ## dimnames(X) <- NULL ## do the calls d2 <- prod(d.ans) if(d2 == 0L) { ## arrays with some 0 extents: return ``empty result'' trying ## to use proper mode and dimension: ## The following is still a bit `hackish': use non-empty X newX <- array(vector(typeof(X), 1L), dim = c(prod(d.call), 1L)) ans <- forceAndCall(1, FUN, if(length(d.call) < 2L) newX[,1] else array(newX[, 1L], d.call, dn.call), ...) return(if(is.null(ans)) ans else if(length(d.ans) < 2L) ans[1L][-1L] else array(ans, d.ans, dn.ans)) } ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Globals and Packages ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - gp <- getGlobalsAndPackagesXApply( FUN, args = list(X = X, ...), envir = environment(), future.globals = future.globals, future.packages = future.packages, debug = debug ) globals <- gp$globals packages <- gp$packages gp <- NULL ## Check size of global variables? ## Doing it here, on the matrix object, is much faster than doing it for ## the list elements passed to future_lapply() oldMaxSize <- maxSize <- getOption("future.globals.maxSize") if (is.null(maxSize) || is.finite(maxSize)) { if (is.null(maxSize)) maxSize <- 500 * 1024^2 objectSize <- import_future("objectSize") size <- objectSize(X) nWorkers <- nbrOfWorkers() chunk_size <- size / nWorkers other_size <- attr(globals, "total_size") if (is.numeric(other_size)) chunk_size <- chunk_size + other_size if (chunk_size > maxSize) { asIEC <- import_future("asIEC") msg <- sprintf("The total size of %s (of class %s and type %s) is %s and the total size of the other argument is %s. With %d workers, this translates to %s per worker needed for future_apply(), which exceeds the maximum allowed size of %s (option 'future.globals.maxSize').", sQuote("X"), sQuote(class(X)[1]), sQuote(typeof(X)), asIEC(size), asIEC(other_size), nWorkers, asIEC(chunk_size), asIEC(maxSize)) if (debug) mdebug(msg) stop(msg) } on.exit(options(future.globals.maxSize = oldMaxSize), add = TRUE) options(future.globals.maxSize = +Inf) } newX <- aperm(X, c(s.call, s.ans)) dim(newX) <- c(prod(d.call), d2) if(length(d.call) < 2L) {# vector if (length(dn.call)) dimnames(newX) <- c(dn.call, list(NULL)) newX <- lapply(1L:d2, FUN = function(i) newX[,i]) } else newX <- lapply(1L:d2, FUN = function(i) array(newX[,i], dim = d.call, dimnames = dn.call)) globals$...future.FUN <- NULL ans <- future_lapply( X = newX, FUN = FUN, ..., future.envir = future.envir, future.stdout = future.stdout, future.conditions = future.conditions, future.lazy = future.lazy, future.seed = future.seed, future.scheduling = future.scheduling, future.chunk.size = future.chunk.size, future.globals = globals, future.packages = packages, future.label = future.label ) ## answer dims and dimnames ans.list <- !simplify || is.recursive(ans[[1L]]) l.ans <- length(ans[[1L]]) ans.names <- names(ans[[1L]]) if(!ans.list) ans.list <- any(lengths(ans) != l.ans) if(!ans.list && length(ans.names)) { all.same <- vapply(ans, function(x) identical(names(x), ans.names), NA) if (!all(all.same)) ans.names <- NULL } len.a <- if(ans.list) d2 else length(ans <- unlist(ans, recursive = FALSE)) if(length(MARGIN) == 1L && len.a == d2) { names(ans) <- if(length(dn.ans[[1L]])) dn.ans[[1L]] # else NULL ans } else if(len.a == d2) array(ans, d.ans, dn.ans) else if(len.a && len.a %% d2 == 0L) { if(is.null(dn.ans)) dn.ans <- vector(mode="list", length(d.ans)) dn1 <- list(ans.names) if(length(dn.call) && !is.null(n1 <- names(dn <- dn.call[1])) && nzchar(n1) && length(ans.names) == length(dn[[1]])) names(dn1) <- n1 dn.ans <- c(dn1, dn.ans) array(ans, c(len.a %/% d2, d.ans), if(!is.null(names(dn.ans)) || !all(vapply(dn.ans, is.null, NA))) dn.ans) } else ans } future.apply/R/future_Map.R0000644000176200001440000000142014104262162015336 0ustar liggesusers#' @inheritParams future_mapply #' #' @param f A function of the arity \eqn{k} if `future_Map()` is called with #' \eqn{k} arguments. #' #' @return #' `future_Map()` is a simple wrapper to `future_mapply()` which does not #' attempt to simplify the result. #' See [base::Map()] for details. #' #' @export #' #' @author #' The implementations of `future_Map()` is adopted from the source code #' of the corresponding base \R function `Map()`, which is licensed under #' GPL (>= 2) with 'The R Core Team' as the copyright holder. #' #' @rdname future_mapply future_Map <- function(f, ..., future.envir = parent.frame(), future.label = "future_Map-%d") { f <- match.fun(f) future_mapply(FUN = f, ..., SIMPLIFY = FALSE, future.envir = future.envir, future.label = future.label) } future.apply/R/rng.R0000644000176200001440000001505114104217314014021 0ustar liggesusersget_random_seed <- function() { env <- globalenv() env$.Random.seed } set_random_seed <- function(seed) { env <- globalenv() if (is.null(seed)) { rm(list = ".Random.seed", envir = env, inherits = FALSE) } else { env$.Random.seed <- seed } } next_random_seed <- function(seed = get_random_seed()) { sample.int(n = 1L, size = 1L, replace = FALSE) seed_next <- get_random_seed() stop_if_not(!any(seed_next != seed)) invisible(seed_next) } is_valid_random_seed <- function(seed) { oseed <- get_random_seed() on.exit(set_random_seed(oseed)) env <- globalenv() env$.Random.seed <- seed res <- tryCatch({ sample.int(n = 1L, size = 1L, replace = FALSE) }, simpleWarning = function(w) w) !inherits(res, "simpleWarning") } ## For RNGkind("L'Ecuyer-CMRG") we should have (see help('RNGkind')): ## .Random.seed <- c(rng.kind, n) where length(n) == 6L. ## From R source code: check for rng.kind %% 10000L == 407L is_lecyer_cmrg_seed <- function(seed) { is.numeric(seed) && length(seed) == 7L && all(is.finite(seed)) && (seed[1] %% 10000L == 407L) } # @importFrom utils capture.output as_lecyer_cmrg_seed <- function(seed) { ## Generate a L'Ecuyer-CMRG seed (existing or random)? if (is.logical(seed)) { stop_if_not(length(seed) == 1L) if (!is.na(seed) && !seed) { stop("Argument 'seed' must be TRUE if logical: ", seed) } oseed <- get_random_seed() ## Already a L'Ecuyer-CMRG seed? Then use that as is. if (!is.na(seed) && seed) { if (is_lecyer_cmrg_seed(oseed)) return(oseed) } ## Otherwise, generate a random one. on.exit(set_random_seed(oseed), add = TRUE) RNGkind("L'Ecuyer-CMRG") return(get_random_seed()) } stop_if_not(is.numeric(seed), all(is.finite(seed))) seed <- as.integer(seed) ## Already a L'Ecuyer-CMRG seed? if (is_lecyer_cmrg_seed(seed)) { return(seed) } ## Generate a new L'Ecuyer-CMRG seed? if (length(seed) == 1L) { oseed <- get_random_seed() on.exit(set_random_seed(oseed), add = TRUE) RNGkind("L'Ecuyer-CMRG") set.seed(seed) return(get_random_seed()) } stop("Argument 'seed' must be L'Ecuyer-CMRG RNG seed as returned by parallel::nextRNGStream() or an single integer: ", capture.output(str(seed))) } #' Produce Reproducible Seeds for Parallel Random Number Generation #' #' @param count The number of RNG seeds to produce. #' #' @param seed A logical specifying whether RNG seeds should be generated #' or not. (`seed = NULL` corresponds to `seed = FALSE`). #' If a list, then it should be of length `count` and each element should #' consist of a valid RNG seed. #' #' @param debug If `TRUE`, debug output is produced, otherwise not. #' #' @return Returns a non-named list of length `count`, or `NULL`. #' Any seed returned is a valid RNG seed. #' #' @importFrom parallel nextRNGStream nextRNGSubStream splitIndices #' @importFrom utils capture.output str #' #' @keywords internal make_rng_seeds <- function(count, seed = FALSE, debug = NA) { if (is.na(debug)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) ## Don't use RNGs? (seed = {FALSE, NULL}) if (is.null(seed)) return(NULL) if (is.logical(seed) && !is.na(seed) && !seed) return(NULL) stop_if_not(is.numeric(count), length(count) == 1L, !is.na(count), count >= 0L) ## Placeholder for all RNG stream seeds. seeds <- NULL # Use RNGs? if (debug) mdebug("Generating random seeds ...") ## A pregenerated sequence of random seeds? if (is.list(seed)) { if (debug) mdebugf("Using a pre-define stream of %d random seeds ...", count) seeds <- seed nseeds <- length(seeds) if (nseeds != count) { stop(sprintf("Argument 'seed' is a list, which specifies the sequence of seeds to be used for each element iterated over, but length(seed) != number of elements: %g != %g", nseeds, count)) } ## Assert same type of RNG seeds? ns <- unique(unlist(lapply(seeds, FUN = length), use.names = FALSE)) if (length(ns) != 1L) { stop("The elements of the list specified in argument 'seed' are not all of the same lengths (did you really pass RNG seeds?): ", hpaste(ns)) } ## Did use specify scalar integers as meant for set.seed()? if (ns == 1L) { stop("Argument 'seed' is invalid. Pre-generated random seeds must be valid .Random.seed seeds, which means they should be all integers and consists of two or more elements, not just one.") } types <- unlist(lapply(seeds, FUN = typeof), use.names = FALSE) if (!all(types == "integer")) { stop("The elements of the list specified in argument 'seed' are not all integers (did you really pass RNG seeds?): ", hpaste(unique(types))) } ## Check if valid random seeds are specified. ## For efficiency, only look at the first one. if (!is_valid_random_seed(seeds[[1]])) { stop("The list in argument 'seed' does not seem to hold elements that are valid .Random.seed values: ", capture.output(str(seeds[[1]]))) } if (debug) { mdebugf("Using a pre-define stream of %d random seeds ... DONE", count) mdebug("Generating random seeds ... DONE") } return(seeds) } if (debug) mdebugf("Generating random seed streams for %d elements ...", count) ## Generate sequence of _all_ RNG seeds starting with an initial seed ## '.seed' that is based on argument 'seed'. .seed <- as_lecyer_cmrg_seed(seed) ## future_*apply() should return with the same RNG state regardless of ## future strategy used. This is be done such that RNG kind is preserved ## and the seed is "forwarded" one step from what it was when this ## function was called. The forwarding is done by generating one random ## number. Note that this approach is also independent on the number of ## elements iterated over and the different FUN() calls. oseed <- next_random_seed() on.exit(set_random_seed(oseed)) seeds <- vector("list", length = count) for (ii in seq_len(count)) { ## RNG substream seed used when calling FUN() for element(s) 'ii': ## This way each future can in turn generate further seeds, also ## recursively, with minimal risk of generating the same seeds as ## another future. This should make it safe to recursively call ## future_*apply(). /HB 2017-01-11 seeds[[ii]] <- nextRNGSubStream(.seed) ## Main random seed for next iteration (= ii + 1) .seed <- nextRNGStream(.seed) } if (debug) { mdebugf("Generating random seed streams for %d elements ... DONE", count) mdebug("Generating random seeds ... DONE") } seeds } # make_rng_seeds() future.apply/R/future_lapply.R0000644000176200001440000002454014104451747016143 0ustar liggesusers#' Apply a Function over a List or Vector via Futures #' #' `future_lapply()` implements [base::lapply()] using futures with perfect #' replication of results, regardless of future backend used. #' Analogously, this is true for all the other `future_nnn()` functions. #' #' @param X A vector-like object to iterate over. #' #' @param FUN A function taking at least one argument. #' #' @param \ldots (optional) Additional arguments passed to `FUN()`. #' For `future_*apply()` functions and `replicate()`, any `future.*` arguments #' part of `\ldots` are passed on to `future_lapply()` used internally. #' #' @param future.envir An [environment] passed as argument `envir` to #' [future::future()] as-is. #' #' @param future.stdout If `TRUE` (default), then the standard output of the #' underlying futures is captured, and re-outputted as soon as possible. #' If `FALSE`, any output is silenced (by sinking it to the null device #' as it is outputted). #' If `NA` (not recommended), output is _not_ intercepted. #' #' @param future.conditions A character string of conditions classes to be #' captured and relayed. The default is the same as the `condition` #' argument of [future::Future()]. #' To not intercept conditions, use `conditions = character(0L)`. #' Errors are always relayed. #' #' @param future.globals A logical, a character vector, or a named list for #' controlling how globals are handled. For details, see below section. #' #' @param future.packages (optional) a character vector specifying packages #' to be attached in the R environment evaluating the future. #' #' @param future.lazy Specifies whether the futures should be resolved #' lazily or eagerly (default). #' #' @param future.seed A logical or an integer (of length one or seven), #' or a list of `length(X)` with pre-generated random seeds. #' For details, see below section. #' #' @param future.scheduling Average number of futures ("chunks") per worker. #' If `0.0`, then a single future is used to process all elements #' of `X`. #' If `1.0` or `TRUE`, then one future per worker is used. #' If `2.0`, then each worker will process two futures #' (if there are enough elements in `X`). #' If `Inf` or `FALSE`, then one future per element of #' `X` is used. #' Only used if `future.chunk.size` is `NULL`. #' #' @param future.chunk.size The average number of elements per future ("chunk"). #' If `Inf`, then all elements are processed in a single future. #' If `NULL`, then argument `future.scheduling` is used. #' #' @param future.label If a character string, then each future is assigned #' a label `sprintf(future.label, chunk_idx)`. If TRUE, then the #' same as `future.label = "future_lapply-%d"`. If FALSE, no labels #' are assigned. #' #' @return #' For `future_lapply()`, a list with same length and names as `X`. #' See [base::lapply()] for details. #' #' @section Global variables: #' Argument `future.globals` may be used to control how globals #' should be handled similarly how the `globals` argument is used with #' `future()`. #' Since all function calls use the same set of globals, this function can do #' any gathering of globals upfront (once), which is more efficient than if #' it would be done for each future independently. #' If `TRUE`, `NULL` or not is specified (default), then globals #' are automatically identified and gathered. #' If a character vector of names is specified, then those globals are gathered. #' If a named list, then those globals are used as is. #' In all cases, `FUN` and any `\ldots` arguments are automatically #' passed as globals to each future created as they are always needed. #' #' @section Reproducible random number generation (RNG): #' Unless `future.seed = FALSE`, this function guarantees to generate #' the exact same sequence of random numbers _given the same initial #' seed / RNG state_ - this regardless of type of futures, scheduling #' ("chunking") strategy, and number of workers. #' #' RNG reproducibility is achieved by pregenerating the random seeds for all #' iterations (over `X`) by using L'Ecuyer-CMRG RNG streams. In each #' iteration, these seeds are set before calling `FUN(X[[ii]], ...)`. #' _Note, for large `length(X)` this may introduce a large overhead._ #' As input (`future.seed`), a fixed seed (integer) may be given, either #' as a full L'Ecuyer-CMRG RNG seed (vector of 1+6 integers) or as a seed #' generating such a full L'Ecuyer-CMRG seed. #' If `future.seed = TRUE`, then \code{\link[base:Random]{.Random.seed}} #' is returned if it holds a L'Ecuyer-CMRG RNG seed, otherwise one is created #' randomly. #' If `future.seed = NA`, a L'Ecuyer-CMRG RNG seed is randomly created. #' If none of the function calls `FUN(X[[ii]], ...)` uses random number #' generation, then `future.seed = FALSE` may be used. #' #' In addition to the above, it is possible to specify a pre-generated #' sequence of RNG seeds as a list such that #' `length(future.seed) == length(X)` and where each element is an #' integer seed vector that can be assigned to #' \code{\link[base:Random]{.Random.seed}}. One approach to generate a #' set of valid RNG seeds based on fixed initial seed (here `42L`) is: #' ```r #' seeds <- future_lapply(seq_along(X), FUN = function(x) .Random.seed, #' future.chunk.size = Inf, future.seed = 42L) #' ``` #' **Note that `as.list(seq_along(X))` is _not_ a valid set of such #' `.Random.seed` values.** #' #' In all cases but `future.seed = FALSE`, the RNG state of the calling #' R processes after this function returns is guaranteed to be #' "forwarded one step" from the RNG state that was before the call and #' in the same way regardless of `future.seed`, `future.scheduling` #' and future strategy used. This is done in order to guarantee that an \R #' script calling `future_lapply()` multiple times should be numerically #' reproducible given the same initial seed. #' #' @section Control processing order of elements: #' Attribute `ordering` of `future.chunk.size` or `future.scheduling` can #' be used to control the ordering the elements are iterated over, which #' only affects the processing order and _not_ the order values are returned. #' This attribute can take the following values: #' * index vector - an numeric vector of length `length(X)` #' * function - an function taking one argument which is called as #' `ordering(length(X))` and which must return an #' index vector of length `length(X)`, e.g. #' `function(n) rev(seq_len(n))` for reverse ordering. #' * `"random"` - this will randomize the ordering via random index #' vector `sample.int(length(X))`. #' For example, `future.scheduling = structure(TRUE, ordering = "random")`. #' _Note_, when elements are processed out of order, then captured standard #' output and conditions are also relayed in that order, that is out of order. #' #' @example incl/future_lapply.R #' #' @keywords manip programming iteration #' #' @importFrom globals findGlobals #' @export future_lapply <- local({ tmpl_expr <- bquote_compile({ lapply(seq_along(...future.elements_ii), FUN = function(jj) { ...future.X_jj <- ...future.elements_ii[[jj]] .(expr_FUN) }) }) tmpl_expr_with_rng <- bquote_compile({ lapply(seq_along(...future.elements_ii), FUN = function(jj) { ...future.X_jj <- ...future.elements_ii[[jj]] assign(".Random.seed", ...future.seeds_ii[[jj]], envir = globalenv(), inherits = FALSE) .(expr_FUN) }) }) function(X, FUN, ..., future.envir = parent.frame(), future.stdout = TRUE, future.conditions = "condition", future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1.0, future.chunk.size = NULL, future.label = "future_lapply-%d") { fcn_name <- "future_lapply" args_name <- "X" ## Coerce to as.list()? if (!is.vector(X) || is.object(X)) X <- as.list(X) ## Nothing to do? nX <- length(X) if (nX == 0L) return(as.list(X)) debug <- getOption("future.apply.debug", getOption("future.debug", FALSE)) if (debug) mdebugf("%s() ...", fcn_name) ## NOTE TO SELF: We'd ideally have a 'future.envir' argument also for ## this function, cf. future(). However, it's not yet clear to me how ## to do this, because we need to have globalsOf() to search for globals ## from the current environment in order to identify the globals of ## arguments 'FUN' and '...'. /HB 2017-03-10 envir <- environment() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Future expression ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ...future.FUN <- NULL ## To please R CMD check ## Does FUN() rely on '...' being a global? global_dotdotdot <- ("..." %in% findGlobals(FUN, dotdotdot = "return")) if (global_dotdotdot) { expr_FUN <- quote(...future.FUN(...future.X_jj)) } else { expr_FUN <- quote(...future.FUN(...future.X_jj, ...)) } ## With or without RNG? expr <- bquote_apply( if (is.null(future.seed) || isFALSE(future.seed) || isNA(future.seed)) { tmpl_expr } else { tmpl_expr_with_rng } ) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Process ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - values <- future_xapply( FUN = FUN, nX = nX, chunk_args = X, args = list(...), get_chunk = `[`, expr = expr, envir = envir, future.envir = future.envir, future.globals = future.globals, future.packages = future.packages, future.scheduling = future.scheduling, future.chunk.size = future.chunk.size, future.stdout = future.stdout, future.conditions = future.conditions, future.seed = future.seed, future.lazy = future.lazy, future.label = future.label, fcn_name = fcn_name, args_name = args_name, debug = debug ) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Reduce ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - names(values) <- names(X) if (debug) mdebugf("%s() ... DONE", fcn_name) values } })future.apply/R/makeChunks.R0000644000176200001440000001011214104216357015323 0ustar liggesusers#' Create Chunks of Index Vectors #' #' _This is an internal function._ #' #' @param nbrOfElements (integer) Total number of elements to iterate over. #' #' @param nbrOfWorkers (integer) Number of workers available. #' #' @param future.scheduling (numeric) A strictly positive scalar. #' Only used if argument `future.chunk.size` is `NULL`. #' #' @param future.chunk.size (numeric) The maximum number of elements per #' chunk, or `NULL`. If `NULL`, then the chunk sizes are given by the #' `future.scheduling` argument. #' #' @return A list of chunks, where each chunk is an integer vector of #' unique indices \code{[1, nbrOfElements]}. The union of all chunks #' holds `nbrOfElements` elements and equals `1:nbrOfElements`. #' If `nbrOfElements == 0`, then an empty list is returned. #' #' @section Control processing order of elements: #' Attribute `ordering` of `future.chunk.size` or `future.scheduling` can #' be used to control the ordering the elements are iterated over, which #' only affects the processing order _not_ the order values are returned. #' This attribute can take the following values: #' * index vector - an numeric vector of length `nbrOfElements` specifying #' how elements are remapped #' * function - an function taking one argument which is called as #' `ordering(nbrOfElements)` and which must return an #' index vector of length `nbrOfElements`, e.g. #' `function(n) rev(seq_len(n))` for reverse ordering. #' * `"random"` - this will randomize the ordering via random index #' vector `sample.int(nbrOfElements)`. #' #' @importFrom parallel splitIndices #' @keywords internal makeChunks <- function(nbrOfElements, nbrOfWorkers, future.scheduling = 1.0, future.chunk.size = NULL) { stop_if_not(nbrOfElements >= 0L, nbrOfWorkers >= 1L) ## 'future.chunk.size != NULL' takes precedence over 'future.scheduling' if (!is.null(future.chunk.size)) { stop_if_not(length(future.chunk.size) == 1L, !is.na(future.chunk.size), future.chunk.size > 0) ## Same definition as parallel:::staticNChunks() in R (>= 3.5.0) nbrOfChunks <- max(1, ceiling(nbrOfElements / future.chunk.size)) ## Customized ordering? ordering <- attr(future.chunk.size, "ordering", exact = TRUE) } else { if (is.logical(future.scheduling)) { stop_if_not(length(future.scheduling) == 1L, !is.na(future.scheduling)) if (future.scheduling) { nbrOfChunks <- nbrOfWorkers if (nbrOfChunks > nbrOfElements) nbrOfChunks <- nbrOfElements } else { nbrOfChunks <- nbrOfElements } } else { ## Treat 'future.scheduling' as the number of chunks per worker, i.e. ## the number of chunks each worker should process on average. stop_if_not(length(future.scheduling) == 1L, !is.na(future.scheduling), future.scheduling >= 0) if (nbrOfWorkers > nbrOfElements) nbrOfWorkers <- nbrOfElements nbrOfChunks <- future.scheduling * nbrOfWorkers if (nbrOfChunks < 1L) { nbrOfChunks <- 1L } else if (nbrOfChunks > nbrOfElements) { nbrOfChunks <- nbrOfElements } } ## Customized ordering? ordering <- attr(future.scheduling, "ordering", exact = TRUE) } chunks <- splitIndices(nbrOfElements, ncl = nbrOfChunks) ## Customized ordering? if (nbrOfElements > 1L && !is.null(ordering)) { if (is.character(ordering) && ordering == "random") { map <- stealth_sample.int(nbrOfElements) } else if (is.numeric(ordering)) { map <- ordering } else if (is.function(ordering)) { map <- ordering(nbrOfElements) } else { stop(sprintf("Unknown value of attribute %s for argument %s: ", "ordering", if (!is.null(future.chunk.size)) "future.chunk.size" else "future.scheduling"), mode(ordering)) } if (!is.null(map)) { ## Simple validity check of "ordering". Looking for NAs, range, ## uniqueness is too expensive so skipped. stop_if_not(length(map) == nbrOfElements) attr(chunks, "ordering") <- map } } chunks } future.apply/R/future_vapply.R0000644000176200001440000000547614104262270016153 0ustar liggesusers#' @inheritParams future_lapply #' #' @param FUN.VALUE A template for the required return value from #' each `FUN(X[ii], ...)`. #' Types may be promoted to a higher type within the ordering #' logical < integer < double < complex, but not demoted. #' See [base::vapply()] for details. #' #' @return #' For `future_vapply()`, a vector with same length and names as \code{X}. #' See [base::vapply()] for details. #' #' @export #' #' @rdname future_lapply future_vapply <- function(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE, future.envir = parent.frame(), future.label = "future_vapply-%d") { ## Coerce to as.list()? if (!is.vector(X) || is.object(X)) X <- as.list(X) n <- length(X) stop_if_not(is.function(FUN)) stop_if_not(is.vector(FUN.VALUE) || is.array(FUN.VALUE)) type <- typeof(FUN.VALUE) times <- length(FUN.VALUE) dim <- dim(FUN.VALUE) stop_if_not(is.logical(USE.NAMES), length(USE.NAMES) == 1L, !is.na(USE.NAMES)) valid_types <- switch( type, logical = "logical", integer = c("logical", "integer"), double = c("logical", "integer", "double"), complex = c("logical", "integer", "double", "complex"), type ) x_FUN <- FUN res <- future_lapply(X, FUN = function(x, ...) { value <- x_FUN(x, ...) if (length(value) != times) { stop(sprintf( "values must be length %d, but FUN(X[[ii]]) result is length %d", times, length(value))) } stop_if_not(all(dim(value) == dim), typeof(value) %in% valid_types) value }, ..., future.envir = future.envir, future.label = future.label) if (!is.null(dim)) { dim_res <- c(dim, n) } else if (times != 1L) { dim_res <- c(times, n) } else { dim_res <- NULL } if (USE.NAMES && length(res) > 0L) { if (is.null(dim)) { names_FUN.VALUE <- names(FUN.VALUE) if (is.null(names_FUN.VALUE)) names_FUN.VALUE <- names(res[[1]]) } else { names_FUN.VALUE <- dimnames(FUN.VALUE) if (is.null(names_FUN.VALUE)) names_FUN.VALUE <- dimnames(res[[1]]) } } res <- unlist(res, use.names = FALSE) if (is.null(res)) res <- vector(mode = type, length = 0L) if (!is.null(dim_res)) dim(res) <- dim_res if (USE.NAMES) { if (is.array(res)) { n_dim <- length(dim(res)) dimnames <- vector("list", length = n_dim) if (is.null(dim)) { names <- names(X) if (!is.null(names)) dimnames[[2]] <- names names <- names_FUN.VALUE if (!is.null(names)) dimnames[[1]] <- names } else { names <- names(X) if (!is.null(names)) dimnames[[n_dim]] <- names names <- names_FUN.VALUE if (!is.null(names)) dimnames[-n_dim] <- names } if (!all(unlist(lapply(dimnames, FUN = is.null), use.names = FALSE))) { dimnames(res) <- dimnames } } else { names(res) <- names(X) } } res } future.apply/R/future.apply-package.R0000644000176200001440000000770314024036060017265 0ustar liggesusers#' future.apply: Apply Function to Elements in Parallel using Futures #' #' The \pkg{future.apply} packages provides parallel implementations of #' common "apply" functions provided by base \R. The parallel processing #' is performed via the \pkg{future} ecosystem, which provides a large #' number of parallel backends, e.g. on the local machine, a remote cluster, #' and a high-performance compute cluster. #' #' Currently implemented functions are: #' #' * [future_apply()]: a parallel version of [apply()][base::apply] #' * [future_by()]: a parallel version of [by()][base::by] #' * [future_eapply()]: a parallel version of [eapply()][base::lapply] #' * [future_lapply()]: a parallel version of [lapply()][base::lapply] #' * [future_mapply()]: a parallel version of [mapply()][base::mapply] #' * [future_sapply()]: a parallel version of [sapply()][base::sapply] #' * [future_tapply()]: a parallel version of [tapply()][base::tapply] #' * [future_vapply()]: a parallel version of [vapply()][base::vapply] #' * [future_Map()]: a parallel version of [Map()][base::Map] #' * [future_replicate()]: a parallel version of [replicate()][base::replicate] #' * [future_.mapply()]: a parallel version of [.mapply()][base::.mapply] #' #' Reproducibility is part of the core design, which means that perfect, #' parallel random number generation (RNG) is supported regardless of the #' amount of chunking, type of load balancing, and future backend being used. #' #' Since these `future_*()` functions have the same arguments as the #' corresponding base \R function, start using them is often as simple as #' renaming the function in the code. For example, after attaching the package: #' ```r #' library(future.apply) #' ``` #' code such as: #' ```r #' x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE)) #' y <- lapply(x, quantile, probs = 1:3/4) #' ``` #' can be updated to: #' ```r #' y <- future_lapply(x, quantile, probs = 1:3/4) #' ``` #' #' The default settings in the \pkg{future} framework is to process code #' _sequentially_. To run the above in parallel on the local machine #' (on any operating system), use: #' ```r #' plan(multisession) #' ``` #' first. That's it! #' #' To go back to sequential processing, use `plan(sequential)`. #' If you have access to multiple machines on your local network, use: #' ```r #' plan(cluster, workers = c("n1", "n2", "n2", "n3")) #' ``` #' This will set up four workers, one on `n1` and `n3`, and two on `n2`. #' If you have SSH access to some remote machines, use: #' ```r #' plan(cluster, workers = c("m1.myserver.org", "m2.myserver.org)) #' ``` #' See the \pkg{future} package and [future::plan()] for more examples. #' #' The \pkg{future.batchtools} package provides support for high-performance #' compute (HPC) cluster schedulers such as SGE, Slurm, and TORQUE / PBS. #' For example, #' #' * `plan(batchtools_slurm)`: #' Process via a Slurm scheduler job queue. #' * `plan(batchtools_torque)`: #' Process via a TORQUE / PBS scheduler job queue. #' #' This builds on top of the queuing framework that the \pkg{batchtools} #' package provides. For more details on backend configuration, please see #' the \pkg{future.batchtools} and \pkg{batchtools} packages. #' #' These are just a few examples of parallel/distributed backend for the #' future ecosystem. For more alternatives, see the 'Reverse dependencies' #' section on the #' [future CRAN package page](https://cran.r-project.org/package=future). #' #' @author #' Henrik Bengtsson, except for the implementations of `future_apply()`, #' `future_Map()`, `future_replicate()`, `future_sapply()`, and #' `future_tapply()`, which are adopted from the source code of the #' corresponding base \R functions, which are licensed under GPL (>= 2) #' with 'The R Core Team' as the copyright holder. #' Because of these dependencies, the license of this package is GPL (>= 2). #' #' @keywords manip programming iteration #' #' @docType package #' @aliases future.apply-package #' @name future.apply NULL future.apply/R/future_by.R0000644000176200001440000001166614104262756015261 0ustar liggesusers#' Apply a Function to a Data Frame Split by Factors via Futures #' #' @inheritParams future_lapply #' #' @param data An \R object, normally a data frame, possibly a matrix. #' #' @param INDICES A factor or a list of factors, each of length `nrow(data)`. #' #' @param FUN a function to be applied to (usually data-frame) subsets of `data`. #' #' @param \ldots Additional arguments pass to [future_lapply()] and #' then to `FUN()`. #' #' @param simplify logical: see [base::tapply]. #' #' @return #' An object of class "by", giving the results for each subset. #' This is always a list if simplify is false, otherwise a list #' or array (see [base::tapply]). #' See also [base::by()] for details. #' #' @example incl/future_by.R #' #' @details #' Internally, `data` is grouped by `INDICES` into a list of `data` #' subset elements which is then processed by [future_lapply()]. #' When the groups differ significantly in size, the processing time #' may differ significantly between the groups. #' To correct for processing-time imbalances, adjust the amount of chunking #' via arguments `future.scheduling` and `future.chunk.size`. #' #' @section Note on 'stringsAsFactors': #' The `future_by()` is modeled as closely as possible to the #' behavior of `base::by()`. Both functions have "default" S3 methods that #' calls `data <- as.data.frame(data)` internally. This call may in turn call #' an S3 method for `as.data.frame()` that coerces strings to factors or not #' depending on whether it has a `stringsAsFactors` argument and what its #' default is. #' For example, the S3 method of `as.data.frame()` for lists changed its #' (effective) default from `stringsAsFactors = TRUE` to #' `stringsAsFactors = TRUE` in R 4.0.0. #' #' #' @rdname future_by #' @export future_by <- function(data, INDICES, FUN, ..., simplify = TRUE, future.envir = parent.frame()) { future.envir <- force(future.envir) UseMethod("future_by") } #' @export future_by.default <- function(data, INDICES, FUN, ..., simplify = TRUE, future.envir = parent.frame()) { ndim <- length(dim(data)) .SUBSETTER <- if (ndim == 0L) { function(row) data[row, , drop = TRUE] } else { function(row) data[row, , drop = FALSE] } data <- as.data.frame(data) future_by_internal(data = data, INDICES = INDICES, FUN = FUN, ..., simplify = simplify, .INDICES.NAME = deparse(substitute(INDICES))[1L], .CALL = match.call(), .SUBSETTER = .SUBSETTER, future.envir = future.envir) } #' @export future_by.data.frame <- function(data, INDICES, FUN, ..., simplify = TRUE, future.envir = parent.frame()) { future_by_internal(data = data, INDICES = INDICES, FUN = FUN, ..., simplify = simplify, .INDICES.NAME = deparse(substitute(INDICES))[1L], .CALL = match.call(), .SUBSETTER = function(row) data[row, , drop = FALSE], future.envir = future.envir) } future_by_internal <- function(data, INDICES, FUN, ..., simplify = TRUE, .SUBSETTER, .CALL, .INDICES.NAME, future.envir = parent.frame(), future.label = "future_by-%d") { FUN <- if (!is.null(FUN)) match.fun(FUN) stop_if_not(is.function(.SUBSETTER)) if (!is.list(INDICES)) { INDEX <- vector("list", length = 1L) INDEX[[1L]] <- INDICES names(INDEX) <- .INDICES.NAME INDICES <- INDEX INDEX <- NULL ## Not needed anymore } INDICES <- lapply(INDICES, FUN = as.factor) nI <- length(INDICES) if (!nI) stop("'INDICES' is of length zero") nd <- nrow(data) if (!all(lengths(INDICES) == nd)) { stop("All elements of argument 'INDICES' must have same length as 'data'") } namelist <- lapply(INDICES, FUN = levels) extent <- lengths(namelist, use.names = FALSE) cumextent <- cumprod(extent) if (cumextent[nI] > .Machine$integer.max) stop("total number of levels >= 2^31") storage.mode(cumextent) <- "integer" ngroup <- cumextent[nI] group <- as.integer(INDICES[[1L]]) if (nI > 1L) { for (i in 2L:nI) { group <- group + cumextent[i - 1L] * (as.integer(INDICES[[i]]) - 1L) } } cumextent <- NULL ## Not needed anymore levels(group) <- as.character(seq_len(ngroup)) class(group) <- "factor" ans <- split(seq_len(nd), f = group) names(ans) <- NULL index <- as.logical(lengths(ans) > 0L) group <- NULL ## Not needed anymore grouped_data <- lapply(X = ans[index], FUN = .SUBSETTER) ans <- future_lapply(X = grouped_data, FUN = FUN, ..., future.envir = future.envir, future.label = future.label) grouped_data <- NULL ## Not needed anymore ansmat <- array({ if (simplify && all(lengths(ans) == 1L)) { ans <- unlist(ans, recursive = FALSE, use.names = FALSE) if (!is.null(ans) && is.atomic(ans)) vector(typeof(ans)) else NA } else { vector("list", length = prod(extent)) } }, dim = extent, dimnames = namelist) if (length(ans) > 0L) ansmat[index] <- ans ans <- NULL ## Not needed anymore structure(ansmat, call = .CALL, class = "by" ) } future.apply/MD50000644000176200001440000000607614104474122013230 0ustar liggesusers6efdd5ade379c3bb9eb3881171b8dacd *DESCRIPTION 3204e62059c5100b47722db1bf65ae7b *NAMESPACE 66fcb537c2bc77eecf1a002288324b7f *NEWS c856395e17c07373fa14c21e5c8dc5f1 *R/000.import.R 59030753c107d6d43d70edb62249890f *R/001.bquote.R 140d32b29a7ab921bd7cee341830bab5 *R/fold.R 7747a7df12d3edf74da86fe498131e9d *R/future.apply-package.R df54f772bddd565a18cd86469e37bd07 *R/future_Map.R 4abdbb471ee68e27744330b0a74345bf *R/future_apply.R e63429bfed0625d3ce10ba2f28fcd177 *R/future_by.R c4408ff4824298a480a5057f98dab66e *R/future_eapply.R 9c9e08ac351bf3a6a4383e24ac70ca82 *R/future_lapply.R f11fb674a6a323277e893305831c2327 *R/future_mapply.R 381866b20dd5e68d76ae2c0bdc9eef79 *R/future_replicate.R 5ef8b5ab6e8e3c2c2122fe8bcfcf450a *R/future_sapply.R 27d3f28dae275d093fbe17e742c95f33 *R/future_tapply.R f27027d45335a1cff2d0e39fbbdc91f9 *R/future_vapply.R 8224feae7bb003c984d034919bc51b8a *R/future_xapply.R 49c203c0e67c9bdac344900138526a90 *R/globals.R 85dedfbb022fc7e0f5dc3a79293dd239 *R/makeChunks.R c932ad14e2982a4927dd56071a06113a *R/options.R 82a022733b127c50f15238f845364f0f *R/rng.R 3a3deebafe38f5282209ad2940a63111 *R/utils.R b0d20cba1cf067a274d2d768befc2440 *R/zzz.R 3d294d7702823554135941a9a6d9092e *build/vignette.rds 2eba7d5b97ff92e245df449f9e4de52a *inst/CITATION f7a83e97061ddc1cd975cf4367c8a934 *inst/WORDLIST 40842e92d752732f7ee36d5c6d35faf6 *inst/doc/future.apply-1-overview.html 8d75f91d2a7b541daae17ddcbbf22646 *inst/doc/future.apply-1-overview.md.rsp 823b12da71976538c0523b3cfa2b10aa *man/fold.Rd ee4ce14fd8e8102c73edc35838bb2dee *man/future.apply.Rd 7a89b92bb2ab6205e7460be5d87accc9 *man/future.apply.options.Rd 15ab0a106d5aefb0a6a0c964b629e0eb *man/future_apply.Rd 84eed4efe8ceef7052103039d54fc82b *man/future_by.Rd 85924fe27f0808068b7d838241521806 *man/future_lapply.Rd c5b10b36ce799c6d540c124d93d20416 *man/future_mapply.Rd d64916ea7b40e821495f14c05522ed09 *man/makeChunks.Rd 96b1e638537cf8a012e4c59589113471 *man/make_rng_seeds.Rd 7205b468aaede70bf75ff613fb62acc1 *tests/fold.R e5682aa8ba2e36695171ddf5c8cae0f6 *tests/future_apply.R 475ac0461e474ebf2d264c4e41e9062e *tests/future_by.R 16ce9cc54c711594ba68858573da0ebb *tests/future_eapply.R 6368c0b7cf1c330162596a0962c13f33 *tests/future_lapply,RNG.R 6fbf69a774e2169e81766e9b48424b36 *tests/future_lapply,globals.R 5545c54b64fa338f3246a6a15a4e6e38 *tests/future_lapply.R 334c7b0b165e54e0506fd6fcf49427fe *tests/future_mapply,globals.R f2f3498a424ae7fb836903549876eb8a *tests/future_mapply.R 8a0e88bd5caff070ee6023064ec3a85b *tests/future_replicate.R 9ee25286d8bbeee78e57b9867895a82e *tests/future_sapply.R d640c99128ff228abe80c17ed5f5c789 *tests/future_tapply.R c40caf1803dab94cfd5cfd68281729ba *tests/future_vapply.R befa096c99ef36f2194b7bd036565a46 *tests/globals,tricky_recursive.R d1b606b24136d2d7ad8cda37792cbe62 *tests/incl/end.R a22ffb53f44b646dc66473fec5ebb5f6 *tests/incl/start,load-only.R 0d757150beb30ecef6a18a539d432264 *tests/incl/start.R c18b68f21db39e30a734f3080d68c21d *tests/rng.R d23d1bde69f1d66c3922111d6f1fc875 *tests/stdout.R 12395d516dfae147fb8957b3d0b4c15f *tests/utils.R 8d75f91d2a7b541daae17ddcbbf22646 *vignettes/future.apply-1-overview.md.rsp future.apply/inst/0000755000176200001440000000000014104466760013675 5ustar liggesusersfuture.apply/inst/doc/0000755000176200001440000000000014104466760014442 5ustar liggesusersfuture.apply/inst/doc/future.apply-1-overview.html0000644000176200001440000004746614104466760022011 0ustar liggesusers A Future for R: Apply Function to Elements in Parallel

A Future for R: Apply Function to Elements in Parallel

Introduction

The purpose of this package is to provide worry-free parallel alternatives to base-R “apply” functions, e.g. apply(), lapply(), and vapply(). The goal is that one should be able to replace any of these in the core with its futurized equivalent and things will just work. For example, instead of doing:

library("datasets")
library("stats")
y <- lapply(mtcars, FUN = mean, trim = 0.10)

one can do:

library("future.apply")
plan(multisession) ## Run in parallel on local computer

library("datasets")
library("stats")
y <- future_lapply(mtcars, FUN = mean, trim = 0.10)

Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. To enable parallel RNG, use argument future.seed = TRUE.

Role

Where does the future.apply package fit in the software stack? You can think of it as a sibling to foreach, furrr, BiocParallel, plyr, etc. Just as parallel provides parLapply(), foreach provides foreach(), BiocParallel provides bplapply(), and plyr provides llply(), future.apply provides future_lapply(). Below is a table summarizing this idea:

Package Functions Backends
future.apply

Future-versions of common goto *apply() functions available in base R (of the ‘base’ package):
future_apply(), future_by(), future_eapply(), future_lapply(), future_Map(), future_mapply(), future_.mapply(), future_replicate(), future_sapply(), future_tapply(), and future_vapply().
The following function is yet not implemented:
future_rapply()
All future backends
parallel mclapply(), mcmapply(), clusterMap(), parApply(), parLapply(), parSapply(), … Built-in and conditional on operating system
foreach foreach(), times() All future backends via doFuture
furrr future_imap(), future_map(), future_pmap(), future_map2(), … All future backends
BiocParallel Bioconductor’s parallel mappers:
bpaggregate(), bpiterate(), bplapply(), and bpvec()
All future backends via doFuture (because it supports foreach) or via BiocParallel.FutureParam (direct BiocParallelParam support; prototype)
plyr **ply(..., .parallel = TRUE) functions:
aaply(), ddply(), dlply(), llply(), …
All future backends via doFuture (because it uses foreach internally)

Note that, except for the built-in parallel package, none of these higher-level APIs implement their own parallel backends, but they rather enhance existing ones. The foreach framework leverages backends such as doParallel, doMC and doFuture, and the future.apply framework leverages the future ecosystem and therefore backends such as built-in parallel, future.callr, and future.batchtools.

By separating future_lapply() and friends from the future package, it helps clarifying the purpose of the future package, which is to define and provide the core Future API, which higher-level parallel APIs can build on and for which any futurized parallel backends can be plugged into.

Roadmap

  1. Implement future_*apply() versions for all common *apply() functions that exist in base R. This also involves writing a large set of package tests asserting the correctness and the same behavior as the corresponding *apply() functions.

  2. Harmonize all future_*apply() functions with each other, e.g. the future-specific arguments.

  3. Consider additional future_*apply() functions and features that fit in this package but don't necessarily have a corresponding function in base R. Examples of this may be “apply” functions that return futures rather than values, mechanisms for benchmarking, and richer control over load balancing.

The API and identity of the future.apply package will be kept close to the *apply() functions in base R. In other words, it will neither keep growing nor be expanded with new, more powerful apply-like functions beyond those core ones in base R. Such extended functionality should be part of a separate package.

future.apply/inst/doc/future.apply-1-overview.md.rsp0000644000176200001440000001536714104216357022236 0ustar liggesusers<%@meta language="R-vignette" content="-------------------------------- %\VignetteIndexEntry{A Future for R: Apply Function to Elements in Parallel} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{R} %\VignetteKeyword{package} %\VignetteKeyword{vignette} %\VignetteKeyword{future} %\VignetteKeyword{lazy evaluation} %\VignetteKeyword{synchronous} %\VignetteKeyword{asynchronous} %\VignetteKeyword{parallel} %\VignetteKeyword{cluster} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> # A Future for R: Apply Function to Elements in Parallel ## Introduction The purpose of this package is to provide worry-free parallel alternatives to base-R "apply" functions, e.g. `apply()`, `lapply()`, and `vapply()`. The goal is that one should be able to replace any of these in the core with its futurized equivalent and things will just work. For example, instead of doing: ```r library("datasets") library("stats") y <- lapply(mtcars, FUN = mean, trim = 0.10) ``` one can do: ```r library("future.apply") plan(multisession) ## Run in parallel on local computer library("datasets") library("stats") y <- future_lapply(mtcars, FUN = mean, trim = 0.10) ``` Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. _To enable parallel RNG, use argument `future.seed = TRUE`._ ## Role Where does the [future.apply] package fit in the software stack? You can think of it as a sibling to [foreach], [furrr], [BiocParallel], [plyr], etc. Just as parallel provides `parLapply()`, foreach provides `foreach()`, BiocParallel provides `bplapply()`, and plyr provides `llply()`, future.apply provides `future_lapply()`. Below is a table summarizing this idea:
Package Functions Backends
future.apply

Future-versions of common goto *apply() functions available in base R (of the 'base' package):
future_apply(), future_by(), future_eapply(), future_lapply(), future_Map(), future_mapply(), future_.mapply(), future_replicate(), future_sapply(), future_tapply(), and future_vapply().
The following function is yet not implemented:
future_rapply()
All future backends
parallel mclapply(), mcmapply(), clusterMap(), parApply(), parLapply(), parSapply(), ... Built-in and conditional on operating system
foreach foreach(), times() All future backends via doFuture
furrr future_imap(), future_map(), future_pmap(), future_map2(), ... All future backends
BiocParallel Bioconductor's parallel mappers:
bpaggregate(), bpiterate(), bplapply(), and bpvec()
All future backends via doFuture (because it supports foreach) or via BiocParallel.FutureParam (direct BiocParallelParam support; prototype)
plyr **ply(..., .parallel = TRUE) functions:
aaply(), ddply(), dlply(), llply(), ...
All future backends via doFuture (because it uses foreach internally)
Note that, except for the built-in parallel package, none of these higher-level APIs implement their own parallel backends, but they rather enhance existing ones. The foreach framework leverages backends such as [doParallel], [doMC] and [doFuture], and the future.apply framework leverages the [future] ecosystem and therefore backends such as built-in parallel, [future.callr], and [future.batchtools]. By separating `future_lapply()` and friends from the [future] package, it helps clarifying the purpose of the future package, which is to define and provide the core Future API, which higher-level parallel APIs can build on and for which any futurized parallel backends can be plugged into. ## Roadmap 1. Implement `future_*apply()` versions for all common `*apply()` functions that exist in base R. This also involves writing a large set of package tests asserting the correctness and the same behavior as the corresponding `*apply()` functions. 2. Harmonize all `future_*apply()` functions with each other, e.g. the future-specific arguments. 3. Consider additional `future_*apply()` functions and features that fit in this package but don't necessarily have a corresponding function in base R. Examples of this may be "apply" functions that return futures rather than values, mechanisms for benchmarking, and richer control over load balancing. The API and identity of the future.apply package will be kept close to the `*apply()` functions in base R. In other words, it will _neither_ keep growing nor be expanded with new, more powerful apply-like functions beyond those core ones in base R. Such extended functionality should be part of a separate package. [batchtools]: https://cran.r-project.org/package=batchtools [BiocParallel]: https://bioconductor.org/packages/BiocParallel/ [doFuture]: https://cran.r-project.org/package=doFuture [doMC]: https://cran.r-project.org/package=doMC [doParallel]: https://cran.r-project.org/package=doParallel [foreach]: https://cran.r-project.org/package=foreach [future]: https://cran.r-project.org/package=future [future.apply]: https://cran.r-project.org/package=future.apply [future.batchtools]: https://cran.r-project.org/package=future.batchtools [future.callr]: https://cran.r-project.org/package=future.callr [furrr]: https://cran.r-project.org/package=furrr [plyr]: https://cran.r-project.org/package=plyr future.apply/inst/CITATION0000644000176200001440000000167614104465322015035 0ustar liggesuserscitHeader("Please cite 'future' and the future framework using the following references:") citEntry( # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # BibTeX entry: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - entry = "Misc", author = "Henrik Bengtsson", title = "A Unifying Framework for Parallel and Distributed Processing in R using Futures", year = "2021", # doi = "10.32614/RJ-2021-048", ## until DOI URL exist note = "10.32614/RJ-2021-048", url = "https://journal.r-project.org/archive/2021/RJ-2021-048/index.html", # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Plain-text citation: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - textVersion = paste(sep="", "H. Bengtsson, ", "A Unifying Framework for Parallel and Distributed Processing in R using Futures, ", "The R Journal, ", "2021, ", "doi:10.32614/RJ-2021-048" ) ) future.apply/inst/WORDLIST0000644000176200001440000000044114024036060015051 0ustar liggesusersAppVeyor arity batchtools benchmarking BiocParallel callr CMD CMRG doFuture doMC doParallel eapply foreach furrr futurized globals HPC L'Ecuyer lapply macOS mapply plyr pre Pre pregenerating reproducibility Reproducibility Roadmap sapply SGE Slurm stringsAsFactors tapply vapply vectorize