future.apply/ 0000755 0001762 0000144 00000000000 13605176412 012715 5 ustar ligges users future.apply/NAMESPACE 0000644 0001762 0000144 00000001461 13604742367 014145 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(future_by,data.frame)
S3method(future_by,default)
export(future_Map)
export(future_apply)
export(future_by)
export(future_eapply)
export(future_lapply)
export(future_mapply)
export(future_replicate)
export(future_sapply)
export(future_tapply)
export(future_vapply)
importFrom(future,Future)
importFrom(future,FutureError)
importFrom(future,as.FutureGlobals)
importFrom(future,future)
importFrom(future,getGlobalsAndPackages)
importFrom(future,nbrOfWorkers)
importFrom(future,resolve)
importFrom(future,values)
importFrom(globals,globalsByName)
importFrom(parallel,nextRNGStream)
importFrom(parallel,nextRNGSubStream)
importFrom(parallel,splitIndices)
importFrom(utils,capture.output)
importFrom(utils,head)
importFrom(utils,packageVersion)
importFrom(utils,str)
future.apply/man/ 0000755 0001762 0000144 00000000000 13602733704 013470 5 ustar ligges users future.apply/man/future_mapply.Rd 0000644 0001762 0000144 00000014327 13602733704 016662 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_Map.R, R/future_mapply.R
\name{future_Map}
\alias{future_Map}
\alias{future_mapply}
\title{Apply a Function to Multiple List or Vector Arguments}
\usage{
future_Map(f, ..., future.label = "future_Map-\%d")
future_mapply(
FUN,
...,
MoreArgs = NULL,
SIMPLIFY = TRUE,
USE.NAMES = TRUE,
future.stdout = TRUE,
future.conditions = NULL,
future.globals = TRUE,
future.packages = NULL,
future.lazy = FALSE,
future.seed = FALSE,
future.scheduling = 1,
future.chunk.size = NULL,
future.label = "future_mapply-\%d"
)
}
\arguments{
\item{f}{A function of the arity \eqn{k} if \code{future_Map()} is called with
\eqn{k} arguments.}
\item{future.label}{If a character string, then each future is assigned
a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the
same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels
are assigned.}
\item{FUN}{A function to apply, found via \code{\link[base:match.fun]{base::match.fun()}}.}
\item{MoreArgs}{A list of other arguments to \code{FUN}.}
\item{SIMPLIFY}{A logical or character string; attempt to reduce the
result to a vector, matrix or higher dimensional array; see the simplify
argument of \code{\link[base:sapply]{base::sapply()}}.}
\item{USE.NAMES}{A logical; use names if the first \verb{\\ldots} argument has
names, or if it is a character vector, use that character vector as the
names.}
\item{future.stdout}{If \code{TRUE} (default), then the standard output of the
underlying futures is captured, and re-outputted as soon as possible.
If \code{FALSE}, any output is silenced (by sinking it to the null device
as it is outputted).
If \code{NA} (not recommended), output is \emph{not} intercepted.}
\item{future.conditions}{A character string of conditions classes to be
captured and relayed. The default is the same as the \code{condition}
argument of \code{\link[future:Future]{future::Future()}}.
To not intercept conditions, use \code{conditions = character(0L)}.
Errors are always relayed.}
\item{future.globals}{A logical, a character vector, or a named list for
controlling how globals are handled.
For details, see \code{\link[=future_lapply]{future_lapply()}}.}
\item{future.packages}{(optional) a character vector specifying packages
to be attached in the R environment evaluating the future.}
\item{future.lazy}{Specifies whether the futures should be resolved
lazily or eagerly (default).}
\item{future.seed}{A logical or an integer (of length one or seven), or
a list of \code{max(lengths(list(...)))} with pre-generated random seeds.
For details, see \code{\link[=future_lapply]{future_lapply()}}.}
\item{future.scheduling}{Average number of futures ("chunks") per worker.
If \code{0.0}, then a single future is used to process all elements
of \code{X}.
If \code{1.0} or \code{TRUE}, then one future per worker is used.
If \code{2.0}, then each worker will process two futures
(if there are enough elements in \code{X}).
If \code{Inf} or \code{FALSE}, then one future per element of
\code{X} is used.
Only used if \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{The average number of elements per future ("chunk").
If \code{Inf}, then all elements are processed in a single future.
If \code{NULL}, then argument \code{future.scheduling} is used.}
\item{\ldots}{Arguments to vectorize over (vectors or lists of strictly
positive length, or all of zero length).}
}
\value{
\code{future_Map()} is a simple wrapper to \code{future_mapply()} which does not
attempt to simplify the result.
See \code{\link[base:Map]{base::Map()}} for details.
\verb{future_mapply() returns a list, or for }SIMPLIFY = TRUE`, a vector,
array or list. See \code{\link[base:mapply]{base::mapply()}} for details.
}
\description{
\code{future_mapply()} implements \code{\link[base:mapply]{base::mapply()}} using futures with perfect
replication of results, regardless of future backend used.
Analogously to \code{mapply()}, \code{future_mapply()} is a multivariate version of
\code{future_sapply()}.
It applies \code{FUN} to the first elements of each \verb{\\ldots} argument,
the second elements, the third elements, and so on.
Arguments are recycled if necessary.
}
\examples{
## ---------------------------------------------------------
## mapply()
## ---------------------------------------------------------
y0 <- mapply(rep, 1:4, 4:1)
y1 <- future_mapply(rep, 1:4, 4:1)
stopifnot(identical(y1, y0))
y0 <- mapply(rep, times = 1:4, x = 4:1)
y1 <- future_mapply(rep, times = 1:4, x = 4:1)
stopifnot(identical(y1, y0))
y0 <- mapply(rep, times = 1:4, MoreArgs = list(x = 42))
y1 <- future_mapply(rep, times = 1:4, MoreArgs = list(x = 42))
stopifnot(identical(y1, y0))
y0 <- mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
y1 <- future_mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
stopifnot(identical(y1, y0))
word <- function(C, k) paste(rep.int(C, k), collapse = "")
y0 <- mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE)
y1 <- future_mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE)
stopifnot(identical(y1, y0))
## ---------------------------------------------------------
## Parallel Random Number Generation
## ---------------------------------------------------------
\donttest{
## Regardless of the future plan, the number of workers, and
## where they are, the random numbers produced are identical
plan(multiprocess)
y1 <- future_mapply(stats::runif, n = 1:4, max = 2:5,
MoreArgs = list(min = 1), future.seed = 0xBEEF)
print(y1)
plan(sequential)
y2 <- future_mapply(stats::runif, n = 1:4, max = 2:5,
MoreArgs = list(min = 1), future.seed = 0xBEEF)
print(y2)
stopifnot(all.equal(y1, y2))
}
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (!inherits(plan(), "sequential")) plan(sequential)
}
}
\author{
The implementations of \code{future_Map()} is adopted from the source code
of the corresponding base \R function \code{Map()}, which is licensed under
GPL (>= 2) with 'The R Core Team' as the copyright holder.
}
\keyword{iteration}
\keyword{manip}
\keyword{programming}
future.apply/man/fold.Rd 0000644 0001762 0000144 00000002563 13322430303 014675 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fold.R
\name{fold}
\alias{fold}
\title{Efficient Fold, Reduce, Accumulate, Combine of a Vector}
\usage{
fold(x, f, left = TRUE, unname = TRUE, threshold = 1000L)
}
\arguments{
\item{x}{A vector.}
\item{f}{A binary function, i.e. a function take takes two arguments.}
\item{left}{If \code{TRUE}, vector is combined from the left (the first element),
otherwise the right (the last element).}
\item{unname}{If \code{TRUE}, function \code{f} is called as
\code{f(unname(y), x[[ii]])}, otherwise as \code{f(y, x[[ii]])},
which may introduce name \code{"y"}.}
\item{threshold}{An integer (>= 2) specifying the length where the
recursive divide-and-conquer call will stop and incremental building of
the partial value is performed. Using \code{threshold = +Inf} will disable
recursive folding.}
}
\value{
A vector.
}
\description{
Efficient Fold, Reduce, Accumulate, Combine of a Vector
}
\details{
In order for recursive folding to give the same results as non-recursive
folding, binary function \code{f} must be \emph{associative} with itself, i.e.
\code{f(f(x[[1]], x[[2]]), x[[3]])} equals
\code{f(x[[1]], f(x[[2]]), x[[3]])}.
This function is a more efficient (memory and speed) of
\code{\link[base:Reduce]{Reduce(f, x, right = !left, accumulate = FALSE)}},
especially when \code{x} is long.
}
\keyword{internal}
future.apply/man/makeChunks.Rd 0000644 0001762 0000144 00000003523 13602733534 016054 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/chunks.R
\name{makeChunks}
\alias{makeChunks}
\title{Create Chunks of Index Vectors}
\usage{
makeChunks(
nbrOfElements,
nbrOfWorkers,
future.scheduling = 1,
future.chunk.size = NULL
)
}
\arguments{
\item{nbrOfElements}{(integer) Total number of elements to iterate over.}
\item{future.scheduling}{(numeric) A strictly positive scalar.
Only used if argument \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{(numeric) The maximum number of elements per
chunk, or \code{NULL}. If \code{NULL}, then the chunk sizes are given by the
\code{future.scheduling} argument.}
\item{nbrOfWorker}{(integer) Number of workers available.}
}
\value{
A list of chunks, where each chunk is an integer vector of
unique indices \code{[1, nbrOfElements]}. The union of all chunks
holds \code{nbrOfElements} elements and equals \code{1:nbrOfElements}.
If \code{nbrOfElements == 0}, then an empty list is returned.
}
\description{
\emph{This is an internal function.}
}
\section{Control processing order of elements}{
Attribute \code{ordering} of \code{future.chunk.size} or \code{future.scheduling} can
be used to control the ordering the elements are iterated over, which
only affects the processing order \emph{not} the order values are returned.
This attribute can take the following values:
\itemize{
\item index vector - an numeric vector of length \code{nbrOfElements} specifying
how elements are remapped
\item function - an function taking one argument which is called as
\code{ordering(nbrOfElements)} and which much return an
index vector of length \code{nbrOfElements}, e.g.
\code{function(n) rev(seq_len(n))} for reverse ordering.
\item \code{"random"} - this will randomize the ordering via random index
vector \code{sample.int(nbrOfElements)}.
}
}
\keyword{internal}
future.apply/man/future.apply.Rd 0000644 0001762 0000144 00000011144 13602733534 016417 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future.apply-package.R
\docType{package}
\name{future.apply}
\alias{future.apply}
\alias{future.apply-package}
\title{future.apply: Apply Function to Elements in Parallel using Futures}
\description{
The \pkg{future.apply} packages provides parallel implementations of
common "apply" functions provided by base \R. The parallel processing
is performed via the \pkg{future} ecosystem, which provides a large
number of parallel backends, e.g. on the local machine, a remote cluster,
and a high-performance compute cluster.
}
\details{
Currently implemented functions are:
\itemize{
\item \code{\link[=future_apply]{future_apply()}}: a parallel version of \link[base:apply]{apply()}
\item \code{\link[=future_by]{future_by()}}: a parallel version of \link[base:by]{by()}
\item \code{\link[=future_eapply]{future_eapply()}}: a parallel version of \link[base:lapply]{eapply()}
\item \code{\link[=future_lapply]{future_lapply()}}: a parallel version of \link[base:lapply]{lapply()}
\item \code{\link[=future_mapply]{future_mapply()}}: a parallel version of \link[base:mapply]{mapply()}
\item \code{\link[=future_sapply]{future_sapply()}}: a parallel version of \link[base:sapply]{sapply()}
\item \code{\link[=future_tapply]{future_tapply()}}: a parallel version of \link[base:tapply]{tapply()}
\item \code{\link[=future_vapply]{future_vapply()}}: a parallel version of \link[base:vapply]{vapply()}
\item \code{\link[=future_Map]{future_Map()}}: a parallel version of \link[base:Map]{Map()}
\item \code{\link[=future_replicate]{future_replicate()}}: a parallel version of \link[base:replicate]{replicate()}
}
Reproducibility is part of the core design, which means that perfect,
parallel random number generation (RNG) is supported regardless of the
amount of chunking, type of load balancing, and future backend being used.
Since these \verb{future_*()} functions have the same arguments as the
corresponding base \R function, start using them is often as simple as
renaming the function in the code. For example, after attaching the package:\if{html}{\out{
}}\preformatted{library(future.apply)
}\if{html}{\out{
}}
code such as:\if{html}{\out{}}\preformatted{x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
y <- lapply(x, quantile, probs = 1:3/4)
}\if{html}{\out{
}}
can be updated to:\if{html}{\out{}}\preformatted{y <- future_lapply(x, quantile, probs = 1:3/4)
}\if{html}{\out{
}}
The default settings in the \pkg{future} framework is to process code
\emph{sequentially}. To run the above in parallel on the local machine
(on any operating system), use:\if{html}{\out{}}\preformatted{plan(multiprocess)
}\if{html}{\out{
}}
first. That's it!
To go back to sequential processing, use \code{plan(sequential)}.
If you have access to multiple machines on your local network, use:\if{html}{\out{}}\preformatted{plan(cluster, workers = c("n1", "n2", "n2", "n3"))
}\if{html}{\out{
}}
This will set up four workers, one on \code{n1} and \code{n3}, and two on \code{n2}.
If you have SSH access to some remote machines, use:\if{html}{\out{}}\preformatted{plan(cluster, workers = c("m1.myserver.org", "m2.myserver.org))
}\if{html}{\out{
}}
See the \pkg{future} package and \code{\link[future:plan]{future::plan()}} for more examples.
The \pkg{future.batchtools} package provides support for high-performance
compute (HPC) cluster schedulers such as SGE, Slurm, and TORQUE / PBS.
For example,
\itemize{
\item \code{plan(batchtools_slurm)}:
Process via a Slurm scheduler job queue.
\item \code{plan(batchtools_torque)}:
Process via a TORQUE / PBS scheduler job queue.
}
This builds on top of the queuing framework that the \pkg{batchtools}
package provides. For more details on backend configuration, please see
the \pkg{future.batchtools} and \pkg{batchtools} packages.
These are just a few examples of parallel/distributed backend for the
future ecosystem. For more alternatives, see the 'Reverse dependencies'
section on the
\href{https://cran.r-project.org/package=future}{future CRAN package page}.
}
\author{
Henrik Bengtsson, except for the implementations of \code{future_apply()},
\code{future_Map()}, \code{future_replicate()}, \code{future_sapply()}, and
\code{future_tapply()}, which are adopted from the source code of the
corresponding base \R functions, which are licensed under GPL (>= 2)
with 'The R Core Team' as the copyright holder.
Because of these dependencies, the license of this package is GPL (>= 2).
}
\keyword{iteration}
\keyword{manip}
\keyword{programming}
future.apply/man/future_lapply.Rd 0000644 0001762 0000144 00000030147 13602733704 016657 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_eapply.R, R/future_lapply.R,
% R/future_replicate.R, R/future_sapply.R, R/future_tapply.R,
% R/future_vapply.R
\name{future_eapply}
\alias{future_eapply}
\alias{future_lapply}
\alias{future_replicate}
\alias{future_sapply}
\alias{future_tapply}
\alias{future_vapply}
\title{Apply a Function over a List or Vector via Futures}
\usage{
future_eapply(
env,
FUN,
...,
all.names = FALSE,
USE.NAMES = TRUE,
future.label = "future_eapply-\%d"
)
future_lapply(
X,
FUN,
...,
future.stdout = TRUE,
future.conditions = NULL,
future.globals = TRUE,
future.packages = NULL,
future.lazy = FALSE,
future.seed = FALSE,
future.scheduling = 1,
future.chunk.size = NULL,
future.label = "future_lapply-\%d"
)
future_replicate(
n,
expr,
simplify = "array",
future.seed = TRUE,
...,
future.label = "future_replicate-\%d"
)
future_sapply(
X,
FUN,
...,
simplify = TRUE,
USE.NAMES = TRUE,
future.label = "future_sapply-\%d"
)
future_tapply(
X,
INDEX,
FUN = NULL,
...,
default = NA,
simplify = TRUE,
future.label = "future_tapply-\%d"
)
future_vapply(
X,
FUN,
FUN.VALUE,
...,
USE.NAMES = TRUE,
future.label = "future_vapply-\%d"
)
}
\arguments{
\item{env}{An \R environment.}
\item{FUN}{A function taking at least one argument.}
\item{all.names}{If \code{TRUE}, the function will also be applied to variables
that start with a period (\code{.}), otherwise not.
See \code{\link[base:eapply]{base::eapply()}} for details.}
\item{USE.NAMES}{See \code{\link[base:sapply]{base::sapply()}}.}
\item{future.label}{If a character string, then each future is assigned
a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the
same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels
are assigned.}
\item{X}{A vector-like object to iterate over.}
\item{future.stdout}{If \code{TRUE} (default), then the standard output of the
underlying futures is captured, and re-outputted as soon as possible.
If \code{FALSE}, any output is silenced (by sinking it to the null device
as it is outputted).
If \code{NA} (not recommended), output is \emph{not} intercepted.}
\item{future.conditions}{A character string of conditions classes to be
captured and relayed. The default is the same as the \code{condition}
argument of \code{\link[future:Future]{future::Future()}}.
To not intercept conditions, use \code{conditions = character(0L)}.
Errors are always relayed.}
\item{future.globals}{A logical, a character vector, or a named list for
controlling how globals are handled. For details, see below section.}
\item{future.packages}{(optional) a character vector specifying packages
to be attached in the R environment evaluating the future.}
\item{future.lazy}{Specifies whether the futures should be resolved
lazily or eagerly (default).}
\item{future.seed}{A logical or an integer (of length one or seven),
or a list of \code{length(X)} with pre-generated random seeds.
For details, see below section.}
\item{future.scheduling}{Average number of futures ("chunks") per worker.
If \code{0.0}, then a single future is used to process all elements
of \code{X}.
If \code{1.0} or \code{TRUE}, then one future per worker is used.
If \code{2.0}, then each worker will process two futures
(if there are enough elements in \code{X}).
If \code{Inf} or \code{FALSE}, then one future per element of
\code{X} is used.
Only used if \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{The average number of elements per future ("chunk").
If \code{Inf}, then all elements are processed in a single future.
If \code{NULL}, then argument \code{future.scheduling} is used.}
\item{n}{The number of replicates.}
\item{expr}{An \R expression to evaluate repeatedly.}
\item{simplify}{See \code{\link[base:sapply]{base::sapply()}} and \code{\link[base:tapply]{base::tapply()}}, respectively.}
\item{INDEX}{A list of one or more factors, each of same length as \code{X}.
The elements are coerced to factors by \code{as.factor()}.
See also \code{\link[base:tapply]{base::tapply()}}.}
\item{default}{See \code{\link[base:tapply]{base::tapply()}}.}
\item{FUN.VALUE}{A template for the required return value from
each \code{FUN(X[ii], ...)}.
Types may be promoted to a higher type within the ordering
logical < integer < double < complex, but not demoted.
See \code{\link[base:vapply]{base::vapply()}} for details.}
\item{\ldots}{(optional) Additional arguments passed to \code{FUN()}.
For \code{future_*apply()} functions and \code{replicate()}, any \verb{future.*} arguments
part of \verb{\\ldots} are passed on to \code{future_lapply()} used internally.}
}
\value{
A named (unless \code{USE.NAMES = FALSE}) list.
See \code{\link[base:eapply]{base::eapply()}} for details.
For \code{future_lapply()}, a list with same length and names as \code{X}.
See \code{\link[base:lapply]{base::lapply()}} for details.
\code{future_replicate()} is a wrapper around \code{future_sapply()} and return
simplified object according to the \code{simplify} argument.
See \code{\link[base:replicate]{base::replicate()}} for details.
Since \code{future_replicate()} usually involves random number generation (RNG),
it uses \code{future.seed = TRUE} by default in order produce sound random
numbers regardless of future backend and number of background workers used.
For \code{future_sapply()}, a vector with same length and names as \code{X}.
See \code{\link[base:sapply]{base::sapply()}} for details.
\code{future_tapply()} returns an array with mode \code{"list"}, unless
\code{simplify = TRUE} (default) \emph{and} \code{FUN} returns a scalar, in which
case the mode of the array is the same as the returned scalars.
See \code{\link[base:tapply]{base::tapply()}} for details.
For \code{future_vapply()}, a vector with same length and names as \code{X}.
See \code{\link[base:vapply]{base::vapply()}} for details.
}
\description{
\code{future_lapply()} implements \code{\link[base:lapply]{base::lapply()}} using futures with perfect
replication of results, regardless of future backend used.
Analogously, this is true for all the other \code{future_nnn()} functions.
}
\section{Global variables}{
Argument \code{future.globals} may be used to control how globals
should be handled similarly how the \code{globals} argument is used with
\code{future()}.
Since all function calls use the same set of globals, this function can do
any gathering of globals upfront (once), which is more efficient than if
it would be done for each future independently.
If \code{TRUE}, \code{NULL} or not is specified (default), then globals
are automatically identified and gathered.
If a character vector of names is specified, then those globals are gathered.
If a named list, then those globals are used as is.
In all cases, \code{FUN} and any \verb{\\ldots} arguments are automatically
passed as globals to each future created as they are always needed.
}
\section{Reproducible random number generation (RNG)}{
Unless \code{future.seed = FALSE}, this function guarantees to generate
the exact same sequence of random numbers \emph{given the same initial
seed / RNG state} - this regardless of type of futures, scheduling
("chunking") strategy, and number of workers.
RNG reproducibility is achieved by pregenerating the random seeds for all
iterations (over \code{X}) by using L'Ecuyer-CMRG RNG streams. In each
iteration, these seeds are set before calling \code{FUN(X[[ii]], ...)}.
\emph{Note, for large \code{length(X)} this may introduce a large overhead.}
As input (\code{future.seed}), a fixed seed (integer) may be given, either
as a full L'Ecuyer-CMRG RNG seed (vector of 1+6 integers) or as a seed
generating such a full L'Ecuyer-CMRG seed.
If \code{future.seed = TRUE}, then \code{\link[base:Random]{.Random.seed}}
is returned if it holds a L'Ecuyer-CMRG RNG seed, otherwise one is created
randomly.
If \code{future.seed = NA}, a L'Ecuyer-CMRG RNG seed is randomly created.
If none of the function calls \code{FUN(X[[ii]], ...)} uses random number
generation, then \code{future.seed = FALSE} may be used.
In addition to the above, it is possible to specify a pre-generated
sequence of RNG seeds as a list such that
\code{length(future.seed) == length(X)} and where each element is an
integer seed vector that can be assigned to
\code{\link[base:Random]{.Random.seed}}. One approach to generate a
set of valid RNG seeds based on fixed initial seed (here \code{42L}) is:\if{html}{\out{}}\preformatted{seeds <- future_lapply(seq_along(X), FUN = function(x) .Random.seed,
future.chunk.size = Inf, future.seed = 42L)
}\if{html}{\out{
}}
\strong{Note that \code{as.list(seq_along(X))} is \emph{not} a valid set of such
\code{.Random.seed} values.}
In all cases but \code{future.seed = FALSE}, the RNG state of the calling
R processes after this function returns is guaranteed to be
"forwarded one step" from the RNG state that was before the call and
in the same way regardless of \code{future.seed}, \code{future.scheduling}
and future strategy used. This is done in order to guarantee that an \R
script calling \code{future_lapply()} multiple times should be numerically
reproducible given the same initial seed.
}
\section{Control processing order of elements}{
Attribute \code{ordering} of \code{future.chunk.size} or \code{future.scheduling} can
be used to control the ordering the elements are iterated over, which
only affects the processing order and \emph{not} the order values are returned.
This attribute can take the following values:
\itemize{
\item index vector - an numeric vector of length \code{length(X)}
\item function - an function taking one argument which is called as
\code{ordering(length(X))} and which much return an
index vector of length \code{length(X)}, e.g.
\code{function(n) rev(seq_len(n))} for reverse ordering.
\item \code{"random"} - this will randomize the ordering via random index
vector \code{sample.int(length(X))}.
For example, \code{future.scheduling = structure(TRUE, ordering = "random")}.
\emph{Note}, when elements are processed out of order, then captured standard
output and conditions are also relayed in that order, that is out of order.
}
}
\examples{
## ---------------------------------------------------------
## lapply(), sapply(), tapply()
## ---------------------------------------------------------
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE))
y0 <- lapply(x, FUN = quantile, probs = 1:3/4)
y1 <- future_lapply(x, FUN = quantile, probs = 1:3/4)
print(y1)
stopifnot(all.equal(y1, y0))
y0 <- sapply(x, FUN = quantile)
y1 <- future_sapply(x, FUN = quantile)
print(y1)
stopifnot(all.equal(y1, y0))
y0 <- vapply(x, FUN = quantile, FUN.VALUE = double(5L))
y1 <- future_vapply(x, FUN = quantile, FUN.VALUE = double(5L))
print(y1)
stopifnot(all.equal(y1, y0))
## ---------------------------------------------------------
## Parallel Random Number Generation
## ---------------------------------------------------------
\donttest{
## Regardless of the future plan, the number of workers, and
## where they are, the random numbers produced are identical
plan(multiprocess)
y1 <- future_lapply(1:5, FUN = rnorm, future.seed = 0xBEEF)
str(y1)
plan(sequential)
y2 <- future_lapply(1:5, FUN = rnorm, future.seed = 0xBEEF)
str(y2)
stopifnot(all.equal(y1, y2))
}
## ---------------------------------------------------------
## Process chunks of data.frame rows in parallel
## ---------------------------------------------------------
iris <- datasets::iris
chunks <- split(iris, seq(1, nrow(iris), length.out = 3L))
y0 <- lapply(chunks, FUN = function(iris) sum(iris$Sepal.Length))
y0 <- do.call(sum, y0)
y1 <- future_lapply(chunks, FUN = function(iris) sum(iris$Sepal.Length))
y1 <- do.call(sum, y1)
print(y1)
stopifnot(all.equal(y1, y0))
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (!inherits(plan(), "sequential")) plan(sequential)
}
}
\author{
The implementations of \code{future_replicate()}, \code{future_sapply()}, and
\code{future_tapply()} are adopted from the source code of the corresponding
base \R functions, which are licensed under GPL (>= 2) with
'The R Core Team' as the copyright holder.
}
\keyword{iteration}
\keyword{manip}
\keyword{programming}
future.apply/man/future_apply.Rd 0000644 0001762 0000144 00000006331 13603012060 016463 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_apply.R
\name{future_apply}
\alias{future_apply}
\title{Apply Functions Over Array Margins via Futures}
\usage{
future_apply(
X,
MARGIN,
FUN,
...,
future.globals = TRUE,
future.packages = NULL,
future.label = "future_apply-\%d"
)
}
\arguments{
\item{X}{an array, including a matrix.}
\item{MARGIN}{A vector giving the subscripts which the function will be
applied over. For example, for a matrix \code{1} indicates rows, \code{2} indicates
columns, \code{c(1, 2)} indicates rows and columns.
Where \code{X} has named dimnames, it can be a character vector selecting
dimension names.}
\item{FUN}{A function taking at least one argument.}
\item{future.globals}{A logical, a character vector, or a named list for
controlling how globals are handled. For details, see below section.}
\item{future.packages}{(optional) a character vector specifying packages
to be attached in the R environment evaluating the future.}
\item{future.label}{If a character string, then each future is assigned
a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the
same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels
are assigned.}
\item{\ldots}{(optional) Additional arguments passed to \code{FUN()}, except
\verb{future.*} arguments, which are passed on to \code{future_lapply()} used
internally.}
}
\value{
Returns a vector or array or list of values obtained by applying a
function to margins of an array or matrix.
See \code{\link[base:apply]{base::apply()}} for details.
}
\description{
\code{future_apply()} implements \code{\link[base:apply]{base::apply()}} using future with perfect
replication of results, regardless of future backend used.
It returns a vector or array or list of values obtained by applying a
function to margins of an array or matrix.
}
\examples{
## ---------------------------------------------------------
## apply()
## ---------------------------------------------------------
X <- matrix(c(1:4, 1, 6:8), nrow = 2L)
Y0 <- apply(X, MARGIN = 1L, FUN = table)
Y1 <- future_apply(X, MARGIN = 1L, FUN = table)
print(Y1)
stopifnot(all.equal(Y1, Y0, check.attributes = FALSE)) ## FIXME
Y0 <- apply(X, MARGIN = 1L, FUN = stats::quantile)
Y1 <- future_apply(X, MARGIN = 1L, FUN = stats::quantile)
print(Y1)
stopifnot(all.equal(Y1, Y0))
## ---------------------------------------------------------
## Parallel Random Number Generation
## ---------------------------------------------------------
\donttest{
## Regardless of the future plan, the number of workers, and
## where they are, the random numbers produced are identical
X <- matrix(c(1:4, 1, 6:8), nrow = 2L)
plan(multiprocess)
Y1 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF)
print(Y1)
plan(sequential)
Y2 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF)
print(Y2)
stopifnot(all.equal(Y1, Y2))
}
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (!inherits(plan(), "sequential")) plan(sequential)
}
}
\author{
The implementations of \code{future_apply()} is adopted from the source code
of the corresponding base \R function, which is licensed under GPL (>= 2)
with 'The R Core Team' as the copyright holder.
}
future.apply/man/make_rng_seeds.Rd 0000644 0001762 0000144 00000001564 13602733534 016734 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rng.R
\name{make_rng_seeds}
\alias{make_rng_seeds}
\title{Produce Reproducible Seeds for Parallel Random Number Generation}
\usage{
make_rng_seeds(count, seed = FALSE, debug = getOption("future.debug", FALSE))
}
\arguments{
\item{count}{The number of RNG seeds to produce.}
\item{seed}{A logical specifying whether RNG seeds should be generated
or not. (\code{seed = NULL} corresponds to \code{seed = FALSE}).
If a list, then it should be of length \code{count} and each element should
consist of a valid RNG seed.}
\item{debug}{If \code{TRUE}, debug output is produced, otherwise not.}
}
\value{
Returns a non-named list of length \code{count}, or \code{NULL}.
Any seed returned is a valid RNG seed.
}
\description{
Produce Reproducible Seeds for Parallel Random Number Generation
}
\keyword{internal}
future.apply/man/future_by.Rd 0000644 0001762 0000144 00000003711 13440257025 015762 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_by.R
\name{future_by}
\alias{future_by}
\title{Apply a Function to a Data Frame Split by Factors via Futures}
\usage{
future_by(data, INDICES, FUN, ..., simplify = TRUE)
}
\arguments{
\item{data}{An \R object, normally a data frame, possibly a matrix.}
\item{INDICES}{A factor or a list of factors, each of length \code{nrow(data)}.}
\item{FUN}{a function to be applied to (usually data-frame) subsets of \code{data}.}
\item{simplify}{logical: see \link[base:tapply]{base::tapply}.}
\item{\ldots}{Additional arguments pass to \code{\link[=future_lapply]{future_lapply()}} and
then to \code{FUN()}.}
}
\value{
An object of class "by", giving the results for each subset.
This is always a list if simplify is false, otherwise a list
or array (see \link[base:tapply]{base::tapply}).
See also \code{\link[base:by]{base::by()}} for details.
}
\description{
Apply a Function to a Data Frame Split by Factors via Futures
}
\details{
Internally, \code{data} is grouped by \code{INDICES} into a list of \code{data}
subset elements which is then processed by \code{\link[=future_lapply]{future_lapply()}}.
When the groups differ significantly in size, the processing time
may differ significantly between the groups.
To correct for processing-time imbalances, adjust the amount of chunking
via arguments \code{future.scheduling} and \code{future.chunk.size}.
}
\examples{
## ---------------------------------------------------------
## by()
## ---------------------------------------------------------
library(datasets) ## warpbreaks
library(stats) ## lm()
y0 <- by(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
plan(multiprocess)
y1 <- future_by(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
plan(sequential)
y2 <- future_by(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
}
future.apply/DESCRIPTION 0000644 0001762 0000144 00000002465 13605176412 014432 0 ustar ligges users Package: future.apply
Version: 1.4.0
Title: Apply Function to Elements in Parallel using Futures
Depends: R (>= 3.2.0), future (>= 1.15.1)
Imports: globals (>= 0.12.5)
Suggests: datasets, stats, tools, listenv (>= 0.8.0), R.rsp, markdown
VignetteBuilder: R.rsp
Authors@R: c(person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"),
email = "henrikb@braju.com"),
person("R Core Team", role = c("cph", "ctb")))
Description: Implementations of apply(), by(), eapply(), lapply(), Map(), mapply(), replicate(), sapply(), tapply(), and vapply() that can be resolved using any future-supported backend, e.g. parallel on the local machine or distributed on a compute cluster. These future_*apply() functions come with the same pros and cons as the corresponding base-R *apply() functions but with the additional feature of being able to be processed via the future framework.
License: GPL (>= 2)
LazyLoad: TRUE
URL: https://github.com/HenrikBengtsson/future.apply
BugReports: https://github.com/HenrikBengtsson/future.apply/issues
RoxygenNote: 7.0.2
NeedsCompilation: no
Packaged: 2020-01-06 23:43:16 UTC; hb
Author: Henrik Bengtsson [aut, cre, cph],
R Core Team [cph, ctb]
Maintainer: Henrik Bengtsson
Repository: CRAN
Date/Publication: 2020-01-07 21:50:02 UTC
future.apply/build/ 0000755 0001762 0000144 00000000000 13604743022 014010 5 ustar ligges users future.apply/build/vignette.rds 0000644 0001762 0000144 00000000474 13604743022 016354 0 ustar ligges users uQ=O0t4RWRRUX6±#I&~9ilDawg! 9sfޛ9"q_JbeB(JO볞SNZќ
H`wouk;3y31Wty^JF[-!Z8G^-cRUjyzJYܤtyاSV)4:y.&o6B:ju\s]AYV],/"J$.|d}@2А쥩(=Y future.apply/tests/ 0000755 0001762 0000144 00000000000 13602734532 014057 5 ustar ligges users future.apply/tests/fold.R 0000644 0001762 0000144 00000003627 13443044773 015142 0 ustar ligges users source("incl/start,load-only.R")
message("*** fold() ...")
x1s <- list(
a = NULL,
b = 1,
c = c(a = 1, b = 2),
d = 1:10e3
)
x2s <- lapply(x1s, FUN = as.list)
names(x2s) <- toupper(names(x1s))
x3s <- list(
E = data.frame(a = 1:3),
F = data.frame(a = 1:3, b = letters[1:3])
)
xs <- c(x1s, x2s, x3s)
fcns <- list("c" = base::c, "cbind" = base::cbind)
for (kk in seq_along(xs)) {
x_name <- names(xs)[kk]
for (fcn_name in names(fcns)) {
fcn <- fcns[[fcn_name]]
message(sprintf(" - #%d. %s(x[['%s']]) ...", kk, fcn_name, x_name))
x <- xs[[kk]]
str(list(x = x))
y0 <- Reduce(x, f = fcn)
y1 <- fold(x, f = fcn)
y2 <- fold(x, f = fcn, unname = FALSE)
str(list(y0 = y0, y1 = y1, y2 = y2))
stopifnot(all.equal(unname(y1), unname(y0)))
stopifnot(all.equal(unname(y2), unname(y0)))
if (!fcn_name %in% "cbind") {
stopifnot(all.equal(y1, y0))
stopifnot(all.equal(y2, y0))
}
y0 <- Reduce(x, f = fcn, right = TRUE)
y1 <- fold(x, f = fcn, left = FALSE)
y2 <- fold(x, f = fcn, left = FALSE, unname = FALSE)
str(list(y0 = y0, y1 = y1, y2 = y2))
stopifnot(all.equal(unname(y1), unname(y0)))
stopifnot(all.equal(unname(y2), unname(y0)))
if (!fcn_name %in% "cbind") {
stopifnot(all.equal(y1, y0))
stopifnot(all.equal(y2, y0))
}
message(sprintf(" - #%d. %s(x[['%s']]) ... DONE", kk, fcn_name, x_name))
}
}
make_table <- function(n) data.frame(key = sample(n), value = sample(n))
sizes <- rep(10, 20)
set.seed(3180)
tables <- lapply(sizes, make_table)
key_merge <- function(x, y) merge(x, y, by = "key", all = FALSE)
suppressWarnings(
folded <- fold(tables, key_merge, left = TRUE, unname = FALSE,
threshold = 6L)
)
suppressWarnings(
reduced <- Reduce(key_merge, tables[-1], tables[[1]])
)
stopifnot(all.equal(unname(folded), unname(reduced)))
message("*** fold() ... DONE")
source("incl/end.R")
future.apply/tests/future_mapply,globals.R 0000644 0001762 0000144 00000014162 13443044773 020526 0 ustar ligges users source("incl/start.R")
library("tools") ## toTitleCase()
message("*** future_mapply() - globals ...")
#plan(cluster, workers = "localhost")
plan(sequential)
options(future.debug = FALSE)
a <- 1
b <- 2
globals_set <- list(
A = FALSE,
B = TRUE,
C = c("a", "b"),
D = list(a = 2, b = 3)
)
x <- list(1)
for (name in names(globals_set)) {
globals <- globals_set[[name]]
message("Globals set ", sQuote(name))
y_truth <- tryCatch({
mapply(function(x) median(c(x, a, b)), x)
}, error = identity)
y <- tryCatch({
future_mapply(function(x) median(c(x, a, b)), x,
future.globals = globals, future.packages = "utils")
}, error = identity)
print(y)
stopifnot(identical(y, y_truth))
}
message("*** future_mapply() - globals ... DONE")
## Test adopted from http://stackoverflow.com/questions/42561088/nested-do-call-within-a-foreach-dopar-environment-cant-find-function-passed-w
message("*** future_mapply() - tricky globals ...")
my_add <- function(a, b) a + b
call_my_add <- function(a, b) {
do.call(my_add, args = list(a = a, b = b))
}
call_my_add_caller <- function(a, b, FUN = call_my_add) {
do.call(FUN, args = list(a = a, b = b))
}
main <- function(x = 1:2, caller = call_my_add_caller,
args = list(FUN = call_my_add)) {
results <- future_mapply(function(i) {
do.call(caller, args = c(list(a = i, b = i + 1L), args))
}, x)
results
}
x <- list(list(1:2))
z_length <- mapply(do.call, args = x, MoreArgs = list(what = length))
fun <- function(...) sum(...)
z_fun <- mapply(do.call, args = x, MoreArgs = list(what = fun))
y0 <- NULL
for (strategy in supportedStrategies()) {
plan(strategy)
y <- main(1:3)
if (is.null(y0)) y0 <- y
stopifnot(identical(y, y0))
message("- future_mapply(do.call, x, ...) ...")
z <- future_mapply(do.call, args = x, MoreArgs = list(what = length))
stopifnot(identical(z, z_length))
z <- future_mapply(do.call, args = x, MoreArgs = list(what = fun))
stopifnot(identical(z, z_fun))
message("- future_mapply(FUN, x, ...) - passing arguments via '...' ...")
## typeof() == "list"
obj <- data.frame(a = 1:2)
stopifnot(typeof(obj) == "list")
y <- future_mapply(function(a, b) typeof(b), 1L, MoreArgs = list(b = obj))
stopifnot(identical(y[[1]], typeof(obj)))
## typeof() == "environment"
obj <- new.env()
stopifnot(typeof(obj) == "environment")
y <- future_mapply(function(a, b) typeof(b), 1L, MoreArgs = list(b = obj))
stopifnot(identical(y[[1]], typeof(obj)))
## typeof() == "S4"
if (requireNamespace("methods")) {
obj <- methods::getClass("MethodDefinition")
stopifnot(typeof(obj) == "S4")
y <- future_mapply(function(a, b) typeof(b), 1L, MoreArgs = list(b = obj))
stopifnot(identical(y[[1]], typeof(obj)))
}
message("- future_mapply(FUN, X, ...) - 'X' containing globals ...")
## From https://github.com/HenrikBengtsson/future.apply/issues/12
a <- 42
b <- 21
X <- list(
function(b) 2 * a,
function() b / 2,
function() a + b,
function() nchar(toTitleCase("hello world"))
)
z0 <- mapply(function(s, f) f() + s, s = seq_along(X), X)
str(z0)
z1 <- future_mapply(function(s, f) f() + s, s = seq_along(X), X)
str(z1)
stopifnot(identical(z1, z0))
}
message("*** future_mapply() - tricky globals ... DONE")
message("*** future_mapply() - missing arguments ...")
## Here 'abc' becomes missing, i.e. missing(abc) is TRUE
foo <- function(x, abc) mapply(function(y) y, x)
y0 <- foo(1:2)
foo <- function(x, abc) future_mapply(function(y) y, x)
y <- foo(1:2)
stopifnot(identical(y, y0))
message("*** future_mapply() - missing arguments ... DONE")
message("*** future_mapply() - false positives ...")
## Here 'abc' becomes a promise, which fails to resolve
## iff 'xyz' does not exist. (Issue #161)
suppressWarnings(rm(list = "xyz"))
foo <- function(x, abc) mapply(function(y) y, x)
y0 <- foo(1:2, abc = (xyz >= 3.14))
foo <- function(x, abc) future_mapply(function(y) y, x)
y <- foo(1:2, abc = (xyz >= 3.14))
stopifnot(identical(y, y0))
message("*** future_mapply() - false positives ... DONE")
message("*** future_mapply() - too large ...")
X <- replicate(10L, 1:100, simplify = FALSE)
FUN <- function(x) {
getOption("future.globals.maxSize")
}
y0 <- mapply(FUN = FUN, X)
sizes <- unclass(c(FUN = object.size(FUN), X = object.size(X)))
cat(sprintf("Baseline size of globals: %.2f KiB\n", sizes[["FUN"]] / 1024))
message("- true positive ...")
oMaxSize <- getOption("future.globals.maxSize")
options(future.globals.maxSize = 1L)
res <- tryCatch({
y <- future_mapply(FUN = FUN, X)
}, error = identity)
stopifnot(inherits(res, "error"))
res <- NULL
options(future.globals.maxSize = oMaxSize)
maxSize <- getOption("future.globals.maxSize")
y <- future_mapply(FUN = FUN, X)
str(y)
stopifnot(all(sapply(y, FUN = identical, oMaxSize)))
message("- approximately invariant to chunk size ...")
maxSize <- sizes[["FUN"]] + sizes[["X"]] / length(X)
options(future.globals.maxSize = maxSize)
for (chunk.size in c(1L, 2L, 5L, structure(10L, ordering = "random"))) {
y <- future_mapply(FUN = FUN, X, future.chunk.size = chunk.size)
str(y)
stopifnot(all(unlist(y) == maxSize))
cat(sprintf("maxSize = %g bytes\nfuture.globals.maxSize = %g bytes\n",
maxSize, getOption("future.globals.maxSize")))
stopifnot(getOption("future.globals.maxSize") == maxSize)
}
y <- NULL
options(future.globals.maxSize = oMaxSize)
message("*** future_mapply() - too large ... DONE")
message("*** future_mapply() - globals exceptions ...")
res <- tryCatch({
y <- future_mapply(function(x) x, 1, future.globals = 42)
}, error = identity)
stopifnot(inherits(res, "error"))
res <- tryCatch({
y <- future_mapply(function(x) x, 1, future.globals = list(1))
}, error = identity)
stopifnot(inherits(res, "error"))
res <- tryCatch({
y <- future_mapply(function(x) x, 1, future.globals = "...future.FUN")
}, error = identity)
stopifnot(inherits(res, "error"))
...future.elements_ii <- 42L
X <- list(function() 2 * ...future.elements_ii)
res <- tryCatch({
y <- future_mapply(FUN = function(f) f(), X)
}, error = identity)
stopifnot(inherits(res, "error"))
message("*** future_mapply() - globals exceptions ... DONE")
source("incl/end.R")
future.apply/tests/future_tapply.R 0000644 0001762 0000144 00000011460 13443044773 017113 0 ustar ligges users source("incl/start.R")
library("datasets") ## warpbreaks
options(future.debug = FALSE)
message("*** future_tapply() ...")
for (strategy in supportedStrategies()[1]) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
message("- From example(tapply) ...")
message(" - Example #1")
library("stats") ## rbinom()
groups <- as.factor(stats::rbinom(32, n = 5, prob = 0.4))
t <- table(groups)
print(t)
y0 <- tapply(groups, INDEX = groups, FUN = length)
print(y0)
y1 <- future_tapply(groups, INDEX = groups, FUN = length)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #2")
## contingency table from data.frame : array with named dimnames
y0 <- tapply(warpbreaks$breaks, INDEX = warpbreaks[,-1], FUN = sum)
print(y0)
y1 <- future_tapply(warpbreaks$breaks, INDEX = warpbreaks[,-1], FUN = sum)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #3")
y0 <- tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)
print(y0)
y1 <- future_tapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #4")
n <- 17
fac <- factor(rep_len(1:3, n), levels = 1:5)
t <- table(fac)
y0 <- tapply(1:n, fac, sum)
print(y0)
y1 <- future_tapply(1:n, fac, sum)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #5")
if ("default" %in% names(formals(tapply))) {
y0 <- tapply(1:n, fac, sum, default = 0) # maybe more desirable
print(y0)
y1 <- future_tapply(1:n, fac, sum, default = 0) # maybe more desirable
print(y1)
stopifnot(all.equal(y1, y0))
}
message(" - Example #6")
y0 <- tapply(1:n, fac, sum, simplify = FALSE)
print(y0)
y1 <- future_tapply(1:n, fac, sum, simplify = FALSE)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #7")
y0 <- tapply(1:n, fac, range)
print(y0)
y1 <- future_tapply(1:n, fac, range)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #8")
y0 <- tapply(1:n, fac, quantile)
print(y0)
y1 <- future_tapply(1:n, fac, quantile)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #9")
y0 <- tapply(1:n, fac, length) ## NA's
print(y0)
y1 <- future_tapply(1:n, fac, length) ## NA's
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #10")
if ("default" %in% names(formals(tapply))) {
y0 <- tapply(1:n, fac, length, default = 0) # == table(fac)
print(y0)
y1 <- future_tapply(1:n, fac, length, default = 0) # == table(fac)
print(y1)
stopifnot(all.equal(y1, y0))
}
message(" - Example #11")
## example of ... argument: find quarterly means
y0 <- tapply(presidents, cycle(presidents), mean, na.rm = TRUE)
print(y0)
y1 <- future_tapply(presidents, cycle(presidents), mean, na.rm = TRUE)
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #12")
ind <- list(c(1, 2, 2), c("A", "A", "B"))
t <- table(ind)
print(t)
y0 <- tapply(1:3, ind) #-> the split vector
print(y0)
y1 <- future_tapply(1:3, ind) #-> the split vector
print(y1)
stopifnot(all.equal(y1, y0))
message(" - Example #13")
y0 <- tapply(1:3, ind, sum)
print(y0)
y1 <- future_tapply(1:3, ind, sum)
print(y1)
stopifnot(all.equal(y1, y0))
## Some assertions (not held by all patch propsals):
message(" - Example #14")
nq <- names(quantile(1:5))
y_truth <- c(1L, 2L, 4L)
stopifnot(identical(tapply(1:3, ind), y_truth))
stopifnot(identical(future_tapply(1:3, ind), y_truth))
message(" - Example #15")
y_truth <- matrix(c(1L, 2L, NA, 3L), nrow = 2L,
dimnames = list(c("1", "2"), c("A", "B")))
stopifnot(identical(tapply(1:3, ind, sum), y_truth))
stopifnot(identical(future_tapply(1:3, ind, sum), y_truth))
message(" - Example #16")
y_truth <- array(list(
`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq),
`3` = structure(c(3, 6, 9, 12, 15), .Names = nq),
`4` = NULL, `5` = NULL),
dim = 4L, dimnames = list(as.character(2:5)))
stopifnot(identical(tapply(1:n, fac, quantile)[-1], y_truth))
stopifnot(identical(future_tapply(1:n, fac, quantile)[-1], y_truth))
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("*** exceptions ...")
## Error: 'INDEX' is of length zero
res <- tryCatch({
y <- future_tapply(1L, INDEX = list())
}, error = identity)
stopifnot(inherits(res, "error"))
## Error: total number of levels >= 2^31
res <- tryCatch({
y <- future_tapply(1:216, INDEX = rep(list(1:216), times = 4L))
}, error = identity)
stopifnot(inherits(res, "error"))
## Error: arguments must have same length
res <- tryCatch({
y <- future_tapply(1L, INDEX = list(1:2))
}, error = identity)
stopifnot(inherits(res, "error"))
message("*** future_tapply() ... DONE")
source("incl/end.R")
future.apply/tests/incl/ 0000755 0001762 0000144 00000000000 13502071600 014771 5 ustar ligges users future.apply/tests/incl/start,load-only.R 0000644 0001762 0000144 00000003731 13502071600 020150 0 ustar ligges users ## Record original state
ovars <- ls()
oenvs <- oenvs0 <- Sys.getenv()
oopts0 <- options()
covr_testing <- ("covr" %in% loadedNamespaces())
on_solaris <- grepl("^solaris", R.version$os)
## Default options
oopts <- options(
warn = 1L,
mc.cores = 2L,
future.debug = TRUE,
## Reset the following during testing in case
## they are set on the test system
future.availableCores.system = NULL,
future.availableCores.fallback = NULL
)
## Reset the following during testing in case
## they are set on the test system
oenvs2 <- Sys.unsetenv(c(
"R_FUTURE_AVAILABLECORES_SYSTEM",
"R_FUTURE_AVAILABLECORES_FALLBACK",
## SGE
"NSLOTS", "PE_HOSTFILE",
## Slurm
"SLURM_CPUS_PER_TASK",
## TORQUE / PBS
"PBS_NUM_PPN", "PBS_NODEFILE", "PBS_NP", "PBS_NUM_NODES"
))
oplan <- future::plan()
## Use eager futures by default
future::plan("sequential")
## Private future.apply functions
fold <- future.apply:::fold
hpaste <- future.apply:::hpaste
mdebug <- future.apply:::mdebug
mdebugf <- future.apply:::mdebugf
import_from <- future.apply:::import_from
get_random_seed <- future.apply:::get_random_seed
set_random_seed <- future.apply:::set_random_seed
as_lecyer_cmrg_seed <- future.apply:::as_lecyer_cmrg_seed
is_lecyer_cmrg_seed <- future.apply:::is_lecyer_cmrg_seed
make_rng_seeds <- future.apply:::make_rng_seeds
## Local functions for test scripts
printf <- function(...) cat(sprintf(...))
mstr <- function(...) message(paste(capture.output(str(...)), collapse = "\n"))
attachLocally <- function(x, envir = parent.frame()) {
for (name in names(x)) {
assign(name, value = x[[name]], envir = envir)
}
}
supportedStrategies <- function(cores = 1L, excl = c("multiprocess", "cluster"), ...) {
strategies <- future:::supportedStrategies(...)
strategies <- setdiff(strategies, excl)
if (cores > 1) {
strategies <- setdiff(strategies,
c("sequential", "uniprocess", "eager", "lazy"))
}
strategies
}
availCores <- min(2L, future::availableCores())
future.apply/tests/incl/start.R 0000644 0001762 0000144 00000000071 13322430303 016246 0 ustar ligges users library("future.apply")
source("incl/start,load-only.R")
future.apply/tests/incl/end.R 0000644 0001762 0000144 00000002411 13322430303 015657 0 ustar ligges users ## Undo future strategy
future::plan(oplan)
## Undo options
## (a) Added
added <- setdiff(names(options()), names(oopts0))
opts <- vector("list", length = length(added))
names(opts) <- added
options(opts)
## (b) Modified
options(oopts)
## (c) Assert that everything was undone
stopifnot(all.equal(options(), oopts0))
## Undo system environment variables
## (a) Added
cenvs <- Sys.getenv()
added <- setdiff(names(cenvs), names(oenvs0))
for (name in added) Sys.unsetenv(name)
## (b) Missing
missing <- setdiff(names(oenvs0), names(cenvs))
if (length(missing) > 0) do.call(Sys.setenv, as.list(oenvs0[missing]))
## (c) Modified?
for (name in intersect(names(cenvs), names(oenvs0))) {
## WORKAROUND: On Linux Wine, base::Sys.getenv() may
## return elements with empty names. /HB 2016-10-06
if (nchar(name) == 0) next
if (!identical(cenvs[[name]], oenvs0[[name]])) {
do.call(Sys.setenv, as.list(oenvs0[name]))
}
}
## (d) Assert that everything was undone
stopifnot(identical(Sys.getenv(), oenvs0))
## Undo variables
rm(list = c(setdiff(ls(), ovars)))
## Travis CI specific: Explicit garbage collection because it
## looks like Travis CI might run out of memory during 'covr'
## testing and we now have so many tests. /HB 2017-01-11
if ("covr" %in% loadedNamespaces()) gc()
future.apply/tests/utils.R 0000644 0001762 0000144 00000005572 13502071600 015340 0 ustar ligges users source("incl/start,load-only.R")
stop_if_not <- future.apply:::stop_if_not
message("*** utils ...")
message("*** hpaste() ...")
# Some vectors
x <- 1:6
y <- 10:1
z <- LETTERS[x]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Abbreviation of output vector
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
printf("x = %s.\n", hpaste(x))
## x = 1, 2, 3, ..., 6.
printf("x = %s.\n", hpaste(x, maxHead = 2))
## x = 1, 2, ..., 6.
printf("x = %s.\n", hpaste(x), maxHead = 3) # Default
## x = 1, 2, 3, ..., 6.
# It will never output 1, 2, 3, 4, ..., 6
printf("x = %s.\n", hpaste(x, maxHead = 4))
## x = 1, 2, 3, 4, 5 and 6.
# Showing the tail
printf("x = %s.\n", hpaste(x, maxHead = 1, maxTail = 2))
## x = 1, ..., 5, 6.
# Turning off abbreviation
printf("y = %s.\n", hpaste(y, maxHead = Inf))
## y = 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
## ...or simply
printf("y = %s.\n", paste(y, collapse = ", "))
## y = 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
# Change last separator
printf("x = %s.\n", hpaste(x, lastCollapse = " and "))
## x = 1, 2, 3, 4, 5 and 6.
# No collapse
stopifnot(all(hpaste(x, collapse = NULL) == x))
# Empty input
stopifnot(identical(hpaste(character(0)), character(0)))
message("*** hpaste() ...")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# debug()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
message("*** mdebug() ...")
mdebug("Hello #", 1)
mdebugf("Hello #%d", 1)
options(future.debug = TRUE)
mdebug("Hello #", 2)
mdebugf("Hello #%d", 2)
options(future.debug = FALSE)
mdebug("Hello #", 3)
mdebugf("Hello #%d", 3)
message("*** mdebug() ... DONE")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# import_from()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
message("*** import_from() ...")
obj <- import_from("non-existing-fcn", default = NA, package = "future")
stopifnot(identical(obj, NA))
res <- tryCatch({
obj <- import_from("non-existing-fcn", package = "future")
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
message("*** import_from() ... DONE")
message("*** stop_if_not() ...")
stop_if_not(TRUE)
stop_if_not(TRUE, TRUE)
res <- tryCatch({
stop_if_not(FALSE)
}, error = identity)
stopifnot(inherits(res, "simpleError"))
res <- tryCatch({
stop_if_not(list(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE))
}, error = identity)
stopifnot(inherits(res, "simpleError"))
message("*** stop_if_not() ... DONE")
message("*** assert_values2() ...")
assert_values2 <- future.apply:::assert_values2
assert_values2(nX = 2L, values2 = as.list(1:2))
res <- tryCatch({
assert_values2(nX = 1L, values = as.list(1:2), values2 = as.list(1:2), fcn = "tests", debug = TRUE)
}, error = identity)
stopifnot(inherits(res, "FutureError"))
message("*** assert_values2() ... DONE")
message("*** utils ... DONE")
source("incl/end.R")
future.apply/tests/future_eapply.R 0000644 0001762 0000144 00000001576 13443044773 017103 0 ustar ligges users source("incl/start.R")
message("*** future_eapply() ...")
message("- From example(eapply) ...")
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
env <- new.env(hash = FALSE)
env$a <- 1:10
env$beta <- exp(-3:3)
env$logic <- c(TRUE, FALSE, FALSE, TRUE)
y0 <- unlist(eapply(env, mean, USE.NAMES = FALSE))
y1 <- unlist(future_eapply(env, mean, USE.NAMES = FALSE))
stopifnot(all.equal(y1, y0))
y0 <- eapply(env, quantile, probs = 1:3/4)
y1 <- future_eapply(env, quantile, probs = 1:3/4)
stopifnot(all.equal(y1, y0))
y0 <- eapply(env, quantile)
y1 <- future_eapply(env, quantile)
stopifnot(all.equal(y1, y0))
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("*** future_eapply() ... DONE")
source("incl/end.R")
future.apply/tests/future_mapply.R 0000644 0001762 0000144 00000006120 13602734431 017073 0 ustar ligges users source("incl/start.R")
message("*** future_mapply() ...")
message("- Parallel RNG truth (for later)...")
plan(sequential)
y_rng_0 <- future_mapply(stats::runif, n = 1:4, max = 2:5,
MoreArgs = list(min = 1), future.seed = 0xBEEF)
print(y_rng_0)
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
message("- From example(mapply) ...")
y0 <- mapply(rep, 1:4, 4:1)
y1 <- future_mapply(rep, 1:4, 4:1)
stopifnot(identical(y1, y0))
y0 <- mapply(rep, times = 1:4, x = 4:1)
y1 <- future_mapply(rep, times = 1:4, x = 4:1)
stopifnot(identical(y1, y0))
y0 <- mapply(rep, times = 1:4, MoreArgs = list(x = 42))
y1 <- future_mapply(rep, times = 1:4, MoreArgs = list(x = 42))
stopifnot(identical(y1, y0))
y0 <- mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
y1 <- future_mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
stopifnot(identical(y1, y0))
word <- function(C, k) paste(rep.int(C, k), collapse = "")
for (chunk.size in list(1L, structure(2L, ordering = "random"), structure(3L, ordering = 5:1))) {
y0 <- mapply(word, LETTERS[1:5], 5:1, SIMPLIFY = FALSE)
y1 <- future_mapply(word, LETTERS[1:5], 5:1, SIMPLIFY = FALSE, future.chunk.size = chunk.size)
stopifnot(identical(y1, y0))
}
message("- Subsetting (Issue #17) ...")
X <- as.Date("2018-06-01")
y0 <- mapply(FUN = identity, X, SIMPLIFY = FALSE)
y1 <- future_mapply(FUN = identity, X, SIMPLIFY = FALSE)
stopifnot(identical(y1, y0))
message("- Recycle arguments to same length ...")
y0 <- mapply(rep, 1:4, 2:1)
y1 <- future_mapply(rep, 1:4, 2:1)
stopifnot(identical(y1, y0))
message("- Parallel RNG ...")
y_rng_1 <- future_mapply(stats::runif, n = 1:4, max = 2:5,
MoreArgs = list(min = 1), future.seed = 0xBEEF)
print(y_rng_1)
stopifnot(all.equal(y_rng_1, y_rng_0))
message("- future_Map() ...")
xs <- replicate(5, stats::runif(10), simplify = FALSE)
ws <- replicate(5, stats::rpois(10, 5) + 1, simplify = FALSE)
y0 <- Map(weighted.mean, xs, ws)
y1 <- future_Map(stats::weighted.mean, xs, ws)
stopifnot(all.equal(y1, y0))
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("- Empty input [non parallel] ...")
y0 <- mapply(search)
y1 <- future_mapply(search)
stopifnot(identical(y1, y0))
y0 <- mapply(list, integer(0L))
y1 <- future_mapply(list, integer(0L))
stopifnot(identical(y1, y0))
message("*** future_mapply() - special cases ...")
X <- list()
names(X) <- character(0L)
y <- future_mapply(FUN = identity, X)
stopifnot(length(y) == 0L, !is.null(names(y)), identical(y, X))
y <- future_mapply(FUN = identity, X, X)
stopifnot(length(y) == 0L, !is.null(names(y)), identical(y, X))
message("*** future_mapply() - special cases ... DONE")
message("*** future_mapply() ... DONE")
source("incl/end.R")
future.apply/tests/globals,tricky_recursive.R 0000644 0001762 0000144 00000004025 13443044773 021223 0 ustar ligges users source("incl/start.R")
## Test adopted from http://stackoverflow.com/questions/42561088/nested-do-call-within-a-foreach-dopar-environment-cant-find-function-passed-w
options(future.debug = FALSE)
message("*** Tricky globals requiring recursive search ...")
my_add <- function(a, b) a + b
call_my_add <- function(a, b) {
do.call(my_add, args = list(a = a, b = b))
}
call_my_add_caller <- function(a, b, FUN = call_my_add) {
do.call(FUN, args = list(a = a, b = b))
}
main_lapply <- function(x = 1:2, caller = call_my_add_caller,
args = list(FUN = call_my_add)) {
lapply(x, FUN = function(i) {
do.call(caller, args = c(list(a = i, b = i+1L), args))
})
}
main_lapply_no_FUN <- function(x = 1:2, caller = call_my_add_caller,
args = list(FUN = call_my_add)) {
lapply(x, FUN = function(i) {
do.call(caller, args = list(a = i, b = i+1L))
})
}
main_future_lapply <- function(x = 1:2, caller = call_my_add_caller,
args = list(FUN = call_my_add)) {
future_lapply(x, FUN = function(i) {
do.call(caller, args = c(list(a = i, b = i + 1L), args))
})
}
main_future_lapply_no_FUN <- function(x = 1:2, caller = call_my_add_caller,
args = list(FUN = call_my_add)) {
future_lapply(x, FUN = function(i) {
do.call(caller, args = list(a = i, b = i + 1L))
})
}
x0 <- y0 <- z0 <- NULL
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
z <- main_lapply()
str(list(z = z))
if (is.null(z0)) z0 <- z
stopifnot(identical(z, z0))
z2 <- main_lapply_no_FUN()
str(list(z2 = z2))
stopifnot(identical(z2, z0))
z3 <- main_future_lapply()
str(list(z3 = z3))
stopifnot(identical(z3, z0))
z4 <- main_future_lapply_no_FUN()
str(list(z4 = z4))
stopifnot(identical(z4, z0))
message(sprintf("*** strategy = %s ... DONE", sQuote(strategy)))
}
message("*** Tricky globals requiring recursive search ... DONE")
source("incl/end.R")
future.apply/tests/stdout.R 0000644 0001762 0000144 00000003563 13443044773 015537 0 ustar ligges users source("incl/start.R")
message("*** future_*apply() and 'future.stdout' ...")
options(future.debug = FALSE)
truth <- list()
out <- utils::capture.output({
y <- future_lapply(1:0, FUN = function(x) {
print(x)
})
})
truth[["lapply"]] <- list(value = y, stdout = out)
out <- utils::capture.output({
y <- future_mapply(1:0, 0:1, FUN = function(x, y) {
print(list(x = x, y = y))
})
})
truth[["mapply"]] <- list(value = y, stdout = out)
for (cores in 1:availCores) {
message(sprintf(" - Testing with %d cores ...", cores))
options(mc.cores = cores)
for (strategy in supportedStrategies(cores)) {
message(sprintf("* plan('%s') ...", strategy))
plan(strategy)
for (fun in c("lapply", "mapply")) {
for (stdout in c(FALSE, TRUE, NA)) {
message(sprintf("* future_%s(x, ..., future.stdout = %s) ...", fun, stdout))
out <- utils::capture.output({
if (fun == "lapply") {
y <- future_lapply(1:0, FUN = function(x) {
Sys.sleep(x / 2)
print(x)
}, future.stdout = stdout)
} else if (fun == "mapply") {
y <- future_mapply(1:0, 0:1, FUN = function(x, y) {
Sys.sleep(x / 2)
print(list(x = x, y = y))
}, future.stdout = stdout)
}
})
stopifnot(identical(y, truth[[fun]]$value))
if (isTRUE(stdout)) {
stopifnot(identical(out, truth[[fun]]$stdout))
} else if (is.na(stdout)) {
} else {
stopifnot(nchar(out) == 0)
}
message(sprintf("* future_%s(x, ..., future.stdout = %s) ... DONE", fun, stdout))
} ## for (stdout ...)
} ## for (fun ...)
message(sprintf("* plan('%s') ... DONE", strategy))
}
message(sprintf(" - Testing with %d cores ... DONE", cores))
}
message("*** future_*apply() and 'future.stdout' ... DONE")
source("incl/end.R")
future.apply/tests/future_replicate.R 0000644 0001762 0000144 00000002167 13443044773 017556 0 ustar ligges users source("incl/start.R")
message("*** future_replicate() ...")
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
y0 <- replicate(5L, sample(10L, size = 1L))
y1 <- future_replicate(5L, sample(10L, size = 1L))
stopifnot(length(y0) == length(y1))
set.seed(0xBEEF)
y1 <- future_replicate(5L, sample(10L, size = 1L))
set.seed(0xBEEF)
y2 <- future_replicate(5L, sample(10L, size = 1L))
stopifnot(all.equal(y2, y1))
y3 <- future_replicate(5L, sample(10L, size = 1L), future.seed = 0xBEEF)
y4 <- future_replicate(5L, sample(10L, size = 1L), future.seed = 0xBEEF)
stopifnot(all.equal(y4, y3))
message("- example(replicate) ...")
foo <- function(x = 1, y = 2) c(x, y)
bar0 <- function(n, x) replicate(n, foo(x = x))
y0 <- bar0(5, x = 3)
bar1 <- function(n, x) future_replicate(n, foo(x = x))
y1 <- bar1(5, x = 3)
stopifnot(all.equal(y1, y0))
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("*** future_replicate() ... DONE")
source("incl/end.R")
future.apply/tests/future_sapply.R 0000644 0001762 0000144 00000002542 13443044773 017113 0 ustar ligges users source("incl/start.R")
message("*** future_sapply() ...")
xs <- list(
A = c(a = 1, b = 2, c = 3),
B = c(a = 1:2, b = 2:3, c = 3:4),
C = letters[1:3],
D = structure(10 * 5:8, names = LETTERS[1:4])
)
FUNS <- list(
a = identity,
b = as.matrix,
c = function(x, y = 2 * 1:5) outer(rep(x, length.out = 3L), y)
)
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
for (x in xs) {
FUNS_x <- FUNS
if (!is.numeric(x)) FUNS_x[["c"]] <- NULL
for (USE.NAMES in list(FALSE, TRUE)) {
for (simplify in list(FALSE, TRUE, "array")) {
for (FUN in FUNS_x) {
y0 <- sapply(x, FUN = FUN,
USE.NAMES = USE.NAMES, simplify = simplify)
y1 <- future_sapply(x, FUN = FUN,
USE.NAMES = USE.NAMES, simplify = simplify)
str(list(y0 = y0, y1 = y1))
stopifnot(identical(y1, y0))
if (identical(simplify, FALSE)) {
y2 <- lapply(x, FUN = FUN)
str(list(y0 = y0, y2 = y2))
stopifnot(identical(unname(y2), unname(y0)))
}
}
}
}
}
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("*** future_sapply() ... DONE")
source("incl/end.R")
future.apply/tests/future_apply.R 0000644 0001762 0000144 00000006315 13443044773 016732 0 ustar ligges users source("incl/start.R")
message("*** future_apply() ...")
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
message("- From example(apply) ...")
X <- matrix(c(1:4, 1, 6:8), nrow = 2L)
Y0 <- apply(X, MARGIN = 1L, FUN = table)
Y1 <- future_apply(X, MARGIN = 1L, FUN = table)
print(Y1)
stopifnot(all.equal(Y1, Y0, check.attributes = FALSE)) ## FIXME
Y0 <- apply(X, MARGIN = 1L, FUN = stats::quantile)
Y1 <- future_apply(X, MARGIN = 1L, FUN = stats::quantile)
print(Y1)
stopifnot(all.equal(Y1, Y0))
x <- cbind(x1 = 3, x2 = c(4:1, 2:5))
names(dimnames(x)) <- c("row", "col")
x3 <- array(x, dim = c(dim(x), 3),
dimnames = c(dimnames(x), list(C = paste0("cop.", 1:3))))
y0 <- apply(x, MARGIN = 2L, FUN = identity)
stopifnot(identical(y0, x))
y1 <- future_apply(x, MARGIN = 2L, FUN = identity)
print(y1)
stopifnot(identical(y1, y0))
y0 <- apply(x3, MARGIN = 2:3, FUN = identity)
stopifnot(identical(y0, x3))
y1 <- future_apply(x3, MARGIN = 2:3, FUN = identity)
print(y1)
stopifnot(identical(y1, y0))
z <- array(1:24, dim = 2:4)
y0 <- apply(z, MARGIN = 1:2, FUN = function(x) seq_len(max(x)))
y1 <- future_apply(z, MARGIN = 1:2, FUN = function(x) seq_len(max(x)))
print(y1)
stopifnot(identical(y1, y0))
message("- apply(X, MARGIN = , ...) ...")
X <- matrix(1:2, nrow = 2L, ncol = 1L, dimnames = list(rows = c("a", "b")))
y0 <- apply(X, MARGIN = "rows", FUN = identity)
y1 <- future_apply(X, MARGIN = "rows", FUN = identity)
print(y1)
stopifnot(identical(y1, y0))
message("- apply(X, ...) - dim(X) > 2 ...")
X <- array(1:12, dim = c(2, 2, 3))
y0 <- apply(X, MARGIN = 1L, FUN = identity)
y1 <- future_apply(X, MARGIN = 1L, FUN = identity)
print(y1)
stopifnot(identical(y1, y0))
message("- apply(X, ...) - not all same names ...")
FUN <- function(x) {
if (x[1] == 1L) names(x) <- letters[seq_along(x)]
x
}
X <- matrix(1:4, nrow = 2L, ncol = 2L)
y0 <- apply(X, MARGIN = 1L, FUN = FUN)
y1 <- future_apply(X, MARGIN = 1L, FUN = FUN)
print(y1)
stopifnot(identical(y1, y0))
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("*** apply(X, ...) - prod(dim(X)) == 0 [non-parallel] ...")
X <- matrix(nrow = 0L, ncol = 2L)
y0 <- apply(X, MARGIN = 1L, FUN = identity)
y1 <- future_apply(X, MARGIN = 1L, FUN = identity)
print(y1)
stopifnot(identical(y1, y0))
message("*** exceptions ...")
## Error: dim(X) must have a positive length
res <- tryCatch({
y <- future_apply(1L, MARGIN = 1L, FUN = identity)
}, error = identity)
stopifnot(inherits(res, "error"))
## Error: 'X' must have named dimnames
X <- matrix(1:2, nrow = 2L, ncol = 1L)
res <- tryCatch({
y <- future_apply(X, MARGIN = "rows", FUN = identity)
}, error = identity)
stopifnot(inherits(res, "error"))
## Error: not all elements of 'MARGIN' are names of dimensions
X <- matrix(1:2, nrow = 2L, ncol = 1L, dimnames = list(rows = c("a", "b")))
res <- tryCatch({
y <- future_apply(X, MARGIN = "cols", FUN = identity)
}, error = identity)
stopifnot(inherits(res, "error"))
message("*** future_apply() ... DONE")
source("incl/end.R")
future.apply/tests/future_lapply,RNG.R 0000644 0001762 0000144 00000013375 13602733704 017531 0 ustar ligges users source("incl/start.R")
message("*** future_lapply() and RNGs ...")
options(future.debug = FALSE)
message("* future_lapply(x, ..., future.seed = ) ...")
res <- tryCatch({
y <- future_lapply(1:3, FUN = identity, future.seed = as.list(1:2))
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
res <- tryCatch({
y <- future_lapply(1:3, FUN = identity, future.seed = list(1, 2, 3:4))
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
res <- tryCatch({
y <- future_lapply(1:3, FUN = identity, future.seed = as.list(1:3))
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
seeds <- lapply(1:3, FUN = as_lecyer_cmrg_seed)
res <- tryCatch({
y <- future_lapply(1:3, FUN = identity, future.seed = lapply(seeds, FUN = as.numeric))
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
seeds[[1]][1] <- seeds[[1]][1] + 1L
res <- tryCatch({
y <- future_lapply(1:3, FUN = identity, future.seed = seeds)
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
message("* future_lapply(x, ..., future.seed = ) ... DONE")
## Iterate of the same set in all tests
x <- 1:5
message("* future_lapply(x, ..., future.seed = FALSE) ...")
y0 <- y0_nested <- seed00 <- NULL
for (cores in 1:availCores) {
message(sprintf(" - Testing with %d cores ...", cores))
options(mc.cores = cores)
for (strategy in supportedStrategies(cores)) {
message(sprintf("* plan('%s') ...", strategy))
plan(strategy)
set.seed(0xBEEF)
seed0 <- get_random_seed()
y <- future_lapply(x, FUN = function(i) i, future.seed = FALSE)
y <- unlist(y)
seed <- get_random_seed()
if (is.null(y0)) {
y0 <- y
seed00 <- seed
}
str(list(y = y))
stopifnot(identical(seed, seed0), identical(seed, seed00))
## NOTE: We cannot guarantee the same random numbers, because
## future.seed = FALSE.
message(sprintf("* plan('%s') ... DONE", strategy))
} ## for (strategy ...)
message(sprintf(" - Testing with %d cores ... DONE", cores))
} ## for (core ...)
message("* future_lapply(x, ..., future.seed = FALSE) ... DONE")
seed_sets <- list(
A = TRUE,
## B = NA,
C = 42L,
D = as_lecyer_cmrg_seed(42L),
E = list(),
F = vector("list", length = length(x)),
G = NULL
)
## Generate sequence of seeds of the current RNGkind()
## NOTE: This is NOT a good way to generate random seeds!!!
seeds <- lapply(seq_along(x), FUN = function(i) {
set.seed(i)
globalenv()$.Random.seed
})
seed_sets$E <- seeds
## Generate sequence of L'Ecyer CMRG seeds
seeds <- seed_sets$F
seeds[[1]] <- seed_sets$D
for (kk in 2:length(x)) seeds[[kk]] <- parallel::nextRNGStream(seeds[[kk - 1]])
seed_sets$F <- seeds
seed_sets$G <- seed_sets$A
rm(list = "seeds")
for (name in names(seed_sets)) {
future.seed <- seed_sets[[name]]
if (is.list(future.seed)) {
label <- sprintf("",
length(future.seed), length(future.seed[[1]]))
} else {
label <- hpaste(future.seed)
}
message(sprintf("* future_lapply(x, ..., future.seed = %s) ...", label))
set.seed(0xBEEF)
y0 <- seed00 <- NULL
for (cores in 1:availCores) {
message(sprintf(" - Testing with %d cores ...", cores))
options(mc.cores = cores)
for (strategy in supportedStrategies(cores)) {
message(sprintf("* plan('%s') ...", strategy))
plan(strategy)
set.seed(0xBEEF)
seed0 <- get_random_seed()
y <- future_lapply(x, FUN = function(i) {
rnorm(1L)
}, future.seed = future.seed)
y <- unlist(y)
seed <- get_random_seed()
if (is.null(y0)) {
y0 <- y
seed00 <- seed
}
str(list(y = y))
stopifnot(!identical(seed, seed0), identical(seed, seed00),
identical(y, y0))
## RNG-based results should also be identical regardless of
## load-balance scheduling.
for (scheduling in list(FALSE, TRUE, 0, 0.5, 2.0, Inf)) {
set.seed(0xBEEF)
seed0 <- get_random_seed()
y <- future_lapply(x, FUN = function(i) {
rnorm(1L)
}, future.seed = future.seed, future.scheduling = scheduling)
seed <- get_random_seed()
y <- unlist(y)
str(list(y = y))
stopifnot(!identical(seed, seed0), identical(seed, seed00),
identical(y, y0))
}
## Nested future_lapply():s
for (scheduling in list(FALSE, TRUE)) {
y <- future_lapply(x, FUN = function(i) {
.seed <- globalenv()$.Random.seed
z <- future_lapply(1:3, FUN = function(j) {
list(j = j, seed = globalenv()$.Random.seed)
}, future.seed = .seed)
## Assert that all future seeds are unique
seeds <- lapply(z, FUN = function(x) x$seed)
for (kk in 2:length(seeds)) stopifnot(!all(seeds[[kk]] == seeds[[1]]))
list(i = i, seed = .seed, sample = rnorm(1L), z = z)
}, future.seed = 42L, future.scheduling = scheduling)
if (is.null(y0_nested)) y0_nested <- y
str(list(y = y))
## Assert that all future seeds (also nested ones) are unique
seeds <- Reduce(c, lapply(y, FUN = function(x) {
c(list(seed = x$seed), lapply(x$z, FUN = function(x) x$seed))
}))
for (kk in 2:length(seeds)) stopifnot(!all(seeds[[kk]] == seeds[[1]]))
stopifnot(identical(y, y0_nested))
}
message(sprintf("* plan('%s') ... DONE", strategy))
} ## for (strategy ...)
message(sprintf(" - Testing with %d cores ... DONE", cores))
} ## for (cores ...)
message(sprintf("* future_lapply(x, ..., future.seed = %s) ... DONE", label))
} ## for (name ...)
message("*** future_lapply() and RNGs ... DONE")
source("incl/end.R")
future.apply/tests/rng.R 0000644 0001762 0000144 00000004060 13443044773 014774 0 ustar ligges users source("incl/start,load-only.R")
message("*** RNG ...")
set_random_seed(seed = NULL)
seed <- get_random_seed()
stopifnot(is.null(seed))
set_random_seed(seed = 42L)
seed <- get_random_seed()
stopifnot(identical(seed, 42L))
res <- tryCatch({
seed <- as_lecyer_cmrg_seed(seed = FALSE)
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
seed <- as_lecyer_cmrg_seed(seed = 42L)
str(seed)
set_random_seed(seed = seed)
stopifnot(identical(get_random_seed(), seed))
seed2 <- as_lecyer_cmrg_seed(seed = TRUE)
str(seed2)
stopifnot(identical(seed2, seed))
seed3 <- as_lecyer_cmrg_seed(seed = seed)
str(seed3)
stopifnot(identical(seed3, seed))
message(" - make_rng_seeds ...")
seeds <- make_rng_seeds(2L, seed = FALSE)
stopifnot(is.null(seeds))
seeds <- make_rng_seeds(0L, seed = 42L, debug = TRUE)
stopifnot(length(seeds) == 0L, identical(seeds, list()))
seeds <- make_rng_seeds(2L, seed = TRUE)
stopifnot(length(seeds) == 2L, all(sapply(seeds, FUN = is_lecyer_cmrg_seed)))
seeds <- make_rng_seeds(3L, seed = 42L)
stopifnot(length(seeds) == 3L, all(sapply(seeds, FUN = is_lecyer_cmrg_seed)))
seeds <- make_rng_seeds(1L, seed = 42L, debug = TRUE)
stopifnot(length(seeds) == 1L, all(sapply(seeds, FUN = is_lecyer_cmrg_seed)))
seeds0 <- lapply(1:3, FUN = as_lecyer_cmrg_seed)
seeds <- make_rng_seeds(length(seeds0), seed = seeds0, debug = TRUE)
stopifnot(length(seeds) == length(seeds0),
all(sapply(seeds, FUN = is_lecyer_cmrg_seed)))
message(" - exceptions ...")
## Invalid L'Ecuyer seed
seed_invalid <- seed + 1L
res <- tryCatch({
seed <- as_lecyer_cmrg_seed(seed = seed_invalid)
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
## Invalid seed
res <- tryCatch({
seed <- as_lecyer_cmrg_seed(seed = 1:2)
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
## Invalid length
seeds0 <- lapply(1:2, FUN = as_lecyer_cmrg_seed)
res <- tryCatch({
seeds <- make_rng_seeds(1L, seed = seeds0)
}, error = identity)
print(res)
stopifnot(inherits(res, "simpleError"))
message("*** RNG ... DONE")
source("incl/end.R")
future.apply/tests/future_lapply.R 0000644 0001762 0000144 00000007100 13602734532 017073 0 ustar ligges users source("incl/start.R")
library("listenv")
message("*** future_lapply() ...")
x_a <- list(a = "integer", b = "numeric", c = "character", c = "list")
str(list(x_a = x_a))
y_a <- lapply(x_a, FUN = base::vector, length = 2L)
str(list(y_a = y_a))
x_b <- list(a = c("hello", b = 1:100))
str(list(x_b = x_b))
y_b <- lapply(x_b, FUN = future:::hpaste, collapse = "; ", maxHead = 3L)
str(list(y_b = y_b))
x_c <- list()
y_c <- listenv()
y_c$A <- 3L
x_c$a <- y_c
y_c<- listenv()
y_c$A <- 3L
y_c$B <- c("hello", b = 1:100)
x_c$b <- y_c
print(x_c)
y_c <- lapply(x_c, FUN = listenv::map)
str(list(y_c = y_c))
for (cores in 1:availCores) {
message(sprintf("Testing with %d cores ...", cores))
options(mc.cores = cores)
strategies <- supportedStrategies(cores)
for (strategy in supportedStrategies()) {
message(sprintf("- plan('%s') ...", strategy))
plan(strategy)
for (scheduling in list(FALSE, TRUE, structure(TRUE, ordering = "random"), structure(TRUE, ordering = function(n) rev(seq_len(n))))) {
message("- future_lapply(x, FUN = vector, ...) ...")
y <- future_lapply(x_a, FUN = vector, length = 2L, future.scheduling = scheduling)
str(list(y = y))
stopifnot(identical(y, y_a))
message("- future_lapply(x, FUN = base::vector, ...) ...")
y <- future_lapply(x_a, FUN = base::vector, length = 2L, future.scheduling = scheduling)
str(list(y = y))
stopifnot(identical(y, y_a))
message("- future_lapply(x, FUN = future:::hpaste, ...) ...")
y <- future_lapply(x_b, FUN = future:::hpaste, collapse = "; ", maxHead = 3L, future.scheduling = scheduling)
str(list(y = y))
stopifnot(identical(y, y_b))
message("- future_lapply(x, FUN = listenv::listenv, ...) ...")
y <- future_lapply(x_c, FUN = listenv::map, future.scheduling = scheduling)
str(list(y = y))
stopifnot(identical(y, y_c))
} ## for (scheduling ...)
message("- future_lapply(x, FUN, ...) for large length(x) ...")
a <- 3.14
x_d <- 1:1e4
y <- future_lapply(x_d, FUN = function(z) sqrt(z + a))
y <- unlist(y, use.names = FALSE)
stopifnot(all.equal(y, sqrt(x_d + a)))
message("- future_lapply(x, FUN = table, ...) ...")
x <- list(a = 1:4, b = 5:8)
y0 <- lapply(x, FUN = table)
y1 <- future_lapply(x, FUN = table)
stopifnot(all.equal(y1, y0, check.attributes = FALSE)) ## FIXME
message("- future_lapply(x, ...) where length(x) != length(as.list(x)) ...")
x <- structure(list(a = 1, b = 2), class = "Foo")
as.list.Foo <- function(x, ...) c(x, c = 3)
y0 <- lapply(x, FUN = length)
y1 <- future_lapply(x, FUN = length)
stopifnot(identical(y1, y0))
} ## for (strategy ...)
message(sprintf("Testing with %d cores ... DONE", cores))
} ## for (cores ...)
message("*** future_lapply() - special cases ...")
X <- list()
names(X) <- character(0L)
y <- future_lapply(X, FUN = identity)
stopifnot(length(y) == 0L, !is.null(names(y)), identical(y, X))
X <- character(0L)
y0 <- lapply(X, FUN = identity)
y <- future_lapply(X, FUN = identity)
stopifnot(identical(y, y0))
X <- character(0L)
names(X) <- character(0L)
y0 <- lapply(X, FUN = identity)
y <- future_lapply(X, FUN = identity)
stopifnot(identical(y, y0))
message("*** future_lapply() - special cases ... DONE")
message("*** future_lapply() - exceptions ...")
res <- tryCatch({
future_lapply(1:3, FUN = identity, future.chunk.size = structure(1L, ordering = "invalid"))
}, error = identity)
stopifnot(inherits(res, "error"))
message("*** future_lapply() - exceptions ... DONE")
message("*** future_lapply() ... DONE")
source("incl/end.R")
future.apply/tests/future_vapply.R 0000644 0001762 0000144 00000011531 13443044773 017114 0 ustar ligges users source("incl/start.R")
message("*** future_vapply() ...")
for (strategy in supportedStrategies()) {
message(sprintf("*** strategy = %s ...", sQuote(strategy)))
plan(strategy)
x <- NULL
fun <- is.factor
fun_value <- logical(1L)
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
x <- list()
fun <- is.numeric
fun_value <- logical(1L)
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
x <- integer()
fun <- identity
fun_value <- fun(integer(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
df <- data.frame(x = 1:10, y = letters[1:10])
fun <- class
fun_value <- character(1L)
y0 <- vapply(df, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(df, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
x <- 1:10
fun <- function(x) double(0L)
fun_value <- fun(double(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
fun <- function(x) integer(0L)
fun_value <- fun(double(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
fun <- sqrt
fun_value <- fun(double(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
fun <- function(x) c(x, x^2)
fun_value <- fun(double(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
fun <- function(x) matrix(x, nrow = 2L, ncol = 2L)
fun_value <- fun(integer(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
fun <- function(x) matrix(x, nrow = 2L, ncol = 2L)
fun_value <- fun(double(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
## Ditto with dimnames on FUN.VALUE
fun <- function(x) {
matrix(x, nrow = 2L, ncol = 2L, dimnames = list(c("a", "b"), c("A", "B")))
}
fun_value <- fun(double(1L))
y0 <- vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y0)
y1 <- future_vapply(x, FUN = fun, FUN.VALUE = fun_value)
str(y1)
stopifnot(all.equal(y1, y0))
message("- From example(vapply) ...")
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE))
y0 <- vapply(x, FUN = quantile, FUN.VALUE = double(5L))
y1 <- future_vapply(x, FUN = quantile, FUN.VALUE = double(5L))
str(y1)
stopifnot(all.equal(y1, y0))
i39 <- sapply(3:9, seq)
ys0 <- sapply(i39, fivenum)
ys1 <- future_sapply(i39, fivenum)
stopifnot(all.equal(ys1, ys0))
yv0 <- vapply(i39, fivenum,
c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))
yv1 <- future_vapply(i39, fivenum,
c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))
str(yv1)
stopifnot(all.equal(yv1, yv0))
v <- structure(10*(5:8), names = LETTERS[1:4])
f <- function(x, y) outer(rep(x, length.out = 3L), y)
ys0 <- sapply(v, f, y = 2*(1:5), simplify = "array")
ys1 <- future_sapply(v, f, y = 2*(1:5), simplify = "array")
stopifnot(all.equal(ys1, ys0))
fv <- outer(1:3, 1:5)
y <- 2*(1:5)
yv0 <- vapply(v, f, fv, y = y)
yv1 <- future_vapply(v, f, fv, y = y)
str(yv1)
stopifnot(all.equal(yv1, yv0))
y0 <- vapply(mtcars, FUN = is.numeric, FUN.VALUE = logical(1L))
y1 <- future_vapply(mtcars, FUN = is.numeric, FUN.VALUE = logical(1L))
str(y1)
stopifnot(all.equal(y1, y0))
message("- future_vapply(x, ...) where length(x) != length(as.list(x)) ...")
x <- structure(list(a = 1, b = 2), class = "Foo")
as.list.Foo <- function(x, ...) c(x, c = 3)
y0 <- vapply(x, FUN = length, FUN.VALUE = -1L)
y1 <- future_vapply(x, FUN = length, FUN.VALUE = -1L)
stopifnot(identical(y1, y0))
message("- exceptions ...")
res <- tryCatch({
y0 <- vapply(1:3, FUN = identity, FUN.VALUE = c(3, 3))
}, error = identity)
stopifnot(inherits(res, "error"))
res <- tryCatch({
y1 <- future_vapply(1:3, FUN = identity, FUN.VALUE = c(3, 3))
}, error = identity)
stopifnot(inherits(res, "error"))
plan(sequential)
message(sprintf("*** strategy = %s ... done", sQuote(strategy)))
} ## for (strategy in ...)
message("*** future_vapply() ... DONE")
source("incl/end.R")
future.apply/tests/future_by.R 0000644 0001762 0000144 00000005267 13443044773 016224 0 ustar ligges users source("incl/start.R")
library("listenv")
all_equal_but_call <- function(target, current, ...) {
attr(target, "call") <- NULL
attr(current, "call") <- NULL
all.equal(target = target, current = current, ...)
}
message("*** future_by() ...")
## ---------------------------------------------------------
## by()
## ---------------------------------------------------------
if (require("datasets") && require("stats")) { ## warpbreaks & lm()
## Use a local variable to test that it is properly exported, because
## 'warpbreaks' is available in all R sessions
data <- warpbreaks
y0 <- by(data[, 1:2], INDICES = data[,"tension"], FUN = summary)
y1 <- by(data[, 1], INDICES = data[, -1], FUN = summary, digits = 2L)
y2 <- by(data, INDICES = data[,"tension"], FUN = function(x, ...) {
lm(breaks ~ wool, data = x, ...)
}, singular.ok = FALSE)
## now suppose we want to extract the coefficients by group
tmp <- with(data, by(data, INDICES = tension, FUN = function(x) {
lm(breaks ~ wool, data = x)
}))
y3 <- sapply(tmp, coef)
## Source: {r-source}/tests/reg-tests-1d.R
by2 <- function(data, INDICES, FUN) {
by(data, INDICES = INDICES, FUN = FUN)
}
future_by2 <- function(data, INDICES, FUN) {
future_by(data, INDICES = INDICES, FUN = FUN)
}
y4 <- by2(data, INDICES = data[,"tension"], FUN = summary)
for (cores in 1:availCores) {
message(sprintf("Testing with %d cores ...", cores))
options(mc.cores = cores)
strategies <- supportedStrategies(cores)
for (strategy in supportedStrategies()) {
message(sprintf("- plan('%s') ...", strategy))
plan(strategy)
y0f <- future_by(data[, 1:2], INDICES = data[,"tension"], FUN = summary)
stopifnot(all_equal_but_call(y0f, y0, check.attributes = FALSE))
y1f <- future_by(data[, 1], INDICES = data[, -1], FUN = summary, digits = 2L)
stopifnot(all_equal_but_call(y1f, y1))
y2f <- future_by(data, INDICES = data[,"tension"], FUN = function(x, ...) {
lm(breaks ~ wool, data = x, ...)
}, singular.ok = FALSE)
stopifnot(all_equal_but_call(y2f, y2))
## now suppose we want to extract the coefficients by group
tmp <- with(data, future_by(data, INDICES = tension, FUN = function(x) {
lm(breaks ~ wool, data = x)
}))
y3f <- sapply(tmp, coef)
stopifnot(all_equal_but_call(y3f, y3))
y4f <- future_by2(data, INDICES = data[,"tension"], FUN = summary)
stopifnot(all_equal_but_call(y4f, y4))
} ## for (strategy ...)
message(sprintf("Testing with %d cores ... DONE", cores))
} ## for (cores ...)
} ## if (require("stats"))
message("*** future_by() ... DONE")
source("incl/end.R")
future.apply/tests/future_lapply,globals.R 0000644 0001762 0000144 00000016324 13557176376 020542 0 ustar ligges users source("incl/start.R")
library("tools") ## toTitleCase()
message("*** future_lapply() - globals ...")
plan(cluster, workers = "localhost")
options(future.debug = FALSE)
a <- 1
b <- 2
globals_set <- list(
A = FALSE,
B = TRUE,
C = c("a", "b"),
D = list(a = 2, b = 3)
)
x <- list(1)
y_truth <- list(A = NULL, B = list(1), C = list(1), D = list(2))
str(y_truth)
for (name in names(globals_set)) {
globals <- globals_set[[name]]
message("Globals set ", sQuote(name))
y <- tryCatch({
future_lapply(x, FUN = function(x) {
median(c(x, a, b))
}, future.globals = globals, future.packages = "utils")
}, error = identity)
print(y)
stopifnot((name == "A" && inherits(y, "error")) ||
identical(y, y_truth[[name]]))
}
message("*** future_lapply() - globals ... DONE")
message("*** future_lapply() - manual globals ...")
d <- 42
y <- future_lapply(1:2, FUN = function(x) { x * d },
future.globals = structure(FALSE, add = "d"))
stopifnot(identical(y, list(42, 84)))
e <- 42
res <- tryCatch({
future_lapply(1:2, FUN = function(x) { 2 * e },
future.globals = structure(TRUE, ignore = "e"))
}, error = identity)
stopifnot(inherits(res, "error"))
message("*** future_lapply() - manual globals ... DONE")
## Test adopted from http://stackoverflow.com/questions/42561088/nested-do-call-within-a-foreach-dopar-environment-cant-find-function-passed-w
message("*** future_lapply() - tricky globals ...")
my_add <- function(a, b) a + b
call_my_add <- function(a, b) {
do.call(my_add, args = list(a = a, b = b))
}
call_my_add_caller <- function(a, b, FUN = call_my_add) {
do.call(FUN, args = list(a = a, b = b))
}
main <- function(x = 1:2, caller = call_my_add_caller,
args = list(FUN = call_my_add)) {
results <- future_lapply(x, FUN = function(i) {
do.call(caller, args = c(list(a = i, b = i + 1L), args))
})
results
}
x <- list(list(1:2))
z_length <- lapply(x, FUN = do.call, what = length)
fun <- function(...) sum(...)
z_fun <- lapply(x, FUN = do.call, what = fun)
y0 <- NULL
for (strategy in supportedStrategies()) {
plan(strategy)
y <- main(1:3)
if (is.null(y0)) y0 <- y
stopifnot(identical(y, y0))
message("- future_lapply(x, FUN = do.call, ...) ...")
z <- future_lapply(x, FUN = do.call, what = length)
stopifnot(identical(z, z_length))
z <- future_lapply(x, FUN = do.call, what = fun)
stopifnot(identical(z, z_fun))
message("- future_lapply(x, ...) - passing arguments via '...' ...")
## typeof() == "list"
obj <- data.frame(a = 1:2)
stopifnot(typeof(obj) == "list")
y <- future_lapply(1L, function(a, b) typeof(b), b = obj)
stopifnot(identical(y[[1]], typeof(obj)))
## typeof() == "environment"
obj <- new.env()
stopifnot(typeof(obj) == "environment")
y <- future_lapply(1L, function(a, b) typeof(b), b = obj)
stopifnot(identical(y[[1]], typeof(obj)))
## typeof() == "S4"
if (requireNamespace("methods")) {
obj <- methods::getClass("MethodDefinition")
stopifnot(typeof(obj) == "S4")
y <- future_lapply(1L, function(a, b) typeof(b), b = obj)
stopifnot(identical(y[[1]], typeof(obj)))
}
message("- future_lapply(X, ...) - 'X' containing globals ...")
## From https://github.com/HenrikBengtsson/future.apply/issues/12
a <- 42
b <- 21
X <- list(
function(b) 2 * a,
function() b / 2,
function() a + b,
function() nchar(toTitleCase("hello world"))
)
z0 <- lapply(X, FUN = function(f) f())
str(z0)
z1 <- future_lapply(X, FUN = function(f) f())
str(z1)
stopifnot(identical(z1, z0))
## https://github.com/HenrikBengtsson/future.apply/issues/47
message("- future_lapply(X, ...) - '{ a <- a + 1; a }' ...")
a <- 1
z0 <- lapply(1, function(ii) {
a <- a + 1
a
})
z1 <- future_lapply(1, function(ii) {
a <- a + 1
a
})
stopifnot(identical(z1, z0))
## https://github.com/HenrikBengtsson/future.apply/issues/47
message("- future_lapply(X, ...) - '{ a; a <- a + 1 }' ...")
z2 <- tryCatch(future_lapply(1, function(ii) {
a
a <- a + 1
}), error = identity)
if (packageVersion("globals") <= "0.12.4" && strategy %in% c("multisession")) {
stopifnot(inherits(z2, "error"))
} else {
stopifnot(identical(z2, z0))
}
} ## for (strategy ...)
message("*** future_lapply() - tricky globals ... DONE")
message("*** future_lapply() - missing arguments ...")
## Here 'abc' becomes missing, i.e. missing(abc) is TRUE
foo <- function(x, abc) future_lapply(x, FUN = function(y) y)
y <- foo(1:2)
stopifnot(identical(y, as.list(1:2)))
message("*** future_lapply() - missing arguments ... DONE")
message("*** future_lapply() - false positives ...")
## Here 'abc' becomes a promise, which fails to resolve
## iff 'xyz' does not exist. (Issue #161)
suppressWarnings(rm(list = "xyz"))
foo <- function(x, abc) future_lapply(x, FUN = function(y) y)
y <- foo(1:2, abc = (xyz >= 3.14))
stopifnot(identical(y, as.list(1:2)))
message("*** future_lapply() - false positives ... DONE")
message("*** future_lapply() - too large ...")
oMaxSize <- getOption("future.globals.maxSize")
X <- replicate(10L, 1:100, simplify = FALSE)
FUN <- function(x) {
getOption("future.globals.maxSize")
}
y0 <- lapply(X, FUN = FUN)
stopifnot(all(sapply(y0, FUN = identical, oMaxSize)))
sizes <- unclass(c(FUN = object.size(FUN), X = object.size(X)))
cat(sprintf("Baseline size of globals: %.2f KiB\n", sizes[["FUN"]] / 1024))
message("- true positive ...")
options(future.globals.maxSize = 1L)
res <- tryCatch({
y <- future_lapply(X, FUN = FUN)
}, error = identity)
stopifnot(inherits(res, "error"))
res <- NULL
options(future.globals.maxSize = oMaxSize)
maxSize <- getOption("future.globals.maxSize")
y <- future_lapply(X, FUN = FUN)
str(y)
stopifnot(all(sapply(y, FUN = identical, oMaxSize)))
message("- approximately invariant to chunk size ...")
maxSize <- sizes[["FUN"]] + sizes[["X"]] / length(X)
options(future.globals.maxSize = maxSize)
for (chunk.size in c(1L, 2L, 5L, 10L)) {
y <- future_lapply(X, FUN = FUN, future.chunk.size = chunk.size)
str(y)
stopifnot(all(unlist(y) == maxSize))
cat(sprintf("maxSize = %g bytes\nfuture.globals.maxSize = %g bytes\n",
maxSize, getOption("future.globals.maxSize")))
stopifnot(getOption("future.globals.maxSize") == maxSize)
}
y <- NULL
options(future.globals.maxSize = oMaxSize)
message("*** future_lapply() - too large ... DONE")
message("*** future_lapply() - globals exceptions ...")
res <- tryCatch({
y <- future_lapply(1, FUN = function(x) x, future.globals = 42)
}, error = identity)
stopifnot(inherits(res, "error"))
res <- tryCatch({
y <- future_lapply(1, FUN = function(x) x, future.globals = list(1))
}, error = identity)
stopifnot(inherits(res, "error"))
res <- tryCatch({
y <- future_lapply(1, FUN = function(x) x, future.globals = "...future.FUN")
}, error = identity)
stopifnot(inherits(res, "error"))
res <- tryCatch({
y <- future_lapply(1, FUN = function(x) x, future.globals = "...future.FUN")
}, error = identity)
stopifnot(inherits(res, "error"))
...future.elements_ii <- 42L
X <- list(function() 2 * ...future.elements_ii)
res <- tryCatch({
y <- future_lapply(X, FUN = function(f) f())
}, error = identity)
stopifnot(inherits(res, "error"))
message("*** future_lapply() - globals exceptions ... DONE")
source("incl/end.R")
future.apply/vignettes/ 0000755 0001762 0000144 00000000000 13604743022 014721 5 ustar ligges users future.apply/vignettes/future.apply-1-overview.md.rsp 0000644 0001762 0000144 00000015305 13440257025 022513 0 ustar ligges users <%@meta language="R-vignette" content="--------------------------------
%\VignetteIndexEntry{A Future for R: Apply Function to Elements in Parallel}
%\VignetteAuthor{Henrik Bengtsson}
%\VignetteKeyword{R}
%\VignetteKeyword{package}
%\VignetteKeyword{vignette}
%\VignetteKeyword{future}
%\VignetteKeyword{lazy evaluation}
%\VignetteKeyword{synchronous}
%\VignetteKeyword{asynchronous}
%\VignetteKeyword{parallel}
%\VignetteKeyword{cluster}
%\VignetteEngine{R.rsp::rsp}
%\VignetteTangle{FALSE}
Do not edit the *.md.rsp file. Instead edit the *.md.rsp.rsp (sic!)
file found under inst/vignettes-static/ of the source package.
--------------------------------------------------------------------"%>
# A Future for R: Apply Function to Elements in Parallel
## Introduction
The purpose of this package is to provide worry-free parallel alternatives to base-R "apply" functions, e.g. `apply()`, `lapply()`, and `vapply()`. The goal is that one should be able to replace any of these in the core with its futurized equivalent and things will just work. For example, instead of doing:
```r
library("stats")
x <- 1:10
y <- lapply(x, FUN = quantile, probs = 1:3/4)
```
one can do:
```r
library("future.apply")
plan(multiprocess) ## Run in parallel on local computer
library("stats")
x <- 1:10
y <- future_lapply(x, FUN = quantile, probs = 1:3/4)
```
Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. _To enable parallel RNG, use argument `future.seed = TRUE`._
## Role
Where does the [future.apply] package fit in the software stack? You can think of it as a sibling to [foreach], [BiocParallel], [plyr], etc. Just as parallel provides `parLapply()`, foreach provides `foreach()`, BiocParallel provides `bplapply()`, and plyr provides `llply()`, future.apply provides `future_lapply()`. Below is a table summarizing this idea:
Package |
Functions |
Backends |
future.apply
|
Future-versions of common goto *apply() functions available in base R (of the 'base' package):
future_apply() ,
future_by() ,
future_eapply() ,
future_lapply() ,
future_Map() ,
future_mapply() ,
future_replicate() ,
future_sapply() ,
future_tapply() , and
future_vapply() .
The following function is yet not implemented:
future_rapply()
|
All future backends
|
parallel
|
mclapply() , mcmapply() ,
clusterMap() , parApply() , parLapply() , parSapply() , ...
|
Built-in and conditional on operating system
|
foreach
|
foreach() ,
times()
|
All future backends via doFuture
|
BiocParallel
|
Bioconductor's parallel mappers:
bpaggregate() ,
bpiterate() ,
bplapply() , and
bpvec()
|
All future backends via doFuture (because it supports foreach) or via BiocParallel.FutureParam (direct BiocParallelParam support; prototype)
|
plyr
|
**ply(..., .parallel = TRUE) functions:
aaply() ,
ddply() ,
dlply() ,
llply() , ...
|
All future backends via doFuture (because it uses foreach internally)
|
Note that, except for the built-in parallel package, none of these higher-level APIs implement their own parallel backends, but they rather enhance existing ones. The foreach framework leverages backends such as [doParallel], [doMC] and [doFuture], and the future.apply framework leverages the [future] ecosystem and therefore backends such as built-in parallel, [future.callr], and [future.batchtools].
By separating `future_lapply()` and friends from the [future] package, it helps clarifying the purpose of the future package, which is to define and provide the core Future API, which higher-level parallel APIs can build on and for which any futurized parallel backends can be plugged into.
## Roadmap
1. Implement `future_*apply()` versions for all common `*apply()` functions that exist in base R. This also involves writing a large set of package tests asserting the correctness and the same behavior as the corresponding `*apply()` functions.
2. Harmonize all `future_*apply()` functions with each other, e.g. the future-specific arguments.
3. Consider additional `future_*apply()` functions and features that fit in this package but don't necessarily have a corresponding function in base R. Examples of this may be "apply" functions that return futures rather than values, mechanisms for benchmarking, and richer control over load balancing.
The API and identity of the future.apply package will be kept close to the `*apply()` functions in base R. In other words, it will _neither_ keep growing nor be expanded with new, more powerful apply-like functions beyond those core ones in base R. Such extended functionality should be part of a separate package.
[BatchJobs]: https://cran.r-project.org/package=BatchJobs
[batchtools]: https://cran.r-project.org/package=batchtools
[BiocParallel]: https://bioconductor.org/packages/release/bioc/html/BiocParallel.html
[doFuture]: https://cran.r-project.org/package=doFuture
[doMC]: https://cran.r-project.org/package=doMC
[doParallel]: https://cran.r-project.org/package=doParallel
[foreach]: https://cran.r-project.org/package=foreach
[future]: https://cran.r-project.org/package=future
[future.apply]: https://cran.r-project.org/package=future.apply
[future.BatchJobs]: https://cran.r-project.org/package=future.BatchJobs
[future.batchtools]: https://cran.r-project.org/package=future.batchtools
[future.callr]: https://cran.r-project.org/package=future.callr
[plyr]: https://cran.r-project.org/package=plyr
---
Copyright Henrik Bengtsson, 2017-2019
future.apply/NEWS 0000644 0001762 0000144 00000012307 13604742353 013421 0 ustar ligges users Package: future.apply
=====================
Version: 1.4.0 [2020-01-06]
NEW FEATURES:
* Now all future_nnn() functions set a label on each future that reflects the
name of the future_nnn() function and the index of the chunk, e.g.
'future_lapply-3'. The format can be controlled by argument 'future.label'.
* PERFORMANCE: The assertion of the maximum size of globals per chunk is now
significantly faster for future_apply().
BUG FIXES:
* future_lapply(X) and future_mapply(FUN, X) would drop 'names' argument of
the returned empty list when length(X) == 0.
* Package could set '.Random.seed' to NULL, instead of removing it, which in
turn would produce a warning on "'.Random.seed' is not an integer vector but
of type 'NULL', so ignored" when the next random number generated.
Version: 1.3.0 [2019-06-17]
NEW FEATURES:
* Now 'future.conditions' defaults to the same as argument 'conditions'
of future::future(). If the latter changes, this package will follow.
* Debug messages are now prepended with a timestamp.
BUG FIXES:
* The error "sprintf(...) : 'fmt' length exceeds maximal format length 8192"
could be produced when debugging tried to report on too many globals.
Version: 1.2.0 [2019-03-06]
NEW FEATURES:
* Added future_by().
BUG FIXES:
* Attributes 'add' and 'ignore' of argument 'future.globals' were ignored
although support for them was added in future (>= 1.10.0).
* Validation of L'Ecuyer-CMRG RNG seeds failed in recent R devel.
Version: 1.1.0 [2019-01-16]
SIGNIFICANT CHANGES:
* Added argument 'future.stdout' and 'future.conditions' for controlling
whether standard output and conditions (e.g. messages and warnings) produced
during the evaluation of futures should be captured and relayed or not.
Standard output is guaranteed to be relayed in the same order as it would
when using sequential processing. Analogously for conditions. However,
standard output is always relayed before conditions. Errors are always
relayed. Relaying of non-error conditions requires future (>= 1.11.0).
NEW FEATURES:
* Elements can be processed in random order by setting attribute 'ordering'
to "random" of argument 'future.chunk.size' or 'future.scheduling', e.g.
future.chunk.size = structure(TRUE, ordering = "random"). This can help
improve load balancing in cases where there is a correlation between
processing time and ordering of the elements. Note that the order of the
returned values is not affected when randomizing the processing order.
* Swapped order of arguments 'future.lazy' and 'future.seed' to be consistent
with ditto arguments of future::future().
Version: 1.0.1 [2018-08-26]
DOCUMENTATION / LICENCE:
* The license is GPL (>= 2). Previously it was documented as GPL (>= 2.1)
but that is a non-existing GPL version.
BUG FIXES:
* For list objects 'X' where X != as.list(X), future_lapply(X) did not
give the same result as lapply(X). Analogously for future_vapply(X).
* future_mapply() could drop class attribute on elements iterated over,
because .subset() was used internally instead of `[`(). For instance,
iteration over Date objects were affected.
Version: 1.0.0 [2018-06-19]
SIGNIFICANT CHANGES:
* License changed from LGPL (>= 2.1) to GPL (>= 2) to make sure it is
compatible with the source code adopted from R base's apply(), Map(),
replicate(), sapply(), and tapply(), which are all GPL (>= 2).
NEW FEATURES:
* Added future_apply(), future_mapply(), and future_Map().
* Added argument `future.chunk.size` as an alternative to argument
`future.scheduling` for controlling the average number of elements
processed per future ("chunk"). In R 3.5.0, the parallel package
introduced argument 'chunk.size'.
* The maximum total size of globals allowed (option 'future.globals.maxSize')
per future ("chunk") is now scaled up by the number of elements processed by
the future ("chunk") making the protection approximately invariant to the
amount of chunking (arguments 'future.scheduling' and 'future.chunk.size').
BUG FIXES:
* future_lapply(X, ...) did not search for globals in 'X'.
* future_vapply() did not return the same dimension names as vapply() when
FUN.VALUE had no names but FUN(X[[1]]) had.
SOFTWARE QUALITY:
* Test code coverage is 100%.
Version: 0.2.0 [2018-05-01]
NEW FEATURES:
* Added future_eapply(), future_tapply(), future_vapply(), and
future_replicate().
Version: 0.1.0 [2018-01-15]
* Package submitted to CRAN.
Version: 0.0.3 [2017-12-06]
DOCUMENTATION:
* Vignette now covers the basics of the package and describes its role
in the R package ecosystem together with a road map going forward.
SOFTWARE QUALITY:
* Added more package tests. Code coverage is currently at 100%.
Version: 0.0.2 [2017-12-06]
NEW FEATURES:
* PERFORMANCE: future_lapply(x, ...) is now much faster and more memory
efficient for large 'x' vectors because it uses internal fold() function
that is more efficient (memory and speed) version of base::Reduce(f, x),
especially when length(x) is large.
Version: 0.0.0-9000 [2017-08-31]
NEW FEATURES:
* Added future_sapply().
* Added future_lapply() - originally from the future package.
* Created package.
future.apply/R/ 0000755 0001762 0000144 00000000000 13603014217 013106 5 ustar ligges users future.apply/R/fold.R 0000644 0001762 0000144 00000005010 13443044736 014164 0 ustar ligges users #' Efficient Fold, Reduce, Accumulate, Combine of a Vector
#'
#' @param x A vector.
#'
#' @param f A binary function, i.e. a function take takes two arguments.
#'
#' @param left If `TRUE`, vector is combined from the left (the first element),
#' otherwise the right (the last element).
#'
#' @param unname If `TRUE`, function `f` is called as
#' \code{f(unname(y), x[[ii]])}, otherwise as \code{f(y, x[[ii]])},
#' which may introduce name `"y"`.
#'
#' @param threshold An integer (>= 2) specifying the length where the
#' recursive divide-and-conquer call will stop and incremental building of
#' the partial value is performed. Using `threshold = +Inf` will disable
#' recursive folding.
#'
#' @return A vector.
#'
#' @details
#' In order for recursive folding to give the same results as non-recursive
#' folding, binary function `f` must be _associative_ with itself, i.e.
#' \code{f(f(x[[1]], x[[2]]), x[[3]])} equals
#' \code{f(x[[1]], f(x[[2]]), x[[3]])}.
#'
#' This function is a more efficient (memory and speed) of
#' \code{\link[base:Reduce]{Reduce(f, x, right = !left, accumulate = FALSE)}},
#' especially when `x` is long.
#'
#' @keywords internal
fold <- function(x, f, left = TRUE, unname = TRUE, threshold = 1000L) {
f <- match.fun(f)
n <- length(x)
if (n == 0L) return(NULL)
if (!is.vector(x) || is.object(x)) x <- as.list(x)
if (n == 1L) return(x[[1]])
stop_if_not(length(left) == 1, is.logical(left), !is.na(left))
stop_if_not(length(threshold) == 1, is.numeric(threshold),
!is.na(threshold), threshold >= 2)
if (n >= threshold) {
## Divide and conquer, i.e. split, build the two parts, and merge
n_mid <- n %/% 2
y_left <- Recall(f = f, x = x[ 1:n_mid], left = left,
unname = unname, threshold = threshold)
y_right <- Recall(f = f, x = x[(n_mid+1L):n], left = left,
unname = unname, threshold = threshold)
y <- f(y_left, y_right)
y_left <- y_right <- NULL
} else {
## Incrementally build result vector
if (left) {
y <- x[[1L]]
if (unname) {
for (ii in 2:n)
y <- forceAndCall(n = 2L, FUN = f, unname(y), x[[ii]])
} else {
for (ii in 2:n)
y <- forceAndCall(n = 2L, FUN = f, y, x[[ii]])
}
} else {
y <- x[[n]]
if (unname) {
for (ii in (n-1):1)
y <- forceAndCall(n = 2L, FUN = f, x[[ii]], unname(y))
} else {
for (ii in (n-1):1)
y <- forceAndCall(n = 2L, FUN = f, x[[ii]], y)
}
}
}
y
}
future.apply/R/future_tapply.R 0000644 0001762 0000144 00000004077 13602733704 016154 0 ustar ligges users #' @inheritParams future_lapply
#'
#' @param INDEX A list of one or more factors, each of same length as `X`.
#' The elements are coerced to factors by `as.factor()`.
#' See also [base::tapply()].
#'
#' @param default See [base::tapply()].
#'
#' @return
#' `future_tapply()` returns an array with mode `"list"`, unless
#' `simplify = TRUE` (default) _and_ `FUN` returns a scalar, in which
#' case the mode of the array is the same as the returned scalars.
#' See [base::tapply()] for details.
#'
#' @rdname future_lapply
#' @export
future_tapply <- function(X, INDEX, FUN = NULL, ...,
default = NA, simplify = TRUE,
future.label = "future_tapply-%d") {
FUN <- if (!is.null(FUN))
match.fun(FUN)
if (!is.list(INDEX))
INDEX <- list(INDEX)
INDEX <- lapply(INDEX, FUN = as.factor)
nI <- length(INDEX)
if (!nI)
stop("'INDEX' is of length zero")
if (!all(lengths(INDEX) == length(X)))
stop("arguments must have same length")
namelist <- lapply(INDEX, FUN = levels)
extent <- lengths(namelist, use.names = FALSE)
cumextent <- cumprod(extent)
if (cumextent[nI] > .Machine$integer.max)
stop("total number of levels >= 2^31")
storage.mode(cumextent) <- "integer"
ngroup <- cumextent[nI]
group <- as.integer(INDEX[[1L]])
if (nI > 1L) {
for (i in 2L:nI) {
group <- group + cumextent[i - 1L] * (as.integer(INDEX[[i]]) - 1L)
}
}
if (is.null(FUN)) return(group)
levels(group) <- as.character(seq_len(ngroup))
class(group) <- "factor"
ans <- split(X, f = group)
names(ans) <- NULL
index <- as.logical(lengths(ans))
ans <- future_lapply(X = ans[index], FUN = FUN, ..., future.label = future.label)
ansmat <- array({
if (simplify && all(lengths(ans) == 1L)) {
ans <- unlist(ans, recursive = FALSE, use.names = FALSE)
if (!is.null(ans) && is.na(default) && is.atomic(ans))
vector(typeof(ans))
else
default
} else {
vector("list", prod(extent))
}
}, dim = extent, dimnames = namelist)
if (length(ans) > 0L) ansmat[index] <- ans
ansmat
}
future.apply/R/utils.R 0000644 0001762 0000144 00000011434 13602733704 014404 0 ustar ligges users isFALSE <- function(x) {
is.logical(x) && length(x) == 1L && !is.na(x) && !x
}
isNA <- function(x) {
is.logical(x) && length(x) == 1L && is.na(x)
}
stop_if_not <- function(...) {
res <- list(...)
for (ii in 1L:length(res)) {
res_ii <- .subset2(res, ii)
if (length(res_ii) != 1L || is.na(res_ii) || !res_ii) {
mc <- match.call()
call <- deparse(mc[[ii + 1]], width.cutoff = 60L)
if (length(call) > 1L) call <- paste(call[1L], "....")
stop(sprintf("%s is not TRUE", sQuote(call)),
call. = FALSE, domain = NA)
}
}
NULL
}
## From R.utils 2.0.2 (2015-05-23)
hpaste <- function(..., sep = "", collapse = ", ", lastCollapse = NULL, maxHead = if (missing(lastCollapse)) 3 else Inf, maxTail = if (is.finite(maxHead)) 1 else Inf, abbreviate = "...") {
if (is.null(lastCollapse)) lastCollapse <- collapse
# Build vector 'x'
x <- paste(..., sep = sep)
n <- length(x)
# Nothing todo?
if (n == 0) return(x)
if (is.null(collapse)) return(x)
# Abbreviate?
if (n > maxHead + maxTail + 1) {
head <- x[seq_len(maxHead)]
tail <- rev(rev(x)[seq_len(maxTail)])
x <- c(head, abbreviate, tail)
n <- length(x)
}
if (!is.null(collapse) && n > 1) {
if (lastCollapse == collapse) {
x <- paste(x, collapse = collapse)
} else {
xT <- paste(x[1:(n-1)], collapse = collapse)
x <- paste(xT, x[n], sep = lastCollapse)
}
}
x
} # hpaste()
now <- function(x = Sys.time(), format = "[%H:%M:%OS3] ") {
## format(x, format = format) ## slower
format(as.POSIXlt(x, tz = ""), format = format)
}
mdebug <- function(..., debug = getOption("future.debug", FALSE)) {
if (!debug) return()
message(now(), ...)
}
mdebugf <- function(..., appendLF = TRUE,
debug = getOption("future.debug", FALSE)) {
if (!debug) return()
message(now(), sprintf(...), appendLF = appendLF)
}
#' @importFrom utils capture.output
mprint <- function(..., appendLF = TRUE, debug = getOption("future.debug", FALSE)) {
if (!debug) return()
message(paste(now(), capture.output(print(...)), sep = "", collapse = "\n"), appendLF = appendLF)
}
#' @importFrom utils capture.output
mstr <- function(..., appendLF = TRUE, debug = getOption("future.debug", FALSE)) {
if (!debug) return()
message(paste(now(), capture.output(str(...)), sep = "", collapse = "\n"), appendLF = appendLF)
}
## When 'default' is specified, this is 30x faster than
## base::getOption(). The difference is that here we use
## use names(.Options) whereas in 'base' names(options())
## is used.
getOption <- local({
go <- base::getOption
function(x, default = NULL) {
if (missing(default) || match(x, table = names(.Options), nomatch = 0L) > 0L) go(x) else default
}
}) ## getOption()
import_from <- function(name, default = NULL, package) {
ns <- getNamespace(package)
if (exists(name, mode = "function", envir = ns, inherits = FALSE)) {
get(name, mode = "function", envir = ns, inherits = FALSE)
} else if (!is.null(default)) {
default
} else {
stop(sprintf("No such '%s' function: %s()", package, name))
}
}
import_future <- function(name, default = NULL) {
import_from(name, default = default, package = "future")
}
assert_values2 <- function(nX, values, values2, fcn_name, debug = FALSE) {
if (length(values2) != nX) {
chunk_sizes <- sapply(values, FUN = length)
chunk_sizes <- table(chunk_sizes)
chunk_summary <- sprintf("%d chunks with %s elements",
chunk_sizes, names(chunk_sizes))
chunk_summary <- paste(chunk_summary, collapse = ", ")
msg <- sprintf("Unexpected error in %s(): After gathering and merging the values from %d chunks in to a list, the total number of elements (= %d) does not match the number of input elements in 'X' (= %d). There were in total %d chunks and %d elements (%s)", fcn_name, length(values), length(values2), nX, length(values), sum(chunk_sizes), chunk_summary)
if (debug) {
mdebug(msg)
mprint(chunk_sizes)
mdebug("Results before merge chunks:")
mstr(values)
mdebug("Results after merge chunks:")
mstr(values2)
}
msg <- sprintf("%s. Example of the first few values: %s", msg,
paste(capture.output(str(head(values2, 3L))),
collapse = "\\n"))
ex <- FutureError(msg)
stop(ex)
}
}
stealth_sample.int <- function(n, size = n, replace = FALSE, ...) {
oseed <- .GlobalEnv$.Random.seed
on.exit({
if (is.null(oseed)) {
rm(list = ".Random.seed", envir = .GlobalEnv, inherits = FALSE)
} else {
.GlobalEnv$.Random.seed <- oseed
}
})
sample.int(n = n, size = size, replace = replace, ...)
}
#' @importFrom utils packageVersion
future_version <- local({
ver <- NULL
function() {
if (is.null(ver)) ver <<- packageVersion("future")
ver
}
})
future.apply/R/future_eapply.R 0000644 0001762 0000144 00000001306 13602733704 016125 0 ustar ligges users #' @inheritParams future_lapply
#'
#' @param env An \R environment.
#'
#' @param all.names If `TRUE`, the function will also be applied to variables
#' that start with a period (`.`), otherwise not.
#' See [base::eapply()] for details.
#'
#' @return
#' A named (unless `USE.NAMES = FALSE`) list.
#' See [base::eapply()] for details.
#'
#' @rdname future_lapply
#' @export
future_eapply <- function(env, FUN, ..., all.names = FALSE, USE.NAMES = TRUE, future.label = "future_eapply-%d") {
names <- ls(envir = env, all.names = all.names, sorted = FALSE)
X <- mget(names, envir = env, inherits = FALSE)
if (!USE.NAMES) names(X) <- NULL
future_lapply(X = X, FUN = FUN, ..., future.label = future.label)
}
future.apply/R/future_mapply.R 0000644 0001762 0000144 00000013170 13602733704 016137 0 ustar ligges users #' Apply a Function to Multiple List or Vector Arguments
#'
#' `future_mapply()` implements [base::mapply()] using futures with perfect
#' replication of results, regardless of future backend used.
#' Analogously to `mapply()`, `future_mapply()` is a multivariate version of
#' `future_sapply()`.
#' It applies `FUN` to the first elements of each `\ldots` argument,
#' the second elements, the third elements, and so on.
#' Arguments are recycled if necessary.
#'
#' @inheritParams future_lapply
#'
#' @param FUN A function to apply, found via [base::match.fun()].
#'
#' @param \ldots Arguments to vectorize over (vectors or lists of strictly
#' positive length, or all of zero length).
#'
#' @param MoreArgs A list of other arguments to `FUN`.
#'
#' @param SIMPLIFY A logical or character string; attempt to reduce the
#' result to a vector, matrix or higher dimensional array; see the simplify
#' argument of [base::sapply()].
#'
#' @param USE.NAMES A logical; use names if the first `\ldots` argument has
#' names, or if it is a character vector, use that character vector as the
#' names.
#'
#' @param future.globals A logical, a character vector, or a named list for
#' controlling how globals are handled.
#' For details, see [future_lapply()].
#'
#' @param future.seed A logical or an integer (of length one or seven), or
#' a list of `max(lengths(list(...)))` with pre-generated random seeds.
#' For details, see [future_lapply()].
#'
#' @return
#' `future_mapply() returns a list, or for `SIMPLIFY = TRUE`, a vector,
#' array or list. See [base::mapply()] for details.
#'
#' @example incl/future_mapply.R
#'
#' @keywords manip programming iteration
#'
#' @importFrom globals globalsByName
#' @importFrom future Future future resolve values as.FutureGlobals nbrOfWorkers getGlobalsAndPackages FutureError
#' @importFrom utils head str
#' @export
future_mapply <- function(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE, future.stdout = TRUE, future.conditions = NULL, future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1.0, future.chunk.size = NULL, future.label = "future_mapply-%d") {
fcn_name <- "future_mapply"
args_name <- "..."
FUN <- match.fun(FUN)
dots <- list(...)
## Nothing to do?
if (length(dots) == 0L) return(list())
ns <- lengths(dots)
## Nothing to do?
if (all(ns == 0L)) {
if (!USE.NAMES) return(list())
values <- list()
first <- dots[[1]]
names <- names(first)
if (is.null(names) && is.character(first)) names <- first
names(values) <- names
return(values)
}
stop_if_not(all(ns > 0L))
## Recycle?
nX <- max(ns)
stretch <- which(ns < nX)
if (length(stretch) > 0L) {
for (kk in stretch) dots[[kk]] <- rep(dots[[kk]], length.out = nX)
ns <- lengths(dots)
}
stop_if_not(all(ns == nX))
stop_if_not(is.null(MoreArgs) || is.list(MoreArgs))
debug <- getOption("future.debug", FALSE)
if (debug) mdebugf("%s() ...", fcn_name)
## NOTE TO SELF: We'd ideally have a 'future.envir' argument also for
## this function, cf. future(). However, it's not yet clear to me how
## to do this, because we need to have globalsOf() to search for globals
## from the current environment in order to identify the globals of
## arguments 'FUN' and '...'. /HB 2017-03-10
future.envir <- environment() ## Not used; just to clarify the above.
envir <- future.envir
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Future expression
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.null(future.seed) || isFALSE(future.seed) || isNA(future.seed)) {
expr <- bquote({
args <- c(list(FUN = ...future.FUN), ...future.elements_ii, MoreArgs = list(MoreArgs), SIMPLIFY = FALSE, USE.NAMES = FALSE)
do.call(mapply, args = args)
})
} else {
expr <- bquote({
...future.FUN2 <- function(..., ...future.seeds_ii_jj) {
assign(".Random.seed", ...future.seeds_ii_jj, envir = globalenv(), inherits = FALSE)
...future.FUN(...)
}
args <- c(list(FUN = ...future.FUN2), ...future.elements_ii, list(...future.seeds_ii_jj = ...future.seeds_ii), MoreArgs, SIMPLIFY = FALSE, USE.NAMES = FALSE)
do.call(mapply, args = args)
})
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Process
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
values <- future_xapply(
FUN = FUN,
nX = nX,
chunk_args = dots,
MoreArgs = MoreArgs,
get_chunk = function(X, chunk) lapply(X, FUN = `[`, chunk),
expr = expr,
envir = envir,
future.globals = future.globals,
future.packages = future.packages,
future.scheduling = future.scheduling,
future.chunk.size = future.chunk.size,
future.stdout = future.stdout,
future.conditions = future.conditions,
future.seed = future.seed,
future.lazy = future.lazy,
future.label = future.label,
fcn_name = fcn_name,
args_name = args_name,
debug = debug
)
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Reduce
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (USE.NAMES && length(dots) > 0L) {
if (is.null(names1 <- names(dots[[1L]])) && is.character(dots[[1L]])) {
names(values) <- dots[[1L]]
} else if (!is.null(names1)) {
names(values) <- names1
}
}
if (!isFALSE(SIMPLIFY) && length(values) > 0L) {
values <- simplify2array(values, higher = (SIMPLIFY == "array"))
}
if (debug) mdebugf("%s() ... DONE", fcn_name)
values
}
future.apply/R/globals.R 0000644 0001762 0000144 00000006475 13603010225 014663 0 ustar ligges users getGlobalsAndPackagesXApply <- function(FUN, args = NULL, MoreArgs = NULL, envir, future.globals = TRUE, future.packages = NULL, debug = getOption("future.debug", FALSE)) {
use_args <- !is.null(args)
packages <- NULL
globals <- future.globals
scanForGlobals <- FALSE
if (is.logical(globals)) {
## Gather all globals?
if (globals) {
if (debug) mdebug("Finding globals ...")
scanForGlobals <- TRUE
expr <- do.call(call, args = c(list("FUN"),
if (use_args) args else MoreArgs))
} else {
expr <- NULL
attr(globals, "add") <- c(attr(globals, "add"),
c("FUN", if (use_args) "..." else "MoreArgs"))
}
gp <- getGlobalsAndPackages(expr, envir = envir, globals = globals)
globals <- gp$globals
packages <- gp$packages
gp <- NULL
if (debug) {
mdebugf(" - globals found/used: [%d] %s", length(globals), hpaste(sQuote(names(globals))))
mdebugf(" - needed namespaces: [%d] %s", length(packages), hpaste(sQuote(packages)))
mdebug("Finding globals ... DONE")
}
} else if (is.character(globals)) {
globals <- unique(c(globals, "FUN", if (use_args) "..." else "MoreArgs"))
globals <- globalsByName(globals, envir = envir, mustExist = FALSE)
} else if (is.list(globals)) {
names <- names(globals)
if (length(globals) > 0 && is.null(names)) {
stop("Invalid argument 'future.globals'. All globals must be named.")
}
} else {
stop("Invalid argument 'future.globals': ", mode(globals))
}
globals <- as.FutureGlobals(globals)
stop_if_not(inherits(globals, "FutureGlobals"))
names <- names(globals)
if (!is.element("FUN", names)) {
globals <- c(globals, FUN = FUN)
}
if (use_args) {
if (!is.element("...", names)) {
objectSize <- import_future("objectSize")
if (debug) mdebug("Getting '...' globals ...")
dotdotdot <- globalsByName("...", envir = envir, mustExist = TRUE)
dotdotdot <- as.FutureGlobals(dotdotdot)
dotdotdot <- resolve(dotdotdot)
attr(dotdotdot, "total_size") <- objectSize(dotdotdot)
if (debug) mdebug("Getting '...' globals ... DONE")
globals <- c(globals, dotdotdot)
}
} else if (!is.element("MoreArgs", names)) {
globals <- c(globals, list(MoreArgs = MoreArgs))
}
## Assert there are no reserved variables names among globals
reserved <- intersect(c("...future.FUN", "...future.elements_ii",
"...future.seeds_ii"), names)
if (length(reserved) > 0) {
stop("Detected globals using reserved variables names: ",
paste(sQuote(reserved), collapse = ", "))
}
## Avoid FUN() clash with mapply(..., FUN) below.
names <- names(globals)
names[names == "FUN"] <- "...future.FUN"
names(globals) <- names
if (debug) {
mdebug("Globals to be used in all futures:")
mstr(globals)
}
if (!is.null(future.packages)) {
stop_if_not(is.character(future.packages))
future.packages <- unique(future.packages)
stop_if_not(!anyNA(future.packages), all(nzchar(future.packages)))
packages <- unique(c(packages, future.packages))
}
if (debug) {
mdebug("Packages to be attached in all futures:")
mstr(packages)
}
list(globals = globals, packages = packages, scanForGlobals = scanForGlobals)
} ## findGlobalsStep1()
future.apply/R/future_xapply.R 0000644 0001762 0000144 00000023162 13603011101 016131 0 ustar ligges users #' @importFrom future nbrOfWorkers future resolve values as.FutureGlobals getGlobalsAndPackages
future_xapply <- function(FUN, nX, chunk_args, args = NULL, MoreArgs = NULL, expr, envir, future.globals, future.packages, future.scheduling, future.chunk.size, future.stdout, future.conditions, future.seed, future.lazy, future.label, get_chunk, fcn_name, args_name, ..., debug) {
stop_if_not(is.function(FUN))
stop_if_not(is.logical(future.stdout), length(future.stdout) == 1L)
## FIXME: Memoize the result
if (is.null(future.conditions)) {
future.conditions <- eval(formals(Future)[["conditions"]])
}
stop_if_not(is.logical(future.lazy), length(future.lazy) == 1L)
stop_if_not(length(future.scheduling) == 1L, !is.na(future.scheduling),
is.numeric(future.scheduling) || is.logical(future.scheduling))
stop_if_not(length(future.label) == 1L, !is.na(future.label),
is.logical(future.label) || is.character(future.label))
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Reproducible RNG (for sequential and parallel processing)
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
seeds <- make_rng_seeds(nX, seed = future.seed, debug = debug)
## Future expression (with or without setting the RNG state) and
## pass possibly tweaked 'future.seed' to future()
if (is.null(seeds)) {
stop_if_not(is.null(future.seed) || isFALSE(future.seed))
if (isFALSE(future.seed) && future_version() <= "1.15.1") {
future.seed <- NULL
}
} else {
## If RNG seeds are used (given or generated), make sure to reset
## the RNG state afterward
oseed <- next_random_seed()
on.exit(set_random_seed(oseed))
## As seed=FALSE but without the RNG check
future.seed <- NULL
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Load balancing ("chunking")
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
chunks <- makeChunks(nX,
nbrOfWorkers = nbrOfWorkers(),
future.scheduling = future.scheduling,
future.chunk.size = future.chunk.size)
if (debug) mdebugf("Number of chunks: %d", length(chunks))
## Process elements in a custom order?
ordering <- attr(chunks, "ordering")
if (!is.null(ordering)) {
if (debug) mdebugf("Index remapping (attribute 'ordering'): [n = %d] %s", length(ordering), hpaste(ordering))
chunks <- lapply(chunks, FUN = function(idxs) .subset(ordering, idxs))
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Globals and Packages
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
gp <- getGlobalsAndPackagesXApply(FUN = FUN,
args = args,
MoreArgs = MoreArgs,
envir = envir,
future.globals = future.globals,
future.packages = future.packages,
debug = debug)
packages <- gp$packages
globals <- gp$globals
scanForGlobals <- gp$scanForGlobals
gp <- NULL
## Add argument placeholders
globals_extra <- as.FutureGlobals(list(
...future.elements_ii = NULL,
...future.seeds_ii = NULL,
...future.globals.maxSize = NULL
))
attr(globals_extra, "resolved") <- TRUE
attr(globals_extra, "total_size") <- 0
globals <- c(globals, globals_extra)
## At this point a globals should be resolved and we should know their total size
## stop_if_not(attr(globals, "resolved"), !is.na(attr(globals, "total_size")))
## To please R CMD check
...future.FUN <- ...future.elements_ii <- ...future.seeds_ii <-
...future.globals.maxSize <- NULL
globals.maxSize <- getOption("future.globals.maxSize")
globals.maxSize.default <- globals.maxSize
if (is.null(globals.maxSize.default)) globals.maxSize.default <- 500 * 1024^2
nchunks <- length(chunks)
if (debug) mdebugf("Number of futures (= number of chunks): %d", nchunks)
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Futures
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
expr <- bquote({
...future.globals.maxSize.org <- getOption("future.globals.maxSize")
if (!identical(...future.globals.maxSize.org, ...future.globals.maxSize)) {
oopts <- options(future.globals.maxSize = ...future.globals.maxSize)
on.exit(options(oopts), add = TRUE)
}
.(expr)
})
## Create labels?
if (isTRUE(future.label)) {
future.label <- sprintf("%s-%%d", fcn_name)
}
if (is.character(future.label)) {
labels <- sprintf(future.label, seq_len(nchunks))
stopifnot(length(labels) == nchunks)
} else {
labels <- NULL
}
if (debug) mdebugf("Launching %d futures (chunks) ...", nchunks)
fs <- vector("list", length = nchunks)
for (ii in seq_along(chunks)) {
chunk <- chunks[[ii]]
if (debug) mdebugf("Chunk #%d of %d ...", ii, length(chunks))
args_ii <- get_chunk(chunk_args, chunk)
globals_ii <- globals
## Subsetting outside future is more efficient
globals_ii[["...future.elements_ii"]] <- args_ii
packages_ii <- packages
if (scanForGlobals) {
if (debug) mdebugf(" - Finding globals in '%s' for chunk #%d ...", args_name, ii)
gp <- getGlobalsAndPackages(args_ii, envir = envir, globals = TRUE)
globals_args <- gp$globals
packages_args <- gp$packages
gp <- NULL
if (debug) {
mdebugf(" + globals found in '%s' for chunk #%d: [%d] %s",
args_name, chunk, length(globals_args), hpaste(sQuote(names(globals_args))))
mdebugf(" + needed namespaces for '%s' for chunk #%d: [%d] %s",
args_name, chunk, length(packages_args), hpaste(sQuote(packages_args)))
}
## Export also globals found in arguments?
if (length(globals_args) > 0L) {
reserved <- intersect(c("...future.FUN", "...future.elements_ii",
"...future.seeds_ii"), names(globals_args))
if (length(reserved) > 0) {
stop("Detected globals in '%s' using reserved variables names: ",
args_name, paste(sQuote(reserved), collapse = ", "))
}
globals_args <- as.FutureGlobals(globals_args)
globals_ii <- unique(c(globals_ii, globals_args))
## Packages needed due to globals in arguments?
if (length(packages_args) > 0L)
packages_ii <- unique(c(packages_ii, packages_args))
}
if (debug) mdebugf(" - Finding globals in '%s' for chunk #%d ... DONE", args_name, ii)
}
args_ii <- NULL
## stop_if_not(attr(globals_ii, "resolved"))
## Adjust option 'future.globals.maxSize' to account for the fact that more
## than one element is processed per future. The adjustment is done by
## scaling up the limit by the number of elements in the chunk. This is
## a "good enough" approach.
## (https://github.com/HenrikBengtsson/future.apply/issues/8).
if (length(chunk) > 1L) {
globals_ii["...future.globals.maxSize"] <- list(globals.maxSize)
options(future.globals.maxSize = length(chunk) * globals.maxSize.default)
if (debug) mdebugf(" - Adjusted option 'future.globals.maxSize': %g -> %d * %g = %g (bytes)", globals.maxSize.default, length(chunk), globals.maxSize.default, getOption("future.globals.maxSize"))
on.exit(options(future.globals.maxSize = globals.maxSize), add = TRUE)
}
## Using RNG seeds or not?
if (is.null(seeds)) {
if (debug) mdebug(" - seeds: ")
} else {
if (debug) mdebugf(" - seeds: [%d] ", length(chunk))
globals_ii[["...future.seeds_ii"]] <- seeds[chunk]
}
fs[[ii]] <- future(
expr, substitute = FALSE,
envir = envir,
stdout = future.stdout,
conditions = future.conditions,
globals = globals_ii, packages = packages_ii,
seed = future.seed,
lazy = future.lazy,
label = labels[ii]
)
## Not needed anymore
rm(list = c("chunk", "globals_ii"))
if (debug) mdebugf("Chunk #%d of %d ... DONE", ii, nchunks)
} ## for (ii ...)
if (debug) mdebugf("Launching %d futures (chunks) ... DONE", nchunks)
## 4. Resolving futures
if (debug) mdebugf("Resolving %d futures (chunks) ...", nchunks)
values <- values(fs)
## Not needed anymore
rm(list = "fs")
if (debug) {
mdebugf(" - Number of value chunks collected: %d", length(values))
mdebugf("Resolving %d futures (chunks) ... DONE", nchunks)
}
## Sanity check
stop_if_not(length(values) == nchunks)
if (debug) mdebugf("Reducing values from %d chunks ...", nchunks)
values2 <- do.call(c, args = values)
if (debug) {
mdebugf(" - Number of values collected after concatenation: %d",
length(values2))
mdebugf(" - Number of values expected: %d", nX)
}
assert_values2(nX, values, values2, fcn_name = fcn_name, debug = debug)
values <- values2
rm(list = "values2")
## Sanity check (this may happen if the future backend is broken)
stop_if_not(length(values) == nX)
## Were elements processed in a custom order?
if (length(values) > 1L && !is.null(ordering)) {
invOrdering <- vector(mode(ordering), length = nX)
idx <- 1:nX
invOrdering[.subset(ordering, idx)] <- idx
rm(list = c("ordering", "idx"))
if (debug) mdebugf("Reverse index remapping (attribute 'ordering'): [n = %d] %s", length(invOrdering), hpaste(invOrdering))
values <- .subset(values, invOrdering)
rm(list = c("invOrdering"))
}
if (debug) mdebugf("Reducing values from %d chunks ... DONE", nchunks)
values
} ## future_xapply() future.apply/R/future_replicate.R 0000644 0001762 0000144 00000001702 13602733704 016603 0 ustar ligges users #' @inheritParams future_lapply
#'
#' @param n The number of replicates.
#'
#' @param expr An \R expression to evaluate repeatedly.
#'
#' @return
#' `future_replicate()` is a wrapper around `future_sapply()` and return
#' simplified object according to the `simplify` argument.
#' See [base::replicate()] for details.
#' Since `future_replicate()` usually involves random number generation (RNG),
#' it uses `future.seed = TRUE` by default in order produce sound random
#' numbers regardless of future backend and number of background workers used.
#'
#' @export
#'
#' @rdname future_lapply
future_replicate <- function(n, expr, simplify = "array",
future.seed = TRUE, ...,
future.label = "future_replicate-%d")
future_sapply(X = integer(n),
FUN = eval.parent(substitute(function(...)expr)),
simplify = simplify,
future.seed = future.seed, ..., future.label = future.label)
future.apply/R/future_sapply.R 0000644 0001762 0000144 00000001742 13602733704 016147 0 ustar ligges users #' @inheritParams future_lapply
#'
#' @param simplify See [base::sapply()] and [base::tapply()], respectively.
#'
#' @param USE.NAMES See [base::sapply()].
#'
#' @return
#' For `future_sapply()`, a vector with same length and names as \code{X}.
#' See [base::sapply()] for details.
#'
#' @export
#'
#' @author
#' The implementations of `future_replicate()`, `future_sapply()`, and
#' `future_tapply()` are adopted from the source code of the corresponding
#' base \R functions, which are licensed under GPL (>= 2) with
#' 'The R Core Team' as the copyright holder.
#'
#' @rdname future_lapply
future_sapply <- function(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE, future.label = "future_sapply-%d") {
answer <- future_lapply(X = X, FUN = FUN, ..., future.label = future.label)
if (USE.NAMES && is.character(X) && is.null(names(answer)))
names(answer) <- X
if (!isFALSE(simplify) && length(answer))
simplify2array(answer, higher = (simplify == "array"))
else
answer
}
future.apply/R/future_apply.R 0000644 0001762 0000144 00000014344 13603014217 015756 0 ustar ligges users #' Apply Functions Over Array Margins via Futures
#'
#' `future_apply()` implements [base::apply()] using future with perfect
#' replication of results, regardless of future backend used.
#' It returns a vector or array or list of values obtained by applying a
#' function to margins of an array or matrix.
#'
#' @inheritParams future_lapply
#'
#' @param X an array, including a matrix.
#'
#' @param MARGIN A vector giving the subscripts which the function will be
#' applied over. For example, for a matrix `1` indicates rows, `2` indicates
#' columns, `c(1, 2)` indicates rows and columns.
#' Where `X` has named dimnames, it can be a character vector selecting
#' dimension names.
#'
#' @param \ldots (optional) Additional arguments passed to `FUN()`, except
#' `future.*` arguments, which are passed on to `future_lapply()` used
#' internally.
#'
#' @return
#' Returns a vector or array or list of values obtained by applying a
#' function to margins of an array or matrix.
#' See [base::apply()] for details.
#'
#' @author
#' The implementations of `future_apply()` is adopted from the source code
#' of the corresponding base \R function, which is licensed under GPL (>= 2)
#' with 'The R Core Team' as the copyright holder.
#'
#' @example incl/future_apply.R
#'
#' @importFrom future nbrOfWorkers
#' @export
future_apply <- function(X, MARGIN, FUN, ..., future.globals = TRUE, future.packages = NULL, future.label = "future_apply-%d") {
debug <- getOption("future.debug", FALSE)
FUN <- match.fun(FUN)
## Ensure that X is an array object
dl <- length(dim(X))
if(!dl) stop("dim(X) must have a positive length")
if(is.object(X))
X <- if(dl == 2L) as.matrix(X) else as.array(X)
## now record dim as coercion can change it
## (e.g. when a data frame contains a matrix).
d <- dim(X)
dn <- dimnames(X)
ds <- seq_len(dl)
## Extract the margins and associated dimnames
if (is.character(MARGIN)) {
if(is.null(dnn <- names(dn))) # names(NULL) is NULL
stop("'X' must have named dimnames")
MARGIN <- match(MARGIN, dnn)
if (anyNA(MARGIN))
stop("not all elements of 'MARGIN' are names of dimensions")
}
s.call <- ds[-MARGIN]
s.ans <- ds[MARGIN]
d.call <- d[-MARGIN]
d.ans <- d[MARGIN]
dn.call <- dn[-MARGIN]
dn.ans <- dn[MARGIN]
## dimnames(X) <- NULL
## do the calls
d2 <- prod(d.ans)
if(d2 == 0L) {
## arrays with some 0 extents: return ``empty result'' trying
## to use proper mode and dimension:
## The following is still a bit `hackish': use non-empty X
newX <- array(vector(typeof(X), 1L), dim = c(prod(d.call), 1L))
ans <- forceAndCall(1, FUN, if(length(d.call) < 2L) newX[,1] else
array(newX[, 1L], d.call, dn.call), ...)
return(if(is.null(ans)) ans else if(length(d.ans) < 2L) ans[1L][-1L]
else array(ans, d.ans, dn.ans))
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Globals and Packages
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
gp <- getGlobalsAndPackagesXApply(
FUN,
args = list(X = X, ...),
envir = environment(),
future.globals = future.globals,
future.packages = future.packages,
debug = debug
)
globals <- gp$globals
packages <- gp$packages
gp <- NULL
## Check size of global variables?
## Doing it here, on the matrix object, is much faster than doing it for
## the list elements passed to future_lapply()
oldMaxSize <- maxSize <- getOption("future.globals.maxSize")
if (is.null(maxSize) || is.finite(maxSize)) {
if (is.null(maxSize)) maxSize <- 500 * 1024^2
objectSize <- import_future("objectSize")
size <- objectSize(X)
nWorkers <- nbrOfWorkers()
chunk_size <- size / nWorkers
other_size <- attr(globals, "total_size")
if (is.numeric(other_size)) chunk_size <- chunk_size + other_size
if (chunk_size > maxSize) {
asIEC <- import_future("asIEC")
msg <- sprintf("The total size of %s (of class %s and type %s) is %s and the total size of the other argument is %s. With %d workers, this translates to %s per worker needed for future_apply(), which exceeds the maximum allowed size of %s (option 'future.globals.maxSize').", sQuote("X"), sQuote(class(X)[1]), sQuote(typeof(X)), asIEC(size), asIEC(other_size), nWorkers, asIEC(chunk_size), asIEC(maxSize))
if (debug) mdebug(msg)
stop(msg)
}
on.exit(options(future.globals.maxSize = oldMaxSize), add = TRUE)
options(future.globals.maxSize = +Inf)
}
newX <- aperm(X, c(s.call, s.ans))
dim(newX) <- c(prod(d.call), d2)
if(length(d.call) < 2L) {# vector
if (length(dn.call)) dimnames(newX) <- c(dn.call, list(NULL))
newX <- lapply(1L:d2, FUN = function(i) newX[,i])
} else
newX <- lapply(1L:d2, FUN = function(i)
array(newX[,i], dim = d.call, dimnames = dn.call))
globals$...future.FUN <- NULL
ans <- future_lapply(
X = newX,
FUN = FUN,
...,
future.globals = globals,
future.packages = packages,
future.label = future.label
)
## answer dims and dimnames
ans.list <- is.recursive(ans[[1L]])
l.ans <- length(ans[[1L]])
ans.names <- names(ans[[1L]])
if(!ans.list)
ans.list <- any(lengths(ans) != l.ans)
if(!ans.list && length(ans.names)) {
all.same <- vapply(ans, function(x) identical(names(x), ans.names), NA)
if (!all(all.same)) ans.names <- NULL
}
len.a <- if(ans.list) d2 else length(ans <- unlist(ans, recursive = FALSE))
if(length(MARGIN) == 1L && len.a == d2) {
names(ans) <- if(length(dn.ans[[1L]])) dn.ans[[1L]] # else NULL
ans
}
else if(len.a == d2)
array(ans, d.ans, dn.ans)
else if(len.a && len.a %% d2 == 0L) {
if(is.null(dn.ans)) dn.ans <- vector(mode="list", length(d.ans))
dn1 <- list(ans.names)
if(length(dn.call) && !is.null(n1 <- names(dn <- dn.call[1])) &&
nzchar(n1) && length(ans.names) == length(dn[[1]]))
names(dn1) <- n1
dn.ans <- c(dn1, dn.ans)
array(ans, c(len.a %/% d2, d.ans),
if(!is.null(names(dn.ans)) || !all(vapply(dn.ans, is.null, NA)))
dn.ans)
} else
ans
}
future.apply/R/future_Map.R 0000644 0001762 0000144 00000001324 13602733704 015350 0 ustar ligges users #' @inheritParams future_mapply
#'
#' @param f A function of the arity \eqn{k} if `future_Map()` is called with
#' \eqn{k} arguments.
#'
#' @return
#' `future_Map()` is a simple wrapper to `future_mapply()` which does not
#' attempt to simplify the result.
#' See [base::Map()] for details.
#'
#' @export
#'
#' @author
#' The implementations of `future_Map()` is adopted from the source code
#' of the corresponding base \R function `Map()`, which is licensed under
#' GPL (>= 2) with 'The R Core Team' as the copyright holder.
#'
#' @rdname future_mapply
future_Map <- function(f, ..., future.label = "future_Map-%d") {
f <- match.fun(f)
future_mapply(FUN = f, ..., SIMPLIFY = FALSE, future.label = future.label)
}
future.apply/R/rng.R 0000644 0001762 0000144 00000015000 13602733704 014023 0 ustar ligges users get_random_seed <- function() {
env <- globalenv()
env$.Random.seed
}
set_random_seed <- function(seed) {
env <- globalenv()
if (is.null(seed)) {
rm(list = ".Random.seed", envir = env, inherits = FALSE)
} else {
env$.Random.seed <- seed
}
}
next_random_seed <- function(seed = get_random_seed()) {
sample.int(n = 1L, size = 1L, replace = FALSE)
seed_next <- get_random_seed()
stop_if_not(!any(seed_next != seed))
invisible(seed_next)
}
is_valid_random_seed <- function(seed) {
oseed <- get_random_seed()
on.exit(set_random_seed(oseed))
env <- globalenv()
env$.Random.seed <- seed
res <- tryCatch({
sample.int(n = 1L, size = 1L, replace = FALSE)
}, simpleWarning = function(w) w)
!inherits(res, "simpleWarning")
}
## For RNGkind("L'Ecuyer-CMRG") we should have (see help('RNGkind')):
## .Random.seed <- c(rng.kind, n) where length(n) == 6L.
## From R source code: check for rng.kind %% 10000L == 407L
is_lecyer_cmrg_seed <- function(seed) {
is.numeric(seed) &&
length(seed) == 7L &&
all(is.finite(seed)) &&
(seed[1] %% 10000L == 407L)
}
# @importFrom utils capture.output
as_lecyer_cmrg_seed <- function(seed) {
## Generate a L'Ecuyer-CMRG seed (existing or random)?
if (is.logical(seed)) {
stop_if_not(length(seed) == 1L)
if (!is.na(seed) && !seed) {
stop("Argument 'seed' must be TRUE if logical: ", seed)
}
oseed <- get_random_seed()
## Already a L'Ecuyer-CMRG seed? Then use that as is.
if (!is.na(seed) && seed) {
if (is_lecyer_cmrg_seed(oseed)) return(oseed)
}
## Otherwise, generate a random one.
on.exit(set_random_seed(oseed), add = TRUE)
RNGkind("L'Ecuyer-CMRG")
return(get_random_seed())
}
stop_if_not(is.numeric(seed), all(is.finite(seed)))
seed <- as.integer(seed)
## Already a L'Ecuyer-CMRG seed?
if (is_lecyer_cmrg_seed(seed)) {
return(seed)
}
## Generate a new L'Ecuyer-CMRG seed?
if (length(seed) == 1L) {
oseed <- get_random_seed()
on.exit(set_random_seed(oseed), add = TRUE)
RNGkind("L'Ecuyer-CMRG")
set.seed(seed)
return(get_random_seed())
}
stop("Argument 'seed' must be L'Ecuyer-CMRG RNG seed as returned by parallel::nextRNGStream() or an single integer: ", capture.output(str(seed)))
}
#' Produce Reproducible Seeds for Parallel Random Number Generation
#'
#' @param count The number of RNG seeds to produce.
#'
#' @param seed A logical specifying whether RNG seeds should be generated
#' or not. (`seed = NULL` corresponds to `seed = FALSE`).
#' If a list, then it should be of length `count` and each element should
#' consist of a valid RNG seed.
#'
#' @param debug If `TRUE`, debug output is produced, otherwise not.
#'
#' @return Returns a non-named list of length `count`, or `NULL`.
#' Any seed returned is a valid RNG seed.
#'
#' @importFrom parallel nextRNGStream nextRNGSubStream splitIndices
#' @importFrom utils capture.output str
#'
#' @keywords internal
make_rng_seeds <- function(count, seed = FALSE,
debug = getOption("future.debug", FALSE)) {
## Don't use RNGs? (seed = {FALSE, NULL})
if (is.null(seed)) return(NULL)
if (is.logical(seed) && !is.na(seed) && !seed) return(NULL)
stop_if_not(is.numeric(count), length(count) == 1L, !is.na(count),
count >= 0L)
## Placeholder for all RNG stream seeds.
seeds <- NULL
# Use RNGs?
if (debug) mdebug("Generating random seeds ...")
## A pregenerated sequence of random seeds?
if (is.list(seed)) {
if (debug) mdebugf("Using a pre-define stream of %d random seeds ...", count)
seeds <- seed
nseeds <- length(seeds)
if (nseeds != count) {
stop(sprintf("Argument 'seed' is a list, which specifies the sequence of seeds to be used for each element iterated over, but length(seed) != number of elements: %g != %g", nseeds, count))
}
## Assert same type of RNG seeds?
ns <- unique(unlist(lapply(seeds, FUN = length), use.names = FALSE))
if (length(ns) != 1L) {
stop("The elements of the list specified in argument 'seed' are not all of the same lengths (did you really pass RNG seeds?): ", hpaste(ns))
}
## Did use specify scalar integers as meant for set.seed()?
if (ns == 1L) {
stop("Argument 'seed' is invalid. Pre-generated random seeds must be valid .Random.seed seeds, which means they should be all integers and consists of two or more elements, not just one.")
}
types <- unlist(lapply(seeds, FUN = typeof), use.names = FALSE)
if (!all(types == "integer")) {
stop("The elements of the list specified in argument 'seed' are not all integers (did you really pass RNG seeds?): ", hpaste(unique(types)))
}
## Check if valid random seeds are specified.
## For efficiency, only look at the first one.
if (!is_valid_random_seed(seeds[[1]])) {
stop("The list in argument 'seed' does not seem to hold elements that are valid .Random.seed values: ", capture.output(str(seeds[[1]])))
}
if (debug) {
mdebugf("Using a pre-define stream of %d random seeds ... DONE", count)
mdebug("Generating random seeds ... DONE")
}
return(seeds)
}
if (debug) mdebugf("Generating random seed streams for %d elements ...", count)
## Generate sequence of _all_ RNG seeds starting with an initial seed
## '.seed' that is based on argument 'seed'.
.seed <- as_lecyer_cmrg_seed(seed)
## future_*apply() should return with the same RNG state regardless of
## future strategy used. This is be done such that RNG kind is preserved
## and the seed is "forwarded" one step from what it was when this
## function was called. The forwarding is done by generating one random
## number. Note that this approach is also independent on the number of
## elements iterated over and the different FUN() calls.
oseed <- next_random_seed()
on.exit(set_random_seed(oseed))
seeds <- vector("list", length = count)
for (ii in seq_len(count)) {
## RNG substream seed used when calling FUN() for element(s) 'ii':
## This way each future can in turn generate further seeds, also
## recursively, with minimal risk of generating the same seeds as
## another future. This should make it safe to recursively call
## future_*apply(). /HB 2017-01-11
seeds[[ii]] <- nextRNGSubStream(.seed)
## Main random seed for next iteration (= ii + 1)
.seed <- nextRNGStream(.seed)
}
if (debug) {
mdebugf("Generating random seed streams for %d elements ... DONE", count)
mdebug("Generating random seeds ... DONE")
}
seeds
} # make_rng_seeds()
future.apply/R/chunks.R 0000644 0001762 0000144 00000010111 13443044736 014531 0 ustar ligges users #' Create Chunks of Index Vectors
#'
#' _This is an internal function._
#'
#' @param nbrOfElements (integer) Total number of elements to iterate over.
#'
#' @param nbrOfWorker (integer) Number of workers available.
#'
#' @param future.scheduling (numeric) A strictly positive scalar.
#' Only used if argument `future.chunk.size` is `NULL`.
#'
#' @param future.chunk.size (numeric) The maximum number of elements per
#' chunk, or `NULL`. If `NULL`, then the chunk sizes are given by the
#' `future.scheduling` argument.
#'
#' @return A list of chunks, where each chunk is an integer vector of
#' unique indices \code{[1, nbrOfElements]}. The union of all chunks
#' holds `nbrOfElements` elements and equals `1:nbrOfElements`.
#' If `nbrOfElements == 0`, then an empty list is returned.
#'
#' @section Control processing order of elements:
#' Attribute `ordering` of `future.chunk.size` or `future.scheduling` can
#' be used to control the ordering the elements are iterated over, which
#' only affects the processing order _not_ the order values are returned.
#' This attribute can take the following values:
#' * index vector - an numeric vector of length `nbrOfElements` specifying
#' how elements are remapped
#' * function - an function taking one argument which is called as
#' `ordering(nbrOfElements)` and which much return an
#' index vector of length `nbrOfElements`, e.g.
#' `function(n) rev(seq_len(n))` for reverse ordering.
#' * `"random"` - this will randomize the ordering via random index
#' vector `sample.int(nbrOfElements)`.
#'
#' @importFrom parallel splitIndices
#' @keywords internal
makeChunks <- function(nbrOfElements, nbrOfWorkers,
future.scheduling = 1.0, future.chunk.size = NULL) {
stop_if_not(nbrOfElements >= 0L, nbrOfWorkers >= 1L)
## 'future.chunk.size != NULL' takes precedence over 'future.scheduling'
if (!is.null(future.chunk.size)) {
stop_if_not(length(future.chunk.size) == 1L, !is.na(future.chunk.size),
future.chunk.size > 0)
## Same definition as parallel:::staticNChunks() in R (>= 3.5.0)
nbrOfChunks <- max(1, ceiling(nbrOfElements / future.chunk.size))
## Customized ordering?
ordering <- attr(future.chunk.size, "ordering", exact = TRUE)
} else {
if (is.logical(future.scheduling)) {
stop_if_not(length(future.scheduling) == 1L, !is.na(future.scheduling))
if (future.scheduling) {
nbrOfChunks <- nbrOfWorkers
if (nbrOfChunks > nbrOfElements) nbrOfChunks <- nbrOfElements
} else {
nbrOfChunks <- nbrOfElements
}
} else {
## Treat 'future.scheduling' as the number of chunks per worker, i.e.
## the number of chunks each worker should process on average.
stop_if_not(length(future.scheduling) == 1L, !is.na(future.scheduling),
future.scheduling >= 0)
if (nbrOfWorkers > nbrOfElements) nbrOfWorkers <- nbrOfElements
nbrOfChunks <- future.scheduling * nbrOfWorkers
if (nbrOfChunks < 1L) {
nbrOfChunks <- 1L
} else if (nbrOfChunks > nbrOfElements) {
nbrOfChunks <- nbrOfElements
}
}
## Customized ordering?
ordering <- attr(future.scheduling, "ordering", exact = TRUE)
}
chunks <- splitIndices(nbrOfElements, ncl = nbrOfChunks)
## Customized ordering?
if (nbrOfElements > 1L && !is.null(ordering)) {
if (is.character(ordering) && ordering == "random") {
map <- stealth_sample.int(nbrOfElements)
} else if (is.numeric(ordering)) {
map <- ordering
} else if (is.function(ordering)) {
map <- ordering(nbrOfElements)
} else {
stop(sprintf("Unknown value of attribute %s for argument %s: ", "ordering", if (!is.null(future.chunk.size)) "future.chunk.size" else "future.scheduling"), mode(ordering))
}
if (!is.null(map)) {
## Simple validity check of "ordering". Looking for NAs, range,
## uniqueness is too expensive so skipped.
stop_if_not(length(map) == nbrOfElements)
attr(chunks, "ordering") <- map
}
}
chunks
}
future.apply/R/future_lapply.R 0000644 0001762 0000144 00000023310 13603010404 016115 0 ustar ligges users #' Apply a Function over a List or Vector via Futures
#'
#' `future_lapply()` implements [base::lapply()] using futures with perfect
#' replication of results, regardless of future backend used.
#' Analogously, this is true for all the other `future_nnn()` functions.
#'
#' @param X A vector-like object to iterate over.
#'
#' @param FUN A function taking at least one argument.
#'
#' @param \ldots (optional) Additional arguments passed to `FUN()`.
#' For `future_*apply()` functions and `replicate()`, any `future.*` arguments
#' part of `\ldots` are passed on to `future_lapply()` used internally.
#'
#' @param future.stdout If `TRUE` (default), then the standard output of the
#' underlying futures is captured, and re-outputted as soon as possible.
#' If `FALSE`, any output is silenced (by sinking it to the null device
#' as it is outputted).
#' If `NA` (not recommended), output is _not_ intercepted.
#'
#' @param future.conditions A character string of conditions classes to be
#' captured and relayed. The default is the same as the `condition`
#' argument of [future::Future()].
#' To not intercept conditions, use `conditions = character(0L)`.
#' Errors are always relayed.
#'
#' @param future.globals A logical, a character vector, or a named list for
#' controlling how globals are handled. For details, see below section.
#'
#' @param future.packages (optional) a character vector specifying packages
#' to be attached in the R environment evaluating the future.
#'
#' @param future.lazy Specifies whether the futures should be resolved
#' lazily or eagerly (default).
#'
#' @param future.seed A logical or an integer (of length one or seven),
#' or a list of `length(X)` with pre-generated random seeds.
#' For details, see below section.
#'
#' @param future.scheduling Average number of futures ("chunks") per worker.
#' If `0.0`, then a single future is used to process all elements
#' of `X`.
#' If `1.0` or `TRUE`, then one future per worker is used.
#' If `2.0`, then each worker will process two futures
#' (if there are enough elements in `X`).
#' If `Inf` or `FALSE`, then one future per element of
#' `X` is used.
#' Only used if `future.chunk.size` is `NULL`.
#'
#' @param future.chunk.size The average number of elements per future ("chunk").
#' If `Inf`, then all elements are processed in a single future.
#' If `NULL`, then argument `future.scheduling` is used.
#'
#' @param future.label If a character string, then each future is assigned
#' a label `sprintf(future.label, chunk_idx)`. If TRUE, then the
#' same as `future.label = "future_lapply-%d"`. If FALSE, no labels
#' are assigned.
#'
#' @return
#' For `future_lapply()`, a list with same length and names as `X`.
#' See [base::lapply()] for details.
#'
#' @section Global variables:
#' Argument `future.globals` may be used to control how globals
#' should be handled similarly how the `globals` argument is used with
#' `future()`.
#' Since all function calls use the same set of globals, this function can do
#' any gathering of globals upfront (once), which is more efficient than if
#' it would be done for each future independently.
#' If `TRUE`, `NULL` or not is specified (default), then globals
#' are automatically identified and gathered.
#' If a character vector of names is specified, then those globals are gathered.
#' If a named list, then those globals are used as is.
#' In all cases, `FUN` and any `\ldots` arguments are automatically
#' passed as globals to each future created as they are always needed.
#'
#' @section Reproducible random number generation (RNG):
#' Unless `future.seed = FALSE`, this function guarantees to generate
#' the exact same sequence of random numbers _given the same initial
#' seed / RNG state_ - this regardless of type of futures, scheduling
#' ("chunking") strategy, and number of workers.
#'
#' RNG reproducibility is achieved by pregenerating the random seeds for all
#' iterations (over `X`) by using L'Ecuyer-CMRG RNG streams. In each
#' iteration, these seeds are set before calling `FUN(X[[ii]], ...)`.
#' _Note, for large `length(X)` this may introduce a large overhead._
#' As input (`future.seed`), a fixed seed (integer) may be given, either
#' as a full L'Ecuyer-CMRG RNG seed (vector of 1+6 integers) or as a seed
#' generating such a full L'Ecuyer-CMRG seed.
#' If `future.seed = TRUE`, then \code{\link[base:Random]{.Random.seed}}
#' is returned if it holds a L'Ecuyer-CMRG RNG seed, otherwise one is created
#' randomly.
#' If `future.seed = NA`, a L'Ecuyer-CMRG RNG seed is randomly created.
#' If none of the function calls `FUN(X[[ii]], ...)` uses random number
#' generation, then `future.seed = FALSE` may be used.
#'
#' In addition to the above, it is possible to specify a pre-generated
#' sequence of RNG seeds as a list such that
#' `length(future.seed) == length(X)` and where each element is an
#' integer seed vector that can be assigned to
#' \code{\link[base:Random]{.Random.seed}}. One approach to generate a
#' set of valid RNG seeds based on fixed initial seed (here `42L`) is:
#' ```r
#' seeds <- future_lapply(seq_along(X), FUN = function(x) .Random.seed,
#' future.chunk.size = Inf, future.seed = 42L)
#' ```
#' **Note that `as.list(seq_along(X))` is _not_ a valid set of such
#' `.Random.seed` values.**
#'
#' In all cases but `future.seed = FALSE`, the RNG state of the calling
#' R processes after this function returns is guaranteed to be
#' "forwarded one step" from the RNG state that was before the call and
#' in the same way regardless of `future.seed`, `future.scheduling`
#' and future strategy used. This is done in order to guarantee that an \R
#' script calling `future_lapply()` multiple times should be numerically
#' reproducible given the same initial seed.
#'
#' @section Control processing order of elements:
#' Attribute `ordering` of `future.chunk.size` or `future.scheduling` can
#' be used to control the ordering the elements are iterated over, which
#' only affects the processing order and _not_ the order values are returned.
#' This attribute can take the following values:
#' * index vector - an numeric vector of length `length(X)`
#' * function - an function taking one argument which is called as
#' `ordering(length(X))` and which much return an
#' index vector of length `length(X)`, e.g.
#' `function(n) rev(seq_len(n))` for reverse ordering.
#' * `"random"` - this will randomize the ordering via random index
#' vector `sample.int(length(X))`.
#' For example, `future.scheduling = structure(TRUE, ordering = "random")`.
#' _Note_, when elements are processed out of order, then captured standard
#' output and conditions are also relayed in that order, that is out of order.
#'
#' @example incl/future_lapply.R
#'
#' @keywords manip programming iteration
#'
#' @importFrom globals globalsByName
#' @importFrom future future resolve values as.FutureGlobals nbrOfWorkers getGlobalsAndPackages FutureError
#' @importFrom utils head str
#' @export
future_lapply <- function(X, FUN, ..., future.stdout = TRUE, future.conditions = NULL, future.globals = TRUE, future.packages = NULL, future.lazy = FALSE, future.seed = FALSE, future.scheduling = 1.0, future.chunk.size = NULL, future.label = "future_lapply-%d") {
fcn_name <- "future_lapply"
args_name <- "X"
## Coerce to as.list()?
if (!is.vector(X) || is.object(X)) X <- as.list(X)
## Nothing to do?
nX <- length(X)
if (nX == 0L) return(as.list(X))
debug <- getOption("future.debug", FALSE)
if (debug) mdebugf("%s() ...", fcn_name)
## NOTE TO SELF: We'd ideally have a 'future.envir' argument also for
## this function, cf. future(). However, it's not yet clear to me how
## to do this, because we need to have globalsOf() to search for globals
## from the current environment in order to identify the globals of
## arguments 'FUN' and '...'. /HB 2017-03-10
future.envir <- environment() ## Not used; just to clarify the above.
envir <- future.envir
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Future expression
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.null(future.seed) || isFALSE(future.seed) || isNA(future.seed)) {
## Don't set .Random.seed
seedExpr <- NULL
} else {
## Set .Random.seed
seedExpr <- quote(assign(".Random.seed", ...future.seeds_ii[[jj]], envir = globalenv(), inherits = FALSE))
}
expr <- bquote({
lapply(seq_along(...future.elements_ii), FUN = function(jj) {
...future.X_jj <- ...future.elements_ii[[jj]]
.(seedExpr)
...future.FUN(...future.X_jj, ...)
})
})
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Process
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
values <- future_xapply(
FUN = FUN,
nX = nX,
chunk_args = X,
args = list(...),
get_chunk = `[`,
expr = expr,
envir = envir,
future.globals = future.globals,
future.packages = future.packages,
future.scheduling = future.scheduling,
future.chunk.size = future.chunk.size,
future.stdout = future.stdout,
future.conditions = future.conditions,
future.seed = future.seed,
future.lazy = future.lazy,
future.label = future.label,
fcn_name = fcn_name,
args_name = args_name,
debug = debug
)
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Reduce
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
names(values) <- names(X)
if (debug) mdebugf("%s() ... DONE", fcn_name)
values
}
future.apply/R/future_vapply.R 0000644 0001762 0000144 00000005402 13602733704 016147 0 ustar ligges users #' @inheritParams future_lapply
#'
#' @param FUN.VALUE A template for the required return value from
#' each `FUN(X[ii], ...)`.
#' Types may be promoted to a higher type within the ordering
#' logical < integer < double < complex, but not demoted.
#' See [base::vapply()] for details.
#'
#' @return
#' For `future_vapply()`, a vector with same length and names as \code{X}.
#' See [base::vapply()] for details.
#'
#' @export
#'
#' @rdname future_lapply
future_vapply <- function(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE, future.label = "future_vapply-%d") {
## Coerce to as.list()?
if (!is.vector(X) || is.object(X)) X <- as.list(X)
n <- length(X)
stop_if_not(is.function(FUN))
stop_if_not(is.vector(FUN.VALUE) || is.array(FUN.VALUE))
type <- typeof(FUN.VALUE)
times <- length(FUN.VALUE)
dim <- dim(FUN.VALUE)
stop_if_not(is.logical(USE.NAMES), length(USE.NAMES) == 1L, !is.na(USE.NAMES))
valid_types <- switch(
type,
logical = "logical",
integer = c("logical", "integer"),
double = c("logical", "integer", "double"),
complex = c("logical", "integer", "double", "complex"),
type
)
x_FUN <- FUN
res <- future_lapply(X, FUN = function(x, ...) {
value <- x_FUN(x, ...)
if (length(value) != times) {
stop(sprintf(
"values must be length %d, but FUN(X[[ii]]) result is length %d",
times, length(value)))
}
stop_if_not(all(dim(value) == dim), typeof(value) %in% valid_types)
value
}, ..., future.label = future.label)
if (!is.null(dim)) {
dim_res <- c(dim, n)
} else if (times != 1L) {
dim_res <- c(times, n)
} else {
dim_res <- NULL
}
if (USE.NAMES && length(res) > 0L) {
if (is.null(dim)) {
names_FUN.VALUE <- names(FUN.VALUE)
if (is.null(names_FUN.VALUE)) names_FUN.VALUE <- names(res[[1]])
} else {
names_FUN.VALUE <- dimnames(FUN.VALUE)
if (is.null(names_FUN.VALUE)) names_FUN.VALUE <- dimnames(res[[1]])
}
}
res <- unlist(res, use.names = FALSE)
if (is.null(res)) res <- vector(mode = type, length = 0L)
if (!is.null(dim_res)) dim(res) <- dim_res
if (USE.NAMES) {
if (is.array(res)) {
n_dim <- length(dim(res))
dimnames <- vector("list", length = n_dim)
if (is.null(dim)) {
names <- names(X)
if (!is.null(names)) dimnames[[2]] <- names
names <- names_FUN.VALUE
if (!is.null(names)) dimnames[[1]] <- names
} else {
names <- names(X)
if (!is.null(names)) dimnames[[n_dim]] <- names
names <- names_FUN.VALUE
if (!is.null(names)) dimnames[-n_dim] <- names
}
if (!all(unlist(lapply(dimnames, FUN = is.null), use.names = FALSE))) {
dimnames(res) <- dimnames
}
} else {
names(res) <- names(X)
}
}
res
}
future.apply/R/future.apply-package.R 0000644 0001762 0000144 00000007571 13602733534 017303 0 ustar ligges users #' future.apply: Apply Function to Elements in Parallel using Futures
#'
#' The \pkg{future.apply} packages provides parallel implementations of
#' common "apply" functions provided by base \R. The parallel processing
#' is performed via the \pkg{future} ecosystem, which provides a large
#' number of parallel backends, e.g. on the local machine, a remote cluster,
#' and a high-performance compute cluster.
#'
#' Currently implemented functions are:
#'
#' * [future_apply()]: a parallel version of [apply()][base::apply]
#' * [future_by()]: a parallel version of [by()][base::by]
#' * [future_eapply()]: a parallel version of [eapply()][base::lapply]
#' * [future_lapply()]: a parallel version of [lapply()][base::lapply]
#' * [future_mapply()]: a parallel version of [mapply()][base::mapply]
#' * [future_sapply()]: a parallel version of [sapply()][base::sapply]
#' * [future_tapply()]: a parallel version of [tapply()][base::tapply]
#' * [future_vapply()]: a parallel version of [vapply()][base::vapply]
#' * [future_Map()]: a parallel version of [Map()][base::Map]
#' * [future_replicate()]: a parallel version of [replicate()][base::replicate]
#'
#' Reproducibility is part of the core design, which means that perfect,
#' parallel random number generation (RNG) is supported regardless of the
#' amount of chunking, type of load balancing, and future backend being used.
#'
#' Since these `future_*()` functions have the same arguments as the
#' corresponding base \R function, start using them is often as simple as
#' renaming the function in the code. For example, after attaching the package:
#' ```r
#' library(future.apply)
#' ```
#' code such as:
#' ```r
#' x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
#' y <- lapply(x, quantile, probs = 1:3/4)
#' ```
#' can be updated to:
#' ```r
#' y <- future_lapply(x, quantile, probs = 1:3/4)
#' ```
#'
#' The default settings in the \pkg{future} framework is to process code
#' _sequentially_. To run the above in parallel on the local machine
#' (on any operating system), use:
#' ```r
#' plan(multiprocess)
#' ```
#' first. That's it!
#'
#' To go back to sequential processing, use `plan(sequential)`.
#' If you have access to multiple machines on your local network, use:
#' ```r
#' plan(cluster, workers = c("n1", "n2", "n2", "n3"))
#' ```
#' This will set up four workers, one on `n1` and `n3`, and two on `n2`.
#' If you have SSH access to some remote machines, use:
#' ```r
#' plan(cluster, workers = c("m1.myserver.org", "m2.myserver.org))
#' ```
#' See the \pkg{future} package and [future::plan()] for more examples.
#'
#' The \pkg{future.batchtools} package provides support for high-performance
#' compute (HPC) cluster schedulers such as SGE, Slurm, and TORQUE / PBS.
#' For example,
#'
#' * `plan(batchtools_slurm)`:
#' Process via a Slurm scheduler job queue.
#' * `plan(batchtools_torque)`:
#' Process via a TORQUE / PBS scheduler job queue.
#'
#' This builds on top of the queuing framework that the \pkg{batchtools}
#' package provides. For more details on backend configuration, please see
#' the \pkg{future.batchtools} and \pkg{batchtools} packages.
#'
#' These are just a few examples of parallel/distributed backend for the
#' future ecosystem. For more alternatives, see the 'Reverse dependencies'
#' section on the
#' [future CRAN package page](https://cran.r-project.org/package=future).
#'
#' @author
#' Henrik Bengtsson, except for the implementations of `future_apply()`,
#' `future_Map()`, `future_replicate()`, `future_sapply()`, and
#' `future_tapply()`, which are adopted from the source code of the
#' corresponding base \R functions, which are licensed under GPL (>= 2)
#' with 'The R Core Team' as the copyright holder.
#' Because of these dependencies, the license of this package is GPL (>= 2).
#'
#' @keywords manip programming iteration
#'
#' @docType package
#' @aliases future.apply-package
#' @name future.apply
NULL
future.apply/R/future_by.R 0000644 0001762 0000144 00000010026 13602733704 015244 0 ustar ligges users #' Apply a Function to a Data Frame Split by Factors via Futures
#'
#' @param data An \R object, normally a data frame, possibly a matrix.
#'
#' @param INDICES A factor or a list of factors, each of length `nrow(data)`.
#'
#' @param FUN a function to be applied to (usually data-frame) subsets of `data`.
#'
#' @param \ldots Additional arguments pass to [future_lapply()] and
#' then to `FUN()`.
#'
#' @param simplify logical: see [base::tapply].
#'
#' @return
#' An object of class "by", giving the results for each subset.
#' This is always a list if simplify is false, otherwise a list
#' or array (see [base::tapply]).
#' See also [base::by()] for details.
#'
#' @example incl/future_by.R
#'
#' @details
#' Internally, `data` is grouped by `INDICES` into a list of `data`
#' subset elements which is then processed by [future_lapply()].
#' When the groups differ significantly in size, the processing time
#' may differ significantly between the groups.
#' To correct for processing-time imbalances, adjust the amount of chunking
#' via arguments `future.scheduling` and `future.chunk.size`.
#'
#' @rdname future_by
#' @export
future_by <- function(data, INDICES, FUN, ..., simplify = TRUE) {
UseMethod("future_by")
}
#' @export
future_by.default <- function(data, INDICES, FUN, ..., simplify = TRUE) {
ndim <- length(dim(data))
.SUBSETTER <- if (ndim == 0L) {
function(row) data[row, , drop = TRUE]
} else {
function(row) data[row, , drop = FALSE]
}
data <- as.data.frame(data)
future_by_internal(data = data, INDICES = INDICES, FUN = FUN, ...,
simplify = simplify,
.INDICES.NAME = deparse(substitute(INDICES))[1L],
.CALL = match.call(),
.SUBSETTER = .SUBSETTER)
}
#' @export
future_by.data.frame <- function(data, INDICES, FUN, ..., simplify = TRUE) {
future_by_internal(data = data, INDICES = INDICES, FUN = FUN, ...,
simplify = simplify,
.INDICES.NAME = deparse(substitute(INDICES))[1L],
.CALL = match.call(),
.SUBSETTER = function(row) data[row, , drop = FALSE])
}
future_by_internal <- function(data, INDICES, FUN, ..., simplify = TRUE, .SUBSETTER, .CALL, .INDICES.NAME, future.label = "future_by-%d") {
FUN <- if (!is.null(FUN)) match.fun(FUN)
stop_if_not(is.function(.SUBSETTER))
if (!is.list(INDICES)) {
INDEX <- vector("list", length = 1L)
INDEX[[1L]] <- INDICES
names(INDEX) <- .INDICES.NAME
INDICES <- INDEX
INDEX <- NULL ## Not needed anymore
}
INDICES <- lapply(INDICES, FUN = as.factor)
nI <- length(INDICES)
if (!nI) stop("'INDICES' is of length zero")
nd <- nrow(data)
if (!all(lengths(INDICES) == nd)) {
stop("All elements of argument 'INDICES' must have same length as 'data'")
}
namelist <- lapply(INDICES, FUN = levels)
extent <- lengths(namelist, use.names = FALSE)
cumextent <- cumprod(extent)
if (cumextent[nI] > .Machine$integer.max)
stop("total number of levels >= 2^31")
storage.mode(cumextent) <- "integer"
ngroup <- cumextent[nI]
group <- as.integer(INDICES[[1L]])
if (nI > 1L) {
for (i in 2L:nI) {
group <- group + cumextent[i - 1L] * (as.integer(INDICES[[i]]) - 1L)
}
}
cumextent <- NULL ## Not needed anymore
levels(group) <- as.character(seq_len(ngroup))
class(group) <- "factor"
ans <- split(seq_len(nd), f = group)
names(ans) <- NULL
index <- as.logical(lengths(ans) > 0L)
group <- NULL ## Not needed anymore
grouped_data <- lapply(X = ans[index], FUN = .SUBSETTER)
ans <- future_lapply(X = grouped_data, FUN = FUN, ..., future.label = future.label)
grouped_data <- NULL ## Not needed anymore
ansmat <- array({
if (simplify && all(lengths(ans) == 1L)) {
ans <- unlist(ans, recursive = FALSE, use.names = FALSE)
if (!is.null(ans) && is.atomic(ans)) vector(typeof(ans)) else NA
} else {
vector("list", length = prod(extent))
}
}, dim = extent, dimnames = namelist)
if (length(ans) > 0L) ansmat[index] <- ans
ans <- NULL ## Not needed anymore
structure(ansmat,
call = .CALL,
class = "by"
)
}
future.apply/MD5 0000644 0001762 0000144 00000005555 13605176412 013237 0 ustar ligges users 40b9377e226c7086215d2a89b769845b *DESCRIPTION
91536e274776654da41fc168dacd5740 *NAMESPACE
73c2d0bd9f955291926f2780d41dc449 *NEWS
ebf58acffb6bd3ed4e99fe43787e66b6 *R/chunks.R
f5bae7038efa565ab860b2b7e02a1e66 *R/fold.R
db3b9528d699e8261be3244f7b092b5c *R/future.apply-package.R
f5eb8ac2c85f7af9b378b4b557d59a3e *R/future_Map.R
91b7e7e4ca80c8e99ebb007edb5d4728 *R/future_apply.R
9cc247effd838e7d39cdcfbd5b2c12ae *R/future_by.R
26920e1c355bb6822df142ad61fa2081 *R/future_eapply.R
535582d71210797e4a083f70b3e18f17 *R/future_lapply.R
b15ef7f6f1049d442bd343cde1f8f3ab *R/future_mapply.R
5abdef41fe377cb8de4cab4e8720112d *R/future_replicate.R
732996f6a04778a7b82a685a93fabbc0 *R/future_sapply.R
ec56bc5c511307edbb3d9db185cd54d5 *R/future_tapply.R
12cdddce8b51d2312a315e322356d7dd *R/future_vapply.R
8969b49991b04d48b1d4e712e0999f68 *R/future_xapply.R
850d27fefbd23080f2f11a50469264fa *R/globals.R
e1f959d1a679deffe57c8e34cfdfea4f *R/rng.R
56ed6c738dce4103422c940412b73225 *R/utils.R
061c64ec7b4865d52eabdcf8eca0605d *build/vignette.rds
c65ed4d1282224ffeeb1fddb91e12705 *inst/WORDLIST
eb25c704d9387376b5a940e79b175628 *inst/doc/future.apply-1-overview.html
82db612a4a5a281182b083d5510b9555 *inst/doc/future.apply-1-overview.md.rsp
f19581af39d17cde7d9da58b5626cd1d *inst/vignettes-static/future.apply-1-overview.md.rsp.rsp
83b91fc0677a6cca0cf7a26d0ac9b9e2 *man/fold.Rd
8e06774cbcbf5b43863a9c699afb769d *man/future.apply.Rd
13732ccba2822923b773a933ab01c934 *man/future_apply.Rd
76aedc30fccc8463b88c2677f8a4240c *man/future_by.Rd
f55b885507840632b2a46b4cd348d7ce *man/future_lapply.Rd
72a65b190b8579dd5927a7c26fb2d806 *man/future_mapply.Rd
cbb247e3e74de5b8f88d71ecfd92f96d *man/makeChunks.Rd
defd8ab7fa2fa630858a32f19d454a0d *man/make_rng_seeds.Rd
0292712886c0826cb3a0fa8b5d46f68c *tests/fold.R
2e4618d189f581ea1169165e9dfb1aab *tests/future_apply.R
475ac0461e474ebf2d264c4e41e9062e *tests/future_by.R
16ce9cc54c711594ba68858573da0ebb *tests/future_eapply.R
6368c0b7cf1c330162596a0962c13f33 *tests/future_lapply,RNG.R
a0e301e9a89f950b9119b155d7171e66 *tests/future_lapply,globals.R
5545c54b64fa338f3246a6a15a4e6e38 *tests/future_lapply.R
3bbc8a9f2cd91808c2d7c6939a9b2dd6 *tests/future_mapply,globals.R
1d54fb1bbcbbddd6a7f5eb6fd81cd192 *tests/future_mapply.R
8a0e88bd5caff070ee6023064ec3a85b *tests/future_replicate.R
67f9bb8bf3e02a65701565a9768c1681 *tests/future_sapply.R
d640c99128ff228abe80c17ed5f5c789 *tests/future_tapply.R
557ac0910c81ccecde6c1ef5e5307766 *tests/future_vapply.R
befa096c99ef36f2194b7bd036565a46 *tests/globals,tricky_recursive.R
d1b606b24136d2d7ad8cda37792cbe62 *tests/incl/end.R
a22ffb53f44b646dc66473fec5ebb5f6 *tests/incl/start,load-only.R
0d757150beb30ecef6a18a539d432264 *tests/incl/start.R
c18b68f21db39e30a734f3080d68c21d *tests/rng.R
d23d1bde69f1d66c3922111d6f1fc875 *tests/stdout.R
a1b8a5aa7887761d1a723e00b7e7e8a8 *tests/utils.R
82db612a4a5a281182b083d5510b9555 *vignettes/future.apply-1-overview.md.rsp
future.apply/inst/ 0000755 0001762 0000144 00000000000 13604743022 013666 5 ustar ligges users future.apply/inst/doc/ 0000755 0001762 0000144 00000000000 13604743022 014433 5 ustar ligges users future.apply/inst/doc/future.apply-1-overview.html 0000644 0001762 0000144 00000046761 13604743021 021776 0 ustar ligges users
A Future for R: Apply Function to Elements in Parallel
A Future for R: Apply Function to Elements in Parallel
Introduction
The purpose of this package is to provide worry-free parallel alternatives to base-R “apply” functions, e.g. apply()
, lapply()
, and vapply()
. The goal is that one should be able to replace any of these in the core with its futurized equivalent and things will just work. For example, instead of doing:
library("stats")
x <- 1:10
y <- lapply(x, FUN = quantile, probs = 1:3/4)
one can do:
library("future.apply")
plan(multiprocess) ## Run in parallel on local computer
library("stats")
x <- 1:10
y <- future_lapply(x, FUN = quantile, probs = 1:3/4)
Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. To enable parallel RNG, use argument future.seed = TRUE
.
Role
Where does the future.apply package fit in the software stack? You can think of it as a sibling to foreach, BiocParallel, plyr, etc. Just as parallel provides parLapply()
, foreach provides foreach()
, BiocParallel provides bplapply()
, and plyr provides llply()
, future.apply provides future_lapply()
. Below is a table summarizing this idea:
Package |
Functions |
Backends |
future.apply
|
Future-versions of common goto *apply() functions available in base R (of the ‘base’ package):
future_apply() ,
future_by() ,
future_eapply() ,
future_lapply() ,
future_Map() ,
future_mapply() ,
future_replicate() ,
future_sapply() ,
future_tapply() , and
future_vapply() .
The following function is yet not implemented:
future_rapply()
|
All future backends
|
parallel
|
mclapply() , mcmapply() ,
clusterMap() , parApply() , parLapply() , parSapply() , …
|
Built-in and conditional on operating system
|
foreach
|
foreach() ,
times()
|
All future backends via doFuture
|
BiocParallel
|
Bioconductor’s parallel mappers:
bpaggregate() ,
bpiterate() ,
bplapply() , and
bpvec()
|
All future backends via doFuture (because it supports foreach) or via BiocParallel.FutureParam (direct BiocParallelParam support; prototype)
|
plyr
|
**ply(..., .parallel = TRUE) functions:
aaply() ,
ddply() ,
dlply() ,
llply() , …
|
All future backends via doFuture (because it uses foreach internally)
|
Note that, except for the built-in parallel package, none of these higher-level APIs implement their own parallel backends, but they rather enhance existing ones. The foreach framework leverages backends such as doParallel, doMC and doFuture, and the future.apply framework leverages the future ecosystem and therefore backends such as built-in parallel, future.callr, and future.batchtools.
By separating future_lapply()
and friends from the future package, it helps clarifying the purpose of the future package, which is to define and provide the core Future API, which higher-level parallel APIs can build on and for which any futurized parallel backends can be plugged into.
Roadmap
Implement future_*apply()
versions for all common *apply()
functions that exist in base R. This also involves writing a large set of package tests asserting the correctness and the same behavior as the corresponding *apply()
functions.
Harmonize all future_*apply()
functions with each other, e.g. the future-specific arguments.
Consider additional future_*apply()
functions and features that fit in this package but don't necessarily have a corresponding function in base R. Examples of this may be “apply” functions that return futures rather than values, mechanisms for benchmarking, and richer control over load balancing.
The API and identity of the future.apply package will be kept close to the *apply()
functions in base R. In other words, it will neither keep growing nor be expanded with new, more powerful apply-like functions beyond those core ones in base R. Such extended functionality should be part of a separate package.
Copyright Henrik Bengtsson, 2017-2019
future.apply/inst/doc/future.apply-1-overview.md.rsp 0000644 0001762 0000144 00000015305 13440257025 022225 0 ustar ligges users <%@meta language="R-vignette" content="--------------------------------
%\VignetteIndexEntry{A Future for R: Apply Function to Elements in Parallel}
%\VignetteAuthor{Henrik Bengtsson}
%\VignetteKeyword{R}
%\VignetteKeyword{package}
%\VignetteKeyword{vignette}
%\VignetteKeyword{future}
%\VignetteKeyword{lazy evaluation}
%\VignetteKeyword{synchronous}
%\VignetteKeyword{asynchronous}
%\VignetteKeyword{parallel}
%\VignetteKeyword{cluster}
%\VignetteEngine{R.rsp::rsp}
%\VignetteTangle{FALSE}
Do not edit the *.md.rsp file. Instead edit the *.md.rsp.rsp (sic!)
file found under inst/vignettes-static/ of the source package.
--------------------------------------------------------------------"%>
# A Future for R: Apply Function to Elements in Parallel
## Introduction
The purpose of this package is to provide worry-free parallel alternatives to base-R "apply" functions, e.g. `apply()`, `lapply()`, and `vapply()`. The goal is that one should be able to replace any of these in the core with its futurized equivalent and things will just work. For example, instead of doing:
```r
library("stats")
x <- 1:10
y <- lapply(x, FUN = quantile, probs = 1:3/4)
```
one can do:
```r
library("future.apply")
plan(multiprocess) ## Run in parallel on local computer
library("stats")
x <- 1:10
y <- future_lapply(x, FUN = quantile, probs = 1:3/4)
```
Reproducibility is part of the core design, which means that perfect, parallel random number generation (RNG) is supported regardless of the amount of chunking, type of load balancing, and future backend being used. _To enable parallel RNG, use argument `future.seed = TRUE`._
## Role
Where does the [future.apply] package fit in the software stack? You can think of it as a sibling to [foreach], [BiocParallel], [plyr], etc. Just as parallel provides `parLapply()`, foreach provides `foreach()`, BiocParallel provides `bplapply()`, and plyr provides `llply()`, future.apply provides `future_lapply()`. Below is a table summarizing this idea:
Package |
Functions |
Backends |
future.apply
|
Future-versions of common goto *apply() functions available in base R (of the 'base' package):
future_apply() ,
future_by() ,
future_eapply() ,
future_lapply() ,
future_Map() ,
future_mapply() ,
future_replicate() ,
future_sapply() ,
future_tapply() , and
future_vapply() .
The following function is yet not implemented:
future_rapply()
|
All future backends
|
parallel
|
mclapply() , mcmapply() ,
clusterMap() , parApply() , parLapply() , parSapply() , ...
|
Built-in and conditional on operating system
|
foreach
|
foreach() ,
times()
|
All future backends via doFuture
|
BiocParallel
|
Bioconductor's parallel mappers:
bpaggregate() ,
bpiterate() ,
bplapply() , and
bpvec()
|
All future backends via doFuture (because it supports foreach) or via BiocParallel.FutureParam (direct BiocParallelParam support; prototype)
|
plyr
|
**ply(..., .parallel = TRUE) functions:
aaply() ,
ddply() ,
dlply() ,
llply() , ...
|
All future backends via doFuture (because it uses foreach internally)
|
Note that, except for the built-in parallel package, none of these higher-level APIs implement their own parallel backends, but they rather enhance existing ones. The foreach framework leverages backends such as [doParallel], [doMC] and [doFuture], and the future.apply framework leverages the [future] ecosystem and therefore backends such as built-in parallel, [future.callr], and [future.batchtools].
By separating `future_lapply()` and friends from the [future] package, it helps clarifying the purpose of the future package, which is to define and provide the core Future API, which higher-level parallel APIs can build on and for which any futurized parallel backends can be plugged into.
## Roadmap
1. Implement `future_*apply()` versions for all common `*apply()` functions that exist in base R. This also involves writing a large set of package tests asserting the correctness and the same behavior as the corresponding `*apply()` functions.
2. Harmonize all `future_*apply()` functions with each other, e.g. the future-specific arguments.
3. Consider additional `future_*apply()` functions and features that fit in this package but don't necessarily have a corresponding function in base R. Examples of this may be "apply" functions that return futures rather than values, mechanisms for benchmarking, and richer control over load balancing.
The API and identity of the future.apply package will be kept close to the `*apply()` functions in base R. In other words, it will _neither_ keep growing nor be expanded with new, more powerful apply-like functions beyond those core ones in base R. Such extended functionality should be part of a separate package.
[BatchJobs]: https://cran.r-project.org/package=BatchJobs
[batchtools]: https://cran.r-project.org/package=batchtools
[BiocParallel]: https://bioconductor.org/packages/release/bioc/html/BiocParallel.html
[doFuture]: https://cran.r-project.org/package=doFuture
[doMC]: https://cran.r-project.org/package=doMC
[doParallel]: https://cran.r-project.org/package=doParallel
[foreach]: https://cran.r-project.org/package=foreach
[future]: https://cran.r-project.org/package=future
[future.apply]: https://cran.r-project.org/package=future.apply
[future.BatchJobs]: https://cran.r-project.org/package=future.BatchJobs
[future.batchtools]: https://cran.r-project.org/package=future.batchtools
[future.callr]: https://cran.r-project.org/package=future.callr
[plyr]: https://cran.r-project.org/package=plyr
---
Copyright Henrik Bengtsson, 2017-2019
future.apply/inst/vignettes-static/ 0000755 0001762 0000144 00000000000 13322430303 017153 5 ustar ligges users future.apply/inst/vignettes-static/future.apply-1-overview.md.rsp.rsp 0000644 0001762 0000144 00000004370 13322430303 025547 0 ustar ligges users <%---------------------------------------------------------------------
This *.md.rsp.rsp file is used to generate the *.md.rsp that
is the copied to vignettes/. The latter contains no dynamic
code. The reason for this is that this RSP file uses
multisession futures and for some unknown reason those gives
errors during R CMD build on Windows. Everywhere else they
work including when building this RSP manually. /HB 2016-04-12
---------------------------------------------------------------------%>
<%%@meta language="R-vignette" content="--------------------------------
%\VignetteIndexEntry{A Future for R: Apply Function to Elements in Parallel}
%\VignetteAuthor{Henrik Bengtsson}
%\VignetteKeyword{R}
%\VignetteKeyword{package}
%\VignetteKeyword{vignette}
%\VignetteKeyword{future}
%\VignetteKeyword{lazy evaluation}
%\VignetteKeyword{synchronous}
%\VignetteKeyword{asynchronous}
%\VignetteKeyword{parallel}
%\VignetteKeyword{cluster}
%\VignetteEngine{R.rsp::rsp}
%\VignetteTangle{FALSE}
Do not edit the *.md.rsp file. Instead edit the *.md.rsp.rsp (sic!)
file found under inst/vignettes-static/ of the source package.
--------------------------------------------------------------------"%%>
<%@meta language="R-vignette" content="--------------------------------
%\VignetteIndexEntry{A Future for R: Apply Function to Elements in Parallel}
%\VignetteAuthor{Henrik Bengtsson}
%\VignetteKeyword{R}
%\VignetteKeyword{package}
%\VignetteKeyword{vignette}
%\VignetteKeyword{future}
%\VignetteKeyword{lazy evaluation}
%\VignetteKeyword{synchronous}
%\VignetteKeyword{asynchronous}
%\VignetteKeyword{parallel}
%\VignetteKeyword{cluster}
%\VignetteEngine{R.rsp::rsp}
%\VignetteTangle{FALSE}
--------------------------------------------------------------------"%>
<%
R.utils::use("R.utils")
use("future.apply")
options("withCapture/newline" = FALSE)
options(mc.cores = 2L)
%>
# <%@meta name="title"%>
## Introduction
[BatchJobs]: https://cran.r-project.org/package=BatchJobs
[batchtools]: https://cran.r-project.org/package=batchtools
[future]: https://cran.r-project.org/package=future
[future.BatchJobs]: https://cran.r-project.org/package=future.BatchJobs
[future.batchtools]: https://cran.r-project.org/package=future.batchtools
---
Copyright Henrik Bengtsson, 2017-2018
future.apply/inst/WORDLIST 0000644 0001762 0000144 00000000412 13420014624 015050 0 ustar ligges users AppVeyor
arity
batchtools
benchmarking
BiocParallel
callr
CMD
CMRG
doFuture
doMC
doParallel
eapply
foreach
futurized
globals
HPC
L'Ecuyer
lapply
macOS
mapply
plyr
pre
Pre
pregenerating
reproducibility
Reproducibility
Roadmap
sapply
SGE
Slurm
tapply
vapply
vectorize