future.apply/ 0000755 0001762 0000144 00000000000 14104474122 012707 5 ustar ligges users future.apply/NAMESPACE 0000644 0001762 0000144 00000001611 14104465634 014135 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(future_by,data.frame)
S3method(future_by,default)
export(future_.mapply)
export(future_Map)
export(future_apply)
export(future_by)
export(future_eapply)
export(future_lapply)
export(future_mapply)
export(future_replicate)
export(future_sapply)
export(future_tapply)
export(future_vapply)
importFrom(future,Future)
importFrom(future,FutureError)
importFrom(future,as.FutureGlobals)
importFrom(future,future)
importFrom(future,getGlobalsAndPackages)
importFrom(future,nbrOfWorkers)
importFrom(future,resolve)
importFrom(future,value)
importFrom(globals,findGlobals)
importFrom(globals,globalsByName)
importFrom(parallel,nextRNGStream)
importFrom(parallel,nextRNGSubStream)
importFrom(parallel,splitIndices)
importFrom(utils,capture.output)
importFrom(utils,globalVariables)
importFrom(utils,head)
importFrom(utils,packageVersion)
importFrom(utils,str)
future.apply/man/ 0000755 0001762 0000144 00000000000 14104217116 013460 5 ustar ligges users future.apply/man/future_mapply.Rd 0000644 0001762 0000144 00000015703 14104262315 016652 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_Map.R, R/future_mapply.R
\name{future_Map}
\alias{future_Map}
\alias{future_mapply}
\alias{future_.mapply}
\title{Apply a Function to Multiple List or Vector Arguments}
\usage{
future_Map(
f,
...,
future.envir = parent.frame(),
future.label = "future_Map-\%d"
)
future_mapply(
FUN,
...,
MoreArgs = NULL,
SIMPLIFY = TRUE,
USE.NAMES = TRUE,
future.envir = parent.frame(),
future.stdout = TRUE,
future.conditions = "condition",
future.globals = TRUE,
future.packages = NULL,
future.lazy = FALSE,
future.seed = FALSE,
future.scheduling = 1,
future.chunk.size = NULL,
future.label = "future_mapply-\%d"
)
future_.mapply(FUN, dots, MoreArgs, ..., future.label = "future_.mapply-\%d")
}
\arguments{
\item{f}{A function of the arity \eqn{k} if \code{future_Map()} is called with
\eqn{k} arguments.}
\item{future.envir}{An \link{environment} passed as argument \code{envir} to
\code{\link[future:future]{future::future()}} as-is.}
\item{future.label}{If a character string, then each future is assigned
a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the
same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels
are assigned.}
\item{FUN}{A function to apply, found via \code{\link[base:match.fun]{base::match.fun()}}.}
\item{MoreArgs}{A list of other arguments to \code{FUN}.}
\item{SIMPLIFY}{A logical or character string; attempt to reduce the
result to a vector, matrix or higher dimensional array; see the simplify
argument of \code{\link[base:lapply]{base::sapply()}}.}
\item{USE.NAMES}{A logical; use names if the first \verb{\\ldots} argument has
names, or if it is a character vector, use that character vector as the
names.}
\item{future.stdout}{If \code{TRUE} (default), then the standard output of the
underlying futures is captured, and re-outputted as soon as possible.
If \code{FALSE}, any output is silenced (by sinking it to the null device
as it is outputted).
If \code{NA} (not recommended), output is \emph{not} intercepted.}
\item{future.conditions}{A character string of conditions classes to be
captured and relayed. The default is the same as the \code{conditions}
argument of \code{\link[future:Future-class]{future::Future()}}.
To not intercept conditions, use \code{future.conditions = character(0L)}.
Errors are always relayed.}
\item{future.globals}{A logical, a character vector, or a named list for
controlling how globals are handled.
For details, see \code{\link[=future_lapply]{future_lapply()}}.}
\item{future.packages}{(optional) a character vector specifying packages
to be attached in the R environment evaluating the future.}
\item{future.lazy}{Specifies whether the futures should be resolved
lazily or eagerly (default).}
\item{future.seed}{A logical or an integer (of length one or seven), or
a list of \code{max(lengths(list(...)))} with pre-generated random seeds.
For details, see \code{\link[=future_lapply]{future_lapply()}}.}
\item{future.scheduling}{Average number of futures ("chunks") per worker.
If \code{0.0}, then a single future is used to process all elements
of \code{X}.
If \code{1.0} or \code{TRUE}, then one future per worker is used.
If \code{2.0}, then each worker will process two futures
(if there are enough elements in \code{X}).
If \code{Inf} or \code{FALSE}, then one future per element of
\code{X} is used.
Only used if \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{The average number of elements per future ("chunk").
If \code{Inf}, then all elements are processed in a single future.
If \code{NULL}, then argument \code{future.scheduling} is used.}
\item{dots}{A list of arguments to vectorize over (vectors or lists of
strictly positive length, or all of zero length).}
\item{\ldots}{Arguments to vectorize over (vectors or lists of strictly
positive length, or all of zero length).}
}
\value{
\code{future_Map()} is a simple wrapper to \code{future_mapply()} which does not
attempt to simplify the result.
See \code{\link[base:funprog]{base::Map()}} for details.
\code{future_mapply()} returns a list, or for \code{SIMPLIFY = TRUE}, a vector,
array or list. See \code{\link[base:mapply]{base::mapply()}} for details.
\code{future_.mapply()} returns a list. See \code{\link[base:base-internal]{base::.mapply()}} for details.
}
\description{
\code{future_mapply()} implements \code{\link[base:mapply]{base::mapply()}} using futures with perfect
replication of results, regardless of future backend used.
Analogously to \code{mapply()}, \code{future_mapply()} is a multivariate version of
\code{future_sapply()}.
It applies \code{FUN} to the first elements of each \verb{\\ldots} argument,
the second elements, the third elements, and so on.
Arguments are recycled if necessary.
}
\details{
Note that \code{\link[base:base-internal]{base::.mapply()}}, which \code{future_.mapply()} is modeled after,
is listed as an "internal" function in \R despite being exported.
}
\examples{
## ---------------------------------------------------------
## mapply()
## ---------------------------------------------------------
y0 <- mapply(rep, 1:4, 4:1)
y1 <- future_mapply(rep, 1:4, 4:1)
stopifnot(identical(y1, y0))
y0 <- mapply(rep, times = 1:4, x = 4:1)
y1 <- future_mapply(rep, times = 1:4, x = 4:1)
stopifnot(identical(y1, y0))
y0 <- mapply(rep, times = 1:4, MoreArgs = list(x = 42))
y1 <- future_mapply(rep, times = 1:4, MoreArgs = list(x = 42))
stopifnot(identical(y1, y0))
y0 <- mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
y1 <- future_mapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
stopifnot(identical(y1, y0))
word <- function(C, k) paste(rep.int(C, k), collapse = "")
y0 <- mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE)
y1 <- future_mapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE)
stopifnot(identical(y1, y0))
## ---------------------------------------------------------
## Parallel Random Number Generation
## ---------------------------------------------------------
\donttest{
## Regardless of the future plan, the number of workers, and
## where they are, the random numbers produced are identical
plan(multisession)
y1 <- future_mapply(stats::runif, n = 1:4, max = 2:5,
MoreArgs = list(min = 1), future.seed = 0xBEEF)
print(y1)
plan(sequential)
y2 <- future_mapply(stats::runif, n = 1:4, max = 2:5,
MoreArgs = list(min = 1), future.seed = 0xBEEF)
print(y2)
stopifnot(all.equal(y1, y2))
}
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (!inherits(plan(), "sequential")) plan(sequential)
}
}
\author{
The implementation of \code{future_Map()} is adopted from the source code
of the corresponding base \R function \code{Map()}, which is licensed under
GPL (>= 2) with 'The R Core Team' as the copyright holder.
}
\keyword{iteration}
\keyword{manip}
\keyword{programming}
future.apply/man/fold.Rd 0000644 0001762 0000144 00000002564 14024036060 014700 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fold.R
\name{fold}
\alias{fold}
\title{Efficient Fold, Reduce, Accumulate, Combine of a Vector}
\usage{
fold(x, f, left = TRUE, unname = TRUE, threshold = 1000L)
}
\arguments{
\item{x}{A vector.}
\item{f}{A binary function, i.e. a function that takes two arguments.}
\item{left}{If \code{TRUE}, vector is combined from the left (the first element),
otherwise the right (the last element).}
\item{unname}{If \code{TRUE}, function \code{f} is called as
\code{f(unname(y), x[[ii]])}, otherwise as \code{f(y, x[[ii]])},
which may introduce name \code{"y"}.}
\item{threshold}{An integer (>= 2) specifying the length where the
recursive divide-and-conquer call will stop and incremental building of
the partial value is performed. Using \code{threshold = +Inf} will disable
recursive folding.}
}
\value{
A vector.
}
\description{
Efficient Fold, Reduce, Accumulate, Combine of a Vector
}
\details{
In order for recursive folding to give the same results as non-recursive
folding, binary function \code{f} must be \emph{associative} with itself, i.e.
\code{f(f(x[[1]], x[[2]]), x[[3]])} equals
\code{f(x[[1]], f(x[[2]], x[[3]]))}.
This function is a more efficient (memory and speed) version of
\code{\link[base:funprog]{Reduce(f, x, right = !left, accumulate = FALSE)}},
especially when \code{x} is long.
}
\keyword{internal}
future.apply/man/makeChunks.Rd 0000644 0001762 0000144 00000003530 14104216357 016047 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/makeChunks.R
\name{makeChunks}
\alias{makeChunks}
\title{Create Chunks of Index Vectors}
\usage{
makeChunks(
nbrOfElements,
nbrOfWorkers,
future.scheduling = 1,
future.chunk.size = NULL
)
}
\arguments{
\item{nbrOfElements}{(integer) Total number of elements to iterate over.}
\item{nbrOfWorkers}{(integer) Number of workers available.}
\item{future.scheduling}{(numeric) A strictly positive scalar.
Only used if argument \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{(numeric) The maximum number of elements per
chunk, or \code{NULL}. If \code{NULL}, then the chunk sizes are given by the
\code{future.scheduling} argument.}
}
\value{
A list of chunks, where each chunk is an integer vector of
unique indices \code{[1, nbrOfElements]}. The union of all chunks
holds \code{nbrOfElements} elements and equals \code{1:nbrOfElements}.
If \code{nbrOfElements == 0}, then an empty list is returned.
}
\description{
\emph{This is an internal function.}
}
\section{Control processing order of elements}{
Attribute \code{ordering} of \code{future.chunk.size} or \code{future.scheduling} can
be used to control the ordering the elements are iterated over, which
only affects the processing order \emph{not} the order values are returned.
This attribute can take the following values:
\itemize{
\item index vector - a numeric vector of length \code{nbrOfElements} specifying
how elements are remapped
\item function - a function taking one argument which is called as
\code{ordering(nbrOfElements)} and which must return an
index vector of length \code{nbrOfElements}, e.g.
\code{function(n) rev(seq_len(n))} for reverse ordering.
\item \code{"random"} - this will randomize the ordering via random index
vector \code{sample.int(nbrOfElements)}.
}
}
\keyword{internal}
future.apply/man/future.apply.Rd 0000644 0001762 0000144 00000011327 14024036060 016407 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future.apply-package.R
\docType{package}
\name{future.apply}
\alias{future.apply}
\alias{future.apply-package}
\title{future.apply: Apply Function to Elements in Parallel using Futures}
\description{
The \pkg{future.apply} package provides parallel implementations of
common "apply" functions provided by base \R. The parallel processing
is performed via the \pkg{future} ecosystem, which provides a large
number of parallel backends, e.g. on the local machine, a remote cluster,
and a high-performance compute cluster.
}
\details{
Currently implemented functions are:
\itemize{
\item \code{\link[=future_apply]{future_apply()}}: a parallel version of \link[base:apply]{apply()}
\item \code{\link[=future_by]{future_by()}}: a parallel version of \link[base:by]{by()}
\item \code{\link[=future_eapply]{future_eapply()}}: a parallel version of \link[base:eapply]{eapply()}
\item \code{\link[=future_lapply]{future_lapply()}}: a parallel version of \link[base:lapply]{lapply()}
\item \code{\link[=future_mapply]{future_mapply()}}: a parallel version of \link[base:mapply]{mapply()}
\item \code{\link[=future_sapply]{future_sapply()}}: a parallel version of \link[base:lapply]{sapply()}
\item \code{\link[=future_tapply]{future_tapply()}}: a parallel version of \link[base:tapply]{tapply()}
\item \code{\link[=future_vapply]{future_vapply()}}: a parallel version of \link[base:lapply]{vapply()}
\item \code{\link[=future_Map]{future_Map()}}: a parallel version of \link[base:funprog]{Map()}
\item \code{\link[=future_replicate]{future_replicate()}}: a parallel version of \link[base:lapply]{replicate()}
\item \code{\link[=future_.mapply]{future_.mapply()}}: a parallel version of \link[base:base-internal]{.mapply()}
}
Reproducibility is part of the core design, which means that perfect,
parallel random number generation (RNG) is supported regardless of the
amount of chunking, type of load balancing, and future backend being used.
Since these \verb{future_*()} functions have the same arguments as the
corresponding base \R functions, starting to use them is often as simple as
renaming the function in the code. For example, after attaching the package:\if{html}{\out{
}}\preformatted{library(future.apply)
}\if{html}{\out{
}}
code such as:\if{html}{\out{}}\preformatted{x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
y <- lapply(x, quantile, probs = 1:3/4)
}\if{html}{\out{
}}
can be updated to:\if{html}{\out{}}\preformatted{y <- future_lapply(x, quantile, probs = 1:3/4)
}\if{html}{\out{
}}
The default setting in the \pkg{future} framework is to process code
\emph{sequentially}. To run the above in parallel on the local machine
(on any operating system), use:\if{html}{\out{}}\preformatted{plan(multisession)
}\if{html}{\out{
}}
first. That's it!
To go back to sequential processing, use \code{plan(sequential)}.
If you have access to multiple machines on your local network, use:\if{html}{\out{}}\preformatted{plan(cluster, workers = c("n1", "n2", "n2", "n3"))
}\if{html}{\out{
}}
This will set up four workers, one on \code{n1} and \code{n3}, and two on \code{n2}.
If you have SSH access to some remote machines, use:\if{html}{\out{}}\preformatted{plan(cluster, workers = c("m1.myserver.org", "m2.myserver.org"))
}\if{html}{\out{
}}
See the \pkg{future} package and \code{\link[future:plan]{future::plan()}} for more examples.
The \pkg{future.batchtools} package provides support for high-performance
compute (HPC) cluster schedulers such as SGE, Slurm, and TORQUE / PBS.
For example,
\itemize{
\item \code{plan(batchtools_slurm)}:
Process via a Slurm scheduler job queue.
\item \code{plan(batchtools_torque)}:
Process via a TORQUE / PBS scheduler job queue.
}
This builds on top of the queuing framework that the \pkg{batchtools}
package provides. For more details on backend configuration, please see
the \pkg{future.batchtools} and \pkg{batchtools} packages.
These are just a few examples of parallel/distributed backends for the
future ecosystem. For more alternatives, see the 'Reverse dependencies'
section on the
\href{https://cran.r-project.org/package=future}{future CRAN package page}.
}
\author{
Henrik Bengtsson, except for the implementations of \code{future_apply()},
\code{future_Map()}, \code{future_replicate()}, \code{future_sapply()}, and
\code{future_tapply()}, which are adopted from the source code of the
corresponding base \R functions, which are licensed under GPL (>= 2)
with 'The R Core Team' as the copyright holder.
Because of these dependencies, the license of this package is GPL (>= 2).
}
\keyword{iteration}
\keyword{manip}
\keyword{programming}
future.apply/man/future_lapply.Rd 0000644 0001762 0000144 00000030673 14104262315 016654 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_eapply.R, R/future_lapply.R,
% R/future_replicate.R, R/future_sapply.R, R/future_tapply.R,
% R/future_vapply.R
\name{future_eapply}
\alias{future_eapply}
\alias{future_lapply}
\alias{future_replicate}
\alias{future_sapply}
\alias{future_tapply}
\alias{future_vapply}
\title{Apply a Function over a List or Vector via Futures}
\usage{
future_eapply(
env,
FUN,
...,
all.names = FALSE,
USE.NAMES = TRUE,
future.envir = parent.frame(),
future.label = "future_eapply-\%d"
)
future_lapply(
X,
FUN,
...,
future.envir = parent.frame(),
future.stdout = TRUE,
future.conditions = "condition",
future.globals = TRUE,
future.packages = NULL,
future.lazy = FALSE,
future.seed = FALSE,
future.scheduling = 1,
future.chunk.size = NULL,
future.label = "future_lapply-\%d"
)
future_replicate(
n,
expr,
simplify = "array",
future.seed = TRUE,
...,
future.envir = parent.frame(),
future.label = "future_replicate-\%d"
)
future_sapply(
X,
FUN,
...,
simplify = TRUE,
USE.NAMES = TRUE,
future.envir = parent.frame(),
future.label = "future_sapply-\%d"
)
future_tapply(
X,
INDEX,
FUN = NULL,
...,
default = NA,
simplify = TRUE,
future.envir = parent.frame(),
future.label = "future_tapply-\%d"
)
future_vapply(
X,
FUN,
FUN.VALUE,
...,
USE.NAMES = TRUE,
future.envir = parent.frame(),
future.label = "future_vapply-\%d"
)
}
\arguments{
\item{env}{An \R environment.}
\item{FUN}{A function taking at least one argument.}
\item{all.names}{If \code{TRUE}, the function will also be applied to variables
that start with a period (\code{.}), otherwise not.
See \code{\link[base:eapply]{base::eapply()}} for details.}
\item{USE.NAMES}{See \code{\link[base:lapply]{base::sapply()}}.}
\item{future.envir}{An \link{environment} passed as argument \code{envir} to
\code{\link[future:future]{future::future()}} as-is.}
\item{future.label}{If a character string, then each future is assigned
a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the
same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels
are assigned.}
\item{X}{A vector-like object to iterate over.}
\item{future.stdout}{If \code{TRUE} (default), then the standard output of the
underlying futures is captured, and re-outputted as soon as possible.
If \code{FALSE}, any output is silenced (by sinking it to the null device
as it is outputted).
If \code{NA} (not recommended), output is \emph{not} intercepted.}
\item{future.conditions}{A character string of conditions classes to be
captured and relayed. The default is the same as the \code{conditions}
argument of \code{\link[future:Future-class]{future::Future()}}.
To not intercept conditions, use \code{future.conditions = character(0L)}.
Errors are always relayed.}
\item{future.globals}{A logical, a character vector, or a named list for
controlling how globals are handled. For details, see below section.}
\item{future.packages}{(optional) a character vector specifying packages
to be attached in the R environment evaluating the future.}
\item{future.lazy}{Specifies whether the futures should be resolved
lazily or eagerly (default).}
\item{future.seed}{A logical or an integer (of length one or seven),
or a list of \code{length(X)} with pre-generated random seeds.
For details, see below section.}
\item{future.scheduling}{Average number of futures ("chunks") per worker.
If \code{0.0}, then a single future is used to process all elements
of \code{X}.
If \code{1.0} or \code{TRUE}, then one future per worker is used.
If \code{2.0}, then each worker will process two futures
(if there are enough elements in \code{X}).
If \code{Inf} or \code{FALSE}, then one future per element of
\code{X} is used.
Only used if \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{The average number of elements per future ("chunk").
If \code{Inf}, then all elements are processed in a single future.
If \code{NULL}, then argument \code{future.scheduling} is used.}
\item{n}{The number of replicates.}
\item{expr}{An \R expression to evaluate repeatedly.}
\item{simplify}{See \code{\link[base:lapply]{base::sapply()}} and \code{\link[base:tapply]{base::tapply()}}, respectively.}
\item{INDEX}{A list of one or more factors, each of same length as \code{X}.
The elements are coerced to factors by \code{as.factor()}.
See also \code{\link[base:tapply]{base::tapply()}}.}
\item{default}{See \code{\link[base:tapply]{base::tapply()}}.}
\item{FUN.VALUE}{A template for the required return value from
each \code{FUN(X[ii], ...)}.
Types may be promoted to a higher type within the ordering
logical < integer < double < complex, but not demoted.
See \code{\link[base:lapply]{base::vapply()}} for details.}
\item{\ldots}{(optional) Additional arguments passed to \code{FUN()}.
For \code{future_*apply()} functions and \code{replicate()}, any \verb{future.*} arguments
part of \verb{\\ldots} are passed on to \code{future_lapply()} used internally.}
}
\value{
A named (unless \code{USE.NAMES = FALSE}) list.
See \code{\link[base:eapply]{base::eapply()}} for details.
For \code{future_lapply()}, a list with same length and names as \code{X}.
See \code{\link[base:lapply]{base::lapply()}} for details.
\code{future_replicate()} is a wrapper around \code{future_sapply()} and returns a
simplified object according to the \code{simplify} argument.
See \code{\link[base:lapply]{base::replicate()}} for details.
Since \code{future_replicate()} usually involves random number generation (RNG),
it uses \code{future.seed = TRUE} by default in order to produce sound random
numbers regardless of future backend and number of background workers used.
For \code{future_sapply()}, a vector with same length and names as \code{X}.
See \code{\link[base:lapply]{base::sapply()}} for details.
\code{future_tapply()} returns an array with mode \code{"list"}, unless
\code{simplify = TRUE} (default) \emph{and} \code{FUN} returns a scalar, in which
case the mode of the array is the same as the returned scalars.
See \code{\link[base:tapply]{base::tapply()}} for details.
For \code{future_vapply()}, a vector with same length and names as \code{X}.
See \code{\link[base:lapply]{base::vapply()}} for details.
}
\description{
\code{future_lapply()} implements \code{\link[base:lapply]{base::lapply()}} using futures with perfect
replication of results, regardless of future backend used.
Analogously, this is true for all the other \code{future_nnn()} functions.
}
\section{Global variables}{
Argument \code{future.globals} may be used to control how globals
should be handled similarly to how the \code{globals} argument is used with
\code{future()}.
Since all function calls use the same set of globals, this function can do
any gathering of globals upfront (once), which is more efficient than if
it would be done for each future independently.
If \code{TRUE}, \code{NULL} or not specified (default), then globals
are automatically identified and gathered.
If a character vector of names is specified, then those globals are gathered.
If a named list, then those globals are used as is.
In all cases, \code{FUN} and any \verb{\\ldots} arguments are automatically
passed as globals to each future created as they are always needed.
}
\section{Reproducible random number generation (RNG)}{
Unless \code{future.seed = FALSE}, this function guarantees to generate
the exact same sequence of random numbers \emph{given the same initial
seed / RNG state} - this regardless of type of futures, scheduling
("chunking") strategy, and number of workers.
RNG reproducibility is achieved by pregenerating the random seeds for all
iterations (over \code{X}) by using L'Ecuyer-CMRG RNG streams. In each
iteration, these seeds are set before calling \code{FUN(X[[ii]], ...)}.
\emph{Note, for large \code{length(X)} this may introduce a large overhead.}
As input (\code{future.seed}), a fixed seed (integer) may be given, either
as a full L'Ecuyer-CMRG RNG seed (vector of 1+6 integers) or as a seed
generating such a full L'Ecuyer-CMRG seed.
If \code{future.seed = TRUE}, then \code{\link[base:Random]{.Random.seed}}
is returned if it holds a L'Ecuyer-CMRG RNG seed, otherwise one is created
randomly.
If \code{future.seed = NA}, a L'Ecuyer-CMRG RNG seed is randomly created.
If none of the function calls \code{FUN(X[[ii]], ...)} uses random number
generation, then \code{future.seed = FALSE} may be used.
In addition to the above, it is possible to specify a pre-generated
sequence of RNG seeds as a list such that
\code{length(future.seed) == length(X)} and where each element is an
integer seed vector that can be assigned to
\code{\link[base:Random]{.Random.seed}}. One approach to generate a
set of valid RNG seeds based on fixed initial seed (here \code{42L}) is:\if{html}{\out{}}\preformatted{seeds <- future_lapply(seq_along(X), FUN = function(x) .Random.seed,
future.chunk.size = Inf, future.seed = 42L)
}\if{html}{\out{
}}
\strong{Note that \code{as.list(seq_along(X))} is \emph{not} a valid set of such
\code{.Random.seed} values.}
In all cases but \code{future.seed = FALSE}, the RNG state of the calling
R processes after this function returns is guaranteed to be
"forwarded one step" from the RNG state that was before the call and
in the same way regardless of \code{future.seed}, \code{future.scheduling}
and future strategy used. This is done in order to guarantee that an \R
script calling \code{future_lapply()} multiple times should be numerically
reproducible given the same initial seed.
}
\section{Control processing order of elements}{
Attribute \code{ordering} of \code{future.chunk.size} or \code{future.scheduling} can
be used to control the ordering the elements are iterated over, which
only affects the processing order and \emph{not} the order values are returned.
This attribute can take the following values:
\itemize{
\item index vector - a numeric vector of length \code{length(X)}
\item function - a function taking one argument which is called as
\code{ordering(length(X))} and which must return an
index vector of length \code{length(X)}, e.g.
\code{function(n) rev(seq_len(n))} for reverse ordering.
\item \code{"random"} - this will randomize the ordering via random index
vector \code{sample.int(length(X))}.
For example, \code{future.scheduling = structure(TRUE, ordering = "random")}.
\emph{Note}, when elements are processed out of order, then captured standard
output and conditions are also relayed in that order, that is out of order.
}
}
\examples{
## ---------------------------------------------------------
## lapply(), sapply(), tapply()
## ---------------------------------------------------------
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE, FALSE, FALSE, TRUE))
y0 <- lapply(x, FUN = quantile, probs = 1:3/4)
y1 <- future_lapply(x, FUN = quantile, probs = 1:3/4)
print(y1)
stopifnot(all.equal(y1, y0))
y0 <- sapply(x, FUN = quantile)
y1 <- future_sapply(x, FUN = quantile)
print(y1)
stopifnot(all.equal(y1, y0))
y0 <- vapply(x, FUN = quantile, FUN.VALUE = double(5L))
y1 <- future_vapply(x, FUN = quantile, FUN.VALUE = double(5L))
print(y1)
stopifnot(all.equal(y1, y0))
## ---------------------------------------------------------
## Parallel Random Number Generation
## ---------------------------------------------------------
\donttest{
## Regardless of the future plan, the number of workers, and
## where they are, the random numbers produced are identical
plan(multisession)
y1 <- future_lapply(1:5, FUN = rnorm, future.seed = 0xBEEF)
str(y1)
plan(sequential)
y2 <- future_lapply(1:5, FUN = rnorm, future.seed = 0xBEEF)
str(y2)
stopifnot(all.equal(y1, y2))
}
## ---------------------------------------------------------
## Process chunks of data.frame rows in parallel
## ---------------------------------------------------------
iris <- datasets::iris
chunks <- split(iris, seq(1, nrow(iris), length.out = 3L))
y0 <- lapply(chunks, FUN = function(iris) sum(iris$Sepal.Length))
y0 <- do.call(sum, y0)
y1 <- future_lapply(chunks, FUN = function(iris) sum(iris$Sepal.Length))
y1 <- do.call(sum, y1)
print(y1)
stopifnot(all.equal(y1, y0))
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (!inherits(plan(), "sequential")) plan(sequential)
}
}
\author{
The implementations of \code{future_replicate()}, \code{future_sapply()}, and
\code{future_tapply()} are adopted from the source code of the corresponding
base \R functions, which are licensed under GPL (>= 2) with
'The R Core Team' as the copyright holder.
}
\keyword{iteration}
\keyword{manip}
\keyword{programming}
future.apply/man/future_apply.Rd 0000644 0001762 0000144 00000012256 14104262315 016475 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_apply.R
\name{future_apply}
\alias{future_apply}
\title{Apply Functions Over Array Margins via Futures}
\usage{
future_apply(
X,
MARGIN,
FUN,
...,
simplify = TRUE,
future.envir = parent.frame(),
future.stdout = TRUE,
future.conditions = "condition",
future.globals = TRUE,
future.packages = NULL,
future.lazy = FALSE,
future.seed = FALSE,
future.scheduling = 1,
future.chunk.size = NULL,
future.label = "future_apply-\%d"
)
}
\arguments{
\item{X}{an array, including a matrix.}
\item{MARGIN}{A vector giving the subscripts which the function will be
applied over. For example, for a matrix \code{1} indicates rows, \code{2} indicates
columns, \code{c(1, 2)} indicates rows and columns.
Where \code{X} has named dimnames, it can be a character vector selecting
dimension names.}
\item{FUN}{A function taking at least one argument.}
\item{simplify}{a logical indicating whether results should be simplified
if possible.}
\item{future.envir}{An \link{environment} passed as argument \code{envir} to
\code{\link[future:future]{future::future()}} as-is.}
\item{future.stdout}{If \code{TRUE} (default), then the standard output of the
underlying futures is captured, and re-outputted as soon as possible.
If \code{FALSE}, any output is silenced (by sinking it to the null device
as it is outputted).
If \code{NA} (not recommended), output is \emph{not} intercepted.}
\item{future.conditions}{A character string of conditions classes to be
captured and relayed. The default is the same as the \code{conditions}
argument of \code{\link[future:Future-class]{future::Future()}}.
To not intercept conditions, use \code{future.conditions = character(0L)}.
Errors are always relayed.}
\item{future.globals}{A logical, a character vector, or a named list for
controlling how globals are handled. For details, see below section.}
\item{future.packages}{(optional) a character vector specifying packages
to be attached in the R environment evaluating the future.}
\item{future.lazy}{Specifies whether the futures should be resolved
lazily or eagerly (default).}
\item{future.seed}{A logical or an integer (of length one or seven),
or a list of \code{length(X)} with pre-generated random seeds.
For details, see below section.}
\item{future.scheduling}{Average number of futures ("chunks") per worker.
If \code{0.0}, then a single future is used to process all elements
of \code{X}.
If \code{1.0} or \code{TRUE}, then one future per worker is used.
If \code{2.0}, then each worker will process two futures
(if there are enough elements in \code{X}).
If \code{Inf} or \code{FALSE}, then one future per element of
\code{X} is used.
Only used if \code{future.chunk.size} is \code{NULL}.}
\item{future.chunk.size}{The average number of elements per future ("chunk").
If \code{Inf}, then all elements are processed in a single future.
If \code{NULL}, then argument \code{future.scheduling} is used.}
\item{future.label}{If a character string, then each future is assigned
a label \code{sprintf(future.label, chunk_idx)}. If TRUE, then the
same as \code{future.label = "future_lapply-\%d"}. If FALSE, no labels
are assigned.}
\item{\ldots}{(optional) Additional arguments passed to \code{FUN()}, except
\verb{future.*} arguments, which are passed on to \code{\link[=future_lapply]{future_lapply()}} used
internally.}
}
\value{
Returns a vector or array or list of values obtained by applying a
function to margins of an array or matrix.
See \code{\link[base:apply]{base::apply()}} for details.
}
\description{
\code{future_apply()} implements \code{\link[base:apply]{base::apply()}} using future with perfect
replication of results, regardless of future backend used.
It returns a vector or array or list of values obtained by applying a
function to margins of an array or matrix.
}
\examples{
## ---------------------------------------------------------
## apply()
## ---------------------------------------------------------
X <- matrix(c(1:4, 1, 6:8), nrow = 2L)
Y0 <- apply(X, MARGIN = 1L, FUN = table)
Y1 <- future_apply(X, MARGIN = 1L, FUN = table)
print(Y1)
stopifnot(all.equal(Y1, Y0, check.attributes = FALSE)) ## FIXME
Y0 <- apply(X, MARGIN = 1L, FUN = stats::quantile)
Y1 <- future_apply(X, MARGIN = 1L, FUN = stats::quantile)
print(Y1)
stopifnot(all.equal(Y1, Y0))
## ---------------------------------------------------------
## Parallel Random Number Generation
## ---------------------------------------------------------
\donttest{
## Regardless of the future plan, the number of workers, and
## where they are, the random numbers produced are identical
X <- matrix(c(1:4, 1, 6:8), nrow = 2L)
plan(multisession)
Y1 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF)
print(Y1)
plan(sequential)
Y2 <- future_apply(X, MARGIN = 1L, FUN = sample, future.seed = 0xBEEF)
print(Y2)
stopifnot(all.equal(Y1, Y2))
}
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (!inherits(plan(), "sequential")) plan(sequential)
}
}
\author{
The implementation of \code{future_apply()} is adapted from the source code
of the corresponding base \R function, which is licensed under GPL (>= 2)
with 'The R Core Team' as the copyright holder.
}
future.apply/man/make_rng_seeds.Rd 0000644 0001762 0000144 00000001526 14104217517 016726 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rng.R
\name{make_rng_seeds}
\alias{make_rng_seeds}
\title{Produce Reproducible Seeds for Parallel Random Number Generation}
\usage{
make_rng_seeds(count, seed = FALSE, debug = NA)
}
\arguments{
\item{count}{The number of RNG seeds to produce.}
\item{seed}{A logical specifying whether RNG seeds should be generated
or not. (\code{seed = NULL} corresponds to \code{seed = FALSE}).
If a list, then it should be of length \code{count} and each element should
consist of a valid RNG seed.}
\item{debug}{If \code{TRUE}, debug output is produced, otherwise not.}
}
\value{
Returns a non-named list of length \code{count}, or \code{NULL}.
Any seed returned is a valid RNG seed.
}
\description{
Produce Reproducible Seeds for Parallel Random Number Generation
}
\keyword{internal}
future.apply/man/future_by.Rd 0000644 0001762 0000144 00000005333 14104262741 015763 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/future_by.R
\name{future_by}
\alias{future_by}
\title{Apply a Function to a Data Frame Split by Factors via Futures}
\usage{
future_by(
data,
INDICES,
FUN,
...,
simplify = TRUE,
future.envir = parent.frame()
)
}
\arguments{
\item{data}{An \R object, normally a data frame, possibly a matrix.}
\item{INDICES}{A factor or a list of factors, each of length \code{nrow(data)}.}
\item{FUN}{a function to be applied to (usually data-frame) subsets of \code{data}.}
\item{simplify}{logical: see \link[base:tapply]{base::tapply}.}
\item{future.envir}{An \link{environment} passed as argument \code{envir} to
\code{\link[future:future]{future::future()}} as-is.}
\item{\ldots}{Additional arguments passed to \code{\link[=future_lapply]{future_lapply()}} and
then to \code{FUN()}.}
}
\value{
An object of class "by", giving the results for each subset.
This is always a list if simplify is false, otherwise a list
or array (see \link[base:tapply]{base::tapply}).
See also \code{\link[base:by]{base::by()}} for details.
}
\description{
Apply a Function to a Data Frame Split by Factors via Futures
}
\details{
Internally, \code{data} is grouped by \code{INDICES} into a list of \code{data}
subset elements which is then processed by \code{\link[=future_lapply]{future_lapply()}}.
When the groups differ significantly in size, the processing time
may differ significantly between the groups.
To correct for processing-time imbalances, adjust the amount of chunking
via arguments \code{future.scheduling} and \code{future.chunk.size}.
}
\section{Note on 'stringsAsFactors'}{
The \code{future_by()} is modeled as closely as possible to the
behavior of \code{base::by()}. Both functions have "default" S3 methods that
call \code{data <- as.data.frame(data)} internally. This call may in turn call
an S3 method for \code{as.data.frame()} that coerces strings to factors or not
depending on whether it has a \code{stringsAsFactors} argument and what its
default is.
For example, the S3 method of \code{as.data.frame()} for lists changed its
(effective) default from \code{stringsAsFactors = TRUE} to
\code{stringsAsFactors = FALSE} in R 4.0.0.
}
\examples{
## ---------------------------------------------------------
## by()
## ---------------------------------------------------------
library(datasets) ## warpbreaks
library(stats) ## lm()
y0 <- by(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
plan(multisession)
y1 <- future_by(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
plan(sequential)
y2 <- future_by(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
}
future.apply/man/future.apply.options.Rd 0000644 0001762 0000144 00000003107 14104217116 020100 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/options.R
\name{future.apply.options}
\alias{future.apply.options}
\alias{future.apply.debug}
\alias{R_FUTURE_APPLY_DEBUG}
\title{Options used for future.apply}
\description{
Below are the \R options and environment variables that are used by the
\pkg{future.apply} package and packages enhancing it.\cr
\cr
\emph{WARNING: Note that the names and the default values of these options may
change in future versions of the package. Please use with care until
further notice.}
}
\details{
For settings specific to the \pkg{future} package, see
\link[future:future.options]{future::future.options} page.
}
\section{Options for debugging future.apply}{
\describe{
\item{\option{future.apply.debug}:}{(logical) If \code{TRUE}, extensive debug messages are generated. (Default: \code{FALSE})}
}
}
\section{Environment variables that set R options}{
All of the above \R \option{future.apply.*} options can be set by the
corresponding environment variable \env{R_FUTURE_APPLY_*} \emph{when the
\pkg{future.apply} package is loaded}.
For example, if \code{R_FUTURE_APPLY_DEBUG = "TRUE"}, then option
\option{future.apply.debug} is set to \code{TRUE} (logical).
}
\examples{
options(future.apply.debug = TRUE)
}
\seealso{
To set \R options or environment variables when \R starts (even before the \pkg{future} package is loaded), see the \link[base]{Startup} help page. The \href{https://cran.r-project.org/package=startup}{\pkg{startup}} package provides a friendly mechanism for configuring \R's startup process.
}
\keyword{internal}
future.apply/DESCRIPTION 0000644 0001762 0000144 00000002577 14104474122 014430 0 ustar ligges users Package: future.apply
Version: 1.8.1
Title: Apply Function to Elements in Parallel using Futures
Depends: R (>= 3.2.0), future (>= 1.21.0)
Imports: globals (>= 0.14.0), parallel, utils
Suggests: datasets, stats, tools, listenv (>= 0.8.0), R.rsp, markdown
VignetteBuilder: R.rsp
Authors@R: c(person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"),
email = "henrikb@braju.com"),
person("R Core Team", role = c("cph", "ctb")))
Description: Implementations of apply(), by(), eapply(), lapply(), Map(), .mapply(), mapply(), replicate(), sapply(), tapply(), and vapply() that can be resolved using any future-supported backend, e.g. parallel on the local machine or distributed on a compute cluster. These future_*apply() functions come with the same pros and cons as the corresponding base-R *apply() functions but with the additional feature of being able to be processed via the future framework.
License: GPL (>= 2)
LazyLoad: TRUE
URL: https://future.apply.futureverse.org,
https://github.com/HenrikBengtsson/future.apply
BugReports: https://github.com/HenrikBengtsson/future.apply/issues
RoxygenNote: 7.1.1
NeedsCompilation: no
Packaged: 2021-08-10 12:15:45 UTC; hb
Author: Henrik Bengtsson [aut, cre, cph],
R Core Team [cph, ctb]
Maintainer: Henrik Bengtsson
Repository: CRAN
Date/Publication: 2021-08-10 13:00:02 UTC
future.apply/build/ 0000755 0001762 0000144 00000000000 14104466760 014017 5 ustar ligges users future.apply/build/vignette.rds 0000644 0001762 0000144 00000000473 14104466760 016362 0 ustar ligges users uQN0t4RWVdaOkؑ
_NqQGXKr~wyBF$
L
k87%6DdjZ*.AxL