statip/0000755000176200001440000000000013564337062011567 5ustar liggesusersstatip/NAMESPACE0000644000176200001440000000316113564334703013006 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(as.stepfun,picor) S3method(kernelfun,"function") S3method(kernelfun,character) S3method(knots,picor) S3method(mfv,default) S3method(mfv,tableNA) S3method(plot,loess) S3method(plot,picor) S3method(predict,default) S3method(predict,kmeans) S3method(predict,picor) S3method(print,picor) export(.kernelsList) export(as.stepfun) export(bandwidth) export(cv) export(dbern) export(densityfun) export(distr2name) export(erf) export(find_breaks) export(hellinger) export(histo) export(kernel_properties) export(kernelfun) export(knots) export(lagk) export(mfv) export(mfv1) export(midhinge) export(midrange) export(na.pass) export(name2distr) export(pbern) export(picor) export(plot) export(predict) export(qbern) export(rbern) export(tableNA) export(trimean) importFrom(clue,cl_predict) importFrom(graphics,lines) importFrom(graphics,plot) importFrom(rpart,rpart) importFrom(stats,IQR) importFrom(stats,approxfun) importFrom(stats,as.stepfun) importFrom(stats,bw.SJ) importFrom(stats,bw.bcv) importFrom(stats,bw.nrd) importFrom(stats,bw.nrd0) importFrom(stats,bw.ucv) importFrom(stats,fft) importFrom(stats,integrate) importFrom(stats,isoreg) importFrom(stats,knots) importFrom(stats,mad) importFrom(stats,median) importFrom(stats,model.frame) importFrom(stats,na.pass) importFrom(stats,pbinom) importFrom(stats,pnorm) importFrom(stats,predict) importFrom(stats,qbinom) importFrom(stats,quantile) importFrom(stats,sd) importFrom(stats,stepfun) importFrom(stats,var) useDynLib(statip, .registration=TRUE) useDynLib(statip,BinDist) statip/README.md0000644000176200001440000000170513352003255013036 0ustar liggesusers# statip: miscellaneous statistical functions [![Travis-CI Build Status](https://travis-ci.org/paulponcet/statip.svg?branch=master)](https://travis-ci.org/paulponcet/statip) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/statip)](https://cran.r-project.org/package=statip) [![](https://cranlogs.r-pkg.org/badges/statip)](https://cran.r-project.org/package=statip) A collection of miscellaneous statistical functions for probability distributions: dbern(), pbern(), qbern(), rbern() for the Bernoulli distribution, and distr2name(), name2distr() for distribution names; probability density estimation: densityfun(); most frequent value estimation: mfv(), mfv1(); calculation of the Hellinger distance: hellinger(); use of classical kernels: kernelfun(), kernel_properties(). ## Installation You can install 'statip' from GitHub with: ```R # install.packages("devtools") devtools::install_github("paulponcet/statip") ``` statip/man/0000755000176200001440000000000013564326731012343 5ustar liggesusersstatip/man/tableNA.Rd0000644000176200001440000000134113564331032014125 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tableNA.R \name{tableNA} \alias{tableNA} \title{Alternative Table Creation} \usage{ tableNA(x) } \arguments{ \item{x}{numeric. An atomic vector or a factor.} } \value{ An object of class \code{"tableNA"}, which is the result of \code{\link{tabulate}()} with three attributes: \itemize{ \item \code{type_of_x}: the result of \code{typeof(x)}; \item \code{is_factor_x}: the result of \code{is.factor(x)}; \item \code{levels}: the result of \code{levels(x)}. } The number of missing values is always reported. } \description{ Count the occurrences of each factor level or value in a vector. } \examples{ tableNA(c(1,2,2,1,3)) tableNA(c(1,2,2,1,3, NA)) } statip/man/find_breaks.Rd0000644000176200001440000000220313564327243015075 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/find_breaks.R \name{find_breaks} \alias{find_breaks} \title{Breakpoints to be passed to a Histogram} \usage{ find_breaks(x, nbins = "Scott", h, x0 = -h/1000) } \arguments{ \item{x}{numeric. A vector.} \item{nbins}{integer or character. The suggested number of bins. Either a positive integer, or a character string naming a rule: \code{"Scott"} (the default) or \code{"Freedman-Diaconis"} or \code{"FD"}. (Case is ignored.)} \item{h}{numeric. The bin width, a strictly positive number (takes precedence over nbins).} \item{x0}{numeric. Shift for the bins - the breaks are at \code{x0 + h * (..., -1, 0, 1, ...)}.} } \value{ A numeric vector. } \description{ The function \code{find_breaks()} isolates a piece of code of the function \code{\link[MASS]{truehist}()} from package \pkg{MASS} that is used to compute the set of breakpoints to be applied for the construction of the histogram. } \seealso{ \code{\link[statip]{histo}()} in this package; \code{\link[MASS]{truehist}()} from package \pkg{MASS}; \code{\link[graphics]{hist}()} from package \pkg{graphics}. } statip/man/kernelfun.Rd0000644000176200001440000000207313564303513014616 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/kernel_properties.R, R/kernelfun.R, % R/kernelsList.R \name{kernel_properties} \alias{kernel_properties} \alias{kernelfun} \alias{kernelfun.function} \alias{kernelfun.character} \alias{.kernelsList} \title{Smoothing kernels} \usage{ kernel_properties(name, derivative = FALSE) kernelfun(name, ...) \method{kernelfun}{`function`}(name, ...) \method{kernelfun}{character}(name, derivative = FALSE, ...) .kernelsList() } \arguments{ \item{name}{character. The name of the kernel to be used. Authorized kernels are listed in \code{\link{.kernelsList}()}.} \item{derivative}{logical. If \code{TRUE}, the derivative of the kernel is returned.} \item{...}{Additional arguments to be passed to the kernel function.} } \value{ A function. } \description{ The generic function \code{kernelfun} creates a smoothing kernel function. } \examples{ kernel_properties("gaussian") k <- kernelfun("epanechnikov") curve(k(x), xlim = c(-1, 1)) } \seealso{ \code{\link[stats]{density}} in package \pkg{stats}. } statip/man/mfv.Rd0000644000176200001440000000457713564326731013437 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mfv.R \name{mfv} \alias{mfv} \alias{mfv.default} \alias{mfv.tableNA} \alias{mfv1} \title{Most frequent value(s)} \usage{ mfv(x, ...) \method{mfv}{default}(x, na_rm = FALSE, ...) \method{mfv}{tableNA}(x, na_rm = FALSE, ...) mfv1(x, na_rm = FALSE, ...) } \arguments{ \item{x}{Vector of observations (of type numeric, integer, character, factor, or logical). \code{x} is to come from a discrete distribution.} \item{...}{Additional arguments (currently not used).} \item{na_rm}{logical. If \code{TRUE}, missing values do not interfer with the result, see 'Details'.} } \value{ The function \code{mfv} returns a vector of the same type as \code{x}. One should be aware that this vector can be of length \code{> 1}, in case of multiple modes. \code{mfv1} always returns a vector of length \code{1} (the first of the modes found). } \description{ The function \code{mfv()} returns the most frequent value(s) (or mode(s)) found in a vector. The function \code{mfv1} returns the first of these values, so that \code{mfv1(x)} is identical to \code{mfv(x)[[1L]]}. } \details{ See David Smith' blog post \href{http://blog.revolutionanalytics.com/2016/07/understanding-na-in-r.html}{here} to understand the philosophy followed in the code of \code{mfv} for missing values treatment. } \note{ \code{mfv()} calls the function \code{\link[base]{tabulate}}. } \examples{ # Basic examples: mfv(integer(0)) # NaN mfv(c(3, 3, 3, 2, 4)) # 3 mfv(c(TRUE, FALSE, TRUE)) # TRUE mfv(c("a", "a", "b", "a", "d")) # "a" mfv(c("a", "a", "b", "b", "d")) # c("a", "b") mfv1(c("a", "a", "b", "b", "d")) # "a" # With missing values: mfv(c(3, 3, 3, 2, NA)) # 3 mfv(c(3, 3, 2, NA)) # NA mfv(c(3, 3, 2, NA), na_rm = TRUE) # 3 mfv(c(3, 3, 2, 2, NA)) # NA mfv(c(3, 3, 2, 2, NA), na_rm = TRUE) # c(2, 3) mfv1(c(3, 3, 2, 2, NA), na_rm = TRUE)# 2 # With only missing values: mfv(c(NA, NA)) # NA mfv(c(NA, NA), na_rm = TRUE) # NaN # With factors mfv(factor(c("a", "b", "a"))) mfv(factor(c("a", "b", "a", NA))) mfv(factor(c("a", "b", "a", NA)), na_rm = TRUE) } \references{ \itemize{ \item Dutta S. and Goswami A. (2010). Mode estimation for discrete distributions. \emph{Mathematical Methods of Statistics}, \bold{19}(4):374--384. } } statip/man/lagk.Rd0000644000176200001440000000225313564303513013543 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lagk.R \name{lagk} \alias{lagk} \title{Lag a vector} \usage{ lagk(x, k, na = FALSE, cst = FALSE) } \arguments{ \item{x}{A vector.} \item{k}{integer. The number of lags. If \code{k < 0}, la serie est avancee au lieu d'etre retardee.} \item{na}{logical. If \code{na = TRUE} and \code{k > 0} (resp. \code{k < 0}), the \code{|k|} holes created in the lagged vector are put to \code{NA}; otherwise, the imputation depends on \code{cst}.} \item{cst}{logical. If \code{na = FALSE} and \code{cst = TRUE}, the \code{|k|} holes created in the lagged vector are put to \code{x[[1L]]} (or to \code{x[[length(x)]]} if \code{k < 0}). If \code{na = FALSE} and \code{cst = FALSE}, these \code{|k|} holes are imputed by the \code{k} first values of \code{x} (or the \code{k} last values if \code{k < 0}).} } \value{ A vector of the same type and length as \code{x}. } \description{ This function computes a lagged vector, shifting it back or forward. } \examples{ v <- sample(1:10) print(v) lagk(v, 1) lagk(v, 1, na = TRUE) lagk(v, -2) lagk(v, -3, na = TRUE) lagk(v, -3, na = FALSE, cst = TRUE) lagk(v, -3, na = FALSE) } statip/man/midhinge.Rd0000644000176200001440000000116613564303434014415 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/midhinge.R \name{midhinge} \alias{midhinge} \title{Midhinge} \usage{ midhinge(x, na_rm = FALSE, ...) } \arguments{ \item{x}{numeric. A numeric vector.} \item{na_rm}{logical. Should missing values be removed before computing the midhinge?} \item{...}{Additional arguments to be passed to \code{\link[stats]{quantile}()}.} } \value{ A numeric value, the midhinge. } \description{ Compute the midhinge of a numeric vector \code{x}, defined as the average of the first and third quartiles. } \references{ \url{https://en.wikipedia.org/wiki/Midhinge}. } statip/man/picor.Rd0000644000176200001440000000366413564303513013750 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/picor.R \name{picor} \alias{picor} \alias{knots.picor} \alias{predict.picor} \alias{plot.picor} \alias{print.picor} \title{Piecewise-constant regression} \usage{ picor(formula, data, method, min_length = 0, ...) \method{knots}{picor}(Fn, ...) \method{predict}{picor}(object, newdata, ...) \method{plot}{picor}(x, ...) \method{print}{picor}(x, ...) } \arguments{ \item{formula}{formula of the model to be fitted.} \item{data}{optional data frame.} \item{method}{character. If \code{method = "isotonic"}, then isotonic regression is applied with the \code{\link[stats]{isoreg}} from package \pkg{stats}. Otherwise, \code{\link[rpart]{rpart}} is used, with the corresponding \code{method} argument.} \item{min_length}{integer. The minimal distance between two consecutive knots.} \item{...}{Additional arguments to be passed to \code{\link[rpart]{rpart}}.} \item{object, x, Fn}{An object of class \code{"picor"}.} \item{newdata}{data.frame to be passed to the \code{predict} method.} } \value{ An object of class \code{"picor"}, which is a list composed of the following elements: \itemize{ \item formula: the formula passed as an argument; \item x: the numeric vector of predictors; \item y: the numeric vector of responses; \item knots: a numeric vector (possibly of length 0), the knots found; \item values: a numeric vector (of length \code{length(knots)+1}), the constant values taken by the regression function between the knots. } } \description{ \code{picor} looks for a piecewise-constant function as a regression function. The regression is necessarily univariate. This is essentially a wrapper for \code{\link[rpart]{rpart}} (regression tree) and \code{\link[stats]{isoreg}}. } \examples{ \dontrun{ s <- stats::stepfun(c(-1,0,1), c(1., 2., 4., 3.)) x <- stats::rnorm(1000) y <- s(x) p <- picor(y ~ x, data.frame(x = x, y = y)) print(p) plot(p) } } statip/man/plot.loess.Rd0000644000176200001440000000100113564303513014715 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.loess.R \name{plot.loess} \alias{plot.loess} \title{Basic plot of a loess object} \usage{ \method{plot}{loess}(x, ...) } \arguments{ \item{x}{An object of class \code{"loess"}.} \item{...}{Additional graphical arguments.} } \description{ Plots a loess object adjusted on one unique explanatory variable. } \examples{ reg <- loess(dist ~ speed, cars) plot(reg) } \seealso{ \code{\link[stats]{loess}} from package \pkg{stats}. } statip/man/histo.Rd0000644000176200001440000000211313564331032013743 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/histo.R \name{histo} \alias{histo} \title{Alternative Histograms} \usage{ histo(x, breaks, ...) } \arguments{ \item{x}{numeric. A vector.} \item{breaks}{numeric. A vector of breakpoints to build the histogram, possibly given by \code{\link[statip]{find_breaks}()}.} \item{...}{Additional parameters (currently not used).} } \value{ An object of class \code{"histogram"}, which can be plotted by \code{\link[graphics]{plot.histogram}} from package \pkg{graphics}. This object is a list with components: \itemize{ \item \code{breaks}: the \code{n+1} cell boundaries; \item \code{counts}: \code{n} integers giving the number of \code{x} inside each cell; \item \code{xname}: a string with the actual \code{x} argument name. } } \description{ A simplified version of \code{\link[graphics]{hist}()} from package \pkg{graphics}. } \seealso{ \code{\link[statip]{find_breaks}()} in this package; \code{\link[MASS]{truehist}()} from package \pkg{MASS}; \code{\link[graphics]{hist}()} from package \pkg{graphics}. } statip/man/reexports.Rd0000644000176200001440000000127513564303513014663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as.stepfun.R, R/picor.R, R/plot.loess.R, % R/predict.default.R, R/predict.kmeans.R \docType{import} \name{reexports} \alias{reexports} \alias{as.stepfun} \alias{knots} \alias{predict} \alias{plot} \alias{na.pass} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{graphics}{\code{\link[graphics]{plot}}} \item{stats}{\code{\link[stats]{as.stepfun}}, \code{\link[stats]{knots}}, \code{\link[stats]{na.pass}}, \code{\link[stats]{predict}}, \code{\link[stats]{predict}}} }} statip/man/bandwidth.Rd0000644000176200001440000000077013564303513014573 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bandwidth.R \name{bandwidth} \alias{bandwidth} \title{Bandwidth calculation} \usage{ bandwidth(x, rule) } \arguments{ \item{x}{numeric. The data from which the estimate is to be computed.} \item{rule}{character. A rule to choose the bandwidth. See \code{\link[stats]{bw.nrd}}.} } \value{ A numeric value. } \description{ \code{bandwidth} computes the bandwidth to be used in the \code{\link[statip]{densityfun}} function. } statip/man/predict.default.Rd0000644000176200001440000000144113564303513015700 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict.default.R \name{predict.default} \alias{predict.default} \title{Default model predictions} \usage{ \method{predict}{default}(object, newdata, ...) } \arguments{ \item{object}{A model object, possibly empty.} \item{newdata}{An optional data frame in which to look for variables with which to predict. If omitted, the fitted values are used.} \item{...}{Additional arguments.} } \value{ A vector of predictions. } \description{ Default method of the \code{\link[stats]{predict}} generic function, which can be used when the model object is empty. } \examples{ stats::predict(NULL) stats::predict(NULL, newdata = data.frame(x = 1:2, y = 2:3)) } \seealso{ \code{\link[stats]{predict}} from package \pkg{stats}. } statip/man/distr2name.Rd0000644000176200001440000000160013564311213014664 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distr2name.R, R/name2distr.R \name{distr2name} \alias{distr2name} \alias{name2distr} \title{Conversion between abbreviated distribution names and proper names} \usage{ distr2name(x) name2distr(x) } \arguments{ \item{x}{character. A vector of abbreviated distribution names or proper distribution names.} } \value{ A character vector of the same length as \code{x}. Elements of \code{x} that are not recognized are kept unchanged (yet in lowercase). } \description{ The function \code{distr2name()} converts abbreviated distribution names to proper distribution names (e.g. \code{"norm"} becomes \code{"Gaussian"}). The function \code{name2distr()} does the reciprocal operation. } \examples{ distr2name(c("norm", "dnorm", "rhyper", "ppois")) name2distr(c("Cauchy", "Gaussian", "Generalized Extreme Value")) } statip/man/densityfun.Rd0000644000176200001440000000530413564303513015015 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/densityfun.R \name{densityfun} \alias{densityfun} \title{Kernel density estimation} \usage{ densityfun( x, bw = "nrd0", adjust = 1, kernel = "gaussian", weights = NULL, window = kernel, width, n = 512, from, to, cut = 3, na.rm = FALSE, ... ) } \arguments{ \item{x}{numeric. The data from which the estimate is to be computed.} \item{bw}{numeric. The smoothing bandwidth to be used. See the eponymous argument of \code{\link[stats]{density}}.} \item{adjust}{numeric. The bandwidth used is actually \code{adjust*bw}. This makes it easy to specify values like 'half the default' bandwidth.} \item{kernel, window}{character. A string giving the smoothing kernel to be used. Authorized kernels are listed in \code{\link[statip]{.kernelsList}()}. See also the eponymous argument of \code{\link[stats]{density}}.} \item{weights}{numeric. A vector of non-negative observation weights, hence of same length as \code{x}. See the eponymous argument of \code{\link[stats]{density}}.} \item{width}{this exists for compatibility with S; if given, and \code{bw} is not, will set \code{bw} to \code{width} if this is a character string, or to a kernel-dependent multiple of \code{width} if this is numeric.} \item{n}{The number of equally spaced points at which the density is to be estimated. See the eponymous argument of \code{\link[stats]{density}}.} \item{from, to}{The left and right-most points of the grid at which the density is to be estimated; the defaults are \code{cut * bw} outside of \code{range(x)}.} \item{cut}{By default, the values of \code{from} and \code{to} are cut bandwidths beyond the extremes of the data. This allows the estimated density to drop to approximately zero at the extremes.} \item{na.rm}{logical. If \code{TRUE}, missing values are removed from \code{x}. If \code{FALSE} any missing values cause an error.} \item{...}{Additional arguments for (non-default) methods.} } \value{ A function that can be called to generate a density. } \description{ Return a function performing kernel density estimation. The difference between \code{\link[stats]{density}} and \code{densityfun} is similar to that between \code{\link[stats]{approx}} and \code{\link[stats]{approxfun}}. } \examples{ x <- rlnorm(1000, 1, 1) f <- densityfun(x, from = 0) curve(f(x), xlim = c(0, 20)) } \seealso{ \code{\link[stats]{density}} and \code{\link[stats]{approxfun}} from package \pkg{stats}. } \author{ Adapted from the \code{\link[stats]{density}} function of package \pkg{stats}. The C code of \code{BinDist} is copied from package \pkg{stats} and authored by the R Core Team with contributions from Adrian Baddeley. } statip/man/midrange.Rd0000644000176200001440000000103613564303434014413 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/midrange.R \name{midrange} \alias{midrange} \title{Mid-range} \usage{ midrange(x, na_rm = FALSE) } \arguments{ \item{x}{numeric. A numeric vector.} \item{na_rm}{logical. Should missing values be removed before computing the mid-range?} } \value{ A numeric value, the mid-range. } \description{ Compute the mid-range of a numeric vector \code{x}, defined as the mean of the minimum and the maximum. } \references{ \url{https://en.wikipedia.org/wiki/Mid-range}. } statip/man/trimean.Rd0000644000176200001440000000107213564303434014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/trimean.R \name{trimean} \alias{trimean} \title{Tukey's trimean} \usage{ trimean(x, na_rm = FALSE, ...) } \arguments{ \item{x}{numeric. A numeric vector.} \item{na_rm}{logical. Should missing values be removed before computing the trimean?} \item{...}{Additional arguments to be passed to \code{\link[stats]{quantile}()}.} } \value{ A numeric value, the trimean. } \description{ Compute the trimean of a numeric vector \code{x}. } \references{ \url{https://en.wikipedia.org/wiki/Trimean} } statip/man/cv.Rd0000644000176200001440000000125413564303434013237 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cv.R \name{cv} \alias{cv} \title{Coefficient of variation} \usage{ cv(x, na_rm = FALSE, ...) } \arguments{ \item{x}{numeric. A numeric vector.} \item{na_rm}{logical. Should missing values be removed before computing the coefficient of variation?} \item{...}{Additional arguments to be passed to \code{\link{mean}()}.} } \value{ A numeric value, the coefficient of variation. } \description{ Compute the coefficient of variation of a numeric vector \code{x}, defined as the ratio between the standard deviation and the mean. } \references{ \url{https://en.wikipedia.org/wiki/Coefficient_of_variation}. } statip/man/hellinger.Rd0000644000176200001440000000261713564303513014602 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hellinger.R \name{hellinger} \alias{hellinger} \title{Hellinger distance} \usage{ hellinger(x, y, lower = -Inf, upper = Inf, method = 1, ...) } \arguments{ \item{x}{numeric. A vector giving the first sample.} \item{y}{numeric. A vector giving the second sample.} \item{lower}{numeric. Lower limit passed to \code{\link[stats]{integrate}}.} \item{upper}{numeric. Upper limit passed to \code{\link[stats]{integrate}}.} \item{method}{integer. If \code{method = 1}, the usual definition of the Hellinger distance is used; if \code{method = 2}, an alternative formula is used.} \item{...}{Additional parameters to be passed to \code{\link[statip]{densityfun}}.} } \value{ A numeric value, the Hellinger distance. } \description{ Estimate the \href{https://en.wikipedia.org/wiki/Hellinger_distance}{Hellinger distance} between two random samples whose underdyling distributions are continuous. } \details{ Probability density functions are estimated with \code{\link[statip]{densityfun}}. Then numeric integration is performed with \code{\link[stats]{integrate}}. } \examples{ x <- rnorm(200, 0, 2) y <- rnorm(1000, 10, 15) hellinger(x, y, -Inf, Inf) hellinger(x, y, -Inf, Inf, method = 2) } \references{ \url{https://en.wikipedia.org/wiki/Hellinger_distance}. } \seealso{ \code{\link[distrEx]{HellingerDist}} in package \pkg{distrEx}. } statip/man/dbern.Rd0000644000176200001440000000220413564303513013713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dbern.R \name{dbern} \alias{dbern} \alias{qbern} \alias{pbern} \alias{rbern} \title{The Bernoulli distribution} \usage{ dbern(x, prob, log = FALSE) qbern(p, prob, lower.tail = TRUE, log.p = FALSE) pbern(q, prob, lower.tail = TRUE, log.p = FALSE) rbern(n, prob) } \arguments{ \item{x}{numeric. Vector of quantiles.} \item{prob}{Probability of success on each trial.} \item{log}{logical. If \code{TRUE}, probabilities \code{p} are given as \code{log(p)}.} \item{p}{numeric in \code{[0, 1]}. Vector of probabilities.} \item{lower.tail}{logical. If \code{TRUE} (default), probabilities are \code{P[X <= x]}, otherwise, \code{P[X > x]}.} \item{log.p}{logical. If \code{TRUE}, probabilities \code{p} are given as \code{log(p)}.} \item{q}{numeric. Vector of quantiles.} \item{n}{number of observations. If \code{length(n) > 1}, the length is taken to be the number required.} } \description{ Density, distribution function, quantile function and random generation for the Bernoulli distribution. } \seealso{ See the help page of the \code{\link{Binomial}} distribution. } statip/man/erf.Rd0000644000176200001440000000130713564303513013400 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/erf.R \name{erf} \alias{erf} \title{Error function} \usage{ erf(x, ...) } \arguments{ \item{x}{numeric. A vector of input values.} \item{...}{Additional arguments to be passed to \code{\link[stats]{pnorm}}.} } \value{ A numeric vector of the same length as \code{x}. } \description{ The function \code{erf()} encodes the \href{https://en.wikipedia.org/wiki/Error_function}{error function}, defined as \code{erf(x) = 2 * F(x * sqrt(2)) - 1}, where \code{F} is the Gaussian distribution function. } \references{ \url{https://en.wikipedia.org/wiki/Error_function}. } \seealso{ \code{\link[stats]{pnorm}} from package \pkg{stats}. } statip/DESCRIPTION0000644000176200001440000000371313564337062013301 0ustar liggesusersPackage: statip Type: Package Title: Statistical Functions for Probability Distributions and Regression Version: 0.2.3 Authors@R: c(person("Paul", "Poncet", , "paulponcet@yahoo.fr", role = c("aut", "cre")), person("The R Core Team", role = c("aut", "cph"), comment = "C function 'BinDist' copied from package 'stats'"), person("The R Foundation", role = "cph", comment = "C function 'BinDist' copied from package 'stats'"), person("Adrian", "Baddeley", role = "ctb", comment = "C function 'BinDist' copied from package 'stats'")) Description: A collection of miscellaneous statistical functions for probability distributions: 'dbern()', 'pbern()', 'qbern()', 'rbern()' for the Bernoulli distribution, and 'distr2name()', 'name2distr()' for distribution names; probability density estimation: 'densityfun()'; most frequent value estimation: 'mfv()', 'mfv1()'; other statistical measures of location: 'cv()' (coefficient of variation), 'midhinge()', 'midrange()', 'trimean()'; construction of histograms: 'histo()', 'find_breaks()'; calculation of the Hellinger distance: 'hellinger()'; use of classical kernels: 'kernelfun()', 'kernel_properties()'; univariate piecewise-constant regression: 'picor()'. License: GPL-3 LazyData: TRUE Depends: R (>= 3.1.3) Imports: clue, graphics, rpart, stats Suggests: knitr, testthat URL: https://github.com/paulponcet/statip BugReports: https://github.com/paulponcet/statip/issues RoxygenNote: 7.0.0 NeedsCompilation: yes Packaged: 2019-11-17 21:21:11 UTC; YL1101 Author: Paul Poncet [aut, cre], The R Core Team [aut, cph] (C function 'BinDist' copied from package 'stats'), The R Foundation [cph] (C function 'BinDist' copied from package 'stats'), Adrian Baddeley [ctb] (C function 'BinDist' copied from package 'stats') Maintainer: Paul Poncet Repository: CRAN Date/Publication: 2019-11-17 21:40:02 UTC statip/tests/0000755000176200001440000000000013127653234012726 5ustar liggesusersstatip/tests/testthat/0000755000176200001440000000000013564337062014571 5ustar liggesusersstatip/tests/testthat/test-distribution_names.R0000644000176200001440000000066513564304044021574 0ustar liggesuserscontext("Test distribution names") test_that("'distr2name()' is ok", { expect_identical(distr2name(c("norm", "dnorm", "rhyper", "ppois", "toto")), c("Gaussian", "Gaussian", "Hypergeometric", "Poisson", "toto")) }) test_that("'name2distr()' is ok", { expect_identical(name2distr(c("Cauchy", "Gaussian", "Generalized Extreme Value", "Toto")), c("cauchy", "norm", "gev", "toto")) }) statip/tests/testthat/test-erf.R0000644000176200001440000000024513564305106016440 0ustar liggesuserscontext("Test 'erf()' function") test_that("'erf()' is ok", { x <- rnorm(10) expect_identical(erf(c(x, NA)), c(2 * stats::pnorm(x * sqrt(2)) - 1, NA)) }) statip/tests/testthat/test-mfv.R0000644000176200001440000000233613564326402016461 0ustar liggesuserscontext("Test 'mfv()' function") test_that("'mfv()' returns the most frequent value(s)", { expect_identical(mfv(integer(0)), NaN) expect_identical(mfv(c(3, 3, 3, 2, 4)), 3) expect_identical(mfv(c(TRUE, FALSE, TRUE)), TRUE) expect_identical(mfv(c("a", "a", "b", "a", "d")), "a") expect_identical(mfv(c("a", "a", "b", "b", "d")), c("a", "b")) expect_identical(mfv1(c("a", "a", "b", "b", "d")), "a") }) test_that("'mfv()' works with missing values", { expect_identical(mfv(c(3, 3, 3, 2, NA)), 3) expect_identical(mfv(c(3, 3, 2, NA)), NA_real_) expect_identical(mfv(c(3, 3, 2, NA), na_rm = TRUE), 3) expect_identical(mfv(c(3, 3, 2, 2, NA)), NA_real_) expect_identical(mfv(c(3, 3, 2, 2, NA), na_rm = TRUE), c(2, 3)) expect_identical(mfv(c(NA, NA)), NA) expect_identical(mfv(c(NA, NA), na_rm = TRUE), NaN) }) test_that("'mfv()' works with factors", { expect_identical(mfv(factor(c("a", "b", "a"))), factor("a", levels = c("a", "b"))) expect_identical(mfv(factor(c("a", "b", "a", NA))), factor(NA, levels = c("a", "b"))) expect_identical(mfv(factor(c("a", "b", "a", NA)), na_rm = TRUE), factor("a", levels = c("a", "b"))) }) statip/tests/testthat.R0000644000176200001440000000005313043142543014700 0ustar liggesusers library(testthat) test_check("statip") statip/src/0000755000176200001440000000000013564334707012361 5ustar liggesusersstatip/src/massdist.c0000644000176200001440000000435513564334707014363 0ustar liggesusers/* This file is the copy of the 'massdist.c' file found in the * 'stats' R package. */ /* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1996-2012 The R Core Team * Copyright (C) 2005 The R Foundation * "HACKED" to allow weights by Adrian Baddeley * Changes indicated by 'AB' * ------- * FIXME Does he want 'COPYRIGHT' ? * ------- * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ #ifdef HAVE_CONFIG_H #include #endif #include // includes math.h #include /* NB: this only works in the lower half of y, but pads with zeros. */ SEXP BinDist(SEXP sx, SEXP sw, SEXP slo, SEXP shi, SEXP sn) { PROTECT(sx = coerceVector(sx, REALSXP)); PROTECT(sw = coerceVector(sw, REALSXP)); int n = asInteger(sn); if (n == NA_INTEGER || n <= 0) error("invalid '%s' argument", "n"); SEXP ans = allocVector(REALSXP, 2*n); PROTECT(ans); double xlo = asReal(slo), xhi = asReal(shi); double *x = REAL(sx), *w = REAL(sw), *y = REAL(ans); int ixmin = 0, ixmax = n - 2; double xdelta = (xhi - xlo) / (n - 1); for(int i = 0; i < 2*n ; i++) y[i] = 0; for(R_xlen_t i = 0; i < XLENGTH(sx) ; i++) { if(R_FINITE(x[i])) { double xpos = (x[i] - xlo) / xdelta; int ix = (int) floor(xpos); double fx = xpos - ix; double wi = w[i]; if(ixmin <= ix && ix <= ixmax) { y[ix] += (1 - fx) * wi; y[ix + 1] += fx * wi; } else if(ix == -1) y[0] += fx * wi; else if(ix == ixmax + 1) y[ix] += (1 - fx) * wi; } } UNPROTECT(3); return ans; } statip/src/init.c0000644000176200001440000000033113564334707013465 0ustar liggesusers// RegisteringDynamic Symbols #include #include #include void R_init(DllInfo* info) { R_registerRoutines(info, NULL, NULL, NULL, NULL); R_useDynamicSymbols(info, TRUE); } statip/R/0000755000176200001440000000000013564307020011757 5ustar liggesusersstatip/R/lagk.R0000644000176200001440000000412613564270574013040 0ustar liggesusers#' @title #' Lag a vector #' #' @description #' This function computes a lagged vector, shifting it back or forward. #' #' @param x #' A vector. #' #' @param k #' integer. The number of lags. #' If \code{k < 0}, la serie est avancee au lieu d'etre retardee. #' #' @param na #' logical. If \code{na = TRUE} and \code{k > 0} #' (resp. \code{k < 0}), the \code{|k|} holes created in the lagged vector #' are put to \code{NA}; otherwise, the imputation depends on \code{cst}. #' #' @param cst #' logical. #' If \code{na = FALSE} and \code{cst = TRUE}, the \code{|k|} holes #' created in the lagged vector are put to \code{x[[1L]]} #' (or to \code{x[[length(x)]]} if \code{k < 0}). #' If \code{na = FALSE} and \code{cst = FALSE}, #' these \code{|k|} holes are imputed by the \code{k} #' first values of \code{x} (or the \code{k} last values if \code{k < 0}). #' #' @return #' A vector of the same type and length as \code{x}. #' #' @export #' #' @examples #' v <- sample(1:10) #' print(v) #' lagk(v, 1) #' lagk(v, 1, na = TRUE) #' lagk(v, -2) #' lagk(v, -3, na = TRUE) #' lagk(v, -3, na = FALSE, cst = TRUE) #' lagk(v, -3, na = FALSE) #' lagk <- function (x, k, na = FALSE, cst = FALSE) { nx <- length(x) if(nx < abs(k)) { warning(paste0("argument 'x' is too short to be lagged from ", k, " lags"), call. = FALSE) if (na) { return(rep(NA,nx)) } else { return(x) } } else { if (k > 0) { if (na) { deb <- rep(NA, k) } else { if (cst) { deb <- rep(x[1], k) } else { deb <- x[1:k] } } y <- c(deb, x[1:(nx - k)]) names(y) <- names(x) } if (k < 0) { if (na) { fin <- rep(NA, (-k)) } else { if (cst) { fin <- rep(x[length(x)], -k) } else { fin <- x[(nx+k+1):nx] } } y <- c(x[(-k + 1):nx], fin) names(y) <- names(x) } if (k == 0) { y <- x } return(y) } } statip/R/picor.R0000644000176200001440000001064713564323675013244 0ustar liggesusers#' @title #' Piecewise-constant regression #' #' @description #' \code{picor} looks for a piecewise-constant function as a regression #' function. The regression is necessarily univariate. #' This is essentially a wrapper for \code{\link[rpart]{rpart}} (regression #' tree) and \code{\link[stats]{isoreg}}. #' #' @param formula #' formula of the model to be fitted. #' #' @param data #' optional data frame. #' #' @param method #' character. If \code{method = "isotonic"}, then isotonic regression is #' applied with the \code{\link[stats]{isoreg}} from package \pkg{stats}. #' Otherwise, \code{\link[rpart]{rpart}} is used, with the corresponding #' \code{method} argument. #' #' @param min_length #' integer. #' The minimal distance between two consecutive knots. #' #' @param ... #' Additional arguments to be passed to \code{\link[rpart]{rpart}}. #' #' @param object,x,Fn #' An object of class \code{"picor"}. #' #' @param newdata #' data.frame to be passed to the \code{predict} method. #' #' @return #' An object of class \code{"picor"}, which is a list composed of the #' following elements: #' \itemize{ #' \item formula: the formula passed as an argument; #' \item x: the numeric vector of predictors; #' \item y: the numeric vector of responses; #' \item knots: a numeric vector (possibly of length 0), the knots found; #' \item values: a numeric vector (of length \code{length(knots)+1}), #' the constant values taken by the regression function between the knots. #' } #' #' @importFrom rpart rpart #' @importFrom stats isoreg knots model.frame #' @export #' #' @examples #' \dontrun{ #' s <- stats::stepfun(c(-1,0,1), c(1., 2., 4., 3.)) #' x <- stats::rnorm(1000) #' y <- s(x) #' p <- picor(y ~ x, data.frame(x = x, y = y)) #' print(p) #' plot(p) #' } #' picor <- function(formula, data, method, min_length = 0, ...) { df <- stats::model.frame(formula, data) if (ncol(df) == 1L) { x <- seq_len(nrow(df)) } else if (ncol(df) != 2L) { stop("incorrect formula", call. = FALSE) } else { x <- df[[2L]] } stopifnot(is.numeric(x)) y <- df[[1L]] stopifnot(is.numeric(y)) if (!missing(method) && method == "isotonic") { r <- stats::isoreg(x = x, y = y) r <- as.stepfun(r) knots <- stats::knots(r) values <- unname(r(knots)) } else { ## Use -x instead of x to make the resulting prediction function ## left-continuous instead of right-continuous r <- rpart::rpart(y ~ I(-x), na.action = rpart::na.rpart, method = method, model = FALSE, x = FALSE, y = FALSE, ...) if (is.null(r$splits)) { knots <- max(x, na.rm = TRUE) } else { knots <- c(sort(-r$splits[,"index"]), max(x, na.rm = TRUE)) } #r <- as_fun(r) values <- stats::predict(r, data.frame(x = knots)) } z <- structure(list(formula = formula, x = x, y = y, #predict = r, knots = unname(knots)[-length(knots)], values = values),#unname(r(knots))), class = "picor") prune(z, min_length = min_length) } #' @importFrom stats knots #' @export #' stats::knots #' @export #' @rdname picor #' knots.picor <- function(Fn, ...) { Fn$knots } #' @importFrom stats predict #' @export #' stats::predict #' @importFrom stats stepfun #' @export #' @rdname picor #' predict.picor <- function(object, newdata, ...) { if (is.data.frame(newdata)) { x <- all.vars(object$formula)[-1L] newdata <- newdata[[x]] } if (!is.numeric(newdata)) { stop("incorrect 'newdata' argument", call. = FALSE) } if (length(object$knots) == 0L) { f <- function(v) { rep(object$values, length(v)) } } else { f <- stats::stepfun(object$knots, object$values, right = TRUE) } f(newdata) } #' @importFrom graphics plot lines #' @export #' @rdname picor #' plot.picor <- function(x, ...) { obj <- x v <- all.vars(obj$formula) x <- obj$x a <- order(x) graphics::plot(x, obj$y, xlab = v[2L], ylab = v[1L], ...) graphics::lines(x[a], predict(obj, x)[a], col = 2) } #' @export #' @rdname picor #' print.picor <- function(x, ...) { print(x[c("knots", "values")]) } statip/R/distr2name.R0000644000176200001440000000506413564304144014163 0ustar liggesusers#' @title #' Conversion between abbreviated distribution names and proper names #' #' @description #' The function \code{distr2name()} converts abbreviated #' distribution names to proper distribution names #' (e.g. \code{"norm"} becomes \code{"Gaussian"}). #' #' The function \code{name2distr()} does the reciprocal operation. #' #' @param x #' character. A vector of abbreviated distribution names #' or proper distribution names. #' #' @return #' A character vector of the same length as \code{x}. #' Elements of \code{x} that are not recognized are kept unchanged #' (yet in lowercase). #' #' @export #' #' @examples #' distr2name(c("norm", "dnorm", "rhyper", "ppois")) #' name2distr(c("Cauchy", "Gaussian", "Generalized Extreme Value")) #' distr2name <- function(x) { p <- c("", "d", "p", "q", "r") # prefix x[x %in% c(paste0(p, "bern"), "bernoulli")] <- "Bernoulli" x[x %in% c(paste0(p, "beta"), "beta")] <- "Beta" x[x %in% c(paste0(p, "binom"), "binomial")] <- "Binomial" x[x %in% c(paste0(p, "cauchy"), "cauchy")] <- "Cauchy" x[x %in% c(paste0(p, "chisq"), "chi-square")] <- "Chi-Square" x[x %in% c(paste0(p, "dagum"), "dagum")] <- "Dagum" x[x %in% c(paste0(p, "exp"), "exponential")] <- "Exponential" x[x %in% c(paste0(p, "f"), "f")] <- "F" x[x %in% c(paste0(p, "gamma"), "gamma")] <- "Gamma" x[x %in% c(paste0(p, "gh"), "gh")] <- "Generaralized Hyperbolic" x[x %in% c(paste0(p, "gev"), "gev")] <- "Generaralized Extreme Value" x[x %in% c(paste0(p, "gpd"), "gpd")] <- "Generalized Pareto" x[x %in% c(paste0(p, "geom"), "geometric")] <- "Geometric" x[x %in% c(paste0(p, "hyp"), "hyperbolic")] <- "Hyperbolic" x[x %in% c(paste0(p, "hyper"), "hypergeometric")] <- "Hypergeometric" x[x %in% c(paste0(p, "kumar"), "kumaraswamy")] <- "Kumaraswamy" x[x %in% c(paste0(p, "logis"), "logistic")] <- "Logistic" x[x %in% c(paste0(p, "lnorm"), "log-normal")] <- "Log-Normal" x[x %in% c(paste0(p, "multinom"), "multinomial")] <- "Multinomial" x[x %in% c(paste0(p, "nbinom"), "negative-binomial")] <- "Negative-Binomial" x[x %in% c(paste0(p, "nig"), "normal-inverse")] <- "Normal-Inverse" x[x %in% c(paste0(p, "norm"), "gaussian")] <- "Gaussian" x[x %in% c(paste0(p, "pois"), "poisson")] <- "Poisson" x[x %in% c(paste0(p, "t"), "student")] <- "Student" x[x %in% c(paste0(p, "tukey"), "tukey")] <- "Tukey" x[x %in% c(paste0(p, "unif"), "uniform")] <- "Uniform" x[x %in% c(paste0(p, "weibull"), "weibull")] <- "Weibull" x[x %in% c(paste0(p, "wilcox"), "wilcoxon")] <- "Wilcoxon" x } statip/R/as.stepfun.R0000644000176200001440000000054413564270224014200 0ustar liggesusers #' @importFrom stats as.stepfun #' @export stats::as.stepfun #' @importFrom stats stepfun #' @export #' as.stepfun.picor <- function(x, ...) { if (length(x$knots) == 0L) { stop("cannot convert 'x' to a stepfun object, as least one knot is needed", call. = FALSE) } stats::stepfun(x$knots, x$values, right = TRUE) } statip/R/utils.R0000644000176200001440000000330613564323746013261 0ustar liggesusers nclass_sturges <- function(x) { ceiling(log2(length(x)) + 1) } #' @importFrom stats var #' nclass_scott <- function(x) { h <- 3.5 * sqrt(stats::var(x)) * length(x)^(-1/3) if (h > 0) ceiling(diff(range(x))/h) else 1L } #' @importFrom stats IQR mad #' nclass_freedman_diaconis <- function(x) { h <- stats::IQR(x) if (h == 0) h <- stats::mad(x, constant = 2) if (h > 0) ceiling(diff(range(x))/(2 * h * length(x)^(-1/3))) else 1L } # #' @importFrom stats formula predict # #' # as_fun <- function(x, ...) { # # ## Name of the X variables # ..x <- x # rm(x) # ..n <- stats::formula(..x) # ..n <- all.vars(..n)[-1L] # if ("..x" %in% ..n) { # stop("the model's formula contains a variable called '..x', # 'as_fun()' does not work in this specific case") # } # if ("..n" %in% ..n) { # stop("the model's formula contains a variable called '..n', # 'as_fun()' does not work in this specific case") # } # # ## Creation of the function to be returned, with no arguments yet # f <- function() { # df <- as.data.frame(as.list(environment())) # names(df) <- ..n # p <- stats::predict(..x, newdata = df, type = "vector", ...) # if (is.list(p)) { # if (!is.null(p$fit)) { # y <- p$fit # } else if (!is.null(p$pred)) { # y <- p$pred # } else { # stop("cannot find predicted values") # } # } else { # y <- p # } # unname(y) # } # # ## 'l' is the list used to name the arguments of the function 'f()' # l <- replicate(length(..n), substitute()) # names(l) <- ..n # formals(f) <- l # f # } statip/R/histo.R0000644000176200001440000000360413564330152013235 0ustar liggesusers#' @title #' Alternative Histograms #' #' @description #' A simplified version of #' \code{\link[graphics]{hist}()} from package \pkg{graphics}. #' #' @param x #' numeric. A vector. #' #' @param breaks #' numeric. A vector of breakpoints to build the histogram, #' possibly given by \code{\link[statip]{find_breaks}()}. #' #' @param ... #' Additional parameters (currently not used). #' #' @return #' An object of class \code{"histogram"}, which can be plotted #' by \code{\link[graphics]{plot.histogram}} from package \pkg{graphics}. #' This object is a list with components: #' \itemize{ #' \item \code{breaks}: the \code{n+1} cell boundaries; #' \item \code{counts}: \code{n} integers giving the number of \code{x} #' inside each cell; #' \item \code{xname}: a string with the actual \code{x} argument name. #' } #' #' @export #' #' @seealso #' \code{\link[statip]{find_breaks}()} in this package; #' \code{\link[MASS]{truehist}()} from package \pkg{MASS}; #' \code{\link[graphics]{hist}()} from package \pkg{graphics}. #' histo <- function(x, breaks, ...) { stopifnot(is.numeric(x)) xname <- paste(deparse(substitute(x), 500), collapse = "\n") x <- x[is.finite(x)] bin <- cut(x, breaks, include.lowest = TRUE) counts <- tabulate(bin, length(levels(bin))) #dens <- counts/(diff(counts) * length(x)) structure(list(breaks = breaks, counts = counts, xname = xname), class = "histogram") #stats::stepfun(h$breaks, c(0, h$counts, 0), ...) } # mean.histogram <- function(x, ...) { # # } # # median.histogram <- function(x, ...) { # # } # min.histogram <- function(..., na.rm = FALSE) { # l <- list(...) # min(l[[1L]]$breaks, na.rm = na.rm) # } # # max.histogram <- function(..., na.rm = FALSE) { # l <- list(...) # max(l[[1L]]$breaks, na.rm = na.rm) # } statip/R/predict.kmeans.R0000644000176200001440000000057213202647065015022 0ustar liggesusers #' @importFrom stats na.pass #' @export #' stats::na.pass #' @importFrom clue cl_predict #' @export #' predict.kmeans <- function(object, newdata, #na.action = na.pass, ...) { if (missing(newdata) || is.null(newdata)) return(object$cluster) x <- clue::cl_predict(object, newdata, type = "class_ids") as.integer(x) } statip/R/kernelfun.R0000644000176200001440000000702713564270554014114 0ustar liggesusers#' @title #' Smoothing kernels #' #' @description #' The generic function \code{kernelfun} creates #' a smoothing kernel function. #' #' @param name #' character. #' The name of the kernel to be used. #' Authorized kernels are listed in \code{\link{.kernelsList}()}. #' #' @param derivative #' logical. If \code{TRUE}, the derivative of the kernel #' is returned. #' #' @param ... #' Additional arguments to be passed to the kernel function. #' #' @return #' A function. #' #' @seealso #' \code{\link[stats]{density}} in package \pkg{stats}. #' # #' @importFrom dplyr if_else #' @export #' #' @examples #' k <- kernelfun("epanechnikov") #' curve(k(x), xlim = c(-1, 1)) #' kernelfun <- function(name, ...) { UseMethod("kernelfun") } #' @export #' @rdname kernelfun #' kernelfun.function <- function(name, ...) { x <- deparse(substitute(name)) Kfun <- name attr(Kfun, "name") <- x Kfun } #' @export #' @rdname kernelfun #' kernelfun.character <- function(name, derivative = FALSE, ...) { name <- match.arg(tolower(name), .kernelsList()) kname <- if (derivative) { paste0(".kernel.d", name) } else { paste0(".kernel.", name) } Kfun <- function(x) { do.call(kname, list(x, ...)) } attr(Kfun, "name") <- name Kfun } .kernel.biweight <- function(x, ...) { a <- sqrt(7) ax <- abs(x) ifelse(ax < a, (15/16) * (1 - (ax/a)^2)^2/a, 0) } # Derivative .kernel.dbiweight <- function(x, ...) { a <- sqrt(7) ax <- abs(x) ifelse(ax < a, -(15/4) * x * (1 - (ax/a)^2)/a^3, 0) } .kernel.uniform <- .kernel.chernoff <- function(x, ...) { ifelse(abs(x) <= 1, 1/2, 0) } # Derivative .kernel.duniform <- .kernel.dchernoff <- function(x, ...) { 0 } .kernel.cosine <- function(x, ...) { a <- 1/sqrt(1/3 - 2/pi^2) ifelse(abs(x) < a, (1 + cos(pi*x/a))/(2*a), 0) } # Derivative .kernel.dcosine <- function(x, ...) { a <- 1/sqrt(1/3 - 2/pi^2) ifelse(abs(x) < a, -(pi/(2*a^2))*sin(pi*x/a), 0) } .kernel.eddy <- function(x, ...) { #ax <- abs(x) ifelse(abs(x) <= 1, (15/32) * (3 - 10*x^2 + 7*x^4), 0) } # Derivative .kernel.deddy <- function(x, ...) { ifelse(abs(x) <= 1, (15/32) * (-20*x + 28*x^3), 0) } .kernel.epanechnikov <- function(x, ...) { ifelse(abs(x) <= 1, (3/4) * (1 - x^2), 0) } # Derivative .kernel.depanechnikov <- function(x, ...) { ifelse(abs(x) <= 1, (-3*x/2), 0) } .kernel.gaussian <- function(x, ...) { stats::dnorm(x) } # Derivative .kernel.dgaussian <- function(x, ...) { -x*stats::dnorm(x) } .kernel.optcosine <- function(x, ...) { a <- 1/sqrt(1 - 8/pi^2) ifelse(abs(x) < a, (pi/4) * cos(pi * x/(2*a))/a, 0) } # Derivative .kernel.doptcosine <- function(x, ...) { a <- 1/sqrt(1 - 8/pi^2) ifelse(abs(x) < a, -(pi^2/(8*a^2)) * sin(pi * x/(2*a)), 0) } .kernel.rectangular <- function(x, ...) { a <- sqrt(3) ifelse(abs(x) < a, 0.5/a, 0) } # Derivative .kernel.drectangular <- function(x, ...) { 0 } .kernel.triangular <- function(x, ...) { ax <- abs(x) ifelse(ax <= 1, (1 - ax), 0) } # Derivative .kernel.dtriangular <- function(x, ...) { ax <- abs(x) ifelse(ax <= 1, -sign(x), 0) } statip/R/trimean.R0000644000176200001440000000155013564270653013555 0ustar liggesusers#' @title #' Tukey's trimean #' #' @description #' Compute the trimean of a numeric vector \code{x}. #' #' @param x #' numeric. A numeric vector. #' #' @param na_rm #' logical. Should missing values be removed before computing the trimean? #' #' @param ... #' Additional arguments to be passed to \code{\link[stats]{quantile}()}. #' #' @return #' A numeric value, the trimean. #' #' @references #' \url{https://en.wikipedia.org/wiki/Trimean} #' #' @importFrom stats median quantile #' @export #' trimean <- function(x, na_rm = FALSE, ...) { if (!is.null(list(...)$na.rm)) { stop("'na.rm' is not a valid argument, please use 'na_rm' instead", call. = FALSE) } qs <- stats::quantile(x, probs = c(0.25, 0.75), na.rm = na_rm, ...) m <- stats::median(x, na.rm = na_rm) (qs[1L] + 2*m + qs[2L]) / 4 } statip/R/cv.R0000644000176200001440000000162013564271511012516 0ustar liggesusers#' @title #' Coefficient of variation #' #' @description #' Compute the coefficient of variation of a numeric vector \code{x}, #' defined as the ratio between the standard deviation and the mean. #' #' @param x #' numeric. A numeric vector. #' #' @param na_rm #' logical. Should missing values be removed before computing the coefficient of variation? #' #' @param ... #' Additional arguments to be passed to \code{\link{mean}()}. #' #' @return #' A numeric value, the coefficient of variation. #' #' @references #' \url{https://en.wikipedia.org/wiki/Coefficient_of_variation}. #' #' @importFrom stats sd #' @export #' cv <- function(x, na_rm = FALSE, ...) { if (!is.null(list(...)$na.rm)) { stop("'na.rm' is not a valid argument, please use 'na_rm' instead", call. = FALSE) } stats::sd(x, na.rm = na_rm) / mean(x, na.rm = na_rm, ...) } statip/R/erf.R0000644000176200001440000000141713564264620012671 0ustar liggesusers#' @title #' Error function #' #' @description #' The function \code{erf()} encodes the #' \href{https://en.wikipedia.org/wiki/Error_function}{error function}, #' defined as \code{erf(x) = 2 * F(x * sqrt(2)) - 1}, where #' \code{F} is the Gaussian distribution function. #' #' @param x #' numeric. A vector of input values. #' #' @param ... #' Additional arguments to be passed to \code{\link[stats]{pnorm}}. #' #' @return #' A numeric vector of the same length as \code{x}. #' #' @references #' \url{https://en.wikipedia.org/wiki/Error_function}. #' #' @seealso #' \code{\link[stats]{pnorm}} from package \pkg{stats}. #' #' @importFrom stats pnorm #' @export #' erf <- function(x, ...) { 2 * stats::pnorm(x * sqrt(2), ...) - 1 } statip/R/mfv.R0000644000176200001440000001014613564327213012702 0ustar liggesusers#' @title #' Most frequent value(s) #' #' @description #' The function \code{mfv()} returns the most frequent value(s) (or mode(s)) #' found in a vector. #' The function \code{mfv1} returns the first of these values, so that #' \code{mfv1(x)} is identical to \code{mfv(x)[[1L]]}. #' #' @details #' See David Smith' blog post #' \href{http://blog.revolutionanalytics.com/2016/07/understanding-na-in-r.html}{here} #' to understand the philosophy followed in the code of \code{mfv} for missing #' values treatment. #' #' @note #' \code{mfv()} calls the function \code{\link[base]{tabulate}}. #' #' @references #' \itemize{ #' \item Dutta S. and Goswami A. (2010). #' Mode estimation for discrete distributions. #' \emph{Mathematical Methods of Statistics}, \bold{19}(4):374--384. #' } #' #' @param x #' Vector of observations (of type numeric, integer, character, factor, or #' logical). #' \code{x} is to come from a discrete distribution. #' #' @param na_rm #' logical. If \code{TRUE}, missing values do not interfer #' with the result, see 'Details'. #' #' @param ... #' Additional arguments (currently not used). #' #' @return #' The function \code{mfv} returns a vector of the same type as \code{x}. #' One should be aware that this vector can be of length \code{> 1}, in case of #' multiple modes. #' \code{mfv1} always returns a vector of length \code{1} #' (the first of the modes found). #' #' @export #' #' @examples #' # Basic examples: #' mfv(integer(0)) # NaN #' mfv(c(3, 3, 3, 2, 4)) # 3 #' mfv(c(TRUE, FALSE, TRUE)) # TRUE #' mfv(c("a", "a", "b", "a", "d")) # "a" #' #' mfv(c("a", "a", "b", "b", "d")) # c("a", "b") #' mfv1(c("a", "a", "b", "b", "d")) # "a" #' #' # With missing values: #' mfv(c(3, 3, 3, 2, NA)) # 3 #' mfv(c(3, 3, 2, NA)) # NA #' mfv(c(3, 3, 2, NA), na_rm = TRUE) # 3 #' mfv(c(3, 3, 2, 2, NA)) # NA #' mfv(c(3, 3, 2, 2, NA), na_rm = TRUE) # c(2, 3) #' mfv1(c(3, 3, 2, 2, NA), na_rm = TRUE)# 2 #' #' # With only missing values: #' mfv(c(NA, NA)) # NA #' mfv(c(NA, NA), na_rm = TRUE) # NaN #' #' # With factors #' mfv(factor(c("a", "b", "a"))) #' mfv(factor(c("a", "b", "a", NA))) #' mfv(factor(c("a", "b", "a", NA)), na_rm = TRUE) #' mfv <- function(x, ...) { UseMethod("mfv") } # TODO: ne marche pas avec , devrait renvoyer NA #' @export #' @rdname mfv #' mfv.default <- function(x, na_rm = FALSE, ...) { mfv(tableNA(x), na_rm = na_rm, ...) # cl <- typeof(x) #class(x)[[1L]] # n <- length(x) # a <- sum(is.na(x)) # if (na_rm) n <- n - a # if (n == 0L) return(NaN) # # f <- factor(x) # lf <- levels(f) # tf <- tabulate(f) # wf <- which.max(tf) # n1 <- tf[wf] #max(tf) # v <- lf[tf == n1] # # if (!na_rm && a > 0L) { # n2 <- ifelse(length(tf) > 1L, max(tf[-wf]), 0L) # if (n2 + a >= n1) v <- NA # } # # if (is.factor(x)) { # return(factor(v, levels = lf)) # } else { # return(as.vector(v, mode = cl)) # } } #' @export #' @rdname mfv #' mfv.tableNA <- function(x, na_rm = FALSE, ...) { l <- list(...) if (!is.null(l$na.rm)) { message("argument 'na.rm' is soft-deprecated, please start using 'na_rm' instead") na_rm <- l$na.rm } tf <- x a <- tf[""] if (na_rm || is.na(a)) { a <- 0L } tf[""] <- 0L if (length(tf) == 1L && a == 0L) return(NaN) wf <- which.max(tf) n1 <- tf[wf] v <- names(tf)[tf == n1] if (length(tf) == 1L && !na_rm) { v <- NA } if (!na_rm && a > 0L) { n2 <- ifelse(length(tf) > 1L, max(tf[-wf]), 0L) if (n2 + a >= n1) v <- NA } if (attr(x, "is_factor_x")) { return(factor(v, levels = levels(x))) } else { return(as.vector(v, mode = attr(x, "type_of_x"))) } } #' @export #' @rdname mfv #' mfv1 <- function(x, na_rm = FALSE, ...) { mfv(x, na_rm = na_rm, ...)[[1L]] } statip/R/plot.loess.R0000644000176200001440000000131013035511470014176 0ustar liggesusers#' @title #' Basic plot of a loess object #' #' @description #' Plots a loess object adjusted on one #' unique explanatory variable. #' #' @param x #' An object of class \code{"loess"}. #' #' @param ... #' Additional graphical arguments. #' #' @seealso #' \code{\link[stats]{loess}} from package \pkg{stats}. #' #' @importFrom graphics lines #' @importFrom graphics plot #' @export #' #' @examples #' reg <- loess(dist ~ speed, cars) #' plot(reg) #' plot.loess <- function(x, ...) { v <- x$x a <- order(v) graphics::plot(v, x$y, ...) graphics::lines(v[a], x$fitted[a], col = 2) } #' @importFrom graphics plot #' @export #' graphics::plot statip/R/hellinger.R0000644000176200001440000000367513564263450014076 0ustar liggesusers#' @title #' Hellinger distance #' #' @description #' Estimate the #' \href{https://en.wikipedia.org/wiki/Hellinger_distance}{Hellinger distance} #' between two random samples whose underdyling distributions #' are continuous. #' #' @details #' Probability density functions are estimated with #' \code{\link[statip]{densityfun}}. #' Then numeric integration is performed with \code{\link[stats]{integrate}}. #' #' @param x #' numeric. A vector giving the first sample. #' #' @param y #' numeric. A vector giving the second sample. #' #' @param lower #' numeric. Lower limit passed to \code{\link[stats]{integrate}}. #' #' @param upper #' numeric. Upper limit passed to \code{\link[stats]{integrate}}. #' #' @param method #' integer. If \code{method = 1}, the usual definition #' of the Hellinger distance is used; if \code{method = 2}, #' an alternative formula is used. #' #' @param ... #' Additional parameters to be passed to \code{\link[statip]{densityfun}}. #' #' @return #' A numeric value, the Hellinger distance. #' #' @references #' \url{https://en.wikipedia.org/wiki/Hellinger_distance}. #' #' @seealso #' \code{\link[distrEx]{HellingerDist}} in package \pkg{distrEx}. #' #' #' @importFrom stats integrate #' @export #' #' @examples #' x <- rnorm(200, 0, 2) #' y <- rnorm(1000, 10, 15) #' hellinger(x, y, -Inf, Inf) #' hellinger(x, y, -Inf, Inf, method = 2) #' hellinger <- function(x, y, lower = -Inf, upper = Inf, method = 1, ...) { fx <- densityfun(x, ...) fy <- densityfun(y, ...) if (method == 1) { g <- function(z) (fx(z)^0.5 - fy(z)^0.5)^2 h2 <- stats::integrate(g, lower, upper)$value/2 } else if (method == 2) { g <- function(z) (fx(z)*fy(z))^0.5 h2 <- 1 - stats::integrate(g, lower, upper)$value } else { stop("incorrect 'method' argument", call. = FALSE) } sqrt(h2) } statip/R/kernel_properties.R0000644000176200001440000001372013541254642015647 0ustar liggesusers #' @export #' @rdname kernelfun #' #' @examples #' kernel_properties("gaussian") #' kernel_properties <- function(name, derivative = FALSE) { name <- match.arg(tolower(name), .kernelsList()) canonical_bandwidth <- switch(name, biweight = 5 * sqrt(7)/49, chernoff = NA_real_, cosine = 3/4 * sqrt(1/3 - 2/pi^2), eddy = NA_real_, epanechnikov = 3/(5 * sqrt(5)), gaussian = 1/(2 * sqrt(pi)), optcosine = sqrt(1 - 8/pi^2) * pi^2/16, rectangular = sqrt(3)/6, triangular = sqrt(6)/9, uniform = NA_real_) canonical_bandwidth_deriv <- NA_real_ fac <- switch(name, biweight = 2 * sqrt(7), chernoff = NA, cosine = 2/sqrt(1/3 - 2/pi^2), eddy = NA, epanechnikov = 2 * sqrt(5), gaussian = 4, optcosine = 2/sqrt(1 - 8/pi^2), rectangular = 2 * sqrt(3), triangular = 2 * sqrt(6), uniform = NA) fac_deriv <- NA_real_ integral_K <- switch(name, biweight = 1, chernoff = 1, cosine = 1, eddy = 1, epanechnikov = 1, gaussian = 1, optcosine = 1, rectangular = 1, triangular = 1, uniform = 1) integral_K_deriv <- switch(name, biweight = NA, chernoff = 0, cosine = 0, eddy = 0, epanechnikov = 0, gaussian = 0, optcosine = 0, rectangular = 0, triangular = 0, uniform = 0) integral_K2 <- switch(name, biweight = 1/2, chernoff = 1/2, cosine = (3/4)*sqrt(1/3 - 2/pi^2), eddy = 1.25, epanechnikov = 3/5, gaussian = 1/(2*sqrt(pi)), optcosine = (pi^2/16)*sqrt(1 - 8/pi^2), rectangular = 1/2, triangular = 2/3, uniform = 1/2) integral_K2_deriv <- switch(name, biweight = 15/(49*sqrt(7)), chernoff = 0, cosine = (pi^2/4)*(sqrt(1/3 - 2/pi^2))^3, eddy = 9.375, epanechnikov = 3/2, gaussian = 0.1410474, optcosine = (pi^4/64)*(sqrt(1 - 8/pi^2))^3, rectangular = 0, triangular = 2, uniform = 0) continuity <- switch(name, biweight = Inf, chernoff = 0, cosine = Inf, eddy = 1, epanechnikov = 1, gaussian = Inf, optcosine = 1, rectangular = 0, triangular = 1, uniform = 0) continuity_deriv <- switch(name, biweight = Inf, chernoff = Inf, cosine = Inf, eddy = 0, epanechnikov = 0, gaussian = Inf, optcosine = 0, rectangular = Inf, triangular = 0, uniform = Inf) differentiability <- switch(name, biweight = Inf, chernoff = 0, cosine = Inf, eddy = 0, epanechnikov = 0, gaussian = Inf, optcosine = 0, rectangular = 0, triangular = 0, uniform = 0) differentiability_deriv <- switch(name, biweight = Inf, chernoff = Inf, cosine = Inf, eddy = 0, epanechnikov = 0, gaussian = Inf, optcosine = 0, rectangular = Inf, triangular = 0, uniform = Inf) if (derivative) { list(canonical_bandwidth = canonical_bandwidth_deriv, continuity = continuity_deriv, differentiability = differentiability_deriv, fac = fac_deriv, integral_K = integral_K_deriv, integral_K2 = integral_K2_deriv, name = name, derivative = derivative) } else { list(canonical_bandwidth = canonical_bandwidth, continuity = continuity, differentiability = differentiability, fac = fac, integral_K = integral_K, integral_K2 = integral_K2, name = name, derivative = derivative) } } statip/R/kernelsList.R0000644000176200001440000000036713541254657014423 0ustar liggesusers #' @export #' @rdname kernelfun #' .kernelsList <- function() { c("biweight", "chernoff", "cosine", "eddy", "epanechnikov", "gaussian", "optcosine", "rectangular", "triangular", "uniform") } statip/R/dbern.R0000644000176200001440000000350213564270453013205 0ustar liggesusers#' @title #' The Bernoulli distribution #' #' @description #' Density, distribution function, quantile function and #' random generation for the Bernoulli distribution. #' #' @param x #' numeric. Vector of quantiles. #' #' @param q #' numeric. Vector of quantiles. #' #' @param p #' numeric in \code{[0, 1]}. Vector of probabilities. #' #' @param n #' number of observations. #' If \code{length(n) > 1}, the length is taken to be the number required. #' #' @param prob #' Probability of success on each trial. #' #' @param log #' logical. If \code{TRUE}, probabilities \code{p} are given as \code{log(p)}. #' #' @param log.p #' logical. If \code{TRUE}, probabilities \code{p} are given as \code{log(p)}. #' #' @param lower.tail #' logical. If \code{TRUE} (default), #' probabilities are \code{P[X <= x]}, otherwise, \code{P[X > x]}. #' #' @seealso #' See the help page of the \code{\link{Binomial}} distribution. #' #' @export #' dbern <- function(x, prob, log = FALSE) { res <- rep(0, length(x)) res[x == 1] <- prob res[x == 0] <- 1-prob if (log) return(log(res)) res } #' @importFrom stats qbinom #' @export #' @rdname dbern #' qbern <- function(p, prob, lower.tail = TRUE, log.p =FALSE) { stats::qbinom(p, size = 1, prob = prob, lower.tail = lower.tail, log.p = log.p) } #' @importFrom stats pbinom #' @export #' @rdname dbern #' pbern <- function(q, prob, lower.tail = TRUE, log.p = FALSE) { stats::pbinom(q, size = 1, prob = prob, lower.tail = lower.tail, log.p = log.p) } #' @export #' @rdname dbern #' rbern <- function(n, prob) { sample(c(0,1), n, replace = TRUE, prob = c(1-prob, prob)) } statip/R/midhinge.R0000644000176200001440000000153013564270612013673 0ustar liggesusers#' @title #' Midhinge #' #' @description #' Compute the midhinge of a numeric vector \code{x}, #' defined as the average of the first and third quartiles. #' #' @param x #' numeric. A numeric vector. #' #' @param na_rm #' logical. Should missing values be removed before computing the midhinge? #' #' @param ... #' Additional arguments to be passed to \code{\link[stats]{quantile}()}. #' #' @return #' A numeric value, the midhinge. #' #' @references #' \url{https://en.wikipedia.org/wiki/Midhinge}. #' #' @importFrom stats quantile #' @export #' midhinge <- function(x, na_rm = FALSE, ...) { if (!is.null(list(...)$na.rm)) { stop("'na.rm' is not a valid argument, please use 'na_rm' instead", call. = FALSE) } mean(stats::quantile(x, probs = c(0.25, 0.75), na.rm = na_rm, ...)) } statip/R/find_breaks.R0000644000176200001440000000406613564327124014366 0ustar liggesusers#' @title #' Breakpoints to be passed to a Histogram #' #' @description #' The function \code{find_breaks()} isolates a piece of code of #' the function \code{\link[MASS]{truehist}()} from package \pkg{MASS} #' that is used to compute the set of breakpoints to be applied for the #' construction of the histogram. #' #' @param x #' numeric. A vector. #' #' @param nbins #' integer or character. The suggested number of bins. #' Either a positive integer, or a character string naming a rule: #' \code{"Scott"} (the default) or \code{"Freedman-Diaconis"} or \code{"FD"}. #' (Case is ignored.) #' #' @param h #' numeric. The bin width, a strictly positive number #' (takes precedence over nbins). #' #' @param x0 #' numeric. Shift for the bins - #' the breaks are at \code{x0 + h * (..., -1, 0, 1, ...)}. #' #' @return #' A numeric vector. #' #' @export #' #' @seealso #' \code{\link[statip]{histo}()} in this package; #' \code{\link[MASS]{truehist}()} from package \pkg{MASS}; #' \code{\link[graphics]{hist}()} from package \pkg{graphics}. #' find_breaks <- function(x, nbins = "Scott", h, x0 = -h/1000) { if (missing(h)) { if (is.character(nbins)) nbins <- switch(casefold(nbins), scott = nclass_scott(x), `freedman-diaconis` = , fd = nclass_freedman_diaconis(x), sturges = nclass_sturges(x)) if (!is.finite(nbins) || nbins <= 0) stop("'nbins' must result in a positive integer", call. = FALSE) h <- diff(pretty(x, nbins))[1L] } if (!is.finite(h) || h <= 0) stop("'h' must be strictly positive", call. = FALSE) first <- floor((min(x) - x0)/h) last <- ceiling((max(x) - x0)/h) breaks <- x0 + h * c(first:last) if (any(diff(breaks) <= 0)) stop("'breaks' must be strictly increasing", call. = FALSE) if (min(x) < min(breaks) || max(x) > max(breaks)) stop("'breaks' do not cover the range of 'x' values", call. = FALSE) breaks } statip/R/bandwidth.R0000644000176200001440000000200013564307210014037 0ustar liggesusers#' @title #' Bandwidth calculation #' #' @description #' \code{bandwidth} computes the bandwidth to be used in the #' \code{\link[statip]{densityfun}} function. #' #' @param x #' numeric. The data from which the estimate is to be computed. #' #' @param rule #' character. A rule to choose the bandwidth. See \code{\link[stats]{bw.nrd}}. #' #' @return #' A numeric value. #' #' @importFrom stats bw.nrd0 bw.nrd bw.ucv bw.bcv bw.SJ #' @export #' bandwidth <- function(x, rule) { stopifnot(is.character(rule)) if (length(x) < 2L) stop("need at least 2 points to select a bandwidth automatically", call. = FALSE) switch(tolower(rule), nrd0 = stats::bw.nrd0(x), nrd = stats::bw.nrd(x), ucv = stats::bw.ucv(x), bcv = stats::bw.bcv(x), sj = , `sj-ste` = stats::bw.SJ(x, method = "ste"), `sj-dpi` = stats::bw.SJ(x, method = "dpi"), stop("unknown bandwidth rule", call. = FALSE)) } statip/R/predict.default.R0000644000176200001440000000174513564266562015205 0ustar liggesusers#' @title #' Default model predictions #' #' @description #' Default method of the \code{\link[stats]{predict}} generic #' function, which can be used when the model object is empty. #' #' @param object #' A model object, possibly empty. #' #' @param newdata #' An optional data frame in which to look for variables #' with which to predict. #' If omitted, the fitted values are used. #' #' @param ... #' Additional arguments. #' #' @return #' A vector of predictions. #' #' @seealso #' \code{\link[stats]{predict}} from package \pkg{stats}. #' #' @export #' #' @examples #' stats::predict(NULL) #' stats::predict(NULL, newdata = data.frame(x = 1:2, y = 2:3)) #' predict.default <- function(object, newdata, ...) { if (length(object) == 0L) { n <- if (missing(newdata)) 0L else nrow(newdata) return(rep(NA, n)) } NextMethod("predict") } #' @importFrom stats predict #' @export #' stats::predict statip/R/reexports.R0000644000176200001440000000011313564266771014151 0ustar liggesusers # #' @importFrom bazar is_empty # #' @export # #' # bazar::is_empty statip/R/tableNA.R0000644000176200001440000000210613564330774013423 0ustar liggesusers#' @title #' Alternative Table Creation #' #' @description #' Count the occurrences of each factor level or value in a vector. #' #' @param x #' numeric. An atomic vector or a factor. #' #' @return #' An object of class \code{"tableNA"}, which is the result of #' \code{\link{tabulate}()} with three attributes: #' \itemize{ #' \item \code{type_of_x}: the result of \code{typeof(x)}; #' \item \code{is_factor_x}: the result of \code{is.factor(x)}; #' \item \code{levels}: the result of \code{levels(x)}. #' } #' The number of missing values is always reported. #' #' @export #' #' @examples #' tableNA(c(1,2,2,1,3)) #' tableNA(c(1,2,2,1,3, NA)) #' tableNA <- function(x) { stopifnot(is.atomic(x)) if (length(x) == 0L) { fx <- factor(x, exclude = NULL, levels = NA) } else { fx <- factor(x, exclude = NULL) } tab <- tabulate(fx) ns <- levels(fx) ns[is.na(ns)] <- "" names(tab) <- ns structure(tab, type_of_x = typeof(x), is_factor_x = is.factor(x), levels = levels(x), class = "tableNA") } statip/R/prune.R0000644000176200001440000000031313564305151013233 0ustar liggesusers prune <- function(x, min_length = 1L) { n <- length(x$knots) w <- which(diff(x$knots) >= min_length) x$knots <- x$knots[c(w, n)] x$values <- x$values[unique(c(0L, w, n)) + 1L] x } statip/R/densityfun.R0000644000176200001440000001331713541256550014305 0ustar liggesusers#' @title #' Kernel density estimation #' #' @description #' Return a function performing kernel density estimation. #' The difference between \code{\link[stats]{density}} and #' \code{densityfun} is similar to that between #' \code{\link[stats]{approx}} and \code{\link[stats]{approxfun}}. #' #' @param x #' numeric. The data from which the estimate is to be computed. #' #' @param bw #' numeric. The smoothing bandwidth to be used. #' See the eponymous argument of \code{\link[stats]{density}}. #' #' @param adjust #' numeric. The bandwidth used is actually \code{adjust*bw}. #' This makes it easy to specify values like 'half the default' bandwidth. #' #' @param kernel,window #' character. A string giving the smoothing kernel to be used. #' Authorized kernels are listed in \code{\link[statip]{.kernelsList}()}. #' See also the eponymous argument of \code{\link[stats]{density}}. #' #' @param weights #' numeric. A vector of non-negative observation weights, #' hence of same length as \code{x}. #' See the eponymous argument of \code{\link[stats]{density}}. #' #' @param width #' this exists for compatibility with S; #' if given, and \code{bw} is not, #' will set \code{bw} to \code{width} #' if this is a character string, #' or to a kernel-dependent multiple of \code{width} if this is numeric. #' #' @param n #' The number of equally spaced points at which the density #' is to be estimated. #' See the eponymous argument of \code{\link[stats]{density}}. #' #' @param from,to #' The left and right-most points of the grid at which the #' density is to be estimated; #' the defaults are \code{cut * bw} outside of \code{range(x)}. #' #' @param cut #' By default, the values of \code{from} and \code{to} #' are cut bandwidths beyond the extremes of the data. #' This allows the estimated density to drop to #' approximately zero at the extremes. #' #' @param na.rm #' logical. If \code{TRUE}, missing values are removed #' from \code{x}. #' If \code{FALSE} any missing values cause an error. #' #' @param ... #' Additional arguments for (non-default) methods. #' #' @return #' A function that can be called to generate a density. #' #' @author #' Adapted from the \code{\link[stats]{density}} function of package \pkg{stats}. #' The C code of \code{BinDist} is copied from package \pkg{stats} and authored #' by the R Core Team with contributions from Adrian Baddeley. #' #' @seealso #' \code{\link[stats]{density}} and \code{\link[stats]{approxfun}} #' from package \pkg{stats}. #' #' @useDynLib statip, .registration=TRUE #' @useDynLib statip BinDist #' @importFrom stats approxfun fft #' @importFrom stats bw.bcv bw.nrd0 bw.nrd bw.SJ bw.ucv #' @export #' #' @examples #' x <- rlnorm(1000, 1, 1) #' f <- densityfun(x, from = 0) #' curve(f(x), xlim = c(0, 20)) #' densityfun <- function(x, bw = "nrd0", adjust = 1, kernel = "gaussian", weights = NULL, window = kernel, width, n = 512, from, to, cut = 3, na.rm = FALSE, ...) { if (!missing(...)) warning("non-matched further arguments are disregarded", call. = FALSE) if (!missing(window) && missing(kernel)) kernel <- window kernel <- match.arg(kernel, .kernelsList()) #if (give.Rkern) return(kernel_properties(kernel)$canonical_bandwidth) if (!is.numeric(x)) stop("argument 'x' must be numeric", call. = FALSE) x <- as.vector(x) x.na <- is.na(x) if (any(x.na)) { if (na.rm) x <- x[!x.na] else stop("'x' contains missing values", call. = FALSE) } N <- nx <- as.integer(length(x)) if (is.na(N)) stop("invalid value of length(x)", call. = FALSE) x.finite <- is.finite(x) if (any(!x.finite)) { x <- x[x.finite] nx <- length(x) } if (is.null(weights)) { weights <- rep.int(1/nx, nx) totMass <- nx/N } else { if (length(weights) != N) stop("'x' and 'weights' have unequal length", call. = FALSE) if (!all(is.finite(weights))) stop("'weights' must all be finite", call. = FALSE) if (any(weights < 0)) stop("'weights' must not be negative", call. = FALSE) wsum <- sum(weights) if (any(!x.finite)) { weights <- weights[x.finite] totMass <- sum(weights)/wsum } else totMass <- 1 if (!isTRUE(all.equal(1, wsum))) warning("sum(weights) != 1 -- will not get true density", call. = FALSE) } #n.user <- n n <- max(n, 512) if (n > 512) n <- 2^ceiling(log2(n)) if (missing(bw) && !missing(width)) { if (is.numeric(width)) { fac <- kernel_properties(kernel)$fac bw <- width/fac } if (is.character(width)) bw <- width } if (is.character(bw)) bw <- bandwidth(x, bw) if (!is.finite(bw)) stop("non-finite 'bw'", call. = FALSE) bw <- adjust * bw if (bw <= 0) stop("'bw' is not positive", call. = FALSE) if (missing(from)) from <- min(x) - cut * bw if (missing(to)) to <- max(x) + cut * bw if (!is.finite(from)) stop("non-finite 'from'", call. = FALSE) if (!is.finite(to)) stop("non-finite 'to'", call. = FALSE) lo <- from - 4 * bw up <- to + 4 * bw y <- .Call(BinDist, x, weights, lo, up, n) * totMass kords <- seq.int(0, 2 * (up - lo), length.out = 2L * n) kords[(n + 2):(2 * n)] <- -kords[n:2] kords <- kernelfun(kernel)(kords/bw)/bw kords <- stats::fft(stats::fft(y) * Conj(stats::fft(kords)), inverse = TRUE) kords <- pmax.int(0, Re(kords)[1L:n]/length(y)) xords <- seq.int(lo, up, length.out = n) stats::approxfun(xords, kords, method = "linear", yleft = 0, yright = 0, rule = 1) } statip/R/midrange.R0000644000176200001440000000103413564266362013703 0ustar liggesusers#' @title #' Mid-range #' #' @description #' Compute the mid-range of a numeric vector \code{x}, #' defined as the mean of the minimum and the maximum. #' #' @param x #' numeric. A numeric vector. #' #' @param na_rm #' logical. Should missing values be removed before computing the mid-range? #' #' @return #' A numeric value, the mid-range. #' #' @references #' \url{https://en.wikipedia.org/wiki/Mid-range}. #' #' @export #' midrange <- function(x, na_rm = FALSE) { mean(range(x, na.rm = na_rm)) } statip/R/name2distr.R0000644000176200001440000000240013541256257014160 0ustar liggesusers #' @export #' @rdname distr2name #' name2distr <- function(x) { x <- tolower(x) x <- chartr(" -", "__", x) x[x == "bernoulli"] <- "bern" x[x == "beta"] <- "beta" x[x == "binomial"] <- "binom" x[x == "cauchy"] <- "cauchy" x[x %in% c("chi_square", "chisquare", "chisquared")] <- "chisq" #x[x == "dagum"] <- "dagum" x[x == "exponential"] <- "exp" x[x == "fdist"] <- "f" x[x == "gammadist"] <- "gamma" x[x %in% c("generalised_hyperbolic", "generalized_hyperbolic")] <- "gh" x[x %in% c("generalised_extreme_value", "generalized_extreme_value")] <- "gev" x[x %in% c("generalised_pareto", "generalized_pareto")] <- "gpd" x[x == "geometric"] <- "geom" x[x == "hyperbolic"] <- "hyp" x[x %in% c("hypergeometric", "hypergeom")] <- "hyper" x[x == "kumaraswamy"] <- "kumar" x[x == "logistic"] <- "logis" x[x %in% c("lognormal", "loggaussian")] <- "lnorm" x[x == "multinomial"] <- "multinom" x[x == "negative_binomial"] <- "nbinom" x[x == "normal_inverse"] <- "nig" x[x %in% c("gaussian", "normal")] <- "norm" x[x == "poisson"] <- "pois" x[x == "student"] <- "t" x[x == "tukey"] <- "tukey" x[x == "uniform"] <- "unif" x[x == "weibull"] <- "weibull" #x[x == "symmetric_stable"] <- "symstb" x } statip/NEWS.md0000644000176200001440000000313113564331332012655 0ustar liggesusers# statip 0.2.3 (2019-11-17) ## Bug fixes * Fix bug in `prune()`. * Now `hellinger()` returns the Hellinger distance, not the square of it (Issue #1, thanks to benjones13). ## Features * Use `model.frame()` to improve the way formulas are processed in `picor()`. * Now `mfv()` is powered by a function `tableNA()` and returns a factor with the same levels as `x`, when `x` is itself a factor. * In `mfv()` the argument `na.rm` is soft-deprecated (so still accepted for now, with a message thrown); the default argument is now called `na_rm`. * Add various statistical functions: `cv()` (coefficient of variation), `midhinge()`, `midrange()`, `trimean()`. * Add `histo()` to compute a histogram and `find_breaks()` to give breakpoints just like `MASS::truehist()` does. # statip 0.2.0 ## Features * Add function `picor()` for univariate piecewise-constant regression. * Add function `bandwidth()`. * Add function `predict.kmeans()` (with dependency to package `clue`). * Add `...` argument in `mfv()` and `mfv1()` (useful for the subsequent call of these functions within package 'modeest'). ## Performance * Function `if_else()` from package 'dplyr' is no longer used since it currently inconsistently transforms a matrix into a vector; the base `ifelse()` is used instead. # statip 0.1.5 * `mfv()` was not working with factors. # statip 0.1.4 * Update authors list. * Add functions `mfv()` and `mfv1()`, which compute the mode (most frequent value) found in a vector of discrete values. * Add small updates before CRAN submission. statip/MD50000644000176200001440000000533313564337062012103 0ustar liggesusers425705c564d7341eadeb57c50292c69b *DESCRIPTION f4b37094db8dd98416def4487278be13 *NAMESPACE 91296fd7aae8fb0dbdc459893b324e1d *NEWS.md fdbefb3508f38ef2c504ebd14c72f3ca *R/as.stepfun.R d4ea43dab6b099e9881fa71f5e3d61a3 *R/bandwidth.R 07121a392539ddfe065526667e2fdcc3 *R/cv.R 28a1b59a6eaca6a2a2f0c39f124f0a76 *R/dbern.R b9951d51202e9ea4c8a9559681ffd502 *R/densityfun.R 9c6f20b50c4669045b3c26823268278f *R/distr2name.R e80fd7eaad02276bbe49cee7170068e1 *R/erf.R ec30115d60a450155f2bc1e56d481ad7 *R/find_breaks.R 0546bb233f893a1668db43970523c4ab *R/hellinger.R b866db81871a1460de27aa30de1f3da1 *R/histo.R 3c62564cd0ef7de62cd5a15b28fd164e *R/kernel_properties.R 71c5f86f37eb615347df39718852f42c *R/kernelfun.R 714ea6b192b52e1d58e572490d609ebb *R/kernelsList.R 7fe892b218d1cdd6d78871bba3a1591b *R/lagk.R 6a93d173ed808e9482991d2bfbb05514 *R/mfv.R 2d47645847013480f7e42d9eadbd5826 *R/midhinge.R 7e3cff91752d3f2f58c3a62e81ea2566 *R/midrange.R a44138e211220022c9cb597de94ca784 *R/name2distr.R 065c4a241c30ec54f9e0f37fd31aa64f *R/picor.R b8828531da44092eb629e4625e16b5ef *R/plot.loess.R f18e082e3dd51309db62c466530476b7 *R/predict.default.R 4956e94db8829770726a25c603e33731 *R/predict.kmeans.R 62d2f66111b93c2a1b3ba7c4c3b44fd1 *R/prune.R 30894c697b246b29610cb3fe0b03d730 *R/reexports.R 3e74e1897ef08bcba2c1104a5b6a2db6 *R/tableNA.R 80ae0ab741741efc7b21bcc34557fe85 *R/trimean.R 7b9f54e6bfd78101c8e901e061d34418 *R/utils.R e1affd6eba8b5f65ec0b69c9f473a228 *README.md bccce57e08c8b90450796dbc2bb97222 *man/bandwidth.Rd e6e9ba6ca6277469a858879238ccbbe7 *man/cv.Rd 47b31b74c94ac4776046148a96a804e0 *man/dbern.Rd e6523c26944b2dec60cc4add293d83bd *man/densityfun.Rd 5057779f6dac32805a1a4469293924e2 *man/distr2name.Rd c3c2bfe7d7c68bbd2409198d9f1c160d *man/erf.Rd 6996c0ffaa1ce3e3c8f3c20ca5f7537e *man/find_breaks.Rd cb56228f9c8a90f0e48e7f6bba15d548 *man/hellinger.Rd bff7ac599c1c38e35b49794b08156a0b *man/histo.Rd 2d81a5dd3e435c275ff2b1c5b58c71b2 *man/kernelfun.Rd 4a27e11c7675f271b055af88358ab1c0 *man/lagk.Rd 306962d33825de361d132e9a4ebf0b36 *man/mfv.Rd 5adbad6b281dbf1832a28ba2950620cc *man/midhinge.Rd 0b9370ae077918c20e81da4c76d5910e *man/midrange.Rd ab141df6b3d920780d77d1edf18c77a7 *man/picor.Rd 71c461a2ed013d96407429b44a181544 *man/plot.loess.Rd f2a477f999e9c2604849af19ee2d66bd *man/predict.default.Rd 53118ea5ca41e4432e0b9fc3a2b9e9b8 *man/reexports.Rd 6bc18c997beabe9b2790931d8e8e13c0 *man/tableNA.Rd 6635f7e561b4dfc9736f058996ed5a73 *man/trimean.Rd 1ae55fb4262fe7388910f79f7fe11bb2 *src/init.c 916c216c3b7d542a1bdb0b3947fba768 *src/massdist.c 04a8bc7096a8ec57a572f3c27f1c975d *tests/testthat.R ce3360528d3f85a868edefbbe673b1c8 *tests/testthat/test-distribution_names.R 436dbb2deb60f41e79b0ae16e8abe6c5 *tests/testthat/test-erf.R 33f3a71be429bd8b52601b0c9b3e888e *tests/testthat/test-mfv.R