PoissonBinomial/0000755000176200001440000000000014077711332013364 5ustar liggesusersPoissonBinomial/NAMESPACE0000644000176200001440000000055514077551034014611 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(dgpbinom) export(dpbinom) export(pgpbinom) export(ppbinom) export(qgpbinom) export(qpbinom) export(rgpbinom) export(rpbinom) import(Rcpp) importFrom(stats,dbinom) importFrom(stats,pbinom) importFrom(stats,rbinom) importFrom(stats,runif) importFrom(stats,stepfun) useDynLib(PoissonBinomial, .registration = TRUE) PoissonBinomial/man/0000755000176200001440000000000014022640216014127 5ustar liggesusersPoissonBinomial/man/PoissonBinomial-package.Rd0000644000176200001440000000352514022630610021116 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/PoissonBinomial.R \docType{package} \name{PoissonBinomial-package} \alias{PoissonBinomial-package} \title{Efficient Exact and Approximate Implementations for Computing Ordinary and Generalized Poisson Binomial Distributions} \description{ This package implements various algorithms for computing the probability mass function, the cumulative distribution function, quantiles and random numbers of both ordinary and generalized Poisson binomial distributions. } \section{References}{ Hong, Y. (2013). On computing the distribution function for the Poisson binomial distribution. \emph{Computational Statistics & Data Analysis}, \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006} Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method for computing the Poisson binomial distribution. \emph{Computational Statistics and Data Analysis}, \strong{31}, pp. 216–222. \doi{10.1016/j.csda.2018.01.007} Zhang, M., Hong, Y. and Balakrishnan, N. (2018). The generalized Poisson-binomial distribution and the computation of its distribution function. \emph{Journal of Statistical Computational and Simulation}, \strong{88}(8), pp. 1515-1527. \doi{10.1080/00949655.2018.1440294} } \examples{ # Functions for ordinary Poisson binomial distributions set.seed(1) pp <- c(1, 0, runif(10), 1, 0, 1) qq <- seq(0, 1, 0.01) dpbinom(NULL, pp) ppbinom(7:10, pp, method = "DivideFFT") qpbinom(qq, pp, method = "Convolve") rpbinom(10, pp, method = "RefinedNormal") # Functions for generalized Poisson binomial distributions va <- rep(5, length(pp)) vb <- 1:length(pp) dgpbinom(NULL, pp, va, vb, method = "Convolve") pgpbinom(80:100, pp, va, vb, method = "Convolve") qgpbinom(qq, pp, va, vb, method = "Convolve") rgpbinom(100, pp, va, vb, method = "Convolve") } PoissonBinomial/man/GenPoissonBinomial-Distribution.Rd0000644000176200001440000001346014022631004022631 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/gpbinom.R \name{GenPoissonBinomial-Distribution} \alias{GenPoissonBinomial-Distribution} \alias{dgpbinom} \alias{pgpbinom} \alias{qgpbinom} \alias{rgpbinom} \title{The Generalized Poisson Binomial Distribution} \usage{ dgpbinom(x, probs, val_p, val_q, wts = NULL, method = "DivideFFT", log = FALSE) pgpbinom( x, probs, val_p, val_q, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE ) qgpbinom( p, probs, val_p, val_q, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE ) rgpbinom( n, probs, val_p, val_q, wts = NULL, method = "DivideFFT", generator = "Sample" ) } \arguments{ \item{x}{Either a vector of observed sums or NULL. If NULL, probabilities of all possible observations are returned.} \item{probs}{Vector of probabilities of success of each Bernoulli trial.} \item{val_p}{Vector of values that each trial produces with probability in \code{probs}.} \item{val_q}{Vector of values that each trial produces with probability in \code{1 - probs}.} \item{wts}{Vector of non-negative integer weights for the input probabilities.} \item{method}{Character string that specifies the method of computation and must be one of \code{"DivideFFT"}, \code{"Convolve"}, \code{"Characteristic"}, \code{"Normal"} or \code{"RefinedNormal"} (abbreviations are allowed).} \item{log, log.p}{Logical value indicating if results are given as logarithms.} \item{lower.tail}{Logical value indicating if results are \eqn{P[X \leq x]} (if \code{TRUE}; default) or \eqn{P[X > x]} (if \code{FALSE}).} \item{p}{Vector of probabilities for computation of quantiles.} \item{n}{Number of observations. If \code{length(n) > 1}, the length is taken to be the number required.} \item{generator}{Character string that specifies the random number generator and must either be \code{"Sample"} or \code{"Bernoulli"} (abbreviations are allowed).} } \value{ \code{dgpbinom} gives the density, \code{pgpbinom} computes the distribution function, \code{qgpbinom} gives the quantile function and \code{rgpbinom} generates random deviates. For \code{rgpbinom}, the length of the result is determined by \code{n}, and is the lengths of the numerical arguments for the other functions. } \description{ Density, distribution function, quantile function and random generation for the generalized Poisson binomial distribution with probability vector \code{probs}. } \details{ See the references for computational details. The \emph{Divide and Conquer} (\code{"DivideFFT"}) and \emph{Direct Convolution} (\code{"Convolve"}) algorithms are derived and described in Biscarri, Zhao & Brunner (2018). They have been modified for use with the generalized Poisson binomial distribution. The \emph{Discrete Fourier Transformation of the Characteristic Function} (\code{"Characteristic"}) is derived in Zhang, Hong & Balakrishnan (2018), the \emph{Normal Approach} (\code{"Normal"}) and the \emph{Refined Normal Approach} (\code{"RefinedNormal"}) are described in Hong (2013). They were slightly adapted for the generalized Poisson binomial distribution. In some special cases regarding the values of \code{probs}, the \code{method} parameter is ignored (see Introduction vignette). Random numbers can be generated in two ways. The \code{"Sample"} method uses \code{R}'s \code{sample} function to draw random values according to their probabilities that are calculated by \code{dgpbinom}. The \code{"Bernoulli"} procedure ignores the \code{method} parameter and simulates Bernoulli-distributed random numbers according to the probabilities in \code{probs} and sums them up. It is a bit slower than the \code{"Sample"} generator, but may yield better results, as it allows to obtain observations that cannot be generated by the \code{"Sample"} procedure, because \code{dgpbinom} may compute 0-probabilities, due to rounding, if the length of \code{probs} is large and/or its values contain a lot of very small values. } \section{References}{ Hong, Y. (2018). On computing the distribution function for the Poisson binomial distribution. \emph{Computational Statistics & Data Analysis}, \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006} Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method for computing the Poisson binomial distribution. \emph{Computational Statistics and Data Analysis}, \strong{31}, pp. 216–222. \doi{10.1016/j.csda.2018.01.007} Zhang, M., Hong, Y. and Balakrishnan, N. (2018). The generalized Poisson-binomial distribution and the computation of its distribution function. \emph{Journal of Statistical Computational and Simulation}, \strong{88}(8), pp. 1515-1527. \doi{10.1080/00949655.2018.1440294} } \examples{ set.seed(1) pp <- c(1, 0, runif(10), 1, 0, 1) qq <- seq(0, 1, 0.01) va <- rep(5, length(pp)) vb <- 1:length(pp) dgpbinom(NULL, pp, va, vb, method = "DivideFFT") pgpbinom(75:100, pp, va, vb, method = "DivideFFT") qgpbinom(qq, pp, va, vb, method = "DivideFFT") rgpbinom(100, pp, va, vb, method = "DivideFFT") dgpbinom(NULL, pp, va, vb, method = "Convolve") pgpbinom(75:100, pp, va, vb, method = "Convolve") qgpbinom(qq, pp, va, vb, method = "Convolve") rgpbinom(100, pp, va, vb, method = "Convolve") dgpbinom(NULL, pp, va, vb, method = "Characteristic") pgpbinom(75:100, pp, va, vb, method = "Characteristic") qgpbinom(qq, pp, va, vb, method = "Characteristic") rgpbinom(100, pp, va, vb, method = "Characteristic") dgpbinom(NULL, pp, va, vb, method = "Normal") pgpbinom(75:100, pp, va, vb, method = "Normal") qgpbinom(qq, pp, va, vb, method = "Normal") rgpbinom(100, pp, va, vb, method = "Normal") dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") pgpbinom(75:100, pp, va, vb, method = "RefinedNormal") qgpbinom(qq, pp, va, vb, method = "RefinedNormal") rgpbinom(100, pp, va, vb, method = "RefinedNormal") } PoissonBinomial/man/PoissonBinomial-Distribution.Rd0000644000176200001440000001450514022631004022200 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pbinom.R \name{PoissonBinomial-Distribution} \alias{PoissonBinomial-Distribution} \alias{dpbinom} \alias{ppbinom} \alias{qpbinom} \alias{rpbinom} \title{The Poisson Binomial Distribution} \usage{ dpbinom(x, probs, wts = NULL, method = "DivideFFT", log = FALSE) ppbinom( x, probs, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE ) qpbinom( p, probs, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE ) rpbinom(n, probs, wts = NULL, method = "DivideFFT", generator = "Sample") } \arguments{ \item{x}{Either a vector of observed numbers of successes or NULL. If NULL, probabilities of all possible observations are returned.} \item{probs}{Vector of probabilities of success of each Bernoulli trial.} \item{wts}{Vector of non-negative integer weights for the input probabilities.} \item{method}{Character string that specifies the method of computation and must be one of \code{"DivideFFT"}, \code{"Convolve"}, \code{"Characteristic"}, \code{"Recursive"}, \code{"Mean"}, \code{"GeoMean"}, \code{"GeoMeanCounter"}, \code{"Poisson"}, \code{"Normal"} or \code{"RefinedNormal"} (abbreviations are allowed).} \item{log, log.p}{Logical value indicating if results are given as logarithms.} \item{lower.tail}{Logical value indicating if results are \eqn{P[X \leq x]} (if \code{TRUE}; default) or \eqn{P[X > x]} (if \code{FALSE}).} \item{p}{Vector of probabilities for computation of quantiles.} \item{n}{Number of observations. If \code{length(n) > 1}, the length is taken to be the number required.} \item{generator}{Character string that specifies the random number generator and must either be \code{"Sample"} (default) or \code{"Bernoulli"} (abbreviations are allowed). See Details for more information.} } \value{ \code{dpbinom} gives the density, \code{ppbinom} computes the distribution function, \code{qpbinom} gives the quantile function and \code{rpbinom} generates random deviates. For \code{rpbinom}, the length of the result is determined by \code{n}, and is the lengths of the numerical arguments for the other functions. } \description{ Density, distribution function, quantile function and random generation for the Poisson binomial distribution with probability vector \code{probs}. } \details{ See the references for computational details. The \emph{Divide and Conquer} (\code{"DivideFFT"}) and \emph{Direct Convolution} (\code{"Convolve"}) algorithms are derived and described in Biscarri, Zhao & Brunner (2018). The \emph{Discrete Fourier Transformation of the Characteristic Function} (\code{"Characteristic"}), the \emph{Recursive Formula} (\code{"Recursive"}), the \emph{Poisson Approximation} (\code{"Poisson"}), the \emph{Normal Approach} (\code{"Normal"}) and the \emph{Refined Normal Approach} (\code{"RefinedNormal"}) are described in Hong (2013). The calculation of the \emph{Recursive Formula} was modified to overcome the excessive memory requirements of Hong's implementation. The \code{"Mean"} method is a naive binomial approach using the arithmetic mean of the probabilities of success. Similarly, the \code{"GeoMean"} and \code{"GeoMeanCounter"} procedures are binomial approximations, too, but they form the geometric mean of the probabilities of success (\code{"GeoMean"}) and their counter probabilities (\code{"GeoMeanCounter"}), respectively. In some special cases regarding the values of \code{probs}, the \code{method} parameter is ignored (see Introduction vignette). Random numbers can be generated in two ways. The \code{"Sample"} method uses \code{R}'s \code{sample} function to draw random values according to their probabilities that are calculated by \code{dgpbinom}. The \code{"Bernoulli"} procedure ignores the \code{method} parameter and simulates Bernoulli-distributed random numbers according to the probabilities in \code{probs} and sums them up. It is a bit slower than the \code{"Sample"} generator, but may yield better results, as it allows to obtain observations that cannot be generated by the \code{"Sample"} procedure, because \code{dgpbinom} may compute 0-probabilities, due to rounding, if the length of \code{probs} is large and/or its values contain a lot of very small values. } \section{References}{ Hong, Y. (2013). On computing the distribution function for the Poisson binomial distribution. \emph{Computational Statistics & Data Analysis}, \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006} Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method for computing the Poisson binomial distribution. \emph{Computational Statistics and Data Analysis}, \strong{31}, pp. 216–222. \doi{10.1016/j.csda.2018.01.007} } \examples{ set.seed(1) pp <- c(0, 0, runif(995), 1, 1, 1) qq <- seq(0, 1, 0.01) dpbinom(NULL, pp, method = "DivideFFT") ppbinom(450:550, pp, method = "DivideFFT") qpbinom(qq, pp, method = "DivideFFT") rpbinom(100, pp, method = "DivideFFT") dpbinom(NULL, pp, method = "Convolve") ppbinom(450:550, pp, method = "Convolve") qpbinom(qq, pp, method = "Convolve") rpbinom(100, pp, method = "Convolve") dpbinom(NULL, pp, method = "Characteristic") ppbinom(450:550, pp, method = "Characteristic") qpbinom(qq, pp, method = "Characteristic") rpbinom(100, pp, method = "Characteristic") dpbinom(NULL, pp, method = "Recursive") ppbinom(450:550, pp, method = "Recursive") qpbinom(qq, pp, method = "Recursive") rpbinom(100, pp, method = "Recursive") dpbinom(NULL, pp, method = "Mean") ppbinom(450:550, pp, method = "Mean") qpbinom(qq, pp, method = "Mean") rpbinom(100, pp, method = "Mean") dpbinom(NULL, pp, method = "GeoMean") ppbinom(450:550, pp, method = "GeoMean") qpbinom(qq, pp, method = "GeoMean") rpbinom(100, pp, method = "GeoMean") dpbinom(NULL, pp, method = "GeoMeanCounter") ppbinom(450:550, pp, method = "GeoMeanCounter") qpbinom(qq, pp, method = "GeoMeanCounter") rpbinom(100, pp, method = "GeoMeanCounter") dpbinom(NULL, pp, method = "Poisson") ppbinom(450:550, pp, method = "Poisson") qpbinom(qq, pp, method = "Poisson") rpbinom(100, pp, method = "Poisson") dpbinom(NULL, pp, method = "Normal") ppbinom(450:550, pp, method = "Normal") qpbinom(qq, pp, method = "Normal") rpbinom(100, pp, method = "Normal") dpbinom(NULL, pp, method = "RefinedNormal") ppbinom(450:550, pp, method = "RefinedNormal") qpbinom(qq, pp, method = "RefinedNormal") rpbinom(100, pp, method = "RefinedNormal") } PoissonBinomial/DESCRIPTION0000644000176200001440000000220214077711332015066 0ustar liggesusersPackage: PoissonBinomial Type: Package Title: Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions Version: 1.2.4 Date: 2021-07-27 Authors@R: person("Florian", "Junge", role = c("aut", "cre"), email = "florian.junge@h-da.de") Maintainer: Florian Junge Language: en-US Description: Efficient implementations of multiple exact and approximate methods as described in Hong (2013) , Biscarri, Zhao & Brunner (2018) and Zhang, Hong & Balakrishnan (2018) for computing the probability mass, cumulative distribution and quantile functions, as well as generating random numbers for both the ordinary and generalized Poisson binomial distribution. License: GPL (>= 2) Encoding: UTF-8 Imports: Rcpp (>= 1.0.3) LinkingTo: Rcpp SystemRequirements: fftw3 (>= 3.3) RoxygenNote: 7.1.1 Suggests: knitr, rmarkdown, microbenchmark VignetteBuilder: knitr NeedsCompilation: yes Packaged: 2021-07-26 22:28:11 UTC; Florian Author: Florian Junge [aut, cre] Repository: CRAN Date/Publication: 2021-07-27 05:00:10 UTC PoissonBinomial/build/0000755000176200001440000000000014077633372014472 5ustar liggesusersPoissonBinomial/build/vignette.rds0000644000176200001440000000053614077633372017035 0ustar liggesusersRQO00AJsA|hM.ڥ7mĘw}A lQе3Х3קGa.Bx J[axUaA^(Fs}4OSa2/J JdʞT"a @ ? aDȋgl(|+b AR*ph̖$D[# nQw=k- -x[O&hR= ZL\;6 xFSt^[Npo9TT7JnlXש8)r OSE>YkaPoissonBinomial/build/partial.rdb0000644000176200001440000004236114077633271016623 0ustar liggesusers}yIzD%5#D]AEEJJڙ 4n@wvN8Nbo;q;N;MOb%o{WDTV[_RTx?_Kx!:S|L_dLtYd˛un4+:v*,Z`ͰTgWc՗_}ӗ/{n5W|_'[Oz k*ؙ Wم ]^ΏN5kh٫ %rm^~! _W/TϨ>?q/JrG^,|YDeL/qkvv:3]/;_r\˰3}bd;}i/n..WRqlr*7>6v-?fg;9=v-+Uϣ-ZՊؙ*sbf/VIzV{¯iZ˹/Q;~PG9C+ixfhEP]7A,-fWT$XGt @LY(O9#AH::gQ}~gn^ YjK{!_!jRhֱ^|/sTK,6%d^B{!(ahUo RmPJUFV %C'gRuͅ: 9u0l9 g|c LjD+",U5O(T S|jNQ!GL9jiqT4:;cHr.t"Ǒv: Nz;F$'|:S@>t"ǭ#щ|h؉9JgR_'r|ߣN Kr'rf'2#ԉҝy ډ'|:@>t"R{щp9O'2 "a'2J:㌭[V0%sH͏w^ģp:6g'&0|}O;dI8!I:82pA$BtL;krr])i79@-3h yڼk5S`}8 yZe[KT27$L8NA~: ;! :O?xqKjR<ꭇpE4PfL,H!Bߍtur"A/lT bWz6ۭޏD;﴿^qg([TmAZQ#Tލ[>~Z]_6 +8 AxԒSlydګa{EO)6,PrS՗]Ӵ*ZumRmL}P-XڴYšTZÚU?&HUM|%=kwcDɧ9nOQMW;(HY*l Rmcwlpp 謣/BV:Z`'ؖ]NϡuӐ& K8*ڠ O!KXNɝ!̒ 1 eP툒j>OHx8U Tj|y.-\$jGYwiϸA*} %-j>.L(Rǔy ϭE(M&vbS  ~mpD ,5+j8V(gU6{!קwn+Bigڸj%qrL <Y*8XAuO@Va|$tIf{_d|ӚmhC O:+⳺5T&Ba6S.QX x2+Pr]SO€ OF: 9/)O! gRG8Y.tEv# S +v '%5Ŵ0G|f,? dM1( ,U ;<#pS1E͊X]>>3yH8&K3JɌQ`{#T~Bbn&(~h#P@ffnG0`xi\5bx>Ì"as\O 䌶DnLCV!m>4'"Z^MjiX^Cȋe arGXٟf!Yglk#R1GG;Ak9H:B=c~?mMǽA]y7ƦaT uv5?:l2||z}~[n1UUr*MqJb+z﫢c |5]\&Of)xE#c2dBz1dfvYB.?ހ,|g(eFu yH#T̷ EpS& s4K.t6j4[Tt6뿝qlFv={EJqoc K^(|D. mS]rIh aJ-{UzU0WV(aNҮ h->ZF֊Y⼎vuE{?pضx4n'fb1}gn/oRC=^'[1ȄqE+Аf(4Ybx} Էy՟oŋ|aM {*^,T_HPJ4"}q% Y]oA) $ImjK "D ^@QVHwf5jpbWgioc ~+!ˇ_lk*#k l ik]!zQ\6˱a-yn n.KQKVB-9;ZN7Wk뙬C`;8֑Ao}U %qަ;az`mtl"f?eBP-t=D{ #.͞fu1sD{XKkN>iOn,.@^PAa3|oARab=㏜K.j.n0*5D(p&:E@V+ы\^$$u% ܯS#^D~b4?1vM։p43mpm*_>91WƮ_McSe E4*&~{Ud`z?HZMb UC+W Ay/5ZDRv»31ȕ^MX %.IM wBU?(v6jG:8ģ [S;}b;xwS}Q,YxY'HoX9T.0O\UZF,ѿrC}ѧ66gn Br B S&wA\<b.Aq9ҎPϦ|dǏ f YNc,Ÿ +ar^x9}mb:˵[qR?{mD76Z)arVu΍X! viGr;2]ZolV@,1e a'6􏴶 &OvzLlrg&Vx!S&; p,eߺM_Aw㇩t u{fYﵩatإ _*QY*r,4].^4e55l8{zZgyW?mΗ2LLQʾg#zW7LeRʍ]ˏYyUf$ǟc('[+jEc(7'UrcCBxi~e|F/q&8740oPE8 yZ[/gdXZ>cDo)d[L݂|K6 }Ӡ1֑`0Mq~;Hȟ3{ 3 uAcm 말eSuO&Y\N"'993A G:z[ W'; Ov4g.Z(\G /!+L?e!P[۶5J{1³zBbLqH.aa̘hQWQ9AuJrLyIK{:p,z{%cȏ=a7t>+R! v?mgvj;ifܖ/#lO#HX<ښ#QjF≪ՆܲEdVS|a!fpODfQ:p2cႣpGFaOU$ n)hDp TՍdC>~=Q8Z^yRjm26= JHIQEhRTOI:!A'] EGNR:۬]t5dCJ'=COu=Nz^uJ餗{کI/ދPNνz?>P%  g|{4K?'~ePńpƲg|]&uD",%O(t3wȧr՜8A!GL9ji˽viuG8a]h#9l#7dH~76,^#}li#O{FS|>NWm$iPjHp73H;B=m#̰mqyxCq3g GiaobZ RxP"쐮W4XιT8E$L`hTm&@;+̷}m]]_6]U Ln`7eBUtS +0T]c*Y (+;g-L)XᚢUp-c׬T],fpk5jC jbu%5"}o8(6l ~Z3ڬ :%AjRaCq #[BA'Sk+Y MTK%Ktz?{JQ5TRP,zNr*L:s"jl#P>RvQ\{&승oLkuteѲ%w!UֵFw:&Tъ+x/B<)J ]wtQ[FIISqFӵXsUߺ PgGƆ١o5zeFM#|ЕsU–mIAbˊ˨̙^,o)mC&{x>%ӎb./CM1}[Me^|=zNAVTH_NC֦C/EC|Ʉy j(OjlB, !F xqŜ~ ]0J?Yr#zbi:@Fb:@ m7J8Yn}CLGZ-ǀg iZB>KjasiaYO/]) AnCO  FO_ aӡZr"Hն̶Z-0|h? z%͠Τ:(*i {a *deKt ʺ9m¸ R#T~NgQg9ģ)u׺mK=Ryd2J~*[}µguPDClCڜ/V{(osג^^KHv]9-B=; XDňPmzJ|b ,5 ( :O`70 "ċvY0%w 8Ya?`23 Yn1Rs"txpmAe U)*xbVx]]Ȗ:V-`f[3vs?`cHЗfEXA>e׬lީO?AjKd>DnjJ0/߂得=kgB2U ~%*dxB DW&Ez!a划r3:P%TtxYm*:\(^-.'!Oތ)9Yj[bOOCq,?PvAVWCB~LS‚j6@kSEN™ruZa_-Aq?,dFF̊). v)s!C4$Tuάl;>B'jDbxv QX$E*"O|$cPǁ' F":g _jPr'!˅? }FC VCx]/Q= duSO%j2d8TbDɝNBSƉ8l8'C^5+bu ,C+?.$:T`ˬh]q]XwS:gcNζ-#Z!dsk-8!뎻. L,:TF Of{k^>PD%N'x|:%< Yj;dN c 3F%PM3+VAVAg} f>鲫 {a[%-mJyGE6ÕM|nCn%7 Lu$ Gt T8&',?4OOʉ̤Ο$@ؕDqU.~^܆4%NC0NntUbYjB$<[Qb=y:<"N@n.J pw5Xm 2׀s>zzzfOf>k}߲w- BVDF{!V1ĥYʷ&V)NedoW]uV-gB%\]* n1-z}oCůa!oC> ~J)z6Oje>nlM,GrӸ]cifx̯YAZ+gbM_4Kvv4  v*N<,a `F ݷs,J0 9[. Q8 Y.ֹ Ąب;[A,7Z7^Ibդu6jtYV+Qx1d{2!.}6g(N ]JP:xAOcߠ{m["=ՏDW4ʨJBQǦڻh91)?Gj@=CJCGPjig|[MMs4K.g6j4[Tt6뿝qlFv={EJqoc #K^(|D.mmS]rI aJ-{UzU0WV(a|NҮ h->Z֊Yv7,8mn ұXLi𙸹U&4q= )E& +f/2~&XG|4[̰Ge3 ^Er[dPߚUj}X~cccŽo_ Ԧ;2?-_߅/ncf9֣2[Q9ر}AԱ?{[z)plic ǖ6rB~E0}G Rjm=ul}tp:s3譯:[j[5NԿa'ێ->YdbT魃a~=T,$?JبQb'#L[)By/ DvJ'qOSa(Ϭd<,hJ())sre\s2|H3|^鱍 bg;qrB k]OWvzW(EŸŋ"0aeADnzb"o%9uK`o %ȗ^vA6Z /CV/nCiGdž.7 `ZK;LHI+(lA \&^|Qڛjvzgr'^v Fus"UT ssg[RewP_nfQ.h!{vmm]~'^l۩09X\kl2S!@=c; wslvzl|3 b\?;B}K0I2KIn S9h$##< vzR3{‹i#0y G (l5T)^|A}p3#7i#l!ȇdHŦڻkf9n8wP;cOvi&OhJH&ximZZ8uB-g i(yTD@*Te yKՂ#]8J:x4:b,g`soB%0SYd 4#^b]baW?aDgN4C7N?X*VgQLȝhw _⑬txK5ch!: -ZNcFiVֽ+[7ٺ!~<;><="#0qz96]aD@~g-;5cp#: m`hz*'ASu3v| n7(@*6 E4qZ\=<}9 .}0&8U׉9`LuGB*J. d}]R)G<$B! tX{ģ;Sۢ+֋&N`?dѝRC R)O1wKzA蝛2kv=iR U"H6r&[$Iօ^nm% Rz&C`[[!׎vZ&H7E_k\€=1Vԇ0ALD_o[*>s;} 0&zG8~D뢯>`LV9(N`Lui&mU}I׉]w5$Jp&-B䢯{\tD_(#iNku0}=ul,zTcmgOcK/E-mZFNHf[[$[gG_omi$_5_'_mlJשN#XhrޚAdҬ#*j%klN_+^uEq ADu_ nk¬WhvN ssq&<> 9MGN%O_x5gz7 +ܨm Qۈ_U\^$V暚s\`'rFO5a['F!C̴Nõ\6~D8`_~5!M_9s2 E4*&~{Ud`z?HZMb UC+W Ay/5ZDRv»31ȕ>JmJbVXDC<0OEv::Bb:2 ϶V7?K-L p 2}ςTmm\xbN2ErCѧ.S<BUȃ+E{?l )^@\V~c"Zdky]E˜ȸt[sh8.$+[Fcg9fp`OJǡ j]< #define STRICT_R_HEADERS #include using namespace Rcpp; // used to make fft computations more readable #define REAL 0 #define IMAG 1 //#define PI2 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651e+00 /******************************************************************/ /** Helper Functions **/ /******************************************************************/ // helper function for normalisation of PMFs (i.e. ensure that sum = 1) void norm_dpb(NumericVector &pmf){ // sums of PMF double new_sum = sum(pmf), old_sum = 0, older_sum = 0, oldest_sum = 0; //Rcout << ((new_sum < 1)?"l ":((new_sum == 1)?"e ":"g ")); while(new_sum != 1){ oldest_sum = older_sum; older_sum = old_sum; old_sum = new_sum; NumericVector old_pmf = pmf; pmf = pmf / new_sum; new_sum = sum(pmf); //Rcout << ((new_sum < 1)?"l ":((new_sum == 1)?"e ":"g ")); if(new_sum >= 1 || new_sum == old_sum || new_sum == older_sum || new_sum == oldest_sum) break; if(new_sum < 1 && new_sum <= old_sum){ pmf = old_pmf; break; } } //Rcout << "\n"; } // "generic" function for computing some of the PMFs NumericVector dpb_generic(const IntegerVector obs, const NumericVector cdf){ // maximum observed value const int max_q = obs.length() ? max(obs) : cdf.length() - 1; // results vector NumericVector results(max_q + 1); // compute masses results[0] = cdf[0]; for(int i = 1; i <= max_q; i++) results[i] = cdf[i] - cdf[i - 1]; // return final results if(obs.length()) return results[obs]; else return results; } // "generic" function for computing some of the CDFs NumericVector ppb_generic(const IntegerVector obs, const NumericVector pmf, bool lower_tail = true){ // distribution size const int size = pmf.length(); // maximum observed value const int max_q = obs.length() ? max(obs) : size - 1; // results vector NumericVector results = NumericVector(std::min(max_q + 1, size)); // compute cumulative probabilities if(lower_tail){ results[0] = pmf[0]; for(int i = 1; i <= max_q; i++) results[i] = pmf[i] + results[i - 1]; }else{ const int min_q = obs.length() ? min(obs) : 0; const int len = pmf.length() - 1; for(int i = len; i > min_q; i--){ if(i > max_q) results[max_q] += pmf[i]; else results[i - 1] = pmf[i] + results[i]; } } // "correct" numerically too large results results[results > 1] = 1; // return final results if(obs.length()) return results[obs]; else return results; } IntegerVector order(NumericVector x, bool decreasing = false){ NumericVector uni = unique(x).sort(); if(decreasing) uni = NumericVector(rev(uni)); IntegerVector order(x.length()); int k = 0; for(int i = 0; i < uni.length(); i++){ for(int j = 0; j < x.length(); j++){ if(uni[i] == x[j]) order[k++] = j; } } return order; } // [[Rcpp::export]] int vectorGCD(const IntegerVector x){ // input size const int size = x.length(); if(size == 0) return 0; // make all values positive IntegerVector y; y = abs(x); // minimum of 'x' (add 1 to make sure that it is greater than the first value) int xmin = y[0] + 1; // search for minimum, one and zero; return it, if found for(int i = 0; i < size; i++){ if(xmin > y[i]){ xmin = y[i]; if(xmin <= 1) return xmin; } } int a, b, r, i = 0, gcd = xmin; while(gcd > 1 && i < size){ a = std::max(gcd, x[i]); b = std::min(gcd, x[i]); while(b != 0){ r = a % b; a = b; b = r; } gcd = a; i++; } return gcd; } /******************************************************************/ /** Functions for "ordinary" Poisson binomial distribution **/ /******************************************************************/ // PMFs /*NumericVector dpb_conv(IntegerVector obs, NumericVector probs); NumericVector dpb_dc(IntegerVector obs, NumericVector probs); NumericVector dpb_dftcf(IntegerVector obs, NumericVector probs); NumericVector dpb_rf(IntegerVector obs, NumericVector probs); NumericVector dpb_mean(IntegerVector obs, NumericVector probs); NumericVector dpb_gmba(IntegerVector obs, NumericVector probs, bool anti = false); NumericVector dpb_pa(IntegerVector obs, NumericVector probs); NumericVector dpb_na(IntegerVector obs, NumericVector probs, bool refined = true); // CDFs NumericVector ppb_conv(IntegerVector obs, NumericVector probs, bool lower_tail = true); NumericVector ppb_dc(IntegerVector obs, NumericVector probs, bool lower_tail = true); NumericVector ppb_dftcf(IntegerVector obs, NumericVector probs, bool lower_tail = true); NumericVector ppb_rf(IntegerVector obs, NumericVector probs, bool lower_tail = true); NumericVector ppb_mean(IntegerVector obs, NumericVector probs, bool lower_tail = true); NumericVector ppb_gmba(IntegerVector obs, NumericVector probs, bool anti = false, bool lower_tail = true); NumericVector ppb_pa(IntegerVector obs, NumericVector probs, bool lower_tail = true); NumericVector ppb_na(IntegerVector obs, NumericVector probs, bool refined = true, bool lower_tail = true);*/ // Direct Convolution // [[Rcpp::export]] NumericVector dpb_conv(const IntegerVector obs, const NumericVector probs){ // number of input probabilities const int size = probs.length(); // results vector NumericVector results(size + 1); results[0] = 1 - probs[0]; results[1] = probs[0]; for(int i = 1; i < size; i++){ checkUserInterrupt(); if(probs[i]){ for(int j = i; j >= 0; j--){ if(results[j]){ results[j + 1] += results[j] * probs[i]; results[j] *= 1 - probs[i]; } } } } // make sure that probability masses sum up to 1 norm_dpb(results); // return final results if(obs.length()) return results[obs]; else return results; } // [[Rcpp::export]] NumericVector ppb_conv(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true){ // number of input probabilities const int size = probs.length(); // highest observed value const int max_q = obs.length() ? max(obs) : size; // probability masses const NumericVector pmf = dpb_conv(IntegerVector(), probs); // compute CDF NumericVector results = ppb_generic(obs, pmf, lower_tail); // ensure that (for lower tail) sum = 1, if last value = n (the highest observable value) if(obs.length()){ if(max_q == size) results[obs == max_q] = (double)lower_tail; }else results[size] = (double)lower_tail; // return final results return results; } // Divide & Conquer FFT (DC-FFT) NumericVector fft_probs(const NumericVector probsA, const NumericVector probsB){ // sizes of input vectors and the result const int sizeA = probsA.length(); const int sizeB = probsB.length(); const int sizeResult = sizeA + sizeB - 1; // results vector double *result_vec = new double[sizeResult]; // allocate memory for FFTs of the probs and the convolution result fftw_complex *probsA_fft, *probsB_fft, *result_fft; // 0-padding of probsA vector and perform FFT of it NumericVector padded_probsA(sizeResult); padded_probsA[Range(0, sizeA - 1)] = probsA; probsA_fft = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * sizeResult); fftw_plan planA = fftw_plan_dft_r2c_1d(sizeResult, padded_probsA.begin(), probsA_fft, FFTW_ESTIMATE); fftw_execute(planA); fftw_destroy_plan(planA); // 0-padding of probsB vector and perform FFT of it NumericVector padded_probsB(sizeResult); padded_probsB[Range(0, sizeB - 1)] = probsB; probsB_fft = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * sizeResult); fftw_plan planB = fftw_plan_dft_r2c_1d(sizeResult, padded_probsB.begin(), probsB_fft, FFTW_ESTIMATE); fftw_execute(planB); fftw_destroy_plan(planB); // convolute by complex multiplication of the transformed input probs result_fft = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * sizeResult); for(int i = 0; i < sizeResult; i++){ result_fft[i][REAL] = (probsA_fft[i][REAL]*probsB_fft[i][REAL] - probsA_fft[i][IMAG]*probsB_fft[i][IMAG])/sizeResult; result_fft[i][IMAG] = (probsA_fft[i][REAL]*probsB_fft[i][IMAG] + probsA_fft[i][IMAG]*probsB_fft[i][REAL])/sizeResult; } // inverse tranformation of the above multiplications //fftw_plan planResult = fftw_plan_dft_c2r_1d(sizeResult, result_fft, result.begin(), FFTW_ESTIMATE); fftw_plan planResult = fftw_plan_dft_c2r_1d(sizeResult, result_fft, result_vec, FFTW_ESTIMATE); fftw_execute(planResult); fftw_destroy_plan(planResult); // garbage collection fftw_free(probsA_fft); fftw_free(probsB_fft); fftw_free(result_fft); // return final results NumericVector result(sizeResult); for(int i = 0; i < sizeResult; i++) result[i] = result_vec[i]; delete[] result_vec; return result; } // [[Rcpp::export]] NumericVector dpb_dc(const IntegerVector obs, const NumericVector probs){//, const int splits = -1){ // number of probabilities of success const int size = probs.length(); // automatically determine number of splits, if size is above 1950 //int num_splits = splits < 0 ? std::max(0, (int)std::ceil(std::log(size / 1950) / std::log(2.0))) : splits; int num_splits = size > 1950 ? (int)std::ceil(std::log(size / 1950) / std::log(2.0)) : 0; // direct convolution is sufficient in case of 0 splits if(num_splits == 0) return dpb_conv(obs, probs); // number of groups int num_groups = std::pow(2, num_splits); // reduce number of splits and groups if too large while(num_splits > 0 && num_groups > size){ num_splits -= 1; num_groups /= 2; } // direct convolution is sufficient, if no splits are necessary if(num_splits == 0) return dpb_conv(obs, probs); // range variables int start, end; // compute group sizes with minimum size disparity IntegerVector group_sizes(num_groups, size / num_groups); const int remainder = size % num_groups; for(int i = 0; i < remainder; i++) group_sizes[i]++; // compute first and last indices of the groups IntegerVector starts(num_groups), ends(num_groups); starts[0] = 0; ends[0] = group_sizes[0] - 1; for(int i = 1; i < num_groups; i++){ starts[i] = starts[i - 1] + group_sizes[i - 1]; ends[i] = ends[i - 1] + group_sizes[i]; } // results vector; direct allocation will increase size of each group by 1 NumericVector results(size + num_groups); // compute direct convolutions for each group for(int i = 0; i < num_groups; i++){ checkUserInterrupt(); // compute new starting and ending indices, because groups grow by 1 start = starts[i] + i; end = ends[i] + i + 1; // target range Range target(start, end); // direct convolution results[target] = dpb_conv(IntegerVector(), probs[Range(starts[i], ends[i])]); // update starting and ending indices starts[i] = start; ends[i] = end; } int num_groups_reduced = num_groups / 2; while(num_splits > 0){ for(int i = 0; i < num_groups_reduced; i++){ checkUserInterrupt(); // compute new starting and ending indices, because group sizes are // reduced by 1, due to FFT convolution start = starts[2*i] - i; end = ends[2*i + 1] - i - 1; //convolution results[Range(start, end)] = fft_probs(results[Range(starts[2*i], ends[2*i])], results[Range(starts[2*i + 1], ends[2*i + 1])]); // update starting and ending indices starts[i] = start; ends[i] = end; } num_groups_reduced /= 2; num_splits -= 1; } // select final results results = NumericVector(results[Range(0, size)]); // "correct" numerically false (and thus useless) results results[results < 5.55e-17] = 0; results[results > 1] = 1; // make sure that probability masses sum up to 1 norm_dpb(results); // return final results if(obs.length()) return results[obs]; else return results; } // [[Rcpp::export]] NumericVector ppb_dc(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true){ // number of input probabilities const int size = probs.length(); // highest observed value const int max_q = obs.length() ? max(obs) : size; // probability masses const NumericVector pmf = dpb_dc(IntegerVector(), probs); // compute CDF NumericVector results = ppb_generic(obs, pmf, lower_tail); // ensure that (for lower tail) sum = 1, if last value = n (the highest observable value) if(obs.length()){ if(max_q == size) results[obs == max_q] = (double)lower_tail; }else results[size] = (double)lower_tail; // return final results return results; } // Discrete Fourier Transformation of Characteristic Function (DFT-CF) // [[Rcpp::export]] NumericVector dpb_dftcf(const IntegerVector obs, const NumericVector probs){ // number of probabilities of success const int sizeIn = probs.length(); // number of distribution const int sizeOut = sizeIn + 1; // "initialize" DFT input vector fftw_complex *input_fft; input_fft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * sizeOut); input_fft[0][REAL] = 1.0; input_fft[0][IMAG] = 0.0; // initialize complex numbers for "C" and "C to the power of i" const std::complex C = exp(std::complex(0.0, 2.0) * M_PI / ((double)sizeOut)); std::complex C_power = 1.0; // compute closed-form expression of Hernandez and Williams const int mid = sizeIn / 2 + 1; for(int i = 1; i <= mid; i++){ checkUserInterrupt(); C_power *= C; std::complex product = 1.0; for(int j = 0; j < sizeIn; j++) product *= 1.0 + (C_power - 1.0) * probs[j]; input_fft[i][REAL] = product.real(); input_fft[i][IMAG] = product.imag(); input_fft[sizeOut - i][REAL] = product.real(); input_fft[sizeOut - i][IMAG] = -product.imag(); } // vector of DFT results fftw_complex *result_fft; result_fft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * sizeOut); // perform DFT fftw_plan planDFT; planDFT = fftw_plan_dft_1d(sizeOut, input_fft, result_fft, FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute(planDFT); // gather results NumericVector results(sizeOut); for(int i = 0; i < sizeOut; i++) results[i] = result_fft[i][REAL] / sizeOut; // garbage collection fftw_destroy_plan(planDFT); fftw_free(input_fft); fftw_free(result_fft); // "correct" numerically false (and thus useless) results results[results < 2.22e-16] = 0; results[results > 1] = 1; // make sure that probability masses sum up to 1 norm_dpb(results); // return final results if(obs.length()) return results[obs]; else return results; } // [[Rcpp::export]] NumericVector ppb_dftcf(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true){ // number of input probabilities const int size = probs.length(); // highest observed value const int max_q = obs.length() ? max(obs) : size; // probability masses const NumericVector pmf = dpb_dftcf(IntegerVector(), probs); // compute CDF NumericVector results = ppb_generic(obs, pmf, lower_tail); // ensure that (for lower tail) sum = 1, if last value = n (the highest observable value) if(obs.length()){ if(max_q == size) results[obs == max_q] = (double)lower_tail; }else results[size] = (double)lower_tail; // return final results return results; } // Recursive Formula // [[Rcpp::export]] NumericVector dpb_rf(const IntegerVector obs, const NumericVector probs){ // number of input probabilities const int size = probs.length(); NumericMatrix dist(size + 1, 2); NumericVector results(size + 1); int col_new = 0, col_old = 1; dist(0, col_new) = 1.0; dist(1, col_new) = 1 - probs[0]; for(int j = 1; j < size; j++) dist(j + 1, col_new) = (1 - probs[j]) * dist(j, col_new); results[0] = dist(size, col_new); for(int i = 1; i <= size; i++){ checkUserInterrupt(); col_new -= std::pow(-1, i); col_old += std::pow(-1, i); for(int j = 0; j <= i - 1; j++) dist(j, col_new) = 0; for(int j = i - 1; j < size; j++){ dist(j + 1, col_new) = (1 - probs[j]) * dist(j, col_new) + probs[j] * dist(j, col_old); } results[i] = dist(size, col_new); } // make sure that probability masses sum up to 1 norm_dpb(results); // return final results if(obs.length()) return results[obs]; else return results; } // [[Rcpp::export]] NumericVector ppb_rf(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true){ // number of input probabilities int size = probs.length(); // highest observed value int max_q = obs.length() ? max(obs) : size; // probability masses const NumericVector pmf = dpb_rf(IntegerVector(), probs); // compute CDF NumericVector results = ppb_generic(obs, pmf, lower_tail); // make sure that largest observation has probability of 1 (or 0, depending on lower_tail) if(obs.length()){ if(max_q == size) results[obs == max_q] = (double)lower_tail; }else results[size] = (double)lower_tail; // return final results return results; } // Arithmetic Mean Binomial Approximation // [[Rcpp::export]] NumericVector dpb_mean(IntegerVector obs, const NumericVector probs){ // number of input probabilities const int size = probs.length(); // mean of probabilities is the approximate binomial probability const double bin_prob = mean(probs); // compute probability masses and return if(obs.length() == 0) return dbinom(IntegerVector(Range(0, size)), (double)size, bin_prob); else return dbinom(obs, (double)size, bin_prob); } // [[Rcpp::export]] NumericVector ppb_mean(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true){ // number of input probabilities const int size = probs.length(); // mean of probabilities is the approximate binomial probability const double bin_prob = mean(probs); // compute cumulative probabilities and return if(obs.length() == 0) return pbinom(IntegerVector(Range(0, size)), (double)size, bin_prob, lower_tail); else return pbinom(obs, (double)size, bin_prob, lower_tail); } // Geometric Mean Binomial Approximations // [[Rcpp::export]] NumericVector dpb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti = false){ // number of probabilities of success const int size = probs.length(); // logarithms of 'probs' (sums of logarithms are numerically more stable than // products of probabilities, especially when the probabilities are small) NumericVector logs; double bin_prob; if(anti){ logs = NumericVector(log(1 - probs)); bin_prob = 1 - std::exp(mean(logs)); }else{ logs = NumericVector(log(probs)); bin_prob = std::exp(mean(logs)); } // compute probability masses and return if(obs.length() == 0) return dbinom(IntegerVector(Range(0, size)), (double)size, bin_prob); else return dbinom(obs, (double)size, bin_prob); } // [[Rcpp::export]] NumericVector ppb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti = false, const bool lower_tail = true){ // number of probabilities of success const int size = probs.length(); // logarithms of 'probs' (sums of logarithms are numerically more stable than // products of probabilities, especially when the probabilities are small) NumericVector logs; double bin_prob; if(anti){ logs = NumericVector(log(1 - probs)); bin_prob = 1 - std::exp(mean(logs)); }else{ logs = NumericVector(log(probs)); bin_prob = std::exp(mean(logs)); } // compute cumulative probabilities and return if(obs.length() == 0) return pbinom(IntegerVector(Range(0, size)), (double)size, bin_prob, lower_tail); else return pbinom(obs, (double)size, bin_prob, lower_tail); } // Poisson Approximation // [[Rcpp::export]] NumericVector dpb_pa(const IntegerVector obs, const NumericVector probs){ // number of probabilities of success const int size = probs.length(); // sum of probability is the expectation of the Poisson approximation const double lambda = sum(probs); // compute probability masses NumericVector results; if(obs.length() == 0){ results = dpois(IntegerVector(Range(0, size)), lambda); results[size] += R::ppois(size, lambda, false, false); }else{ results = dpois(obs, lambda); for(int i = 0; i < obs.length(); i++) if(obs[i] == size) results[i] += R::ppois(size, lambda, false, false); } // return final results return results; } // [[Rcpp::export]] NumericVector ppb_pa(const IntegerVector obs, const NumericVector probs, bool lower_tail = true){ // sum of probability is the expectation of the Poisson approximation const double lambda = sum(probs); // compute cumulative probabilities const IntegerVector observed = obs.length() ? obs : IntegerVector(Range(0, probs.length())); NumericVector results = ppois(observed, lambda, lower_tail); // make sure that largest possible observation has probability of 1 (or 0, depending on lower_tail) results[observed == probs.length()] = (double)lower_tail; // return final results return results; } // [[Rcpp::export]] NumericVector ppb_na(const IntegerVector obs, const NumericVector probs, const bool refined = true, const bool lower_tail = true){ // number of probabilities of success const int size = probs.length(); // highest observed value const int max_q = obs.length() ? max(obs) : size; // mu const double mu = sum(probs); // p * q const NumericVector pq = probs * (1 - probs); // sigma const double sigma = std::sqrt(sum(pq)); // standardized observations with continuity correction NumericVector obs_std; if(obs.length() == 0) obs_std = (NumericVector(IntegerVector(Range(0, size))) + 0.5 - mu)/sigma; else obs_std = (NumericVector(obs) + 0.5 - mu)/sigma; // vector to store results NumericVector results = Rcpp::pnorm(obs_std, 0.0, 1.0, lower_tail); // cumulative probabilities if(refined){ // gamma const double gamma = sum(pq * (1 - 2 * probs)); // probabilities if(lower_tail) results += gamma/(6 * std::pow(sigma, 3.0)) * (1 - pow(obs_std, 2.0)) * dnorm(obs_std); else results += -gamma/(6 * std::pow(sigma, 3.0)) * (1 - pow(obs_std, 2.0)) * dnorm(obs_std); } // make sure that all probabilities do not exceed 1 and are at least 0 results[results < 0] = 0; results[results > 1] = 1; // make sure largest possible value has cumulative probability of 1 (lower tail) or 0 (upper tail) if(obs.length()){ if(max_q >= size) results[obs >= max_q] = (double)lower_tail; }else results[size] = (double)lower_tail; // return final results return results; } // Normal Approximations (NA, RNA) // [[Rcpp::export]] NumericVector dpb_na(const IntegerVector obs, const NumericVector probs, const bool refined = true){ // number of probabilities of success const int size = probs.length(); // highest observed value const int max_q = obs.length() ? max(obs) : size; // rounded down expectation + 0.5 (continuity correction) const int mid = (int)floor(sum(probs) + 0.5); // cumulative probabilities const NumericVector cdf_lower = ppb_na(IntegerVector(Range(0, std::min(mid, max_q))), probs, refined, true); const NumericVector cdf_upper = ppb_na(IntegerVector(Range(std::min(mid, max_q), max_q)), probs, refined, false); // vector to store results NumericVector results(max_q + 1); // compute probability masses results[0] = cdf_lower[0]; for(int i = 1; i <= max_q; i++){ if(i <= mid) results[i] = cdf_lower[i] - cdf_lower[i - 1]; else results[i] = cdf_upper[i - 1 - mid] - cdf_upper[i - mid]; } // compute and return results if(obs.length()) return results[obs]; else return results; } // Bernoulli Random Number Generator // [[Rcpp::export]] IntegerVector rpb_bernoulli(const int n, const NumericVector probs){ // number of probabilities of success const int size = probs.length(); // vector to store results NumericVector results(n); // generate random numbers for(int i = 0; i < size; i++) for(int j = 0; j < n; j++) results[j] += R::rbinom(1.0, probs[i]); // return results return IntegerVector(results); } /******************************************************************/ /** Functions for generalized Poisson binomial distribution **/ /******************************************************************/ // PMFs /*NumericVector dgpb_conv(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q); NumericVector dgpb_dc(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q); NumericVector dgpb_dftcf(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q); NumericVector dgpb_na(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q, bool refined = true); // CDFs NumericVector pgpb_conv(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q, bool lower_tail = true); NumericVector pgpb_dc(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q, bool lower_tail = true); NumericVector pgpb_dftcf(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q, bool lower_tail = true); NumericVector pgpb_na(IntegerVector obs, NumericVector probs, NumericVector val_p, NumericVector val_q, bool refined = true, bool lower_tail = true);*/ // Generalized Direct Convolution (G-DC) // [[Rcpp::export]] NumericVector dgpb_conv(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q){ // number of probabilities of success const int sizeIn = probs.length(); // determine pairwise minimum and maximum const IntegerVector v = pmin(val_p, val_q); const IntegerVector u = pmax(val_p, val_q); // compute differences IntegerVector d = u - v; // final output size const int sizeOut = sum(d) + 1; // greatest common divisor of the differences const int gcd = vectorGCD(d[d > 0]); // rescale differences according to gcd if(gcd > 1) d = d / gcd; // theoretical rescaled maximum const int max_rescaled = sum(d); // output size const int sizeOut_rescaled = max_rescaled + 1; // if val_p[i] was not the larger one, the respective probs[i] has to be 'flipped' // furthermore: if difference is 0 (i.e. u[i] equals v[i]), a non-zero outcome is impossible NumericVector probs_flipped(sizeIn); for(int i = 0; i < sizeIn; i++){ if(!d[i]) probs_flipped[i] = 0.0; else if(val_p[i] < u[i]) probs_flipped[i] = 1 - probs[i]; else probs_flipped[i] = probs[i]; } // results vectors NumericVector results(sizeOut); NumericVector results_rescaled; // if maximum rescaled difference equals 1, we have an ordinary poisson binomial distribution if(max(d) == 1){ // compute ordinary distribution results_rescaled = dpb_conv(IntegerVector(), probs_flipped[d > 0]); }else{ results_rescaled = NumericVector(sizeOut_rescaled); // initialize result of first convolution step results_rescaled[0] = 1.0; // ending position of last computed iteration int end = 0; // perform convolution for(int i = 0; i < sizeIn; i++){ checkUserInterrupt(); if(probs_flipped[i]){ for(int j = end; j >= 0; j--){ if(d[i] && results_rescaled[j] && probs_flipped[i]){ results_rescaled[j + d[i]] += results_rescaled[j] * probs_flipped[i]; results_rescaled[j] *= 1 - probs_flipped[i]; } } // update ending position end += d[i]; } } } // "correct" numerically false (and thus useless) results results_rescaled[results_rescaled > 1] = 1; // make sure that probability masses sum up to 1 norm_dpb(results_rescaled); // map results to generalized distribution (scale-back) for(int i = 0; i < sizeOut_rescaled; i++) results[i * gcd] = results_rescaled[i]; // return final results if(obs.length()) return results[obs - sum(v)]; else return results; } // [[Rcpp::export]] NumericVector pgpb_conv(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, bool lower_tail = true){ // theoretical minimum const int min_v = sum(pmin(val_p, val_q)); // theoretical maximum const int max_v = sum(pmax(val_p, val_q)); // maximum observed value const int max_q = obs.length() ? max(obs) : max_v; // probability masses const NumericVector pmf = dgpb_conv(IntegerVector(), probs, val_p, val_q); // compute CDF NumericVector results = ppb_generic(obs - min_v, pmf, lower_tail); // ensure that sum = 1 (or 0), if last value equals the highest observable value if(obs.length()){ if(max_q == max_v) results[obs == max_q] = (double)lower_tail; }else results[max_v - min_v] = (double)lower_tail; // return final results return results; } // Generalized Divide & Conquer FFT Tree Convolution (G-DC-FFT) // [[Rcpp::export]] NumericVector dgpb_dc(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q){//, const int splits = -1){ // number of probabilities of success const int sizeIn = probs.length(); // determine pairwise minimum and maximum IntegerVector v = pmin(val_p, val_q); IntegerVector u = pmax(val_p, val_q); // theoretical minimum const int min_v = sum(v); // compute differences IntegerVector d = u - v; // final output size const int sizeOut = sum(d) + 1; // greatest common divisor of the differences const int gcd = vectorGCD(d[d > 0]); // rescale differences according to gcd if(gcd > 1) d = d / gcd; // theoretical rescaled maximum const int max_rescaled = sum(d); // output size const int sizeOut_rescaled = max_rescaled + 1; // if val_p[i] was not the larger one, the respective probs[i] has to be 'flipped' // furthermore: if difference is 0 (i.e. u[i] equals v[i]), a non-zero outcome is impossible NumericVector probs_flipped(sizeIn); for(int i = 0; i < sizeIn; i++){ if(!d[i]) probs_flipped[i] = 0.0; else if(val_p[i] < u[i]) probs_flipped[i] = 1 - probs[i]; else probs_flipped[i] = probs[i]; } // results vectors NumericVector results(sizeOut); NumericVector results_rescaled; // if max_rescaled equals input size, we have an ordinary poisson binomial distribution if(max(d) == 1){ // compute ordinary distribution results_rescaled = dpb_dc(IntegerVector(), probs_flipped); }else{ // number of tree splits //int num_splits = splits < 0 ? std::max(0, (int)std::ceil(std::log(sizeIn / 860) / std::log(2.0))) : splits; int num_splits = sizeIn > 860 ? std::max(0, (int)std::ceil(std::log(sizeIn / 860) / std::log(2.0))) : 0; // direct convolution is sufficient in case of 0 splits if(num_splits == 0) return dgpb_conv(obs, probs_flipped, u, v); // number of groups int num_groups = std::pow(2, num_splits); // fraction of total size per group double frac = (double)(sizeOut_rescaled - 1)/num_groups; // reduce number of splits and groups if inner-group sizes are too large while(num_splits > 0 && (num_groups > sizeIn || frac < max(d))){ num_splits -= 1; num_groups /= 2; frac *= 2; } // direct convolution is sufficient, if no splits are necessary if(num_splits == 0) return dgpb_conv(obs, probs_flipped, u, v); // compute group sizes with minimum size disparity IntegerVector group_sizes(num_groups); IntegerVector group_indices(sizeIn, -1); // assign each probability and outcome to a group IntegerVector ord = order(NumericVector(d), true); IntegerVector d_ordered = d[ord]; probs_flipped = probs_flipped[ord]; NumericVector remainder(num_groups, frac); int g = 0; int inc = 1; for(int i = 0; i < sizeIn; i++){ checkUserInterrupt(); if(g == num_groups || g == -1){ inc *= -1; g += inc; } if(d_ordered[i] > remainder[g]){ g = 0; for(int j = 1; j < num_groups; j++){ if(remainder[j] > remainder[g]) g = j; } } group_sizes[g] += d_ordered[i]; remainder[g] -= d_ordered[i]; group_indices[i] = g; g += inc; } // compute first and last indices of the groups IntegerVector group_starts(num_groups); IntegerVector group_ends(num_groups); group_starts[0] = 0; group_ends[0] = group_sizes[0] - 1; for(int i = 1; i < num_groups; i++){ group_starts[i] = group_starts[i - 1] + group_sizes[i - 1]; group_ends[i] = group_ends[i - 1] + group_sizes[i]; } // results vector; direct convolution will increase size of each group by 1 results_rescaled = NumericVector(sizeOut_rescaled - 1 + num_groups); // compute direct convolutions for each group int start = 0, end = 0; for(int i = 0; i < num_groups; i++){ checkUserInterrupt(); // compute new starting and ending indices, because groups grow by 1 start = group_starts[i] + i; end = group_ends[i] + i + 1; // target range Range target(start, end); u = d_ordered[group_indices == i]; v = IntegerVector(u.length()); // direct convolution results_rescaled[target] = dgpb_conv(IntegerVector(), probs_flipped[group_indices == i], u, v); // update starting and ending positions group_starts[i] = start; group_ends[i] = end; } int num_groups_reduced = num_groups / 2; while(num_splits > 0){ for(int i = 0; i < num_groups_reduced; i++){ checkUserInterrupt(); // compute new starting and ending indices, because group sizes are // reduced by 1, due to FFT convolution start = group_starts[2*i] - i; end = group_ends[2*i + 1] - i - 1; // target range Range target(start, end); // FFT convolution results_rescaled[target] = fft_probs(results_rescaled[Range(group_starts[2*i], group_ends[2*i])], results_rescaled[Range(group_starts[2*i + 1], group_ends[2*i + 1])]); // update starting and ending indices group_starts[i] = start; group_ends[i] = end; } num_groups_reduced /= 2; num_splits -= 1; } } // "correct" numerically false (and thus useless) results results_rescaled[results_rescaled < 5.55e-17] = 0; results_rescaled[results_rescaled > 1] = 1; // make sure that probability masses sum up to 1 norm_dpb(results_rescaled); // map results to generalized distribution (scale-back) for(int i = 0; i < sizeOut_rescaled; i++) results[i * gcd] = results_rescaled[i]; // return final results if(obs.length()) return results[obs - min_v]; else return results; } // [[Rcpp::export]] NumericVector pgpb_dc(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool lower_tail = true){ // theoretical minimum const int min_v = sum(pmin(val_p, val_q)); // theoretical maximum const int max_v = sum(pmax(val_p, val_q)); // maximum observed value const int max_q = obs.length() ? max(obs) : max_v; // probability masses const NumericVector pmf = dgpb_dc(IntegerVector(), probs, val_p, val_q); // compute CDF NumericVector results = ppb_generic(obs - min_v, pmf, lower_tail); // ensure that sum = 1 (or 0), if last value equals the highest observable value if(obs.length()){ if(max_q == max_v) results[obs == max_v] = (double)lower_tail; }else results[max_v - min_v] = (double)lower_tail; // return final results return results; } // Generalized Discrete Fourier Transformation of Characteristic Function (G-DFT-CF) // [[Rcpp::export]] NumericVector dgpb_dftcf(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q){ // number of probabilities of success const int sizeIn = probs.length(); // determine pairwise minimum and maximum const IntegerVector v = pmin(val_p, val_q); const IntegerVector u = pmax(val_p, val_q); // compute differences IntegerVector d = u - v; // final output size const int sizeOut = sum(d) + 1; // greatest common divisor of the differences const int gcd = vectorGCD(d[d > 0]); // rescale differences according to gcd if(gcd > 1) d = d / gcd; // theoretical rescaled maximum const int max_rescaled = sum(d); // output size const int sizeOut_rescaled = max_rescaled + 1; // if val_p[i] was not the larger one, the respective probs[i] has to be 'flipped' // furthermore: if difference is 0 (i.e. u[i] equals v[i]), a non-zero outcome is impossible NumericVector probs_flipped(sizeIn); for(int i = 0; i < sizeIn; i++){ if(!d[i]) probs_flipped[i] = 0.0; else if(val_p[i] < u[i]) probs_flipped[i] = 1 - probs[i]; else probs_flipped[i] = probs[i]; } // results vectors NumericVector results(sizeOut); NumericVector results_rescaled; // if max_rescaled equals input size, we have an ordinary poisson binomial distribution if(max(d) == 1){ // compute ordinary distribution results_rescaled = dpb_dftcf(IntegerVector(), probs_flipped[d > 0]); }else{ // "initialize" DFT input vector fftw_complex *input_fft; input_fft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * sizeOut_rescaled); input_fft[0][REAL] = 1.0; input_fft[0][IMAG] = 0.0; // initialize complex numbers for "C" and "C to the power of i" std::vector< std::complex > C(sizeIn, 1.0); std::vector< std::complex > C_power(sizeIn, 1.0); for(int i = 0; i < sizeIn; i++){ if(d[i]) C[i] = exp(std::complex(0.0, d[i] * 2.0) * M_PI / ((double)sizeOut_rescaled)); } // compute closed-form expression of Hernandez and Williams for(int l = 1; l <= sizeOut_rescaled / 2; l++){ checkUserInterrupt(); std::complex product = 1.0; for(int k = 0; k < sizeIn; k++){ if(probs_flipped[k]){ if(d[k]) C_power[k] *= C[k];//// product *= 1.0 + probs_flipped[k] * (C_power[k] - 1.0); } } input_fft[l][REAL] = product.real(); input_fft[l][IMAG] = product.imag(); input_fft[sizeOut_rescaled - l][REAL] = product.real(); input_fft[sizeOut_rescaled - l][IMAG] = -product.imag(); } // vector of DFT results fftw_complex *result_fft; result_fft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * sizeOut_rescaled); // perform DFT fftw_plan planDFT; planDFT = fftw_plan_dft_1d(sizeOut_rescaled, input_fft, result_fft, FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute(planDFT); // gather results results_rescaled = NumericVector(sizeOut_rescaled); for(int i = 0; i < sizeOut_rescaled; i++) results_rescaled[i] = result_fft[i][REAL] / sizeOut_rescaled; // garbage collection fftw_destroy_plan(planDFT); fftw_free(input_fft); fftw_free(result_fft); } // "correct" numerically false (and thus useless) results results_rescaled[results_rescaled < 2.22e-16] = 0; results_rescaled[results_rescaled > 1] = 1; // make sure that probability masses sum up to 1 norm_dpb(results_rescaled); // map results to generalized distribution (scale-back) for(int i = 0; i < sizeOut_rescaled; i++) results[i * gcd] = results_rescaled[i]; // return final results if(obs.length()) return results[obs - sum(v)]; else return results; } // [[Rcpp::export]] NumericVector pgpb_dftcf(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool lower_tail = true){ // theoretical minimum const int min_v = sum(pmin(val_p, val_q)); // theoretical maximum const int max_v = sum(pmax(val_p, val_q)); // maximum observed value const int max_q = obs.length() ? max(obs) : max_v; // probability masses const NumericVector pmf = dgpb_dftcf(IntegerVector(), probs, val_p, val_q); // compute CDF NumericVector results = ppb_generic(obs - min_v, pmf, lower_tail); // ensure that sum = 1 (or 0), if last value equals the highest observable value if(obs.length()){ if(max_q == max_v) results[obs == max_v] = (double)lower_tail; }else results[max_v - min_v] = (double)lower_tail; // return final results return results; } // [[Rcpp::export]] NumericVector pgpb_na(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool refined = true, const bool lower_tail = true){ // number of probabilities of success const int sizeIn = probs.length(); // determine pairwise minimum and maximum const IntegerVector v = pmin(val_p, val_q); const IntegerVector u = pmax(val_p, val_q); // theoretical (unshifted!) minimum const int min_v = sum(v); // compute differences IntegerVector d = u - v; // maximum observable value const int max_q = sum(d); // greatest common divisor of the differences const int gcd = vectorGCD(d[d > 0]); // rescale differences according to gcd if(gcd > 1) d = d / gcd; // theoretical (shifted!) maximum const int max_rescaled = max_q / gcd; // rescaled observations const IntegerVector obs_range = obs.length() ? (obs - min_v) / gcd : IntegerVector(Range(0, max_rescaled)); // if val_p[i] was not the larger one, the respective probs[i] has to be 'flipped' // furthermore: if difference is 0 (i.e. u[i] equals v[i]), a non-zero outcome is impossible NumericVector probs_flipped(sizeIn); for(int i = 0; i < sizeIn; i++){ if(!d[i]) probs_flipped[i] = 0.0; else if(val_p[i] < u[i]) probs_flipped[i] = 1 - probs[i]; else probs_flipped[i] = probs[i]; } // if maximum difference equals 1, we have an ordinary poisson binomial distribution if(max(d) == 1){ // compute ordinary distribution return ppb_na(obs_range, probs_flipped[d > 0], refined, lower_tail); }else{ // mu const double mu = sum(probs_flipped * NumericVector(d)); // p * q const NumericVector pq = probs_flipped * (1 - probs_flipped); // sigma const double sigma = std::sqrt(sum(pq * pow(NumericVector(d), 2.0))); // standardized observations with continuity correction const NumericVector obs_std = (NumericVector(obs_range) + 0.5 - mu)/sigma; // cumulative probabilities NumericVector results_rescaled = pnorm(obs_std, 0.0, 1.0, lower_tail); // refine if(refined && sigma){ // gamma const double gamma = sum(pq * (1 - 2 * probs_flipped) * pow(NumericVector(d), 3.0))/std::pow(sigma, 3.0); // probabilities if(lower_tail) results_rescaled += gamma * (1 - pow(obs_std, 2.0)) * dnorm(obs_std) / 6; else results_rescaled += -gamma * (1 - pow(obs_std, 2.0)) * dnorm(obs_std) / 6; } // make sure that all probabilities do not exceed 1 and are at least 0 results_rescaled[results_rescaled < 0] = 0; results_rescaled[results_rescaled > 1] = 1; // make sure largest possible value has cumulative probability of 1 if(max_q >= max_rescaled) results_rescaled[obs_range >= max_rescaled] = (double)lower_tail; // return final results if(obs.length() || gcd == 1) return results_rescaled; else{ // map results to generalized distribution (scale-back) NumericVector results(max_q + 1); for(int i = 0; i <= max_q; i++) results[i] = results_rescaled[i/gcd]; return results; } } } // Generalized Normal Approximations (G-NA, G-RNA) // [[Rcpp::export]] NumericVector dgpb_na(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool refined = true){ // smallest possible value const int min_v = sum(pmin(val_p, val_q)); // highest observed value const int max_q = obs.length() ? max(obs) : sum(pmax(val_p, val_q)); // rounded down expectation + 0.5 (continuity correction) const int mid = (int)floor(sum(probs * NumericVector(val_p) + (1 - probs) * NumericVector(val_q)) + 0.5); // cumulative probabilities NumericVector cdf_lower = pgpb_na(IntegerVector(Range(min_v, std::min(mid, max_q))), probs, val_p, val_q, refined, true); NumericVector cdf_upper = pgpb_na(IntegerVector(Range(std::min(mid, max_q), max_q)), probs, val_p, val_q, refined, false); // vector to store results NumericVector results(max_q - min_v + 1); // compute probability masses results[0] = cdf_lower[0]; for(int i = 1; i <= max_q - min_v; i++){ if(i + min_v <= mid) results[i] = cdf_lower[i] - cdf_lower[i - 1]; else results[i] = cdf_upper[i - 1 - mid + min_v] - cdf_upper[i - mid + min_v]; } // compute and return results if(obs.length()) return results[obs - min_v]; else return results; } // Bernoulli Random Number Generator // [[Rcpp::export]] IntegerVector rgpb_bernoulli(const int n, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q){ // number of probabilities of success const int size = probs.length(); // sum of values that occur with probability q = 1 - p const double sum_v = (double)sum(val_q); // differences const IntegerVector d = val_p - val_q; // vector to store results NumericVector results(n, sum_v); // generate random numbers for(int i = 0; i < size; i++) for(int j = 0; j < n; j++) results[j] += d[i] * R::rbinom(1.0, probs[i]); // return results return IntegerVector(results); } PoissonBinomial/src/Makevars0000644000176200001440000000015413561232576015654 0ustar liggesusersPKG_CPPFLAGS = -I../inst/include -I/usr/local/include PKG_LIBS= -L/usr/local/lib -lfftw3 all: $(SHLIB) PoissonBinomial/src/RcppExports.cpp0000644000176200001440000014371214077547032017164 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include "../inst/include/PoissonBinomial.h" #include #include #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // vectorGCD int vectorGCD(const IntegerVector x); static SEXP _PoissonBinomial_vectorGCD_try(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(vectorGCD(x)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_vectorGCD(SEXP xSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_vectorGCD_try(xSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_conv NumericVector dpb_conv(const IntegerVector obs, const NumericVector probs); static SEXP _PoissonBinomial_dpb_conv_try(SEXP obsSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(dpb_conv(obs, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_conv(SEXP obsSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_conv_try(obsSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_conv NumericVector ppb_conv(const IntegerVector obs, const NumericVector probs, const bool lower_tail); static SEXP _PoissonBinomial_ppb_conv_try(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_conv(obs, probs, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_conv(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_conv_try(obsSEXP, probsSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_dc NumericVector dpb_dc(const IntegerVector obs, const NumericVector probs); static SEXP _PoissonBinomial_dpb_dc_try(SEXP obsSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(dpb_dc(obs, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_dc(SEXP obsSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_dc_try(obsSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_dc NumericVector ppb_dc(const IntegerVector obs, const NumericVector probs, const bool lower_tail); static SEXP _PoissonBinomial_ppb_dc_try(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_dc(obs, probs, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_dc(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_dc_try(obsSEXP, probsSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_dftcf NumericVector dpb_dftcf(const IntegerVector obs, const NumericVector probs); static SEXP _PoissonBinomial_dpb_dftcf_try(SEXP obsSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(dpb_dftcf(obs, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_dftcf(SEXP obsSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_dftcf_try(obsSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_dftcf NumericVector ppb_dftcf(const IntegerVector obs, const NumericVector probs, const bool lower_tail); static SEXP _PoissonBinomial_ppb_dftcf_try(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_dftcf(obs, probs, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_dftcf(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_dftcf_try(obsSEXP, probsSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_rf NumericVector dpb_rf(const IntegerVector obs, const NumericVector probs); static SEXP _PoissonBinomial_dpb_rf_try(SEXP obsSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(dpb_rf(obs, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_rf(SEXP obsSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_rf_try(obsSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_rf NumericVector ppb_rf(const IntegerVector obs, const NumericVector probs, const bool lower_tail); static SEXP _PoissonBinomial_ppb_rf_try(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_rf(obs, probs, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_rf(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_rf_try(obsSEXP, probsSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_mean NumericVector dpb_mean(IntegerVector obs, const NumericVector probs); static SEXP _PoissonBinomial_dpb_mean_try(SEXP obsSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(dpb_mean(obs, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_mean(SEXP obsSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_mean_try(obsSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_mean NumericVector ppb_mean(const IntegerVector obs, const NumericVector probs, const bool lower_tail); static SEXP _PoissonBinomial_ppb_mean_try(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_mean(obs, probs, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_mean(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_mean_try(obsSEXP, probsSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_gmba NumericVector dpb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti); static SEXP _PoissonBinomial_dpb_gmba_try(SEXP obsSEXP, SEXP probsSEXP, SEXP antiSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type anti(antiSEXP); rcpp_result_gen = Rcpp::wrap(dpb_gmba(obs, probs, anti)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_gmba(SEXP obsSEXP, SEXP probsSEXP, SEXP antiSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_gmba_try(obsSEXP, probsSEXP, antiSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_gmba NumericVector ppb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti, const bool lower_tail); static SEXP _PoissonBinomial_ppb_gmba_try(SEXP obsSEXP, SEXP probsSEXP, SEXP antiSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type anti(antiSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_gmba(obs, probs, anti, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_gmba(SEXP obsSEXP, SEXP probsSEXP, SEXP antiSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_gmba_try(obsSEXP, probsSEXP, antiSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_pa NumericVector dpb_pa(const IntegerVector obs, const NumericVector probs); static SEXP _PoissonBinomial_dpb_pa_try(SEXP obsSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(dpb_pa(obs, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_pa(SEXP obsSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_pa_try(obsSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_pa NumericVector ppb_pa(const IntegerVector obs, const NumericVector probs, bool lower_tail); static SEXP _PoissonBinomial_ppb_pa_try(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_pa(obs, probs, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_pa(SEXP obsSEXP, SEXP probsSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_pa_try(obsSEXP, probsSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // ppb_na NumericVector ppb_na(const IntegerVector obs, const NumericVector probs, const bool refined, const bool lower_tail); static SEXP _PoissonBinomial_ppb_na_try(SEXP obsSEXP, SEXP probsSEXP, SEXP refinedSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type refined(refinedSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(ppb_na(obs, probs, refined, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_ppb_na(SEXP obsSEXP, SEXP probsSEXP, SEXP refinedSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_ppb_na_try(obsSEXP, probsSEXP, refinedSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dpb_na NumericVector dpb_na(const IntegerVector obs, const NumericVector probs, const bool refined); static SEXP _PoissonBinomial_dpb_na_try(SEXP obsSEXP, SEXP probsSEXP, SEXP refinedSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const bool >::type refined(refinedSEXP); rcpp_result_gen = Rcpp::wrap(dpb_na(obs, probs, refined)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dpb_na(SEXP obsSEXP, SEXP probsSEXP, SEXP refinedSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dpb_na_try(obsSEXP, probsSEXP, refinedSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // rpb_bernoulli IntegerVector rpb_bernoulli(const int n, const NumericVector probs); static SEXP _PoissonBinomial_rpb_bernoulli_try(SEXP nSEXP, SEXP probsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const int >::type n(nSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); rcpp_result_gen = Rcpp::wrap(rpb_bernoulli(n, probs)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_rpb_bernoulli(SEXP nSEXP, SEXP probsSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_rpb_bernoulli_try(nSEXP, probsSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dgpb_conv NumericVector dgpb_conv(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q); static SEXP _PoissonBinomial_dgpb_conv_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); rcpp_result_gen = Rcpp::wrap(dgpb_conv(obs, probs, val_p, val_q)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dgpb_conv(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dgpb_conv_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // pgpb_conv NumericVector pgpb_conv(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, bool lower_tail); static SEXP _PoissonBinomial_pgpb_conv_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); Rcpp::traits::input_parameter< bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(pgpb_conv(obs, probs, val_p, val_q, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_pgpb_conv(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_pgpb_conv_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dgpb_dc NumericVector dgpb_dc(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q); static SEXP _PoissonBinomial_dgpb_dc_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); rcpp_result_gen = Rcpp::wrap(dgpb_dc(obs, probs, val_p, val_q)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dgpb_dc(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dgpb_dc_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // pgpb_dc NumericVector pgpb_dc(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool lower_tail); static SEXP _PoissonBinomial_pgpb_dc_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(pgpb_dc(obs, probs, val_p, val_q, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_pgpb_dc(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_pgpb_dc_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dgpb_dftcf NumericVector dgpb_dftcf(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q); static SEXP _PoissonBinomial_dgpb_dftcf_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); rcpp_result_gen = Rcpp::wrap(dgpb_dftcf(obs, probs, val_p, val_q)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dgpb_dftcf(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dgpb_dftcf_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // pgpb_dftcf NumericVector pgpb_dftcf(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool lower_tail); static SEXP _PoissonBinomial_pgpb_dftcf_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(pgpb_dftcf(obs, probs, val_p, val_q, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_pgpb_dftcf(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_pgpb_dftcf_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // pgpb_na NumericVector pgpb_na(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool refined, const bool lower_tail); static SEXP _PoissonBinomial_pgpb_na_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP refinedSEXP, SEXP lower_tailSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); Rcpp::traits::input_parameter< const bool >::type refined(refinedSEXP); Rcpp::traits::input_parameter< const bool >::type lower_tail(lower_tailSEXP); rcpp_result_gen = Rcpp::wrap(pgpb_na(obs, probs, val_p, val_q, refined, lower_tail)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_pgpb_na(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP refinedSEXP, SEXP lower_tailSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_pgpb_na_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP, refinedSEXP, lower_tailSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // dgpb_na NumericVector dgpb_na(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool refined); static SEXP _PoissonBinomial_dgpb_na_try(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP refinedSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const IntegerVector >::type obs(obsSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); Rcpp::traits::input_parameter< const bool >::type refined(refinedSEXP); rcpp_result_gen = Rcpp::wrap(dgpb_na(obs, probs, val_p, val_q, refined)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_dgpb_na(SEXP obsSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP, SEXP refinedSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_dgpb_na_try(obsSEXP, probsSEXP, val_pSEXP, val_qSEXP, refinedSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // rgpb_bernoulli IntegerVector rgpb_bernoulli(const int n, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q); static SEXP _PoissonBinomial_rgpb_bernoulli_try(SEXP nSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::traits::input_parameter< const int >::type n(nSEXP); Rcpp::traits::input_parameter< const NumericVector >::type probs(probsSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_p(val_pSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type val_q(val_qSEXP); rcpp_result_gen = Rcpp::wrap(rgpb_bernoulli(n, probs, val_p, val_q)); return rcpp_result_gen; END_RCPP_RETURN_ERROR } RcppExport SEXP _PoissonBinomial_rgpb_bernoulli(SEXP nSEXP, SEXP probsSEXP, SEXP val_pSEXP, SEXP val_qSEXP) { SEXP rcpp_result_gen; { Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = PROTECT(_PoissonBinomial_rgpb_bernoulli_try(nSEXP, probsSEXP, val_pSEXP, val_qSEXP)); } Rboolean rcpp_isInterrupt_gen = Rf_inherits(rcpp_result_gen, "interrupted-error"); if (rcpp_isInterrupt_gen) { UNPROTECT(1); Rf_onintr(); } bool rcpp_isLongjump_gen = Rcpp::internal::isLongjumpSentinel(rcpp_result_gen); if (rcpp_isLongjump_gen) { Rcpp::internal::resumeJump(rcpp_result_gen); } Rboolean rcpp_isError_gen = Rf_inherits(rcpp_result_gen, "try-error"); if (rcpp_isError_gen) { SEXP rcpp_msgSEXP_gen = Rf_asChar(rcpp_result_gen); UNPROTECT(1); Rf_error(CHAR(rcpp_msgSEXP_gen)); } UNPROTECT(1); return rcpp_result_gen; } // validate (ensure exported C++ functions exist before calling them) static int _PoissonBinomial_RcppExport_validate(const char* sig) { static std::set signatures; if (signatures.empty()) { signatures.insert("int(*vectorGCD)(const IntegerVector)"); signatures.insert("NumericVector(*dpb_conv)(const IntegerVector,const NumericVector)"); signatures.insert("NumericVector(*ppb_conv)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("NumericVector(*dpb_dc)(const IntegerVector,const NumericVector)"); signatures.insert("NumericVector(*ppb_dc)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("NumericVector(*dpb_dftcf)(const IntegerVector,const NumericVector)"); signatures.insert("NumericVector(*ppb_dftcf)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("NumericVector(*dpb_rf)(const IntegerVector,const NumericVector)"); signatures.insert("NumericVector(*ppb_rf)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("NumericVector(*dpb_mean)(IntegerVector,const NumericVector)"); signatures.insert("NumericVector(*ppb_mean)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("NumericVector(*dpb_gmba)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("NumericVector(*ppb_gmba)(const IntegerVector,const NumericVector,const bool,const bool)"); signatures.insert("NumericVector(*dpb_pa)(const IntegerVector,const NumericVector)"); signatures.insert("NumericVector(*ppb_pa)(const IntegerVector,const NumericVector,bool)"); signatures.insert("NumericVector(*ppb_na)(const IntegerVector,const NumericVector,const bool,const bool)"); signatures.insert("NumericVector(*dpb_na)(const IntegerVector,const NumericVector,const bool)"); signatures.insert("IntegerVector(*rpb_bernoulli)(const int,const NumericVector)"); signatures.insert("NumericVector(*dgpb_conv)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector)"); signatures.insert("NumericVector(*pgpb_conv)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,bool)"); signatures.insert("NumericVector(*dgpb_dc)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector)"); signatures.insert("NumericVector(*pgpb_dc)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool)"); signatures.insert("NumericVector(*dgpb_dftcf)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector)"); signatures.insert("NumericVector(*pgpb_dftcf)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool)"); signatures.insert("NumericVector(*pgpb_na)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool,const bool)"); signatures.insert("NumericVector(*dgpb_na)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool)"); signatures.insert("IntegerVector(*rgpb_bernoulli)(const int,const NumericVector,const IntegerVector,const IntegerVector)"); } return signatures.find(sig) != signatures.end(); } // registerCCallable (register entry points for exported C++ functions) RcppExport SEXP _PoissonBinomial_RcppExport_registerCCallable() { R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_vectorGCD", (DL_FUNC)_PoissonBinomial_vectorGCD_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_conv", (DL_FUNC)_PoissonBinomial_dpb_conv_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_conv", (DL_FUNC)_PoissonBinomial_ppb_conv_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_dc", (DL_FUNC)_PoissonBinomial_dpb_dc_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_dc", (DL_FUNC)_PoissonBinomial_ppb_dc_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_dftcf", (DL_FUNC)_PoissonBinomial_dpb_dftcf_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_dftcf", (DL_FUNC)_PoissonBinomial_ppb_dftcf_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_rf", (DL_FUNC)_PoissonBinomial_dpb_rf_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_rf", (DL_FUNC)_PoissonBinomial_ppb_rf_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_mean", (DL_FUNC)_PoissonBinomial_dpb_mean_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_mean", (DL_FUNC)_PoissonBinomial_ppb_mean_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_gmba", (DL_FUNC)_PoissonBinomial_dpb_gmba_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_gmba", (DL_FUNC)_PoissonBinomial_ppb_gmba_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_pa", (DL_FUNC)_PoissonBinomial_dpb_pa_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_pa", (DL_FUNC)_PoissonBinomial_ppb_pa_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_ppb_na", (DL_FUNC)_PoissonBinomial_ppb_na_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dpb_na", (DL_FUNC)_PoissonBinomial_dpb_na_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_rpb_bernoulli", (DL_FUNC)_PoissonBinomial_rpb_bernoulli_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_conv", (DL_FUNC)_PoissonBinomial_dgpb_conv_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_conv", (DL_FUNC)_PoissonBinomial_pgpb_conv_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_dc", (DL_FUNC)_PoissonBinomial_dgpb_dc_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_dc", (DL_FUNC)_PoissonBinomial_pgpb_dc_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_dftcf", (DL_FUNC)_PoissonBinomial_dgpb_dftcf_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_dftcf", (DL_FUNC)_PoissonBinomial_pgpb_dftcf_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_na", (DL_FUNC)_PoissonBinomial_pgpb_na_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_na", (DL_FUNC)_PoissonBinomial_dgpb_na_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_rgpb_bernoulli", (DL_FUNC)_PoissonBinomial_rgpb_bernoulli_try); R_RegisterCCallable("PoissonBinomial", "_PoissonBinomial_RcppExport_validate", (DL_FUNC)_PoissonBinomial_RcppExport_validate); return R_NilValue; } static const R_CallMethodDef CallEntries[] = { {"_PoissonBinomial_vectorGCD", (DL_FUNC) &_PoissonBinomial_vectorGCD, 1}, {"_PoissonBinomial_dpb_conv", (DL_FUNC) &_PoissonBinomial_dpb_conv, 2}, {"_PoissonBinomial_ppb_conv", (DL_FUNC) &_PoissonBinomial_ppb_conv, 3}, {"_PoissonBinomial_dpb_dc", (DL_FUNC) &_PoissonBinomial_dpb_dc, 2}, {"_PoissonBinomial_ppb_dc", (DL_FUNC) &_PoissonBinomial_ppb_dc, 3}, {"_PoissonBinomial_dpb_dftcf", (DL_FUNC) &_PoissonBinomial_dpb_dftcf, 2}, {"_PoissonBinomial_ppb_dftcf", (DL_FUNC) &_PoissonBinomial_ppb_dftcf, 3}, {"_PoissonBinomial_dpb_rf", (DL_FUNC) &_PoissonBinomial_dpb_rf, 2}, {"_PoissonBinomial_ppb_rf", (DL_FUNC) &_PoissonBinomial_ppb_rf, 3}, {"_PoissonBinomial_dpb_mean", (DL_FUNC) &_PoissonBinomial_dpb_mean, 2}, {"_PoissonBinomial_ppb_mean", (DL_FUNC) &_PoissonBinomial_ppb_mean, 3}, {"_PoissonBinomial_dpb_gmba", (DL_FUNC) &_PoissonBinomial_dpb_gmba, 3}, {"_PoissonBinomial_ppb_gmba", (DL_FUNC) &_PoissonBinomial_ppb_gmba, 4}, {"_PoissonBinomial_dpb_pa", (DL_FUNC) &_PoissonBinomial_dpb_pa, 2}, {"_PoissonBinomial_ppb_pa", (DL_FUNC) &_PoissonBinomial_ppb_pa, 3}, {"_PoissonBinomial_ppb_na", (DL_FUNC) &_PoissonBinomial_ppb_na, 4}, {"_PoissonBinomial_dpb_na", (DL_FUNC) &_PoissonBinomial_dpb_na, 3}, {"_PoissonBinomial_rpb_bernoulli", (DL_FUNC) &_PoissonBinomial_rpb_bernoulli, 2}, {"_PoissonBinomial_dgpb_conv", (DL_FUNC) &_PoissonBinomial_dgpb_conv, 4}, {"_PoissonBinomial_pgpb_conv", (DL_FUNC) &_PoissonBinomial_pgpb_conv, 5}, {"_PoissonBinomial_dgpb_dc", (DL_FUNC) &_PoissonBinomial_dgpb_dc, 4}, {"_PoissonBinomial_pgpb_dc", (DL_FUNC) &_PoissonBinomial_pgpb_dc, 5}, {"_PoissonBinomial_dgpb_dftcf", (DL_FUNC) &_PoissonBinomial_dgpb_dftcf, 4}, {"_PoissonBinomial_pgpb_dftcf", (DL_FUNC) &_PoissonBinomial_pgpb_dftcf, 5}, {"_PoissonBinomial_pgpb_na", (DL_FUNC) &_PoissonBinomial_pgpb_na, 6}, {"_PoissonBinomial_dgpb_na", (DL_FUNC) &_PoissonBinomial_dgpb_na, 5}, {"_PoissonBinomial_rgpb_bernoulli", (DL_FUNC) &_PoissonBinomial_rgpb_bernoulli, 4}, {"_PoissonBinomial_RcppExport_registerCCallable", (DL_FUNC) &_PoissonBinomial_RcppExport_registerCCallable, 0}, {NULL, NULL, 0} }; RcppExport void R_init_PoissonBinomial(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } PoissonBinomial/vignettes/0000755000176200001440000000000014077633373015404 5ustar liggesusersPoissonBinomial/vignettes/proc_exact.Rmd0000644000176200001440000002120314022377326020166 0ustar liggesusers--- title: "Exact Procedures" output: rmarkdown::html_vignette: toc: yes vignette: > %\VignetteIndexEntry{Exact Procedures} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, echo = FALSE} library(PoissonBinomial) ``` ## Ordinary Poisson Binomial Distribution ### Direct Convolution The *Direct Convolution* (DC) approach is requested with `method = "Convolve"`. ```{r directconv-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Convolve") ppbinom(NULL, pp, wt, "Convolve") ``` ### Divide & Conquer FFT Tree Convolution The *Divide & Conquer FFT Tree Convolution* (DC-FFT) approach is requested with `method = "DivideFFT"`. ```{r divide1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "DivideFFT") ppbinom(NULL, pp, wt, "DivideFFT") ``` By design, as proposed by [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), its results are identical to the DC procedure, if $n \leq 750$. Thus, differences can be observed for larger $n > 750$: ```{r divide2-ord} set.seed(1) pp1 <- runif(751) pp2 <- pp1[1:750] sum(abs(dpbinom(NULL, pp2, method = "DivideFFT") - dpbinom(NULL, pp2, method = "Convolve"))) sum(abs(dpbinom(NULL, pp1, method = "DivideFFT") - dpbinom(NULL, pp1, method = "Convolve"))) ``` The reason is that the DC-FFT method splits the input `probs` vector into as equally sized parts as possible and computes their distributions separately with the DC approach. The results of the portions are then convoluted by means of the Fast Fourier Transformation. As proposed by [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), no splitting is done for $n \leq 750$. In addition, the DC-FFT procedure does not produce probabilities $\leq 5.55e\text{-}17$, i.e. smaller values are rounded off to 0, if $n > 750$, whereas the smallest possible result of the DC algorithm is $\sim 1e\text{-}323$. This is most likely caused by the used FFTW3 library. ```{r divide3-ord} set.seed(1) pp1 <- runif(751) d1 <- dpbinom(NULL, pp1, method = "DivideFFT") d2 <- dpbinom(NULL, pp1, method = "Convolve") min(d1[d1 > 0]) min(d2[d2 > 0]) ``` ### Discrete Fourier Transformation of the Characteristic Function The *Discrete Fourier Transformation of the Characteristic Function* (DFT-CF) approach is requested with `method = "Characteristic"`. ```{r dftcf-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Characteristic") ppbinom(NULL, pp, wt, "Characteristic") ``` As can be seen, the DFT-CF procedure does not produce probabilities $\leq 2.22e\text{-}16$, i.e. smaller values are rounded off to 0, most likely due to the used FFTW3 library. ### Recursive Formula The *Recursive Formula* (RF) approach is requested with `method = "Recursive"`. ```{r rf1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Recursive") ppbinom(NULL, pp, wt, "Recursive") ``` Obviously, the RF procedure does produce probabilities $\leq 5.55e\text{-}17$, because it does not rely on the FFTW3 library. Furthermore, it yields the same results as the DC method. ```{r rf2-ord} set.seed(1) pp <- runif(1000) wt <- sample(1:10, 1000, TRUE) sum(abs(dpbinom(NULL, pp, wt, "Convolve") - dpbinom(NULL, pp, wt, "Recursive"))) ``` ### Processing Speed Comparisons To assess the performance of the exact procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-ord} library(microbenchmark) set.seed(1) f1 <- function() dpbinom(NULL, runif(6000), method = "DivideFFT") f2 <- function() dpbinom(NULL, runif(6000), method = "Convolve") f3 <- function() dpbinom(NULL, runif(6000), method = "Recursive") f4 <- function() dpbinom(NULL, runif(6000), method = "Characteristic") microbenchmark(f1(), f2(), f3(), f4(), times = 51) ``` Clearly, the DC-FFT procedure is the fastest, followed by DC, RF and DFT-CF methods. ## Generalized Poisson Binomial Distribution ### Generalized Direct Convolution The *Generalized Direct Convolution* (G-DC) approach is requested with `method = "Convolve"`. ```{r directconv-gen} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Convolve") pgpbinom(NULL, pp, va, vb, wt, "Convolve") ``` ### Generalized Divide & Conquer FFT Tree Convolution The *Generalized Divide & Conquer FFT Tree Convolution* (G-DC-FFT) approach is requested with `method = "DivideFFT"`. ```{r divide1-gen} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "DivideFFT") pgpbinom(NULL, pp, va, vb, wt, "DivideFFT") ``` By design, similar to the ordinary DC-FFT algorithm by [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), its results are identical to the G-DC procedure, if $n$ and the number of possible observed values is small. Thus, differences can be observed for larger numbers: ```{r divide2-gen} set.seed(1) pp1 <- runif(250) va1 <- sample(0:50, 250, TRUE) vb1 <- sample(0:50, 250, TRUE) pp2 <- pp1[1:248] va2 <- va1[1:248] vb2 <- vb1[1:248] sum(abs(dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT") - dgpbinom(NULL, pp1, va1, vb1, method = "Convolve"))) sum(abs(dgpbinom(NULL, pp2, va2, vb2, method = "DivideFFT") - dgpbinom(NULL, pp2, va2, vb2, method = "Convolve"))) ``` The reason is that the G-DC-FFT method splits the input `probs`, `val_p` and `val_q` vectors into parts such that the numbers of possible observations of all parts are as equally sized as possible. Their distributions are then computed separately with the G-DC approach. The results of the portions are then convoluted by means of the Fast Fourier Transformation. For small $n$ and small distribution sizes, no splitting is needed. In addition, the G-DC-FFT procedure, just like the DC-FFT method, does not produce probabilities $\leq 5.55e\text{-}17$, i.e. smaller values are rounded off to $0$, if the total number of possible observations is smaller than $750$, whereas the smallest possible result of the DC algorithm is $\sim 1e\text{-}323$. This is most likely caused by the used FFTW3 library. ```{r divide3-gen} d1 <- dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT") d2 <- dgpbinom(NULL, pp1, va1, vb1, method = "Convolve") min(d1[d1 > 0]) min(d2[d2 > 0]) ``` ### Generalized Discrete Fourier Transformation of the Characteristic Function The *Generalized Discrete Fourier Transformation of the Characteristic Function* (G-DFT-CF) approach is requested with `method = "Characteristic"`. ```{r dftcf-gen} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Characteristic") pgpbinom(NULL, pp, va, vb, wt, "Characteristic") ``` As can be seen, the G-DFT-CF procedure does not produce probabilities $\leq 2.2e\text{-}16$, i.e. smaller values are rounded off to 0, most likely due to the used FFTW3 library. ### Processing Speed Comparisons To assess the performance of the exact procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability and value vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-gen} library(microbenchmark) n <- 2500 set.seed(1) va <- sample(1:50, n, TRUE) vb <- sample(1:50, n, TRUE) f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT") f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Convolve") f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Characteristic") microbenchmark(f1(), f2(), f3(), times = 51) ``` Clearly, the G-DC-FFT procedure is the fastest one. It outperforms both the G-DC and G-DFT-CF approaches. The latter one needs a lot more time than the others. Generally, the computational speed advantage of the G-DC-FFT procedure increases with larger $n$ (and $m$).PoissonBinomial/vignettes/use_with_rcpp.Rmd0000644000176200001440000001527013757216142020723 0ustar liggesusers--- title: "Usage with Rcpp" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Usage with Rcpp} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Each procedure's probability mass function (PMF) and cumulative distribution function (CDF) was implemented in *C++* using the `Rcpp` package. By means of `Rcpp::interface`, these functions are exported to both the package's *R* namespace and *C++* headers. That way, the following functions can then be used by other packages that use `Rcpp`: ``` /*** Ordinary Poisson Binomial Distribution ***/ /*** Exact Procedures ***/ // Direct Convolution (DC) // PMF NumericVector dpb_conv(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_conv(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Divide & Conquer FFT Tree Convolution (DC-FFT) // PMF NumericVector dpb_dc(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_dc(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Discrete Fourier Transformation of the Characteristic Function (DFT-CF) // PMF NumericVector dpb_dftcf(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_dftcf(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Recursive Formula (RF) // PMF NumericVector dpb_rf(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_rf(const IntegerVector obs, const NumericVector probs, const bool lower_tail); /*** Approximations ***/ // Arithmetic Mean Binomial Approximation (AMBA) // PMF NumericVector dpb_mean(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_mean(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Geometric Mean Binomial Approximations (GMBA) // PMF NumericVector dpb_gmba(const IntegerVector obs, const NumericVector const probs, const bool anti); // CDF NumericVector ppb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti, const bool lower_tail); // Poisson Approximation (PA) // PMF NumericVector dpb_pa(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_pa(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Normal Approximations (NA, RNA) // PMF NumericVector dpb_na(const IntegerVector obs, const NumericVector probs, const bool refined); // CDF NumericVector ppb_na(const IntegerVector obs, const NumericVector probs, const bool refined, const bool lower_tail); /*** Generalized Poisson Binomial Distribution ***/ /*** Exact Procedures ***/ // Generalized Direct Convolution (G-DC) // PMF NumericVector dgpb_conv(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q); // CDF NumericVector pgpb_conv(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool lower_tail); // Generalized Discrete Fourier Transformation of the Characteristic Function (G-DFT-CF) // PMF NumericVector dgpb_dftcf(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q); // CDF NumericVector pgpb_dftcf(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool lower_tail); /*** Approximations ***/ // Generalized Normal Approximations (G-NA, G-RNA) // PMF NumericVector dgpb_na(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool refined, const bool lower_tail); // CDF NumericVector pgpb_na(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool refined, const bool lower_tail); ``` ## Making the functions usable There are only a few simple steps to follow: 1. Add the `Rcpp` and `PoissonBinomial` packages to the `Imports` and `LinkingTo` fields of the `DESCRIPTION` file. 2. Add `#include ` to source (`.cpp`) and/or header (`.h`, `.hpp`) files in which these functions are to be used. 3. Optional: Add `using namespace PoissonBinomial;`. Without it, the use of functions of this package must be fully qualified with `PoissonBinomial::`, e.g. `PoissonBinomial::dpb_dc` instead of `dpb_dc` ## Important Remarks For better performance, the PMFs and CDFs do not check any of their parameters for plausibility! This must be done by the user by means of *R* or *C/C++* functions. It must be made sure that * the observations in the `obs` vectors are valid, * the probabilities in the `probs` vector are in $(0, 1)$ and * for `dpb_gmba`, `ppb_gmba`, `dpb_na`, `ppb_na`, `dgpb_na` and `pgpb_na`: the probabilities in the `probs` vector **must not** contain zeros or ones. Furthermore, the CDFs only compute non-logarithmic probabilities. If logarithms are needed, they must be computed "manually".PoissonBinomial/vignettes/intro.Rmd0000644000176200001440000003760713757216224017214 0ustar liggesusers--- title: "Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions" output: rmarkdown::html_vignette: toc: yes vignette: > %\VignetteIndexEntry{Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, echo = FALSE} library(PoissonBinomial) ``` ## Introduction The Poisson binomial distribution (in the following abbreviated as PBD) is becoming increasingly important, especially in the areas of statistics, finance, insurance mathematics and quality management. This package provides functions for two types of PBDs: ordinary and generalized PBDs (henceforth referred to as O-PBDs and G-PBDs). ### Ordinary Poisson Binomial Distribution The O-PBD is the distribution of the sum of a number $n$ of independent Bernoulli-distributed random indicators $X_i \in \{0, 1\}$ $(i = 1, ..., n)$: $$X := \sum_{i = 1}^{n}{X_i}.$$ Each of the $X_i$ possesses a predefined probability of success $p_i := P(X_i = 1)$ (subsequently $P(X_i = 0) = 1 - p_i =: q_i$). With this, mean, variance and skewness can be expressed as $$E(X) = \sum_{i = 1}^{n}{p_i} \quad \quad Var(X) = \sum_{i = 1}^{n}{p_i q_i} \quad \quad Skew(X) = \frac{\sum_{i = 1}^{n}{p_i q_i(q_i - p_i)}}{\sqrt{Var(X)}^3}.$$ All possible observations are in $\{0, ..., n\}$. ### Generalized Poisson Binomial Distribution The G-PBD is defined very similar. Again, it is the distribution of a sum random variables, but here, each $X_i \in \{u_i, v_i\}$ with $P(X_i = u_i) =: p_i$ and $P(X_i = v_i) = 1 - p_i =: q_i$. Using ordinary Bernoulli-distributed random variables $Y_i$, $X_i$ can be expressed as $X_i = u_i Y_i + v_i(1 - Y_i) = v_i + Y_i \cdot (u_i - v_i)$. As a result, mean, variance and skewness are given by $$E(X) = \sum_{i = 1}^{n}{v_i} + \sum_{i = 1}^{n}{p_i (u_i - v_i)} \quad \quad Var(X) = \sum_{i = 1}^{n}{p_i q_i(u_i - v_i)^2} \\Skew(X) = \frac{\sum_{i = 1}^{n}{p_i q_i(q_i - p_i)(u_i - v_i)^3}}{\sqrt{Var(X)}^3}.$$ All possible observations are in $\{U, ..., V\}$ with $U := \sum_{i = 1}^{n}{\min\{u_i, v_i\}}$ and $V := \sum_{i = 1}^{n}{\max\{u_i, v_i\}}$. Note that the size $m := V - U$ of the distribution does not generally equal $n$! ### Existing R Packages Computing these distributions exactly is computationally demanding, but in the last few years, some efficient algorithms have been developed. Particularly significant in this respect are the works of [Hong (2013)](http://dx.doi.org/10.1016/j.csda.2012.10.006), who derived the DFT-CF procedure for O-PBDs, [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007) who developed two immensely faster algorithms for O-PBDs, namely the DC and DC-FFT procedures, and [Zhang, Hong and Balakrishnan (2018)](https://doi.org/10.1080/00949655.2018.1440294) who further developed [Hong's (2013)](http://dx.doi.org/10.1016/j.csda.2012.10.006) DFT-CF algorithm for G-PBDs (in the following, this generalized procedure is referred to as G-DFT-CF). Still, only a few R packages exist for the calculation of either ordinary and generalized PBDs, e.g. [`poibin`](https://cran.r-project.org/package=poibin) and [`poisbinom`](https://cran.r-project.org/package=poisbinom) for O-PBDs and [`GPB`](https://cran.r-project.org/package=GPB) for G-PDBs. Before the release of this `PoissonBinomial` package, there has been no R package that implemented the DC and DC-FFT algorithms of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), as they only published a [reference implementation](https://github.com/biscarri1/convpoibin) for R, but refrained from releasing it as a package. Additionally, there are no comparable approaches for G-PBDs to date. The `poibin` package implements the DFT-CF algorithm along with the exact recursive method of [Barlow & Heidtmann (1984)](http://dx.doi.org/10.1109/TR.1984.5221843) and Normal and Poisson approximations. However, both exact procedures of this package possess some disadvantages, i.e. they are relatively slow at computing very large distributions, with the recursive algorithm being also very memory consuming. The G-DFT-CF procedure is implemented in the `GPB` package and inherits this performance drawback. The `poisbinom` package provides a more efficient and much faster DFT-CF implementation. The performance improvement over the `poibin` package lies in the use of the [FFTW C library](http://www.fftw.org). Unfortunately, it sometimes yields some negative probabilities in the tail regions, especially for large distributions. However, this numerical issue has not been addressed to date. This `PoissonBinomial` also utilizes FFTW for both DFT-CF and G-DFT-CF algorithms, but corrects that issue. In addition to the disadvantages regarding computational speed (`poibin` and `GPB`) or numerics (`poisbinom`), especially for very large distributions, the aforementioned packages do not provide headers for their internal C/C++ functions, so that they cannot be imported directly by C or C++ code of other packages that use for example `Rcpp`. In some situations, people might have to deal with Poisson binomial distributions that include Bernoulli variables with $p_i \in \{0, 1\}$. Calculation performance can be further optimized by handling these indicators before the actual computations. Approximations also benefit from this in terms of accuracy. None of the aforementioned packages implements such optimizations. Therefore, the advantages of this `PoissonBinomial` package can be summarized as follows: * Efficient computation of very large distributions with both exact and approximate algorithms for O-PBDs and G-PBDs * Provides headers for the C++ functions so that other packages may include them in their own C++ code * Handles (sometimes large numbers of) 0- and 1-probabilities to speed up performance In total, this package includes 10 different algorithms of computing ordinary Poisson binomial distributions, including optimized versions of the Normal, Refined Normal and Poisson approaches, and 5 approaches for generalized PBDs. In addition, the implementation of the exact recursive procedure for O-PBDs was rewritten so that it is considerably less memory intensive: the `poibin` implementation needs the memory equivalent of $(n + 1)^2$ values of type `double`, while ours only needs $3 \cdot (n + 1)$. *** ## Exact Procedures ### Ordinary Poisson Binomial Distribution In this package implements the following exact algorithms for computing ordinary Poisson binomial distributions: * the *Direct Convolution* approach of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), * the *Divide & Conquer FFT Tree Convolution* procedure of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), * the *Discrete Fourier Transformation of the Characteristic Function* algorithm of [Hong (2013)](http://dx.doi.org/10.1016/j.csda.2012.10.006) and * the *Recursive Formula* of [Barlow & Heidtmann (1984)](http://dx.doi.org/10.1109/TR.1984.5221843). ### Generalized Poisson Binomial Distribution For generalized Poisson binomial distributions, this package provides: * a generalized adaptation of the *Direct Convolution* approach of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), * a generalized *Divide & Conquer FFT Tree Convolution*, inspired by the respective procedure of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007) for O-PDBs, * the *Generalized Discrete Fourier Transformation of the Characteristic Function* algorithm of [Zhang, Hong and Balakrishnan (2018)](https://doi.org/10.1080/00949655.2018.1440294). ### Examples Examples and performance comparisons of these procedures are presented in a [separate vignette](proc_exact.html). *** ## Approximations ### Ordinary Poisson Binomial Distribution In addition, the following O-PBD approximation methods are included: * the *Poisson Approximation* approach, * the *Arithmetic Mean Binomial Approximation* procedure, * *Geometric Mean Binomial Approximation* algorithms, * the *Normal Approximation* and * the *Refined Normal Approximation*. ### Generalized Poisson Binomial Distribution For G-PBDs, there are * the *Normal Approximation* and * the *Refined Normal Approximation*. ### Examples Examples and performance comparisons of these approaches are provided in a [separate vignette](proc_approx.html) as well. *** ## Handling special cases, zeros and ones Handling special cases, such as ordinary binomial distributions, zeros and ones is useful to speed up performance. Unfortunately, some approximations do not work well for Bernoulli trials with $p_i \in \{0, 1\}$, e.g. the Geometric Mean Binomial Approximations. This is why handling these values *before* the actual computation of the distribution is not only a performance tweak, but sometimes even a necessity. It is achieved by some simple preliminary considerations. ### Ordinary Poisson Binomial Distributions 1. All $p_i = p$ are equal? In this case, we have a usual binomial distribution. The specified method of computation is then ignored. In particular, the following applies: a) $p = 0$: The only observable value is $0$, i.e. $P(X = 0) = 1$ and $P(X \neq 0) = 0$. b) $p = 1$: The only observable value is $n$, i.e. $P(X = n) = 1$ and $P(X \neq n) = 0$. 2. All $p_i \in \{0, 1\} (i = 1, ..., n)$? If one $p_i$ is 1, it is impossible to measure 0 successes. Following the same logic, if two $p_i$ are 1, we cannot observe 0 and 1 successes and so on. In general, a number of $n_1$ values with $p_i = 1$ makes it impossible to measure $0, ..., n_1 - 1$ successes. Likewise, if there are $n_0$ Bernoulli trials with $p_i = 0$, we cannot observe $n - n_0 + 1, ..., n$ successes. If all $p_i \in \{0, 1\}$, it holds $n = n_0 + n_1$. As a result, the only observable value is $n_1$, i.e. $P(X = n_1) = 1$ and $P(X \neq n_1) = 0$. 3. Are there $p_i \notin \{0, 1\}$? Using the deductions from above, we can only observe an "inner" distribution in the range of $n_1, n_1 + 1, ..., n - n_0$, i.e. $P(X \in \{n_1, ..., n - n_0\}) > 0$ and $P(X < n_1) = P(X > n - n_0) = 0$. As a result, $X$ can be expressed as $X = n_1 + Y$ with $Y \sim PBin(\{p_i|0 < p_i < 1\})$ and $|\{p_i|0 < p_i < 1\}| = n - n_0 - n_1$. Subsequently, the Poisson binomial distribution must only be computed for $Y$. Especially, if there is only one $p_i \notin \{0, 1\}$, $Y$ follows a Bernoulli distribution with parameter $p_i$, i.e. $P(X = n_1) = P(Y = 0) = 1 - p_i$ and $P(X = n_1 + 1) = P(Y = 1) = p_i$. These cases are illustrated in the following example: ```{r ex-opdb} # Case 1 dpbinom(NULL, rep(0.3, 7)) dbinom(0:7, 7, 0.3) # equal results dpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0)) # only 0 is observable dpbinom(0, c(0, 0, 0, 0, 0, 0, 0)) # confirmation dpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1)) # only 7 is observable dpbinom(7, c(1, 1, 1, 1, 1, 1, 1)) # confirmation # Case 2 dpbinom(NULL, c(0, 0, 0, 0, 1, 1, 1)) # only 3 is observable dpbinom(3, c(0, 0, 0, 0, 1, 1, 1)) # confirmation # Case 3 dpbinom(NULL, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # only 1-5 are observable dpbinom(1:5, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # confirmation dpbinom(NULL, c(0, 0, 0.4, 1)) # only 1 and 2 are observable dpbinom(1:2, c(0, 0, 0.4, 1)) # confirmation ``` ### Generalized Poisson Binomial Distributions 1. All $u_i \in \{0, 1\}$ and all $v_i = 1 - u_i$? Then, it is an ordinary Poisson binomial distribution with parameters $p_i' = p_i$ for all $i$ for which $u_i = 1$ and $p_i' = 1 - p_i$ otherwise. This includes all the special cases described above. 2. All $u_i = u$ are equal and all $v_i = v$ are equal? In this case, we have a linearly transformed ordinary Poisson binomial distribution, i.e. $X$ can be expressed as $X = uY + v(n - Y)$ with $Y \sim PBin(p_1, ..., p_n)$. In particular, if all $p_i = p$ are also the same, we have a linear transformation of the usual binomial distribution, i.e. $X = uZ + v(n - Z)$ with $Z \sim Bin(n, p)$. Summarizing this, the following applies: a) All $p_i = 0$: The only observable value is $n \cdot v$, i.e. $P(X = n \cdot v) = 1$ and $P(X \neq n \cdot v) = 0$. b) All $p_i = 1$: The only observable value is $n \cdot u$, i.e. $P(X = n \cdot u) = 1$ and $P(X \neq n \cdot u) = 0$. c) All $p_i = p$: Observable values are in $\{u \cdot k + v \cdot (n - k) | k = 0, ..., n\}$ and $P(X = u \cdot k + v \cdot (n - k)) = P(Z = k)$. d) Otherwise: Observable values are in $\{u \cdot k + v \cdot (n - k) | k = 0, ..., n\})$ and $P(X = u \cdot k + v(n - k)) = P(Y = k)$ 3. All $p_i \in \{0, 1\}$? Let $I = \{i\, |\, p_i = 1\} \subseteq \{1, ..., n\}$ and $J = \{i\, |\, p_i = 0\} \subseteq \{1, ..., n\}$. Then, we have: a) All $p_i = 0$: The only observable value is $v^* := \sum_{i = 1}^{n}{v_i}$, i.e. $P(X = v^*) = 1$ and $P(X \neq v^*) = 0$. b) All $p_i = 1$: The only observable value is $u^* := \sum_{i = 1}^{n}{u_i}$, i.e. $P(X = u^*) = 1$ and $P(X \neq u^*) = 0$. c) Otherwise, The only observable value is $w^* := \sum_{i \in I}{u_i} + \sum_{i \in J}{v_i}$, i.e. $P(X = w^*) = 1$ and $P(X \neq w^*) = 0$. Note that the case that any $u_i = v_i$ is equivalent to $p_i = 1$, because the corresponding random variable $X_i$ has always the same (non-random) value. 4. Are there $p_i \notin \{0, 1\}$? Let $I$, $J$ and $w^*$ as above and $K = \{i\, |\, p_i > 0 \, \wedge p_i < 1\} \subseteq \{1, ..., n\}$. Then, $X$ can be expressed as $X = w^* + Z$ with $Z = \sum_{i \in K}{X_i}$ following a (reduced) generalized Poisson Bernoulli distribution. In particular, if only one $p_i \notin \{0, 1\}$, Z follows a linearly transformed Bernoulli distribution. These cases are illustrated in the following example: ```{r ex-gpdb} set.seed(1) pp <- runif(7) va <- sample(0:6, 7, TRUE) vb <- sample(0:6, 7, TRUE) # Case 1 dgpbinom(NULL, pp, rep(1, 7), rep(0, 7)) dpbinom(NULL, pp) # equal results dgpbinom(NULL, pp, rep(0, 7), rep(1, 7)) dpbinom(NULL, 1 - pp) # equal results dgpbinom(NULL, pp, c(rep(1, 3), rep(0, 4)), c(rep(0, 3), rep(1, 4))) dpbinom(NULL, c(pp[1:3], 1 - pp[4:7])) # reorder for 0 and 1; equal results # Case 2 a) dgpbinom(NULL, rep(0, 7), rep(4, 7), rep(2, 7)) # only 14 is observable dgpbinom(7 * 2, rep(0, 7), rep(4, 7), rep(2, 7)) # confirmation # Case 2 b) dgpbinom(NULL, rep(1, 7), rep(4, 7), rep(2, 7)) # only 28 is observable dgpbinom(7 * 4, rep(1, 7), rep(4, 7), rep(2, 7)) # confirmation # Case 2 c) dgpbinom(NULL, rep(0.3, 7), rep(4, 7), rep(2, 7)) dbinom(0:7, 7, 0.3) # equal results, but on different support set # Case 2 d) dgpbinom(NULL, pp, rep(4, 7), rep(2, 7)) dpbinom(NULL, pp) # equal results, but on different support set # Case 3 a) dgpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0), va, vb) # only sum(vb) is observable dgpbinom(sum(vb), rep(0, 7), va, vb) # confirmation # Case 3 b) dgpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1), va, vb) # only sum(va) is observable dgpbinom(sum(va), rep(1, 7), va, vb) # confirmation # Case 3 c) dgpbinom(NULL, c(0, 0, 0, 1, 1, 1, 1), va, vb) # only sum(va[4:7], vb[1:3]) is observable dgpbinom(sum(va[4:7], vb[1:3]), c(0, 0, 0, 1, 1, 1, 1), va, vb) # confirmation # Case 4 dgpbinom(NULL, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb) sure <- sum(va[5:7], vb[1:2]) x.transf <- sum(pmin(va[3:4], vb[3:4])):sum(pmax(va[3:4], vb[3:4])) dgpbinom(sure + x.transf, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb) dgpbinom(x.transf, c(0.3, 0.6), va[3:4], vb[3:4]) # equal results dgpbinom(NULL, c(0, 0, 0, 0.6, 1, 1, 1), va, vb) sure <- sum(va[5:7], vb[1:3]) x.transf <- va[4]:vb[4] dgpbinom(sure + x.transf, c(0, 0, 0, 0.6, 1, 1, 1), va, vb) dgpbinom(x.transf, 0.6, va[4], vb[4]) # equal results; essentially transformed Bernoulli ``` *** ## Usage with Rcpp How to import and use the internal C++ functions in `Rcpp` based packages is described in a [separate vignette](use_with_rcpp.html).PoissonBinomial/vignettes/proc_approx.Rmd0000644000176200001440000003351114022377771020404 0ustar liggesusers--- title: "Approximate Procedures" output: rmarkdown::html_vignette: toc: yes vignette: > %\VignetteIndexEntry{Approximate Procedures} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, echo = FALSE} library(PoissonBinomial) ``` ## Ordinary Poisson Binomial Distribution ### Poisson Approximation The *Poisson Approximation* (DC) approach is requested with `method = "Poisson"`. It is based on a Poisson distribution, whose parameter is the sum of the probabilities of success. ```{r pa1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Poisson") ppbinom(NULL, pp, wt, "Poisson") ``` A comparison with exact computation shows that the approximation quality of the PA procedure increases with smaller probabilities of success. The reason is that the Poisson Binomial distribution approaches a Poisson distribution when the probabilities are very small. ```{r pa2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "Poisson") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp)) # U(0, 0.01) random probabilities of success pp <- runif(20, 0, 0.01) dpbinom(NULL, pp, method = "Poisson") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp)) ``` ### Arithmetic Mean Binomial Approximation The *Arithmetic Mean Binomial Approximation* (AMBA) approach is requested with `method = "Mean"`. It is based on a Binomial distribution, whose parameter is the arithmetic mean of the probabilities of success. ```{r am1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) mean(rep(pp, wt)) dpbinom(NULL, pp, wt, "Mean") ppbinom(NULL, pp, wt, "Mean") ``` A comparison with exact computation shows that the approximation quality of the AMBA procedure increases when the probabilities of success are closer to each other. The reason is that, although the expectation remains unchanged, the distribution's variance becomes smaller the less the probabilities differ. Since this variance is minimized by equal probabilities (but still underestimated), the AMBA method is best suited for situations with very similar probabilities of success. ```{r am2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) # U(0.3, 0.5) random probabilities of success pp <- runif(20, 0.3, 0.5) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) # U(0.39, 0.41) random probabilities of success pp <- runif(20, 0.39, 0.41) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) ``` ### Geometric Mean Binomial Approximation - Variant A The *Geometric Mean Binomial Approximation (Variant A)* (GMBA-A) approach is requested with `method = "GeoMean"`. It is based on a Binomial distribution, whose parameter is the geometric mean of the probabilities of success: $$\hat{p} = \sqrt[n]{p_1 \cdot ... \cdot p_n}$$ ```{r gma1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) prod(rep(pp, wt))^(1/sum(wt)) dpbinom(NULL, pp, wt, "GeoMean") ppbinom(NULL, pp, wt, "GeoMean") ``` It is known that the geometric mean of the probabilities of success is always smaller than their arithmetic mean. Thus, we get a stochastically *smaller* binomial distribution. A comparison with exact computation shows that the approximation quality of the GMBA-A procedure increases when the probabilities of success are closer to each other: ```{r gma2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) # U(0.4, 0.6) random probabilities of success pp <- runif(20, 0.4, 0.6) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) # U(0.49, 0.51) random probabilities of success pp <- runif(20, 0.49, 0.51) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) ``` ### Geometric Mean Binomial Approximation - Variant B The *Geometric Mean Binomial Approximation (Variant B)* (GMBA-B) approach is requested with `method = "GeoMeanCounter"`. It is based on a Binomial distribution, whose parameter is 1 minus the geometric mean of the probabilities of **failure**: $$\hat{p} = 1 - \sqrt[n]{(1 - p_1) \cdot ... \cdot (1 - p_n)}$$ ```{r gmb1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) 1 - prod(1 - rep(pp, wt))^(1/sum(wt)) dpbinom(NULL, pp, wt, "GeoMeanCounter") ppbinom(NULL, pp, wt, "GeoMeanCounter") ``` It is known that the geometric mean of the probabilities of **failure** is always smaller than their arithmetic mean. As a result, 1 minus the geometric mean is larger than 1 minus the arithmetic mean. Thus, we get a stochastically *larger* binomial distribution. A comparison with exact computation shows that the approximation quality of the GMBA-B procedure again increases when the probabilities of success are closer to each other: ```{r gmb2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) # U(0.4, 0.6) random probabilities of success pp <- runif(20, 0.4, 0.6) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) # U(0.49, 0.51) random probabilities of success pp <- runif(20, 0.49, 0.51) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) ``` ### Normal Approximation The *Normal Approximation* (NA) approach is requested with `method = "Normal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean and variance of the input probabilities of success. ```{r na1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Normal") ppbinom(NULL, pp, wt, "Normal") ``` A comparison with exact computation shows that the approximation quality of the NA procedure increases with larger numbers of probabilities of success: ```{r na2-ord} set.seed(1) # 10 random probabilities of success pp <- runif(10) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100000 random probabilities of success pp <- runif(100000) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Refined Normal Approximation The *Refined Normal Approximation* (RNA) approach is requested with `method = "RefinedNormal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean, variance and skewness of the input probabilities of success. ```{r rna1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "RefinedNormal") ppbinom(NULL, pp, wt, "RefinedNormal") ``` A comparison with exact computation shows that the approximation quality of the RNA procedure increases with larger numbers of probabilities of success: ```{r rna2-ord} set.seed(1) # 10 random probabilities of success pp <- runif(10) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100000 random probabilities of success pp <- runif(100000) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Processing Speed Comparisons To assess the performance of the approximation procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-ord} library(microbenchmark) set.seed(1) f1 <- function() dpbinom(NULL, runif(4000), method = "Normal") f2 <- function() dpbinom(NULL, runif(4000), method = "RefinedNormal") f3 <- function() dpbinom(NULL, runif(4000), method = "Poisson") f4 <- function() dpbinom(NULL, runif(4000), method = "Mean") f5 <- function() dpbinom(NULL, runif(4000), method = "GeoMean") f6 <- function() dpbinom(NULL, runif(4000), method = "GeoMeanCounter") f7 <- function() dpbinom(NULL, runif(4000), method = "DivideFFT") microbenchmark(f1(), f2(), f3(), f4(), f5(), f6(), f7(), times = 51) ``` Clearly, the NA procedure is the fastest, followed by the RNA and PA methods. The next fastest algorithms are AMBA, GMBA-A and GMBA-B. They exhibit almost equal mean execution speed, with the AMBA algorithm being slightly faster. All of the approximation procedures outperform the fastest exact approach, DC-FFT, by far. ## Generalized Poisson Binomial Distribution ### Generalized Normal Approximation The *Generalized Normal Approximation* (G-NA) approach is requested with `method = "Normal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean and variance of the input probabilities of success (see [Introduction](intro.html). ```{r na1-gen} set.seed(2) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Normal") pgpbinom(NULL, pp, va, vb, wt, "Normal") ``` A comparison with exact computation shows that the approximation quality of the NA procedure increases with larger numbers of probabilities of success: ```{r na2-gen} set.seed(2) # 10 random probabilities of success pp <- runif(10) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100 random probabilities of success pp <- runif(100) va <- sample(0:100, 100, TRUE) vb <- sample(0:100, 100, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) va <- sample(0:1000, 1000, TRUE) vb <- sample(0:1000, 1000, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Generalized Refined Normal Approximation The *Generalized Refined Normal Approximation* (G-RNA) approach is requested with `method = "RefinedNormal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean, variance and skewness of the input probabilities of success. ```{r rna1-gen} set.seed(2) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "RefinedNormal") pgpbinom(NULL, pp, va, vb, wt, "RefinedNormal") ``` A comparison with exact computation shows that the approximation quality of the RNA procedure increases with larger numbers of probabilities of success: ```{r rna2-gen} set.seed(2) # 10 random probabilities of success pp <- runif(10) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100 random probabilities of success pp <- runif(100) va <- sample(0:100, 100, TRUE) vb <- sample(0:100, 100, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) va <- sample(0:1000, 1000, TRUE) vb <- sample(0:1000, 1000, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Processing Speed Comparisons To assess the performance of the approximation procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-gen} library(microbenchmark) n <- 1500 set.seed(2) va <- sample(1:50, n, TRUE) vb <- sample(1:50, n, TRUE) f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Normal") f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "RefinedNormal") f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT") microbenchmark(f1(), f2(), f3(), times = 51) ``` Clearly, the G-NA procedure is the fastest, followed by the G-RNA method. Both are hugely faster than G-DC-FFT.PoissonBinomial/R/0000755000176200001440000000000014077606026013570 5ustar liggesusersPoissonBinomial/R/gpbinom.R0000644000176200001440000003270514077547461015364 0ustar liggesusers#'@name GenPoissonBinomial-Distribution #' #'@title The Generalized Poisson Binomial Distribution #' #'@description #'Density, distribution function, quantile function and random generation for #'the generalized Poisson binomial distribution with probability vector #'\code{probs}. #' #'@param x Either a vector of observed sums or NULL. If NULL, #' probabilities of all possible observations are #' returned. #'@param p Vector of probabilities for computation of quantiles. #'@param n Number of observations. If \code{length(n) > 1}, the #' length is taken to be the number required. #'@param probs Vector of probabilities of success of each Bernoulli #' trial. #'@param val_p Vector of values that each trial produces with probability #' in \code{probs}. #'@param val_q Vector of values that each trial produces with probability #' in \code{1 - probs}. #'@param method Character string that specifies the method of computation #' and must be one of \code{"DivideFFT"}, \code{"Convolve"}, #' \code{"Characteristic"}, \code{"Normal"} or #' \code{"RefinedNormal"} (abbreviations are allowed). #'@param wts Vector of non-negative integer weights for the input #' probabilities. #'@param log,log.p Logical value indicating if results are given as #' logarithms. #'@param lower.tail Logical value indicating if results are \eqn{P[X \leq x]} #' (if \code{TRUE}; default) or \eqn{P[X > x]} (if #' \code{FALSE}). #'@param generator Character string that specifies the random number #' generator and must either be \code{"Sample"} or #' \code{"Bernoulli"} (abbreviations are allowed). #' #'@details #'See the references for computational details. The \emph{Divide and Conquer} #'(\code{"DivideFFT"}) and \emph{Direct Convolution} (\code{"Convolve"}) #'algorithms are derived and described in Biscarri, Zhao & Brunner (2018). They #'have been modified for use with the generalized Poisson binomial #'distribution. The #'\emph{Discrete Fourier Transformation of the Characteristic Function} #'(\code{"Characteristic"}) is derived in Zhang, Hong & Balakrishnan (2018), #'the \emph{Normal Approach} (\code{"Normal"}) and the #'\emph{Refined Normal Approach} (\code{"RefinedNormal"}) are described in Hong #'(2013). They were slightly adapted for the generalized Poisson binomial #'distribution. #' #'In some special cases regarding the values of \code{probs}, the \code{method} #'parameter is ignored (see Introduction vignette). #' #'Random numbers can be generated in two ways. The \code{"Sample"} method #'uses \code{R}'s \code{sample} function to draw random values according to #'their probabilities that are calculated by \code{dgpbinom}. The #'\code{"Bernoulli"} procedure ignores the \code{method} parameter and #'simulates Bernoulli-distributed random numbers according to the probabilities #'in \code{probs} and sums them up. It is a bit slower than the \code{"Sample"} #'generator, but may yield better results, as it allows to obtain observations #'that cannot be generated by the \code{"Sample"} procedure, because #'\code{dgpbinom} may compute 0-probabilities, due to rounding, if the length #'of \code{probs} is large and/or its values contain a lot of very small #'values. #' #'@return #'\code{dgpbinom} gives the density, \code{pgpbinom} computes the distribution #'function, \code{qgpbinom} gives the quantile function and \code{rgpbinom} #'generates random deviates. #' #'For \code{rgpbinom}, the length of the result is determined by \code{n}, and #'is the lengths of the numerical arguments for the other functions. #' #'@section References: #'Hong, Y. (2018). On computing the distribution function for the Poisson #' binomial distribution. \emph{Computational Statistics & Data Analysis}, #' \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006} #' #'Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method #' for computing the Poisson binomial distribution. #' \emph{Computational Statistics and Data Analysis}, \strong{31}, pp. #' 216–222. \doi{10.1016/j.csda.2018.01.007} #' #'Zhang, M., Hong, Y. and Balakrishnan, N. (2018). The generalized #' Poisson-binomial distribution and the computation of its distribution #' function. \emph{Journal of Statistical Computational and Simulation}, #' \strong{88}(8), pp. 1515-1527. \doi{10.1080/00949655.2018.1440294} #' #'@examples #'set.seed(1) #'pp <- c(1, 0, runif(10), 1, 0, 1) #'qq <- seq(0, 1, 0.01) #'va <- rep(5, length(pp)) #'vb <- 1:length(pp) #' #'dgpbinom(NULL, pp, va, vb, method = "DivideFFT") #'pgpbinom(75:100, pp, va, vb, method = "DivideFFT") #'qgpbinom(qq, pp, va, vb, method = "DivideFFT") #'rgpbinom(100, pp, va, vb, method = "DivideFFT") #' #'dgpbinom(NULL, pp, va, vb, method = "Convolve") #'pgpbinom(75:100, pp, va, vb, method = "Convolve") #'qgpbinom(qq, pp, va, vb, method = "Convolve") #'rgpbinom(100, pp, va, vb, method = "Convolve") #' #'dgpbinom(NULL, pp, va, vb, method = "Characteristic") #'pgpbinom(75:100, pp, va, vb, method = "Characteristic") #'qgpbinom(qq, pp, va, vb, method = "Characteristic") #'rgpbinom(100, pp, va, vb, method = "Characteristic") #' #'dgpbinom(NULL, pp, va, vb, method = "Normal") #'pgpbinom(75:100, pp, va, vb, method = "Normal") #'qgpbinom(qq, pp, va, vb, method = "Normal") #'rgpbinom(100, pp, va, vb, method = "Normal") #' #'dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") #'pgpbinom(75:100, pp, va, vb, method = "RefinedNormal") #'qgpbinom(qq, pp, va, vb, method = "RefinedNormal") #'rgpbinom(100, pp, va, vb, method = "RefinedNormal") #' #'@export dgpbinom <- function(x, probs, val_p, val_q, wts = NULL, method = "DivideFFT", log = FALSE){ ## preliminary checks method <- check.args.GPB(x, probs, val_p, val_q, wts, method) ## transform input to relevant range transf <- transform.GPB(x, probs, val_p, val_q, wts) # if x = NULL, return all possible probabilities if(is.null(x)) x <- transf$compl.range # identify valid 'x' values (invalid ones will have 0-probability) idx.valid <- which(x %in% transf$compl.range) ## compute probabilities # vector for storing the probabilities d <- double(length(x)) # no computation needed, if there are no valid observations in 'x' if(length(idx.valid)){ # select valid observations in relevant range y <- x[idx.valid] # relevant observations idx.inner <- which(y %in% transf$inner.range) # if no input value is in relevant range, they are impossible (i.e. return 0-probabilities) if(length(idx.inner)){ # transformed input parameters n <- transf$n probs <- transf$probs diffs <- transf$diffs if(n == 0){ # 'probs' contains only zeros and ones, i.e. only one possible observation d[idx.valid][idx.inner] <- 1 }else{ z <- y[idx.inner] - transf$inner.range[1] # compute distribution if(all(diffs == diffs[1])){ # all values of 'diffs' are equal, i.e. a multiplied ordinary poisson binomial distribution remainder <- z %% diffs[1] idx.r <- which(remainder == 0) d[idx.valid][idx.inner][idx.r] <- dpbinom((z %/% diffs[1])[idx.r], probs, method = method) }else{ # compute distribution according to 'method' d[idx.valid][idx.inner] <- switch(method, DivideFFT = dgpb_dc(z, probs, diffs, rep(0, n)), Convolve = dgpb_conv(z, probs, diffs, rep(0, n)), Characteristic = dgpb_dftcf(z, probs, diffs, rep(0, n)), Normal = dgpb_na(z, probs, diffs, rep(0, n), FALSE), RefinedNormal = dgpb_na(z, probs, diffs, rep(0, n), TRUE)) } } } } # logarithm, if required if(log) d <- log(d) # return results return(d) } #'@rdname GenPoissonBinomial-Distribution #'@export pgpbinom <- function(x, probs, val_p, val_q, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE){ ## preliminary checks method <- check.args.GPB(x, probs, val_p, val_q, wts, method) ## transform input to relevant range transf <- transform.GPB(x, probs, val_p, val_q, wts) # if x = NULL, return all possible probabilities if(is.null(x)) x <- transf$compl.range # identify valid 'x' values (invalid ones will have 0-probability) idx.valid <- which(x %in% transf$compl.range) ## compute probabilities # vector for storing the probabilities d <- rep(as.numeric(!lower.tail), length(x)) # no computation needed, if there are no valid observations in 'x' if(length(idx.valid)){ # select valid observations in relevant range y <- x[idx.valid] # relevant observations idx.inner <- which(y %in% transf$inner.range) if(length(idx.inner)){ # transformed input parameters n <- transf$n probs <- transf$probs diffs <- transf$diffs if(n == 0){ # 'probs' contains only zeros and ones, i.e. only one possible observation d[idx.valid][idx.inner] <- as.numeric(lower.tail) }else{ # select and rescale relevant observations z <- y[idx.inner] - transf$inner.range[1] # compute distribution if(all(diffs == diffs[1])){ # all GCD-optimized values of 'diffs' are equal, i.e. a standard binomial distribution d[idx.valid][idx.inner] <- ppbinom(z %/% diffs[1], probs, method = method, lower.tail = lower.tail) }else{ # compute distribution according to 'method' d[idx.valid][idx.inner] <- switch(method, DivideFFT = pgpb_dc(z, probs, diffs, rep(0, n), lower.tail), Convolve = pgpb_conv(z, probs, diffs, rep(0, n), lower.tail), Characteristic = pgpb_dftcf(z, probs, diffs, rep(0, n), lower.tail), Normal = pgpb_na(z, probs, diffs, rep(0, n), FALSE, lower.tail), RefinedNormal = pgpb_na(z, probs, diffs, rep(0, n), TRUE, lower.tail)) } } } # which valid observations are above relevant range idx.above <- which(y > max(transf$inner.range)) # fill cumulative probabilities of values above the relevant range if(length(idx.above)) d[idx.valid][idx.above] <- as.double(lower.tail) } # fill cumulative probabilities of values above complete range d[x > max(transf$compl.range)] <- as.double(lower.tail) # logarithm, if required if(log.p) d <- log(d) # return results return(d) } #'@rdname GenPoissonBinomial-Distribution #'@importFrom stats stepfun #'@export qgpbinom <- function(p, probs, val_p, val_q, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE){ ## preliminary checks method <- check.args.GPB(NULL, probs, val_p, val_q, wts, method) # check if 'q' contains only probabilities if(!log.p){ if(is.null(p) || any(is.na(p) | p < 0 | p > 1)) stop("'p' must contain real numbers between 0 and 1!") }else{ if(is.null(p) || any(is.na(p) | p > 0)) stop("'p' must contain real numbers between -Inf and 0!") } ## transform input to relevant range transf <- transform.GPB(NULL, probs, val_p, val_q, wts) probs <- transf$probs val_p <- transf$val_p val_q <- transf$val_q ## compute probabilities (does checking for the other variables) cdf <- pgpbinom(NULL, probs, val_p, val_q, NULL, method, lower.tail) # bounds of relevant observations first <- min(transf$inner.range) last <- max(transf$inner.range) # length of cdf len <- length(cdf) # logarithm, if required if(log.p) p <- exp(p) ## compute quantiles # handle quantiles between 0 and 1 if(lower.tail) Q <- stepfun(cdf[transf$inner.range - first + 1], c(transf$inner.range, last), right = TRUE) else Q <- stepfun(rev(cdf[transf$inner.range - first + 1]), c(last, rev(transf$inner.range)), right = TRUE) # vector to store results res <- Q(p) # handle quantiles of 0 or 1 res[p == lower.tail] <- last res[p == !lower.tail] <- first # return results return(res) } #'@rdname GenPoissonBinomial-Distribution #'@importFrom stats runif rbinom #'@export rgpbinom <- function(n, probs, val_p, val_q, wts = NULL, method = "DivideFFT", generator = "Sample"){ ## preliminary checks method <- check.args.GPB(NULL, probs, val_p, val_q, wts, method) len <- length(n) if(len > 1) n <- len # check if 'n' is NULL if(is.null(n)) stop("'n' must not be NULL!") ## expand 'probs', 'val_p' and 'val_q' according to the counts in 'wts' # if 'wts' is NULL, set it to be a vector of ones if(is.null(wts)) wts <- rep(1, length(probs)) # expand 'probs', 'val_p', 'val_q' probs <- rep(probs, wts) val_p <- rep(val_p, wts) val_q <- rep(val_q, wts) # make sure that the value of 'generator' matches one of the implemented procedures generator <- match.arg(generator, c("Sample", "Bernoulli")) # generate random numbers res <- switch(generator, Sample = sample(sum(pmin(val_p, val_q)):sum(pmax(val_p, val_q)), n, TRUE, dgpbinom(NULL, probs, val_p, val_q, NULL, method)), Bernoulli = rgpb_bernoulli(n, probs, val_p, val_q)) # return results return(res) }PoissonBinomial/R/onUnload.R0000644000176200001440000000021313563504710015462 0ustar liggesusers# http://r-pkgs.had.co.nz/src.html#c-best-practices .onUnload <- function (libpath) { library.dynam.unload("PoissonBinomial", libpath) }PoissonBinomial/R/utility.R0000644000176200001440000000711414077532204015415 0ustar liggesuserscheck.args.GPB <- function(x, probs, val_p, val_q, wts, method, log.p = FALSE){ # check if 'x' contains only integers if(!is.null(x) && any(x - round(x) != 0)){ warning("'x' should contain integers only! Using rounded off values.") x <- floor(x) } # check if 'probs' contains only probabilities if(is.null(probs) || any(is.na(probs) | probs < 0 | probs > 1)) stop("'probs' must contain real numbers between 0 and 1!") # number of probabilities n <- length(probs) # check if 'val_p' and 'val_q' have the same length as 'probs' if(length(val_p) != n || length(val_q) != n) stop("'probs', 'val_p' and 'val_q' must have the same length!") if(!is.null(wts) && length(wts) != n) stop("'probs' and 'wts' (if not NULL) must have the same length!") # check if 'val_p' contains only integers if(!is.null(val_p) && any(val_p - round(val_p) != 0)){ warning("'val_p' should contain integers only! Using rounded off values.") val_p <- floor(val_p) } # check if 'val_q' contains only integers if(!is.null(val_q) && any(val_q - round(val_q) != 0)){ warning("'val_q' should contain integers only! Using rounded off values.") val_q <- floor(val_q) } # check if 'wts' contains only integers (zeros are allowed) if(!is.null(wts) && any(is.na(wts) | wts < 0 | abs(wts - round(wts)) > 1e-07)) stop("'wts' must contain non-negative integers!") # make sure that the value of 'method' matches one of the implemented procedures method <- match.arg(method, c("DivideFFT", "Convolve", "Characteristic", "Normal", "RefinedNormal")) # if all checks were successful, return matched 'method' return(method) } transform.GPB <- function(x, probs, val_p, val_q, wts){ # number of probabilities n <- length(probs) ## expand 'probs', 'val_p' and 'val_q' according to the counts in 'wts' # if 'wts' is NULL, set it to be a vector of ones if(is.null(wts)) wts <- rep(1, n) # expand 'probs', 'val_p', 'val_q' probs <- rep(probs, wts) val_p <- rep(val_p, wts) val_q <- rep(val_q, wts) # reorder 'val_p' and 'val_q' so that values in 'val_p' are always greater val_gr <- pmax(val_p, val_q) val_lo <- pmin(val_p, val_q) probs[val_gr > val_p] <- 1 - probs[val_gr > val_p] # re-compute length of 'probs' (= sum of 'wts') n <- sum(wts) ## determine relevant range of observations # determine minimum and maximum possible observations sum_min <- sum(val_lo) sum_max <- sum(val_gr) # which probabilities are 0 or 1, which val_p and val_q are equal idx.0 <- which(probs == 0) idx.1 <- which(probs == 1) idx.v <- which(val_gr == val_lo & probs > 0 & probs < 1) idx.r <- setdiff(1:n, union(union(idx.0, idx.1), idx.v)) # guaranteed val_gr_sure <- val_gr[idx.1] val_lo_sure <- val_lo[idx.0] vals_equal <- val_gr[idx.v]# equal to val_lo[idx.v] sum_sure <- sum(val_gr_sure, val_lo_sure, vals_equal) # limit 'probs', 'val_p' and 'val_q' to relevant range np <- length(idx.r) if(np){ probs <- probs[idx.r] val_gr <- val_gr[idx.r] val_lo <- val_lo[idx.r] }else{ probs <- 1 val_gr <- 0 val_lo <- 0 } # compute differences and their GCD diffs <- val_gr - val_lo # bounds of relevant observations sum_min_in <- sum(val_lo) + sum_sure sum_max_in <- sum(val_gr) + sum_sure return(list(probs = probs, val_p = val_gr, val_q = val_lo, compl.range = sum_min:sum_max, inner.range = sum_min_in:sum_max_in, inner.size = sum_max_in - sum_min_in + 1, n = np, diffs = diffs)) } PoissonBinomial/R/PoissonBinomial.R0000644000176200001440000000365014022630567017021 0ustar liggesusers#'@name PoissonBinomial-package #' #'@title Efficient Exact and Approximate Implementations for Computing Ordinary and Generalized Poisson Binomial Distributions #' #'@description #'This package implements various algorithms for computing the probability mass #'function, the cumulative distribution function, quantiles and random numbers #'of both ordinary and generalized Poisson binomial distributions. #' #'@docType package #'@import Rcpp #'@useDynLib PoissonBinomial, .registration = TRUE #' #'@section References: #'Hong, Y. (2013). On computing the distribution function for the Poisson #' binomial distribution. \emph{Computational Statistics & Data Analysis}, #' \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006} #' #'Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method #' for computing the Poisson binomial distribution. #' \emph{Computational Statistics and Data Analysis}, \strong{31}, pp. #' 216–222. \doi{10.1016/j.csda.2018.01.007} #' #'Zhang, M., Hong, Y. and Balakrishnan, N. (2018). The generalized #' Poisson-binomial distribution and the computation of its distribution #' function. \emph{Journal of Statistical Computational and Simulation}, #' \strong{88}(8), pp. 1515-1527. \doi{10.1080/00949655.2018.1440294} #' #'@examples #'# Functions for ordinary Poisson binomial distributions #'set.seed(1) #'pp <- c(1, 0, runif(10), 1, 0, 1) #'qq <- seq(0, 1, 0.01) #' #'dpbinom(NULL, pp) #'ppbinom(7:10, pp, method = "DivideFFT") #'qpbinom(qq, pp, method = "Convolve") #'rpbinom(10, pp, method = "RefinedNormal") #' #'# Functions for generalized Poisson binomial distributions #'va <- rep(5, length(pp)) #'vb <- 1:length(pp) #' #'dgpbinom(NULL, pp, va, vb, method = "Convolve") #'pgpbinom(80:100, pp, va, vb, method = "Convolve") #'qgpbinom(qq, pp, va, vb, method = "Convolve") #'rgpbinom(100, pp, va, vb, method = "Convolve") NULLPoissonBinomial/R/RcppExports.R0000644000176200001440000000660114077551033016204 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #' NULL vectorGCD <- function(x) { .Call(`_PoissonBinomial_vectorGCD`, x) } dpb_conv <- function(obs, probs) { .Call(`_PoissonBinomial_dpb_conv`, obs, probs) } ppb_conv <- function(obs, probs, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_conv`, obs, probs, lower_tail) } dpb_dc <- function(obs, probs) { .Call(`_PoissonBinomial_dpb_dc`, obs, probs) } ppb_dc <- function(obs, probs, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_dc`, obs, probs, lower_tail) } dpb_dftcf <- function(obs, probs) { .Call(`_PoissonBinomial_dpb_dftcf`, obs, probs) } ppb_dftcf <- function(obs, probs, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_dftcf`, obs, probs, lower_tail) } dpb_rf <- function(obs, probs) { .Call(`_PoissonBinomial_dpb_rf`, obs, probs) } ppb_rf <- function(obs, probs, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_rf`, obs, probs, lower_tail) } dpb_mean <- function(obs, probs) { .Call(`_PoissonBinomial_dpb_mean`, obs, probs) } ppb_mean <- function(obs, probs, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_mean`, obs, probs, lower_tail) } dpb_gmba <- function(obs, probs, anti = FALSE) { .Call(`_PoissonBinomial_dpb_gmba`, obs, probs, anti) } ppb_gmba <- function(obs, probs, anti = FALSE, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_gmba`, obs, probs, anti, lower_tail) } dpb_pa <- function(obs, probs) { .Call(`_PoissonBinomial_dpb_pa`, obs, probs) } ppb_pa <- function(obs, probs, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_pa`, obs, probs, lower_tail) } ppb_na <- function(obs, probs, refined = TRUE, lower_tail = TRUE) { .Call(`_PoissonBinomial_ppb_na`, obs, probs, refined, lower_tail) } dpb_na <- function(obs, probs, refined = TRUE) { .Call(`_PoissonBinomial_dpb_na`, obs, probs, refined) } rpb_bernoulli <- function(n, probs) { .Call(`_PoissonBinomial_rpb_bernoulli`, n, probs) } dgpb_conv <- function(obs, probs, val_p, val_q) { .Call(`_PoissonBinomial_dgpb_conv`, obs, probs, val_p, val_q) } pgpb_conv <- function(obs, probs, val_p, val_q, lower_tail = TRUE) { .Call(`_PoissonBinomial_pgpb_conv`, obs, probs, val_p, val_q, lower_tail) } dgpb_dc <- function(obs, probs, val_p, val_q) { .Call(`_PoissonBinomial_dgpb_dc`, obs, probs, val_p, val_q) } pgpb_dc <- function(obs, probs, val_p, val_q, lower_tail = TRUE) { .Call(`_PoissonBinomial_pgpb_dc`, obs, probs, val_p, val_q, lower_tail) } dgpb_dftcf <- function(obs, probs, val_p, val_q) { .Call(`_PoissonBinomial_dgpb_dftcf`, obs, probs, val_p, val_q) } pgpb_dftcf <- function(obs, probs, val_p, val_q, lower_tail = TRUE) { .Call(`_PoissonBinomial_pgpb_dftcf`, obs, probs, val_p, val_q, lower_tail) } pgpb_na <- function(obs, probs, val_p, val_q, refined = TRUE, lower_tail = TRUE) { .Call(`_PoissonBinomial_pgpb_na`, obs, probs, val_p, val_q, refined, lower_tail) } dgpb_na <- function(obs, probs, val_p, val_q, refined = TRUE) { .Call(`_PoissonBinomial_dgpb_na`, obs, probs, val_p, val_q, refined) } rgpb_bernoulli <- function(n, probs, val_p, val_q) { .Call(`_PoissonBinomial_rgpb_bernoulli`, n, probs, val_p, val_q) } # Register entry points for exported C++ functions methods::setLoadAction(function(ns) { .Call('_PoissonBinomial_RcppExport_registerCCallable', PACKAGE = 'PoissonBinomial') }) PoissonBinomial/R/pbinom.R0000644000176200001440000004244414077547450015214 0ustar liggesusers#'@name PoissonBinomial-Distribution #' #'@importFrom stats dbinom pbinom runif #' #'@title The Poisson Binomial Distribution #' #'@description #'Density, distribution function, quantile function and random generation for #'the Poisson binomial distribution with probability vector \code{probs}. #' #'@param x Either a vector of observed numbers of successes or NULL. #' If NULL, probabilities of all possible observations are #' returned. #'@param p Vector of probabilities for computation of quantiles. #'@param n Number of observations. If \code{length(n) > 1}, the #' length is taken to be the number required. #'@param probs Vector of probabilities of success of each Bernoulli #' trial. #'@param method Character string that specifies the method of computation #' and must be one of \code{"DivideFFT"}, \code{"Convolve"}, #' \code{"Characteristic"}, \code{"Recursive"}, #' \code{"Mean"}, \code{"GeoMean"}, \code{"GeoMeanCounter"}, #' \code{"Poisson"}, \code{"Normal"} or #' \code{"RefinedNormal"} (abbreviations are allowed). #'@param wts Vector of non-negative integer weights for the input #' probabilities. #'@param log,log.p Logical value indicating if results are given as #' logarithms. #'@param lower.tail Logical value indicating if results are \eqn{P[X \leq x]} #' (if \code{TRUE}; default) or \eqn{P[X > x]} (if #' \code{FALSE}). #'@param generator Character string that specifies the random number #' generator and must either be \code{"Sample"} (default) or #' \code{"Bernoulli"} (abbreviations are allowed). See #' Details for more information. #' #'@details #'See the references for computational details. The \emph{Divide and Conquer} #'(\code{"DivideFFT"}) and \emph{Direct Convolution} (\code{"Convolve"}) #'algorithms are derived and described in Biscarri, Zhao & Brunner (2018). The #'\emph{Discrete Fourier Transformation of the Characteristic Function} #'(\code{"Characteristic"}), the \emph{Recursive Formula} (\code{"Recursive"}), #'the \emph{Poisson Approximation} (\code{"Poisson"}), the #'\emph{Normal Approach} (\code{"Normal"}) and the #'\emph{Refined Normal Approach} (\code{"RefinedNormal"}) are described in Hong #'(2013). The calculation of the \emph{Recursive Formula} was modified to #'overcome the excessive memory requirements of Hong's implementation. #' #'The \code{"Mean"} method is a naive binomial approach using the arithmetic #'mean of the probabilities of success. Similarly, the \code{"GeoMean"} and #'\code{"GeoMeanCounter"} procedures are binomial approximations, too, but #'they form the geometric mean of the probabilities of success #'(\code{"GeoMean"}) and their counter probabilities (\code{"GeoMeanCounter"}), #'respectively. #' #'In some special cases regarding the values of \code{probs}, the \code{method} #'parameter is ignored (see Introduction vignette). #' #'Random numbers can be generated in two ways. The \code{"Sample"} method #'uses \code{R}'s \code{sample} function to draw random values according to #'their probabilities that are calculated by \code{dgpbinom}. The #'\code{"Bernoulli"} procedure ignores the \code{method} parameter and #'simulates Bernoulli-distributed random numbers according to the probabilities #'in \code{probs} and sums them up. It is a bit slower than the \code{"Sample"} #'generator, but may yield better results, as it allows to obtain observations #'that cannot be generated by the \code{"Sample"} procedure, because #'\code{dgpbinom} may compute 0-probabilities, due to rounding, if the length #'of \code{probs} is large and/or its values contain a lot of very small #'values. #' #'@return #'\code{dpbinom} gives the density, \code{ppbinom} computes the distribution #'function, \code{qpbinom} gives the quantile function and \code{rpbinom} #'generates random deviates. #' #'For \code{rpbinom}, the length of the result is determined by \code{n}, and #'is the lengths of the numerical arguments for the other functions. #' #'@section References: #'Hong, Y. (2013). On computing the distribution function for the Poisson #' binomial distribution. \emph{Computational Statistics & Data Analysis}, #' \strong{59}, pp. 41-51. \doi{10.1016/j.csda.2012.10.006} #' #'Biscarri, W., Zhao, S. D. and Brunner, R. J. (2018) A simple and fast method #' for computing the Poisson binomial distribution. #' \emph{Computational Statistics and Data Analysis}, \strong{31}, pp. #' 216–222. \doi{10.1016/j.csda.2018.01.007} #' #'@examples #'set.seed(1) #'pp <- c(0, 0, runif(995), 1, 1, 1) #'qq <- seq(0, 1, 0.01) #' #'dpbinom(NULL, pp, method = "DivideFFT") #'ppbinom(450:550, pp, method = "DivideFFT") #'qpbinom(qq, pp, method = "DivideFFT") #'rpbinom(100, pp, method = "DivideFFT") #' #'dpbinom(NULL, pp, method = "Convolve") #'ppbinom(450:550, pp, method = "Convolve") #'qpbinom(qq, pp, method = "Convolve") #'rpbinom(100, pp, method = "Convolve") #' #'dpbinom(NULL, pp, method = "Characteristic") #'ppbinom(450:550, pp, method = "Characteristic") #'qpbinom(qq, pp, method = "Characteristic") #'rpbinom(100, pp, method = "Characteristic") #' #'dpbinom(NULL, pp, method = "Recursive") #'ppbinom(450:550, pp, method = "Recursive") #'qpbinom(qq, pp, method = "Recursive") #'rpbinom(100, pp, method = "Recursive") #' #'dpbinom(NULL, pp, method = "Mean") #'ppbinom(450:550, pp, method = "Mean") #'qpbinom(qq, pp, method = "Mean") #'rpbinom(100, pp, method = "Mean") #' #'dpbinom(NULL, pp, method = "GeoMean") #'ppbinom(450:550, pp, method = "GeoMean") #'qpbinom(qq, pp, method = "GeoMean") #'rpbinom(100, pp, method = "GeoMean") #' #'dpbinom(NULL, pp, method = "GeoMeanCounter") #'ppbinom(450:550, pp, method = "GeoMeanCounter") #'qpbinom(qq, pp, method = "GeoMeanCounter") #'rpbinom(100, pp, method = "GeoMeanCounter") #' #'dpbinom(NULL, pp, method = "Poisson") #'ppbinom(450:550, pp, method = "Poisson") #'qpbinom(qq, pp, method = "Poisson") #'rpbinom(100, pp, method = "Poisson") #' #'dpbinom(NULL, pp, method = "Normal") #'ppbinom(450:550, pp, method = "Normal") #'qpbinom(qq, pp, method = "Normal") #'rpbinom(100, pp, method = "Normal") #' #'dpbinom(NULL, pp, method = "RefinedNormal") #'ppbinom(450:550, pp, method = "RefinedNormal") #'qpbinom(qq, pp, method = "RefinedNormal") #'rpbinom(100, pp, method = "RefinedNormal") #' #'@export dpbinom <- function(x, probs, wts = NULL, method = "DivideFFT", log = FALSE){ ## preliminary checks # number of probabilities n <- length(probs) # check if 'x' contains only integers if(!is.null(x) && any(x - round(x) != 0)){ warning("'x' should contain integers only! Using rounded off values.") x <- floor(x) } # check if 'probs' contains only probabilities if(is.null(probs) || any(is.na(probs) | probs < 0 | probs > 1)) stop("'probs' must contain real numbers between 0 and 1!") # make sure that the value of 'method' matches one of the implemented procedures method <- match.arg(method, c("DivideFFT", "Convolve", "Characteristic", "Recursive", "Mean", "GeoMean", "GeoMeanCounter", "Poisson", "Normal", "RefinedNormal")) # check if 'wts' contains only integers (zeros are allowed) if(!is.null(wts) && any(is.na(wts) | wts < 0 | abs(wts - round(wts)) > 1e-07)) stop("'wts' must contain non-negative integers!") if(!is.null(wts) && length(wts) != n) stop("'probs' and 'wts' (if not NULL) must have the same length!") ## expand 'probs' according to the counts in 'wts' # if 'wts' is NULL, set it to be a vector of ones if(is.null(wts)) wts <- rep(1, n) # expand 'probs' probs <- rep(probs, wts) # re-compute length of 'probs' (= sum of 'wts') n <- sum(wts) # if x = NULL, return all possible probabilities if(is.null(x)) x <- 0:n # identify valid 'x' values (invalid ones will have 0-probability) idx.x <- which(x >= 0 & x <= n) # select valid observations y <- x[idx.x] ## compute probabilities # vector for storing the probabilities d <- double(length(x)) # no computation needed, if there are no valid observations in 'x' if(length(idx.x)){ # which probabilities are 0 or 1 idx0 <- which(probs == 0) idx1 <- which(probs == 1) probs <- probs[probs > 0 & probs < 1] # number of zeros and ones n0 <- length(idx0) n1 <- length(idx1) np <- n - n0 - n1 # relevant observations idx.y <- which(y %in% n1:(n - n0)) if(length(idx.y)){ z <- y[idx.y] - n1 if(np == 0){ # 'probs' contains only zeros and ones, i.e. only one possible observation d[idx.x][idx.y] <- 1 }else if(np == 1){ # 'probs' contains only one value that is not 0 or 1, i.e. a Bernoulli distribution d[idx.x][idx.y] <- c(1 - probs, probs)[z + 1] }else{ if(all(probs == probs[1])){ # all values of 'probs' are equal, i.e. a standard binomial distribution d[idx.x][idx.y] <- dbinom(z, np, probs[1]) }else{ # otherwise, compute distribution according to 'method' d[idx.x][idx.y] <- switch(method, DivideFFT = dpb_dc(z, probs), Convolve = dpb_conv(z, probs), Characteristic = dpb_dftcf(z, probs), Recursive = dpb_rf(z, probs), Mean = dpb_mean(z, probs), GeoMean = dpb_gmba(z, probs, FALSE), GeoMeanCounter = dpb_gmba(z, probs, TRUE), Poisson = dpb_pa(z, probs), Normal = dpb_na(z, probs, FALSE), RefinedNormal = dpb_na(z, probs, TRUE)) } } } } # logarithm, if required if(log) d <- log(d) # return results return(d) } #'@rdname PoissonBinomial-Distribution #'@export ppbinom <- function(x, probs, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE){ ## preliminary checks # number of probabilities n <- length(probs) # check if 'x' contains only integers if(!is.null(x) && any(x - round(x) != 0)){ warning("'x' should contain integers only! Using rounded off values.") x <- floor(x) } # check if 'probs' contains only probabilities if(is.null(probs) || any(is.na(probs) | probs < 0 | probs > 1)) stop("'probs' must contain real numbers between 0 and 1!") # make sure that the value of 'method' matches one of the implemented procedures method <- match.arg(method, c("DivideFFT", "Convolve", "Characteristic", "Recursive", "Mean", "GeoMean", "GeoMeanCounter", "Poisson", "Normal", "RefinedNormal")) # check if 'wts' contains only integers (zeros are allowed) if(!is.null(wts) && any(is.na(wts) | wts < 0 | abs(wts - round(wts)) > 1e-07)) stop("'wts' must contain non-negative integers!") if(!is.null(wts) && length(wts) != n) stop("'probs' and 'wts' (if not NULL) must have the same length!") ## expand 'probs' according to the counts in 'wts' # if 'wts' is NULL, set it to be a vector of ones if(is.null(wts)) wts <- rep(1, n) # expand 'probs' probs <- rep(probs, wts) # re-compute length of 'probs' (= sum of 'wts') n <- sum(wts) # if x = NULL, return all possible probabilities if(is.null(x)) x <- 0:n # identify valid 'x' values (invalid ones will have 0-probability) idx.x <- which(x >= 0 & x <= n) # select valid observations y <- x[idx.x] ## compute probabilities # vector for storing the probabilities d <- rep(as.numeric(!lower.tail), length(x)) # no computation needed, if there are no valid observations in 'x' if(length(idx.x)){ # which probabilities are 0 or 1 idx0 <- which(probs == 0) idx1 <- which(probs == 1) probs <- probs[probs > 0 & probs < 1] # number of zeros and ones n0 <- length(idx0) n1 <- length(idx1) np <- n - n0 - n1 # relevant observations idx.y <- which(y %in% n1:(n - n0)) idx.z <- which(y > n - n0) if(length(idx.y)){ z <- y[idx.y] - n1 if(np == 0){ # 'probs' contains only zeros and ones, i.e. there is only one possible observation d[idx.x][idx.y] <- if(lower.tail) 1 else 0 }else if(np == 1){ # 'probs' contains only one value that is not 0 or 1, i.e. a Bernoulli distribution d[idx.x][idx.y] <- if(lower.tail) c(1 - probs, 1)[z + 1] else c(probs, 0)[z + 1] }else{ if(all(probs == probs[1])){ # all values of 'probs' are equal, i.e. a standard binomial distribution d[idx.x][idx.y] <- pbinom(q = z, size = np, prob = probs[1], lower.tail = lower.tail) }else{ # otherwise, compute distribution according to 'method' d[idx.x][idx.y] <- switch(method, DivideFFT = ppb_dc(z, probs, lower.tail), Convolve = ppb_conv(z, probs, lower.tail), Characteristic = ppb_dftcf(z, probs, lower.tail), Recursive = ppb_rf(z, probs, lower.tail), Mean = ppb_mean(z, probs, lower.tail), GeoMean = ppb_gmba(z, probs, FALSE, lower.tail), GeoMeanCounter = ppb_gmba(z, probs, TRUE, lower.tail), Poisson = ppb_pa(z, probs, lower.tail), Normal = ppb_na(z, probs, FALSE, lower.tail), RefinedNormal = ppb_na(z, probs, TRUE, lower.tail)) # compute counter-probabilities, if necessary #if(!lower.tail) d[idx.x][idx.y] <- 1 - d[idx.x][idx.y] } } } # fill cumulative probabilities of values above the relevant range if(length(idx.z)) d[idx.x][idx.z] <- as.double(lower.tail) } # fill cumulative probabilities of values above n d[x > n] <- as.double(lower.tail) # logarithm, if required if(log.p) d <- log(d) # return results return(d) } #'@rdname PoissonBinomial-Distribution #'@importFrom stats stepfun #'@export qpbinom <- function(p, probs, wts = NULL, method = "DivideFFT", lower.tail = TRUE, log.p = FALSE){ ## preliminary checks # check if 'p' contains only probabilities if(!log.p){ if(is.null(p) || any(is.na(p) | p < 0 | p > 1)) stop("'p' must contain real numbers between 0 and 1!") }else{ if(is.null(p) || any(is.na(p) | p > 0)) stop("'p' must contain real numbers between -Inf and 0!") } # make sure that the value of 'method' matches one of the implemented procedures method <- match.arg(method, c("DivideFFT", "Convolve", "Characteristic", "Recursive", "Mean", "GeoMean", "GeoMeanCounter", "Poisson", "Normal", "RefinedNormal")) ## compute probabilities (does checking for the other variables) cdf <- ppbinom(NULL, probs, wts, method, lower.tail) # size of distribution size <- length(probs) # length of cdf #len <- length(cdf) # logarithm, if required if(log.p) p <- exp(p) ## compute quantiles # observable range and indices n0 <- sum(probs == 0) n1 <- sum(probs == 1) hi <- size - n0 range <- n1:hi #idx <- range + 1 # handle quantiles between 0 and 1 if(lower.tail) Q <- stepfun(cdf[range + 1], c(range, hi), right = TRUE) else Q <- stepfun(rev(cdf[range + 1]), c(hi, rev(range)), right = TRUE) # vector to store results res <- Q(p) # handle quantiles of 0 or 1 res[p == lower.tail] <- hi res[p == !lower.tail] <- n1 # return results return(res) } #'@rdname PoissonBinomial-Distribution #'@importFrom stats runif rbinom #'@export rpbinom <- function(n, probs, wts = NULL, method = "DivideFFT", generator = "Sample"){ len <- length(n) if(len > 1) n <- len # check if 'n' is NULL if(is.null(n)) stop("'n' must not be NULL!") # number of probabilities len <- length(probs) # check if 'probs' contains only probabilities if(is.null(probs) || any(is.na(probs) | probs < 0 | probs > 1)) stop("'probs' must contain real numbers between 0 and 1!") # check if 'wts' contains only integers (zeros are allowed) if(!is.null(wts) && any(is.na(wts) | wts < 0 | abs(wts - round(wts)) > 1e-07)) stop("'wts' must contain non-negative integers!") if(!is.null(wts) && length(wts) != len) stop("'probs' and 'wts' (if not NULL) must have the same length!") ## expand 'probs' according to the counts in 'wts' # if 'wts' is NULL, set it to be a vector of ones if(is.null(wts)) wts <- rep(1, len) # expand 'probs' probs <- rep(probs, wts) # make sure that the value of 'method' matches one of the implemented procedures method <- match.arg(method, c("DivideFFT", "Convolve", "Characteristic", "Recursive", "Mean", "GeoMean", "GeoMeanCounter", "Poisson", "Normal", "RefinedNormal")) # make sure that the value of 'generator' matches one of the implemented procedures generator <- match.arg(generator, c("Sample", "Bernoulli")) # generate random numbers res <- switch(generator, Sample = sample(0:length(probs), n, TRUE, dpbinom(NULL, probs, NULL, method)), Bernoulli = rpb_bernoulli(n, probs)) # return results return(res) }PoissonBinomial/NEWS.md0000644000176200001440000000662214077505360014472 0ustar liggesusers# PoissonBinomial 1.2.4 * Performance improvements and minor bug fix for quantile functions # PoissonBinomial 1.2.3 * Further optimizations of determining the number of splits for "DivideFFT" procedures. # PoissonBinomial 1.2.2 * Performance improvements for "Convolve" (and subsequently "DivideFFT") procedures. * GCD optimizations for generalized Poisson distributions have been moved to the respective C++ functions, so that packages that import them may benefit from them as well. * Removed dependence on `BH` package, as it was only needed for one constant in the C++ code, which has now been defined manually. * Adjustment to vignettes. Benchmarks (for performance comparisons) are now done with 51 runs (instead of 100) to accelerate building of the package. * Fixed a minor bug with `qgpbinom`. # PoissonBinomial 1.2.1 * Fixed a minor code issue that prevented compilation on Solaris systems. # PoissonBinomial 1.2.0 * Performance improvements for exact methods of `dgpbinom` and `pgpbinom`. * Input variable of the Rcpp implementations of all methods are made `const` to prevent inadvertent changes to them. Any package that imports headers must be updated. The 'Imports' field of the DESCRIPTION file should include a version requirement, i.e. PoissonBinomial (>= 1.2.0). * Added new random generation methods for `rpbinom` and `rgpbinom` (see function documentation). They are much faster than the old quantile-based inversion method, which has been removed. # PoissonBinomial 1.1.3 * Improved numerical accuracy of normal approximations of `dpbinom` and `dgpbinom`. # PoissonBinomial 1.1.2 * Bug fixes and performance improvements for `qpbinom` and `qgpbinom` that also affect `rpbinom` and `rgpbinom`. Quantiles were off be one; all code that use the quantile functions should be reviewed! * When requesting cumulative probabilities, the respective C++ implementations are now capable of computing these values for `lower.tail = FALSE` on their own, which improves accuracy. # PoissonBinomial 1.1.1 * Bug fixes in `ppbinom` and `pgpbinom` that caused incorrect calculation of logarithms and cumulative upper-tail probabilities. # PoissonBinomial 1.1.0 * Added exact and approximate algorithms for the generalized Poisson binomial distribution described in Zhang, Hong & Balakrishnan (2018). The non-generalized distribution is now referred to as the 'ordinary' Poisson binomial distribution. * Restructured vignettes. Added tables of content and fixed smaller issues. * Minor bug fixes for `dbinom`, `ppbinom` and `qpbinom` functions. # PoissonBinomial 1.0.2-1 * Fixes and improvements of the vignettes; no code changes. # PoissonBinomial 1.0.2 * Improvements of C++ helper function "norm_dpb" to achieve better normalization. * Bug fix of DFT-CF method ("Characteristic") so that negative probabilities are no longer possible. * Reworked vignette structure. * Added author acknowledgments to the Makevars.win file (original author was Geoff99 (https://github.com/Geoff99)). # PoissonBinomial 1.0.1 * Fixed a bug in the C++ helper function "norm_dpb" that could cause infinite loops (the function is invisible to the user, since it is only used in the C++ domain). # PoissonBinomial 1.0.0 * Initial release. * Added a `NEWS.md` file to track changes to the package. PoissonBinomial/MD50000644000176200001440000000364614077711332013705 0ustar liggesusers0f812024a61597b12eaac43516174bea *DESCRIPTION c21f39ea9e56c67467c59e8145e37f84 *NAMESPACE ddd6d61f123decd0d62ce8b69d88bd78 *NEWS.md 432cc31019faeeaa5b5dd84ac9438cf6 *R/PoissonBinomial.R b5344242db9b367842435a3e194ac4a2 *R/RcppExports.R 3f674d3f9d08d95a02e2a50f56719637 *R/gpbinom.R 270048308245407e0b1d0e6a96bb0403 *R/onUnload.R 24fa8cbff9892745c414756b62a35084 *R/pbinom.R 4d35102664dedc9d218dc08c5a649ef5 *R/utility.R a42d854c375f8954babc2f0b58d664b2 *build/partial.rdb 777d6f5268ae2c15dfb409310b8ea50f *build/vignette.rds 8f8685cdf6f2011bea10dddac9e180e8 *inst/doc/intro.R 3a8b09b23f79017133b0c6f80b5e6181 *inst/doc/intro.Rmd aa950679b4ff86851e8f8ecfc2ef6b0c *inst/doc/intro.html b150d7fc80a6f173bc18ef7b4155c9cc *inst/doc/proc_approx.R b6598d9d5ab9f43ad1d04f32f51c882f *inst/doc/proc_approx.Rmd 970b600a6e3e638733070c6e31af3c71 *inst/doc/proc_approx.html f51ed3d2f923d488ae353388f3f75b5c *inst/doc/proc_exact.R 75328c46d6b4679da381d0a066043a2f *inst/doc/proc_exact.Rmd 3978427b52629cd04d9de239bfe2e0aa *inst/doc/proc_exact.html e8c98e39c7a9fcb4b53cb90eb37e61cc *inst/doc/use_with_rcpp.R 1ad6b0b699e25321afccd3de142df10e *inst/doc/use_with_rcpp.Rmd d46b4d3c62d3891ce0420f1e8841ac62 *inst/doc/use_with_rcpp.html f7993d7b3903b458c35c30fdb9519634 *inst/include/PoissonBinomial.h b01b94307d422c24cde66458c1933196 *inst/include/PoissonBinomial_RcppExports.h 0f5dc2cdc27dc335dd6b6e41d8abc7b3 *man/GenPoissonBinomial-Distribution.Rd 0a0860c11cc498ff73b84b7e2bb994a0 *man/PoissonBinomial-Distribution.Rd f2b6926b460255c8f0590b5ab331026b *man/PoissonBinomial-package.Rd 1837f292513fd2ccf2f230928449178a *src/Makevars 47ead4ae58142ddd48f059009e3eb79b *src/PoissonBinomial.cpp ac089f2dc6844a4bcd1ce52ee5b1617f *src/RcppExports.cpp 3a8b09b23f79017133b0c6f80b5e6181 *vignettes/intro.Rmd b6598d9d5ab9f43ad1d04f32f51c882f *vignettes/proc_approx.Rmd 75328c46d6b4679da381d0a066043a2f *vignettes/proc_exact.Rmd 1ad6b0b699e25321afccd3de142df10e *vignettes/use_with_rcpp.Rmd PoissonBinomial/inst/0000755000176200001440000000000014077633372014350 5ustar liggesusersPoissonBinomial/inst/doc/0000755000176200001440000000000014077633372015115 5ustar liggesusersPoissonBinomial/inst/doc/proc_approx.html0000644000176200001440000046640214077633301020343 0ustar liggesusers Approximate Procedures

Approximate Procedures

Ordinary Poisson Binomial Distribution

Poisson Approximation

The Poisson Approximation (DC) approach is requested with method = "Poisson". It is based on a Poisson distribution, whose parameter is the sum of the probabilities of success.

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "Poisson")
#>  [1] 2.263593e-16 8.154460e-15 1.468798e-13 1.763753e-12 1.588454e-11
#>  [6] 1.144462e-10 6.871428e-10 3.536273e-09 1.592402e-08 6.373926e-08
#> [11] 2.296169e-07 7.519830e-07 2.257479e-06 6.255718e-06 1.609704e-05
#> [16] 3.865908e-05 8.704191e-05 1.844490e-04 3.691482e-04 6.999128e-04
#> [21] 1.260697e-03 2.162661e-03 3.541299e-03 5.546660e-03 8.325631e-03
#> [26] 1.199704e-02 1.662255e-02 2.217842e-02 2.853445e-02 3.544609e-02
#> [31] 4.256414e-02 4.946284e-02 5.568342e-02 6.078674e-02 6.440607e-02
#> [36] 6.629115e-02 6.633610e-02 6.458699e-02 6.122916e-02 5.655755e-02
#> [41] 5.093630e-02 4.475488e-02 3.838734e-02 3.216003e-02 2.633059e-02
#> [46] 2.107875e-02 1.650760e-02 1.265269e-02 9.495953e-03 6.981348e-03
#> [51] 5.029979e-03 3.552981e-03 2.461424e-03 1.673044e-03 1.116119e-03
#> [56] 7.310458e-04 4.702766e-04 2.972182e-04 1.846053e-04 1.127169e-04
#> [61] 6.767601e-05 9.288901e-05
ppbinom(NULL, pp, wt, "Poisson")
#>  [1] 2.263593e-16 8.380820e-15 1.552606e-13 1.919013e-12 1.780355e-11
#>  [6] 1.322498e-10 8.193925e-10 4.355666e-09 2.027968e-08 8.401894e-08
#> [11] 3.136359e-07 1.065619e-06 3.323097e-06 9.578815e-06 2.567585e-05
#> [16] 6.433494e-05 1.513768e-04 3.358259e-04 7.049740e-04 1.404887e-03
#> [21] 2.665584e-03 4.828245e-03 8.369543e-03 1.391620e-02 2.224184e-02
#> [26] 3.423887e-02 5.086142e-02 7.303984e-02 1.015743e-01 1.370204e-01
#> [31] 1.795845e-01 2.290474e-01 2.847308e-01 3.455175e-01 4.099236e-01
#> [36] 4.762147e-01 5.425508e-01 6.071378e-01 6.683670e-01 7.249245e-01
#> [41] 7.758608e-01 8.206157e-01 8.590031e-01 8.911631e-01 9.174937e-01
#> [46] 9.385724e-01 9.550800e-01 9.677327e-01 9.772287e-01 9.842100e-01
#> [51] 9.892400e-01 9.927930e-01 9.952544e-01 9.969275e-01 9.980436e-01
#> [56] 9.987746e-01 9.992449e-01 9.995421e-01 9.997267e-01 9.998394e-01
#> [61] 9.999071e-01 1.000000e+00

A comparison with exact computation shows that the approximation quality of the PA procedure increases with smaller probabilities of success. The reason is that the Poisson Binomial distribution approaches a Poisson distribution when the probabilities are very small.

set.seed(1)

# U(0, 1) random probabilities of success
pp <- runif(20)
dpbinom(NULL, pp, method = "Poisson")
#>  [1] 0.0000150619 0.0001672374 0.0009284471 0.0034362888 0.0095385726
#>  [6] 0.0211820073 0.0391985129 0.0621763578 0.0862956727 0.1064633767
#> [11] 0.1182099310 0.1193204840 0.1104046811 0.0942969970 0.0747865595
#> [16] 0.0553587178 0.0384166744 0.0250913815 0.0154776776 0.0090449448
#> [21] 0.0101904160
dpbinom(NULL, pp)
#>  [1] 4.401037e-11 7.873212e-09 3.624610e-07 7.952504e-06 1.014602e-04
#>  [6] 8.311558e-04 4.642470e-03 1.838525e-02 5.297347e-02 1.129135e-01
#> [11] 1.798080e-01 2.148719e-01 1.926468e-01 1.289706e-01 6.384266e-02
#> [16] 2.299142e-02 5.871700e-03 1.021142e-03 1.129421e-04 6.977021e-06
#> [21] 1.747603e-07
summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -9.555e-02  1.506e-05  9.437e-03  0.000e+00  2.407e-02  4.379e-02

# U(0, 0.01) random probabilities of success
pp <- runif(20, 0, 0.01)
dpbinom(NULL, pp, method = "Poisson")
#>  [1] 9.095763e-01 8.620639e-02 4.085167e-03 1.290592e-04 3.057942e-06
#>  [6] 5.796418e-08 9.156063e-10 1.239684e-11 1.468661e-13 1.546605e-15
#> [11] 1.465817e-17 1.262953e-19 9.974852e-22 7.272161e-24 4.923067e-26
#> [16] 3.110605e-28 1.842575e-30 1.027251e-32 5.408845e-35 2.698058e-37
#> [21] 1.284357e-39
dpbinom(NULL, pp)
#>  [1] 9.093051e-01 8.672423e-02 3.861917e-03 1.066765e-04 2.048094e-06
#>  [6] 2.902198e-08 3.145829e-10 2.667571e-12 1.794592e-14 9.656258e-17
#> [11] 4.170114e-19 1.444465e-21 3.994453e-24 8.738444e-27 1.490372e-29
#> [16] 1.938487e-32 1.859939e-35 1.249654e-38 5.381374e-42 1.245845e-45
#> [21] 9.511846e-50
summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -5.178e-04  0.000e+00  0.000e+00  0.000e+00  6.000e-10  2.712e-04

Arithmetic Mean Binomial Approximation

The Arithmetic Mean Binomial Approximation (AMBA) approach is requested with method = "Mean". It is based on a Binomial distribution, whose parameter is the arithmetic mean of the probabilities of success.

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
mean(rep(pp, wt))
#> [1] 0.5905641

dpbinom(NULL, pp, wt, "Mean")
#>  [1] 2.204668e-24 1.939788e-22 8.393759e-21 2.381049e-19 4.979863e-18
#>  [6] 8.188480e-17 1.102354e-15 1.249300e-14 1.216331e-13 1.033156e-12
#> [11] 7.749086e-12 5.182139e-11 3.114432e-10 1.693217e-09 8.373498e-09
#> [16] 3.784379e-08 1.569327e-07 5.991812e-07 2.112610e-06 6.896287e-06
#> [21] 2.088890e-05 5.882491e-05 1.542694e-04 3.773093e-04 8.616897e-04
#> [26] 1.839474e-03 3.673702e-03 6.868933e-03 1.203071e-02 1.974641e-02
#> [31] 3.038072e-02 4.382068e-02 5.925587e-02 7.510979e-02 8.921887e-02
#> [36] 9.927353e-02 1.034154e-01 1.007871e-01 9.181496e-02 7.810121e-02
#> [41] 6.195859e-02 4.577391e-02 3.143980e-02 2.003761e-02 1.182352e-02
#> [46] 6.442647e-03 3.232269e-03 1.487928e-03 6.259647e-04 2.395401e-04
#> [51] 8.292214e-05 2.579729e-05 7.155695e-06 1.752667e-06 3.745215e-07
#> [56] 6.875325e-08 1.062521e-08 1.344354e-09 1.337294e-10 9.807924e-12
#> [61] 4.715599e-13 1.115034e-14
ppbinom(NULL, pp, wt, "Mean")
#>  [1] 2.204668e-24 1.961834e-22 8.589942e-21 2.466948e-19 5.226557e-18
#>  [6] 8.711136e-17 1.189465e-15 1.368247e-14 1.353155e-13 1.168472e-12
#> [11] 8.917558e-12 6.073895e-11 3.721822e-10 2.065399e-09 1.043890e-08
#> [16] 4.828268e-08 2.052154e-07 8.043966e-07 2.917007e-06 9.813294e-06
#> [21] 3.070220e-05 8.952711e-05 2.437965e-04 6.211058e-04 1.482796e-03
#> [26] 3.322270e-03 6.995972e-03 1.386490e-02 2.589561e-02 4.564203e-02
#> [31] 7.602274e-02 1.198434e-01 1.790993e-01 2.542091e-01 3.434279e-01
#> [36] 4.427015e-01 5.461169e-01 6.469040e-01 7.387189e-01 8.168201e-01
#> [41] 8.787787e-01 9.245526e-01 9.559924e-01 9.760300e-01 9.878536e-01
#> [46] 9.942962e-01 9.975285e-01 9.990164e-01 9.996424e-01 9.998819e-01
#> [51] 9.999648e-01 9.999906e-01 9.999978e-01 9.999995e-01 9.999999e-01
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

A comparison with exact computation shows that the approximation quality of the AMBA procedure increases when the probabilities of success are closer to each other. The reason is that, although the expectation remains unchanged, the distribution’s variance becomes smaller the less the probabilities differ. Since this variance is minimized by equal probabilities (but still underestimated), the AMBA method is best suited for situations with very similar probabilities of success.

set.seed(1)

# U(0, 1) random probabilities of success
pp <- runif(20)
dpbinom(NULL, pp, method = "Mean")
#>  [1] 9.203176e-08 2.297178e-06 2.723611e-05 2.039497e-04 1.081780e-03
#>  [6] 4.320318e-03 1.347977e-02 3.364646e-02 6.823695e-02 1.135495e-01
#> [11] 1.558851e-01 1.768638e-01 1.655492e-01 1.271454e-01 7.934094e-02
#> [16] 3.960811e-02 1.544760e-02 4.536271e-03 9.435709e-04 1.239589e-04
#> [21] 7.735255e-06
dpbinom(NULL, pp)
#>  [1] 4.401037e-11 7.873212e-09 3.624610e-07 7.952504e-06 1.014602e-04
#>  [6] 8.311558e-04 4.642470e-03 1.838525e-02 5.297347e-02 1.129135e-01
#> [11] 1.798080e-01 2.148719e-01 1.926468e-01 1.289706e-01 6.384266e-02
#> [16] 2.299142e-02 5.871700e-03 1.021142e-03 1.129421e-04 6.977021e-06
#> [21] 1.747603e-07
summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -3.801e-02  2.290e-06  6.360e-04  0.000e+00  8.837e-03  1.662e-02

# U(0.3, 0.5) random probabilities of success
pp <- runif(20, 0.3, 0.5)
dpbinom(NULL, pp, method = "Mean")
#>  [1] 4.348271e-05 5.672598e-04 3.515127e-03 1.375712e-02 3.813748e-02
#>  [6] 7.960444e-02 1.298114e-01 1.693472e-01 1.795010e-01 1.561137e-01
#> [11] 1.120132e-01 6.642197e-02 3.249439e-02 1.304339e-02 4.253984e-03
#> [16] 1.109919e-03 2.262438e-04 3.472347e-05 3.774915e-06 2.591904e-07
#> [21] 8.453263e-09
dpbinom(NULL, pp)
#>  [1] 4.015121e-05 5.344728e-04 3.370391e-03 1.338738e-02 3.756479e-02
#>  [6] 7.915145e-02 1.299445e-01 1.702071e-01 1.806555e-01 1.569062e-01
#> [11] 1.121277e-01 6.604356e-02 3.200604e-02 1.269255e-02 4.078679e-03
#> [16] 1.045709e-03 2.088926e-04 3.133484e-05 3.320483e-06 2.216332e-07
#> [21] 7.008006e-09
summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -1.155e-03  1.400e-09  1.735e-05  0.000e+00  3.508e-04  5.727e-04

# U(0.39, 0.41) random probabilities of success
pp <- runif(20, 0.39, 0.41)
dpbinom(NULL, pp, method = "Mean")
#>  [1] 3.638616e-05 4.854405e-04 3.076305e-03 1.231262e-02 3.490673e-02
#>  [6] 7.451247e-02 1.242621e-01 1.657824e-01 1.797056e-01 1.598344e-01
#> [11] 1.172824e-01 7.112295e-02 3.558286e-02 1.460687e-02 4.871885e-03
#> [16] 1.299951e-03 2.709859e-04 4.253314e-05 4.728746e-06 3.320414e-07
#> [21] 1.107470e-08
dpbinom(NULL, pp)
#>  [1] 3.636149e-05 4.851935e-04 3.075192e-03 1.230970e-02 3.490204e-02
#>  [6] 7.450845e-02 1.242626e-01 1.657891e-01 1.797153e-01 1.598415e-01
#> [11] 1.172840e-01 7.112011e-02 3.557873e-02 1.460374e-02 4.870251e-03
#> [16] 1.299328e-03 2.708111e-04 4.249771e-05 4.723809e-06 3.316172e-07
#> [21] 1.105772e-08
summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -9.641e-06  1.700e-11  1.747e-07  0.000e+00  2.844e-06  4.689e-06

Geometric Mean Binomial Approximation - Variant A

The Geometric Mean Binomial Approximation (Variant A) (GMBA-A) approach is requested with method = "GeoMean". It is based on a Binomial distribution, whose parameter is the geometric mean of the probabilities of success: \[\hat{p} = \sqrt[n]{p_1 \cdot ... \cdot p_n}\]

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
prod(rep(pp, wt))^(1/sum(wt))
#> [1] 0.4669916

dpbinom(NULL, pp, wt, "GeoMean")
#>  [1] 2.141782e-17 1.144670e-15 3.008684e-14 5.184208e-13 6.586057e-12
#>  [6] 6.578175e-11 5.379195e-10 3.703028e-09 2.189958e-08 1.129911e-07
#> [11] 5.147813e-07 2.091103e-06 7.633772e-06 2.520966e-05 7.572779e-05
#> [16] 2.078916e-04 5.236606e-04 1.214475e-03 2.601021e-03 5.157435e-03
#> [21] 9.489168e-03 1.623184e-02 2.585712e-02 3.841422e-02 5.328923e-02
#> [26] 6.909972e-02 8.382634e-02 9.520502e-02 1.012875e-01 1.009827e-01
#> [31] 9.437363e-02 8.268481e-02 6.791600e-02 5.229152e-02 3.772988e-02
#> [36] 2.550094e-02 1.613623e-02 9.552467e-03 5.285892e-03 2.731219e-03
#> [41] 1.316117e-03 5.906156e-04 2.464113e-04 9.539397e-05 3.419132e-05
#> [46] 1.131690e-05 3.448772e-06 9.643463e-07 2.464308e-07 5.728188e-08
#> [51] 1.204491e-08 2.276152e-09 3.835067e-10 5.705775e-11 7.406038e-12
#> [56] 8.258409e-13 7.752374e-14 5.958061e-15 3.600079e-16 1.603823e-17
#> [61] 4.683928e-19 6.727527e-21
ppbinom(NULL, pp, wt, "GeoMean")
#>  [1] 2.141782e-17 1.166088e-15 3.125293e-14 5.496737e-13 7.135731e-12
#>  [6] 7.291748e-11 6.108370e-10 4.313865e-09 2.621345e-08 1.392046e-07
#> [11] 6.539859e-07 2.745088e-06 1.037886e-05 3.558852e-05 1.113163e-04
#> [16] 3.192079e-04 8.428685e-04 2.057343e-03 4.658364e-03 9.815799e-03
#> [21] 1.930497e-02 3.553681e-02 6.139393e-02 9.980815e-02 1.530974e-01
#> [26] 2.221971e-01 3.060234e-01 4.012285e-01 5.025160e-01 6.034986e-01
#> [31] 6.978723e-01 7.805571e-01 8.484731e-01 9.007646e-01 9.384945e-01
#> [36] 9.639954e-01 9.801316e-01 9.896841e-01 9.949700e-01 9.977012e-01
#> [41] 9.990173e-01 9.996080e-01 9.998544e-01 9.999498e-01 9.999840e-01
#> [46] 9.999953e-01 9.999987e-01 9.999997e-01 9.999999e-01 1.000000e+00
#> [51] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

It is known that the geometric mean of the probabilities of success is always smaller than their arithmetic mean. Thus, we get a stochastically smaller binomial distribution. A comparison with exact computation shows that the approximation quality of the GMBA-A procedure increases when the probabilities of success are closer to each other:

set.seed(1)

# U(0, 1) random probabilities of success
pp <- runif(20)
dpbinom(NULL, pp, method = "GeoMean")
#>  [1] 4.557123e-06 7.742984e-05 6.249130e-04 3.185359e-03 1.150098e-02
#>  [6] 3.126602e-02 6.640491e-02 1.128282e-01 1.557610e-01 1.764351e-01
#> [11] 1.648790e-01 1.273387e-01 8.113517e-02 4.241734e-02 1.801777e-02
#> [16] 6.122779e-03 1.625497e-03 3.249263e-04 4.600672e-05 4.114199e-06
#> [21] 1.747603e-07
dpbinom(NULL, pp)
#>  [1] 4.401037e-11 7.873212e-09 3.624610e-07 7.952504e-06 1.014602e-04
#>  [6] 8.311558e-04 4.642470e-03 1.838525e-02 5.297347e-02 1.129135e-01
#> [11] 1.798080e-01 2.148719e-01 1.926468e-01 1.289706e-01 6.384266e-02
#> [16] 2.299142e-02 5.871700e-03 1.021142e-03 1.129421e-04 6.977021e-06
#> [21] 1.747603e-07
summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp))
#>     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#> -0.11151 -0.01493  0.00000  0.00000  0.01140  0.10279

# U(0.4, 0.6) random probabilities of success
pp <- runif(20, 0.4, 0.6)
dpbinom(NULL, pp, method = "GeoMean")
#>  [1] 1.317886e-06 2.551200e-05 2.345875e-04 1.362363e-03 5.604265e-03
#>  [6] 1.735823e-02 4.200318e-02 8.131092e-02 1.278907e-01 1.650496e-01
#> [11] 1.757292e-01 1.546280e-01 1.122499e-01 6.686047e-02 3.235759e-02
#> [16] 1.252775e-02 3.789307e-03 8.629936e-04 1.392173e-04 1.418425e-05
#> [21] 6.864565e-07
dpbinom(NULL, pp)
#>  [1] 1.046635e-06 2.098187e-05 1.993006e-04 1.192678e-03 5.043114e-03
#>  [6] 1.601621e-02 3.964022e-02 7.829406e-02 1.253351e-01 1.642218e-01
#> [11] 1.770816e-01 1.574210e-01 1.151700e-01 6.896627e-02 3.347297e-02
#> [16] 1.296524e-02 3.913788e-03 8.873960e-04 1.421738e-04 1.435144e-05
#> [21] 6.864565e-07
summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.0029201 -0.0004375  0.0000000  0.0000000  0.0005612  0.0030169

# U(0.49, 0.51) random probabilities of success
pp <- runif(20, 0.49, 0.51)
dpbinom(NULL, pp, method = "GeoMean")
#>  [1] 9.491177e-07 1.899145e-05 1.805052e-04 1.083550e-03 4.607292e-03
#>  [6] 1.475040e-02 3.689366e-02 7.382266e-02 1.200193e-01 1.601024e-01
#> [11] 1.761970e-01 1.602558e-01 1.202494e-01 7.403508e-02 3.703527e-02
#> [16] 1.482120e-02 4.633845e-03 1.090839e-03 1.818935e-04 1.915586e-05
#> [21] 9.582517e-07
dpbinom(NULL, pp)
#>  [1] 9.472606e-07 1.895984e-05 1.802539e-04 1.082315e-03 4.603107e-03
#>  [6] 1.474011e-02 3.687497e-02 7.379784e-02 1.199969e-01 1.600932e-01
#> [11] 1.762060e-01 1.602781e-01 1.202742e-01 7.405383e-02 3.704562e-02
#> [16] 1.482542e-02 4.635093e-03 1.091093e-03 1.819256e-04 1.915775e-05
#> [21] 9.582517e-07
summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -2.485e-05 -4.219e-06  0.000e+00  0.000e+00  4.185e-06  2.482e-05

Geometric Mean Binomial Approximation - Variant B

The Geometric Mean Binomial Approximation (Variant B) (GMBA-B) approach is requested with method = "GeoMeanCounter". It is based on a Binomial distribution, whose parameter is 1 minus the geometric mean of the probabilities of failure: \[\hat{p} = 1 - \sqrt[n]{(1 - p_1) \cdot ... \cdot (1 - p_n)}\]

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
1 - prod(1 - rep(pp, wt))^(1/sum(wt))
#> [1] 0.7275426

dpbinom(NULL, pp, wt, "GeoMeanCounter")
#>  [1] 3.574462e-35 5.822379e-33 4.664248e-31 2.449471e-29 9.484189e-28
#>  [6] 2.887121e-26 7.195512e-25 1.509685e-23 2.721134e-22 4.279009e-21
#> [11] 5.941642e-20 7.356037e-19 8.184508e-18 8.237686e-17 7.541858e-16
#> [16] 6.310225e-15 4.844429e-14 3.424255e-13 2.235148e-12 1.350769e-11
#> [21] 7.574609e-11 3.948978e-10 1.917264e-09 8.681177e-09 3.670379e-08
#> [26] 1.450549e-07 5.363170e-07 1.856461e-06 6.019586e-06 1.829121e-05
#> [31] 5.209921e-05 1.391205e-04 3.482749e-04 8.172712e-04 1.797236e-03
#> [36] 3.702208e-03 7.139892e-03 1.288219e-02 2.172588e-02 3.421374e-02
#> [41] 5.024851e-02 6.872559e-02 8.738947e-02 1.031108e-01 1.126377e-01
#> [46] 1.136267e-01 1.055364e-01 8.994057e-02 7.004907e-02 4.962603e-02
#> [51] 3.180393e-02 1.831737e-02 9.406320e-03 4.265268e-03 1.687339e-03
#> [56] 5.734528e-04 1.640669e-04 3.843049e-05 7.077304e-06 9.609416e-07
#> [61] 8.553338e-08 3.744258e-09
ppbinom(NULL, pp, wt, "GeoMeanCounter")
#>  [1] 3.574462e-35 5.858123e-33 4.722829e-31 2.496699e-29 9.733859e-28
#>  [6] 2.984460e-26 7.493958e-25 1.584624e-23 2.879597e-22 4.566969e-21
#> [11] 6.398339e-20 7.995871e-19 8.984095e-18 9.136095e-17 8.455467e-16
#> [16] 7.155772e-15 5.560007e-14 3.980256e-13 2.633173e-12 1.614086e-11
#> [21] 9.188695e-11 4.867847e-10 2.404049e-09 1.108523e-08 4.778901e-08
#> [26] 1.928440e-07 7.291610e-07 2.585622e-06 8.605207e-06 2.689642e-05
#> [31] 7.899562e-05 2.181161e-04 5.663910e-04 1.383662e-03 3.180899e-03
#> [36] 6.883107e-03 1.402300e-02 2.690519e-02 4.863107e-02 8.284481e-02
#> [41] 1.330933e-01 2.018189e-01 2.892084e-01 3.923192e-01 5.049569e-01
#> [46] 6.185836e-01 7.241200e-01 8.140606e-01 8.841097e-01 9.337357e-01
#> [51] 9.655396e-01 9.838570e-01 9.932633e-01 9.975286e-01 9.992159e-01
#> [56] 9.997894e-01 9.999534e-01 9.999919e-01 9.999989e-01 9.999999e-01
#> [61] 1.000000e+00 1.000000e+00

It is known that the geometric mean of the probabilities of failure is always smaller than their arithmetic mean. As a result, 1 minus the geometric mean is larger than 1 minus the arithmetic mean. Thus, we get a stochastically larger binomial distribution. A comparison with exact computation shows that the approximation quality of the GMBA-B procedure again increases when the probabilities of success are closer to each other:

set.seed(1)

# U(0, 1) random probabilities of success
pp <- runif(20)
dpbinom(NULL, pp, method = "GeoMeanCounter")
#>  [1] 4.401037e-11 2.019854e-09 4.403304e-08 6.062685e-07 5.912743e-06
#>  [6] 4.341843e-05 2.490859e-04 1.143179e-03 4.262876e-03 1.304297e-02
#> [11] 3.292337e-02 6.868258e-02 1.182069e-01 1.669263e-01 1.915269e-01
#> [16] 1.758024e-01 1.260695e-01 6.807004e-02 2.603394e-02 6.288561e-03
#> [21] 7.215333e-04
dpbinom(NULL, pp)
#>  [1] 4.401037e-11 7.873212e-09 3.624610e-07 7.952504e-06 1.014602e-04
#>  [6] 8.311558e-04 4.642470e-03 1.838525e-02 5.297347e-02 1.129135e-01
#> [11] 1.798080e-01 2.148719e-01 1.926468e-01 1.289706e-01 6.384266e-02
#> [16] 2.299142e-02 5.871700e-03 1.021142e-03 1.129421e-04 6.977021e-06
#> [21] 1.747603e-07
summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -1.469e-01 -1.724e-02 -3.200e-07  0.000e+00  2.592e-02  1.528e-01

# U(0.4, 0.6) random probabilities of success
pp <- runif(20, 0.4, 0.6)
dpbinom(NULL, pp, method = "GeoMeanCounter")
#>  [1] 1.046635e-06 2.073844e-05 1.951870e-04 1.160254e-03 4.885321e-03
#>  [6] 1.548796e-02 3.836059e-02 7.600922e-02 1.223688e-01 1.616443e-01
#> [11] 1.761588e-01 1.586582e-01 1.178895e-01 7.187414e-02 3.560358e-02
#> [16] 1.410928e-02 4.368234e-03 1.018282e-03 1.681387e-04 1.753458e-05
#> [21] 8.685930e-07
dpbinom(NULL, pp)
#>  [1] 1.046635e-06 2.098187e-05 1.993006e-04 1.192678e-03 5.043114e-03
#>  [6] 1.601621e-02 3.964022e-02 7.829406e-02 1.253351e-01 1.642218e-01
#> [11] 1.770816e-01 1.574210e-01 1.151700e-01 6.896627e-02 3.347297e-02
#> [16] 1.296524e-02 3.913788e-03 8.873960e-04 1.421738e-04 1.435144e-05
#> [21] 6.864565e-07
summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.0029663 -0.0005283  0.0000000  0.0000000  0.0004544  0.0029079

# U(0.49, 0.51) random probabilities of success
pp <- runif(20, 0.49, 0.51)
dpbinom(NULL, pp, method = "GeoMeanCounter")
#>  [1] 9.472606e-07 1.895800e-05 1.802225e-04 1.082065e-03 4.601880e-03
#>  [6] 1.473596e-02 3.686475e-02 7.377926e-02 1.199722e-01 1.600709e-01
#> [11] 1.761969e-01 1.602871e-01 1.202964e-01 7.407854e-02 3.706427e-02
#> [16] 1.483571e-02 4.639289e-03 1.092334e-03 1.821786e-04 1.918963e-05
#> [21] 9.601293e-07
dpbinom(NULL, pp)
#>  [1] 9.472606e-07 1.895984e-05 1.802539e-04 1.082315e-03 4.603107e-03
#>  [6] 1.474011e-02 3.687497e-02 7.379784e-02 1.199969e-01 1.600932e-01
#> [11] 1.762060e-01 1.602781e-01 1.202742e-01 7.405383e-02 3.704562e-02
#> [16] 1.482542e-02 4.635093e-03 1.091093e-03 1.819256e-04 1.915775e-05
#> [21] 9.582517e-07
summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp))
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -2.467e-05 -4.159e-06  0.000e+00  0.000e+00  4.196e-06  2.470e-05

Normal Approximation

The Normal Approximation (NA) approach is requested with method = "Normal". It is based on a Normal distribution, whose parameters are derived from the theoretical mean and variance of the input probabilities of success.

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "Normal")
#>  [1] 2.552770e-32 1.207834e-30 5.219650e-29 2.022022e-27 7.021785e-26
#>  [6] 2.185917e-24 6.100302e-23 1.526188e-21 3.423032e-20 6.882841e-19
#> [11] 1.240755e-17 2.005270e-16 2.905604e-15 3.774712e-14 4.396661e-13
#> [16] 4.591569e-12 4.299381e-11 3.609645e-10 2.717342e-09 1.834224e-08
#> [21] 1.110185e-07 6.025326e-07 2.932337e-06 1.279682e-05 5.007841e-05
#> [26] 1.757379e-04 5.530339e-04 1.560683e-03 3.949650e-03 8.963710e-03
#> [31] 1.824341e-02 3.329786e-02 5.450317e-02 8.000636e-02 1.053238e-01
#> [36] 1.243451e-01 1.316535e-01 1.250080e-01 1.064497e-01 8.129267e-02
#> [41] 5.567468e-02 3.419491e-02 1.883477e-02 9.303614e-03 4.121280e-03
#> [46] 1.637186e-03 5.832371e-04 1.863241e-04 5.337829e-05 1.371282e-05
#> [51] 3.159002e-06 6.525712e-07 1.208800e-07 2.007813e-08 2.990389e-09
#> [56] 3.993563e-10 4.782059e-11 5.134327e-12 4.942641e-13 4.266130e-14
#> [61] 3.301422e-15 2.441468e-16
ppbinom(NULL, pp, wt, "Normal")
#>  [1] 2.552770e-32 1.233362e-30 5.342987e-29 2.075452e-27 7.229330e-26
#>  [6] 2.258210e-24 6.326123e-23 1.589449e-21 3.581977e-20 7.241039e-19
#> [11] 1.313165e-17 2.136587e-16 3.119262e-15 4.086639e-14 4.805325e-13
#> [16] 5.072102e-12 4.806591e-11 4.090305e-10 3.126373e-09 2.146861e-08
#> [21] 1.324871e-07 7.350197e-07 3.667357e-06 1.646417e-05 6.654258e-05
#> [26] 2.422805e-04 7.953144e-04 2.355997e-03 6.305647e-03 1.526936e-02
#> [31] 3.351276e-02 6.681062e-02 1.213138e-01 2.013201e-01 3.066439e-01
#> [36] 4.309891e-01 5.626426e-01 6.876506e-01 7.941003e-01 8.753930e-01
#> [41] 9.310676e-01 9.652625e-01 9.840973e-01 9.934009e-01 9.975222e-01
#> [46] 9.991594e-01 9.997426e-01 9.999290e-01 9.999823e-01 9.999960e-01
#> [51] 9.999992e-01 9.999999e-01 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

A comparison with exact computation shows that the approximation quality of the NA procedure increases with larger numbers of probabilities of success:

set.seed(1)

# 10 random probabilities of success
pp <- runif(10)
dpn <- dpbinom(NULL, pp, method = "Normal")
dpd <- dpbinom(NULL, pp)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.0053305 -0.0010422  0.0005271  0.0000000  0.0016579  0.0026553

# 1000 random probabilities of success
pp <- runif(1000)
dpn <- dpbinom(NULL, pp, method = "Normal")
dpd <- dpbinom(NULL, pp)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -8.412e-06  0.000e+00  0.000e+00  0.000e+00  0.000e+00  3.815e-06

# 100000 random probabilities of success
pp <- runif(100000)
dpn <- dpbinom(NULL, pp, method = "Normal")
dpd <- dpbinom(NULL, pp)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -4.484e-09  0.000e+00  8.720e-13  0.000e+00  4.914e-10  2.734e-09

Refined Normal Approximation

The Refined Normal Approximation (RNA) approach is requested with method = "RefinedNormal". It is based on a Normal distribution, whose parameters are derived from the theoretical mean, variance and skewness of the input probabilities of success.

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "RefinedNormal")
#>  [1] 2.579548e-31 1.128297e-29 4.507210e-28 1.611452e-26 5.156486e-25
#>  [6] 1.476806e-23 3.785627e-22 8.685911e-21 1.783953e-19 3.280039e-18
#> [11] 5.399492e-17 7.959230e-16 1.050796e-14 1.242802e-13 1.317210e-12
#> [16] 1.251531e-11 1.066498e-10 8.155390e-10 5.599786e-09 3.455053e-08
#> [21] 1.917106e-07 9.574753e-07 4.308224e-06 1.748069e-05 6.401569e-05
#> [26] 2.117447e-04 6.329842e-04 1.710740e-03 4.180480e-03 9.234968e-03
#> [31] 1.843341e-02 3.322175e-02 5.401115e-02 7.912655e-02 1.043358e-01
#> [36] 1.236782e-01 1.316360e-01 1.256489e-01 1.074322e-01 8.218619e-02
#> [41] 5.618825e-02 3.428872e-02 1.865323e-02 9.032795e-03 3.886960e-03
#> [46] 1.483178e-03 5.004545e-04 1.487517e-04 3.873113e-05 8.757189e-06
#> [51] 1.693868e-06 2.722346e-07 3.388544e-08 2.218356e-09 0.000000e+00
#> [56] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [61] 0.000000e+00 0.000000e+00
ppbinom(NULL, pp, wt, "RefinedNormal")
#>  [1] 2.579548e-31 1.154092e-29 4.622620e-28 1.657678e-26 5.322254e-25
#>  [6] 1.530028e-23 3.938629e-22 9.079774e-21 1.874750e-19 3.467514e-18
#> [11] 5.746244e-17 8.533855e-16 1.136134e-14 1.356415e-13 1.452852e-12
#> [16] 1.396817e-11 1.206179e-10 9.361569e-10 6.535943e-09 4.108647e-08
#> [21] 2.327971e-07 1.190272e-06 5.498496e-06 2.297918e-05 8.699487e-05
#> [26] 2.987396e-04 9.317238e-04 2.642463e-03 6.822944e-03 1.605791e-02
#> [31] 3.449132e-02 6.771307e-02 1.217242e-01 2.008508e-01 3.051866e-01
#> [36] 4.288648e-01 5.605008e-01 6.861497e-01 7.935820e-01 8.757682e-01
#> [41] 9.319564e-01 9.662451e-01 9.848984e-01 9.939312e-01 9.978181e-01
#> [46] 9.993013e-01 9.998018e-01 9.999505e-01 9.999892e-01 9.999980e-01
#> [51] 9.999997e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

A comparison with exact computation shows that the approximation quality of the RNA procedure increases with larger numbers of probabilities of success:

set.seed(1)

# 10 random probabilities of success
pp <- runif(10)
dpn <- dpbinom(NULL, pp, method = "RefinedNormal")
dpd <- dpbinom(NULL, pp)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.0039538 -0.0006920  0.0003543  0.0000000  0.0017167  0.0023597

# 1000 random probabilities of success
pp <- runif(1000)
dpn <- dpbinom(NULL, pp, method = "RefinedNormal")
dpd <- dpbinom(NULL, pp)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -2.974e-06  0.000e+00  0.000e+00  0.000e+00  0.000e+00  2.270e-06

# 100000 random probabilities of success
pp <- runif(100000)
dpn <- dpbinom(NULL, pp, method = "RefinedNormal")
dpd <- dpbinom(NULL, pp)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -3.126e-09  0.000e+00  6.200e-13  0.000e+00  4.616e-10  2.293e-09

Processing Speed Comparisons

To assess the performance of the approximation procedures, we use the microbenchmark package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2).

library(microbenchmark)
set.seed(1)

f1 <- function() dpbinom(NULL, runif(4000), method = "Normal")
f2 <- function() dpbinom(NULL, runif(4000), method = "RefinedNormal")
f3 <- function() dpbinom(NULL, runif(4000), method = "Poisson")
f4 <- function() dpbinom(NULL, runif(4000), method = "Mean")
f5 <- function() dpbinom(NULL, runif(4000), method = "GeoMean")
f6 <- function() dpbinom(NULL, runif(4000), method = "GeoMeanCounter")
f7 <- function() dpbinom(NULL, runif(4000), method = "DivideFFT")

microbenchmark(f1(), f2(), f3(), f4(), f5(), f6(), f7(), times = 51)
#> Unit: microseconds
#>  expr    min      lq      mean median      uq     max neval
#>  f1()  639.8  650.30  746.0882  663.7  695.20  2041.1    51
#>  f2()  789.9  803.95  886.6608  815.4  845.05  2190.7    51
#>  f3()  690.4  695.10  810.5333  711.0  739.75  4181.5    51
#>  f4() 1080.6 1085.95 1160.2745 1111.6 1158.40  2007.5    51
#>  f5() 1150.5 1160.80 1266.4745 1187.6 1239.90  2689.5    51
#>  f6() 1171.7 1176.85 1266.8588 1202.4 1259.85  2610.1    51
#>  f7() 9133.3 9309.05 9660.7471 9408.4 9822.85 13330.0    51

Clearly, the NA procedure is the fastest, followed by the RNA and PA methods. The next fastest algorithms are AMBA, GMBA-A and GMBA-B. They exhibit almost equal mean execution speed, with the AMBA algorithm being slightly faster. All of the approximation procedures outperform the fastest exact approach, DC-FFT, by far.

Generalized Poisson Binomial Distribution

Generalized Normal Approximation

The Generalized Normal Approximation (G-NA) approach is requested with method = "Normal". It is based on a Normal distribution, whose parameters are derived from the theoretical mean and variance of the input probabilities of success (see Introduction.

set.seed(2)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)

dgpbinom(NULL, pp, va, vb, wt, "Normal")
#>   [1] 5.607923e-34 8.868899e-34 2.266907e-33 5.759009e-33 1.454159e-32
#>   [6] 3.649437e-32 9.103112e-32 2.256856e-31 5.561194e-31 1.362016e-30
#>  [11] 3.315478e-30 8.021587e-30 1.928965e-29 4.610400e-29 1.095224e-28
#>  [16] 2.585931e-28 6.068497e-28 1.415453e-27 3.281403e-27 7.560907e-27
#>  [21] 1.731562e-26 3.941418e-26 8.916960e-26 2.005077e-25 4.481212e-25
#>  [26] 9.954281e-25 2.197730e-24 4.822684e-24 1.051849e-23 2.280173e-23
#>  [31] 4.912836e-23 1.052075e-22 2.239296e-22 4.737247e-22 9.960718e-22
#>  [36] 2.081639e-21 4.323844e-21 8.926573e-21 1.831680e-20 3.735634e-20
#>  [41] 7.572323e-20 1.525612e-19 3.054984e-19 6.080284e-19 1.202787e-18
#>  [46] 2.364851e-18 4.621350e-18 8.976023e-18 1.732802e-17 3.324790e-17
#>  [51] 6.340586e-17 1.201834e-16 2.264174e-16 4.239603e-16 7.890246e-16
#>  [56] 1.459506e-15 2.683313e-15 4.903282e-15 8.905378e-15 1.607563e-14
#>  [61] 2.884254e-14 5.143387e-14 9.116221e-14 1.605945e-13 2.811877e-13
#>  [66] 4.893417e-13 8.464047e-13 1.455104e-12 2.486337e-12 4.222561e-12
#>  [71] 7.127579e-12 1.195799e-11 1.993996e-11 3.304764e-11 5.443857e-11
#>  [76] 8.912982e-11 1.450405e-10 2.345880e-10 3.771137e-10 6.025440e-10
#>  [81] 9.568753e-10 1.510330e-09 2.369401e-09 3.694497e-09 5.725614e-09
#>  [86] 8.819398e-09 1.350224e-08 2.054578e-08 3.107347e-08 4.670967e-08
#>  [91] 6.978689e-08 1.036313e-07 1.529531e-07 2.243755e-07 3.271469e-07
#>  [96] 4.740893e-07 6.828536e-07 9.775638e-07 1.390954e-06 1.967117e-06
#> [101] 2.765018e-06 3.862920e-06 5.363935e-06 7.402890e-06 1.015475e-05
#> [106] 1.384482e-05 1.876097e-05 2.526814e-05 3.382528e-05 4.500488e-05
#> [111] 5.951520e-05 7.822512e-05 1.021915e-04 1.326884e-04 1.712386e-04
#> [116] 2.196444e-04 2.800198e-04 3.548195e-04 4.468649e-04 5.593647e-04
#> [121] 6.959275e-04 8.605635e-04 1.057674e-03 1.292025e-03 1.568701e-03
#> [126] 1.893038e-03 2.270537e-03 2.706749e-03 3.207136e-03 3.776912e-03
#> [131] 4.420856e-03 5.143112e-03 5.946968e-03 6.834635e-03 7.807017e-03
#> [136] 8.863494e-03 1.000172e-02 1.121747e-02 1.250446e-02 1.385431e-02
#> [141] 1.525651e-02 1.669842e-02 1.816543e-02 1.964112e-02 2.110749e-02
#> [146] 2.254536e-02 2.393468e-02 2.525505e-02 2.648616e-02 2.760831e-02
#> [151] 2.860294e-02 2.945314e-02 3.014411e-02 3.066363e-02 3.100235e-02
#> [156] 3.115414e-02 3.111624e-02 3.088932e-02 3.047753e-02 2.988830e-02
#> [161] 2.913216e-02 2.822242e-02 2.717477e-02 2.600684e-02 2.473770e-02
#> [166] 2.338736e-02 2.197622e-02 2.052462e-02 1.905228e-02 1.757799e-02
#> [171] 1.611912e-02 1.469141e-02 1.330871e-02 1.198280e-02 1.072335e-02
#> [176] 9.537908e-03 8.431904e-03 7.408807e-03 6.470249e-03 5.616215e-03
#> [181] 4.845254e-03 4.154698e-03 3.540890e-03 2.999407e-03 2.525274e-03
#> [186] 2.113156e-03 1.757538e-03 1.452874e-03 1.193717e-03 9.748208e-04
#> [191] 7.912218e-04 6.382955e-04 5.117942e-04 4.078674e-04 3.230671e-04
#> [196] 2.543411e-04 1.990171e-04 1.547798e-04 1.196432e-04 9.192046e-05
#> [201] 7.019178e-05 5.327340e-05 4.018691e-05 3.013068e-05 2.245346e-05
#> [206] 1.663059e-05 1.224284e-05 8.957907e-06 6.514501e-06 1.614725e-05
pgpbinom(NULL, pp, va, vb, wt, "Normal")
#>   [1] 5.607923e-34 1.447682e-33 3.714589e-33 9.473598e-33 2.401518e-32
#>   [6] 6.050955e-32 1.515407e-31 3.772263e-31 9.333457e-31 2.295361e-30
#>  [11] 5.610840e-30 1.363243e-29 3.292208e-29 7.902608e-29 1.885484e-28
#>  [16] 4.471416e-28 1.053991e-27 2.469444e-27 5.750847e-27 1.331175e-26
#>  [21] 3.062738e-26 7.004156e-26 1.592112e-25 3.597189e-25 8.078401e-25
#>  [26] 1.803268e-24 4.000998e-24 8.823682e-24 1.934217e-23 4.214390e-23
#>  [31] 9.127226e-23 1.964798e-22 4.204093e-22 8.941340e-22 1.890206e-21
#>  [36] 3.971844e-21 8.295689e-21 1.722226e-20 3.553906e-20 7.289540e-20
#>  [41] 1.486186e-19 3.011798e-19 6.066782e-19 1.214707e-18 2.417494e-18
#>  [46] 4.782345e-18 9.403695e-18 1.837972e-17 3.570774e-17 6.895564e-17
#>  [51] 1.323615e-16 2.525449e-16 4.789624e-16 9.029227e-16 1.691947e-15
#>  [56] 3.151453e-15 5.834767e-15 1.073805e-14 1.964343e-14 3.571905e-14
#>  [61] 6.456159e-14 1.159955e-13 2.071577e-13 3.677521e-13 6.489399e-13
#>  [66] 1.138282e-12 1.984686e-12 3.439790e-12 5.926127e-12 1.014869e-11
#>  [71] 1.727627e-11 2.923425e-11 4.917421e-11 8.222186e-11 1.366604e-10
#>  [76] 2.257903e-10 3.708308e-10 6.054188e-10 9.825325e-10 1.585076e-09
#>  [81] 2.541952e-09 4.052282e-09 6.421683e-09 1.011618e-08 1.584179e-08
#>  [86] 2.466119e-08 3.816343e-08 5.870922e-08 8.978268e-08 1.364924e-07
#>  [91] 2.062792e-07 3.099106e-07 4.628636e-07 6.872392e-07 1.014386e-06
#>  [96] 1.488475e-06 2.171329e-06 3.148893e-06 4.539847e-06 6.506964e-06
#> [101] 9.271982e-06 1.313490e-05 1.849884e-05 2.590173e-05 3.605648e-05
#> [106] 4.990129e-05 6.866226e-05 9.393040e-05 1.277557e-04 1.727606e-04
#> [111] 2.322758e-04 3.105009e-04 4.126924e-04 5.453808e-04 7.166194e-04
#> [116] 9.362638e-04 1.216284e-03 1.571103e-03 2.017968e-03 2.577333e-03
#> [121] 3.273260e-03 4.133824e-03 5.191498e-03 6.483523e-03 8.052224e-03
#> [126] 9.945263e-03 1.221580e-02 1.492255e-02 1.812968e-02 2.190660e-02
#> [131] 2.632745e-02 3.147056e-02 3.741753e-02 4.425217e-02 5.205918e-02
#> [136] 6.092268e-02 7.092440e-02 8.214187e-02 9.464633e-02 1.085006e-01
#> [141] 1.237572e-01 1.404556e-01 1.586210e-01 1.782621e-01 1.993696e-01
#> [146] 2.219150e-01 2.458497e-01 2.711047e-01 2.975909e-01 3.251992e-01
#> [151] 3.538021e-01 3.832553e-01 4.133994e-01 4.440630e-01 4.750653e-01
#> [156] 5.062195e-01 5.373357e-01 5.682250e-01 5.987026e-01 6.285909e-01
#> [161] 6.577230e-01 6.859454e-01 7.131202e-01 7.391271e-01 7.638648e-01
#> [166] 7.872521e-01 8.092283e-01 8.297529e-01 8.488052e-01 8.663832e-01
#> [171] 8.825023e-01 8.971938e-01 9.105025e-01 9.224853e-01 9.332086e-01
#> [176] 9.427465e-01 9.511784e-01 9.585872e-01 9.650575e-01 9.706737e-01
#> [181] 9.755189e-01 9.796736e-01 9.832145e-01 9.862139e-01 9.887392e-01
#> [186] 9.908524e-01 9.926099e-01 9.940628e-01 9.952565e-01 9.962313e-01
#> [191] 9.970225e-01 9.976608e-01 9.981726e-01 9.985805e-01 9.989036e-01
#> [196] 9.991579e-01 9.993569e-01 9.995117e-01 9.996314e-01 9.997233e-01
#> [201] 9.997935e-01 9.998467e-01 9.998869e-01 9.999171e-01 9.999395e-01
#> [206] 9.999561e-01 9.999684e-01 9.999773e-01 9.999839e-01 1.000000e+00

A comparison with exact computation shows that the approximation quality of the NA procedure increases with larger numbers of probabilities of success:

set.seed(2)

# 10 random probabilities of success
pp <- runif(10)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)
dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal")
dpd <- dgpbinom(NULL, pp, va, vb)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -0.0346309 -0.0042919  0.0001378  0.0000000  0.0038447  0.0317044

# 100 random probabilities of success
pp <- runif(100)
va <- sample(0:100, 100, TRUE)
vb <- sample(0:100, 100, TRUE)
dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal")
dpd <- dgpbinom(NULL, pp, va, vb)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -3.006e-05 -1.126e-09  0.000e+00  0.000e+00  1.854e-09  2.967e-05

# 1000 random probabilities of success
pp <- runif(1000)
va <- sample(0:1000, 1000, TRUE)
vb <- sample(0:1000, 1000, TRUE)
dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal")
dpd <- dgpbinom(NULL, pp, va, vb)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -3.152e-08  0.000e+00  0.000e+00  0.000e+00  0.000e+00  3.707e-08

Generalized Refined Normal Approximation

The Generalized Refined Normal Approximation (G-RNA) approach is requested with method = "RefinedNormal". It is based on a Normal distribution, whose parameters are derived from the theoretical mean, variance and skewness of the input probabilities of success.

set.seed(2)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)
dgpbinom(NULL, pp, va, vb, wt, "RefinedNormal")
#>   [1] 5.100768e-32 7.816039e-32 1.959106e-31 4.880045e-31 1.208047e-30
#>   [6] 2.971921e-30 7.265798e-30 1.765311e-29 4.262362e-29 1.022751e-28
#>  [11] 2.438814e-28 5.779315e-28 1.361012e-27 3.185186e-27 7.407878e-27
#>  [16] 1.712136e-26 3.932484e-26 8.975930e-26 2.035985e-25 4.589352e-25
#>  [21] 1.028037e-24 2.288476e-24 5.062470e-24 1.112900e-23 2.431235e-23
#>  [26] 5.278047e-23 1.138660e-22 2.441116e-22 5.200621e-22 1.101015e-21
#>  [31] 2.316333e-21 4.842591e-21 1.006056e-20 2.076983e-20 4.260973e-20
#>  [36] 8.686571e-20 1.759748e-19 3.542530e-19 7.086575e-19 1.408697e-18
#>  [41] 2.782630e-18 5.461965e-18 1.065359e-17 2.064884e-17 3.976912e-17
#>  [46] 7.611065e-17 1.447413e-16 2.735176e-16 5.135966e-16 9.582999e-16
#>  [51] 1.776730e-15 3.273256e-15 5.992053e-15 1.089949e-14 1.970017e-14
#>  [56] 3.538058e-14 6.313772e-14 1.119541e-13 1.972495e-13 3.453144e-13
#>  [61] 6.006676e-13 1.038179e-12 1.782897e-12 3.042246e-12 5.157913e-12
#>  [66] 8.688860e-12 1.454315e-11 2.418568e-11 3.996319e-11 6.560867e-11
#>  [71] 1.070186e-10 1.734408e-10 2.792769e-10 4.467944e-10 7.101774e-10
#>  [76] 1.121527e-09 1.759679e-09 2.743061e-09 4.248282e-09 6.536785e-09
#>  [81] 9.992759e-09 1.517660e-08 2.289965e-08 3.432780e-08 5.112383e-08
#>  [86] 7.564129e-08 1.111860e-07 1.623661e-07 2.355550e-07 3.394997e-07
#>  [91] 4.861107e-07 6.914779e-07 9.771650e-07 1.371840e-06 1.913307e-06
#>  [96] 2.651012e-06 3.649099e-06 4.990081e-06 6.779222e-06 9.149662e-06
#> [101] 1.226837e-05 1.634294e-05 2.162919e-05 2.843967e-05 3.715276e-05
#> [106] 4.822249e-05 6.218875e-05 7.968764e-05 1.014618e-04 1.283702e-04
#> [111] 1.613972e-04 2.016606e-04 2.504176e-04 3.090698e-04 3.791651e-04
#> [116] 4.623982e-04 5.606082e-04 6.757744e-04 8.100102e-04 9.655553e-04
#> [121] 1.144767e-03 1.350110e-03 1.584150e-03 1.849543e-03 2.149024e-03
#> [126] 2.485405e-03 2.861561e-03 3.280420e-03 3.744950e-03 4.258135e-03
#> [131] 4.822941e-03 5.442277e-03 6.118927e-03 6.855467e-03 7.654163e-03
#> [136] 8.516833e-03 9.444692e-03 1.043817e-02 1.149671e-02 1.261856e-02
#> [141] 1.380053e-02 1.503782e-02 1.632377e-02 1.764978e-02 1.900514e-02
#> [146] 2.037702e-02 2.175055e-02 2.310888e-02 2.443348e-02 2.570445e-02
#> [151] 2.690096e-02 2.800177e-02 2.898579e-02 2.983278e-02 3.052397e-02
#> [156] 3.104271e-02 3.137515e-02 3.151071e-02 3.144261e-02 3.116818e-02
#> [161] 3.068902e-02 3.001109e-02 2.914456e-02 2.810352e-02 2.690563e-02
#> [166] 2.557147e-02 2.412399e-02 2.258773e-02 2.098813e-02 1.935073e-02
#> [171] 1.770044e-02 1.606093e-02 1.445398e-02 1.289904e-02 1.141287e-02
#> [176] 1.000927e-02 8.699011e-03 7.489773e-03 6.386301e-03 5.390581e-03
#> [181] 4.502114e-03 3.718233e-03 3.034469e-03 2.444914e-03 1.942594e-03
#> [186] 1.519822e-03 1.168521e-03 8.805066e-04 6.477360e-04 4.625001e-04
#> [191] 2.621189e-04 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [196] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [201] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [206] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
pgpbinom(NULL, pp, va, vb, wt, "RefinedNormal")
#>   [1] 5.100768e-32 1.291681e-31 3.250786e-31 8.130831e-31 2.021130e-30
#>   [6] 4.993051e-30 1.225885e-29 2.991196e-29 7.253558e-29 1.748106e-28
#>  [11] 4.186920e-28 9.966236e-28 2.357636e-27 5.542822e-27 1.295070e-26
#>  [16] 3.007206e-26 6.939690e-26 1.591562e-25 3.627547e-25 8.216899e-25
#>  [21] 1.849727e-24 4.138203e-24 9.200673e-24 2.032968e-23 4.464203e-23
#>  [26] 9.742250e-23 2.112885e-22 4.554002e-22 9.754623e-22 2.076477e-21
#>  [31] 4.392810e-21 9.235402e-21 1.929596e-20 4.006579e-20 8.267552e-20
#>  [36] 1.695412e-19 3.455160e-19 6.997690e-19 1.408427e-18 2.817123e-18
#>  [41] 5.599754e-18 1.106172e-17 2.171531e-17 4.236415e-17 8.213328e-17
#>  [46] 1.582439e-16 3.029852e-16 5.765028e-16 1.090099e-15 2.048399e-15
#>  [51] 3.825129e-15 7.098385e-15 1.309044e-14 2.398993e-14 4.369010e-14
#>  [56] 7.907068e-14 1.422084e-13 2.541625e-13 4.514120e-13 7.967264e-13
#>  [61] 1.397394e-12 2.435573e-12 4.218470e-12 7.260717e-12 1.241863e-11
#>  [66] 2.110749e-11 3.565064e-11 5.983632e-11 9.979950e-11 1.654082e-10
#>  [71] 2.724267e-10 4.458675e-10 7.251445e-10 1.171939e-09 1.882116e-09
#>  [76] 3.003643e-09 4.763322e-09 7.506383e-09 1.175466e-08 1.829145e-08
#>  [81] 2.828421e-08 4.346081e-08 6.636046e-08 1.006883e-07 1.518121e-07
#>  [86] 2.274534e-07 3.386394e-07 5.010055e-07 7.365605e-07 1.076060e-06
#>  [91] 1.562171e-06 2.253649e-06 3.230814e-06 4.602653e-06 6.515960e-06
#>  [96] 9.166972e-06 1.281607e-05 1.780615e-05 2.458537e-05 3.373504e-05
#> [101] 4.600341e-05 6.234634e-05 8.397554e-05 1.124152e-04 1.495680e-04
#> [106] 1.977905e-04 2.599792e-04 3.396668e-04 4.411286e-04 5.694988e-04
#> [111] 7.308960e-04 9.325566e-04 1.182974e-03 1.492044e-03 1.871209e-03
#> [116] 2.333607e-03 2.894215e-03 3.569990e-03 4.380000e-03 5.345555e-03
#> [121] 6.490322e-03 7.840432e-03 9.424583e-03 1.127413e-02 1.342315e-02
#> [126] 1.590855e-02 1.877011e-02 2.205053e-02 2.579549e-02 3.005362e-02
#> [131] 3.487656e-02 4.031884e-02 4.643777e-02 5.329323e-02 6.094740e-02
#> [136] 6.946423e-02 7.890892e-02 8.934709e-02 1.008438e-01 1.134624e-01
#> [141] 1.272629e-01 1.423007e-01 1.586245e-01 1.762743e-01 1.952794e-01
#> [146] 2.156564e-01 2.374070e-01 2.605159e-01 2.849493e-01 3.106538e-01
#> [151] 3.375548e-01 3.655565e-01 3.945423e-01 4.243751e-01 4.548991e-01
#> [156] 4.859418e-01 5.173169e-01 5.488276e-01 5.802702e-01 6.114384e-01
#> [161] 6.421274e-01 6.721385e-01 7.012831e-01 7.293866e-01 7.562922e-01
#> [166] 7.818637e-01 8.059877e-01 8.285754e-01 8.495636e-01 8.689143e-01
#> [171] 8.866147e-01 9.026757e-01 9.171296e-01 9.300287e-01 9.414415e-01
#> [176] 9.514508e-01 9.601498e-01 9.676396e-01 9.740259e-01 9.794165e-01
#> [181] 9.839186e-01 9.876368e-01 9.906713e-01 9.931162e-01 9.950588e-01
#> [186] 9.965786e-01 9.977471e-01 9.986276e-01 9.992754e-01 9.997379e-01
#> [191] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [196] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [201] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [206] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00

A comparison with exact computation shows that the approximation quality of the RNA procedure increases with larger numbers of probabilities of success:

set.seed(2)

# 10 random probabilities of success
pp <- runif(10)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)
dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal")
dpd <- dgpbinom(NULL, pp, va, vb)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -3.045e-02 -4.084e-03  1.727e-04  1.179e-05  4.324e-03  3.161e-02

# 100 random probabilities of success
pp <- runif(100)
va <- sample(0:100, 100, TRUE)
vb <- sample(0:100, 100, TRUE)
dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal")
dpd <- dgpbinom(NULL, pp, va, vb)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -8.831e-06  0.000e+00  1.000e-12  9.000e-12  3.642e-07  1.333e-05

# 1000 random probabilities of success
pp <- runif(1000)
va <- sample(0:1000, 1000, TRUE)
vb <- sample(0:1000, 1000, TRUE)
dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal")
dpd <- dgpbinom(NULL, pp, va, vb)
idx <- which(dpn != 0 & dpd != 0)
summary((dpn - dpd)[idx])
#>       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
#> -1.980e-08  0.000e+00  0.000e+00  0.000e+00  0.000e+00  3.197e-08

Processing Speed Comparisons

To assess the performance of the approximation procedures, we use the microbenchmark package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2).

library(microbenchmark)
n <- 1500
set.seed(2)
va <- sample(1:50, n, TRUE)
vb <- sample(1:50, n, TRUE)

f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Normal")
f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "RefinedNormal")
f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT")

microbenchmark(f1(), f2(), f3(), times = 51)
#> Unit: milliseconds
#>  expr     min       lq      mean  median      uq     max neval
#>  f1()  7.3175  7.58750  8.077786  7.8100  8.1227 12.7853    51
#>  f2()  8.6754  9.04880  9.463494  9.2844  9.6528 11.5537    51
#>  f3() 35.5874 36.96335 38.594629 38.4630 39.4759 43.4619    51

Clearly, the G-NA procedure is the fastest, followed by the G-RNA method. Both are hugely faster than G-DC-FFT.

PoissonBinomial/inst/doc/proc_exact.Rmd0000644000176200001440000002120314022377326017700 0ustar liggesusers--- title: "Exact Procedures" output: rmarkdown::html_vignette: toc: yes vignette: > %\VignetteIndexEntry{Exact Procedures} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, echo = FALSE} library(PoissonBinomial) ``` ## Ordinary Poisson Binomial Distribution ### Direct Convolution The *Direct Convolution* (DC) approach is requested with `method = "Convolve"`. ```{r directconv-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Convolve") ppbinom(NULL, pp, wt, "Convolve") ``` ### Divide & Conquer FFT Tree Convolution The *Divide & Conquer FFT Tree Convolution* (DC-FFT) approach is requested with `method = "DivideFFT"`. ```{r divide1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "DivideFFT") ppbinom(NULL, pp, wt, "DivideFFT") ``` By design, as proposed by [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), its results are identical to the DC procedure, if $n \leq 750$. Thus, differences can be observed for larger $n > 750$: ```{r divide2-ord} set.seed(1) pp1 <- runif(751) pp2 <- pp1[1:750] sum(abs(dpbinom(NULL, pp2, method = "DivideFFT") - dpbinom(NULL, pp2, method = "Convolve"))) sum(abs(dpbinom(NULL, pp1, method = "DivideFFT") - dpbinom(NULL, pp1, method = "Convolve"))) ``` The reason is that the DC-FFT method splits the input `probs` vector into as equally sized parts as possible and computes their distributions separately with the DC approach. The results of the portions are then convoluted by means of the Fast Fourier Transformation. As proposed by [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), no splitting is done for $n \leq 750$. In addition, the DC-FFT procedure does not produce probabilities $\leq 5.55e\text{-}17$, i.e. smaller values are rounded off to 0, if $n > 750$, whereas the smallest possible result of the DC algorithm is $\sim 1e\text{-}323$. This is most likely caused by the used FFTW3 library. ```{r divide3-ord} set.seed(1) pp1 <- runif(751) d1 <- dpbinom(NULL, pp1, method = "DivideFFT") d2 <- dpbinom(NULL, pp1, method = "Convolve") min(d1[d1 > 0]) min(d2[d2 > 0]) ``` ### Discrete Fourier Transformation of the Characteristic Function The *Discrete Fourier Transformation of the Characteristic Function* (DFT-CF) approach is requested with `method = "Characteristic"`. ```{r dftcf-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Characteristic") ppbinom(NULL, pp, wt, "Characteristic") ``` As can be seen, the DFT-CF procedure does not produce probabilities $\leq 2.22e\text{-}16$, i.e. smaller values are rounded off to 0, most likely due to the used FFTW3 library. ### Recursive Formula The *Recursive Formula* (RF) approach is requested with `method = "Recursive"`. ```{r rf1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Recursive") ppbinom(NULL, pp, wt, "Recursive") ``` Obviously, the RF procedure does produce probabilities $\leq 5.55e\text{-}17$, because it does not rely on the FFTW3 library. Furthermore, it yields the same results as the DC method. ```{r rf2-ord} set.seed(1) pp <- runif(1000) wt <- sample(1:10, 1000, TRUE) sum(abs(dpbinom(NULL, pp, wt, "Convolve") - dpbinom(NULL, pp, wt, "Recursive"))) ``` ### Processing Speed Comparisons To assess the performance of the exact procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-ord} library(microbenchmark) set.seed(1) f1 <- function() dpbinom(NULL, runif(6000), method = "DivideFFT") f2 <- function() dpbinom(NULL, runif(6000), method = "Convolve") f3 <- function() dpbinom(NULL, runif(6000), method = "Recursive") f4 <- function() dpbinom(NULL, runif(6000), method = "Characteristic") microbenchmark(f1(), f2(), f3(), f4(), times = 51) ``` Clearly, the DC-FFT procedure is the fastest, followed by DC, RF and DFT-CF methods. ## Generalized Poisson Binomial Distribution ### Generalized Direct Convolution The *Generalized Direct Convolution* (G-DC) approach is requested with `method = "Convolve"`. ```{r directconv-gen} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Convolve") pgpbinom(NULL, pp, va, vb, wt, "Convolve") ``` ### Generalized Divide & Conquer FFT Tree Convolution The *Generalized Divide & Conquer FFT Tree Convolution* (G-DC-FFT) approach is requested with `method = "DivideFFT"`. ```{r divide1-gen} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "DivideFFT") pgpbinom(NULL, pp, va, vb, wt, "DivideFFT") ``` By design, similar to the ordinary DC-FFT algorithm by [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), its results are identical to the G-DC procedure, if $n$ and the number of possible observed values is small. Thus, differences can be observed for larger numbers: ```{r divide2-gen} set.seed(1) pp1 <- runif(250) va1 <- sample(0:50, 250, TRUE) vb1 <- sample(0:50, 250, TRUE) pp2 <- pp1[1:248] va2 <- va1[1:248] vb2 <- vb1[1:248] sum(abs(dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT") - dgpbinom(NULL, pp1, va1, vb1, method = "Convolve"))) sum(abs(dgpbinom(NULL, pp2, va2, vb2, method = "DivideFFT") - dgpbinom(NULL, pp2, va2, vb2, method = "Convolve"))) ``` The reason is that the G-DC-FFT method splits the input `probs`, `val_p` and `val_q` vectors into parts such that the numbers of possible observations of all parts are as equally sized as possible. Their distributions are then computed separately with the G-DC approach. The results of the portions are then convoluted by means of the Fast Fourier Transformation. For small $n$ and small distribution sizes, no splitting is needed. In addition, the G-DC-FFT procedure, just like the DC-FFT method, does not produce probabilities $\leq 5.55e\text{-}17$, i.e. smaller values are rounded off to $0$, if the total number of possible observations is smaller than $750$, whereas the smallest possible result of the DC algorithm is $\sim 1e\text{-}323$. This is most likely caused by the used FFTW3 library. ```{r divide3-gen} d1 <- dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT") d2 <- dgpbinom(NULL, pp1, va1, vb1, method = "Convolve") min(d1[d1 > 0]) min(d2[d2 > 0]) ``` ### Generalized Discrete Fourier Transformation of the Characteristic Function The *Generalized Discrete Fourier Transformation of the Characteristic Function* (G-DFT-CF) approach is requested with `method = "Characteristic"`. ```{r dftcf-gen} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Characteristic") pgpbinom(NULL, pp, va, vb, wt, "Characteristic") ``` As can be seen, the G-DFT-CF procedure does not produce probabilities $\leq 2.2e\text{-}16$, i.e. smaller values are rounded off to 0, most likely due to the used FFTW3 library. ### Processing Speed Comparisons To assess the performance of the exact procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability and value vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-gen} library(microbenchmark) n <- 2500 set.seed(1) va <- sample(1:50, n, TRUE) vb <- sample(1:50, n, TRUE) f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT") f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Convolve") f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Characteristic") microbenchmark(f1(), f2(), f3(), times = 51) ``` Clearly, the G-DC-FFT procedure is the fastest one. It outperforms both the G-DC and G-DFT-CF approaches. The latter one needs a lot more time than the others. Generally, the computational speed advantage of the G-DC-FFT procedure increases with larger $n$ (and $m$).PoissonBinomial/inst/doc/proc_exact.R0000644000176200001440000001060314077633372017367 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, echo = FALSE------------------------------------------------------ library(PoissonBinomial) ## ----directconv-ord----------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Convolve") ppbinom(NULL, pp, wt, "Convolve") ## ----divide1-ord-------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "DivideFFT") ppbinom(NULL, pp, wt, "DivideFFT") ## ----divide2-ord-------------------------------------------------------------- set.seed(1) pp1 <- runif(751) pp2 <- pp1[1:750] sum(abs(dpbinom(NULL, pp2, method = "DivideFFT") - dpbinom(NULL, pp2, method = "Convolve"))) sum(abs(dpbinom(NULL, pp1, method = "DivideFFT") - dpbinom(NULL, pp1, method = "Convolve"))) ## ----divide3-ord-------------------------------------------------------------- set.seed(1) pp1 <- runif(751) d1 <- dpbinom(NULL, pp1, method = "DivideFFT") d2 <- dpbinom(NULL, pp1, method = "Convolve") min(d1[d1 > 0]) min(d2[d2 > 0]) ## ----dftcf-ord---------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Characteristic") ppbinom(NULL, pp, wt, "Characteristic") ## ----rf1-ord------------------------------------------------------------------ set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Recursive") ppbinom(NULL, pp, wt, "Recursive") ## ----rf2-ord------------------------------------------------------------------ set.seed(1) pp <- runif(1000) wt <- sample(1:10, 1000, TRUE) sum(abs(dpbinom(NULL, pp, wt, "Convolve") - dpbinom(NULL, pp, wt, "Recursive"))) ## ----benchmark-ord------------------------------------------------------------ library(microbenchmark) set.seed(1) f1 <- function() dpbinom(NULL, runif(6000), method = "DivideFFT") f2 <- function() dpbinom(NULL, runif(6000), method = "Convolve") f3 <- function() dpbinom(NULL, runif(6000), method = "Recursive") f4 <- function() dpbinom(NULL, runif(6000), method = "Characteristic") microbenchmark(f1(), f2(), f3(), f4(), times = 51) ## ----directconv-gen----------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Convolve") pgpbinom(NULL, pp, va, vb, wt, "Convolve") ## ----divide1-gen-------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "DivideFFT") pgpbinom(NULL, pp, va, vb, wt, "DivideFFT") ## ----divide2-gen-------------------------------------------------------------- set.seed(1) pp1 <- runif(250) va1 <- sample(0:50, 250, TRUE) vb1 <- sample(0:50, 250, TRUE) pp2 <- pp1[1:248] va2 <- va1[1:248] vb2 <- vb1[1:248] sum(abs(dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT") - dgpbinom(NULL, pp1, va1, vb1, method = "Convolve"))) sum(abs(dgpbinom(NULL, pp2, va2, vb2, method = "DivideFFT") - dgpbinom(NULL, pp2, va2, vb2, method = "Convolve"))) ## ----divide3-gen-------------------------------------------------------------- d1 <- dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT") d2 <- dgpbinom(NULL, pp1, va1, vb1, method = "Convolve") min(d1[d1 > 0]) min(d2[d2 > 0]) ## ----dftcf-gen---------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Characteristic") pgpbinom(NULL, pp, va, vb, wt, "Characteristic") ## ----benchmark-gen------------------------------------------------------------ library(microbenchmark) n <- 2500 set.seed(1) va <- sample(1:50, n, TRUE) vb <- sample(1:50, n, TRUE) f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT") f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Convolve") f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Characteristic") microbenchmark(f1(), f2(), f3(), times = 51) PoissonBinomial/inst/doc/intro.html0000644000176200001440000016474014077633273017152 0ustar liggesusers Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions

Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions

Introduction

The Poisson binomial distribution (in the following abbreviated as PBD) is becoming increasingly important, especially in the areas of statistics, finance, insurance mathematics and quality management. This package provides functions for two types of PBDs: ordinary and generalized PBDs (henceforth referred to as O-PBDs and G-PBDs).

Ordinary Poisson Binomial Distribution

The O-PBD is the distribution of the sum of a number \(n\) of independent Bernoulli-distributed random indicators \(X_i \in \{0, 1\}\) \((i = 1, ..., n)\): \[X := \sum_{i = 1}^{n}{X_i}.\] Each of the \(X_i\) possesses a predefined probability of success \(p_i := P(X_i = 1)\) (subsequently \(P(X_i = 0) = 1 - p_i =: q_i\)). With this, mean, variance and skewness can be expressed as \[E(X) = \sum_{i = 1}^{n}{p_i} \quad \quad Var(X) = \sum_{i = 1}^{n}{p_i q_i} \quad \quad Skew(X) = \frac{\sum_{i = 1}^{n}{p_i q_i(q_i - p_i)}}{\sqrt{Var(X)}^3}.\] All possible observations are in \(\{0, ..., n\}\).

Generalized Poisson Binomial Distribution

The G-PBD is defined very similar. Again, it is the distribution of a sum random variables, but here, each \(X_i \in \{u_i, v_i\}\) with \(P(X_i = u_i) =: p_i\) and \(P(X_i = v_i) = 1 - p_i =: q_i\). Using ordinary Bernoulli-distributed random variables \(Y_i\), \(X_i\) can be expressed as \(X_i = u_i Y_i + v_i(1 - Y_i) = v_i + Y_i \cdot (u_i - v_i)\). As a result, mean, variance and skewness are given by \[E(X) = \sum_{i = 1}^{n}{v_i} + \sum_{i = 1}^{n}{p_i (u_i - v_i)} \quad \quad Var(X) = \sum_{i = 1}^{n}{p_i q_i(u_i - v_i)^2} \\Skew(X) = \frac{\sum_{i = 1}^{n}{p_i q_i(q_i - p_i)(u_i - v_i)^3}}{\sqrt{Var(X)}^3}.\] All possible observations are in \(\{U, ..., V\}\) with \(U := \sum_{i = 1}^{n}{\min\{u_i, v_i\}}\) and \(V := \sum_{i = 1}^{n}{\max\{u_i, v_i\}}\). Note that the size \(m := V - U\) of the distribution does not generally equal \(n\)!

Existing R Packages

Computing these distributions exactly is computationally demanding, but in the last few years, some efficient algorithms have been developed. Particularly significant in this respect are the works of Hong (2013), who derived the DFT-CF procedure for O-PBDs, Biscarri, Zhao & Brunner (2018) who developed two immensely faster algorithms for O-PBDs, namely the DC and DC-FFT procedures, and Zhang, Hong and Balakrishnan (2018) who further developed Hong’s (2013) DFT-CF algorithm for G-PBDs (in the following, this generalized procedure is referred to as G-DFT-CF). Still, only a few R packages exist for the calculation of either ordinary and generalized PBDs, e.g. poibin and poisbinom for O-PBDs and GPB for G-PDBs. Before the release of this PoissonBinomial package, there has been no R package that implemented the DC and DC-FFT algorithms of Biscarri, Zhao & Brunner (2018), as they only published a reference implementation for R, but refrained from releasing it as a package. Additionally, there are no comparable approaches for G-PBDs to date.

The poibin package implements the DFT-CF algorithm along with the exact recursive method of Barlow & Heidtmann (1984) and Normal and Poisson approximations. However, both exact procedures of this package possess some disadvantages, i.e. they are relatively slow at computing very large distributions, with the recursive algorithm being also very memory consuming. The G-DFT-CF procedure is implemented in the GPB package and inherits this performance drawback. The poisbinom package provides a more efficient and much faster DFT-CF implementation. The performance improvement over the poibin package lies in the use of the FFTW C library. Unfortunately, it sometimes yields some negative probabilities in the tail regions, especially for large distributions. However, this numerical issue has not been addressed to date. This PoissonBinomial also utilizes FFTW for both DFT-CF and G-DFT-CF algorithms, but corrects that issue. In addition to the disadvantages regarding computational speed (poibin and GPB) or numerics (poisbinom), especially for very large distributions, the aforementioned packages do not provide headers for their internal C/C++ functions, so that they cannot be imported directly by C or C++ code of other packages that use for example Rcpp.

In some situations, people might have to deal with Poisson binomial distributions that include Bernoulli variables with \(p_i \in \{0, 1\}\). Calculation performance can be further optimized by handling these indicators before the actual computations. Approximations also benefit from this in terms of accuracy. None of the aforementioned packages implements such optimizations. Therefore, the advantages of this PoissonBinomial package can be summarized as follows:

  • Efficient computation of very large distributions with both exact and approximate algorithms for O-PBDs and G-PBDs
  • Provides headers for the C++ functions so that other packages may include them in their own C++ code
  • Handles (sometimes large numbers of) 0- and 1-probabilities to speed up performance

In total, this package includes 10 different algorithms of computing ordinary Poisson binomial distributions, including optimized versions of the Normal, Refined Normal and Poisson approaches, and 5 approaches for generalized PBDs. In addition, the implementation of the exact recursive procedure for O-PBDs was rewritten so that it is considerably less memory intensive: the poibin implementation needs the memory equivalent of \((n + 1)^2\) values of type double, while ours only needs \(3 \cdot (n + 1)\).


Exact Procedures

Ordinary Poisson Binomial Distribution

In this package implements the following exact algorithms for computing ordinary Poisson binomial distributions:

Generalized Poisson Binomial Distribution

For generalized Poisson binomial distributions, this package provides:

Examples

Examples and performance comparisons of these procedures are presented in a separate vignette.


Approximations

Ordinary Poisson Binomial Distribution

In addition, the following O-PBD approximation methods are included:

  • the Poisson Approximation approach,
  • the Arithmetic Mean Binomial Approximation procedure,
  • Geometric Mean Binomial Approximation algorithms,
  • the Normal Approximation and
  • the Refined Normal Approximation.

Generalized Poisson Binomial Distribution

For G-PBDs, there are

  • the Normal Approximation and
  • the Refined Normal Approximation.

Examples

Examples and performance comparisons of these approaches are provided in a separate vignette as well.


Handling special cases, zeros and ones

Handling special cases, such as ordinary binomial distributions, zeros and ones is useful to speed up performance. Unfortunately, some approximations do not work well for Bernoulli trials with \(p_i \in \{0, 1\}\), e.g. the Geometric Mean Binomial Approximations. This is why handling these values before the actual computation of the distribution is not only a performance tweak, but sometimes even a necessity. It is achieved by some simple preliminary considerations.

Ordinary Poisson Binomial Distributions

  1. All \(p_i = p\) are equal?
    In this case, we have a usual binomial distribution. The specified method of computation is then ignored. In particular, the following applies:
    1. \(p = 0\): The only observable value is \(0\), i.e. \(P(X = 0) = 1\) and \(P(X \neq 0) = 0\).
    2. \(p = 1\): The only observable value is \(n\), i.e. \(P(X = n) = 1\) and \(P(X \neq n) = 0\).
  2. All \(p_i \in \{0, 1\} (i = 1, ..., n)\)?
    If one \(p_i\) is 1, it is impossible to measure 0 successes. Following the same logic, if two \(p_i\) are 1, we cannot observe 0 and 1 successes and so on. In general, a number of \(n_1\) values with \(p_i = 1\) makes it impossible to measure \(0, ..., n_1 - 1\) successes. Likewise, if there are \(n_0\) Bernoulli trials with \(p_i = 0\), we cannot observe \(n - n_0 + 1, ..., n\) successes. If all \(p_i \in \{0, 1\}\), it holds \(n = n_0 + n_1\). As a result, the only observable value is \(n_1\), i.e. \(P(X = n_1) = 1\) and \(P(X \neq n_1) = 0\).
  3. Are there \(p_i \notin \{0, 1\}\)?
    Using the deductions from above, we can only observe an “inner” distribution in the range of \(n_1, n_1 + 1, ..., n - n_0\), i.e. \(P(X \in \{n_1, ..., n - n_0\}) > 0\) and \(P(X < n_1) = P(X > n - n_0) = 0\). As a result, \(X\) can be expressed as \(X = n_1 + Y\) with \(Y \sim PBin(\{p_i|0 < p_i < 1\})\) and \(|\{p_i|0 < p_i < 1\}| = n - n_0 - n_1\). Subsequently, the Poisson binomial distribution must only be computed for \(Y\). Especially, if there is only one \(p_i \notin \{0, 1\}\), \(Y\) follows a Bernoulli distribution with parameter \(p_i\), i.e. \(P(X = n_1) = P(Y = 0) = 1 - p_i\) and \(P(X = n_1 + 1) = P(Y = 1) = p_i\).

These cases are illustrated in the following example:

# Case 1
dpbinom(NULL, rep(0.3, 7))
#> [1] 0.0823543 0.2470629 0.3176523 0.2268945 0.0972405 0.0250047 0.0035721
#> [8] 0.0002187
dbinom(0:7, 7, 0.3) # equal results
#> [1] 0.0823543 0.2470629 0.3176523 0.2268945 0.0972405 0.0250047 0.0035721
#> [8] 0.0002187

dpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0)) # only 0 is observable
#> [1] 1 0 0 0 0 0 0 0
dpbinom(0, c(0, 0, 0, 0, 0, 0, 0)) # confirmation
#> [1] 1

dpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1)) # only 7 is observable
#> [1] 0 0 0 0 0 0 0 1
dpbinom(7, c(1, 1, 1, 1, 1, 1, 1)) # confirmation
#> [1] 1

# Case 2
dpbinom(NULL, c(0, 0, 0, 0, 1, 1, 1)) # only 3 is observable
#> [1] 0 0 0 1 0 0 0 0
dpbinom(3, c(0, 0, 0, 0, 1, 1, 1)) # confirmation
#> [1] 1

# Case 3
dpbinom(NULL, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # only 1-5 are observable
#> [1] 0.0000 0.0864 0.4344 0.3784 0.0944 0.0064 0.0000 0.0000
dpbinom(1:5, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # confirmation
#> [1] 0.0864 0.4344 0.3784 0.0944 0.0064

dpbinom(NULL, c(0, 0, 0.4, 1)) # only 1 and 2 are observable
#> [1] 0.0 0.6 0.4 0.0 0.0
dpbinom(1:2, c(0, 0, 0.4, 1)) # confirmation
#> [1] 0.6 0.4

Generalized Poisson Binomial Distributions

  1. All \(u_i \in \{0, 1\}\) and all \(v_i = 1 - u_i\)?
    Then, it is an ordinary Poisson binomial distribution with parameters \(p_i' = p_i\) for all \(i\) for which \(u_i = 1\) and \(p_i' = 1 - p_i\) otherwise. This includes all the special cases described above.
  2. All \(u_i = u\) are equal and all \(v_i = v\) are equal?
    In this case, we have a linearly transformed ordinary Poisson binomial distribution, i.e. \(X\) can be expressed as \(X = uY + v(n - Y)\) with \(Y \sim PBin(p_1, ..., p_n)\). In particular, if all \(p_i = p\) are also the same, we have a linear transformation of the usual binomial distribution, i.e. \(X = uZ + v(n - Z)\) with \(Z \sim Bin(n, p)\). Summarizing this, the following applies:
    1. All \(p_i = 0\): The only observable value is \(n \cdot v\), i.e. \(P(X = n \cdot v) = 1\) and \(P(X \neq n \cdot v) = 0\).
    2. All \(p_i = 1\): The only observable value is \(n \cdot u\), i.e. \(P(X = n \cdot u) = 1\) and \(P(X \neq n \cdot u) = 0\).
    3. All \(p_i = p\): Observable values are in \(\{u \cdot k + v \cdot (n - k) | k = 0, ..., n\}\) and \(P(X = u \cdot k + v \cdot (n - k)) = P(Z = k)\).
    4. Otherwise: Observable values are in \(\{u \cdot k + v \cdot (n - k) | k = 0, ..., n\})\) and \(P(X = u \cdot k + v(n - k)) = P(Y = k)\)
  3. All \(p_i \in \{0, 1\}\)?
    Let \(I = \{i\, |\, p_i = 1\} \subseteq \{1, ..., n\}\) and \(J = \{i\, |\, p_i = 0\} \subseteq \{1, ..., n\}\). Then, we have:
    1. All \(p_i = 0\): The only observable value is \(v^* := \sum_{i = 1}^{n}{v_i}\), i.e. \(P(X = v^*) = 1\) and \(P(X \neq v^*) = 0\).
    2. All \(p_i = 1\): The only observable value is \(u^* := \sum_{i = 1}^{n}{u_i}\), i.e. \(P(X = u^*) = 1\) and \(P(X \neq u^*) = 0\).
    3. Otherwise, The only observable value is \(w^* := \sum_{i \in I}{u_i} + \sum_{i \in J}{v_i}\), i.e. \(P(X = w^*) = 1\) and \(P(X \neq w^*) = 0\). Note that the case that any \(u_i = v_i\) is equivalent to \(p_i = 1\), because the corresponding random variable \(X_i\) has always the same (non-random) value.
  4. Are there \(p_i \notin \{0, 1\}\)?
    Let \(I\), \(J\) and \(w^*\) as above and \(K = \{i\, |\, p_i > 0 \, \wedge p_i < 1\} \subseteq \{1, ..., n\}\). Then, \(X\) can be expressed as \(X = w^* + Z\) with \(Z = \sum_{i \in K}{X_i}\) following a (reduced) generalized Poisson Bernoulli distribution. In particular, if only one \(p_i \notin \{0, 1\}\), Z follows a linearly transformed Bernoulli distribution.

These cases are illustrated in the following example:

set.seed(1)
pp <- runif(7)
va <- sample(0:6, 7, TRUE)
vb <- sample(0:6, 7, TRUE)

# Case 1
dgpbinom(NULL, pp, rep(1, 7), rep(0, 7))
#> [1] 8.114776e-05 3.112722e-03 4.063146e-02 2.115237e-01 3.793308e-01
#> [6] 2.735489e-01 8.297278e-02 8.798512e-03
dpbinom(NULL, pp) # equal results
#> [1] 8.114776e-05 3.112722e-03 4.063146e-02 2.115237e-01 3.793308e-01
#> [6] 2.735489e-01 8.297278e-02 8.798512e-03

dgpbinom(NULL, pp, rep(0, 7), rep(1, 7))
#> [1] 8.798512e-03 8.297278e-02 2.735489e-01 3.793308e-01 2.115237e-01
#> [6] 4.063146e-02 3.112722e-03 8.114776e-05
dpbinom(NULL, 1 - pp) # equal results
#> [1] 8.798512e-03 8.297278e-02 2.735489e-01 3.793308e-01 2.115237e-01
#> [6] 4.063146e-02 3.112722e-03 8.114776e-05

dgpbinom(NULL, pp, c(rep(1, 3), rep(0, 4)), c(rep(0, 3), rep(1, 4)))
#> [1] 3.062225e-02 1.998504e-01 3.769239e-01 2.828424e-01 9.450797e-02
#> [6] 1.426764e-02 9.620692e-04 2.331571e-05
dpbinom(NULL, c(pp[1:3], 1 - pp[4:7])) # reorder for 0 and 1; equal results
#> [1] 3.062225e-02 1.998504e-01 3.769239e-01 2.828424e-01 9.450797e-02
#> [6] 1.426764e-02 9.620692e-04 2.331571e-05

# Case 2 a)
dgpbinom(NULL, rep(0, 7), rep(4, 7), rep(2, 7)) # only 14 is observable
#>  [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dgpbinom(7 * 2, rep(0, 7), rep(4, 7), rep(2, 7)) # confirmation
#> [1] 1

# Case 2 b)
dgpbinom(NULL, rep(1, 7), rep(4, 7), rep(2, 7)) # only 28 is observable
#>  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
dgpbinom(7 * 4, rep(1, 7), rep(4, 7), rep(2, 7)) # confirmation
#> [1] 1

# Case 2 c)
dgpbinom(NULL, rep(0.3, 7), rep(4, 7), rep(2, 7))
#>  [1] 0.0823543 0.0000000 0.2470629 0.0000000 0.3176523 0.0000000 0.2268945
#>  [8] 0.0000000 0.0972405 0.0000000 0.0250047 0.0000000 0.0035721 0.0000000
#> [15] 0.0002187
dbinom(0:7, 7, 0.3) # equal results, but on different support set
#> [1] 0.0823543 0.2470629 0.3176523 0.2268945 0.0972405 0.0250047 0.0035721
#> [8] 0.0002187

# Case 2 d)
dgpbinom(NULL, pp, rep(4, 7), rep(2, 7))
#>  [1] 8.114776e-05 0.000000e+00 3.112722e-03 0.000000e+00 4.063146e-02
#>  [6] 0.000000e+00 2.115237e-01 0.000000e+00 3.793308e-01 0.000000e+00
#> [11] 2.735489e-01 0.000000e+00 8.297278e-02 0.000000e+00 8.798512e-03
dpbinom(NULL, pp) # equal results, but on different support set
#> [1] 8.114776e-05 3.112722e-03 4.063146e-02 2.115237e-01 3.793308e-01
#> [6] 2.735489e-01 8.297278e-02 8.798512e-03

# Case 3 a)
dgpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0), va, vb) # only sum(vb) is observable
#>  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
dgpbinom(sum(vb), rep(0, 7), va, vb) # confirmation
#> [1] 1

# Case 3 b)
dgpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1), va, vb) # only sum(va) is observable
#>  [1] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dgpbinom(sum(va), rep(1, 7), va, vb) # confirmation
#> [1] 1

# Case 3 c)
dgpbinom(NULL, c(0, 0, 0, 1, 1, 1, 1), va, vb) # only sum(va[4:7], vb[1:3]) is observable
#>  [1] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
dgpbinom(sum(va[4:7], vb[1:3]), c(0, 0, 0, 1, 1, 1, 1), va, vb) # confirmation
#> [1] 1

# Case 4
dgpbinom(NULL, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb)
#>  [1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.18 0.00 0.00 0.12 0.42 0.00 0.00 0.28
#> [16] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
sure <- sum(va[5:7], vb[1:2])
x.transf <- sum(pmin(va[3:4], vb[3:4])):sum(pmax(va[3:4], vb[3:4]))
dgpbinom(sure + x.transf, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb)
#> [1] 0.18 0.00 0.00 0.12 0.42 0.00 0.00 0.28
dgpbinom(x.transf, c(0.3, 0.6), va[3:4], vb[3:4]) # equal results
#> [1] 0.18 0.00 0.00 0.12 0.42 0.00 0.00 0.28

dgpbinom(NULL, c(0, 0, 0, 0.6, 1, 1, 1), va, vb)
#>  [1] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.6 0.0 0.0 0.4 0.0 0.0 0.0 0.0
#> [20] 0.0 0.0 0.0 0.0 0.0 0.0
sure <- sum(va[5:7], vb[1:3])
x.transf <- va[4]:vb[4]
dgpbinom(sure + x.transf, c(0, 0, 0, 0.6, 1, 1, 1), va, vb)
#> [1] 0.6 0.0 0.0 0.4
dgpbinom(x.transf, 0.6, va[4], vb[4]) # equal results; essentially transformed Bernoulli
#> [1] 0.6 0.0 0.0 0.4

Usage with Rcpp

How to import and use the internal C++ functions in Rcpp based packages is described in a separate vignette.

PoissonBinomial/inst/doc/use_with_rcpp.Rmd0000644000176200001440000001527013757216142020435 0ustar liggesusers--- title: "Usage with Rcpp" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Usage with Rcpp} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Each procedure's probability mass function (PMF) and cumulative distribution function (CDF) was implemented in *C++* using the `Rcpp` package. By means of `Rcpp::interface`, these functions are exported to both the package's *R* namespace and *C++* headers. That way, the following functions can then be used by other packages that use `Rcpp`: ``` /*** Ordinary Poisson Binomial Distribution ***/ /*** Exact Procedures ***/ // Direct Convolution (DC) // PMF NumericVector dpb_conv(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_conv(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Divide & Conquer FFT Tree Convolution (DC-FFT) // PMF NumericVector dpb_dc(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_dc(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Discrete Fourier Transformation of the Characteristic Function (DFT-CF) // PMF NumericVector dpb_dftcf(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_dftcf(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Recursive Formula (RF) // PMF NumericVector dpb_rf(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_rf(const IntegerVector obs, const NumericVector probs, const bool lower_tail); /*** Approximations ***/ // Arithmetic Mean Binomial Approximation (AMBA) // PMF NumericVector dpb_mean(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_mean(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Geometric Mean Binomial Approximations (GMBA) // PMF NumericVector dpb_gmba(const IntegerVector obs, const NumericVector const probs, const bool anti); // CDF NumericVector ppb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti, const bool lower_tail); // Poisson Approximation (PA) // PMF NumericVector dpb_pa(const IntegerVector obs, const NumericVector probs); // CDF NumericVector ppb_pa(const IntegerVector obs, const NumericVector probs, const bool lower_tail); // Normal Approximations (NA, RNA) // PMF NumericVector dpb_na(const IntegerVector obs, const NumericVector probs, const bool refined); // CDF NumericVector ppb_na(const IntegerVector obs, const NumericVector probs, const bool refined, const bool lower_tail); /*** Generalized Poisson Binomial Distribution ***/ /*** Exact Procedures ***/ // Generalized Direct Convolution (G-DC) // PMF NumericVector dgpb_conv(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q); // CDF NumericVector pgpb_conv(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool lower_tail); // Generalized Discrete Fourier Transformation of the Characteristic Function (G-DFT-CF) // PMF NumericVector dgpb_dftcf(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q); // CDF NumericVector pgpb_dftcf(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool lower_tail); /*** Approximations ***/ // Generalized Normal Approximations (G-NA, G-RNA) // PMF NumericVector dgpb_na(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool refined, const bool lower_tail); // CDF NumericVector pgpb_na(const IntegerVector obs, const NumericVector probs, const NumericVector val_p, const NumericVector val_q, const bool refined, const bool lower_tail); ``` ## Making the functions usable There are only a few simple steps to follow: 1. Add the `Rcpp` and `PoissonBinomial` packages to the `Imports` and `LinkingTo` fields of the `DESCRIPTION` file. 2. Add `#include ` to source (`.cpp`) and/or header (`.h`, `.hpp`) files in which these functions are to be used. 3. Optional: Add `using namespace PoissonBinomial;`. Without it, the use of functions of this package must be fully qualified with `PoissonBinomial::`, e.g. `PoissonBinomial::dpb_dc` instead of `dpb_dc` ## Important Remarks For better performance, the PMFs and CDFs do not check any of their parameters for plausibility! This must be done by the user by means of *R* or *C/C++* functions. It must be made sure that * the observations in the `obs` vectors are valid, * the probabilities in the `probs` vector are in $(0, 1)$ and * for `dpb_gmba`, `ppb_gmba`, `dpb_na`, `ppb_na`, `dgpb_na` and `pgpb_na`: the probabilities in the `probs` vector **must not** contain zeros or ones. Furthermore, the CDFs only compute non-logarithmic probabilities. If logarithms are needed, they must be computed "manually".PoissonBinomial/inst/doc/use_with_rcpp.html0000644000176200001440000003361514077633372020666 0ustar liggesusers Usage with Rcpp

Usage with Rcpp

Each procedure’s probability mass function (PMF) and cumulative distribution function (CDF) was implemented in C++ using the Rcpp package. By means of Rcpp::interface, these functions are exported to both the package’s R namespace and C++ headers. That way, the following functions can then be used by other packages that use Rcpp:

/***   Ordinary Poisson Binomial Distribution   ***/


/***   Exact Procedures   ***/

// Direct Convolution (DC)

// PMF
NumericVector dpb_conv(const IntegerVector obs,
                       const NumericVector probs);
                       
// CDF
NumericVector ppb_conv(const IntegerVector obs,
                       const NumericVector probs,
                       const bool lower_tail);


// Divide & Conquer FFT Tree Convolution (DC-FFT)

// PMF
NumericVector dpb_dc(const IntegerVector obs,
                     const NumericVector probs);
                     
// CDF
NumericVector ppb_dc(const IntegerVector obs,
                     const NumericVector probs,
                     const bool lower_tail);


// Discrete Fourier Transformation of the Characteristic Function (DFT-CF)

// PMF
NumericVector dpb_dftcf(const IntegerVector obs,
                        const NumericVector probs);
                        
// CDF
NumericVector ppb_dftcf(const IntegerVector obs, const NumericVector probs,
                        const bool lower_tail);
                        

// Recursive Formula (RF)

// PMF
NumericVector dpb_rf(const IntegerVector obs,
                     const NumericVector probs);

// CDF
NumericVector ppb_rf(const IntegerVector obs,
                     const NumericVector probs,
                     const bool lower_tail);



/***   Approximations   ***/


// Arithmetic Mean Binomial Approximation (AMBA)

// PMF
NumericVector dpb_mean(const IntegerVector obs,
                       const NumericVector probs);

// CDF
NumericVector ppb_mean(const IntegerVector obs,
                       const NumericVector probs,
                       const bool lower_tail);


// Geometric Mean Binomial Approximations (GMBA)

// PMF
NumericVector dpb_gmba(const IntegerVector obs, 
                       const NumericVector const probs,
                       const bool anti);
                       
// CDF
NumericVector ppb_gmba(const IntegerVector obs,
                       const NumericVector probs,
                       const bool anti,
                       const bool lower_tail);


// Poisson Approximation (PA)

// PMF
NumericVector dpb_pa(const IntegerVector obs,
                     const NumericVector probs);
                     
// CDF
NumericVector ppb_pa(const IntegerVector obs,
                     const NumericVector probs,
                     const bool lower_tail);
                     

// Normal Approximations (NA, RNA)

// PMF
NumericVector dpb_na(const IntegerVector obs,
                     const NumericVector probs,
                     const bool refined);
                     
// CDF
NumericVector ppb_na(const IntegerVector obs,
                     const NumericVector probs,
                     const bool refined,
                     const bool lower_tail);
                     



/***   Generalized Poisson Binomial Distribution   ***/


/***   Exact Procedures   ***/


// Generalized Direct Convolution (G-DC)

// PMF
NumericVector dgpb_conv(const IntegerVector obs,
                        const NumericVector probs,
                        const NumericVector val_p,
                        const NumericVector val_q);
                        
// CDF
NumericVector pgpb_conv(const IntegerVector obs,
                        const NumericVector probs,
                        const NumericVector val_p,
                        const NumericVector val_q,
                        const bool lower_tail);
                        

// Generalized Discrete Fourier Transformation of the Characteristic Function (G-DFT-CF)

// PMF
NumericVector dgpb_dftcf(const IntegerVector obs,
                         const NumericVector probs,
                         const NumericVector val_p,
                         const NumericVector val_q);
                         
// CDF
NumericVector pgpb_dftcf(const IntegerVector obs,
                         const NumericVector probs,
                         const NumericVector val_p,
                         const NumericVector val_q,
                         const bool lower_tail);
                       
                       
                       
/***   Approximations   ***/


// Generalized Normal Approximations (G-NA, G-RNA)

// PMF
NumericVector dgpb_na(const IntegerVector obs,
                      const NumericVector probs,
                      const NumericVector val_p,
                      const NumericVector val_q,
                      const bool refined,
                      const bool lower_tail);
                      
// CDF
NumericVector pgpb_na(const IntegerVector obs,
                      const NumericVector probs,
                      const NumericVector val_p,
                      const NumericVector val_q,
                      const bool refined,
                      const bool lower_tail);

Making the functions usable

There are only a few simple steps to follow:

  1. Add the Rcpp and PoissonBinomial packages to the Imports and LinkingTo fields of the DESCRIPTION file.
  2. Add #include <PoissonBinomial.h> to source (.cpp) and/or header (.h, .hpp) files in which these functions are to be used.
  3. Optional: Add using namespace PoissonBinomial;. Without it, the use of functions of this package must be fully qualified with PoissonBinomial::, e.g. PoissonBinomial::dpb_dc instead of dpb_dc

Important Remarks

For better performance, the PMFs and CDFs do not check any of their parameters for plausibility! This must be done by the user by means of R or C/C++ functions. It must be made sure that

  • the observations in the obs vectors are valid,
  • the probabilities in the probs vector are in \((0, 1)\) and
  • for dpb_gmba, ppb_gmba, dpb_na, ppb_na, dgpb_na and pgpb_na: the probabilities in the probs vector must not contain zeros or ones.

Furthermore, the CDFs only compute non-logarithmic probabilities. If logarithms are needed, they must be computed “manually”.

PoissonBinomial/inst/doc/intro.Rmd0000644000176200001440000003760713757216224016726 0ustar liggesusers--- title: "Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions" output: rmarkdown::html_vignette: toc: yes vignette: > %\VignetteIndexEntry{Efficient Computation of Ordinary and Generalized Poisson Binomial Distributions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, echo = FALSE} library(PoissonBinomial) ``` ## Introduction The Poisson binomial distribution (in the following abbreviated as PBD) is becoming increasingly important, especially in the areas of statistics, finance, insurance mathematics and quality management. This package provides functions for two types of PBDs: ordinary and generalized PBDs (henceforth referred to as O-PBDs and G-PBDs). ### Ordinary Poisson Binomial Distribution The O-PBD is the distribution of the sum of a number $n$ of independent Bernoulli-distributed random indicators $X_i \in \{0, 1\}$ $(i = 1, ..., n)$: $$X := \sum_{i = 1}^{n}{X_i}.$$ Each of the $X_i$ possesses a predefined probability of success $p_i := P(X_i = 1)$ (subsequently $P(X_i = 0) = 1 - p_i =: q_i$). With this, mean, variance and skewness can be expressed as $$E(X) = \sum_{i = 1}^{n}{p_i} \quad \quad Var(X) = \sum_{i = 1}^{n}{p_i q_i} \quad \quad Skew(X) = \frac{\sum_{i = 1}^{n}{p_i q_i(q_i - p_i)}}{\sqrt{Var(X)}^3}.$$ All possible observations are in $\{0, ..., n\}$. ### Generalized Poisson Binomial Distribution The G-PBD is defined very similar. Again, it is the distribution of a sum random variables, but here, each $X_i \in \{u_i, v_i\}$ with $P(X_i = u_i) =: p_i$ and $P(X_i = v_i) = 1 - p_i =: q_i$. Using ordinary Bernoulli-distributed random variables $Y_i$, $X_i$ can be expressed as $X_i = u_i Y_i + v_i(1 - Y_i) = v_i + Y_i \cdot (u_i - v_i)$. As a result, mean, variance and skewness are given by $$E(X) = \sum_{i = 1}^{n}{v_i} + \sum_{i = 1}^{n}{p_i (u_i - v_i)} \quad \quad Var(X) = \sum_{i = 1}^{n}{p_i q_i(u_i - v_i)^2} \\Skew(X) = \frac{\sum_{i = 1}^{n}{p_i q_i(q_i - p_i)(u_i - v_i)^3}}{\sqrt{Var(X)}^3}.$$ All possible observations are in $\{U, ..., V\}$ with $U := \sum_{i = 1}^{n}{\min\{u_i, v_i\}}$ and $V := \sum_{i = 1}^{n}{\max\{u_i, v_i\}}$. Note that the size $m := V - U$ of the distribution does not generally equal $n$! ### Existing R Packages Computing these distributions exactly is computationally demanding, but in the last few years, some efficient algorithms have been developed. Particularly significant in this respect are the works of [Hong (2013)](http://dx.doi.org/10.1016/j.csda.2012.10.006), who derived the DFT-CF procedure for O-PBDs, [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007) who developed two immensely faster algorithms for O-PBDs, namely the DC and DC-FFT procedures, and [Zhang, Hong and Balakrishnan (2018)](https://doi.org/10.1080/00949655.2018.1440294) who further developed [Hong's (2013)](http://dx.doi.org/10.1016/j.csda.2012.10.006) DFT-CF algorithm for G-PBDs (in the following, this generalized procedure is referred to as G-DFT-CF). Still, only a few R packages exist for the calculation of either ordinary and generalized PBDs, e.g. [`poibin`](https://cran.r-project.org/package=poibin) and [`poisbinom`](https://cran.r-project.org/package=poisbinom) for O-PBDs and [`GPB`](https://cran.r-project.org/package=GPB) for G-PDBs. Before the release of this `PoissonBinomial` package, there has been no R package that implemented the DC and DC-FFT algorithms of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), as they only published a [reference implementation](https://github.com/biscarri1/convpoibin) for R, but refrained from releasing it as a package. Additionally, there are no comparable approaches for G-PBDs to date. The `poibin` package implements the DFT-CF algorithm along with the exact recursive method of [Barlow & Heidtmann (1984)](http://dx.doi.org/10.1109/TR.1984.5221843) and Normal and Poisson approximations. However, both exact procedures of this package possess some disadvantages, i.e. they are relatively slow at computing very large distributions, with the recursive algorithm being also very memory consuming. The G-DFT-CF procedure is implemented in the `GPB` package and inherits this performance drawback. The `poisbinom` package provides a more efficient and much faster DFT-CF implementation. The performance improvement over the `poibin` package lies in the use of the [FFTW C library](http://www.fftw.org). Unfortunately, it sometimes yields some negative probabilities in the tail regions, especially for large distributions. However, this numerical issue has not been addressed to date. This `PoissonBinomial` also utilizes FFTW for both DFT-CF and G-DFT-CF algorithms, but corrects that issue. In addition to the disadvantages regarding computational speed (`poibin` and `GPB`) or numerics (`poisbinom`), especially for very large distributions, the aforementioned packages do not provide headers for their internal C/C++ functions, so that they cannot be imported directly by C or C++ code of other packages that use for example `Rcpp`. In some situations, people might have to deal with Poisson binomial distributions that include Bernoulli variables with $p_i \in \{0, 1\}$. Calculation performance can be further optimized by handling these indicators before the actual computations. Approximations also benefit from this in terms of accuracy. None of the aforementioned packages implements such optimizations. Therefore, the advantages of this `PoissonBinomial` package can be summarized as follows: * Efficient computation of very large distributions with both exact and approximate algorithms for O-PBDs and G-PBDs * Provides headers for the C++ functions so that other packages may include them in their own C++ code * Handles (sometimes large numbers of) 0- and 1-probabilities to speed up performance In total, this package includes 10 different algorithms of computing ordinary Poisson binomial distributions, including optimized versions of the Normal, Refined Normal and Poisson approaches, and 5 approaches for generalized PBDs. In addition, the implementation of the exact recursive procedure for O-PBDs was rewritten so that it is considerably less memory intensive: the `poibin` implementation needs the memory equivalent of $(n + 1)^2$ values of type `double`, while ours only needs $3 \cdot (n + 1)$. *** ## Exact Procedures ### Ordinary Poisson Binomial Distribution In this package implements the following exact algorithms for computing ordinary Poisson binomial distributions: * the *Direct Convolution* approach of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), * the *Divide & Conquer FFT Tree Convolution* procedure of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), * the *Discrete Fourier Transformation of the Characteristic Function* algorithm of [Hong (2013)](http://dx.doi.org/10.1016/j.csda.2012.10.006) and * the *Recursive Formula* of [Barlow & Heidtmann (1984)](http://dx.doi.org/10.1109/TR.1984.5221843). ### Generalized Poisson Binomial Distribution For generalized Poisson binomial distributions, this package provides: * a generalized adaptation of the *Direct Convolution* approach of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007), * a generalized *Divide & Conquer FFT Tree Convolution*, inspired by the respective procedure of [Biscarri, Zhao & Brunner (2018)](http://dx.doi.org/10.1016/j.csda.2018.01.007) for O-PDBs, * the *Generalized Discrete Fourier Transformation of the Characteristic Function* algorithm of [Zhang, Hong and Balakrishnan (2018)](https://doi.org/10.1080/00949655.2018.1440294). ### Examples Examples and performance comparisons of these procedures are presented in a [separate vignette](proc_exact.html). *** ## Approximations ### Ordinary Poisson Binomial Distribution In addition, the following O-PBD approximation methods are included: * the *Poisson Approximation* approach, * the *Arithmetic Mean Binomial Approximation* procedure, * *Geometric Mean Binomial Approximation* algorithms, * the *Normal Approximation* and * the *Refined Normal Approximation*. ### Generalized Poisson Binomial Distribution For G-PBDs, there are * the *Normal Approximation* and * the *Refined Normal Approximation*. ### Examples Examples and performance comparisons of these approaches are provided in a [separate vignette](proc_approx.html) as well. *** ## Handling special cases, zeros and ones Handling special cases, such as ordinary binomial distributions, zeros and ones is useful to speed up performance. Unfortunately, some approximations do not work well for Bernoulli trials with $p_i \in \{0, 1\}$, e.g. the Geometric Mean Binomial Approximations. This is why handling these values *before* the actual computation of the distribution is not only a performance tweak, but sometimes even a necessity. It is achieved by some simple preliminary considerations. ### Ordinary Poisson Binomial Distributions 1. All $p_i = p$ are equal? In this case, we have a usual binomial distribution. The specified method of computation is then ignored. In particular, the following applies: a) $p = 0$: The only observable value is $0$, i.e. $P(X = 0) = 1$ and $P(X \neq 0) = 0$. b) $p = 1$: The only observable value is $n$, i.e. $P(X = n) = 1$ and $P(X \neq n) = 0$. 2. All $p_i \in \{0, 1\} (i = 1, ..., n)$? If one $p_i$ is 1, it is impossible to measure 0 successes. Following the same logic, if two $p_i$ are 1, we cannot observe 0 and 1 successes and so on. In general, a number of $n_1$ values with $p_i = 1$ makes it impossible to measure $0, ..., n_1 - 1$ successes. Likewise, if there are $n_0$ Bernoulli trials with $p_i = 0$, we cannot observe $n - n_0 + 1, ..., n$ successes. If all $p_i \in \{0, 1\}$, it holds $n = n_0 + n_1$. As a result, the only observable value is $n_1$, i.e. $P(X = n_1) = 1$ and $P(X \neq n_1) = 0$. 3. Are there $p_i \notin \{0, 1\}$? Using the deductions from above, we can only observe an "inner" distribution in the range of $n_1, n_1 + 1, ..., n - n_0$, i.e. $P(X \in \{n_1, ..., n - n_0\}) > 0$ and $P(X < n_1) = P(X > n - n_0) = 0$. As a result, $X$ can be expressed as $X = n_1 + Y$ with $Y \sim PBin(\{p_i|0 < p_i < 1\})$ and $|\{p_i|0 < p_i < 1\}| = n - n_0 - n_1$. Subsequently, the Poisson binomial distribution must only be computed for $Y$. Especially, if there is only one $p_i \notin \{0, 1\}$, $Y$ follows a Bernoulli distribution with parameter $p_i$, i.e. $P(X = n_1) = P(Y = 0) = 1 - p_i$ and $P(X = n_1 + 1) = P(Y = 1) = p_i$. These cases are illustrated in the following example: ```{r ex-opdb} # Case 1 dpbinom(NULL, rep(0.3, 7)) dbinom(0:7, 7, 0.3) # equal results dpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0)) # only 0 is observable dpbinom(0, c(0, 0, 0, 0, 0, 0, 0)) # confirmation dpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1)) # only 7 is observable dpbinom(7, c(1, 1, 1, 1, 1, 1, 1)) # confirmation # Case 2 dpbinom(NULL, c(0, 0, 0, 0, 1, 1, 1)) # only 3 is observable dpbinom(3, c(0, 0, 0, 0, 1, 1, 1)) # confirmation # Case 3 dpbinom(NULL, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # only 1-5 are observable dpbinom(1:5, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # confirmation dpbinom(NULL, c(0, 0, 0.4, 1)) # only 1 and 2 are observable dpbinom(1:2, c(0, 0, 0.4, 1)) # confirmation ``` ### Generalized Poisson Binomial Distributions 1. All $u_i \in \{0, 1\}$ and all $v_i = 1 - u_i$? Then, it is an ordinary Poisson binomial distribution with parameters $p_i' = p_i$ for all $i$ for which $u_i = 1$ and $p_i' = 1 - p_i$ otherwise. This includes all the special cases described above. 2. All $u_i = u$ are equal and all $v_i = v$ are equal? In this case, we have a linearly transformed ordinary Poisson binomial distribution, i.e. $X$ can be expressed as $X = uY + v(n - Y)$ with $Y \sim PBin(p_1, ..., p_n)$. In particular, if all $p_i = p$ are also the same, we have a linear transformation of the usual binomial distribution, i.e. $X = uZ + v(n - Z)$ with $Z \sim Bin(n, p)$. Summarizing this, the following applies: a) All $p_i = 0$: The only observable value is $n \cdot v$, i.e. $P(X = n \cdot v) = 1$ and $P(X \neq n \cdot v) = 0$. b) All $p_i = 1$: The only observable value is $n \cdot u$, i.e. $P(X = n \cdot u) = 1$ and $P(X \neq n \cdot u) = 0$. c) All $p_i = p$: Observable values are in $\{u \cdot k + v \cdot (n - k) | k = 0, ..., n\}$ and $P(X = u \cdot k + v \cdot (n - k)) = P(Z = k)$. d) Otherwise: Observable values are in $\{u \cdot k + v \cdot (n - k) | k = 0, ..., n\})$ and $P(X = u \cdot k + v(n - k)) = P(Y = k)$ 3. All $p_i \in \{0, 1\}$? Let $I = \{i\, |\, p_i = 1\} \subseteq \{1, ..., n\}$ and $J = \{i\, |\, p_i = 0\} \subseteq \{1, ..., n\}$. Then, we have: a) All $p_i = 0$: The only observable value is $v^* := \sum_{i = 1}^{n}{v_i}$, i.e. $P(X = v^*) = 1$ and $P(X \neq v^*) = 0$. b) All $p_i = 1$: The only observable value is $u^* := \sum_{i = 1}^{n}{u_i}$, i.e. $P(X = u^*) = 1$ and $P(X \neq u^*) = 0$. c) Otherwise, The only observable value is $w^* := \sum_{i \in I}{u_i} + \sum_{i \in J}{v_i}$, i.e. $P(X = w^*) = 1$ and $P(X \neq w^*) = 0$. Note that the case that any $u_i = v_i$ is equivalent to $p_i = 1$, because the corresponding random variable $X_i$ has always the same (non-random) value. 4. Are there $p_i \notin \{0, 1\}$? Let $I$, $J$ and $w^*$ as above and $K = \{i\, |\, p_i > 0 \, \wedge p_i < 1\} \subseteq \{1, ..., n\}$. Then, $X$ can be expressed as $X = w^* + Z$ with $Z = \sum_{i \in K}{X_i}$ following a (reduced) generalized Poisson Bernoulli distribution. In particular, if only one $p_i \notin \{0, 1\}$, Z follows a linearly transformed Bernoulli distribution. These cases are illustrated in the following example: ```{r ex-gpdb} set.seed(1) pp <- runif(7) va <- sample(0:6, 7, TRUE) vb <- sample(0:6, 7, TRUE) # Case 1 dgpbinom(NULL, pp, rep(1, 7), rep(0, 7)) dpbinom(NULL, pp) # equal results dgpbinom(NULL, pp, rep(0, 7), rep(1, 7)) dpbinom(NULL, 1 - pp) # equal results dgpbinom(NULL, pp, c(rep(1, 3), rep(0, 4)), c(rep(0, 3), rep(1, 4))) dpbinom(NULL, c(pp[1:3], 1 - pp[4:7])) # reorder for 0 and 1; equal results # Case 2 a) dgpbinom(NULL, rep(0, 7), rep(4, 7), rep(2, 7)) # only 14 is observable dgpbinom(7 * 2, rep(0, 7), rep(4, 7), rep(2, 7)) # confirmation # Case 2 b) dgpbinom(NULL, rep(1, 7), rep(4, 7), rep(2, 7)) # only 28 is observable dgpbinom(7 * 4, rep(1, 7), rep(4, 7), rep(2, 7)) # confirmation # Case 2 c) dgpbinom(NULL, rep(0.3, 7), rep(4, 7), rep(2, 7)) dbinom(0:7, 7, 0.3) # equal results, but on different support set # Case 2 d) dgpbinom(NULL, pp, rep(4, 7), rep(2, 7)) dpbinom(NULL, pp) # equal results, but on different support set # Case 3 a) dgpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0), va, vb) # only sum(vb) is observable dgpbinom(sum(vb), rep(0, 7), va, vb) # confirmation # Case 3 b) dgpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1), va, vb) # only sum(va) is observable dgpbinom(sum(va), rep(1, 7), va, vb) # confirmation # Case 3 c) dgpbinom(NULL, c(0, 0, 0, 1, 1, 1, 1), va, vb) # only sum(va[4:7], vb[1:3]) is observable dgpbinom(sum(va[4:7], vb[1:3]), c(0, 0, 0, 1, 1, 1, 1), va, vb) # confirmation # Case 4 dgpbinom(NULL, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb) sure <- sum(va[5:7], vb[1:2]) x.transf <- sum(pmin(va[3:4], vb[3:4])):sum(pmax(va[3:4], vb[3:4])) dgpbinom(sure + x.transf, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb) dgpbinom(x.transf, c(0.3, 0.6), va[3:4], vb[3:4]) # equal results dgpbinom(NULL, c(0, 0, 0, 0.6, 1, 1, 1), va, vb) sure <- sum(va[5:7], vb[1:3]) x.transf <- va[4]:vb[4] dgpbinom(sure + x.transf, c(0, 0, 0, 0.6, 1, 1, 1), va, vb) dgpbinom(x.transf, 0.6, va[4], vb[4]) # equal results; essentially transformed Bernoulli ``` *** ## Usage with Rcpp How to import and use the internal C++ functions in `Rcpp` based packages is described in a [separate vignette](use_with_rcpp.html).PoissonBinomial/inst/doc/use_with_rcpp.R0000644000176200001440000000022514077633372020112 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) PoissonBinomial/inst/doc/proc_approx.R0000644000176200001440000002174714077633301017577 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, echo = FALSE------------------------------------------------------ library(PoissonBinomial) ## ----pa1---------------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Poisson") ppbinom(NULL, pp, wt, "Poisson") ## ----pa2---------------------------------------------------------------------- set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "Poisson") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp)) # U(0, 0.01) random probabilities of success pp <- runif(20, 0, 0.01) dpbinom(NULL, pp, method = "Poisson") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp)) ## ----am1---------------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) mean(rep(pp, wt)) dpbinom(NULL, pp, wt, "Mean") ppbinom(NULL, pp, wt, "Mean") ## ----am2---------------------------------------------------------------------- set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) # U(0.3, 0.5) random probabilities of success pp <- runif(20, 0.3, 0.5) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) # U(0.39, 0.41) random probabilities of success pp <- runif(20, 0.39, 0.41) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) ## ----gma1--------------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) prod(rep(pp, wt))^(1/sum(wt)) dpbinom(NULL, pp, wt, "GeoMean") ppbinom(NULL, pp, wt, "GeoMean") ## ----gma2--------------------------------------------------------------------- set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) # U(0.4, 0.6) random probabilities of success pp <- runif(20, 0.4, 0.6) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) # U(0.49, 0.51) random probabilities of success pp <- runif(20, 0.49, 0.51) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) ## ----gmb1--------------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) 1 - prod(1 - rep(pp, wt))^(1/sum(wt)) dpbinom(NULL, pp, wt, "GeoMeanCounter") ppbinom(NULL, pp, wt, "GeoMeanCounter") ## ----gmb2--------------------------------------------------------------------- set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) # U(0.4, 0.6) random probabilities of success pp <- runif(20, 0.4, 0.6) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) # U(0.49, 0.51) random probabilities of success pp <- runif(20, 0.49, 0.51) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) ## ----na1-ord------------------------------------------------------------------ set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Normal") ppbinom(NULL, pp, wt, "Normal") ## ----na2-ord------------------------------------------------------------------ set.seed(1) # 10 random probabilities of success pp <- runif(10) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100000 random probabilities of success pp <- runif(100000) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ## ----rna1-ord----------------------------------------------------------------- set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "RefinedNormal") ppbinom(NULL, pp, wt, "RefinedNormal") ## ----rna2-ord----------------------------------------------------------------- set.seed(1) # 10 random probabilities of success pp <- runif(10) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100000 random probabilities of success pp <- runif(100000) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ## ----benchmark-ord------------------------------------------------------------ library(microbenchmark) set.seed(1) f1 <- function() dpbinom(NULL, runif(4000), method = "Normal") f2 <- function() dpbinom(NULL, runif(4000), method = "RefinedNormal") f3 <- function() dpbinom(NULL, runif(4000), method = "Poisson") f4 <- function() dpbinom(NULL, runif(4000), method = "Mean") f5 <- function() dpbinom(NULL, runif(4000), method = "GeoMean") f6 <- function() dpbinom(NULL, runif(4000), method = "GeoMeanCounter") f7 <- function() dpbinom(NULL, runif(4000), method = "DivideFFT") microbenchmark(f1(), f2(), f3(), f4(), f5(), f6(), f7(), times = 51) ## ----na1-gen------------------------------------------------------------------ set.seed(2) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Normal") pgpbinom(NULL, pp, va, vb, wt, "Normal") ## ----na2-gen------------------------------------------------------------------ set.seed(2) # 10 random probabilities of success pp <- runif(10) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100 random probabilities of success pp <- runif(100) va <- sample(0:100, 100, TRUE) vb <- sample(0:100, 100, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) va <- sample(0:1000, 1000, TRUE) vb <- sample(0:1000, 1000, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ## ----rna1-gen----------------------------------------------------------------- set.seed(2) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "RefinedNormal") pgpbinom(NULL, pp, va, vb, wt, "RefinedNormal") ## ----rna2-gen----------------------------------------------------------------- set.seed(2) # 10 random probabilities of success pp <- runif(10) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100 random probabilities of success pp <- runif(100) va <- sample(0:100, 100, TRUE) vb <- sample(0:100, 100, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) va <- sample(0:1000, 1000, TRUE) vb <- sample(0:1000, 1000, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ## ----benchmark-gen------------------------------------------------------------ library(microbenchmark) n <- 1500 set.seed(2) va <- sample(1:50, n, TRUE) vb <- sample(1:50, n, TRUE) f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Normal") f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "RefinedNormal") f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT") microbenchmark(f1(), f2(), f3(), times = 51) PoissonBinomial/inst/doc/proc_approx.Rmd0000644000176200001440000003351114022377771020116 0ustar liggesusers--- title: "Approximate Procedures" output: rmarkdown::html_vignette: toc: yes vignette: > %\VignetteIndexEntry{Approximate Procedures} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, echo = FALSE} library(PoissonBinomial) ``` ## Ordinary Poisson Binomial Distribution ### Poisson Approximation The *Poisson Approximation* (DC) approach is requested with `method = "Poisson"`. It is based on a Poisson distribution, whose parameter is the sum of the probabilities of success. ```{r pa1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Poisson") ppbinom(NULL, pp, wt, "Poisson") ``` A comparison with exact computation shows that the approximation quality of the PA procedure increases with smaller probabilities of success. The reason is that the Poisson Binomial distribution approaches a Poisson distribution when the probabilities are very small. ```{r pa2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "Poisson") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp)) # U(0, 0.01) random probabilities of success pp <- runif(20, 0, 0.01) dpbinom(NULL, pp, method = "Poisson") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Poisson") - dpbinom(NULL, pp)) ``` ### Arithmetic Mean Binomial Approximation The *Arithmetic Mean Binomial Approximation* (AMBA) approach is requested with `method = "Mean"`. It is based on a Binomial distribution, whose parameter is the arithmetic mean of the probabilities of success. ```{r am1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) mean(rep(pp, wt)) dpbinom(NULL, pp, wt, "Mean") ppbinom(NULL, pp, wt, "Mean") ``` A comparison with exact computation shows that the approximation quality of the AMBA procedure increases when the probabilities of success are closer to each other. The reason is that, although the expectation remains unchanged, the distribution's variance becomes smaller the less the probabilities differ. Since this variance is minimized by equal probabilities (but still underestimated), the AMBA method is best suited for situations with very similar probabilities of success. ```{r am2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) # U(0.3, 0.5) random probabilities of success pp <- runif(20, 0.3, 0.5) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) # U(0.39, 0.41) random probabilities of success pp <- runif(20, 0.39, 0.41) dpbinom(NULL, pp, method = "Mean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "Mean") - dpbinom(NULL, pp)) ``` ### Geometric Mean Binomial Approximation - Variant A The *Geometric Mean Binomial Approximation (Variant A)* (GMBA-A) approach is requested with `method = "GeoMean"`. It is based on a Binomial distribution, whose parameter is the geometric mean of the probabilities of success: $$\hat{p} = \sqrt[n]{p_1 \cdot ... \cdot p_n}$$ ```{r gma1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) prod(rep(pp, wt))^(1/sum(wt)) dpbinom(NULL, pp, wt, "GeoMean") ppbinom(NULL, pp, wt, "GeoMean") ``` It is known that the geometric mean of the probabilities of success is always smaller than their arithmetic mean. Thus, we get a stochastically *smaller* binomial distribution. A comparison with exact computation shows that the approximation quality of the GMBA-A procedure increases when the probabilities of success are closer to each other: ```{r gma2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) # U(0.4, 0.6) random probabilities of success pp <- runif(20, 0.4, 0.6) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) # U(0.49, 0.51) random probabilities of success pp <- runif(20, 0.49, 0.51) dpbinom(NULL, pp, method = "GeoMean") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMean") - dpbinom(NULL, pp)) ``` ### Geometric Mean Binomial Approximation - Variant B The *Geometric Mean Binomial Approximation (Variant B)* (GMBA-B) approach is requested with `method = "GeoMeanCounter"`. It is based on a Binomial distribution, whose parameter is 1 minus the geometric mean of the probabilities of **failure**: $$\hat{p} = 1 - \sqrt[n]{(1 - p_1) \cdot ... \cdot (1 - p_n)}$$ ```{r gmb1} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) 1 - prod(1 - rep(pp, wt))^(1/sum(wt)) dpbinom(NULL, pp, wt, "GeoMeanCounter") ppbinom(NULL, pp, wt, "GeoMeanCounter") ``` It is known that the geometric mean of the probabilities of **failure** is always smaller than their arithmetic mean. As a result, 1 minus the geometric mean is larger than 1 minus the arithmetic mean. Thus, we get a stochastically *larger* binomial distribution. A comparison with exact computation shows that the approximation quality of the GMBA-B procedure again increases when the probabilities of success are closer to each other: ```{r gmb2} set.seed(1) # U(0, 1) random probabilities of success pp <- runif(20) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) # U(0.4, 0.6) random probabilities of success pp <- runif(20, 0.4, 0.6) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) # U(0.49, 0.51) random probabilities of success pp <- runif(20, 0.49, 0.51) dpbinom(NULL, pp, method = "GeoMeanCounter") dpbinom(NULL, pp) summary(dpbinom(NULL, pp, method = "GeoMeanCounter") - dpbinom(NULL, pp)) ``` ### Normal Approximation The *Normal Approximation* (NA) approach is requested with `method = "Normal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean and variance of the input probabilities of success. ```{r na1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "Normal") ppbinom(NULL, pp, wt, "Normal") ``` A comparison with exact computation shows that the approximation quality of the NA procedure increases with larger numbers of probabilities of success: ```{r na2-ord} set.seed(1) # 10 random probabilities of success pp <- runif(10) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100000 random probabilities of success pp <- runif(100000) dpn <- dpbinom(NULL, pp, method = "Normal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Refined Normal Approximation The *Refined Normal Approximation* (RNA) approach is requested with `method = "RefinedNormal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean, variance and skewness of the input probabilities of success. ```{r rna1-ord} set.seed(1) pp <- runif(10) wt <- sample(1:10, 10, TRUE) dpbinom(NULL, pp, wt, "RefinedNormal") ppbinom(NULL, pp, wt, "RefinedNormal") ``` A comparison with exact computation shows that the approximation quality of the RNA procedure increases with larger numbers of probabilities of success: ```{r rna2-ord} set.seed(1) # 10 random probabilities of success pp <- runif(10) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100000 random probabilities of success pp <- runif(100000) dpn <- dpbinom(NULL, pp, method = "RefinedNormal") dpd <- dpbinom(NULL, pp) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Processing Speed Comparisons To assess the performance of the approximation procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-ord} library(microbenchmark) set.seed(1) f1 <- function() dpbinom(NULL, runif(4000), method = "Normal") f2 <- function() dpbinom(NULL, runif(4000), method = "RefinedNormal") f3 <- function() dpbinom(NULL, runif(4000), method = "Poisson") f4 <- function() dpbinom(NULL, runif(4000), method = "Mean") f5 <- function() dpbinom(NULL, runif(4000), method = "GeoMean") f6 <- function() dpbinom(NULL, runif(4000), method = "GeoMeanCounter") f7 <- function() dpbinom(NULL, runif(4000), method = "DivideFFT") microbenchmark(f1(), f2(), f3(), f4(), f5(), f6(), f7(), times = 51) ``` Clearly, the NA procedure is the fastest, followed by the RNA and PA methods. The next fastest algorithms are AMBA, GMBA-A and GMBA-B. They exhibit almost equal mean execution speed, with the AMBA algorithm being slightly faster. All of the approximation procedures outperform the fastest exact approach, DC-FFT, by far. ## Generalized Poisson Binomial Distribution ### Generalized Normal Approximation The *Generalized Normal Approximation* (G-NA) approach is requested with `method = "Normal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean and variance of the input probabilities of success (see [Introduction](intro.html). ```{r na1-gen} set.seed(2) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "Normal") pgpbinom(NULL, pp, va, vb, wt, "Normal") ``` A comparison with exact computation shows that the approximation quality of the NA procedure increases with larger numbers of probabilities of success: ```{r na2-gen} set.seed(2) # 10 random probabilities of success pp <- runif(10) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100 random probabilities of success pp <- runif(100) va <- sample(0:100, 100, TRUE) vb <- sample(0:100, 100, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) va <- sample(0:1000, 1000, TRUE) vb <- sample(0:1000, 1000, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "Normal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Generalized Refined Normal Approximation The *Generalized Refined Normal Approximation* (G-RNA) approach is requested with `method = "RefinedNormal"`. It is based on a Normal distribution, whose parameters are derived from the theoretical mean, variance and skewness of the input probabilities of success. ```{r rna1-gen} set.seed(2) pp <- runif(10) wt <- sample(1:10, 10, TRUE) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dgpbinom(NULL, pp, va, vb, wt, "RefinedNormal") pgpbinom(NULL, pp, va, vb, wt, "RefinedNormal") ``` A comparison with exact computation shows that the approximation quality of the RNA procedure increases with larger numbers of probabilities of success: ```{r rna2-gen} set.seed(2) # 10 random probabilities of success pp <- runif(10) va <- sample(0:10, 10, TRUE) vb <- sample(0:10, 10, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 100 random probabilities of success pp <- runif(100) va <- sample(0:100, 100, TRUE) vb <- sample(0:100, 100, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) # 1000 random probabilities of success pp <- runif(1000) va <- sample(0:1000, 1000, TRUE) vb <- sample(0:1000, 1000, TRUE) dpn <- dgpbinom(NULL, pp, va, vb, method = "RefinedNormal") dpd <- dgpbinom(NULL, pp, va, vb) idx <- which(dpn != 0 & dpd != 0) summary((dpn - dpd)[idx]) ``` ### Processing Speed Comparisons To assess the performance of the approximation procedures, we use the `microbenchmark` package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2). ```{r benchmark-gen} library(microbenchmark) n <- 1500 set.seed(2) va <- sample(1:50, n, TRUE) vb <- sample(1:50, n, TRUE) f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Normal") f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "RefinedNormal") f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT") microbenchmark(f1(), f2(), f3(), times = 51) ``` Clearly, the G-NA procedure is the fastest, followed by the G-RNA method. Both are hugely faster than G-DC-FFT.PoissonBinomial/inst/doc/intro.R0000644000176200001440000000604514077633273016400 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, echo = FALSE------------------------------------------------------ library(PoissonBinomial) ## ----ex-opdb------------------------------------------------------------------ # Case 1 dpbinom(NULL, rep(0.3, 7)) dbinom(0:7, 7, 0.3) # equal results dpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0)) # only 0 is observable dpbinom(0, c(0, 0, 0, 0, 0, 0, 0)) # confirmation dpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1)) # only 7 is observable dpbinom(7, c(1, 1, 1, 1, 1, 1, 1)) # confirmation # Case 2 dpbinom(NULL, c(0, 0, 0, 0, 1, 1, 1)) # only 3 is observable dpbinom(3, c(0, 0, 0, 0, 1, 1, 1)) # confirmation # Case 3 dpbinom(NULL, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # only 1-5 are observable dpbinom(1:5, c(0, 0, 0.1, 0.2, 0.4, 0.8, 1)) # confirmation dpbinom(NULL, c(0, 0, 0.4, 1)) # only 1 and 2 are observable dpbinom(1:2, c(0, 0, 0.4, 1)) # confirmation ## ----ex-gpdb------------------------------------------------------------------ set.seed(1) pp <- runif(7) va <- sample(0:6, 7, TRUE) vb <- sample(0:6, 7, TRUE) # Case 1 dgpbinom(NULL, pp, rep(1, 7), rep(0, 7)) dpbinom(NULL, pp) # equal results dgpbinom(NULL, pp, rep(0, 7), rep(1, 7)) dpbinom(NULL, 1 - pp) # equal results dgpbinom(NULL, pp, c(rep(1, 3), rep(0, 4)), c(rep(0, 3), rep(1, 4))) dpbinom(NULL, c(pp[1:3], 1 - pp[4:7])) # reorder for 0 and 1; equal results # Case 2 a) dgpbinom(NULL, rep(0, 7), rep(4, 7), rep(2, 7)) # only 14 is observable dgpbinom(7 * 2, rep(0, 7), rep(4, 7), rep(2, 7)) # confirmation # Case 2 b) dgpbinom(NULL, rep(1, 7), rep(4, 7), rep(2, 7)) # only 28 is observable dgpbinom(7 * 4, rep(1, 7), rep(4, 7), rep(2, 7)) # confirmation # Case 2 c) dgpbinom(NULL, rep(0.3, 7), rep(4, 7), rep(2, 7)) dbinom(0:7, 7, 0.3) # equal results, but on different support set # Case 2 d) dgpbinom(NULL, pp, rep(4, 7), rep(2, 7)) dpbinom(NULL, pp) # equal results, but on different support set # Case 3 a) dgpbinom(NULL, c(0, 0, 0, 0, 0, 0, 0), va, vb) # only sum(vb) is observable dgpbinom(sum(vb), rep(0, 7), va, vb) # confirmation # Case 3 b) dgpbinom(NULL, c(1, 1, 1, 1, 1, 1, 1), va, vb) # only sum(va) is observable dgpbinom(sum(va), rep(1, 7), va, vb) # confirmation # Case 3 c) dgpbinom(NULL, c(0, 0, 0, 1, 1, 1, 1), va, vb) # only sum(va[4:7], vb[1:3]) is observable dgpbinom(sum(va[4:7], vb[1:3]), c(0, 0, 0, 1, 1, 1, 1), va, vb) # confirmation # Case 4 dgpbinom(NULL, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb) sure <- sum(va[5:7], vb[1:2]) x.transf <- sum(pmin(va[3:4], vb[3:4])):sum(pmax(va[3:4], vb[3:4])) dgpbinom(sure + x.transf, c(0, 0, 0.3, 0.6, 1, 1, 1), va, vb) dgpbinom(x.transf, c(0.3, 0.6), va[3:4], vb[3:4]) # equal results dgpbinom(NULL, c(0, 0, 0, 0.6, 1, 1, 1), va, vb) sure <- sum(va[5:7], vb[1:3]) x.transf <- va[4]:vb[4] dgpbinom(sure + x.transf, c(0, 0, 0, 0.6, 1, 1, 1), va, vb) dgpbinom(x.transf, 0.6, va[4], vb[4]) # equal results; essentially transformed Bernoulli PoissonBinomial/inst/doc/proc_exact.html0000644000176200001440000033213414077633372020140 0ustar liggesusers Exact Procedures

Exact Procedures

Ordinary Poisson Binomial Distribution

Direct Convolution

The Direct Convolution (DC) approach is requested with method = "Convolve".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "Convolve")
#>  [1] 3.574462e-35 1.120280e-32 1.685184e-30 1.620524e-28 1.119523e-26
#>  [6] 5.920060e-25 2.493263e-23 8.591850e-22 2.470125e-20 6.011429e-19
#> [11] 1.252345e-17 2.253115e-16 3.525477e-15 4.825171e-14 5.803728e-13
#> [16] 6.158735e-12 5.784692e-11 4.822437e-10 3.576566e-09 2.364563e-08
#> [21] 1.395965e-07 7.370448e-07 3.484836e-06 1.477208e-05 5.619632e-05
#> [26] 1.920240e-04 5.897928e-04 1.629272e-03 4.049768e-03 9.060183e-03
#> [31] 1.824629e-02 3.307754e-02 5.396724e-02 7.921491e-02 1.045505e-01
#> [36] 1.239854e-01 1.319896e-01 1.259938e-01 1.077029e-01 8.232174e-02
#> [41] 5.616422e-02 3.413623e-02 1.844304e-02 8.835890e-03 3.743554e-03
#> [46] 1.398320e-03 4.589049e-04 1.318064e-04 3.298425e-05 7.154649e-06
#> [51] 1.337083e-06 2.137543e-07 2.898296e-08 3.298587e-09 3.110922e-10
#> [56] 2.392070e-11 1.468267e-12 6.991155e-14 2.478218e-15 6.130807e-17
#> [61] 9.411166e-19 6.727527e-21
ppbinom(NULL, pp, wt, "Convolve")
#>  [1] 3.574462e-35 1.123854e-32 1.696423e-30 1.637488e-28 1.135898e-26
#>  [6] 6.033650e-25 2.553600e-23 8.847210e-22 2.558597e-20 6.267289e-19
#> [11] 1.315018e-17 2.384617e-16 3.763939e-15 5.201565e-14 6.323884e-13
#> [16] 6.791123e-12 6.463805e-11 5.468818e-10 4.123448e-09 2.776908e-08
#> [21] 1.673656e-07 9.044104e-07 4.389247e-06 1.916133e-05 7.535765e-05
#> [26] 2.673817e-04 8.571745e-04 2.486446e-03 6.536215e-03 1.559640e-02
#> [31] 3.384269e-02 6.692022e-02 1.208875e-01 2.001024e-01 3.046529e-01
#> [36] 4.286383e-01 5.606280e-01 6.866217e-01 7.943246e-01 8.766463e-01
#> [41] 9.328105e-01 9.669468e-01 9.853898e-01 9.942257e-01 9.979692e-01
#> [46] 9.993676e-01 9.998265e-01 9.999583e-01 9.999913e-01 9.999984e-01
#> [51] 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

Divide & Conquer FFT Tree Convolution

The Divide & Conquer FFT Tree Convolution (DC-FFT) approach is requested with method = "DivideFFT".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "DivideFFT")
#>  [1] 3.574462e-35 1.120280e-32 1.685184e-30 1.620524e-28 1.119523e-26
#>  [6] 5.920060e-25 2.493263e-23 8.591850e-22 2.470125e-20 6.011429e-19
#> [11] 1.252345e-17 2.253115e-16 3.525477e-15 4.825171e-14 5.803728e-13
#> [16] 6.158735e-12 5.784692e-11 4.822437e-10 3.576566e-09 2.364563e-08
#> [21] 1.395965e-07 7.370448e-07 3.484836e-06 1.477208e-05 5.619632e-05
#> [26] 1.920240e-04 5.897928e-04 1.629272e-03 4.049768e-03 9.060183e-03
#> [31] 1.824629e-02 3.307754e-02 5.396724e-02 7.921491e-02 1.045505e-01
#> [36] 1.239854e-01 1.319896e-01 1.259938e-01 1.077029e-01 8.232174e-02
#> [41] 5.616422e-02 3.413623e-02 1.844304e-02 8.835890e-03 3.743554e-03
#> [46] 1.398320e-03 4.589049e-04 1.318064e-04 3.298425e-05 7.154649e-06
#> [51] 1.337083e-06 2.137543e-07 2.898296e-08 3.298587e-09 3.110922e-10
#> [56] 2.392070e-11 1.468267e-12 6.991155e-14 2.478218e-15 6.130807e-17
#> [61] 9.411166e-19 6.727527e-21
ppbinom(NULL, pp, wt, "DivideFFT")
#>  [1] 3.574462e-35 1.123854e-32 1.696423e-30 1.637488e-28 1.135898e-26
#>  [6] 6.033650e-25 2.553600e-23 8.847210e-22 2.558597e-20 6.267289e-19
#> [11] 1.315018e-17 2.384617e-16 3.763939e-15 5.201565e-14 6.323884e-13
#> [16] 6.791123e-12 6.463805e-11 5.468818e-10 4.123448e-09 2.776908e-08
#> [21] 1.673656e-07 9.044104e-07 4.389247e-06 1.916133e-05 7.535765e-05
#> [26] 2.673817e-04 8.571745e-04 2.486446e-03 6.536215e-03 1.559640e-02
#> [31] 3.384269e-02 6.692022e-02 1.208875e-01 2.001024e-01 3.046529e-01
#> [36] 4.286383e-01 5.606280e-01 6.866217e-01 7.943246e-01 8.766463e-01
#> [41] 9.328105e-01 9.669468e-01 9.853898e-01 9.942257e-01 9.979692e-01
#> [46] 9.993676e-01 9.998265e-01 9.999583e-01 9.999913e-01 9.999984e-01
#> [51] 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

By design, as proposed by Biscarri, Zhao & Brunner (2018), its results are identical to the DC procedure, if \(n \leq 750\). Thus, differences can be observed for larger \(n > 750\):

set.seed(1)
pp1 <- runif(751)
pp2 <- pp1[1:750]

sum(abs(dpbinom(NULL, pp2, method = "DivideFFT") - dpbinom(NULL, pp2, method = "Convolve")))
#> [1] 0
sum(abs(dpbinom(NULL, pp1, method = "DivideFFT") - dpbinom(NULL, pp1, method = "Convolve")))
#> [1] 0

The reason is that the DC-FFT method splits the input probs vector into as equally sized parts as possible and computes their distributions separately with the DC approach. The results of the portions are then convoluted by means of the Fast Fourier Transformation. As proposed by Biscarri, Zhao & Brunner (2018), no splitting is done for \(n \leq 750\). In addition, the DC-FFT procedure does not produce probabilities \(\leq 5.55e\text{-}17\), i.e. smaller values are rounded off to 0, if \(n > 750\), whereas the smallest possible result of the DC algorithm is \(\sim 1e\text{-}323\). This is most likely caused by the used FFTW3 library.

set.seed(1)
pp1 <- runif(751)

d1 <- dpbinom(NULL, pp1, method = "DivideFFT")
d2 <- dpbinom(NULL, pp1, method = "Convolve")

min(d1[d1 > 0])
#> [1] 1.635357e-321
min(d2[d2 > 0])
#> [1] 1.635357e-321

Discrete Fourier Transformation of the Characteristic Function

The Discrete Fourier Transformation of the Characteristic Function (DFT-CF) approach is requested with method = "Characteristic".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "Characteristic")
#>  [1] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [11] 0.000000e+00 2.238353e-16 3.549132e-15 4.829828e-14 5.804377e-13
#> [16] 6.158818e-12 5.784702e-11 4.822438e-10 3.576566e-09 2.364563e-08
#> [21] 1.395965e-07 7.370448e-07 3.484836e-06 1.477208e-05 5.619632e-05
#> [26] 1.920240e-04 5.897928e-04 1.629272e-03 4.049768e-03 9.060183e-03
#> [31] 1.824629e-02 3.307754e-02 5.396724e-02 7.921491e-02 1.045505e-01
#> [36] 1.239854e-01 1.319896e-01 1.259938e-01 1.077029e-01 8.232174e-02
#> [41] 5.616422e-02 3.413623e-02 1.844304e-02 8.835890e-03 3.743554e-03
#> [46] 1.398320e-03 4.589049e-04 1.318064e-04 3.298425e-05 7.154649e-06
#> [51] 1.337083e-06 2.137543e-07 2.898296e-08 3.298587e-09 3.110923e-10
#> [56] 2.392079e-11 1.468354e-12 6.994931e-14 2.513558e-15 0.000000e+00
#> [61] 0.000000e+00 0.000000e+00
ppbinom(NULL, pp, wt, "Characteristic")
#>  [1] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [11] 0.000000e+00 2.238353e-16 3.772968e-15 5.207125e-14 6.325089e-13
#> [16] 6.791327e-12 6.463834e-11 5.468822e-10 4.123448e-09 2.776908e-08
#> [21] 1.673656e-07 9.044104e-07 4.389247e-06 1.916133e-05 7.535765e-05
#> [26] 2.673817e-04 8.571745e-04 2.486446e-03 6.536215e-03 1.559640e-02
#> [31] 3.384269e-02 6.692022e-02 1.208875e-01 2.001024e-01 3.046529e-01
#> [36] 4.286383e-01 5.606280e-01 6.866217e-01 7.943246e-01 8.766463e-01
#> [41] 9.328105e-01 9.669468e-01 9.853898e-01 9.942257e-01 9.979692e-01
#> [46] 9.993676e-01 9.998265e-01 9.999583e-01 9.999913e-01 9.999984e-01
#> [51] 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

As can be seen, the DFT-CF procedure does not produce probabilities \(\leq 2.22e\text{-}16\), i.e. smaller values are rounded off to 0, most likely due to the used FFTW3 library.

Recursive Formula

The Recursive Formula (RF) approach is requested with method = "Recursive".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)

dpbinom(NULL, pp, wt, "Recursive")
#>  [1] 3.574462e-35 1.120280e-32 1.685184e-30 1.620524e-28 1.119523e-26
#>  [6] 5.920060e-25 2.493263e-23 8.591850e-22 2.470125e-20 6.011429e-19
#> [11] 1.252345e-17 2.253115e-16 3.525477e-15 4.825171e-14 5.803728e-13
#> [16] 6.158735e-12 5.784692e-11 4.822437e-10 3.576566e-09 2.364563e-08
#> [21] 1.395965e-07 7.370448e-07 3.484836e-06 1.477208e-05 5.619632e-05
#> [26] 1.920240e-04 5.897928e-04 1.629272e-03 4.049768e-03 9.060183e-03
#> [31] 1.824629e-02 3.307754e-02 5.396724e-02 7.921491e-02 1.045505e-01
#> [36] 1.239854e-01 1.319896e-01 1.259938e-01 1.077029e-01 8.232174e-02
#> [41] 5.616422e-02 3.413623e-02 1.844304e-02 8.835890e-03 3.743554e-03
#> [46] 1.398320e-03 4.589049e-04 1.318064e-04 3.298425e-05 7.154649e-06
#> [51] 1.337083e-06 2.137543e-07 2.898296e-08 3.298587e-09 3.110922e-10
#> [56] 2.392070e-11 1.468267e-12 6.991155e-14 2.478218e-15 6.130807e-17
#> [61] 9.411166e-19 6.727527e-21
ppbinom(NULL, pp, wt, "Recursive")
#>  [1] 3.574462e-35 1.123854e-32 1.696423e-30 1.637488e-28 1.135898e-26
#>  [6] 6.033650e-25 2.553600e-23 8.847210e-22 2.558597e-20 6.267289e-19
#> [11] 1.315018e-17 2.384617e-16 3.763939e-15 5.201565e-14 6.323884e-13
#> [16] 6.791123e-12 6.463805e-11 5.468818e-10 4.123448e-09 2.776908e-08
#> [21] 1.673656e-07 9.044104e-07 4.389247e-06 1.916133e-05 7.535765e-05
#> [26] 2.673817e-04 8.571745e-04 2.486446e-03 6.536215e-03 1.559640e-02
#> [31] 3.384269e-02 6.692022e-02 1.208875e-01 2.001024e-01 3.046529e-01
#> [36] 4.286383e-01 5.606280e-01 6.866217e-01 7.943246e-01 8.766463e-01
#> [41] 9.328105e-01 9.669468e-01 9.853898e-01 9.942257e-01 9.979692e-01
#> [46] 9.993676e-01 9.998265e-01 9.999583e-01 9.999913e-01 9.999984e-01
#> [51] 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [56] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [61] 1.000000e+00 1.000000e+00

Obviously, the RF procedure does produce probabilities \(\leq 5.55e\text{-}17\), because it does not rely on the FFTW3 library. Furthermore, it yields the same results as the DC method.

set.seed(1)
pp <- runif(1000)
wt <- sample(1:10, 1000, TRUE)

sum(abs(dpbinom(NULL, pp, wt, "Convolve") - dpbinom(NULL, pp, wt, "Recursive")))
#> [1] 0

Processing Speed Comparisons

To assess the performance of the exact procedures, we use the microbenchmark package. Each algorithm has to calculate the PMF repeatedly based on random probability vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2).

library(microbenchmark)
set.seed(1)

f1 <- function() dpbinom(NULL, runif(6000), method = "DivideFFT")
f2 <- function() dpbinom(NULL, runif(6000), method = "Convolve")
f3 <- function() dpbinom(NULL, runif(6000), method = "Recursive")
f4 <- function() dpbinom(NULL, runif(6000), method = "Characteristic")

microbenchmark(f1(), f2(), f3(), f4(), times = 51)
#> Unit: milliseconds
#>  expr      min        lq      mean   median        uq      max neval
#>  f1()  12.9458  13.63805  14.21733  13.9593  14.34765  19.3790    51
#>  f2()  26.0156  26.63650  27.70315  27.5269  28.03865  30.7545    51
#>  f3()  48.5651  49.50555  51.04236  50.6247  51.54655  57.3016    51
#>  f4() 135.8826 138.92250 141.05593 140.6084 142.84260 150.7202    51

Clearly, the DC-FFT procedure is the fastest, followed by DC, RF and DFT-CF methods.

Generalized Poisson Binomial Distribution

Generalized Direct Convolution

The Generalized Direct Convolution (G-DC) approach is requested with method = "Convolve".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)

dgpbinom(NULL, pp, va, vb, wt, "Convolve")
#>   [1] 1.140600e-31 5.349930e-30 1.164698e-28 1.572037e-27 1.491024e-26
#>   [6] 1.077204e-25 6.336147e-25 3.215011e-24 1.466295e-23 6.127671e-23
#>  [11] 2.363402e-22 8.484857e-22 2.866109e-21 9.171228e-21 2.788507e-20
#>  [16] 8.091940e-20 2.254155e-19 6.051395e-19 1.570129e-18 3.953458e-18
#>  [21] 9.696098e-18 2.321913e-17 5.442392e-17 1.251302e-16 2.824507e-16
#>  [26] 6.264454e-16 1.366745e-15 2.934598e-15 6.203639e-15 1.292697e-14
#>  [31] 2.657759e-14 5.394727e-14 1.081983e-13 2.144873e-13 4.201625e-13
#>  [36] 8.135609e-13 1.557745e-12 2.949821e-12 5.527695e-12 1.025815e-11
#>  [41] 1.885777e-11 3.434641e-11 6.196981e-11 1.106787e-10 1.956340e-10
#>  [46] 3.425394e-10 5.948077e-10 1.025224e-09 1.753751e-09 2.972596e-09
#>  [51] 4.985314e-09 8.275458e-09 1.362195e-08 2.227979e-08 3.622799e-08
#>  [56] 5.845270e-08 9.332219e-08 1.473012e-07 2.302797e-07 3.576650e-07
#>  [61] 5.529336e-07 8.496291e-07 1.292864e-06 1.943382e-06 2.888042e-06
#>  [66] 4.257944e-06 6.248675e-06 9.128095e-06 1.322640e-05 1.893515e-05
#>  [71] 2.675612e-05 3.741507e-05 5.199255e-05 7.194684e-05 9.895330e-05
#>  [76] 1.347017e-04 1.809349e-04 2.399008e-04 3.150314e-04 4.112231e-04
#>  [81] 5.341537e-04 6.888863e-04 8.788234e-04 1.106198e-03 1.374340e-03
#>  [86] 1.690272e-03 2.065290e-03 2.511885e-03 3.037800e-03 3.641214e-03
#>  [91] 4.311837e-03 5.039293e-03 5.824625e-03 6.686091e-03 7.651765e-03
#>  [96] 8.740859e-03 9.945159e-03 1.122411e-02 1.252016e-02 1.378863e-02
#> [101] 1.502576e-02 1.627450e-02 1.759663e-02 1.902489e-02 2.052786e-02
#> [106] 2.201243e-02 2.336424e-02 2.450429e-02 2.543095e-02 2.622065e-02
#> [111] 2.697857e-02 2.776636e-02 2.855637e-02 2.924236e-02 2.969655e-02
#> [116] 2.983772e-02 2.967384e-02 2.929746e-02 2.883252e-02 2.836282e-02
#> [121] 2.788971e-02 2.734351e-02 2.663438e-02 2.570794e-02 2.457639e-02
#> [126] 2.331289e-02 2.201380e-02 2.075053e-02 1.954176e-02 1.836001e-02
#> [131] 1.716200e-02 1.592047e-02 1.464084e-02 1.335803e-02 1.211826e-02
#> [136] 1.095708e-02 9.886542e-03 8.897658e-03 7.972694e-03 7.098018e-03
#> [141] 6.270583e-03 5.496952e-03 4.787457e-03 4.149442e-03 3.583427e-03
#> [146] 3.083701e-03 2.641746e-03 2.249767e-03 1.902455e-03 1.596805e-03
#> [151] 1.330879e-03 1.102475e-03 9.084265e-04 7.447312e-04 6.071616e-04
#> [156] 4.918629e-04 3.956251e-04 3.158260e-04 2.502339e-04 1.968330e-04
#> [161] 1.537458e-04 1.192445e-04 9.179821e-05 7.010494e-05 5.308547e-05
#> [166] 3.984854e-05 2.965115e-05 2.187013e-05 1.598631e-05 1.157497e-05
#> [171] 8.295941e-06 5.881266e-06 4.121776e-06 2.854642e-06 1.953341e-06
#> [176] 1.320224e-06 8.809465e-07 5.799307e-07 3.763587e-07 2.406488e-07
#> [181] 1.515662e-07 9.401686e-08 5.742327e-08 3.451481e-08 2.039831e-08
#> [186] 1.184350e-08 6.751380e-09 3.777327e-09 2.073644e-09 1.116337e-09
#> [191] 5.887148e-10 3.036829e-10 1.529887e-10 7.516829e-11 3.598151e-11
#> [196] 1.676154e-11 7.585978e-12 3.326429e-12 1.407527e-12 5.717370e-13
#> [201] 2.216349e-13 8.149241e-14 2.824954e-14 9.179165e-15 2.780017e-15
#> [206] 7.803525e-16 2.018046e-16 4.775552e-17 1.025798e-17 1.979767e-18
#> [211] 3.386554e-19 5.038594e-20 6.336865e-21 6.424747e-22 4.821385e-23
#> [216] 2.108301e-24
pgpbinom(NULL, pp, va, vb, wt, "Convolve")
#>   [1] 1.140600e-31 5.463990e-30 1.219337e-28 1.693971e-27 1.660421e-26
#>   [6] 1.243246e-25 7.579393e-25 3.972950e-24 1.863590e-23 7.991261e-23
#>  [11] 3.162528e-22 1.164739e-21 4.030847e-21 1.320208e-20 4.108715e-20
#>  [16] 1.220065e-19 3.474220e-19 9.525615e-19 2.522691e-18 6.476149e-18
#>  [21] 1.617225e-17 3.939138e-17 9.381530e-17 2.189455e-16 5.013962e-16
#>  [26] 1.127842e-15 2.494586e-15 5.429184e-15 1.163282e-14 2.455979e-14
#>  [31] 5.113739e-14 1.050847e-13 2.132829e-13 4.277703e-13 8.479327e-13
#>  [36] 1.661494e-12 3.219239e-12 6.169059e-12 1.169675e-11 2.195491e-11
#>  [41] 4.081268e-11 7.515909e-11 1.371289e-10 2.478076e-10 4.434415e-10
#>  [46] 7.859810e-10 1.380789e-09 2.406013e-09 4.159763e-09 7.132360e-09
#>  [51] 1.211767e-08 2.039313e-08 3.401508e-08 5.629487e-08 9.252285e-08
#>  [56] 1.509756e-07 2.442977e-07 3.915989e-07 6.218786e-07 9.795436e-07
#>  [61] 1.532477e-06 2.382106e-06 3.674970e-06 5.618352e-06 8.506394e-06
#>  [66] 1.276434e-05 1.901301e-05 2.814111e-05 4.136751e-05 6.030266e-05
#>  [71] 8.705877e-05 1.244738e-04 1.764664e-04 2.484132e-04 3.473665e-04
#>  [76] 4.820683e-04 6.630032e-04 9.029039e-04 1.217935e-03 1.629158e-03
#>  [81] 2.163312e-03 2.852198e-03 3.731022e-03 4.837220e-03 6.211560e-03
#>  [86] 7.901832e-03 9.967122e-03 1.247901e-02 1.551681e-02 1.915802e-02
#>  [91] 2.346986e-02 2.850915e-02 3.433378e-02 4.101987e-02 4.867163e-02
#>  [96] 5.741249e-02 6.735765e-02 7.858176e-02 9.110192e-02 1.048906e-01
#> [101] 1.199163e-01 1.361908e-01 1.537874e-01 1.728123e-01 1.933402e-01
#> [106] 2.153526e-01 2.387169e-01 2.632211e-01 2.886521e-01 3.148727e-01
#> [111] 3.418513e-01 3.696177e-01 3.981740e-01 4.274164e-01 4.571130e-01
#> [116] 4.869507e-01 5.166245e-01 5.459220e-01 5.747545e-01 6.031173e-01
#> [121] 6.310070e-01 6.583505e-01 6.849849e-01 7.106929e-01 7.352692e-01
#> [126] 7.585821e-01 7.805959e-01 8.013465e-01 8.208882e-01 8.392482e-01
#> [131] 8.564102e-01 8.723307e-01 8.869715e-01 9.003296e-01 9.124478e-01
#> [136] 9.234049e-01 9.332914e-01 9.421891e-01 9.501618e-01 9.572598e-01
#> [141] 9.635304e-01 9.690273e-01 9.738148e-01 9.779642e-01 9.815477e-01
#> [146] 9.846314e-01 9.872731e-01 9.895229e-01 9.914253e-01 9.930221e-01
#> [151] 9.943530e-01 9.954555e-01 9.963639e-01 9.971087e-01 9.977158e-01
#> [156] 9.982077e-01 9.986033e-01 9.989191e-01 9.991694e-01 9.993662e-01
#> [161] 9.995199e-01 9.996392e-01 9.997310e-01 9.998011e-01 9.998542e-01
#> [166] 9.998940e-01 9.999237e-01 9.999455e-01 9.999615e-01 9.999731e-01
#> [171] 9.999814e-01 9.999873e-01 9.999914e-01 9.999943e-01 9.999962e-01
#> [176] 9.999975e-01 9.999984e-01 9.999990e-01 9.999994e-01 9.999996e-01
#> [181] 9.999998e-01 9.999999e-01 9.999999e-01 1.000000e+00 1.000000e+00
#> [186] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [191] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [196] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [201] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [206] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [211] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [216] 1.000000e+00

Generalized Divide & Conquer FFT Tree Convolution

The Generalized Divide & Conquer FFT Tree Convolution (G-DC-FFT) approach is requested with method = "DivideFFT".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)

dgpbinom(NULL, pp, va, vb, wt, "DivideFFT")
#>   [1] 1.140600e-31 5.349930e-30 1.164698e-28 1.572037e-27 1.491024e-26
#>   [6] 1.077204e-25 6.336147e-25 3.215011e-24 1.466295e-23 6.127671e-23
#>  [11] 2.363402e-22 8.484857e-22 2.866109e-21 9.171228e-21 2.788507e-20
#>  [16] 8.091940e-20 2.254155e-19 6.051395e-19 1.570129e-18 3.953458e-18
#>  [21] 9.696098e-18 2.321913e-17 5.442392e-17 1.251302e-16 2.824507e-16
#>  [26] 6.264454e-16 1.366745e-15 2.934598e-15 6.203639e-15 1.292697e-14
#>  [31] 2.657759e-14 5.394727e-14 1.081983e-13 2.144873e-13 4.201625e-13
#>  [36] 8.135609e-13 1.557745e-12 2.949821e-12 5.527695e-12 1.025815e-11
#>  [41] 1.885777e-11 3.434641e-11 6.196981e-11 1.106787e-10 1.956340e-10
#>  [46] 3.425394e-10 5.948077e-10 1.025224e-09 1.753751e-09 2.972596e-09
#>  [51] 4.985314e-09 8.275458e-09 1.362195e-08 2.227979e-08 3.622799e-08
#>  [56] 5.845270e-08 9.332219e-08 1.473012e-07 2.302797e-07 3.576650e-07
#>  [61] 5.529336e-07 8.496291e-07 1.292864e-06 1.943382e-06 2.888042e-06
#>  [66] 4.257944e-06 6.248675e-06 9.128095e-06 1.322640e-05 1.893515e-05
#>  [71] 2.675612e-05 3.741507e-05 5.199255e-05 7.194684e-05 9.895330e-05
#>  [76] 1.347017e-04 1.809349e-04 2.399008e-04 3.150314e-04 4.112231e-04
#>  [81] 5.341537e-04 6.888863e-04 8.788234e-04 1.106198e-03 1.374340e-03
#>  [86] 1.690272e-03 2.065290e-03 2.511885e-03 3.037800e-03 3.641214e-03
#>  [91] 4.311837e-03 5.039293e-03 5.824625e-03 6.686091e-03 7.651765e-03
#>  [96] 8.740859e-03 9.945159e-03 1.122411e-02 1.252016e-02 1.378863e-02
#> [101] 1.502576e-02 1.627450e-02 1.759663e-02 1.902489e-02 2.052786e-02
#> [106] 2.201243e-02 2.336424e-02 2.450429e-02 2.543095e-02 2.622065e-02
#> [111] 2.697857e-02 2.776636e-02 2.855637e-02 2.924236e-02 2.969655e-02
#> [116] 2.983772e-02 2.967384e-02 2.929746e-02 2.883252e-02 2.836282e-02
#> [121] 2.788971e-02 2.734351e-02 2.663438e-02 2.570794e-02 2.457639e-02
#> [126] 2.331289e-02 2.201380e-02 2.075053e-02 1.954176e-02 1.836001e-02
#> [131] 1.716200e-02 1.592047e-02 1.464084e-02 1.335803e-02 1.211826e-02
#> [136] 1.095708e-02 9.886542e-03 8.897658e-03 7.972694e-03 7.098018e-03
#> [141] 6.270583e-03 5.496952e-03 4.787457e-03 4.149442e-03 3.583427e-03
#> [146] 3.083701e-03 2.641746e-03 2.249767e-03 1.902455e-03 1.596805e-03
#> [151] 1.330879e-03 1.102475e-03 9.084265e-04 7.447312e-04 6.071616e-04
#> [156] 4.918629e-04 3.956251e-04 3.158260e-04 2.502339e-04 1.968330e-04
#> [161] 1.537458e-04 1.192445e-04 9.179821e-05 7.010494e-05 5.308547e-05
#> [166] 3.984854e-05 2.965115e-05 2.187013e-05 1.598631e-05 1.157497e-05
#> [171] 8.295941e-06 5.881266e-06 4.121776e-06 2.854642e-06 1.953341e-06
#> [176] 1.320224e-06 8.809465e-07 5.799307e-07 3.763587e-07 2.406488e-07
#> [181] 1.515662e-07 9.401686e-08 5.742327e-08 3.451481e-08 2.039831e-08
#> [186] 1.184350e-08 6.751380e-09 3.777327e-09 2.073644e-09 1.116337e-09
#> [191] 5.887148e-10 3.036829e-10 1.529887e-10 7.516829e-11 3.598151e-11
#> [196] 1.676154e-11 7.585978e-12 3.326429e-12 1.407527e-12 5.717370e-13
#> [201] 2.216349e-13 8.149241e-14 2.824954e-14 9.179165e-15 2.780017e-15
#> [206] 7.803525e-16 2.018046e-16 4.775552e-17 1.025798e-17 1.979767e-18
#> [211] 3.386554e-19 5.038594e-20 6.336865e-21 6.424747e-22 4.821385e-23
#> [216] 2.108301e-24
pgpbinom(NULL, pp, va, vb, wt, "DivideFFT")
#>   [1] 1.140600e-31 5.463990e-30 1.219337e-28 1.693971e-27 1.660421e-26
#>   [6] 1.243246e-25 7.579393e-25 3.972950e-24 1.863590e-23 7.991261e-23
#>  [11] 3.162528e-22 1.164739e-21 4.030847e-21 1.320208e-20 4.108715e-20
#>  [16] 1.220065e-19 3.474220e-19 9.525615e-19 2.522691e-18 6.476149e-18
#>  [21] 1.617225e-17 3.939138e-17 9.381530e-17 2.189455e-16 5.013962e-16
#>  [26] 1.127842e-15 2.494586e-15 5.429184e-15 1.163282e-14 2.455979e-14
#>  [31] 5.113739e-14 1.050847e-13 2.132829e-13 4.277703e-13 8.479327e-13
#>  [36] 1.661494e-12 3.219239e-12 6.169059e-12 1.169675e-11 2.195491e-11
#>  [41] 4.081268e-11 7.515909e-11 1.371289e-10 2.478076e-10 4.434415e-10
#>  [46] 7.859810e-10 1.380789e-09 2.406013e-09 4.159763e-09 7.132360e-09
#>  [51] 1.211767e-08 2.039313e-08 3.401508e-08 5.629487e-08 9.252285e-08
#>  [56] 1.509756e-07 2.442977e-07 3.915989e-07 6.218786e-07 9.795436e-07
#>  [61] 1.532477e-06 2.382106e-06 3.674970e-06 5.618352e-06 8.506394e-06
#>  [66] 1.276434e-05 1.901301e-05 2.814111e-05 4.136751e-05 6.030266e-05
#>  [71] 8.705877e-05 1.244738e-04 1.764664e-04 2.484132e-04 3.473665e-04
#>  [76] 4.820683e-04 6.630032e-04 9.029039e-04 1.217935e-03 1.629158e-03
#>  [81] 2.163312e-03 2.852198e-03 3.731022e-03 4.837220e-03 6.211560e-03
#>  [86] 7.901832e-03 9.967122e-03 1.247901e-02 1.551681e-02 1.915802e-02
#>  [91] 2.346986e-02 2.850915e-02 3.433378e-02 4.101987e-02 4.867163e-02
#>  [96] 5.741249e-02 6.735765e-02 7.858176e-02 9.110192e-02 1.048906e-01
#> [101] 1.199163e-01 1.361908e-01 1.537874e-01 1.728123e-01 1.933402e-01
#> [106] 2.153526e-01 2.387169e-01 2.632211e-01 2.886521e-01 3.148727e-01
#> [111] 3.418513e-01 3.696177e-01 3.981740e-01 4.274164e-01 4.571130e-01
#> [116] 4.869507e-01 5.166245e-01 5.459220e-01 5.747545e-01 6.031173e-01
#> [121] 6.310070e-01 6.583505e-01 6.849849e-01 7.106929e-01 7.352692e-01
#> [126] 7.585821e-01 7.805959e-01 8.013465e-01 8.208882e-01 8.392482e-01
#> [131] 8.564102e-01 8.723307e-01 8.869715e-01 9.003296e-01 9.124478e-01
#> [136] 9.234049e-01 9.332914e-01 9.421891e-01 9.501618e-01 9.572598e-01
#> [141] 9.635304e-01 9.690273e-01 9.738148e-01 9.779642e-01 9.815477e-01
#> [146] 9.846314e-01 9.872731e-01 9.895229e-01 9.914253e-01 9.930221e-01
#> [151] 9.943530e-01 9.954555e-01 9.963639e-01 9.971087e-01 9.977158e-01
#> [156] 9.982077e-01 9.986033e-01 9.989191e-01 9.991694e-01 9.993662e-01
#> [161] 9.995199e-01 9.996392e-01 9.997310e-01 9.998011e-01 9.998542e-01
#> [166] 9.998940e-01 9.999237e-01 9.999455e-01 9.999615e-01 9.999731e-01
#> [171] 9.999814e-01 9.999873e-01 9.999914e-01 9.999943e-01 9.999962e-01
#> [176] 9.999975e-01 9.999984e-01 9.999990e-01 9.999994e-01 9.999996e-01
#> [181] 9.999998e-01 9.999999e-01 9.999999e-01 1.000000e+00 1.000000e+00
#> [186] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [191] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [196] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [201] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [206] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [211] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [216] 1.000000e+00

By design, similar to the ordinary DC-FFT algorithm by Biscarri, Zhao & Brunner (2018), its results are identical to the G-DC procedure, if \(n\) and the number of possible observed values is small. Thus, differences can be observed for larger numbers:

set.seed(1)
pp1 <- runif(250)
va1 <- sample(0:50, 250, TRUE)
vb1 <- sample(0:50, 250, TRUE)
pp2 <- pp1[1:248]
va2 <- va1[1:248]
vb2 <- vb1[1:248]

sum(abs(dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT")
        - dgpbinom(NULL, pp1, va1, vb1, method = "Convolve")))
#> [1] 0

sum(abs(dgpbinom(NULL, pp2, va2, vb2, method = "DivideFFT")
        - dgpbinom(NULL, pp2, va2, vb2, method = "Convolve")))
#> [1] 0

The reason is that the G-DC-FFT method splits the input probs, val_p and val_q vectors into parts such that the numbers of possible observations of all parts are as equally sized as possible. Their distributions are then computed separately with the G-DC approach. The results of the portions are then convoluted by means of the Fast Fourier Transformation. For small \(n\) and small distribution sizes, no splitting is needed. In addition, the G-DC-FFT procedure, just like the DC-FFT method, does not produce probabilities \(\leq 5.55e\text{-}17\), i.e. smaller values are rounded off to \(0\), if the total number of possible observations is smaller than \(750\), whereas the smallest possible result of the DC algorithm is \(\sim 1e\text{-}323\). This is most likely caused by the used FFTW3 library.

d1 <- dgpbinom(NULL, pp1, va1, vb1, method = "DivideFFT")
d2 <- dgpbinom(NULL, pp1, va1, vb1, method = "Convolve")

min(d1[d1 > 0])
#> [1] 2.839368e-99
min(d2[d2 > 0])
#> [1] 2.839368e-99

Generalized Discrete Fourier Transformation of the Characteristic Function

The Generalized Discrete Fourier Transformation of the Characteristic Function (G-DFT-CF) approach is requested with method = "Characteristic".

set.seed(1)
pp <- runif(10)
wt <- sample(1:10, 10, TRUE)
va <- sample(0:10, 10, TRUE)
vb <- sample(0:10, 10, TRUE)

dgpbinom(NULL, pp, va, vb, wt, "Characteristic")
#>   [1] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>   [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [11] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [16] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [21] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 2.837237e-16
#>  [26] 6.270062e-16 1.364746e-15 2.935666e-15 6.201829e-15 1.292176e-14
#>  [31] 2.657237e-14 5.394193e-14 1.081902e-13 2.144802e-13 4.201557e-13
#>  [36] 8.135509e-13 1.557735e-12 2.949809e-12 5.527683e-12 1.025814e-11
#>  [41] 1.885776e-11 3.434640e-11 6.196980e-11 1.106787e-10 1.956340e-10
#>  [46] 3.425394e-10 5.948077e-10 1.025224e-09 1.753750e-09 2.972596e-09
#>  [51] 4.985314e-09 8.275458e-09 1.362195e-08 2.227979e-08 3.622799e-08
#>  [56] 5.845270e-08 9.332219e-08 1.473012e-07 2.302797e-07 3.576650e-07
#>  [61] 5.529336e-07 8.496291e-07 1.292864e-06 1.943382e-06 2.888042e-06
#>  [66] 4.257944e-06 6.248675e-06 9.128095e-06 1.322640e-05 1.893515e-05
#>  [71] 2.675612e-05 3.741507e-05 5.199255e-05 7.194684e-05 9.895330e-05
#>  [76] 1.347017e-04 1.809349e-04 2.399008e-04 3.150314e-04 4.112231e-04
#>  [81] 5.341537e-04 6.888863e-04 8.788234e-04 1.106198e-03 1.374340e-03
#>  [86] 1.690272e-03 2.065290e-03 2.511885e-03 3.037800e-03 3.641214e-03
#>  [91] 4.311837e-03 5.039293e-03 5.824625e-03 6.686091e-03 7.651765e-03
#>  [96] 8.740859e-03 9.945159e-03 1.122411e-02 1.252016e-02 1.378863e-02
#> [101] 1.502576e-02 1.627450e-02 1.759663e-02 1.902489e-02 2.052786e-02
#> [106] 2.201243e-02 2.336424e-02 2.450429e-02 2.543095e-02 2.622065e-02
#> [111] 2.697857e-02 2.776636e-02 2.855637e-02 2.924236e-02 2.969655e-02
#> [116] 2.983772e-02 2.967384e-02 2.929746e-02 2.883252e-02 2.836282e-02
#> [121] 2.788971e-02 2.734351e-02 2.663438e-02 2.570794e-02 2.457639e-02
#> [126] 2.331289e-02 2.201380e-02 2.075053e-02 1.954176e-02 1.836001e-02
#> [131] 1.716200e-02 1.592047e-02 1.464084e-02 1.335803e-02 1.211826e-02
#> [136] 1.095708e-02 9.886542e-03 8.897658e-03 7.972694e-03 7.098018e-03
#> [141] 6.270583e-03 5.496952e-03 4.787457e-03 4.149442e-03 3.583427e-03
#> [146] 3.083701e-03 2.641746e-03 2.249767e-03 1.902455e-03 1.596805e-03
#> [151] 1.330879e-03 1.102475e-03 9.084265e-04 7.447312e-04 6.071616e-04
#> [156] 4.918629e-04 3.956251e-04 3.158260e-04 2.502339e-04 1.968330e-04
#> [161] 1.537458e-04 1.192445e-04 9.179821e-05 7.010494e-05 5.308547e-05
#> [166] 3.984854e-05 2.965115e-05 2.187013e-05 1.598631e-05 1.157497e-05
#> [171] 8.295941e-06 5.881266e-06 4.121776e-06 2.854642e-06 1.953341e-06
#> [176] 1.320224e-06 8.809465e-07 5.799307e-07 3.763587e-07 2.406488e-07
#> [181] 1.515662e-07 9.401686e-08 5.742327e-08 3.451481e-08 2.039831e-08
#> [186] 1.184350e-08 6.751380e-09 3.777327e-09 2.073644e-09 1.116337e-09
#> [191] 5.887148e-10 3.036829e-10 1.529887e-10 7.516829e-11 3.598151e-11
#> [196] 1.676154e-11 7.585978e-12 3.326430e-12 1.407529e-12 5.717381e-13
#> [201] 2.216360e-13 8.149551e-14 2.825209e-14 9.182470e-15 2.781725e-15
#> [206] 7.813323e-16 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [211] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#> [216] 0.000000e+00
pgpbinom(NULL, pp, va, vb, wt, "Characteristic")
#>   [1] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>   [6] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [11] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [16] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
#>  [21] 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 2.837237e-16
#>  [26] 9.107298e-16 2.275475e-15 5.211141e-15 1.141297e-14 2.433473e-14
#>  [31] 5.090710e-14 1.048490e-13 2.130392e-13 4.275194e-13 8.476751e-13
#>  [36] 1.661226e-12 3.218961e-12 6.168770e-12 1.169645e-11 2.195459e-11
#>  [41] 4.081235e-11 7.515875e-11 1.371285e-10 2.478072e-10 4.434412e-10
#>  [46] 7.859806e-10 1.380788e-09 2.406013e-09 4.159763e-09 7.132359e-09
#>  [51] 1.211767e-08 2.039313e-08 3.401508e-08 5.629487e-08 9.252285e-08
#>  [56] 1.509756e-07 2.442977e-07 3.915989e-07 6.218786e-07 9.795436e-07
#>  [61] 1.532477e-06 2.382106e-06 3.674970e-06 5.618352e-06 8.506394e-06
#>  [66] 1.276434e-05 1.901301e-05 2.814111e-05 4.136751e-05 6.030266e-05
#>  [71] 8.705877e-05 1.244738e-04 1.764664e-04 2.484132e-04 3.473665e-04
#>  [76] 4.820683e-04 6.630032e-04 9.029039e-04 1.217935e-03 1.629158e-03
#>  [81] 2.163312e-03 2.852198e-03 3.731022e-03 4.837220e-03 6.211560e-03
#>  [86] 7.901832e-03 9.967122e-03 1.247901e-02 1.551681e-02 1.915802e-02
#>  [91] 2.346986e-02 2.850915e-02 3.433378e-02 4.101987e-02 4.867163e-02
#>  [96] 5.741249e-02 6.735765e-02 7.858176e-02 9.110192e-02 1.048906e-01
#> [101] 1.199163e-01 1.361908e-01 1.537874e-01 1.728123e-01 1.933402e-01
#> [106] 2.153526e-01 2.387169e-01 2.632211e-01 2.886521e-01 3.148727e-01
#> [111] 3.418513e-01 3.696177e-01 3.981740e-01 4.274164e-01 4.571130e-01
#> [116] 4.869507e-01 5.166245e-01 5.459220e-01 5.747545e-01 6.031173e-01
#> [121] 6.310070e-01 6.583505e-01 6.849849e-01 7.106929e-01 7.352692e-01
#> [126] 7.585821e-01 7.805959e-01 8.013465e-01 8.208882e-01 8.392482e-01
#> [131] 8.564102e-01 8.723307e-01 8.869715e-01 9.003296e-01 9.124478e-01
#> [136] 9.234049e-01 9.332914e-01 9.421891e-01 9.501618e-01 9.572598e-01
#> [141] 9.635304e-01 9.690273e-01 9.738148e-01 9.779642e-01 9.815477e-01
#> [146] 9.846314e-01 9.872731e-01 9.895229e-01 9.914253e-01 9.930221e-01
#> [151] 9.943530e-01 9.954555e-01 9.963639e-01 9.971087e-01 9.977158e-01
#> [156] 9.982077e-01 9.986033e-01 9.989191e-01 9.991694e-01 9.993662e-01
#> [161] 9.995199e-01 9.996392e-01 9.997310e-01 9.998011e-01 9.998542e-01
#> [166] 9.998940e-01 9.999237e-01 9.999455e-01 9.999615e-01 9.999731e-01
#> [171] 9.999814e-01 9.999873e-01 9.999914e-01 9.999943e-01 9.999962e-01
#> [176] 9.999975e-01 9.999984e-01 9.999990e-01 9.999994e-01 9.999996e-01
#> [181] 9.999998e-01 9.999999e-01 9.999999e-01 1.000000e+00 1.000000e+00
#> [186] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [191] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [196] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [201] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [206] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [211] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
#> [216] 1.000000e+00

As can be seen, the G-DFT-CF procedure does not produce probabilities \(\leq 2.2e\text{-}16\), i.e. smaller values are rounded off to 0, most likely due to the used FFTW3 library.

Processing Speed Comparisons

To assess the performance of the exact procedures, we use the microbenchmark package. Each algorithm has to calculate the PMF repeatedly based on random probability and value vectors. The run times are then summarized in a table that presents, among other statistics, their minima, maxima and means. The following results were recorded on an AMD Ryzen 7 1800X with 32 GiB of RAM and Windows 10 Education (20H2).

library(microbenchmark)
n <- 2500
set.seed(1)
va <- sample(1:50, n, TRUE)
vb <- sample(1:50, n, TRUE)

f1 <- function() dgpbinom(NULL, runif(n), va, vb, method = "DivideFFT")
f2 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Convolve")
f3 <- function() dgpbinom(NULL, runif(n), va, vb, method = "Characteristic")

microbenchmark(f1(), f2(), f3(), times = 51)
#> Unit: milliseconds
#>  expr      min       lq      mean   median        uq      max neval
#>  f1()  75.7508  78.2083  83.64722  79.8600  84.76995 200.4473    51
#>  f2() 108.6950 111.3314 114.78413 113.5170 117.47335 126.8446    51
#>  f3() 580.2455 646.8719 665.34329 663.1248 680.58515 758.0827    51

Clearly, the G-DC-FFT procedure is the fastest one. It outperforms both the G-DC and G-DFT-CF approaches. The latter one needs a lot more time than the others. Generally, the computational speed advantage of the G-DC-FFT procedure increases with larger \(n\) (and \(m\)).

PoissonBinomial/inst/include/0000755000176200001440000000000014077606026015767 5ustar liggesusersPoissonBinomial/inst/include/PoissonBinomial_RcppExports.h0000644000176200001440000007765314077547031023640 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #ifndef RCPP_PoissonBinomial_RCPPEXPORTS_H_GEN_ #define RCPP_PoissonBinomial_RCPPEXPORTS_H_GEN_ #include namespace PoissonBinomial { using namespace Rcpp; namespace { void validateSignature(const char* sig) { Rcpp::Function require = Rcpp::Environment::base_env()["require"]; require("PoissonBinomial", Rcpp::Named("quietly") = true); typedef int(*Ptr_validate)(const char*); static Ptr_validate p_validate = (Ptr_validate) R_GetCCallable("PoissonBinomial", "_PoissonBinomial_RcppExport_validate"); if (!p_validate(sig)) { throw Rcpp::function_not_exported( "C++ function with signature '" + std::string(sig) + "' not found in PoissonBinomial"); } } } inline int vectorGCD(const IntegerVector x) { typedef SEXP(*Ptr_vectorGCD)(SEXP); static Ptr_vectorGCD p_vectorGCD = NULL; if (p_vectorGCD == NULL) { validateSignature("int(*vectorGCD)(const IntegerVector)"); p_vectorGCD = (Ptr_vectorGCD)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_vectorGCD"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_vectorGCD(Shield(Rcpp::wrap(x))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_conv(const IntegerVector obs, const NumericVector probs) { typedef SEXP(*Ptr_dpb_conv)(SEXP,SEXP); static Ptr_dpb_conv p_dpb_conv = NULL; if (p_dpb_conv == NULL) { validateSignature("NumericVector(*dpb_conv)(const IntegerVector,const NumericVector)"); p_dpb_conv = (Ptr_dpb_conv)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_conv"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_conv(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_conv(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_conv)(SEXP,SEXP,SEXP); static Ptr_ppb_conv p_ppb_conv = NULL; if (p_ppb_conv == NULL) { validateSignature("NumericVector(*ppb_conv)(const IntegerVector,const NumericVector,const bool)"); p_ppb_conv = (Ptr_ppb_conv)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_conv"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_conv(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_dc(const IntegerVector obs, const NumericVector probs) { typedef SEXP(*Ptr_dpb_dc)(SEXP,SEXP); static Ptr_dpb_dc p_dpb_dc = NULL; if (p_dpb_dc == NULL) { validateSignature("NumericVector(*dpb_dc)(const IntegerVector,const NumericVector)"); p_dpb_dc = (Ptr_dpb_dc)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_dc"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_dc(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_dc(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_dc)(SEXP,SEXP,SEXP); static Ptr_ppb_dc p_ppb_dc = NULL; if (p_ppb_dc == NULL) { validateSignature("NumericVector(*ppb_dc)(const IntegerVector,const NumericVector,const bool)"); p_ppb_dc = (Ptr_ppb_dc)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_dc"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_dc(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_dftcf(const IntegerVector obs, const NumericVector probs) { typedef SEXP(*Ptr_dpb_dftcf)(SEXP,SEXP); static Ptr_dpb_dftcf p_dpb_dftcf = NULL; if (p_dpb_dftcf == NULL) { validateSignature("NumericVector(*dpb_dftcf)(const IntegerVector,const NumericVector)"); p_dpb_dftcf = (Ptr_dpb_dftcf)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_dftcf"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_dftcf(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_dftcf(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_dftcf)(SEXP,SEXP,SEXP); static Ptr_ppb_dftcf p_ppb_dftcf = NULL; if (p_ppb_dftcf == NULL) { validateSignature("NumericVector(*ppb_dftcf)(const IntegerVector,const NumericVector,const bool)"); p_ppb_dftcf = (Ptr_ppb_dftcf)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_dftcf"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_dftcf(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_rf(const IntegerVector obs, const NumericVector probs) { typedef SEXP(*Ptr_dpb_rf)(SEXP,SEXP); static Ptr_dpb_rf p_dpb_rf = NULL; if (p_dpb_rf == NULL) { validateSignature("NumericVector(*dpb_rf)(const IntegerVector,const NumericVector)"); p_dpb_rf = (Ptr_dpb_rf)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_rf"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_rf(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_rf(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_rf)(SEXP,SEXP,SEXP); static Ptr_ppb_rf p_ppb_rf = NULL; if (p_ppb_rf == NULL) { validateSignature("NumericVector(*ppb_rf)(const IntegerVector,const NumericVector,const bool)"); p_ppb_rf = (Ptr_ppb_rf)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_rf"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_rf(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_mean(IntegerVector obs, const NumericVector probs) { typedef SEXP(*Ptr_dpb_mean)(SEXP,SEXP); static Ptr_dpb_mean p_dpb_mean = NULL; if (p_dpb_mean == NULL) { validateSignature("NumericVector(*dpb_mean)(IntegerVector,const NumericVector)"); p_dpb_mean = (Ptr_dpb_mean)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_mean"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_mean(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_mean(const IntegerVector obs, const NumericVector probs, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_mean)(SEXP,SEXP,SEXP); static Ptr_ppb_mean p_ppb_mean = NULL; if (p_ppb_mean == NULL) { validateSignature("NumericVector(*ppb_mean)(const IntegerVector,const NumericVector,const bool)"); p_ppb_mean = (Ptr_ppb_mean)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_mean"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_mean(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti = false) { typedef SEXP(*Ptr_dpb_gmba)(SEXP,SEXP,SEXP); static Ptr_dpb_gmba p_dpb_gmba = NULL; if (p_dpb_gmba == NULL) { validateSignature("NumericVector(*dpb_gmba)(const IntegerVector,const NumericVector,const bool)"); p_dpb_gmba = (Ptr_dpb_gmba)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_gmba"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_gmba(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(anti))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_gmba(const IntegerVector obs, const NumericVector probs, const bool anti = false, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_gmba)(SEXP,SEXP,SEXP,SEXP); static Ptr_ppb_gmba p_ppb_gmba = NULL; if (p_ppb_gmba == NULL) { validateSignature("NumericVector(*ppb_gmba)(const IntegerVector,const NumericVector,const bool,const bool)"); p_ppb_gmba = (Ptr_ppb_gmba)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_gmba"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_gmba(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(anti)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_pa(const IntegerVector obs, const NumericVector probs) { typedef SEXP(*Ptr_dpb_pa)(SEXP,SEXP); static Ptr_dpb_pa p_dpb_pa = NULL; if (p_dpb_pa == NULL) { validateSignature("NumericVector(*dpb_pa)(const IntegerVector,const NumericVector)"); p_dpb_pa = (Ptr_dpb_pa)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_pa"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_pa(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_pa(const IntegerVector obs, const NumericVector probs, bool lower_tail = true) { typedef SEXP(*Ptr_ppb_pa)(SEXP,SEXP,SEXP); static Ptr_ppb_pa p_ppb_pa = NULL; if (p_ppb_pa == NULL) { validateSignature("NumericVector(*ppb_pa)(const IntegerVector,const NumericVector,bool)"); p_ppb_pa = (Ptr_ppb_pa)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_pa"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_pa(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector ppb_na(const IntegerVector obs, const NumericVector probs, const bool refined = true, const bool lower_tail = true) { typedef SEXP(*Ptr_ppb_na)(SEXP,SEXP,SEXP,SEXP); static Ptr_ppb_na p_ppb_na = NULL; if (p_ppb_na == NULL) { validateSignature("NumericVector(*ppb_na)(const IntegerVector,const NumericVector,const bool,const bool)"); p_ppb_na = (Ptr_ppb_na)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_ppb_na"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_ppb_na(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(refined)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dpb_na(const IntegerVector obs, const NumericVector probs, const bool refined = true) { typedef SEXP(*Ptr_dpb_na)(SEXP,SEXP,SEXP); static Ptr_dpb_na p_dpb_na = NULL; if (p_dpb_na == NULL) { validateSignature("NumericVector(*dpb_na)(const IntegerVector,const NumericVector,const bool)"); p_dpb_na = (Ptr_dpb_na)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dpb_na"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dpb_na(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(refined))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline IntegerVector rpb_bernoulli(const int n, const NumericVector probs) { typedef SEXP(*Ptr_rpb_bernoulli)(SEXP,SEXP); static Ptr_rpb_bernoulli p_rpb_bernoulli = NULL; if (p_rpb_bernoulli == NULL) { validateSignature("IntegerVector(*rpb_bernoulli)(const int,const NumericVector)"); p_rpb_bernoulli = (Ptr_rpb_bernoulli)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_rpb_bernoulli"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_rpb_bernoulli(Shield(Rcpp::wrap(n)), Shield(Rcpp::wrap(probs))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dgpb_conv(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q) { typedef SEXP(*Ptr_dgpb_conv)(SEXP,SEXP,SEXP,SEXP); static Ptr_dgpb_conv p_dgpb_conv = NULL; if (p_dgpb_conv == NULL) { validateSignature("NumericVector(*dgpb_conv)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector)"); p_dgpb_conv = (Ptr_dgpb_conv)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_conv"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dgpb_conv(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector pgpb_conv(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, bool lower_tail = true) { typedef SEXP(*Ptr_pgpb_conv)(SEXP,SEXP,SEXP,SEXP,SEXP); static Ptr_pgpb_conv p_pgpb_conv = NULL; if (p_pgpb_conv == NULL) { validateSignature("NumericVector(*pgpb_conv)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,bool)"); p_pgpb_conv = (Ptr_pgpb_conv)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_conv"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_pgpb_conv(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dgpb_dc(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q) { typedef SEXP(*Ptr_dgpb_dc)(SEXP,SEXP,SEXP,SEXP); static Ptr_dgpb_dc p_dgpb_dc = NULL; if (p_dgpb_dc == NULL) { validateSignature("NumericVector(*dgpb_dc)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector)"); p_dgpb_dc = (Ptr_dgpb_dc)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_dc"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dgpb_dc(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector pgpb_dc(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool lower_tail = true) { typedef SEXP(*Ptr_pgpb_dc)(SEXP,SEXP,SEXP,SEXP,SEXP); static Ptr_pgpb_dc p_pgpb_dc = NULL; if (p_pgpb_dc == NULL) { validateSignature("NumericVector(*pgpb_dc)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool)"); p_pgpb_dc = (Ptr_pgpb_dc)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_dc"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_pgpb_dc(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dgpb_dftcf(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q) { typedef SEXP(*Ptr_dgpb_dftcf)(SEXP,SEXP,SEXP,SEXP); static Ptr_dgpb_dftcf p_dgpb_dftcf = NULL; if (p_dgpb_dftcf == NULL) { validateSignature("NumericVector(*dgpb_dftcf)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector)"); p_dgpb_dftcf = (Ptr_dgpb_dftcf)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_dftcf"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dgpb_dftcf(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector pgpb_dftcf(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool lower_tail = true) { typedef SEXP(*Ptr_pgpb_dftcf)(SEXP,SEXP,SEXP,SEXP,SEXP); static Ptr_pgpb_dftcf p_pgpb_dftcf = NULL; if (p_pgpb_dftcf == NULL) { validateSignature("NumericVector(*pgpb_dftcf)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool)"); p_pgpb_dftcf = (Ptr_pgpb_dftcf)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_dftcf"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_pgpb_dftcf(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector pgpb_na(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool refined = true, const bool lower_tail = true) { typedef SEXP(*Ptr_pgpb_na)(SEXP,SEXP,SEXP,SEXP,SEXP,SEXP); static Ptr_pgpb_na p_pgpb_na = NULL; if (p_pgpb_na == NULL) { validateSignature("NumericVector(*pgpb_na)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool,const bool)"); p_pgpb_na = (Ptr_pgpb_na)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_pgpb_na"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_pgpb_na(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q)), Shield(Rcpp::wrap(refined)), Shield(Rcpp::wrap(lower_tail))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline NumericVector dgpb_na(const IntegerVector obs, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q, const bool refined = true) { typedef SEXP(*Ptr_dgpb_na)(SEXP,SEXP,SEXP,SEXP,SEXP); static Ptr_dgpb_na p_dgpb_na = NULL; if (p_dgpb_na == NULL) { validateSignature("NumericVector(*dgpb_na)(const IntegerVector,const NumericVector,const IntegerVector,const IntegerVector,const bool)"); p_dgpb_na = (Ptr_dgpb_na)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_dgpb_na"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_dgpb_na(Shield(Rcpp::wrap(obs)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q)), Shield(Rcpp::wrap(refined))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } inline IntegerVector rgpb_bernoulli(const int n, const NumericVector probs, const IntegerVector val_p, const IntegerVector val_q) { typedef SEXP(*Ptr_rgpb_bernoulli)(SEXP,SEXP,SEXP,SEXP); static Ptr_rgpb_bernoulli p_rgpb_bernoulli = NULL; if (p_rgpb_bernoulli == NULL) { validateSignature("IntegerVector(*rgpb_bernoulli)(const int,const NumericVector,const IntegerVector,const IntegerVector)"); p_rgpb_bernoulli = (Ptr_rgpb_bernoulli)R_GetCCallable("PoissonBinomial", "_PoissonBinomial_rgpb_bernoulli"); } RObject rcpp_result_gen; { RNGScope RCPP_rngScope_gen; rcpp_result_gen = p_rgpb_bernoulli(Shield(Rcpp::wrap(n)), Shield(Rcpp::wrap(probs)), Shield(Rcpp::wrap(val_p)), Shield(Rcpp::wrap(val_q))); } if (rcpp_result_gen.inherits("interrupted-error")) throw Rcpp::internal::InterruptedException(); if (Rcpp::internal::isLongjumpSentinel(rcpp_result_gen)) throw Rcpp::LongjumpException(rcpp_result_gen); if (rcpp_result_gen.inherits("try-error")) throw Rcpp::exception(Rcpp::as(rcpp_result_gen).c_str()); return Rcpp::as(rcpp_result_gen); } } #endif // RCPP_PoissonBinomial_RCPPEXPORTS_H_GEN_ PoissonBinomial/inst/include/PoissonBinomial.h0000644000176200001440000000043213552351410021233 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #ifndef RCPP_PoissonBinomial_H_GEN_ #define RCPP_PoissonBinomial_H_GEN_ #include "PoissonBinomial_RcppExports.h" #endif // RCPP_PoissonBinomial_H_GEN_