dirmult/0000755000176200001440000000000014216051652011734 5ustar liggesusersdirmult/NAMESPACE0000644000176200001440000000035614211404320013144 0ustar liggesusersexport(adapGridProf) export(dirmult) export(dirmult.summary) export(equalTheta) export(estProfLogLik) export(gridProf) export(nullTest) export(weirMoM) export(simPop) export(rdirichlet) importFrom("stats", "rgamma", "rmultinom", "rnorm") dirmult/data/0000755000176200001440000000000012071034740012641 5ustar liggesusersdirmult/data/us.RData0000644000176200001440000000302412071034740014204 0ustar liggesusers XYoE.뵓8qxkpl@@6$@ "< xx/S0mWԌg]KXmQ1s7DTJq3(?*^./C}6ρ2+G=m_}˸x sY:MFqqB3dȍtks7ݾs}fXaH||zx6!c I]q*2LGQrўQK!ÌS7qq\٫gW-M3s2碰`Q_cԥ|â=-9-ӥ6GVZΟ%ޢ8 ?˃%j Q\]{U߮Y:ϒꅊLUn>9\ ^M aDY.sS*ca=3B_k""c(" >@9lՒu's\2ѮI;1ӔG^cx5*l;/W|R]#]4[5nޖ] +Yo8Y2.:ӛ75$$$MR6y11ssĩ;^7Ĕ3xM5`ۈ5コ2h3~`|/O3 /_5fN-ȞS"8O~BcHȌ[)皺T\,JdjH 3%?6G~|՚Ofx $/bNuё9? pȭ̂r7:bՔ#aScb9t:n0)AbBD"#4niQɆȆ洯|kJi5Cy{,< 2Y9$ I_P5\诘~}̐7 ӴȈ׍e^ed:wUƾ-ΈuqOC/.tA@M/3"/YqҞj{gcrGj2v֯E}km#)$oEؙL+D|ݶϕd݌oܫW b?S_y1Cߙp2pm=,5=(I`O}f)o *cơoZzcuzG!eQY#C#E'#O0u/y?M×?!y^aSֳ?ӍShtVw>&eN|tt>J):*oBmD=Ţ4* DzME}̚N"94ħœ?.(p:U?qbg;՛s9*˒8%mv\h{7+ VǹJ0N\'cZFi9]vi' q촚K*ҿF;dirmult/man/0000755000176200001440000000000012135701174012506 5ustar liggesusersdirmult/man/estProfLogLik.Rd0000644000176200001440000000313412133053070015513 0ustar liggesusers\name{estProfLogLik} \alias{estProfLogLik} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Profile log-likelihood of Dirichlet-multinomial model} \description{ Computes the profile log-likelihood of \eqn{\ell(\pi,\theta;x)}{l(\pi,theta;x)} for a given value of \eqn{\theta}{theta}, i.e. \eqn{\hat{\ell}(\theta)=\max_{\pi}\ell(\pi,\theta;x)}{l(theta)=max_{\pi} l(\pi,theta;x)}. } \usage{ estProfLogLik(data, theta, epsilon=10^(-4), trace=TRUE, initPi, maxit=1000) } \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{theta}{The theta-value of which the profile log-likelihood is to be computed.} \item{epsilon}{Tolerance used in the iterations. Succeeding log-likelihood values need to be within epsilon for convergence.} \item{trace}{Logical. Whether parameter estimates and log-likelihood values should be printed to the screen while iterating.} \item{initPi}{Initial pi vector.} \item{maxit}{Maximum number of iterations. Default is 1000 and will often not be envoked, but if theta is to extreme compared to the MLE of theta the log-likelihood may misbehave near theta.} } \value{ Gives a list of components (similar to output from \code{\link{dirmult}} where \code{loglik} and \code{lambda} (the Lagrange multiplier) are the most interesting. } \seealso{ \code{\link{dirmult}} } \examples{ data(us) fit <- dirmult(us[[1]],epsilon=10^(-12),trace=FALSE) estProfLogLik(us[[1]],fit$theta*1.2,epsilon=10^(-12),trace=FALSE) } dirmult/man/adapGridProf.Rd0000644000176200001440000000244312133053051015332 0ustar liggesusers\name{adapGridProf} \alias{adapGridProf} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Profile log-likelihood of Dirichlet-multinomial model} \description{ Computes the profile log-likelihood of \eqn{\ell(\pi,\theta;x)}{l(\pi,theta;x)} for an interval determined by a given difference in log-likelihood value from the maximum log-likelihood value. } \usage{adapGridProf(data, delta, stepsize=50)} \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{delta}{The difference between max of log-likelihood and the profile log-likelihood. May be used to construct approximate confidence intervals, e.g. with delta = qchisq(0.95,df=1)*2.} \item{stepsize}{The stepsize used when stepping left/right of the MLE. The stepsize used by the algorithm is given by the MLE of theta divided by \code{stepsize}. Default value is 50.} } \value{ Gives a data frame with theta values and associated profile log-likelihood values. } \seealso{ \code{\link{estProfLogLik}} } \examples{ data(us) fit <- dirmult(us[[1]],epsilon=10^(-12),trace=FALSE) adapGridProf(us[[1]],delta=0.5) \dontrun{adapGridProf(us[[1]],delta=qchisq(0.95,df=1)*2)} } dirmult/man/simPop.Rd0000644000176200001440000000204312133053101014227 0ustar liggesusers\name{simPop} \alias{simPop} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Simulate data from Dirichlet-multinomial distribution} \description{ Simulates data using user defined \eqn{\theta}{theta} value and allele probabilities in the reference population, \eqn{\pi}{pi}. } \usage{simPop(J=10, K=20, n, pi, theta)} \arguments{ \item{J}{The number of subpopulations sampled.} \item{K}{Number of different alleles. If argument \code{pi} is given, the length of \code{pi} is used as \code{K}.} \item{n}{The number of alleles sampled in each subpopulation. If scalar repeated for all subpopulations, otherwise a vector of length \code{J} is needed with subpopulation specific total sampled alleles.} \item{pi}{Vector of allele probabilities. If missing a random vector of length \code{K} is generated.} \item{theta}{The theta-value used for simulations.} } \value{ Return an J x K matrix with allelic counts. } \seealso{ \code{\link{dirmult}} } \examples{ simPop(n=100, theta=0.03) } dirmult/man/equalTheta.Rd0000644000176200001440000000247612133053123015073 0ustar liggesusers\name{equalTheta} \alias{equalTheta} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Test whether theta is equal for several tables} \description{ Estimates parameters \eqn{\pi}{\pi} for each table under the contraint that \eqn{\theta}{theta} is equal for all tables. } \usage{equalTheta(data, theta, epsilon=10^(-4), trace=TRUE, initPi, maxit=1000)} \arguments{ \item{data}{A list of matrix or table with counts. Rows in the tables represent subpopulations and columns the different categories of the data. Zero columns are automaticly removed.} \item{theta}{Initial value of the commen theta paramter.} \item{epsilon}{Tolerance of the convergence, see \code{\link{dirmult}}.} \item{trace}{Logical. TRUE: print estimates while iterating.} \item{initPi}{Initial values for each pi vector (one of each table).} \item{maxit}{Maximum number of iterations.} } \value{ Returns a list similar to the output of \code{\link{dirmult}}. } \seealso{ \code{\link{dirmult}} } \examples{ \dontrun{data(us) fit <- lapply(us[1:2],dirmult,epsilon=10^(-12),trace=FALSE) thetas <- unlist(lapply(fit,function(x) x$theta)) logliks <- unlist(lapply(fit,function(x) x$loglik)) fit1 <- equalTheta(us[c(1:2)],theta=mean(thetas),epsilon=10^(-12)) lr <- -2*(fit1$loglik-sum(logliks)) 1-pchisq(lr,df=1) fit1$theta[[1]] }} dirmult/man/nullTest.Rd0000644000176200001440000000172312133053225014605 0ustar liggesusers\name{nullTest} \alias{nullTest} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Simulation based test for null-hypothesis, H0:theta=0} \description{ Simulates data sets under the null-hypothesis, \eqn{H_0:\theta=0}{H_0: theta=0}. This corresponds to an ordinary multinomial model without any overdispersion. Based on the returned data frame simulated \eqn{p}{p}-values may be computed. } \usage{nullTest(data, m=1000, prec=6)} \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{m}{Number of simulated data tables.} \item{prec}{The tolerance of the iterations. Corresponds to epsilon=1e-prec in \code{\link{dirmult}}.} } \value{ Returns a data frame with theta estimates and log-likelihood values. } \seealso{ \code{\link{dirmult}} } \examples{ data(us) \dontrun{nullTest(us[[1]],m=50)} } dirmult/man/gridProf.Rd0000644000176200001440000000235312133053130014542 0ustar liggesusers\name{gridProf} \alias{gridProf} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Profile log-likelihood of Dirichlet-multinomial model} \description{ Computes the profile log-likelihood of \eqn{\ell(\pi,\theta;x)}{l(\pi,theta;x)} for a given sequence of \eqn{\theta}{theta} by calling \code{\link{estProfLogLik}}. } \usage{gridProf(data, theta, from, to, len)} \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{theta}{A theta-value used as offset for the interval: [theta+from; theta+to].} \item{from}{Left endpoint in the interval: [theta+from; theta+to].} \item{to}{Right endpoint in the interval: [theta+from; theta+to].} \item{len}{Number of points in the [from; to] interval. Similar to the \code{len} argument in \code{\link{seq}}.} } \value{ Gives a data frame with theta values and associated profile log-likelihood values. } \seealso{ \code{\link{estProfLogLik}} } \examples{ data(us) fit <- dirmult(us[[1]],epsilon=10^(-12),trace=FALSE) \dontrun{grid <- gridProf(us[[1]],fit$theta,from=-0.001,to=0.001,len=10) plot(loglik ~ theta, data=grid, type="l")} } dirmult/man/weirMoM.Rd0000644000176200001440000000161712133053110014345 0ustar liggesusers\name{weirMoM} \alias{weirMoM} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Method of moment estimator of theta} \description{ Estimates \eqn{\theta}{theta} using a method of moment (MoM) estimate by 'Weir and Hill (2002).' } \usage{weirMoM(data, se=FALSE)} \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{se}{Logical. Determines if a standard error of theta sould be computed or not. The variance is based on an expression by Li cited in 'Weir and Hill (2002)'.} } \references{ Weir, B. S. and W. G. Hill (2002). 'Esimating F-statistics'. Annu Rev Genet 36: 721-750 } \value{ MoM-estimate (and standard error) of theta. } \seealso{ \code{\link{dirmult.summary}} } \examples{ data(us) weirMoM(us[[1]],se=TRUE) } dirmult/man/dirmult.summary.Rd0000644000176200001440000000234712134506036016156 0ustar liggesusers\name{dirmult.summary} \alias{dirmult.summary} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Summary table of parameter estimates from dirmult} \description{ Produces a summary table based on the estimated parameters from \code{\link{dirmult}}. The table contains MLE estimates and standard errors together with method of moment (MoM) estimates and standard errors based on MoM estimates from 'Weir and Hill (2002)'. } \usage{dirmult.summary(data, fit, expectedFIM=FALSE)} \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{fit}{Output from \code{dirmult} used on the same data table as above.} \item{expectedFIM}{Logical. Determines whether the observed or expected Fisher Information Matrix should be used. For speed use observed (i.e. FALSE) - for accuracy (and theoretical support) use expected (i.e. TRUE).} } \value{ Summary table with estimates and standard errors for \eqn{\pi}{\pi} and \eqn{\theta}{theta}. } \seealso{ \code{\link{dirmult}} } \examples{ data(us) fit <- dirmult(us[[1]],epsilon=10^(-4),trace=FALSE) dirmult.summary(us[[1]],fit) } dirmult/man/dirmult.Rd0000644000176200001440000000706712134503624014466 0ustar liggesusers\name{dirmult} \alias{dirmult} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Parameter estimation in Dirichlet-multinomial distribution} \description{Consider allele frequencies from different subpopulations. The allele counts, \eqn{X}{X}, (or equivalently allele frequencies) are expected to vary between subpopulation. This variability are sometimes refered to as identity-by-decent, but may be modelled as overdispersion due to intra-class correlation \eqn{\theta}{theta}. The allele counts within each subpopulation is assumed to follow a multinomial distribution conditioned on the allele probabilities, \eqn{\pi_1,\dots,\pi_{k-1}}{\pi_1,...,\pi_{k-1}}. When \eqn{\pi}{\pi} follows a Dirichlet distribution the marginal distribution of \eqn{X}{X} is Dirichlet-multinomial with parameters \eqn{\pi}{\pi} and \eqn{\theta}{theta} with density: \deqn{% P(X=x) = {n \choose x} \frac{\prod_{j=1}^k\prod_{r=1}^{x_j}\{\pi_j(1-\theta) + (r-1)\theta\}}% {\prod_{r=1}^{n}\{1-\theta + (r-1)\theta\}}.}{% P(X=x) = (prod_{j=1}^k (1/x_j!) prod_{r=1}^{x_j}(\pi_j(1-theta) + (r-1)theta))/% ((1/n!) prod_{r=1}^{n}(1-\theta + (r-1)theta)).} Using an alternative parameterization the density may be written as: \deqn{% P(X=x) = {n \choose x} \frac{\Gamma(\gamma_+)}{\Gamma(n+\gamma_+)} \prod_{j=1}^k \frac{\Gamma\left(x_j + \gamma_j\right)}% {\Gamma\left(\gamma_j\right)},}{ P(X=x) = (n!*\Gamma(gamma_+))/\Gamma(n+gamma_+) prod_{j=1}^k \Gamma(x_j + gamma_j)/(\Gamma(gamma_j)*x_j!),} where \eqn{\gamma_+=(1-\theta)/\theta}{gamma_+=(1-theta)/theta} and \eqn{\gamma_j=\pi_j\theta}{gamma_j=\pi_j*theta}. This formulation second parameterization is used in the iterations since it converges much faster than the original parameterization. The function \code{dirmult} estimates the parameters \eqn{\gamma}{gamma} in the Dirichlet-multinomial distribution and transform these into \eqn{\pi_1,\dots,\pi_{k-1}}{\pi_1,\dots,\pi_{k-1}} and \eqn{\theta}{theta}. } \usage{dirmult(data, init, initscalar, epsilon=10^(-4), trace=TRUE, mode)} \arguments{ \item{data}{A matrix or table with counts. Rows represent subpopulations and columns the different categories of the data. Zero rows or columns are automaticly removed.} \item{init}{Initial values for the \eqn{\gamma}{gamma}-vector. Default is empty implying the column-proportions are used as initial values.} \item{initscalar}{Initial value for \eqn{(1-\theta)/\theta}{(1-theta)/theta}. Default value is (1-MoM)/MoM where MoM a the method of moment estimate.} \item{epsilon}{Convergence tolerance. On termination the difference between to succeeding log-likelihoods must be smaller than epsilon.} \item{trace}{Logical. If TRUE the parameter estimates and log-likelihood value is printed to the screen after each iteration, otherwise no out-put is produces while iterating.} \item{mode}{Takes values "obs" (default) or "exp" determining whether the observed or expected FIM should be used in the Fisher Scoring. All other arguments produces an error message, but the observed FIM is used in the iterations.} } \value{ Returns a list containing: \item{loglik}{The final log-likelihood value.} \item{ite}{Number of iterations used.} \item{gamma}{A vector of \eqn{\gamma}{gamma} estimates.} \item{pi}{A vector of \eqn{\pi}{\pi} estimates.} \item{theta}{Estimated \eqn{\theta}{theta}-value.} } \seealso{ \code{\link{dirmult.summary}} } \examples{ data(us) fit <- dirmult(us[[1]],epsilon=10^(-4),trace=FALSE) dirmult.summary(us[[1]],fit) } dirmult/man/rdirichlet.Rd0000644000176200001440000000102212071034740015116 0ustar liggesusers\name{rdirichlet} \alias{rdirichlet} \concept{Genetics} \concept{Overdispersion} \concept{Dirichlet-multinomial} \title{Simulate from Dirichlet distribution} \description{ Simulates from a Dirichlet distribution } \usage{rdirichlet(n=1, alpha)} \arguments{ \item{n}{The number of samples.} \item{alpha}{The shape parameters, need to be positive.} } \value{ Return an n x length(alpha) matrix where each row is drawn from a Dirichlet. } \seealso{ \code{\link{dirmult}} } \examples{ rdirichlet(n=100, alpha=rep(1,10)) } dirmult/man/us.Rd0000644000176200001440000000122714211404477013431 0ustar liggesusers\name{us} \docType{data} \alias{us} \title{Allele counts for six US subpopulations.} \description{ 9 STR loci were typed in sample populations of African Americans, U.S. Caucasians, Hispanics, Bahamians, Jamaicans, and Trinidadians. } \format{A list of tables with allele counts.} \source{http://www.fbi.gov/hq/lab/fsc/backissu/july1999/budowle.htm} \references{ Budowle, B., Moretti, T. R., Baumstark, A. L., Defenbaugh, D. A., and Keys, K. M. Population data on the thirteen CODIS core short tandem repeat loci in African Americans, U.S. Caucasians, Hispanics, Bahamians, Jamaicans, and Trinidadians, Journal of Forensic Sciences. 1999. } \keyword{datasets}dirmult/DESCRIPTION0000644000176200001440000000070514216051652013444 0ustar liggesusersPackage: dirmult Version: 0.1.3-5 Date: 2022-03-08 Title: Estimation in Dirichlet-Multinomial Distribution Author: Torben Tvedebrink Maintainer: Torben Tvedebrink Description: Estimate parameters in Dirichlet-Multinomial and compute log-likelihoods. Depends: R (>= 2.5.0) License: GPL (>= 2) NeedsCompilation: no Repository: CRAN Packaged: 2022-03-07 13:35:52 UTC; tvede Date/Publication: 2022-03-21 10:30:02 UTC dirmult/R/0000755000176200001440000000000012135701174012134 5ustar liggesusersdirmult/R/dirmult.R0000644000176200001440000003503612135700721013743 0ustar liggesusers## Help-functions for computing the score-function and log-likelihood function uu <- function(x,t) 1/(t+x-1) ff <- function(x,t) 1/(t+x-1)^2 ll <- function(x,t) log(t+x-1) ## Computes Beta-Binomial probabilities dbbin.ab <- function(x,n,a,b){ res <- lchoose(n,x) if(x==0) res <- res else res <- res+sum(unlist(lapply(list(1:x),ll,t=a))) if(x==n) res <- res else res <- res+sum(unlist(lapply(list(1:(n-x)),ll,t=b))) res <- res-sum(unlist(lapply(list(1:n),ll,t=a+b))) exp(res) } ## Score-function u <- function(x,t){ nc <- ncol(x) S <- rep(0,nc) ts <- sum(t) for(j in 1:nrow(x)){ Sn <- sum(unlist(lapply(list(1:(rowSums(x)[j])),uu,t=ts))) for(i in 1:nc){ if(x[j,i]==0) Sij <- 0 else Sij <- sum(unlist(lapply(list(1:(x[j,i])),uu,t=t[i]))) S[i] <- S[i] - Sn + Sij } } S } ## Score-function when Lagrange multiplier is envoked profU <- function(x,t,tp){ K <- length(t) S <- rep(0,K+1) nc <- ncol(x) ts <- sum(t) for(j in 1:nrow(x)){ Sn <- sum(unlist(lapply(list(1:(rowSums(x)[j])),uu,t=ts))) for(i in 1:nc){ if(x[j,i]==0) Sij <- 0 else Sij <- sum(unlist(lapply(list(1:(x[j,i])),uu,t=t[i]))) S[i] <- S[i] - Sn + Sij } } S[1:K] <- S[1:K]-tp S[K+1] <- tp-sum(t) S } ## Score-function with several Lagrange multiplier for testing equal thetas equalU <- function(x,t,tp,l){ K <- length(t) S <- rep(0,K+1) nc <- ncol(x) ts <- sum(t) for(j in 1:nrow(x)){ Sn <- sum(unlist(lapply(list(1:(rowSums(x)[j])),uu,t=ts))) for(i in 1:nc){ if(x[j,i]==0) Sij <- 0 else Sij <- sum(unlist(lapply(list(1:(x[j,i])),uu,t=t[i]))) S[i] <- S[i] - Sn + Sij } } S[1:K] <- S[1:K]-l S[K+1] <- tp-sum(t) S } ## Computing the observed Fisher Information Matrix obsfim <- function(x,t){ nc <- ncol(x) D <- rep(0,nc) od <- 0 ts <- sum(t) for(j in 1:nrow(x)){ od <- od + sum(unlist(lapply(list(1:(rowSums(x)[j])),ff,t=ts))) for(i in 1:nc){ if(x[j,i]==0) Dij <- 0 else Dij <- sum(unlist(lapply(list(1:(x[j,i])),ff,t=t[i]))) D[i] <- D[i] + Dij } } F <- matrix(od,nc,nc) diag(F) <- diag(F)-D F } ## Computing the expected Fisher Information Matrix expfim <- function(x,t){ Sn <- rowSums(x) J <- nrow(x) K <- ncol(x) inner <- matrix(0,1,K) od <- 0 ts <- sum(t) for(j in 1:J){ P <- matrix(0,Sn[j],K) R <- matrix(0,Sn[j],K) for(r in Sn[j]:1){ for(k in 1:K){ if(r==Sn[j]) P[r,k] <- dbbin.ab(r,Sn[j],t[k],ts-t[k]) else P[r,k] <- P[r+1,k] + dbbin.ab(r,Sn[j],t[k],ts-t[k]) R[r,k] <- 1/(t[k]+r-1)^2 } } inner <- inner + colSums(P*R) od <- od + sum(unlist(lapply(list(1:(Sn[j])),ff,t=ts))) } F <- matrix(-1*od,K,K) diag(F) <- diag(F)+inner F } ## Computes the log-likelihood loglik <- function(x,t){ l <- 0 ts <- sum(t) nc <- ncol(x) for(j in 1:nrow(x)){ l <- l - sum(unlist(lapply(list(1:(rowSums(x)[j])),ll,t=ts))) # maybe not lij <- 0 ## NEW LINE: This initializes lij to zero for each row. Otherwise lij is a cummulant. ## for(i in 1:nc){ if(x[j,i]==0) lij <- 0 else lij <- sum(unlist(lapply(list(1:(x[j,i])),ll,t=t[i]))) l <- l + lij } } l } ## Computes: I(pi,theta) = t(D)%*%I(gamma)%*%D, where D is the Jacobi matrix thetafim <- function(t,f){ K <- length(t) D <- matrix(0,K,K) pi <- t/sum(t) theta <- 1/(sum(t)+1) diag(D) <- (1-theta)/theta D[K,] <- -(1-theta)/theta D[,K] <- -1*pi/(theta^2) D[K,K] <- D[K,K] t(D)%*%f%*%D } ## Estimate parameters in the Dirichlet-Multinomial distribution dirmult <- function(data,init,initscalar,epsilon=10^(-4),trace=TRUE,mode){ data <- data[rowSums(data)!=0,colSums(data)!=0] if(missing(initscalar)){ mom <- weirMoM(data) if(mom<=0) mom <- 0.005 initscalar <- (1-mom)/mom } if(missing(init)) gamma <- colSums(data)/sum(data)*initscalar else gamma <- init if(missing(mode)) mode <- "obs" if(!is.element(mode,c("obs","exp"))){ message(paste("Warning: Mode '",mode,"' not valid\n",sep="")) mode <- "obs" } lik1 <- 0 lik2 <- epsilon*10 ite <- 1 gamite <- 0 conv <- TRUE # Iterations while(conv){ if(abs(lik2-lik1)maxit) return(NULL) fimGam <- obsfim(data,gamma) fim <- matrix(-1,K+1,K+1) fim[K+1,K+1] <- 0 fim[1:K,1:K] <- fimGam lik1 <- loglik(data,gamma)+gamlambda[K+1]*(gamplus-sum(gamma)) # Updates parameter estimates gamlambda <- gamlambda - solve(fim)%*%profU(data,gamma,gamplus) gamma <- gamlambda[1:K] if(any(gamma<0)){ if((gamite%%10)==0) print(gamma) gamite <- gamite+1 } gamma[gamma<0] <- 0.001 # Negative gamma_j are set to 0.001 if(trace) message(paste("Iteration ",ite,": Log-likelihood value: ",lik1,sep="")) if(ite>50) message(paste("Iteration ",ite,": Log-likelihood value: ",lik1,sep="")) gams <- paste(" Gamma",1:length(gamma),sep="") lik2 <- loglik(data,gamma)+gamlambda[K+1]*(gamplus-sum(gamma)) ite <- ite+1 } gamma <- gamlambda[1:K] sumgam <- sum(gamma) pi <- as.numeric(gamma/sumgam) names(pi) <- dimnames(data)[[2]] list(loglik=lik1,ite=ite-1,gamma=as.numeric(gamma),pi=pi,theta=1/(sumgam+1),lambda=gamlambda[K+1]) } ## Estimates the profile log-likelihood for a defined grid of values gridProf <- function(data,theta,from,to,len){ step <- theta+seq(from=from,to=to,len=len) res <- data.frame(theta=step,loglik=rep(0,len)) for(i in 1:len) res$loglik[i] <- estProfLogLik(data,step[i],trace=FALSE)$loglik res } ## Estimates the profile log-likelihood such that difference in loglik is max delta adapGridProf <- function(data,delta,stepsize=50){ mle <- dirmult(data,trace=FALSE,epsilon=10^(-8)) if(is.na(mle$theta)) print("MLE theta=NA") step <- mle$theta/stepsize ## use /100 or /1000 for better precision in CI res <- data.frame(theta=mle$theta,loglik=mle$loglik) stopp <- TRUE k <- 1 pip <- mle$pi pim <- mle$pi # Algorithm works its way to the right of MLE while(stopp){ tmpp <- mle$theta+k*step llp <- estProfLogLik(data,tmpp,trace=FALSE,initPi=pip) if(is.null(llp)) return(NULL) if(llp$ite>300) return(NULL) pip <- llp$pi llp <- llp$loglik res <- rbind(res,c(tmpp,llp)) if(is.na(llp) | abs(llp-mle$loglik)>delta) stopp <- FALSE else k <- k+1 } stopm <- TRUE l <- 1 # Algorithm works its way to the left of MLE while(stopm){ tmpm <- mle$theta-l*step llm <- estProfLogLik(data,tmpm,trace=FALSE,initPi=pim) if(is.null(llm)) return(NULL) if(llm$ite>300) return(NULL) pim <- llm$pi llm <- llm$loglik res <- rbind(res,c(tmpm,llm)) if(is.na(llm) | abs(llm-mle$loglik)>delta) stopm <- FALSE else l <- l+1 } res[order(res$theta),] } ## Computes the log-likelihood function assuming equal theta for all tables in the list 'data' equalTheta <- function(data,theta,epsilon=10^(-4),trace=TRUE,initPi,maxit=1000){ gamplus <- (1-theta)/theta data <- lapply(data,function(x) x[,colSums(x)!=0]) L <- length(data) K <- unlist(lapply(data,ncol)) KK <- K+1 ## dim(gamma) + dim(lambda) sKK <- sum(KK) cKK <- c(0,cumsum(KK)) if(!missing(initPi)) gamma <- lapply(initPi,function(x,t) x*t,t=gamplus) else gamma <- lapply(data,function(x) colSums(x)/sum(x)*gamplus) gamlambda <- lapply(gamma,function(x) c(x,1)) fimGam <- as.list(rep(0,L)) invfimGam <- as.list(rep(0,L)) fim <- as.list(rep(0,L)) invfim <- as.list(rep(0,L)) deninvfim <- as.list(rep(0,L)) numinvfim <- as.list(rep(0,L)) scorevector <- as.list(rep(0,L)) lik1 <- 0 lik2 <- epsilon*10 ite <- 1 conv <- TRUE while(conv){ if(abs(lik2-lik1)maxit) return(NULL) lik1 <- 0 FIM <- matrix(0,sKK+1,sKK+1) FIMtest <- matrix(0,sKK+1,sKK+1) lambda <- unlist(lapply(gamlambda,function(x) x[length(x)])) for(l in 1:L){ # Fits for each data table in list 'data' fimGam[[l]] <- obsfim(data[[l]],gamma[[l]]) invfimGam[[l]] <- solve(fimGam[[l]]) fim[[l]] <- matrix(-1,KK[l],KK[l]) fim[[l]][KK[l],KK[l]] <- 0 fim[[l]][1:K[l],1:K[l]] <- fimGam[[l]] invfim[[l]] <- solve(fim[[l]]) deninvfim[[l]] <- (-1)/(matrix(1,1,K[l])%*%invfimGam[[l]]%*%matrix(1,K[l],1)) numinvfim[[l]] <- invfim[[l]]%*%matrix(c(rep(0,K[l]),1),K[l]+1,1) FIM[(cKK[l]+1):cKK[l+1],(cKK[l]+1):cKK[l+1]] <- invfim[[l]] FIMtest[(cKK[l]+1):cKK[l+1],(cKK[l]+1):cKK[l+1]] <- fim[[l]] scorevector[[l]] <- equalU(data[[l]],gamma[[l]],gamplus,lambda[l]) lik1 <- lik1 + loglik(data[[l]],gamma[[l]])+lambda[l]*(gamplus-sum(gamma[[l]])) } # Computes overall parameters and log-likelihood kk <- rep(1,length(K)*2) kk[rep(c(T,F),L)] <- K FIMtest[sKK+1,1:sKK] <- rep(rep(c(0,1),L),kk) FIMtest[1:sKK,sKK+1] <- rep(rep(c(0,1),L),kk) FIMtest[sKK+1,sKK+1] <- 0 Ax <- unlist(numinvfim) xAx <- sum(unlist(deninvfim)) FIM[1:sKK,1:sKK] <- FIM[1:sKK,1:sKK]-Ax%*%t(Ax)/xAx FIM[1:sKK,sKK+1] <- Ax/xAx FIM[sKK+1,1:sKK] <- t(Ax)/xAx FIM[sKK+1,sKK+1] <- (-1)/xAx gammalambda <- c(unlist(gamlambda),gamplus) uvector <- c(unlist(scorevector),sum(lambda)) gammalambda <- gammalambda - FIM%*%uvector gamplus <- gammalambda[sKK+1] gamlambda <- split(gammalambda[-(sKK+1)],as.factor(rep(1:L,KK))) gamma <- lapply(gamlambda,function(x) x[-length(x)]) if(any(unlist(gamma)<0)){ ## NEGATIVE ENTRY IN GAMMA VECTOR ## neg <- (1:L)[unlist(lapply(gamma,function(x) any(x<0)))] print(gamma[neg]) gamma <- lapply(gamma,function(x){ x[x<0] <- 0.01; x}) # set negative entries to 0.01 } if(trace) message(paste("Iteration ",ite,": Log-likelihood value: ",lik1,sep="")) lik2 <- 0 for(l in 1:L) lik2 <- lik2 + loglik(data[[l]],gamma[[l]])+lambda[l]*(gamplus-sum(gamma[[l]])) ite <- ite+1 } pi <- lapply(gamma,function(x) x/sum(x)) for(l in 1:L) names(pi[[l]]) <- dimnames(data[[l]])[[2]] theta <- lapply(gamma,function(x) 1/(1+sum(x))) list(loglik=lik1,ite=ite-1,gamma=gamma,pi=pi,theta=theta,lambda=lambda) } #log-like of multinomial mnloglik <- function(x){ x <- x[,colSums(x)!=0] p <- colSums(x)/sum(x) sum(x*rep(log(p),each=nrow(x))) } ## Simulates under H_0: theta=0 nullTest <- function(data,m=1000,prec=6){ pi.null <- colSums(data)/sum(data) dats <- replicate(m,data,simplify=FALSE) res <- data.frame(mle=rep(0,m+1),dm=rep(0,m+1),mom=rep(0,m+1),mn=rep(0,m+1)) rs <- rowSums(data) nr <- nrow(data) for(i in 1:m){ for(j in 1:nr) dats[[i]][j,] <- rmultinom(1,rs[j],pi.null) tmp <- unlist(dirmult(dats[[i]],trace=FALSE,epsilon=10^(-prec))[c("theta","loglik")]) res[i,] <- c(tmp,weirMoM(dats[[i]]),mnloglik(dats[[i]])) } tmp <- unlist(dirmult(data,trace=FALSE,epsilon=10^(-prec))[c("theta","loglik")]) res[m+1,] <- c(tmp,weirMoM(data),mnloglik(data)) list(data=dats,res=res) } rdirichlet <- function(n=1,alpha){ Gam <- matrix(0,n,length(alpha)) for(i in 1:length(alpha)) Gam[,i] <- rgamma(n,shape=alpha[i]) Gam/rowSums(Gam) } simPop <- function(J=10,K=20,n,pi,theta){ if(length(n)==1) n <- rep(n,J) if(missing(pi)) pi <- rnorm(K,mean=14,sd=4) else K <- length(pi) pi <- pi/sum(pi) P <- rdirichlet(J,pi*(1-theta)/theta) X <- matrix(0,J,K) for(i in 1:J) X[i,] <- rmultinom(1,n[i],P[i,]) list(theta=theta,pi=pi,data=X) } dirmult/MD50000644000176200001440000000143014216051652012242 0ustar liggesusers879b48a71503a69d6f552ff2b80c6078 *DESCRIPTION fc1f18a23961939db6d09af8d6b8af37 *NAMESPACE f2a82eb0e607cb7f66b41d0720df891d *R/dirmult.R f6e89f58718db7795c680ab79300fbf5 *data/us.RData 11fded436a8e946902b1f67176ef73d4 *inst/CITATION 9269574e73df4df843d6352dc85e97df *man/adapGridProf.Rd a15c1a65f84f58715548158284f49afe *man/dirmult.Rd d7ff4ac7c216b67b0a853ce5f5a087fe *man/dirmult.summary.Rd ab6120994ad1869ca0275bcd3832f0c3 *man/equalTheta.Rd 4d8662e1716126eec07866aca198d6d6 *man/estProfLogLik.Rd 79bf7b9c38ca77b29889d9450c67768c *man/gridProf.Rd 61e804b515491526d70a459f02e67e58 *man/nullTest.Rd f69342195f6e48f0d809ee0870e8b037 *man/rdirichlet.Rd 94c4310e96f3f2f63f834e32339f0014 *man/simPop.Rd 6b60163ba02ae2c78cfa9a9177b59064 *man/us.Rd 7128f4cd8d0ecc70d22d1d49994f98af *man/weirMoM.Rd dirmult/inst/0000755000176200001440000000000012135701174012710 5ustar liggesusersdirmult/inst/CITATION0000644000176200001440000000137212133054024014041 0ustar liggesuserscitHeader("To cite the 'dirmult' package in publications use:") citEntry(entry="Article", title = "Overdispersion in allelic counts and theta-correction in forensic genetics", author = personList(as.person("Torben Tvedebrink")), journal = "Theoretical Population Biology", year = "2010", volume = "78", number = "3", pages = "200--210", url = "http://dx.doi.org/10.1016/j.tpb.2010.07.002", textVersion = paste("Torben Tvedebrink (2010).", "Overdispersion in allelic counts and theta-correction in forensic genetics.", "Theoretical Population Biology, 78(3), 200-210.", "URL http://dx.doi.org/10.1016/j.tpb.2010.07.002.") )