mclogit/0000755000176200001440000000000014326453475011725 5ustar liggesusersmclogit/NAMESPACE0000644000176200001440000000262214056413354013136 0ustar liggesusersimport(stats,Matrix) importFrom(memisc,getSummary,"%nin%",Sapply) importFrom(methods,as) export( mclogit, mclogit.fit, mmclogit.fitPQLMQL, mclogit.control, mmclogit.control, getSummary.mclogit, getSummary.mmclogit, mblogit, getSummary.mblogit, getSummary.mmblogit ) S3method(print,mclogit) S3method(vcov,mclogit) S3method(deviance,mclogit) S3method(logLik,mclogit) S3method(summary,mclogit) S3method(print,summary.mclogit) S3method(fitted,mclogit) S3method(predict,mclogit) S3method(weights,mclogit) S3method(residuals,mclogit) S3method(AIC,mclogit) S3method(BIC,mclogit) S3method(nobs,mclogit) S3method(extractAIC,mclogit) S3method(anova,mclogit) S3method(update,mclogit) S3method(print,mmclogit) S3method(vcov,mmclogit) S3method(summary,mmclogit) S3method(print,summary.mmclogit) S3method(print,mblogit) S3method(summary,mblogit) S3method(print,summary.mblogit) S3method(fitted,mblogit) S3method(predict,mblogit) S3method(weights,mblogit) S3method(print,mmblogit) S3method(summary,mmblogit) S3method(print,summary.mmblogit) export(dispersion) S3method(dispersion,mclogit) S3method(getSummary,mclogit) S3method(getSummary,mblogit) S3method(getSummary,mmclogit) S3method(getSummary,mmblogit) S3method(simulate,mclogit) S3method(simulate,mblogit) S3method(simulate,mmclogit) S3method(simulate,mmblogit) S3method(predict,mmblogit) S3method(predict,mmclogit) mclogit/demo/0000755000176200001440000000000014224651232012635 5ustar liggesusersmclogit/demo/test-mblogit-random-nonnested.R0000644000176200001440000000407514171074617020660 0ustar liggesuserslibrary(mclogit) set.seed(534) nwithin <- 100 nbetween1 <- 20 nbetween2 <- 20 nbetween <- nbetween1*nbetween2 a1 <- -1 a2 <- 1 x <- seq(from=-2,to=2,length=nwithin) x <- rep(x,nbetween) u1_1 <- rnorm(nbetween1,sd=1) u2_1 <- rnorm(nbetween1,sd=1) u1_2 <- rnorm(nbetween2,sd=1) u2_2 <- rnorm(nbetween2,sd=1) g1 <- rep(1:nbetween1,each=nwithin*nbetween2) g2 <- rep(1:nbetween2,each=nwithin,nbetween1) eta1 <- 1*x + a1 + u1_1[g1] + u1_2[g2] eta2 <- -1*x + a2 + u2_1[g1] + u2_2[g2] exp.eta1 <- exp(eta1) exp.eta2 <- exp(eta2) sum.exp.eta <- 1 + exp.eta1 + exp.eta2 pi2 <- exp.eta1/sum.exp.eta pi3 <- exp.eta2/sum.exp.eta pi1 <- 1 - pi2 - pi3 pi <- cbind(pi1,pi2,pi3) y <- sapply(1:length(x), function(i)sample.int(n=3,size=1,prob=pi[i,])) y <- factor(y,labels=letters[1:3]) plot(y~x) (fem <- mblogit(y~x)) (mxm_x <- mblogit(y~x, random=list(~1|g1,~1|g2), estimator="REML" )) summary(mxm_x) (mxm <- mblogit(y~x, random=~1|g1, estimator="REML" )) summary(mxm) pred_x <- predict(mxm_x,type="response") pred_1 <- predict(mxm,type="response") plot(pred_x,type="l") plot(x,pred_x[,1],type="l") plot(x,pred_x[,2],type="l") plot(x,pred_x[,3],type="l") plot(x,pi1,type="l") plot(x,pi2,type="l") plot(x,pi3,type="l") plot(pi1,pred_x[,1],type="l") plot(pi2,pred_x[,2],type="l") plot(pi3,pred_x[,3],type="l") epred_x <- predict(mxm_x) plot(eta1,epred_x[,1],type="l") abline(a=0,b=1,col="red") plot(eta2,epred_x[,2],type="l") abline(a=0,b=1,col="red") Bmxm_x <- mclogit:::reff(mxm_x) c(u1_1=mean(u1_1), u1_1_hat=mean(Bmxm_x[[1]][,1])) plot(u1_1-mean(u1_1),Bmxm_x[[1]][,1]) abline(a=0,b=1) plot(u2_1-mean(u2_1),Bmxm_x[[1]][,2]) abline(a=0,b=1) plot(u1_2-mean(u1_2),Bmxm_x[[2]][,1]) abline(a=0,b=1) plot(u2_2-mean(u2_2),Bmxm_x[[2]][,2]) abline(a=0,b=1) (mxm_i <- mblogit(y~x, random=~1+x|g1 )) f <- sample(1:2,size=length(x),replace=TRUE) (mxm_ii <- mblogit(y~x*f, random=~1+x|g1 )) summary(mxm_ii) mclogit/demo/mclogit.test.R0000644000176200001440000000226614171071107015400 0ustar liggesuserslibrary(mclogit) options(error=recover) mclogitP <- function(eta,s){ expeta <- exp(eta) sum.expeta <- rowsum(expeta,s) expeta/sum.expeta[s] } N <- 10000 n <- 100 test.data <- data.frame( x = rnorm(N), f = gl(4,N/4), set = gl(N/5,5,N), altern0 = gl(5,1,N), nat = gl(15,N/15,N), occ = gl(10,1,N) ) test.data <- within(test.data,{ altern <- as.integer(interaction(altern0,nat)) altern.occ <- as.integer(interaction(altern,occ)) b1 <- rnorm(n=length(altern)) b2 <- rnorm(n=length(altern.occ)) ff <- 1+.2*(as.numeric(f)-1) eta <- x*ff + b1[altern] + b2[altern.occ] p <- mclogitP(eta,set) n <- unlist(tapply(p,set,function(p)rmultinom(n=1,size=n,prob=p))) rm(b1,b2) }) test.mc0 <- mclogit(cbind(n,set)~x:f,data=test.data ) test.mc <- mclogit(cbind(n,set)~x:f,data=test.data, random=~1|altern/occ, #start.theta=c(1,1) maxit=100 ) # By construction, the `true' coefficient values # are 1, 1.2, 1.4, 1.6 coef(test.mc) # The asymptotic covariance matrix of the coefficient estimates vcov(test.mc) print(test.mc) summary(test.mc) p0 <- predict(test.mc0) p <- predict(test.mc) mclogit/demo/00Index0000644000176200001440000000031014224651232013761 0ustar liggesusersmclogit.test Test run of mclogit with simulated data test-mblogit-random-nonnested Test run of mblogit with simulated data, model with non-nested random effects mclogit/data/0000755000176200001440000000000014326446242012630 5ustar liggesusersmclogit/data/electors.rda0000644000176200001440000000240214326446242015136 0ustar liggesusers\MlU]MhH 8"dU)T-(IMvvp@jR)G @)m(% "  5Dz{fr8Yivv~o켙7,y=г&ӓqsx Lztr&4\U' N1qRܑhkkk^. j\^hM`P!ˑ (A ۃ!.˕K 򫅡m+BߎjvPCjF*cEӶ^K9]͋*.B"XTQ=GPOo3ap9Ieb g|>48S󦥮"_E1E*_ᵂr[keA7-5~- Jg/&̟%*I`4oxlsDyYi8c&$<ѣo/f: "!*@]DGWo $ D zݷ0k0o˅ʡ6mu>F41~fn\G1Zy } u>-q;a60`[ {jdc:p3$x8,?aۀxq!G B۝DMؾ5 c>g3b<1_io'ñQAFcg$* N蹎6n9!ƒ=g>VcX݈Dq+=p3؂ʗf%S|,֟bW{0b?~f߅u/@*ߍ1-ߓC3%|lxY8s F~,"7rpaaMDxu'[7^qx;A;K D=ϳ^?>Y~2~||_?n ]/>?g5[̛-TfSȩ5{đ- {Ym qm ?33w><~XB^ua ![zmu9k9Ӷ?86 XDݰol}y@{7~QAKx+^Wx+}a߭x+^Wx+^ /?Nx+^Wx+^񊿸ob`a9 !` jhrR6 z͏aIcd~FdH̸tTctZXi|9`cOmclogit/data/Transport.rda0000644000176200001440000000145114326446242015315 0ustar liggesusers r0b```b`abd`b2Y# ' )J+./*a`` i^ -V/Łl9e9@X"ʜTZ c&'A%Eyh&$MMKL./1„>fA¬H #a$̉0~k5m P }90/AeR6_\\b[Rz_ۉ5lfru\ cŋ7;?0DOZA%ɝP V4g>?Mmyj }D Ϻ+j6})osViޱw+pqم_[-WҼ3A`pGJoWe$| Fu$ DG1$ wРV %+/o RG '?s~KsѾ"Vٿe|No!v!l';ؿӉvگݷX'_;8w _m-qے ϋN6*>unӞ"y hI\E5wz^x]\iu5?ڿN ]gzέdbu@ɼ ocbrsW;x} 3B w&hs$g4Oe@P=м;8q-OЋTX+K1*.M*-J8R2KS|bJ̼tIEz%EpE70G+ RKҊ.B`.S#hcmclogit/man/0000755000176200001440000000000014321577736012502 5ustar liggesusersmclogit/man/predict.Rd0000644000176200001440000000525114225024235014406 0ustar liggesusers\name{predict} \alias{predict.mblogit} \alias{predict.mmblogit} \alias{predict.mclogit} \alias{predict.mmclogit} \title{Predicting responses or linear parts of the baseline-category and conditional logit models} \description{ The \code{predict()} methods allow to obtain within-sample and out-of-sample predictions from models fitted with \code{mclogit()} and \code{mblogit()}. For models with random effecs fitted using the PQL-method, it is possible to obtain responses that are conditional on the reconstructed random effects. } \usage{ \method{predict}{mblogit}(object, newdata=NULL,type=c("link","response"),se.fit=FALSE, \dots) \method{predict}{mclogit}(object, newdata=NULL,type=c("link","response"),se.fit=FALSE, \dots) \method{predict}{mmblogit}(object, newdata=NULL,type=c("link","response"),se.fit=FALSE, conditional=TRUE, \dots) \method{predict}{mmclogit}(object, newdata=NULL,type=c("link","response"),se.fit=FALSE, conditional=TRUE, \dots) } \arguments{ \item{object}{an object in class "mblogit", "mmblogit", "mclogit", or "mmclogit"} \item{newdata}{an optional data frame with new data} \item{type}{a character string specifying the kind of prediction} \item{se.fit}{a logical value; whether predictions should be accompanied with standard errors} \item{conditional}{a logical value; whether predictions should be made conditional on the random effects (or whether they are set to zero, i.e. their expectation). This argument is consequential only if the "mmblogit" or "mmclogit" object was created with \code{method="PQL"}.} \item{\dots}{other arguments, ignored.} } \value{ The \code{predict} methods return either a matrix (unless called with \code{se.fit=TRUE}) or a list with two matrix-valued elements \code{"fit"} and \code{"se.fit"}. } \examples{ library(MASS) (house.mblogit <- mblogit(Sat ~ Infl + Type + Cont, data = housing, weights=Freq)) head(pred.house.mblogit <- predict(house.mblogit)) str(pred.house.mblogit <- predict(house.mblogit,se=TRUE)) head(pred.house.mblogit <- predict(house.mblogit, type="response")) str(pred.house.mblogit <- predict(house.mblogit,se=TRUE, type="response")) \donttest{ # This takes a bit longer. data(electors) (mcre <- mclogit( cbind(Freq,interaction(time,class))~econ.left/class+welfare/class+auth/class, random=~1|party.time, data=within(electors,party.time<-interaction(party,time)))) str(predict(mcre)) str(predict(mcre,type="response")) str(predict(mcre,se.fit=TRUE)) str(predict(mcre,type="response",se.fit=TRUE)) } } mclogit/man/mclogit.Rd0000644000176200001440000002212714321555104014414 0ustar liggesusers\name{mclogit} \alias{mclogit} \alias{anova.mclogit} \alias{print.mclogit} \alias{vcov.mclogit} \alias{deviance.mclogit} \alias{logLik.mclogit} \alias{summary.mclogit} \alias{print.summary.mclogit} \alias{fitted.mclogit} \alias{residuals.mclogit} \alias{weights.mclogit} \alias{AIC.mclogit} \alias{BIC.mclogit} \alias{update.mclogit} \alias{anova.mclogit} \alias{summary.mmclogit} \alias{print.summary.mmclogit} \title{Conditional Logit Models and Mixed Conditional Logit Models} \description{ \code{mclogit} fits conditional logit models and mixed conditional logit models to count data and individual choice data, where the choice set may vary across choice occasions. Conditional logit models without random effects are fitted by Fisher-scoring/IWLS. Models with random effects (mixed conditional logit models) are estimated via maximum likelihood with a simple Laplace aproximation (aka PQL). } \usage{ mclogit(formula, data=parent.frame(), random=NULL, subset, weights = NULL, offset=NULL, na.action = getOption("na.action"), model = TRUE, x = FALSE, y = TRUE, contrasts=NULL, method = NULL, estimator=c("ML","REML"), dispersion = FALSE, start=NULL, control=if(length(random)) mmclogit.control(\dots) else mclogit.control(\dots), \dots) \method{update}{mclogit}(object, formula., dispersion, \dots) \method{summary}{mclogit}(object, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, \dots) } \arguments{ \item{formula}{a model formula: a symbolic description of the model to be fitted. The left-hand side should result in a two-column matrix. The first column contains the choice counts or choice indicators (alternative is chosen=1, is not chosen=0). The second column contains unique numbers for each choice set. The left-hand side can either take the form \code{cbind(choice,set)} or (from version 0.9.1) \code{choice|set} If individual-level data is used, choice sets correspond to individuals, if aggregated data with choice counts are used, choice sets usually correspond to covariate classes. The right-hand of the formula contains choice predictors. It should be noted that constants are deleted from the formula as are predictors that do not vary within choice sets. } \item{data}{an optional data frame, list or environment (or object coercible by \code{\link{as.data.frame}} to a data frame) containing the variables in the model. If not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{glm} is called.} \item{random}{an optional formula or list of formulas that specify the random-effects structure or NULL.} \item{weights}{an optional vector of weights to be used in the fitting process. Should be \code{NULL} or a numeric vector.} \item{offset}{an optional model offset. Currently only supported for models without random effects.} \item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.} \item{na.action}{a function which indicates what should happen when the data contain \code{NA}s. The default is set by the \code{na.action} setting of \code{\link{options}}, and is \code{\link{na.fail}} if that is unset. The \sQuote{factory-fresh} default is \code{\link{na.omit}}. Another possible value is \code{NULL}, no action. Value \code{\link{na.exclude}} can be useful.} \item{start}{an optional numerical vector of starting values for the conditional logit parameters. If the model has random effects, the vector should have a "VarCov" attribute wtih starting values for the random effects (co-)variances. If the random effects model is estimated with the "PQL" method, the starting values matrix should also have a "random.effects" attribute, which should have the same structure as the "random.effects" component of an object returned by \code{mblogit()}. } \item{model}{a logical value indicating whether \emph{model frame} should be included as a component of the returned value.} \item{x, y}{ logical values indicating whether the response vector and model matrix used in the fitting process should be returned as components of the returned value. } \item{contrasts}{an optional list. See the \code{contrasts.arg} of \code{model.matrix.default}.} \item{method}{\code{NULL} or a character string, either "PQL" or "MQL", specifies the type of the quasilikelihood approximation to be used if a random-effects model is to be estimated.} \item{estimator}{a character string; either "ML" or "REML", specifies which estimator is to be used/approximated.} \item{dispersion}{a real number used as dispersion parameter; a character vector that specifies the method to compute the dispersion; a logical value -- if \code{TRUE} the default method (\code{"Afroz"}) is used, if \code{FALSE}, the dispersion parameter is set to 1, that is, no dispersion. For details see \code{\link{dispersion}}.} \item{control}{a list of parameters for the fitting process. See \code{\link{mclogit.control}} } \item{\dots}{ arguments to be passed to \code{mclogit.control} or \code{mmclogit.control} } \item{object}{an object that inherits class \code{"mclogit"}. When passed to \code{dispersion()}, it should be the result of a call of \code{mclogit()} of \code{mblogit()}, \emph{without} random effects. } \item{formula.}{a changes to the model formula, see \code{\link[stats:update]{update.default}} and \code{\link[stats]{update.formula}}.} \item{correlation}{logical; see \code{\link[stats]{summary.lm}}.} \item{symbolic.cor}{logical; see \code{\link[stats]{summary.lm}}.} } \value{ \code{mclogit} returns an object of class "mclogit", which has almost the same structure as an object of class "\link[stats]{glm}". } \note{ Covariates that are constant within choice sets are automatically dropped from the model formula specified by the \code{formula} argument of \code{mclogit}. If the model contains random effects, these should \itemize{ \item either vary within choice sets (e.g. the levels of a factor that defines the choice sets should not be nested within the levels of factor) \item or be random coefficients of covariates that vary within choice sets. } In earlier versions of the package (prior to 0.6) it will lead to a failure of the model fitting algorithm if these conditions are not satisfied. Since version 0.6 of the package, the function \code{mclogit} will complain about such model a misspecification explicitely. } \references{ Agresti, Alan (2002). \emph{Categorical Data Analysis.} 2nd ed, Hoboken, NJ: Wiley. \doi{10.1002/0471249688} Breslow, N.E. and D.G. Clayton (1993). "Approximate Inference in Generalized Linear Mixed Models". \emph{Journal of the American Statistical Association} 88 (421): 9-25. \doi{10.1080/01621459.1993.10594284} Elff, Martin (2009). "Social Divisions, Party Positions, and Electoral Behaviour". \emph{Electoral Studies} 28(2): 297-308. \doi{10.1016/j.electstud.2009.02.002} McFadden, D. (1973). "Conditionial Logit Analysis of Qualitative Choice Behavior". Pp. 105-135 in P. Zarembka (ed.). \emph{Frontiers in Econometrics}. New York: Wiley. \url{https://eml.berkeley.edu/reprints/mcfadden/zarembka.pdf} } \examples{ data(Transport) summary(mclogit( cbind(resp,suburb)~distance+cost, data=Transport )) # New syntactic sugar: summary(mclogit( resp|suburb~distance+cost, data=Transport )) \dontrun{ # This takes a bit longer. data(electors) electors <- within(electors,{ party.time <-interaction(party,time) time.class <- interaction(time,class) }) # Time points nested within parties summary(mclogit( Freq|time.class~econ.left/class+welfare/class+auth/class, random=~1|party/time, data=electors)) # Party-level random intercepts and random slopes varying over time points summary(mclogit( Freq|time.class~econ.left/class+welfare/class+auth/class, random=list(~1|party,~econ.left+0|time), data=electors)) } } \keyword{models} \keyword{regression} \seealso{ Conditional logit models are also supported by \pkg{gmnl}, \pkg{mlogit}, and \pkg{survival}. \pkg{survival} supports conditional logit models for binary panel data and case-control studies. \pkg{mlogit} and \pkg{gmnl} treat conditional logit models from an econometric perspective. Unlike the present package, they focus on the random utility interpretation of discrete choice models and support generalisations of conditional logit models, such as nested logit models, that are intended to overcome the IIA (indipendence from irrelevant alterantives) assumption. Mixed multinomial models are also supported and estimated using simulation-based techniques. Unlike the present package, mixed or random-effects extensions are mainly intended to fit repeated choices of the same individuals and not aggregated choices of many individuals facing identical alternatives. } mclogit/man/mclogit.fit.Rd0000644000176200001440000000377414321600051015174 0ustar liggesusers\name{mclogit.fit} \alias{mclogit.fit} \alias{mmclogit.fitPQLMQL} \title{ Internal functions used for model fit. } \description{ These functions are exported and documented for use by other packages. They are not intended for end users. } \usage{ mclogit.fit(y, s, w, X, dispersion=FALSE, start = NULL, offset = NULL, control = mclogit.control()) mmclogit.fitPQLMQL(y, s, w, X, Z, d, start = NULL, start.Phi = NULL, start.b = NULL, offset = NULL, method=c("PQL","MQL"), estimator = c("ML","REML"), control = mmclogit.control()) } \arguments{ \item{y}{a response vector. Should be binary.} \item{s}{a vector identifying individuals or covariate strata} \item{w}{a vector with observation weights.} \item{X}{a model matrix; required.} \item{dispersion}{a logical value or a character string; whether and how a dispersion parameter should be estimated. For details see \code{\link{dispersion}}.} \item{Z}{the random effects design matrix.} \item{d}{dimension of random effects. Typically $d=1$ for random intercepts only, $d>1$ for models with random intercepts.} \item{start}{an optional numerical vector of starting values for the coefficients. } \item{offset}{an optional model offset. Currently only supported for models without random effects.} \item{start.Phi}{an optional matrix of strarting values for the (co-)variance parameters.} \item{start.b}{an optional list of vectors with starting values for the random effects.} \item{method}{a character string, either "PQL" or "MQL", specifies the type of the quasilikelihood approximation.} \item{estimator}{a character string; either "ML" or "REML", specifies which estimator is to be used/approximated.} \item{control}{a list of parameters for the fitting process. See \code{\link{mclogit.control}} } } \value{ A list with components describing the fitted model. } mclogit/man/mclogit_control.Rd0000644000176200001440000000305413661007607016160 0ustar liggesusers\name{mclogit.control} \alias{mclogit.control} \alias{mmclogit.control} \title{Control Parameters for the Fitting Process} \description{ \code{mclogit.control} returns a list of default parameters that control the fitting process of \code{mclogit}. } \usage{ mclogit.control(epsilon = 1e-08, maxit = 25, trace=TRUE) mmclogit.control(epsilon = 1e-08, maxit = 25, trace=TRUE, trace.inner=FALSE, avoid.increase = FALSE, break.on.increase = FALSE, break.on.infinite = FALSE, break.on.negative = FALSE) } \arguments{ \item{epsilon}{positive convergence tolerance \eqn{\epsilon}; the iterations converge when \eqn{|dev - dev_{old}|/(|dev| + 0.1) < \epsilon}{|dev - devold|/(|dev| + 0.1) < \epsilon}.} \item{maxit}{integer giving the maximal number of IWLS or PQL iterations.} \item{trace}{logical indicating if output should be produced for each iteration.} \item{trace.inner}{logical; indicating if output should be produced for each inner iteration of the PQL method.} \item{avoid.increase}{logical; should an increase of the deviance be avoided by step truncation?} \item{break.on.increase}{logical; should an increase of the deviance be avoided by stopping the algorithm?} \item{break.on.infinite}{logical; should an infinite deviance stop the algorithm instead of leading to step truncation?} \item{break.on.negative}{logical; should a negative deviance stop the algorithm?} } \value{ A list. } mclogit/man/electors.Rd0000644000176200001440000000263214224766563014614 0ustar liggesusers\name{electors} \alias{electors} \title{Class, Party Position, and Electoral Choice} \description{This is an artificial data set on electoral choice as influenced by class and party positions. } \usage{data(electors)} \format{A data frame containing the following variables: \describe{ \item{class}{class position of voters} \item{party}{party that runs for election} \item{Freq}{freqency by which each party list is chosen by members of each class} \item{time}{time variable, runs from zero to one} \item{econ.left}{economic-policy "leftness" of each party} \item{welfare}{emphasis of welfare expansion of each party} \item{auth}{position on authoritarian issues} } } \examples{ data(electors) summary(mclogit( cbind(Freq,interaction(time,class))~econ.left+welfare+auth, data=electors)) summary(mclogit( cbind(Freq,interaction(time,class))~econ.left/class+welfare/class+auth/class, data=electors)) \dontrun{# This takes a bit longer. summary(mclogit( cbind(Freq,interaction(time,class))~econ.left/class+welfare/class+auth/class, random=~1|party.time, data=within(electors,party.time<-interaction(party,time)))) summary(mclogit( cbind(Freq,interaction(time,class))~econ.left/(class*time)+welfare/class+auth/class, random=~1|party.time, data=within(electors,{ party.time <-interaction(party,time) econ.left.sq <- (econ.left-mean(econ.left))^2 }))) } } \keyword{datasets} mclogit/man/Transport.Rd0000644000176200001440000000126311276647570014767 0ustar liggesusers\name{Transport} \alias{Transport} \title{Choice of Means of Transport} \description{This is an artificial data set on choice of means of transport based on cost and walking distance. } \usage{data(Transport)} \format{A data frame containing the following variables: \describe{ \item{transport}{means of transportation that can be chosen.} \item{suburb}{identifying number for each suburb} \item{distance}{walking distance to bus or train station} \item{cost}{cost of each means of transportation} \item{working}{size of working population of each suburb} \item{prop.true}{true choice probabilities} \item{resp}{choice frequencies of means of transportation} } } \keyword{datasets} mclogit/man/getSummary-mclogit.Rd0000644000176200001440000001064313662272003016550 0ustar liggesusers\name{getSummary-methods} \alias{getSummary.mclogit} \alias{getSummary.mblogit} \alias{getSummary.mmclogit} \alias{getSummary.mmblogit} \title{`getSummary` Methods} \description{ \code{\link[memisc]{getSummary}} methods for use by \code{\link[memisc]{mtable}} } \usage{ \method{getSummary}{mblogit}(obj, alpha=.05, \dots) \method{getSummary}{mclogit}(obj, alpha=.05, rearrange=NULL, \dots) \method{getSummary}{mmblogit}(obj, alpha=.05, \dots) \method{getSummary}{mmclogit}(obj, alpha=.05, rearrange=NULL, \dots) } \arguments{ \item{obj}{an object returned by \code{\link{mblogit}} or \code{\link{mclogit}}} \item{alpha}{level of the confidence intervals; their coverage should be 1-alpha/2 } \item{rearrange}{an optional named list of character vectors. Each element of the list designates a column in the table of estimates, and each element of a character vector refers to a coefficient. Names of list elements become column heads and names of the character vector elements become coefficient labels. } \item{\dots}{further arguments; ignored.} } \examples{ \dontrun{ summary(classd.model <- mclogit(cbind(Freq,choice.set)~ (econdim1.sq+nonmatdim1.sq+nonmatdim2.sq)+ (econdim1+nonmatdim1+nonmatdim2)+ (econdim1+nonmatdim1+nonmatdim2):classd, data=mvoteint.classd,random=~1|mvoteint/eb, subset=classd!="Farmers")) myGetSummary.classd <- function(x)getSummary.mclogit(x,rearrange=list( "Econ. Left/Right"=c( "Squared effect"="econdim1.sq", "Linear effect"="econdim1", " x Intermediate/Manual worker"="econdim1:classdIntermediate", " x Service class/Manual worker"="econdim1:classdService class", " x Self-employed/Manual worker"="econdim1:classdSelf-employed" ), "Lib./Auth."=c( "Squared effect"="nonmatdim1.sq", "Linear effect"="nonmatdim1", " x Intermediate/Manual worker"="nonmatdim1:classdIntermediate", " x Service class/Manual worker"="nonmatdim1:classdService class", " x Self-employed/Manual worker"="nonmatdim1:classdSelf-employed" ), "Mod./Trad."=c( "Squared effect"="nonmatdim2.sq", "Linear effect"="nonmatdim2", " x Intermediate/Manual worker"="nonmatdim2:classdIntermediate", " x Service class/Manual worker"="nonmatdim2:classdService class", " x Self-employed/Manual worker"="nonmatdim2:classdSelf-employed" ) )) library(memisc) mtable(classd.model,getSummary=myGetSummary.classd) # Output would look like so: # ================================================================================== # Econ. Left/Right Lib./Auth. Mod./Trad. # ---------------------------------------------------------------------------------- # Squared effect 0.030 0.008 -0.129** # (0.081) (0.041) (0.047) # Linear effect -0.583*** -0.038 0.137** # (0.063) (0.041) (0.045) # x Intermediate/Manual worker 0.632*** -0.029 -0.015 # (0.026) (0.020) (0.019) # x Service class/Manual worker 1.158*** 0.084** 0.000 # (0.040) (0.032) (0.030) # x Self-employed/Manual worker 1.140*** 0.363*** 0.112*** # (0.035) (0.027) (0.026) # Var(mvoteint) 1.080*** # (0.000) # Var(mvoteint x eb) 0.118*** # (0.000) # ---------------------------------------------------------------------------------- # Dispersion 1.561 # Deviance 15007.0 # N 173445 # ================================================================================== } } mclogit/man/mblogit.Rd0000644000176200001440000001457714326445641014436 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mblogit.R \name{mblogit} \alias{mblogit} \alias{print.mblogit} \alias{summary.mblogit} \alias{print.summary.mblogit} \alias{fitted.mblogit} \alias{weights.mblogit} \alias{print.mmblogit} \alias{summary.mmblogit} \alias{print.summary.mmblogit} \title{Baseline-Category Logit Models for Categorical and Multinomial Responses} \usage{ mblogit( formula, data = parent.frame(), random = NULL, catCov = c("free", "diagonal", "single"), subset, weights = NULL, na.action = getOption("na.action"), model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, method = NULL, estimator = c("ML", "REML"), dispersion = FALSE, start = NULL, from.table = FALSE, groups = NULL, control = if (length(random)) mmclogit.control(...) else mclogit.control(...), ... ) } \arguments{ \item{formula}{the model formula. The response must be a factor or a matrix of counts.} \item{data}{an optional data frame, list or environment (or object coercible by \code{\link{as.data.frame}} to a data frame) containing the variables in the model. If not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{glm} is called.} \item{random}{an optional formula or list of formulas that specify the random-effects structure or NULL.} \item{catCov}{a character string that specifies optional restrictions on the covariances of random effects between the logit equations. "free" means no restrictions, "diagonal" means that random effects pertinent to different categories are uncorrelated, while "single" means that the random effect variances pertinent to all categories are identical.} \item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.} \item{weights}{an optional vector of weights to be used in the fitting process. Should be \code{NULL} or a numeric vector.} \item{na.action}{a function which indicates what should happen when the data contain \code{NA}s. The default is set by the \code{na.action} setting of \code{\link{options}}, and is \code{\link{na.fail}} if that is unset. The \sQuote{factory-fresh} default is \code{\link{na.omit}}. Another possible value is \code{NULL}, no action. Value \code{\link{na.exclude}} can be useful.} \item{model}{a logical value indicating whether \emph{model frame} should be included as a component of the returned value.} \item{x, y}{logical values indicating whether the response vector and model matrix used in the fitting process should be returned as components of the returned value.} \item{contrasts}{an optional list. See the \code{contrasts.arg} of \code{model.matrix.default}.} \item{method}{\code{NULL} or a character string, either "PQL" or "MQL", specifies the type of the quasilikelihood approximation to be used if a random-effects model is to be estimated.} \item{estimator}{a character string; either "ML" or "REML", specifies which estimator is to be used/approximated.} \item{dispersion}{a logical value or a character string; whether and how a dispersion parameter should be estimated. For details see \code{\link{dispersion}}.} \item{start}{an optional matrix of starting values (with as many rows as logit equations). If the model has random effects, the matrix should have a "VarCov" attribute wtih starting values for the random effects (co-)variances. If the random effects model is estimated with the "PQL" method, the starting values matrix should also have a "random.effects" attribute, which should have the same structure as the "random.effects" component of an object returned by \code{mblogit()}.} \item{from.table}{a logical value; do the data represent a contingency table, e.g. were created by applying \code{as.data.frame()} a the result of \code{table()} or \code{xtabs()}. This relevant only if the response is a factor. This argument should be set to \code{TRUE} if the data do come from a contingency table. Correctly setting \code{from.table=TRUE} in this case, will lead to efficiency gains in computing, but more importantly overdispersion will correctly be computed if present.} \item{groups}{an optional formula that specifies groups of observations relevant for the specification of overdispersed response counts.} \item{control}{a list of parameters for the fitting process. See \code{\link{mclogit.control}}} \item{\dots}{arguments to be passed to \code{mclogit.control} or \code{mmclogit.control}} } \value{ \code{mblogit} returns an object of class "mblogit", which has almost the same structure as an object of class "\link[stats]{glm}". The difference are the components \code{coefficients}, \code{residuals}, \code{fitted.values}, \code{linear.predictors}, and \code{y}, which are matrices with number of columns equal to the number of response categories minus one. } \description{ The function \code{mblogit} fits baseline-category logit models for categorical and multinomial count responses with fixed alternatives. } \details{ The function \code{mblogit} internally rearranges the data into a 'long' format and uses \code{\link{mclogit.fit}} to compute estimates. Nevertheless, the 'user data' are unaffected. } \examples{ library(MASS) # For 'housing' data library(nnet) library(memisc) (house.mult<- multinom(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)) (house.mblogit <- mblogit(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)) summary(house.mult) summary(house.mblogit) mtable(house.mblogit) } \references{ Agresti, Alan. 2002. \emph{Categorical Data Analysis.} 2nd ed, Hoboken, NJ: Wiley. \doi{10.1002/0471249688} Breslow, N.E. and D.G. Clayton. 1993. "Approximate Inference in Generalized Linear Mixed Models". \emph{Journal of the American Statistical Association} 88 (421): 9-25. \doi{10.1080/01621459.1993.10594284} } \seealso{ The function \code{\link[nnet]{multinom}} in package \pkg{nnet} also fits multinomial baseline-category logit models, but has a slightly less convenient output and does not support overdispersion or random effects. However, it provides some other options. Baseline-category logit models are also supported by the package \pkg{VGAM}, as well as some reduced-rank and (semi-parametric) additive generalisations. The package \pkg{mnlogit} estimates logit models in a way optimized for large numbers of alternatives. } mclogit/man/dispersion.Rd0000644000176200001440000000461014025215067015134 0ustar liggesusers\name{dispersion} \alias{dispersion} \alias{dispersion.mclogit} \title{Overdispersion in Multinomial Logit Models} \description{ The function \code{dispersion()} extracts the dispersion parameter from a multinomial logit model or computes a dispersion parameter estimate based on a given method. This dispersion parameter can be attached to a model using \code{update()}. It can also given as an argument to \code{summary()}. } \usage{ dispersion(object,method, \dots) \method{dispersion}{mclogit}(object,method=NULL, \dots) } \arguments{ \item{object}{an object that inherits class \code{"mclogit"}. When passed to \code{dispersion()}, it should be the result of a call of \code{mclogit()} of \code{mblogit()}, \emph{without} random effects. } \item{method}{a character string, either \code{"Afroz"}, \code{"Fletcher"}, \code{"Pearson"}, or \code{"Deviance"}, that specifies the estimator of the dispersion; or \code{NULL}, in which case the default estimator, \code{"Afroz"} is used. The estimators are discussed in Afroz et al. (2019). } \item{\dots}{other arguments, ignored or passed to other methods.} } \references{ Afroz, Farzana, Matt Parry, and David Fletcher. (2020). "Estimating Overdispersion in Sparse Multinomial Data." \emph{Biometrics} 76(3): 834-842. \doi{10.1111/biom.13194}. } \examples{ library(MASS) # For 'housing' data # Note that with a factor response and frequency weighted data, # Overdispersion will be overestimated: house.mblogit <- mblogit(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) dispersion(house.mblogit,method="Afroz") dispersion(house.mblogit,method="Deviance") summary(house.mblogit) phi.Afroz <- dispersion(house.mblogit,method="Afroz") summary(house.mblogit, dispersion=phi.Afroz) summary(update(house.mblogit, dispersion="Afroz")) # In order to be able to estimate overdispersion accurately, # data like the above (which usually comes from applying # 'as.data.frame' to a contingency table) the model has to be # fitted with the optional argument 'from.table=TRUE': house.mblogit.corrected <- mblogit(Sat ~ Infl + Type + Cont, weights = Freq, data = housing, from.table=TRUE, dispersion="Afroz") # Now the estimated dispersion parameter is no longer larger than 20, # but just bit over 1.0. summary(house.mblogit.corrected) } mclogit/man/simulate.Rd0000644000176200001440000000644513712747533014622 0ustar liggesusers\name{simulate.mclogit} \alias{simulate.mclogit} \alias{simulate.mblogit} \alias{simulate.mmclogit} \alias{simulate.mmblogit} \title{ Simulating responses from baseline-category and conditional logit models } \description{ The \code{simulate()} methods allow to simulate responses from models fitted with \code{mclogit()} and \code{mblogit()}. Currently only models \emph{without} random effects are supported for this. } \usage{ \method{simulate}{mblogit}(object, nsim = 1, seed = NULL, \dots) \method{simulate}{mclogit}(object, nsim = 1, seed = NULL, \dots) # These methods are currently just 'stubs', causing an error # message stating that simulation from models with random # effects are not supported yet \method{simulate}{mmblogit}(object, nsim = 1, seed = NULL, \dots) \method{simulate}{mmclogit}(object, nsim = 1, seed = NULL, \dots) } \arguments{ \item{object}{an object from the relevant class} \item{nsim}{a number, specifying the number of simulated responses for each observation.} \item{seed}{an object specifying if and how the random number generator should be initialized ('seeded'). The interpetation of this argument follows the default method, see \code{link[stats]{simulate}} } \item{\dots}{other arguments, ignored.} } \value{ The result of the \code{\link[stats]{simulate}} method for objects created by \code{\link{mclogit}} is a data frame with one variable for each requested simulation run (their number is given by the \code{nsim=} argument). The contents of the columns are counts (or zero-one values), with group-wise multinomial distribution (within choice sets) just like it is assumed for the original response. The shape of the result of the \code{\link[stats]{simulate}} method for objects created by \code{\link{mblogit}} is also a data frame. The variables within the data frame have a mode or shape that corresponds to the response to which the model was fitted. If the response is a matrix of counts, then the variables in the data frame are also matrices of counts. If the response is a factor and \code{\link{mblogit}} was called with an argument \code{from.table=FALSE}, the variables in the data frame are factors with the same factor levels as the response to which the model was fitted. If instead the function was called with \code{from.table=TRUE}, the variables in the data frame are counts, which represent frequency weights that would result from applying \code{\link[base]{as.data.frame}} to a contingency table of simulated frequency counts. } \examples{ library(MASS) (house.mblogit <- mblogit(Sat ~ Infl + Type + Cont, data = housing, weights=Freq, from.table=TRUE)) sm <- simulate(house.mblogit,nsim=7) housing.long <- housing[rep(seq.int(nrow(housing)),housing$Freq),] (housel.mblogit <- mblogit(Sat ~ Infl + Type + Cont, data=housing.long)) sml <- simulate(housel.mblogit,nsim=7) housing.table <- xtabs(Freq~.,data=housing) housing.mat <- memisc::to.data.frame(housing.table) head(housing.mat) (housem.mblogit <- mblogit(cbind(Low,Medium,High) ~ Infl + Type + Cont, data=housing.mat)) smm <- simulate(housem.mblogit,nsim=7) str(sm) str(sml) str(smm) head(smm[[1]]) } mclogit/DESCRIPTION0000644000176200001440000000176614326453475013445 0ustar liggesusersPackage: mclogit Type: Package Title: Multinomial Logit Models, with or without Random Effects or Overdispersion Version: 0.9.6 Date: 2022-10-27 Author: Martin Elff Maintainer: Martin Elff Description: Provides estimators for multinomial logit models in their conditional logit and baseline logit variants, with or without random effects, with or without overdispersion. Random effects models are estimated using the PQL technique (based on a Laplace approximation) or the MQL technique (based on a Solomon-Cox approximation). Estimates should be treated with caution if the group sizes are small. License: GPL-2 Depends: stats, Matrix Imports: memisc, methods Suggests: MASS, nnet Enhances: emmeans LazyLoad: Yes URL: http://mclogit.elff.eu,https://github.com/melff/mclogit/ BugReports: https://github.com/melff/mclogit/issues RoxygenNote: 7.1.2 NeedsCompilation: no Packaged: 2022-10-27 09:17:22 UTC; elff Repository: CRAN Date/Publication: 2022-10-27 10:02:37 UTC mclogit/build/0000755000176200001440000000000014326446242013016 5ustar liggesusersmclogit/build/partial.rdb0000644000176200001440000000007514326446242015145 0ustar liggesusersb```b`abd`b1 H020piּb C"wa7mclogit/R/0000755000176200001440000000000014326445005012114 5ustar liggesusersmclogit/R/anova-mclogit.R0000644000176200001440000000522114224653516015004 0ustar liggesusersanova.mclogit <- function (object, ..., dispersion = NULL, test = NULL) { dotargs <- list(...) named <- if (is.null(names(dotargs))) rep_len(FALSE, length(dotargs)) else (names(dotargs) != "") if (any(named)) warning("the following arguments to 'anova.mclogit' are invalid and dropped: ", paste(deparse(dotargs[named]), collapse = ", ")) dotargs <- dotargs[!named] is.mclogit <- vapply(dotargs, function(x) inherits(x, "mclogit") , #&!inherits(x,"mclogitRandeff"), NA) dotargs <- dotargs[is.mclogit] if (length(dotargs)) return(anova.mclogitlist(c(list(object), dotargs), dispersion = dispersion, test = test)) stop("'anova.mclogit' can only be used to compare fitted models") } anova.mclogitlist <- function (object, ..., dispersion = NULL, test = NULL) { responses <- as.character(lapply(object, function(x) { deparse(formula(x)[[2L]]) })) sameresp <- responses == responses[1L] if (!all(sameresp)) { object <- object[sameresp] warning(gettextf("models with response %s removed because response differs from model 1", sQuote(deparse(responses[!sameresp]))), domain = NA) } ns <- sapply(object, function(x) x$N) if (any(ns != ns[1L])) stop("models were not all fitted to the same size of dataset") nmodels <- length(object) if (nmodels == 1) stop("'anova.mclogit' can only be used to compare fitted models") hasRE <- sapply(object,inherits,"mmclogit") if(any(hasRE)) warning("Results are unreliable, since deviances from quasi-likelihoods are not comparable.") resdf <- as.numeric(lapply(object, function(x) x$df.residual)) resdev <- as.numeric(lapply(object, function(x) x$deviance)) table <- data.frame(resdf, resdev, c(NA, -diff(resdf)), c(NA, -diff(resdev))) variables <- lapply(object, function(x) paste(deparse(formula(x)), collapse = "\n")) dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Resid. Dev", "Df", "Deviance")) title <- "Analysis of Deviance Table\n" topnote <- paste("Model ", format(1L:nmodels), ": ", variables, sep = "", collapse = "\n") if (!is.null(test)) { bigmodel <- object[[order(resdf)[1L]]] df.dispersion <- Inf table <- stat.anova(table = table, test = test, scale = 1, df.scale = df.dispersion, n = bigmodel$N) } structure(table, heading = c(title, topnote), class = c("anova", "data.frame")) } mclogit/R/zzz.R0000644000176200001440000000233114036642701013073 0ustar liggesusers.onLoad <- function(lib,pkg){ if(requireNamespace("memisc",quietly = TRUE)){ memisc::setSummaryTemplate( mclogit = c( "Likelihood-ratio" = "($LR:f1#)", #p = "($p:#)", "Log-likelihood" = "($logLik:f1#)", Deviance = "($deviance:f1#)", AIC = "($AIC:f1#)", BIC = "($BIC:f1#)", N = "($N:d)" ), mmclogit = c( #"Likelihood-ratio" = "($LR:f1#)", #p = "($p:#)", #"Log-likelihood" = "($logLik:f1#)", Deviance = "($deviance:f1#)", #AIC = "($AIC:f1#)", #BIC = "($BIC:f1#)", N = "($N:d)" ), mblogit = c( "Log-likelihood" = "($logLik:f1#)", Deviance = "($deviance:f1#)", AIC = "($AIC:f1#)", BIC = "($BIC:f1#)", N = "($N:d)" ) ) } if (requireNamespace("emmeans", quietly = TRUE)) emmeans::.emm_register(c("mblogit"), pkg) options(mblogit.basecat.sep="/") options(mblogit.show.basecat=TRUE) options(summary.stats.mclogit=c("Deviance","N")) options(summary.stats.mmclogit=c("Deviance","N")) } mclogit/R/mclogit.R0000644000176200001440000010326714322773105013707 0ustar liggesusersquickInteraction <- function(by){ if(is.list(by)){ n.arg <- length(by) f <- 0L uf <- 0L for(i in rev(1:n.arg)){ y <- by[[i]] y <- as.numeric(y) uy <- unique(na.omit(y)) y <- match(y,uy,NA) l <- length(uy) f <- f*l + y - 1 uf <- unique(na.omit(f)) f <- match(f,uf,NA) uf <- seq(length(uf)) } } else { by <- as.numeric(by) uf <- unique(na.omit(by)) f <- match(by,uf,NA) uf <- seq(length(uf)) } return(structure(f,unique=uf)) } matConstInSets <- function(X,sets){ ans <- logical(ncol(X)) for(i in 1:ncol(X)){ v <- tapply(X[,i],sets,varies) ans[i] <- !any(v) } ans } listConstInSets <- function(X,sets){ ans <- logical(length(X)) for(i in 1:length(X)){ v <- tapply(X[[i]],sets,varies) ans[i] <- !any(v) } ans } groupConstInSets <- function(X,sets){ ans <- logical(length(X)) for(i in 1:length(X)){ v <- tapply(X[[i]],sets,varies) ans[i] <- !any(v) } ans } varies <- function(x) !all(duplicated(x)[-1L]) mclogit <- function( formula, data=parent.frame(), random=NULL, subset, weights=NULL, offset=NULL, na.action = getOption("na.action"), model = TRUE, x = FALSE, y = TRUE, contrasts=NULL, method = NULL, estimator=c("ML","REML"), dispersion = FALSE, start=NULL, control=if(length(random)) mmclogit.control(...) else mclogit.control(...), ... ){ # Assumptions: # left hand side of formula: cbind(counts, choice set index) # right hand side of the formula: attributes # intercepts are removed! call <- match.call(expand.dots = TRUE) if(missing(data)) data <- environment(formula) mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "offset", "na.action"), names(mf), 0) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1]] <- as.name("model.frame") if(as.character(formula[[2]][[1]])=="|") mf$formula[[2]][[1]] <- as.name("cbind") if(length(random)){ mf0 <- eval(mf, parent.frame()) mt <- attr(mf0,"terms") if(is_formula(random)){ rf <- paste(c(".~.",all.vars(random)),collapse="+") } else if(is.list(random)) { rf <- paste(c(".~.",unlist(lapply(random,all.vars))),collapse="+") } else stop("'random' argument must be either a formula or a list of formulae") rf <- as.formula(rf) if (typeof(mf$formula) == "symbol") { mff <- formula } else { mff <- structure(mf$formula,class="formula") } mff <- eval(mff, parent.frame()) mf$formula <- update(mff,rf) mf <- eval(mf, parent.frame()) check.names(control, "epsilon","maxit", "trace","trace.inner", "avoid.increase", "break.on.increase", "break.on.infinite", "break.on.negative") } else { mf <- eval(mf, parent.frame()) mt <- attr(mf,"terms") check.names(control, "epsilon","maxit", "trace") } na.action <- attr(mf,"na.action") weights <- as.vector(model.weights(mf)) offset <- as.vector(model.offset(mf)) if(!is.null(weights) && !is.numeric(weights)) stop("'weights' must be a numeric vector") Y <- as.matrix(model.response(mf, "any")) if(!is.numeric(Y)) stop("The response matrix has to be numeric.") if(ncol(Y)<2) stop("need response counts and choice set indicators") sets <- Y[,2] sets <- match(sets,unique(sets)) Y <- Y[,1] if (is.null(weights)){ prior.weights <- rep(1,length(Y)) N <- rowsum(Y,sets,na.rm=TRUE) weights <- N[sets] } else{ prior.weights <- weights N <- rowsum(weights*Y,sets,na.rm=TRUE) weights <- N[sets] } N <- sum(N) Y <- Y/weights Y[weights==0] <- 0 X <- model.matrix(mt,mf,contrasts) contrasts <- attr(X, "contrasts") xlevels <- .getXlevels(mt,mf) icpt <- match("(Intercept)",colnames(X),nomatch=0) if(icpt) X <- X[,-icpt,drop=FALSE] const <- matConstInSets(X,sets) if(any(const)){ warning("removing ", gsub("(Intercept)","intercept",paste(colnames(X)[const],collapse=","),fixed=TRUE), " from model due to insufficient within-choice set variance") X <- X[,!const,drop=FALSE] } drop.coefs <- check.mclogit.drop.coefs(Y,sets,weights,X, offset = offset) if(any(drop.coefs)){ warning("removing ",paste(colnames(X)[drop.coefs],collapse=",")," from model") X <- X[,!drop.coefs,drop=FALSE] } if(ncol(X)<1) stop("No predictor variable remains in model") start.VarCov <- NULL start.randeff <- NULL if(length(start)){ start.VarCov <- attr(start,"VarCov") start.randeff <- attr(start,"random.effects") start.names <- names(start) X.names <- colnames(X) if(length(start.names)) start <- start[X.names] if(length(start)!=ncol(X)) stop("Columns of 'start' argument do not match independent variables.") } if(!length(random)){ fit <- mclogit.fit(y=Y,s=sets,w=weights,X=X, dispersion=dispersion, control=control, start = start, offset = offset) groups <- NULL } else { ## random effects if(!length(method)) method <- "PQL" if(inherits(random,"formula")) random <- list(random) random <- lapply(random,setupRandomFormula) rt <- lapply(random,"[[","formula") rt <- lapply(rt,terms) suppressWarnings(Z <- lapply(rt,model.matrix,mf, contrasts.arg=contrasts)) # Use suppressWarnings() to stop complaining about unused contasts nn <- length(Z) randstruct <- lapply(1:nn,function(k){ group.labels <- random[[k]]$groups groups <- mf[group.labels] groups <- lapply(groups,as.factor) nlev <- length(groups) if(nlev > 1){ for(i in 2:nlev){ groups[[i]] <- interaction(groups[c(i-1,i)]) group.labels[i] <- paste(group.labels[i-1],group.labels[i],sep=":") } } Z_k <- Z[[k]] gconst <- groupConstInSets(groups,sets) # Is grouping factor constant within choice sets? if(any(gconst)){ # If grouping factor is constant within choice sets, remove covariates that # are constants within choice sets rconst <- matConstInSets(Z_k,sets) if(any(rconst)){ cat("\n") warning("removing ", gsub("(Intercept)","intercept",paste(colnames(Z_k)[rconst],collapse=","),fixed=TRUE), " from random part of the model\n because of insufficient within-choice set variance") Z_k <- Z_k[,!rconst,drop=FALSE] } if(ncol(Z_k)<1) stop("No predictor variable remains in random part of the model.\nPlease reconsider your model specification.") } d <- ncol(Z_k) colnames(Z_k) <- gsub("(Intercept)","(Const.)",colnames(Z_k),fixed=TRUE) VarCov.names.k <- rep(list(colnames(Z_k)),nlev) Z_k <- lapply(groups,mkZ,rX=Z_k) d <- rep(d,nlev) names(groups) <- group.labels list(Z_k,groups,d,VarCov.names.k) }) Z <- lapply(randstruct,`[[`,1) groups <- lapply(randstruct,`[[`,2) d <- lapply(randstruct,`[[`,3) VarCov.names <- lapply(randstruct,`[[`,4) Z <- unlist(Z,recursive=FALSE) groups <- unlist(groups,recursive=FALSE) VarCov.names <- unlist(VarCov.names,recursive=FALSE) d <- unlist(d) Z <- blockMatrix(Z,ncol=length(Z)) fit <- mmclogit.fitPQLMQL(Y,sets,weights,X,Z, d=d, start=start, start.Phi=start.VarCov, start.b=start.randeff, method = method, estimator=estimator, control=control, offset = offset) nlev <- length(fit$VarCov) for(k in 1:nlev) dimnames(fit$VarCov[[k]]) <- list(VarCov.names[[k]],VarCov.names[[k]]) names(fit$VarCov) <- names(groups) } if(x) fit$x <- X if(x && length(random)) fit$z <- Z if(!y) { fit$y <- NULL fit$s <- NULL } fit <- c(fit,list(call = call, formula = formula, terms = mt, random = random, groups = groups, data = data, contrasts = contrasts, xlevels = xlevels, na.action = na.action, prior.weights=prior.weights, weights=weights, model=mf, N=N)) if(length(random)) class(fit) <- c("mmclogit","mclogit","lm") else class(fit) <- c("mclogit","lm") fit } check.mclogit.drop.coefs <- function(y, s, w, X, offset){ nvar <- ncol(X) nobs <- length(y) if(!length(offset)) offset <- rep.int(0, nobs) eta <- mclogitLinkInv(y,s,w) pi <- mclogitP(eta,s) y.star <- eta - offset + (y-pi)/pi yP.star <- y.star - rowsum(pi*y.star,s)[s] XP <- X - rowsum(pi*X,s)[s,,drop=FALSE] ww <- w*pi good <- ww > 0 wlsFit <- lm.wfit(x=XP[good,,drop=FALSE],y=yP.star[good],w=ww[good]) is.na(wlsFit$coef) } setupRandomFormula <- function(formula){ trms <- terms(formula) fo <- delete.response(trms) attributes(fo) <- NULL if(length(fo[[2]]) < 2 || as.character(fo[[2]][1])!="|") stop("missing '|' operator") groups <- fo fo[2] <- fo[[2]][2] groups[2] <- groups[[2]][3] checkRandomFormula(groups[[2]]) list( formula=structure(fo,class="formula"), groups=all.vars(groups) ) } checkRandomFormula <- function(x){ l <- as.list(x) if(length(l) < 3) return(NULL) if(!as.character(l[[1]])=="/") stop("Invalid random formula",call.=FALSE) x <- x[[2]] if(length(x)>1) Recall(x) } print.mclogit <- function(x,digits= max(3, getOption("digits") - 3), ...){ cat("\nCall: ",paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") if(length(coef(x))) { cat("Coefficients") if(is.character(co <- x$contrasts)) cat(" [contrasts: ", apply(cbind(names(co),co), 1, paste, collapse="="), "]") cat(":\n") print.default(format(x$coefficients, digits=digits), print.gap = 2, quote = FALSE) } else cat("No coefficients\n\n") if(x$phi != 1) cat("\nDispersion: ",x$phi) cat("\nNull Deviance: ", format(signif(x$null.deviance, digits)), "\nResidual Deviance:", format(signif(x$deviance, digits))) if(!x$converged) cat("\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="") else cat("\n") invisible(x) } vcov.mclogit <- function(object,...){ phi <- object$phi if(!length(phi)) phi <- 1 cov.unscaled <- safeInverse(object$information.matrix) return(cov.unscaled * phi) } weights.mclogit <- function(object,...){ return(object$weights) } deviance.mclogit <- function(object,...){ return(object$deviance) } summary.mclogit <- function(object,dispersion=NULL,correlation = FALSE, symbolic.cor = FALSE,...){ ## calculate coef table coef <- object$coefficients if(is.null(dispersion)) dispersion <- object$phi covmat.scaled <- vcov(object) var.cf <- diag(covmat.scaled) s.err <- sqrt(var.cf) zvalue <- coef/s.err if(dispersion == 1) pvalue <- 2*pnorm(-abs(zvalue)) else pvalue <- 2*pt(-abs(zvalue),df=object$df.residual) coef.table <- array(NA,dim=c(length(coef),4)) rownames(coef.table) <- names(coef) if(dispersion == 1) colnames(coef.table) <- c("Estimate", "Std. Error","z value","Pr(>|z|)") else colnames(coef.table) <- c("Estimate", "Std. Error","t value","Pr(>|t|)") coef.table[,1] <- coef coef.table[,2] <- s.err coef.table[,3] <- zvalue coef.table[,4] <- pvalue ans <- c(object[c("call","terms","deviance","contrasts", "null.deviance","iter","na.action","model.df", "df.residual","N","converged")], list(coefficients = coef.table, cov.coef=covmat.scaled, dispersion = dispersion )) p <- length(coef) if(correlation && p > 0) { dd <- sqrt(diag(ans$cov.coef)) ans$correlation <- ans$cov.coef/outer(dd,dd) ans$symbolic.cor <- symbolic.cor } class(ans) <- "summary.mclogit" return(ans) } print.summary.mclogit <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...){ cat("\nCall:\n") cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") coefs <- x$coefficients printCoefmat(coefs, digits=digits, signif.stars=signif.stars, na.print="NA", ...) if(x$dispersion != 1) cat("\nDispersion: ",x$dispersion," on ",x$df.residual," degrees of freedom") cat("\nNull Deviance: ", format(signif(x$null.deviance, digits)), "\nResidual Deviance:", format(signif(x$deviance, digits)), "\nNumber of Fisher Scoring iterations: ", x$iter, "\nNumber of observations: ",x$N, "\n") correl <- x$correlation if(!is.null(correl)) { p <- NCOL(correl) if(p > 1) { cat("\nCorrelation of Coefficients:\n") if(is.logical(symbolic.cor) && symbolic.cor) { print(symnum(correl, abbr.colnames = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop=FALSE], quote = FALSE) } } } if(!x$converged) cat("\n\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n\n", sep="") else cat("\n\n") invisible(x) } fitted.mclogit <- function(object,type=c("probabilities","counts"),...){ weights <- object$weights res <- object$fitted.values type <- match.arg(type) na.act <- object$na.action res <- switch(type, probabilities=res, counts=weights*res) if(is.null(na.act)) res else napredict(na.act,res) } predict.mclogit <- function(object, newdata=NULL,type=c("link","response"),se.fit=FALSE,...){ type <- match.arg(type) fo <- object$formula if(as.character(fo[[2]][[1]])=="|") fo[[2]][[1]] <- as.name("cbind") lhs <- fo[[2]] rhs <- fo[-2] if(length(lhs)==3) sets <- lhs[[3]] else stop("no way to determine choice set ids") if(missing(newdata)){ m <- model.frame(fo,data=object$data) set <- m[[1]][,2] na.act <- object$na.action } else{ lhs <- lhs[[3]] fo[[2]] <- lhs m <- model.frame(fo,data=newdata) set <- m[[1]] na.act <- attr(m,"na.action") } X <- model.matrix(rhs,m, contasts.arg=object$contrasts, xlev=object$xlevels ) cf <- coef(object) X <- X[,names(cf), drop=FALSE] eta <- c(X %*% cf) if(se.fit){ V <- vcov(object) stopifnot(ncol(X)==ncol(V)) } if(type=="response") { set <- match(set,unique(set)) exp.eta <- exp(eta) sum.exp.eta <- rowsum(exp.eta,set) p <- exp.eta/sum.exp.eta[set] if(se.fit){ wX <- p*(X - rowsum(p*X,set)[set,,drop=FALSE]) se.p <- sqrt(rowSums(wX * (wX %*% V))) if(is.null(na.act)) list(fit=p,se.fit=se.p) else list(fit=napredict(na.act,p), se.fit=napredict(na.act,se.p)) } else { if(is.null(na.act)) p else napredict(na.act,p) } } else if(se.fit) { se.eta <- sqrt(rowSums(X * (X %*% V))) if(is.null(na.act)) list(fit=eta,se.fit=se.eta) else list(fit=napredict(na.act,eta), se.fit=napredict(na.act,se.eta)) } else { if(is.null(na.act)) eta else napredict(na.act,eta) } } logLik.mclogit <- function(object,...){ if (length(list(...))) warning("extra arguments discarded") val <- if(length(object$ll)) object$ll else NA attr(val, "nobs") <- object$N attr(val, "df") <- object$model.df class(val) <- "logLik" return(val) } residuals.mclogit <- function(object, type = c("deviance", "pearson", "working", "response", "partial"), ...){ type <- match.arg(type) resid <- switch(type, deviance=mclogit.dev.resids(object), pearson=stop("not yet implemented"), working=object$working.residuals, response=object$response.residuals, partial=stop("not yet implemented") ) naresid(object$na.action,resid) } mclogit.dev.resids <- function(obj){ y <- obj$y s <- obj$s w <- obj$weights pi <- obj$fitted.values n <- w*y+0.5 f <- n/(rowsum(n,s)[s]) #sign(y-p)*sqrt(2*abs(log(f)-log(y))) r <- 2*(f*log(f/pi)) r - ave(r,s) } nobs.mclogit <- function(object,...) object$N extractAIC.mclogit <- function(fit, scale = 0, k = 2, ...) { N <- fit$N edf <- N - fit$df.residual aic <- AIC(fit) c(edf, aic + (k - 2) * edf) } weights.mclogit <- function(object, type = c("prior", "working"),...) { type <- match.arg(type) res <- if (type == "prior") object$prior.weights else object$weights if (is.null(object$na.action)) res else naresid(object$na.action, res) } print.mmclogit <- function(x,digits= max(3, getOption("digits") - 3), ...){ cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") if(length(coef(x))) { cat("Coefficients") if(is.character(co <- x$contrasts)) cat(" [contrasts: ", apply(cbind(names(co),co), 1, paste, collapse="="), "]") cat(":\n") print.default(format(x$coefficients, digits=digits), print.gap = 2, quote = FALSE) } else cat("No coefficients\n\n") cat("\n(Co-)Variances:\n") VarCov <- x$VarCov names(VarCov) <- names(x$groups) for(k in 1:length(VarCov)){ if(k > 1) cat("\n") cat("Grouping level:",names(VarCov)[k],"\n") VarCov.k <- VarCov[[k]] VarCov.k[] <- format(VarCov.k, digits=digits) VarCov.k[upper.tri(VarCov.k)] <- "" print.default(VarCov.k, print.gap = 2, quote = FALSE) } cat("\nApproximate residual deviance:", format(signif(x$deviance, digits))) if(!x$converged) cat("\n\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="") else cat("\n") invisible(x) } vcov.mmclogit <- function(object,...){ info.coef <- object$info.coef vcov.cf <- solve(info.coef) return(vcov.cf) } summary.mmclogit <- function(object,dispersion=NULL,correlation = FALSE, symbolic.cor = FALSE,...){ ## calculate coef table coef <- object$coefficients info.coef <- object$info.coef vcov.cf <- safeInverse(info.coef) var.cf <- diag(vcov.cf) s.err <- sqrt(var.cf) zvalue <- coef/s.err pvalue <- 2*pnorm(-abs(zvalue)) coef.table <- array(NA,dim=c(length(coef),4)) dimnames(coef.table) <- list(names(coef), c("Estimate", "Std. Error","z value","Pr(>|z|)")) coef.table[,1] <- coef coef.table[,2] <- s.err coef.table[,3] <- zvalue coef.table[,4] <- pvalue VarCov <- object$VarCov info.lambda <- object$info.lambda se_VarCov <- se_Phi(VarCov,info.lambda) names(VarCov) <- names(object$groups) names(se_VarCov) <- names(VarCov) ans <- c(object[c("call","terms","deviance","contrasts", "null.deviance","iter","na.action","model.df", "df.residual","groups","N","converged")], list(coefficients = coef.table, vcov.coef = vcov.cf, VarCov = VarCov, se_VarCov = se_VarCov)) p <- length(coef) if(correlation && p > 0) { dd <- sqrt(diag(ans$cov.coef)) ans$correlation <- ans$cov.coef/outer(dd,dd) ans$symbolic.cor <- symbolic.cor } ans$ngrps <- sapply(object$groups,nlevels) class(ans) <- "summary.mmclogit" return(ans) } print.summary.mmclogit <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...){ cat("\nCall:\n") cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") coefs <- x$coefficients cat("Coefficents:\n") printCoefmat(coefs, digits=digits, signif.stars=signif.stars, na.print="NA", ...) cat("\n(Co-)Variances:\n") VarCov <- x$VarCov se_VarCov <- x$se_VarCov for(k in 1:length(VarCov)){ if(k > 1) cat("\n") cat("Grouping level:",names(VarCov)[k],"\n") VarCov.k <- VarCov[[k]] VarCov.k[] <- format(VarCov.k, digits=digits) VarCov.k[upper.tri(VarCov.k)] <- "" #print.default(VarCov.k, print.gap = 2, quote = FALSE) VarCov.k <- format_Mat(VarCov.k,title="Estimate") se_VarCov.k <- se_VarCov[[k]] se_VarCov.k[] <- format(se_VarCov.k, digits=digits) se_VarCov.k[upper.tri(se_VarCov.k)] <- "" se_VarCov.k <- format_Mat(se_VarCov.k,title="Std.Err.",rownames=" ") VarCov.k <- paste(VarCov.k,se_VarCov.k) writeLines(VarCov.k) } cat("\nApproximate residual deviance:", format(signif(x$deviance, digits)), "\nNumber of Fisher scoring iterations: ", x$iter) cat("\nNumber of observations") for(i in seq_along(x$groups)){ g <- nlevels(x$groups[[i]]) nm.group <- names(x$groups)[i] cat("\n Groups by", paste0(nm.group,": ",format(g))) } cat("\n Individual observations: ",x$N) correl <- x$correlation if(!is.null(correl)) { p <- NCOL(correl) if(p > 1) { cat("\nCorrelation of Coefficients:\n") if(is.logical(symbolic.cor) && symbolic.cor) { print(symnum(correl, abbr.colnames = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop=FALSE], quote = FALSE) } } } if(!x$converged) cat("\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n\n", sep="") else cat("\n\n") invisible(x) } predict.mmclogit <- function(object, newdata=NULL,type=c("link","response"),se.fit=FALSE, conditional=TRUE, ...){ type <- match.arg(type) fo <- object$formula if(as.character(fo[[2]][[1]])=="|") fo[[2]][[1]] <- as.name("cbind") lhs <- fo[[2]] rhs <- fo[-2] random <- object$random if(length(lhs)==3) sets <- lhs[[3]] else stop("no way to determine choice set ids") if(missing(newdata)){ mf <- object$model sets <- mf[[1]][,2] na.act <- object$na.action rmf <- mf } else{ mf <- model.frame(rhs,data=newdata,na.action=na.exclude) rnd <- object$random for(i in seq_along(rnd)){ rf_i <- random2formula(rnd[[i]]) if(i == 1) rfo <- rf_i else rfo <- c_formulae(rfo,rf_i) } rmf <- model.frame(rfo,data=newdata,na.action=na.exclude) sets <- eval(sets,newdata) na.act <- attr(mf,"na.action") } X <- model.matrix(rhs,mf, contrasts.arg=object$contrasts, xlev=object$xlevels ) cf <- coef(object) X <- X[,names(cf), drop=FALSE] eta <- c(X %*% cf) if(object$method=="PQL" && conditional){ rf <- lapply(random,"[[","formula") rt <- lapply(rf,terms) suppressWarnings(Z <- lapply(rt,model.matrix,rmf, contrasts.arg=object$contrasts, xlev=object$xlevels)) d <- sapply(Z,ncol) nn <- length(Z) orig.groups <- object$groups olevels <- lapply(orig.groups,levels) randstruct <- lapply(1:nn,function(k){ group.labels <- random[[k]]$groups groups <- rmf[group.labels] groups <- lapply(groups,as.factor) nlev <- length(groups) if(nlev > 1){ for(i in 2:nlev){ groups[[i]] <- interaction(groups[c(i-1,i)]) group.labels[i] <- paste(group.labels[i-1],group.labels[i],sep=":") } } olevels <- olevels[group.labels] groups <- Map(factor,x=groups,levels=olevels) VarCov.names.k <- rep(list(colnames(Z[[k]])),nlev) Z_k <- lapply(groups,mkZ,rX=Z[[k]]) d <- rep(d[k],nlev) names(groups) <- group.labels list(Z_k,groups,d,VarCov.names.k) }) Z <- lapply(randstruct,`[[`,1) groups <- lapply(randstruct,`[[`,2) Z <- unlist(Z,recursive=FALSE) d <- lapply(randstruct,`[[`,3) groups <- unlist(groups,recursive=FALSE) d <- unlist(d) Z <- blockMatrix(Z) b <- object$random.effects nlev <- length(Z) for(k in 1:nlev) eta <- eta + as.vector(Z[[k]]%*%b[[k]]) } nvar <- ncol(X) nobs <- nrow(X) if(type=="response" || object$method=="PQL" && conditional ){ j <- match(sets,unique(sets)) exp.eta <- exp(eta) sum.exp.eta <- rowsum(exp.eta,j) p <- exp.eta/sum.exp.eta[j] } if(se.fit && (type=="response" || object$method=="PQL" && conditional)){ nsets <- j[length(j)] W <- Matrix(0,nrow=nobs,ncol=nsets) i <- 1:nobs W[cbind(i,j)] <- p W <- Diagonal(x=p)-tcrossprod(W) WX <- W%*%X if(object$method=="PQL" && conditional){ WZ <- bMatProd(W,Z) H <- object$info.fixed.random K <- solve(H) } } if(type=="response") { if(se.fit){ if(object$method=="PQL" && conditional){ WXZ <- structure(cbind(blockMatrix(WX),WZ),class="blockMatrix") var.p <- bMatProd(WXZ,K) var.p <- Map(`*`,WXZ,var.p) var.p <- lapply(var.p,rowSums) var.p <- Reduce(`+`,var.p) se.p <- sqrt(var.p) } else { vcov.coef <- vcov(object) se.p <- sqrt(rowSums(WX*(WX%*%vcov.coef))) } if(is.null(na.act)) list(fit=p,se.fit=se.p) else list(fit=napredict(na.act,p), se.fit=napredict(na.act,se.p)) } else{ if(is.null(na.act)) p else napredict(na.act,p) } } else { if(se.fit){ if(object$method=="PQL" && conditional){ XZ <- structure(cbind(blockMatrix(X),Z),class="blockMatrix") var.eta <- bMatProd(XZ,K) var.eta <- Map(`*`,XZ,var.eta) var.eta <- lapply(var.eta,rowSums) var.eta <- Reduce(`+`,var.eta) } else { vcov.coef <- vcov(object) var.eta <- rowSums(X*(X%*%vcov.coef)) } se.eta <- sqrt(var.eta) if(is.null(na.act)) list(fit=eta,se.fit=se.eta) else list(fit=napredict(na.act,eta), se.fit=napredict(na.act,se.eta)) } else { if(is.null(na.act)) eta else napredict(na.act,eta) } } } tr <- function(x) sum(diag(x)) mkZ <- function(groups,rX){ n <- length(groups) m <- nlevels(groups) p <- ncol(rX) Z <- Matrix(0,nrow=n,ncol=m*p) i <- 1:n k <- 1:p j <- as.integer(groups) i <- rep(i,p) jk <- rep((j-1)*p,p)+rep(k,each=n) i.jk <- cbind(i,jk) Z[i.jk] <- rX Z } mkZ2 <- function(all.groups, groups, rX){ n <- length(groups) ug <- unique(all.groups) m <- length(ug) p <- ncol(rX) Z <- Matrix(0,nrow=n,ncol=m*p) i <- 1:n k <- 1:p j <- groups i <- rep(i,p) jk <- rep((j-1)*p,p)+rep(k,each=n) i.jk <- cbind(i,jk) Z[i.jk] <- rX Z } mkG <- function(rX){ p <- ncol(rX) nms <- colnames(rX) G <- matrix(0,p,p) ltT <- lower.tri(G,diag=TRUE) ltF <- lower.tri(G,diag=FALSE) n <- p*(p+1)/2 m <- p*(p-1)/2 diag(G) <- 1:p G[ltF] <- p + 1:m G <- lwr2sym(G) rownames(G) <- colnames(G) <- nms lapply(1:n,mkG1,G) } mkG1 <- function(i,G) Matrix(array(as.integer(i==G), dim=dim(G), dimnames=dimnames(G) )) fillG <- function(G,theta){ Phi <- Map(`*`,theta,G) if(length(Phi)>1){ for(i in 2:length(Phi)) Phi[[1]] <- Phi[[1]] + Phi[[i]] } Phi[[1]] } lunq <- function(x)length(attr(x,"unique")) G.star1 <- function(I,G)Map(`%x%`,list(I),G) quadform <- function(A,x) as.numeric(crossprod(x,A%*%x)) tr.crossprod <- function(A,B) sum(A*B) lwr2sym <- function(X){ lwrX <- lower.tri(X) x.lwr <- X[lwrX] Y <- t(X) Y[lwrX] <- x.lwr Y } fuseMat <- function(x){ if(ncol(x)>1){ y <- lapply(1:nrow(x), fuseCols,x=x) } else y <- x y <- do.call(rbind,y) # The following looks redundant, but appears to # be necessary to avoid a bug that prevents the # resulting matrix be correctly inverted sparse.x <- sapply(x,inherits,"sparseMatrix") if(any(sparse.x)) y <- as(y,"sparseMatrix") return(y) } cbindList <- function(x) do.call(cbind,x) fuseCols <- function(x,i) do.call(cbind,x[i,]) format_Mat <- function(x,title="",rownames=NULL){ if(length(rownames)) rn <- format(c("",rownames)) else rn <- format(c("",rownames(x))) x <- format(x) x <- apply(x,1,paste,collapse=" ") x <- format(c(title,x)) paste(rn,x) } update.mclogit <- function(object, formula., dispersion, ...) { if(!inherits(object,"mmclogit") && (missing(formula.) || formula. == object$formula) && !missing(dispersion)) update_mclogit_dispersion(object,dispersion) else NextMethod() } getFirst <- function(x) x[1] simulate.mclogit <- function(object, nsim = 1, seed = NULL, ...){ if(object$phi > 1) stop("Simulating responses from models with oversdispersion is not supported yet") if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) if (is.null(seed)) RNGstate <- get(".Random.seed", envir = .GlobalEnv) else { R.seed <- get(".Random.seed", envir = .GlobalEnv) set.seed(seed) RNGstate <- structure(seed, kind = as.list(RNGkind())) on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) } weights <- object$weights probs <- object$fitted.values set <- object$s i <- 1:length(probs) probs <- split(probs,set) weights <- split(weights,set) i <- split(i,set) weights <- sapply(weights,getFirst) yy <- mapply(rmultinom,size=weights,prob=probs, MoreArgs=list(n=nsim),SIMPLIFY=FALSE) yy <- do.call(rbind,yy) i <- unlist(i) yy[i,] <- yy rownames(yy) <- names(object$working.residuals) colnames(yy) <- paste0("sim_",1:nsim) yy <- as.data.frame(yy) attr(yy,"seed") <- RNGstate yy } simulate.mmclogit <- function(object, nsim = 1, seed = NULL, ...) stop("Simulating responses from random-effects models is not supported yet") eigen.solve <- function(x){ ev <- eigen(x) d <- ev$values V <- ev$vectors id <- 1/d V %*% (id*t(V)) } solve2 <- function(x){ ix <- try(solve(x)) if(inherits(ix,"try-error")) return(eigen.solve(x)) else return(ix) } check.names <- function(x,...){ nms <- c(...) res <- nms %in% names(x) if(!all(res)){ mis <- nms[!(nms %in% names(x))] mis <- paste(dQuote(mis),collapse=", ") msg_tmpl <- "Elements with names %s are missing" msg <- paste(strwrap(sprintf(msg_tmpl,mis),width=80), collapse="\n") stop(msg) } } mclogit/R/mclogit-fit.R0000644000176200001440000001235714320404511014454 0ustar liggesusersmclogit.fit <- function( y, s, w, X, dispersion=FALSE, start=NULL, offset=NULL, control=mclogit.control() ){ nvar <- ncol(X) nobs <- length(y) if(!length(offset)) offset <- rep.int(0, nobs) if(length(start)){ stopifnot(length(start)==ncol(X)) eta <- c(X%*%start) + offset } else eta <- mclogitLinkInv(y,s,w) pi <- mclogitP(eta,s) dev.resids <- ifelse(y>0, 2*w*y*(log(y)-log(pi)), 0) deviance <- sum(dev.resids) if(length(start)) coef <- start else coef <- NULL converged <- FALSE for(iter in 1:control$maxit){ y.star <- eta - offset + (y-pi)/pi yP.star <- y.star - rowsum(pi*y.star,s)[s] XP <- X - as.matrix(rowsum(pi*X,s))[s,,drop=FALSE] ww <- w*pi good <- ww > 0 & is.finite(yP.star) wlsFit <- lm.wfit(x=XP[good,,drop=FALSE],y=yP.star[good],w=ww[good]) last.coef <- coef coef <- wlsFit$coefficients eta <- c(X%*%coef) + offset pi <- mclogitP(eta,s) last.deviance <- deviance dev.resids <- ifelse(y>0, 2*w*y*(log(y)-log(pi)), 0) deviance <- sum(dev.resids) ## check for divergence boundary <- FALSE if(!is.finite(deviance) || deviance > last.deviance && iter > 1){ if(is.null(last.coef)) stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE) warning("step size truncated due to divergence", call. = FALSE) ii <- 1 while (!is.finite(deviance) || deviance > last.deviance){ if(ii > control$maxit) stop("inner loop; cannot correct step size") ii <- ii + 1 coef <- (coef + last.coef)/2 eta <- c(X %*% coef) + offset pi <- mclogitP(eta,s) dev.resids <- ifelse(y>0,2*w*y*(log(y)-log(pi)),0) deviance <- sum(dev.resids) } boundary <- TRUE if (control$trace) cat("Step halved: new deviance =", deviance, "\n") } ## inner loop crit <- abs(deviance-last.deviance)/abs(0.1+deviance) if(control$trace) cat("\nIteration",iter,"- deviance =",deviance,"- criterion =",crit) if(crit < control$eps){ converged <- TRUE if(control$trace) cat("\nconverged\n") break } } if (!converged) warning("algorithm did not converge",call.=FALSE) if (boundary) warning("algorithm stopped at boundary value",call.=FALSE) eps <- 10*.Machine$double.eps if (any(pi < eps) || any(1-pi < eps)) warning("fitted probabilities numerically 0 occurred",call.=FALSE) XP <- X - as.matrix(rowsum(pi*X,s))[s,,drop=FALSE] ww <- w*pi XWX <- crossprod(XP,ww*XP) ntot <- length(y) pi0 <- mclogitP(offset,s) null.deviance <- sum(ifelse(y>0, 2*w*y*(log(y)-log(pi0)), 0)) resid.df <- length(y)-length(unique(s)) model.df <- ncol(X) resid.df <- resid.df - model.df ll <- mclogit.logLik(y,pi,w) if(!isFALSE(dispersion)){ if(isTRUE(dispersion)) odisp.method <- "Afroz" else odisp.method <- match.arg(dispersion, c("Afroz", "Fletcher", "Pearson", "Deviance")) phi <- mclogit.dispersion(y,w,s,pi,coef, method=odisp.method) } else phi <- 1 return(list( coefficients = drop(coef), phi = phi, linear.predictors = eta, working.residuals = (y-pi)/pi, response.residuals = y-pi, df.residual = resid.df, model.df = model.df, fitted.values = pi, deviance=deviance, ll=ll, deviance.residuals=dev.resids, null.deviance=null.deviance, iter = iter, y = y, s = s, offset = offset, converged = converged, control=control, information.matrix=XWX )) } mclogit.control <- function( epsilon = 1e-08, maxit = 25, trace=TRUE ) { if (!is.numeric(epsilon) || epsilon <= 0) stop("value of epsilon must be > 0") if (!is.numeric(maxit) || maxit <= 0) stop("maximum number of iterations must be > 0") list(epsilon = epsilon, maxit = maxit, trace = trace) } log.Det <- function(x) determinant(x,logarithm=TRUE)$modulus mclogitP <- function(eta,s){ expeta <- exp(eta) sum.expeta <- rowsum(expeta,s) expeta/sum.expeta[s] } # mclogit.dev.resids <- function(y,p,w) # ifelse(y>0, # 2*w*y*(log(y)-log(p)), # 0) mclogit.logLik <- function(y,p,w) sum(w*y*log(p)) mclogitLinkInv <- function(y,s,w){ #n.alt <- tapply(y,s,length) #c(log(sqrt(w)*y+1/n.alt[s])-log(w)/2) n <- w*y+0.5 f <- n/(rowsum(n,s)[s]) log(f) - ave(log(f),s) } mclogit/R/blockMatrices.R0000644000176200001440000001543114226031273015022 0ustar liggesusersall_equal <- function(x) length(unique(x)) == 1 blockMatrix <- function(x=list(),nrow,ncol,horizontal=TRUE){ if(!is.list(x)) x <- list(x) if(horizontal){ if(missing(nrow)) nrow <- 1 if(missing(ncol)) ncol <- length(x) } else { if(missing(nrow)) nrow <- length(x) if(missing(ncol)) ncol <- 1 } y <- matrix(x,nrow=nrow,ncol=ncol) ncols <- apply(y,1:2,ncol) nrows <- apply(y,1:2,nrow) ncols <- array(sapply(y,ncol),dim=dim(y)) nrows <- array(sapply(y,nrow),dim=dim(y)) nrows_equal <- apply(nrows,1,all_equal) ncols_equal <- apply(ncols,2,all_equal) if(!all(nrows_equal)) stop("Non-matching numbers of rows") if(!all(ncols_equal)) stop("Non-matching numbers of columns") structure(y,class="blockMatrix") } Ops.blockMatrix <- function(e1, e2){ if(!inherits(e1,"blockMatrix")) e1 <- blockMatrix(e1) if(!inherits(e2,"blockMatrix")) e2 <- blockMatrix(e2) stopifnot(dim(e1)==dim(e2)) d <- dim(e1) if(!(.Generic%in% c("+","-","*","=="))) stop(sQuote(.Generic)," not implemented for block matrices") res <- switch(.Generic, `+`= mapply(`+`,e1,e2,SIMPLIFY=FALSE), `-`= mapply(`-`,e1,e2,SIMPLIFY=FALSE), `*`= mapply(`*`,e1,e2,SIMPLIFY=FALSE), `==`= all(Reduce(`&`,mapply(`==`,e1,e2))) ) if(is.list(res)){ dim(res) <- d structure(res, class=class(e1)) } else res } bMatProd <- function(x,y){ if(!inherits(x,"blockMatrix")) x <- blockMatrix(x) if(!inherits(y,"blockMatrix")) y <- blockMatrix(y) dim.x <- dim(x) dim.y <- dim(y) stopifnot(dim.x[2]==dim.y[1]) m <- dim.x[1] n <- dim.y[2] q <- dim.x[2] res <- blockMatrix(nrow=m,ncol=n) for(i in 1:m) for(j in 1:n){ res[[i,j]] <- inner_p(x[i,],y[,j]) } res } bMatCrsProd <- function(x,y=NULL){ if(missing(y)) y <- x if(!inherits(x,"blockMatrix")) x <- blockMatrix(x) if(!inherits(y,"blockMatrix")) y <- blockMatrix(y) dim.x <- dim(x) dim.y <- dim(y) stopifnot(dim.x[1]==dim.y[1]) m <- dim.x[2] n <- dim.y[2] q <- dim.x[1] res <- blockMatrix(nrow=m,ncol=n) for(i in 1:m) for(j in 1:n){ res[[i,j]] <- inner_crsp(x[,i],y[,j]) } res } bMatTCrsProd <- function(x,y=NULL){ if(missing(y)) y <- x if(!inherits(x,"blockMatrix")) x <- blockMatrix(x) if(!inherits(y,"blockMatrix")) y <- blockMatrix(y) dim.x <- dim(x) dim.y <- dim(y) stopifnot(dim.x[2]==dim.y[2]) m <- dim.x[1] n <- dim.y[1] q <- dim.x[2] res <- blockMatrix(nrow=m,ncol=n) for(i in 1:m) for(j in 1:n){ res[[i,j]] <- inner_tcrsp(x[i,],y[j,]) } res } bMatTrns <- function(x){ m <- nrow(x) n <- ncol(x) res <- blockMatrix(nrow=n,ncol=m) for(i in 1:n) for(j in 1:m){ res[[i,j]] <- t(x[[j,i]]) } res } inner_p <- function(x,y){ xy <- mapply(`%*%`,x,y,SIMPLIFY=FALSE) Reduce(`+`,xy) } inner_crsp <- function(x,y){ xy <- mapply(crossprod,x,y,SIMPLIFY=FALSE) Reduce(`+`,xy) } inner_tcrsp <- function(x,y){ xy <- mapply(tcrossprod,x,y,SIMPLIFY=FALSE) Reduce(`+`,xy) } matprod1 <- function(x,y){ if(!length(x) || !length(y)) NULL else x %*% y } blockDiag <- function(x,n=length(x)){ y <- blockMatrix(nrow=n,ncol=n) i <- 1:n y[cbind(i,i)] <- x bM_fill(y) } bM_check <- function(x){ nnrow <- sapply(x,NROW) nncol <- sapply(x,NCOL) dim(nnrow) <- dim(x) dim(nncol) <- dim(x) lunq.cols <- apply(nncol,2,lunq) lunq.rows <- apply(nnrow,1,lunq) ok <- all(lunq.cols==1) && all(lunq.cols) return(ok) } bM_nrow <- function(x) sapply(x[,1],nrow) bM_ncol <- function(x) sapply(x[1,],ncol) to_bM <- function(x,nnrow,nncol){ nnrow1 <- cumsum(c(0,nnrow[-length(nnrow)])) + 1 nncol1 <- cumsum(c(0,nncol[-length(nncol)])) + 1 rows <- mapply(seq.int,from=nnrow1,length.out=nnrow,SIMPLIFY=FALSE) cols <- mapply(seq.int,from=nncol1,length.out=nncol,SIMPLIFY=FALSE) m <- length(nnrow) n <- length(nncol) y <- blockMatrix(nrow=m,ncol=n) for(i in 1:m) for(j in 1:n) y[i,j] <- list(Matrix(x[rows[[i]],cols[[j]]])) return(y) } bM_fill <- function(x){ nnrow <- Sapply(x,NROW) nncol <- Sapply(x,NCOL) dim(nnrow) <- dim(x) dim(nncol) <- dim(x) nnrow <- apply(nnrow,1,max) nncol <- apply(nncol,2,max) m <- nrow(x) n <- ncol(x) for(i in 1:m) for(j in 1:n){ if(is.null(x[[i,j]])){ x[[i,j]] <- Matrix(0,nnrow[i],nncol[j]) } } return(x) } solve.blockMatrix <- function(a,b,...){ nnrow.a <- bM_nrow(a) nncol.a <- bM_ncol(a) a <- fuseMat(a) if(missing(b)){ x <- solve(a) return(to_bM(x,nnrow=nnrow.a,nncol=nncol.a)) } else { nnrow.b <- bM_nrow(b) nncol.b <- bM_ncol(b) b <- fuseMat(b) x <- solve(a,b) return(to_bM(x,nnrow=nnrow.a,nncol=nncol.b)) } } format_dims <- function(x){ sprintf("<%d x %d>",nrow(x),ncol(x)) } print.blockMatrix <- function(x,quote=FALSE,...){ cat(sprintf("Block matrix with %d x %d blocks\n\n",nrow(x),ncol(x))) y <- sapply(x,format_dims) dim(y) <- dim(x) print.default(y,quote=quote,...) invisible(x) } sum_blockDiag <- function(x,n){ i <- rep(1:n,n) j <- rep(1:n,each=n) nblks <- nrow(x) %/% n offs <- rep(seq.int(from=0,to=nblks-1),each=n*n) i <- rep(i,nblks) + offs j <- rep(j,nblks) + offs y <- x[cbind(i,j)] dim(y) <- c(n*n,nblks) y <- rowSums(y) dim(y) <- c(n,n) Matrix(y) } v_bCrossprod <- function(x,d){ n <- length(x)%/%d dim(x) <- c(d,n) tcrossprod(x) } v_bQuadfm <- function(x,W){ d <- nrow(W) n <- length(x)%/%d dim(x) <- c(d,n) colSums((W%*%x)*x) } set_blockDiag <- function(x,v){ n <- ncol(v) i <- rep(1:n,n) j <- rep(1:n,each=n) nblks <- ncol(x) %/% n offs <- rep(seq.int(from=0,to=nblks-1)*n,each=n*n) i <- rep(i,nblks) + offs j <- rep(j,nblks) + offs x[cbind(i,j)] <- v return(x) } logDet_blockMatrix <- function(x){ d <- determinant(fuseMat(x),logarithm=TRUE) d$modulus } chol_blockMatrix <- function(x,resplit=TRUE){ y <- chol(fuseMat(x)) if(resplit){ nnrow <- bM_nrow(x) nncol <- bM_ncol(x) return(to_bM(y,nnrow=nnrow,nncol=nncol)) } else return(y) } kron_bM <- function(x,y){ m1 <- nrow(x) m2 <- nrow(y) n1 <- ncol(x) n2 <- ncol(y) attributes(x) <- NULL attributes(y) <- NULL lx <- length(x) ly <- length(y) x <- rep(x,each=ly) y <- rep(y,lx) xy <- mapply(`%x%`,x,y,SIMPLIFY=FALSE) blockMatrix(xy,m1*m2,n1*n2) } mclogit/R/mclogit-dispersion.R0000644000176200001440000000367713703631670016072 0ustar liggesusersmclogit.dispersion <- function(y,w,s,pi,coef,method){ N <- length(w) n <- length(unique(s)) p <- length(coef) res.df <- N - n -p if(method=="Deviance"){ Dresid <- 2*w*y*(log(y)-log(pi)) Dresid[w==0 | y== 0] <- 0 D <- sum(Dresid) phi <- D/res.df } else { X2 <- sum(w*(y - pi)^2/pi) phi.pearson <- X2/(N - n - p) if(method %in% c("Afroz","Fletcher")) s.bar <- sum((y - pi)/pi)/(N - n) phi <- switch(method, Pearson = phi.pearson, Afroz = phi.pearson/(1 + s.bar), Fletcher = phi.pearson - (N - n)*s.bar/(N - n - p)) } return(phi) } update_mclogit_dispersion <- function(object,dispersion){ if(!missing(dispersion)){ if(is.numeric(dispersion)) phi <- dispersion else { if(isTRUE(dispersion)) method <- "Afroz" else method <- match.arg(dispersion, c("Afroz", "Fletcher", "Pearson", "Deviance")) phi <- dispersion(object,method=method) } } else phi <- 1 object$phi <- phi return(object) } dispersion <- function(object,method,...) UseMethod("dispersion") dispersion.mclogit <- function(object,method=NULL,...){ if(is.null(method)) return(object$phi) else { y <- object$y s <- object$s w <- object$weights pi <- object$fitted.values coef <- object$coefficients method <- match.arg(method,c("Afroz", "Fletcher", "Pearson", "Deviance")) phi <- mclogit.dispersion(y,w,s,pi,coef, method=method) return(phi) } } mclogit/R/AIC-mclogit.R0000644000176200001440000000223112216653672014274 0ustar liggesusers# Contributed by Nic Elliot AIC.mclogit <- function(object,...,k=2){ devNdf <- function(object) unname(unlist(object[c("deviance","N","model.df")])) if (length(list(...))) { dvs <- sapply(list(object, ...), devNdf) nobs <- dvs[2,] if(length(unique(nobs))>1) warning("models are not all fitted to the same number of observations") val <- data.frame(df=dvs[3,],AIC=dvs[1,]+k*dvs[3,]) Call <- match.call() Call$k <- NULL row.names(val) <- as.character(Call[-1L]) val } else { dvs <- devNdf(object) dvs[1]+k*dvs[3] } } BIC.mclogit <- function(object,...){ devNdf <- function(object) unname(unlist(object[c("deviance","N","model.df")])) if (length(list(...))) { dvs <- sapply(list(object, ...), devNdf) nobs <- dvs[2,] if(length(unique(nobs))>1) warning("models are not all fitted to the same number of observations") val <- data.frame(df=dvs[3,],BIC=dvs[1,]+log(dvs[2,])*dvs[3,]) Call <- match.call() Call$k <- NULL row.names(val) <- as.character(Call[-1L]) val } else { dvs <- devNdf(object) dvs[1]+log(dvs[2])*dvs[3] } }mclogit/R/mblogit.R0000644000176200001440000011532714322773105013706 0ustar liggesusers#' Baseline-Category Logit Models for Categorical and Multinomial Responses #' #' The function \code{mblogit} fits baseline-category logit models for categorical #' and multinomial count responses with fixed alternatives. #' #' @param formula the model formula. The response must be a factor or a matrix #' of counts. #' @param data an optional data frame, list or environment (or object coercible #' by \code{\link{as.data.frame}} to a data frame) containing the variables #' in the model. If not found in \code{data}, the variables are taken from #' \code{environment(formula)}, typically the environment from which #' \code{glm} is called. #' @param random an optional formula or list of formulas that specify the #' random-effects structure or NULL. #' @param catCov a character string that specifies optional restrictions #' on the covariances of random effects between the logit equations. #' "free" means no restrictions, "diagonal" means that random effects #' pertinent to different categories are uncorrelated, while "single" means #' that the random effect variances pertinent to all categories are identical. #' @param subset an optional vector specifying a subset of observations to be #' used in the fitting process. #' @param weights an optional vector of weights to be used in the fitting #' process. Should be \code{NULL} or a numeric vector. #' @param na.action a function which indicates what should happen when the data #' contain \code{NA}s. The default is set by the \code{na.action} setting #' of \code{\link{options}}, and is \code{\link{na.fail}} if that is unset. #' The \sQuote{factory-fresh} default is \code{\link{na.omit}}. Another #' possible value is \code{NULL}, no action. Value \code{\link{na.exclude}} #' can be useful. #' @param model a logical value indicating whether \emph{model frame} should be #' included as a component of the returned value. #' @param x,y logical values indicating whether the response vector and model #' matrix used in the fitting process should be returned as components of #' the returned value. #' @param contrasts an optional list. See the \code{contrasts.arg} of #' \code{model.matrix.default}. #' @param method \code{NULL} or a character string, either "PQL" or "MQL", #' specifies the type of the quasilikelihood approximation to be used if a #' random-effects model is to be estimated. #' @param estimator a character string; either "ML" or "REML", specifies which #' estimator is to be used/approximated. #' @param dispersion a logical value or a character string; whether and how a #' dispersion parameter should be estimated. For details see #' \code{\link{dispersion}}. #' @param start an optional matrix of starting values (with as many rows #' as logit equations). If the model has random effects, the matrix #' should have a "VarCov" attribute wtih starting values for #' the random effects (co-)variances. If the random effects model #' is estimated with the "PQL" method, the starting values matrix #' should also have a "random.effects" attribute, which should have #' the same structure as the "random.effects" component of an object #' returned by \code{mblogit()}. #' @param from.table a logical value; do the data represent a contingency table, #' e.g. were created by applying \code{as.data.frame()} a the result of #' \code{table()} or \code{xtabs()}. This relevant only if the response is #' a factor. This argument should be set to \code{TRUE} if the data do come #' from a contingency table. Correctly setting \code{from.table=TRUE} in #' this case, will lead to efficiency gains in computing, but more #' importantly overdispersion will correctly be computed if present. #' @param groups an optional formula that specifies groups of observations #' relevant for the specification of overdispersed response counts. #' @param control a list of parameters for the fitting process. See #' \code{\link{mclogit.control}} #' @param \dots arguments to be passed to \code{mclogit.control} or #' \code{mmclogit.control} #' #' @return \code{mblogit} returns an object of class "mblogit", which has almost #' the same structure as an object of class "\link[stats]{glm}". The #' difference are the components \code{coefficients}, \code{residuals}, #' \code{fitted.values}, \code{linear.predictors}, and \code{y}, which are #' matrices with number of columns equal to the number of response #' categories minus one. #' #' @details The function \code{mblogit} internally rearranges the data into a #' 'long' format and uses \code{\link{mclogit.fit}} to compute #' estimates. Nevertheless, the 'user data' are unaffected. #' #' @seealso The function \code{\link[nnet]{multinom}} in package \pkg{nnet} also #' fits multinomial baseline-category logit models, but has a slightly less #' convenient output and does not support overdispersion or random #' effects. However, it provides some other options. Baseline-category logit #' models are also supported by the package \pkg{VGAM}, as well as some #' reduced-rank and (semi-parametric) additive generalisations. The package #' \pkg{mnlogit} estimates logit models in a way optimized for large numbers #' of alternatives. #' #' @example examples/mblogit-ex.R #' #' @references #' Agresti, Alan. 2002. #' \emph{Categorical Data Analysis.} 2nd ed, Hoboken, NJ: Wiley. #' \url{https://doi.org/10.1002/0471249688} #' #' Breslow, N.E. and D.G. Clayton. 1993. #' "Approximate Inference in Generalized Linear Mixed Models". #' \emph{Journal of the American Statistical Association} 88 (421): 9-25. #' \url{https://doi.org/10.1080/01621459.1993.10594284} #' #' #' @aliases print.mblogit summary.mblogit print.summary.mblogit fitted.mblogit #' weights.mblogit print.mmblogit summary.mmblogit print.summary.mmblogit mblogit <- function(formula, data=parent.frame(), random=NULL, catCov=c("free","diagonal","single"), subset, weights=NULL, na.action = getOption("na.action"), model = TRUE, x = FALSE, y = TRUE, contrasts=NULL, method = NULL, estimator=c("ML","REML"), dispersion = FALSE, start = NULL, from.table = FALSE, groups = NULL, control=if(length(random)) mmclogit.control(...) else mclogit.control(...), ...){ call <- match.call(expand.dots = TRUE) if(missing(data)) data <- environment(formula) else if(is.table(data)){ from.table <- TRUE data <- as.data.frame(data) } else data <- as.data.frame(data) mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "offset", "na.action"), names(mf), 0) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1]] <- as.name("model.frame") if(length(random)){ mf0 <- eval(mf, parent.frame()) mt <- attr(mf0,"terms") if(inherits(random,"formula")){ rf <- paste(c(".~.",all.vars(random)),collapse="+") } else if(inherits(random,"list")) { rf <- paste(c(".~.",unlist(lapply(random,all.vars))),collapse="+") } else stop("'random' argument must be either a formula or a list of formulae") rf <- as.formula(rf) if (typeof(mf$formula) == "symbol") { mff <- formula } else { mff <- structure(mf$formula,class="formula") } mff <- eval(mff, parent.frame()) mf$formula <- update(mff,rf) mf <- eval(mf, parent.frame()) check.names(control, "epsilon","maxit", "trace","trace.inner", "avoid.increase", "break.on.increase", "break.on.infinite", "break.on.negative") catCov <- match.arg(catCov) } else if(length(groups)){ mf0 <- eval(mf, parent.frame()) mt <- attr(mf0,"terms") gf <- paste(c(".~.",all.vars(groups)),collapse="+") gf <- as.formula(gf) if (typeof(mf$formula) == "symbol") { mff <- formula } else { mff <- structure(mf$formula,class="formula") } mff <- eval(mff, parent.frame()) mf$formula <- update(mff,gf) mf <- eval(mf, parent.frame()) check.names(control, "epsilon","maxit", "trace","trace.inner", "avoid.increase", "break.on.increase", "break.on.infinite", "break.on.negative") } else { mf <- eval(mf, parent.frame()) mt <- attr(mf,"terms") check.names(control, "epsilon","maxit", "trace") } na.action <- attr(mf,"na.action") weights <- as.vector(model.weights(mf)) offset <- as.vector(model.offset(mf)) if(!is.null(weights) && !is.numeric(weights)) stop("'weights' must be a numeric vector") Y <- model.response(mf, "any") X <- model.matrix(mt,mf,contrasts) contrasts <- attr(X, "contrasts") xlevels <- .getXlevels(mt,mf) if(is.null(weights)) weights <- rep(1,nrow(X)) N <- sum(weights) prior.weights <- weights if(is.factor(Y)){ response.type <- "factor" n.categs <- nlevels(Y) n.obs <- length(Y) if(from.table){ # Create an appropriate response matrix if data # come from a table of frequencies tmf <- terms(mf) respix <- attr(tmf,"response") vars <- as.character(attr(tmf,"variables")[-1]) respname <- vars[respix] respix <- match(respname,names(mf)) wghix <- match("(weights)",names(mf)) mf1 <- mf[-c(respix,wghix)] umf1 <- !duplicated(mf1) i <- cumsum(umf1) j <- as.integer(Y) attr(mf,"ij") <- cbind(i,j) attr(mf,"j==1") <- umf1 levs <- levels(Y) m <- nlevels(Y) n <- i[length(i)] Y <- matrix(0,nrow=n,ncol=m) Y[cbind(i,j)] <- prior.weights w <- rowSums(Y) Y <- Y/w if(any(w==0)){ Y[w==0,] <- 0 N <- sum(weights[w>0]) warning(sprintf("ignoring %d observerations with counts that sum to zero", sum(w==0)), call. = FALSE, immediate. = TRUE) } Y <- as.vector(t(Y)) weights <- rep(w,each=m) D <- diag(m)[,-1, drop=FALSE] dimnames(D) <- list(levs,levs[-1]) X <- X[umf1,,drop=FALSE] } else { weights <- rep(weights,each=nlevels(Y)) D <- diag(nlevels(Y))[,-1, drop=FALSE] dimnames(D) <- list(levels(Y),levels(Y)[-1]) I <- diag(nlevels(Y)) dimnames(I) <- list(levels(Y),levels(Y)) Y <- as.vector(I[,Y]) } } else if(is.matrix(Y)){ response.type <- "matrix" n.categs <- ncol(Y) n.obs <- nrow(Y) D <- diag(ncol(Y))[,-1, drop=FALSE] if(length(colnames(Y))){ rownames(D) <- colnames(Y) colnames(D) <- colnames(Y)[-1] } else { rownames(D) <- 1:ncol(Y) colnames(D) <- 2:ncol(Y) } w <- rowSums(Y) Y <- Y/w if(any(w==0)){ Y[w==0,] <- 0 N <- sum(weights[w>0]) warning(sprintf("ignoring %d observerations with counts that sum to zero", sum(w==0)), call. = FALSE, immediate. = TRUE) } weights <- rep(w*weights,each=ncol(Y)) Y <- as.vector(t(Y)) } else stop("response must either be a factor or a matrix of counts or dummies") start.VarCov <- NULL start.randeff <- NULL if(length(start)){ start.VarCov <- attr(start,"VarCov") start.randeff <- attr(start,"random.effects") if(nrow(start)!=ncol(D)) stop("Rows of 'start' argument do not match dependent variable.") start.names <- colnames(start) X.names <- colnames(X) if(length(start.names)) start <- start[,X.names,drop=FALSE] if(ncol(start)!=ncol(X)) stop("Columns of 'start' argument do not match independent variables.") start <- as.vector(start) } s <- rep(seq_len(nrow(X)),each=nrow(D)) XD <- X%x%D colnames(XD) <- paste0(rep(colnames(D),ncol(X)), "~", rep(colnames(X),each=ncol(D))) if(!length(random)) fit <- mclogit.fit(y=Y,s=s,w=weights,X=XD, dispersion=dispersion, start=start, control=control) else { ## random effects if(!length(method)) method <- "PQL" if(inherits(random,"formula")) random <- list(random) random <- lapply(random,setupRandomFormula) rt <- lapply(random,"[[","formula") rt <- lapply(rt,terms) suppressWarnings(Z <- lapply(rt,model.matrix,mf, contrasts.arg=contrasts)) # Use suppressWarnings() to stop complaining about unused contasts if(catCov == "free"){ ZD <- lapply(Z,`%x%`,D) d <- sapply(ZD,ncol) nn <- length(ZD) for(k in 1:nn){ colnames(ZD[[k]]) <- paste0(rep(colnames(D),ncol(Z[[k]])), "~", rep(colnames(Z[[k]]),each=ncol(D))) colnames(ZD[[k]]) <- gsub("(Intercept)","1",colnames(ZD[[k]]),fixed=TRUE) } randstruct <- lapply(1:nn,function(k){ group.labels <- random[[k]]$groups groups <- mf[group.labels] groups <- lapply(groups,as.factor) nlev <- length(groups) if(nlev > 1){ for(i in 2:nlev){ groups[[i]] <- interaction(groups[c(i-1,i)]) group.labels[i] <- paste(group.labels[i-1],group.labels[i],sep=":") } } groups <- lapply(groups,rep,each=nrow(D)) VarCov.names.k <- rep(list(colnames(ZD[[k]])),nlev) ZD_k <- lapply(groups,mkZ,rX=ZD[[k]]) d <- rep(d[k],nlev) names(groups) <- group.labels list(ZD_k,groups,d,VarCov.names.k) }) ZD <- lapply(randstruct,`[[`,1) groups <- lapply(randstruct,`[[`,2) d <- lapply(randstruct,`[[`,3) VarCov.names <- lapply(randstruct,`[[`,4) ZD <- unlist(ZD,recursive=FALSE) groups <- unlist(groups,recursive=FALSE) VarCov.names <- unlist(VarCov.names,recursive=FALSE) d <- unlist(d) ZD <- blockMatrix(ZD,ncol=length(ZD)) } else if(catCov =="single"){ cc <- rep(1:n.categs,n.obs) stopifnot(length(Y)==length(cc)) d <- sapply(Z,ncol) nn <- length(Z) for(k in 1:nn){ colnames(Z[[k]]) <- paste0("~",colnames(Z[[k]])) colnames(Z[[k]]) <- gsub("(Intercept)","1",colnames(Z[[k]]),fixed=TRUE) } randstruct <- lapply(1:nn,function(k){ group.labels <- random[[k]]$groups groups <- mf[group.labels] groups <- lapply(groups,as.factor) nlev <- length(groups) groups[[1]] <- interaction(cc,groups[[1]]) if(nlev > 1){ for(i in 2:nlev){ groups[[i]] <- interaction(groups[c(i-1,i)]) group.labels[i] <- paste(group.labels[i-1],group.labels[i],sep=":") } } VarCov.names.k <- rep(list(colnames(Z[[k]])),nlev) ZD_k <- lapply(groups,mkZ,rX=Z[[k]]) d <- rep(d[k],nlev) names(groups) <- group.labels list(ZD_k,groups,d,VarCov.names.k) }) ZD <- lapply(randstruct,`[[`,1) groups <- lapply(randstruct,`[[`,2) d <- lapply(randstruct,`[[`,3) VarCov.names <- lapply(randstruct,`[[`,4) ZD <- unlist(ZD,recursive=FALSE) groups <- unlist(groups,recursive=FALSE) VarCov.names <- unlist(VarCov.names,recursive=FALSE) d <- unlist(d) ZD <- blockMatrix(ZD,ncol=length(ZD)) } else { # catCov == "diagonal" categs <- 1:n.categs cc <- rep(categs,n.obs) stopifnot(length(Y)==length(cc)) randstruct <- list() for(categ in categs){ u <- as.integer(categ==categs) ZD <- lapply(Z,`%x%`,u) d <- sapply(ZD,ncol) nn <- length(ZD) for(k in 1:nn){ colnames(ZD[[k]]) <- paste0(rownames(D)[categ],"~",colnames(Z[[k]])) colnames(ZD[[k]]) <- gsub("(Intercept)","1",colnames(ZD[[k]]),fixed=TRUE) } randstruct_c <- lapply(1:nn,function(k){ group.labels <- random[[k]]$groups groups <- mf[group.labels] groups <- lapply(groups,as.factor) nlev <- length(groups) if(nlev > 1){ for(i in 2:nlev){ groups[[i]] <- interaction(groups[c(i-1,i)]) group.labels[i] <- paste(group.labels[i-1],group.labels[i],sep=":") } } groups <- lapply(groups,rep,each=nrow(D)) VarCov.names.k <- rep(list(colnames(ZD[[k]])),nlev) ZD_k <- lapply(groups,mkZ,rX=ZD[[k]]) d <- rep(d[k],nlev) names(groups) <- group.labels list(ZD_k,groups,d,VarCov.names.k) }) randstruct <- c(randstruct,randstruct_c) } ZD <- lapply(randstruct,`[[`,1) groups <- lapply(randstruct,`[[`,2) d <- lapply(randstruct,`[[`,3) VarCov.names <- lapply(randstruct,`[[`,4) ZD <- unlist(ZD,recursive=FALSE) groups <- unlist(groups,recursive=FALSE) VarCov.names <- unlist(VarCov.names,recursive=FALSE) d <- unlist(d) ZD <- blockMatrix(ZD,ncol=length(ZD)) } fit <- mmclogit.fitPQLMQL(y=Y,s=s,w=weights, X=XD,Z=ZD,d=d, start=start, start.Phi=start.VarCov, start.b=start.randeff, method=method, estimator=estimator, control=control, offset = offset) nlev <- length(fit$VarCov) for(k in 1:nlev) dimnames(fit$VarCov[[k]]) <- list(VarCov.names[[k]],VarCov.names[[k]]) names(fit$VarCov) <- names(groups) } coefficients <- fit$coefficients coefmat <- matrix(coefficients,nrow=ncol(D), dimnames=list("Response categories"=colnames(D), "Predictors"=colnames(X) )) fit$coefmat <- coefmat fit$coefficients <- coefficients if(x) fit$x <- X if(x && length(random)) fit$z <- Z if(!y) { fit$y <- NULL fit$s <- NULL } fit <- c(fit,list(call = call, formula = formula, terms = mt, random = random, groups = groups, data = data, contrasts = contrasts, xlevels = xlevels, na.action = na.action, start = start, prior.weights=prior.weights, weights=weights, model=mf, D=D, N=N, response.type=response.type, from.table=from.table)) if(length(random)){ class(fit) <- c("mmblogit","mblogit","mmclogit","mclogit","lm") } else class(fit) <- c("mblogit","mclogit","lm") fit } print.mblogit <- function(x,digits= max(3, getOption("digits") - 3), ...){ cat("\nCall: ",paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") D <- x$D categs <- colnames(D) basecat <- rownames(D)[!(rownames(D)%in%categs)] coefmat <- x$coefmat if(getOption("mblogit.show.basecat",TRUE)){ rn <- paste0(rownames(coefmat), getOption("mblogit.basecat.sep","/"), basecat) rownames(coefmat) <- rn } if(length(coefmat)) { cat("Coefficients") if(is.character(co <- x$contrasts)) cat(" [contrasts: ", apply(cbind(names(co),co), 1, paste, collapse="="), "]") cat(":\n") print.default(format(coefmat, digits=digits), print.gap = 2, quote = FALSE) } else cat("No coefficients\n\n") if(x$phi != 1) cat("\nDispersion: ",x$phi) cat("\nNull Deviance: ", format(signif(x$null.deviance, digits)), "\nResidual Deviance:", format(signif(x$deviance, digits))) if(!x$converged) cat("\n\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="") else cat("\n") invisible(x) } summary.mblogit <- function(object,...){ ans <- NextMethod() ans$D <- object$D class(ans) <- c("summary.mblogit","summary.mclogit") return(ans) } print.summary.mblogit <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...){ cat("\nCall:\n") cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") D <- x$D categs <- colnames(D) basecat <- rownames(D)[!(rownames(D)%in%categs)] coefs <- x$coefficients rn.coefs <- rownames(coefs) ncategs <- length(categs) for(i in 1:ncategs){ cat <- categs[i] patn <- paste0(cat,"~") ii <- grep(patn,rn.coefs,fixed=TRUE) coefs.cat <- coefs[ii,,drop=FALSE] rownames(coefs.cat) <- gsub(patn,"",rownames(coefs.cat)) if(i>1) cat("\n") cat("Equation for ",cat," vs ",basecat,":\n",sep="") printCoefmat(coefs.cat, digits=digits, signif.stars=signif.stars, signif.legend=signif.stars && i==ncategs, na.print="NA", ...) } if(x$dispersion != 1) cat("\nDispersion: ",x$dispersion," on ",x$df.residual," degrees of freedom") cat("\nApproximate residual Deviance:", format(signif(x$deviance, digits)), "\nNumber of Fisher scoring iterations: ", x$iter, "\nNumber of observations: ",x$N, "\n") correl <- x$correlation if(!is.null(correl)) { p <- NCOL(correl) if(p > 1) { cat("\nCorrelation of Coefficients:\n") if(is.logical(symbolic.cor) && symbolic.cor) { print(symnum(correl, abbr.colnames = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop=FALSE], quote = FALSE) } } } if(!x$converged) cat("\n\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="") else cat("\n") invisible(x) } fitted.mblogit <- function(object,type=c("probabilities","counts"),...){ weights <- object$weights nobs <- length(weights) res <- object$fitted.values type <- match.arg(type) na.act <- object$na.action longfit <- switch(type, probabilities=res, counts=weights*res) ncat <- nrow(object$D) fit <- t(matrix(longfit,nrow=ncat)) if(!is.null(na.act)) fit <- napredict(na.act,fit) fit } predict.mblogit <- function(object, newdata=NULL,type=c("link","response"),se.fit=FALSE,...){ type <- match.arg(type) mt <- terms(object) rhs <- delete.response(mt) if(missing(newdata)){ m <- object$model na.act <- object$na.action } else{ m <- model.frame(rhs,data=newdata,na.action=na.exclude) na.act <- attr(m,"na.action") } X <- model.matrix(rhs,m, contrasts.arg=object$contrasts, xlev=object$xlevels ) rn <- rownames(X) D <- object$D XD <- X%x%D rspmat <- function(x){ y <- t(matrix(x,nrow=nrow(D))) colnames(y) <- rownames(D) y } eta <- c(XD %*% coef(object)) eta <- rspmat(eta) rownames(eta) <- rn if(se.fit){ V <- vcov(object) stopifnot(ncol(XD)==ncol(V)) } if(type=="response") { exp.eta <- exp(eta) sum.exp.eta <- rowSums(exp.eta) p <- exp.eta/sum.exp.eta if(se.fit){ p.long <- as.vector(t(p)) s <- rep(1:nrow(X),each=nrow(D)) wX <- p.long*(XD - rowsum(p.long*XD,s)[s,,drop=FALSE]) se.p.long <- sqrt(rowSums(wX * (wX %*% V))) se.p <- rspmat(se.p.long) rownames(se.p) <- rownames(p) if(is.null(na.act)) list(fit=p,se.fit=se.p) else list(fit=napredict(na.act,p), se.fit=napredict(na.act,se.p)) } else { if(is.null(na.act)) p else napredict(na.act,p) } } else if(se.fit) { se.eta <- sqrt(rowSums(XD * (XD %*% V))) se.eta <- rspmat(se.eta) eta <- eta[,-1,drop=FALSE] se.eta <- se.eta[,-1,drop=FALSE] if(is.null(na.act)) list(fit=eta,se.fit=se.eta) else list(fit=napredict(na.act,eta), se.fit=napredict(na.act,se.eta)) } else { eta <- eta[,-1,drop=FALSE] if(is.null(na.act)) eta else napredict(na.act,eta) } } weights.mblogit <- function (object, ...) { res <- object$prior.weights if (is.null(object$na.action)) res else naresid(object$na.action, res) } print.mmblogit <- function(x,digits= max(3, getOption("digits") - 3), ...){ cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") D <- x$D categs <- colnames(D) basecat <- rownames(D)[!(rownames(D)%in%categs)] coefmat <- x$coefmat if(getOption("mblogit.show.basecat",TRUE)){ rn <- paste0(rownames(coefmat), getOption("mblogit.basecat.sep","/"), basecat) rownames(coefmat) <- rn } if(length(coefmat)) { cat("Coefficients") if(is.character(co <- x$contrasts)) cat(" [contrasts: ", apply(cbind(names(co),co), 1, paste, collapse="="), "]") cat(":\n") print.default(format(coefmat, digits=digits), print.gap = 2, quote = FALSE) } else cat("No coefficients\n\n") cat("\n(Co-)Variances:\n") VarCov <- x$VarCov for(k in 1:length(VarCov)){ if(k > 1) cat("\n") cat("Grouping level:",names(VarCov)[k],"\n") VarCov.k <- VarCov[[k]] VarCov.k[] <- format(VarCov.k, digits=digits) VarCov.k[upper.tri(VarCov.k)] <- "" print.default(VarCov.k, print.gap = 2, quote = FALSE) } cat("\nNull Deviance: ", format(signif(x$null.deviance, digits)), "\nResidual Deviance:", format(signif(x$deviance, digits))) if(!x$converged) cat("\n\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="") else cat("\n") invisible(x) } summary.mmblogit <- function(object,...){ ans <- NextMethod() ans$D <- object$D class(ans) <- c("summary.mmblogit","summary.mmclogit") return(ans) } print.summary.mmblogit <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...){ cat("\nCall:\n") cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="") D <- x$D categs <- colnames(D) basecat <- rownames(D)[!(rownames(D)%in%categs)] coefs <- x$coefficients rn.coefs <- rownames(coefs) ncategs <- length(categs) for(i in 1:ncategs){ cat <- categs[i] patn <- paste0(cat,"~") ii <- grep(patn,rn.coefs,fixed=TRUE) coefs.cat <- coefs[ii,,drop=FALSE] rownames(coefs.cat) <- gsub(patn,"",rownames(coefs.cat)) if(i>1) cat("\n") cat("Equation for ",cat," vs ",basecat,":\n",sep="") printCoefmat(coefs.cat, digits=digits, signif.stars=signif.stars, signif.legend=signif.stars && i==ncategs, na.print="NA", ...) } cat("\n(Co-)Variances:\n") VarCov <- x$VarCov se_VarCov <- x$se_VarCov for(k in 1:length(VarCov)){ if(k > 1) cat("\n") cat("Grouping level:",names(VarCov)[k],"\n") VarCov.k <- VarCov[[k]] VarCov.k[] <- format(VarCov.k, digits=digits) VarCov.k[upper.tri(VarCov.k)] <- "" #print.default(VarCov.k, print.gap = 2, quote = FALSE) VarCov.k <- format_Mat(VarCov.k,title="Estimate") se_VarCov.k <- se_VarCov[[k]] se_VarCov.k[] <- format(se_VarCov.k, digits=digits) se_VarCov.k[upper.tri(se_VarCov.k)] <- "" se_VarCov.k <- format_Mat(se_VarCov.k,title="Std.Err.",rownames=" ") VarCov.k <- paste(VarCov.k,se_VarCov.k) writeLines(VarCov.k) } cat("\nApproximate residual deviance:", format(signif(x$deviance, digits)), "\nNumber of Fisher scoring iterations: ", x$iter) cat("\nNumber of observations") for(i in seq_along(x$groups)){ g <- nlevels(x$groups[[i]]) nm.group <- names(x$groups)[i] cat("\n Groups by", paste0(nm.group,": ",format(g))) } cat("\n Individual observations: ",x$N) correl <- x$correlation if(!is.null(correl)) { p <- NCOL(correl) if(p > 1) { cat("\nCorrelation of Coefficients:\n") if(is.logical(symbolic.cor) && symbolic.cor) { print(symnum(correl, abbr.colnames = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop=FALSE], quote = FALSE) } } } if(!x$converged) cat("\nNote: Algorithm did not converge.\n") if(nchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="") else cat("\n") invisible(x) } simulate.mblogit <- function(object, nsim = 1, seed = NULL, ...){ if(object$phi > 1) stop("Simulating responses from models with oversdispersion is not supported yet") if(object$response.type=="matrix" || object$from.table){ yy <- NextMethod() seed_attr <- attr(yy,"seed") nm <- nrow(yy) m <- nrow(object$D) n <- nm %/% m yy <- lapply(yy,array, dim=c(m,n), dimnames=list(rownames(object$D), NULL)) yy <- lapply(yy,t) names(yy) <- paste0("sim_",1:nsim) if(object$response.type=="matrix"){ class(yy) <- "data.frame" attr(yy,"row.names") <- rownames(object$model) attr(yy,"seed") <- seed_attr return(yy) } else { ij <- attr(object$model,"ij") n <- nrow(ij) yy <- lapply(yy,"[",ij) yy <- as.data.frame(yy) attr(yy,"seed") <- seed_attr return(yy) } } else { # response.type == "factor" probs <- object$fitted.values response <- model.response(object$model) nm <- length(probs) m <- nrow(object$D) n <- nm %/% m dim(probs) <- c(m,n) yy <- sample_factor(probs,nsim=nsim,seed=seed) seed_attr <- attr(yy,"seed") colnames(yy) <- paste0("sim_",1:nsim) rownames(yy) <- rownames(object$model) yy <- as.data.frame(yy) yy <- lapply(yy,factor,labels=levels(response)) yy <- as.data.frame(yy) attr(yy,"seed") <- seed_attr return(yy) } } simulate.mmblogit <- function(object, nsim = 1, seed = NULL, ...) stop("Simulating responses from random-effects models is not supported yet") sample_factor <- function(probs, nsim =1, seed = NULL, ...){ if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) if (is.null(seed)) RNGstate <- get(".Random.seed", envir = .GlobalEnv) else { R.seed <- get(".Random.seed", envir = .GlobalEnv) set.seed(seed) RNGstate <- structure(seed, kind = as.list(RNGkind())) on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) } yy <- apply(probs,2,sample.int,size=nsim,n=nrow(probs),replace=TRUE) yy <- t(yy) attr(yy,"seed") <- RNGstate return(yy) } lenuniq <- function(x) length(unique(x)) predict.mmblogit <- function(object, newdata=NULL,type=c("link","response"),se.fit=FALSE, conditional=TRUE, ...){ type <- match.arg(type) rhs <- object$formula[-2] random <- object$random if(missing(newdata)){ mf <- object$model na.act <- object$na.action rmf <- mf } else{ mf <- model.frame(rhs,data=newdata,na.action=na.exclude) rnd <- object$random for(i in seq_along(rnd)){ rf_i <- random2formula(rnd[[i]]) if(i == 1) rfo <- rf_i else rfo <- c_formulae(rfo,rf_i) } rmf <- model.frame(rfo,data=newdata,na.action=na.exclude) na.act <- attr(mf,"na.action") } X <- model.matrix(rhs,mf, contrasts.arg=object$contrasts, xlev=object$xlevels ) D <- object$D XD <- X%x%D eta <- c(XD %*% coef(object)) if(object$method=="PQL" && conditional){ rf <- lapply(random,"[[","formula") rt <- lapply(rf,terms) suppressWarnings(Z <- lapply(rt,model.matrix,rmf, contrasts.arg=object$contrasts, xlev=object$xlevels)) ZD <- lapply(Z,`%x%`,D) d <- sapply(ZD,ncol) nn <- length(ZD) for(k in 1:nn){ colnames(ZD[[k]]) <- paste0(rep(colnames(D),ncol(Z[[k]])), "~", rep(colnames(Z[[k]]),each=ncol(D))) colnames(ZD[[k]]) <- gsub("(Intercept)","1",colnames(ZD[[k]]),fixed=TRUE) } orig.groups <- object$groups olevels <- lapply(orig.groups,levels) randstruct <- lapply(1:nn,function(k){ group.labels <- random[[k]]$groups groups <- rmf[group.labels] groups <- lapply(groups,as.factor) nlev <- length(groups) if(nlev > 1){ for(i in 2:nlev){ groups[[i]] <- interaction(groups[c(i-1,i)]) group.labels[i] <- paste(group.labels[i-1],group.labels[i],sep=":") } } groups <- lapply(groups,rep,each=nrow(D)) olevels <- olevels[group.labels] groups <- Map(factor,x=groups,levels=olevels) VarCov.names.k <- rep(list(colnames(ZD[[k]])),nlev) ZD_k <- lapply(groups,mkZ,rX=ZD[[k]]) d <- rep(d[k],nlev) names(groups) <- group.labels list(ZD_k,groups,d,VarCov.names.k) }) ZD <- lapply(randstruct,`[[`,1) groups <- lapply(randstruct,`[[`,2) ZD <- unlist(ZD,recursive=FALSE) d <- lapply(randstruct,`[[`,3) groups <- unlist(groups,recursive=FALSE) d <- unlist(d) ZD <- blockMatrix(ZD) b <- object$random.effects nlev <- length(ZD) for(k in 1:nlev) eta <- eta + as.vector(ZD[[k]]%*%b[[k]]) } rspmat <- function(x){ y <- t(matrix(x,nrow=nrow(D))) colnames(y) <- rownames(D) y } eta <- rspmat(eta) nvar <- ncol(X) nobs <- nrow(X) if(se.fit || type=="response"){ exp.eta <- exp(eta) sum.exp.eta <- rowSums(exp.eta) p <- exp.eta/sum.exp.eta } if(se.fit){ ncat <- ncol(p) W <- Matrix(0,nrow=nobs*ncat,ncol=nobs) i <- seq.int(ncat*nobs) j <- rep(1:nobs,each=ncat) pv <- as.vector(t(p)) W[cbind(i,j)] <- pv W <- Diagonal(x=pv)-tcrossprod(W) WX <- W%*%XD if(object$method=="PQL"){ WZ <- bMatProd(W,ZD) H <- object$info.fixed.random K <- solve(H) } } if(type=="response") { if(se.fit){ if(object$method=="PQL" && conditional){ WXZ <- structure(cbind(blockMatrix(WX),WZ),class="blockMatrix") var.p <- bMatProd(WXZ,K) var.p <- Map(`*`,WXZ,var.p) var.p <- lapply(var.p,rowSums) var.p <- Reduce(`+`,var.p) } else { vcov.coef <- vcov(object) var.p <- rowSums(WX*(WX%*%vcov.coef)) } se.p <- sqrt(var.p) se.p <- rspmat(se.p) if(is.null(na.act)) list(fit=p,se.fit=se.p) else list(fit=napredict(na.act,p), se.fit=napredict(na.act,se.p)) } else{ if(is.null(na.act)) p else napredict(na.act,p) } } else { eta <- eta[,-1,drop=FALSE] if(se.fit){ if(object$method=="PQL" && conditional){ XZ <- structure(cbind(blockMatrix(XD),ZD),class="blockMatrix") var.eta <- bMatProd(XZ,K) var.eta <- Map(`*`,XZ,var.eta) var.eta <- lapply(var.eta,rowSums) var.eta <- Reduce(`+`,var.eta) } else { vcov.coef <- vcov(object) var.eta <- rowSums(XD*(XD%*%vcov.coef)) } se.eta <- sqrt(var.eta) se.eta <- rspmat(se.eta) se.eta <- se.eta[,-1,drop=FALSE] if(is.null(na.act)) list(fit=eta,se.fit=se.eta) else list(fit=napredict(na.act,eta), se.fit=napredict(na.act,se.eta)) } else { if(is.null(na.act)) eta else napredict(na.act,eta) } } } mclogit/R/mmclogit-fitPQLMQL.R0000644000176200001440000005432414325064251015570 0ustar liggesusersmmclogit.fitPQLMQL <- function( y, s, w, X, Z, d, start = NULL, start.Phi = NULL, start.b = NULL, offset = NULL, method = c("PQL","MQL"), estimator = c("ML","REML"), control=mmclogit.control() ){ method <- match.arg(method) estimator <- match.arg(estimator) nvar <- ncol(X) nobs <- length(y) nsets <- length(unique(s)) nlevs <- length(Z) m <- sapply(Z,ncol)/d sqrt.w <- sqrt(w) i <- 1:nobs if(!length(offset)) offset <- rep.int(0, nobs) if(length(start)){ stopifnot(length(start)==ncol(X)) eta <- c(X%*%start) + offset if(method=="PQL"){ if(length(start.b) == nlevs){ for(k in 1:nlevs) eta <- eta + as.vector(Z[[k]]%*%start.b[[k]]) } else stop("PQL requires starting values for random effects") } } else eta <- mclogitLinkInv(y,s,w) pi <- mclogitP(eta,s) dev.resids <- ifelse(y>0, 2*w*y*(log(y)-log(pi)), 0) deviance <- sum(dev.resids) # Outer iterations: update non-linear part of the model converged <- FALSE fit <- NULL do.backup <- FALSE step.truncated <- FALSE msg <- "Random effects design matrix at index %d has fewer rows than columns (%d < %d). This will almost certainly lead to noncovergence or other numerical problems. Please reconsider your model specification." for(k in 1:nlevs){ Z.k <- Z[[k]] if(nrow(Z.k) < ncol(Z.k)) warning(sprintf(msg,k,nrow(Z.k),ncol(Z.k))) } parms <- NULL last.parms <- NULL last.deviance <- deviance prev.last.deviance <- NULL last.eta <- eta model.struct <- list(y=y, s=s, nsets=nsets, nobs=nobs, i=i, w=w, sqrt.w=sqrt.w, offset=offset, X=X, Z=Z, d=d, m=m, nlevs=nlevs) parms$coefficients <- list(fixed=start, random=start.b) parms$Phi <- start.Phi for(iter in 1:control$maxit){ W <- Matrix(0,nrow=nobs,ncol=nsets) W[cbind(i,s)] <- sqrt.w*pi W <- Diagonal(x=w*pi)-tcrossprod(W) y.star <- eta - offset + (y-pi)/pi # cat("\n") # print(head(y.star)) prev.last.parms <- last.parms last.parms <- parms aux <- list(y=y.star,W=W) parms <- PQLMQL_innerFit(parms,aux,model.struct,method,estimator,control) step.back <- FALSE if(inherits(parms,"try-error")){ if(length(prev.last.deviance) && last.deviance > prev.last.deviance && length(prev.last.parms)){ # Previous step increased the deviance, so we better step back twice warning("Numeric problems in inner iteration and previous step increased deviance, stepping back twice") parms <- prev.last.parms } else { # Previous step decreased the deviance warning("Numeric problems in inner iteration, stepping back") parms <- last.parms } step.back <- TRUE } last.fit <- fit fit <- PQLMQL_eval_parms(parms,model.struct,method,estimator) deviance <- fit$deviance if(control$trace){ cat("\nIteration",iter,"- deviance =",deviance) } if(is.finite(deviance)){ if(deviance > last.deviance && control$break.on.increase){ warning("Cannot decrease the deviance, stepping back",call.=FALSE) step.back <- TRUE parms <- last.parms fit <- last.fit deviance <- fit$deviance } if(deviance < 0 && control$break.on.negative){ warning("Negative deviance, backing up",call.=FALSE) step.back <- TRUE parms <- last.parms fit <- last.fit deviance <- fit$deviance } } else if(!is.finite(deviance)){ warning("Non-finite deviance, backing up",call.=FALSE) step.back <- TRUE parms <- last.parms fit <- last.fit deviance <- fit$deviance } eta <- fit$eta pi <- fit$pi coef <- parms$coefficients$fixed Phi <- parms$Phi # print(start) # print(coef) # print(start.Phi) # print(Phi) if(step.back) { if(control$trace) cat(" - new deviance = ",deviance) break } else { if(length(last.fit)) last.eta <- last.fit$eta crit <- sum((eta - last.eta)^2) /sum(eta^2) if(control$trace) cat(" - criterion =",crit) if(crit <= control$eps){ converged <- TRUE if(control$trace) cat("\nconverged\n") break } } } if(!converged && !step.back){ # if(control$trace) cat("\n") warning("Algorithm did not converge",call.=FALSE) } if(step.back){ # if(control$trace) cat("\n") warning("Algorithm stopped without convergence",call.=FALSE) } eps <- 10*.Machine$double.eps if (any(pi < eps) || any(1-pi < eps)){ # if(control$trace) cat("\n") warning("Fitted probabilities numerically 0 or 1 occurred",call.=FALSE) } if(deviance < 0){ # if(control$trace) cat("\n") warning("Approximate deviance is negative.\nYou might be overfitting your data or the group size is too small.",call.=FALSE) } ntot <- length(y) pi0 <- mclogitP(offset,s) null.deviance <- sum(ifelse(y>0, 2*w*y*(log(y)-log(pi0)), 0)) resid.df <- length(y) - length(unique(s)) model.df <- ncol(X) + length(parms$lambda) resid.df <- resid.df - model.df return( list( coefficients = parms$coefficients$fixed, random.effects = parms$coefficients$random, VarCov = parms$Phi, lambda = parms$lambda, linear.predictors = eta, working.residuals = (y-pi)/pi, response.residuals = y-pi, df.residual = resid.df, model.df = model.df, deviance=deviance, deviance.residuals=dev.resids, null.deviance=null.deviance, method = method, estimator = estimator, iter = iter, y = y, s = s, offset = offset, converged = converged, control=control, info.coef = parms$info.fixed, info.fixed.random = parms$info.fixed.random, info.lambda = parms$info.lambda, info.psi = parms$info.psi )) } matrank <- function(x) { qr(x)$rank } PQLMQL_innerFit <- function(parms,aux,model.struct,method,estimator,control){ m <- model.struct$m d <- model.struct$d nlevs <- model.struct$nlevs X <- model.struct$X Z <- model.struct$Z y <- aux$y W <- aux$W # Naive starting values Wy <- W%*%y WX <- W%*%X XWX <- crossprod(X,WX) XWy <- crossprod(X,Wy) yWy <- crossprod(y,Wy) alpha.start <- parms$coefficients$fixed Phi.start <- parms$Phi if(!length(alpha.start)) alpha.start <- solve(XWX,XWy) y_Xalpha <- as.vector(y - X%*%alpha.start) if(!length(Phi.start)){ Phi.start <- list() for(k in 1:nlevs){ Z.k <- Z[[k]] ZZ.k <- crossprod(Z.k) Zy_Xa.k <- crossprod(Z.k,y_Xalpha) ZZ.k <- ZZ.k + Diagonal(ncol(ZZ.k)) b.k <- solve(ZZ.k,Zy_Xa.k) m.k <- m[k] d.k <- d[k] dim(b.k) <- c(d.k,m.k) S.k <- tcrossprod(b.k) if(matrank(S.k) < d.k){ #warning(sprintf("Singular initial covariance matrix at level %d in inner fitting routine",k)) S.k <- diag(S.k) S.k <- diag(x=S.k,nrow=d) } Phi.start[[k]] <- S.k/(m.k-1) } } Psi.start <- lapply(Phi.start,safeInverse) Lambda.start <- lapply(Psi.start,chol) lambda.start <- unlist(lapply(Lambda.start,uvech)) WZ <- bMatProd(W,Z) ZWZ <- bMatCrsProd(WZ,Z) ZWX <- bMatCrsProd(WZ,X) ZWy <- bMatCrsProd(WZ,y) aux <- list(yWy=yWy, XWy=XWy, ZWy=ZWy, XWX=XWX, ZWX=ZWX, ZWZ=ZWZ) if(control$trace.inner) cat("\n") devfunc <- function(lambda) -2*as.vector(PQLMQL_pseudoLogLik(lambda, model.struct=model.struct, estimator=estimator, aux=aux)$logLik) gradfunc <- function(lambda) -2*as.vector(PQLMQL_pseudoLogLik(lambda, model.struct=model.struct, estimator=estimator, aux=aux, gradient=TRUE)$gradient) res.port <- nlminb(lambda.start, objective = devfunc, gradient = gradfunc, control = list(trace = as.integer(control$trace.inner)) ) if(res.port$convergence != 0){ warning(sprintf("Inner iterations did not coverge - nlminb message: %s",res.port$message), call.=FALSE,immediate.=TRUE) } lambda <- res.port$par # 'nlminb' seems to be more stable - but this allows to check the analyticals. # # dev_f <- function(lambda){ # res <- PQLMQL_pseudoLogLik(lambda, # model.struct=model.struct, # estimator=estimator, # aux=aux, # gradient=TRUE) # structure(-2*res$logLik, # gradient=-2*res$gradient) # } # # res.nlm <- nlm(f=dev_f,p=lambda.start,hessian=TRUE,check.analyticals=TRUE, # print.level=if(control$trace.inner) 2 else 0) # # if(res.nlm$code > 2){ # nlm.messages <- c("","", # paste("Last global step failed to locate a point lower than", # "'estimate'. Either 'estimate' is an approximate local", # "minimum of the function or 'steptol' is too small.",sep="\n"), # "Iteration limit exceeded.", # paste("Maximum step size 'stepmax' exceeded five consecutive", # "times. Either the function is unbounded below, becomes", # "asymptotic to a finite value from above in some direction", # "or 'stepmax' is too small.",sep="\n")) # retcode <- res.nlm$code # cat("\n") # warning(sprintf("Inner iterations failed to coverge - nlm code indicates: %s", # nlm.messages[retcode]), # call.=FALSE,immediate.=TRUE) # } # lambda <- res.nlm$estimate info.varPar <- PQLMQL_pseudoLogLik(lambda, model.struct=model.struct, estimator=estimator, aux=aux, info.lambda=TRUE, info.psi=TRUE)$info Lambda <- lambda2Mat(lambda,m,d) Psi <- lapply(Lambda,crossprod) iSigma <- Psi2iSigma(Psi,m) Phi <- lapply(Psi,safeInverse) ZWZiSigma <- ZWZ + iSigma K <- solve(ZWZiSigma) log.det.iSigma <- Lambda2log.det.iSigma(Lambda,m) log.det.ZWZiSigma <- 2*sum(log(diag(chol_blockMatrix(ZWZiSigma,resplit=FALSE)))) XiVX <- XWX - fuseMat(bMatCrsProd(ZWX,bMatProd(K,ZWX))) XiVy <- XWy - fuseMat(bMatCrsProd(ZWX,bMatProd(K,ZWy))) alpha <- solve(XiVX,XiVy) alpha <- drop(as.matrix(alpha)) b <- bMatProd(K,ZWy-bMatProd(ZWX,alpha)) b[] <- lapply(b[],as.matrix) XZWiSZX <- structure(rbind(cbind(blockMatrix(XWX),bMatTrns(ZWX)), cbind(ZWX,ZWZiSigma)),class="blockMatrix") list( lambda = lambda, coefficients = list(fixed = alpha, random = b), Psi = Psi, Phi = Phi, info.fixed = as.matrix(XiVX), info.fixed.random = XZWiSZX, info.lambda = info.varPar$lambda, info.psi = info.varPar$psi, log.det.iSigma = log.det.iSigma, log.det.ZiVZ = log.det.ZWZiSigma, ZiVZ = ZWZiSigma ) } PQLMQL_eval_parms <- function(parms,model.struct,method,estimator){ nlevs <- model.struct$nlevs d <- model.struct$d s <- model.struct$s y <- model.struct$y w <- model.struct$w X <- model.struct$X Z <- model.struct$Z offset <- model.struct$offset alpha <- parms$coefficients$fixed b <- parms$coefficients$random Psi <- parms$Psi ZiVZ <- parms$ZiVZ eta <- as.vector(X%*%alpha) + offset if(method=="PQL"){ rand.ssq <- 0 for(k in 1:nlevs){ eta <- eta + as.vector(Z[[k]]%*%b[[k]]) B.k <- matrix(b[[k]],nrow=d[k]) Psi.k <- Psi[[k]] rand.ssq <- rand.ssq + sum(B.k * (Psi.k%*%B.k)) } } else { b_ <- blockMatrix(b,nrow=nlevs) rand.ssq <- as.vector(fuseMat(bMatCrsProd(b_,bMatProd(ZiVZ,b_)))) } pi <- mclogitP(eta,s) dev.resids <- ifelse(y>0, 2*w*y*(log(y)-log(pi)), 0) deviance <- -parms$log.det.iSigma + parms$log.det.ZiVZ + sum(dev.resids) + rand.ssq list( eta = eta, pi = pi, deviance = deviance ) } PQLMQL_pseudoLogLik <- function(lambda, model.struct, estimator, aux, gradient=FALSE, info.lambda=FALSE, info.psi=FALSE ){ nlevs <- model.struct$nlevs d <- model.struct$d m <- model.struct$m yWy <- aux$yWy XWy <- aux$XWy ZWy <- aux$ZWy XWX <- aux$XWX ZWX <- aux$ZWX ZWZ <- aux$ZWZ Lambda <- lambda2Mat(lambda,m,d) Psi <- lapply(Lambda,crossprod) iSigma <- Psi2iSigma(Psi,m) H <- ZWZ + iSigma K <- solve(H) XiVX <- XWX - fuseMat(bMatCrsProd(ZWX,bMatProd(K,ZWX))) XiVy <- XWy - fuseMat(bMatCrsProd(ZWX,bMatProd(K,ZWy))) XiVX <- symmpart(XiVX) alpha <- solve(XiVX,XiVy) b <- bMatProd(K,ZWy-bMatProd(ZWX,alpha)) y.aXiVXa.y <- yWy - crossprod(XWy,alpha) - fuseMat(bMatCrsProd(ZWy,b)) log.det.iSigma <- Lambda2log.det.iSigma(Lambda,m) log.det.H <- 2*sum(log(diag(chol_blockMatrix(H,resplit=FALSE)))) logLik <- (log.det.iSigma - log.det.H - y.aXiVXa.y)/2 if(estimator == "REML"){ log.det.XiVX <- log.Det(XiVX) logLik <- logLik - log.det.XiVX/2 } res <- list( logLik=as.vector(logLik), coefficients=as.vector(alpha), random.effects=b, Psi=Psi ) if(gradient || info.lambda || info.psi){ if(estimator=="REML"){ iA <- solve(XiVX) XWZK <- bMatCrsProd(ZWX,K) iAXWZK <- bMatProd(blockMatrix(iA),XWZK) M <- bMatCrsProd(XWZK,iAXWZK) } } if(gradient){ if(estimator=="REML"){ K <- K + M } Phi <- lapply(Psi,safeInverse) S <- mapply(v_bCrossprod,b,d,SIMPLIFY=FALSE) K.kk <- diag(K) SumK.k <- mapply(sum_blockDiag,K.kk,d,SIMPLIFY=FALSE) Gr <- list() for(k in 1:nlevs) Gr[[k]] <- Lambda[[k]]%*%(m[k]*Phi[[k]] - SumK.k[[k]] - S[[k]]) res$gradient <- unlist(lapply(Gr,uvech)) } if(info.lambda || info.psi){ res$info <- list() T <- iSigma - K if(estimator=="REML"){ T <- T - M } if(info.lambda){ G.lambda <- d.psi.d.lambda(Lambda) I.lambda <- blockMatrix(list(matrix(0,0,0)),nlevs,nlevs) } if(info.psi) I.psi <- blockMatrix(list(matrix(0,0,0)),nlevs,nlevs) for(k in 1:nlevs){ T.k <- T[[k,k]] B.kk <- block_kronSum(T.k,m[k],m[k]) if(info.lambda){ G.k <- G.lambda[[k]] I.lambda[[k,k]] <- crossprod(G.k,B.kk%*%G.k) } if(info.psi){ I.psi[[k,k]] <- B.kk/2 } if(k < nlevs){ for(k_ in seq(from=k+1,to=nlevs)){ T.kk_ <- T[[k,k_]] B.kk_ <- block_kronSum(T.kk_,m[k],m[k_]) if(info.lambda){ G.k_ <- G.lambda[[k_]] I.lambda[[k,k_]] <- crossprod(G.k,B.kk_%*%G.k_) I.lambda[[k_,k]] <- t(I.lambda[[k,k_]]) } if(info.psi){ I.psi[[k,k_]] <- B.kk_/2 I.psi[[k_,k]] <- t(I.psi[[k,k_]]) } } } } if(info.lambda) res$info$lambda <- as.matrix(fuseMat(I.lambda)) if(info.psi) res$info$psi <- as.matrix(fuseMat(I.psi)) } return(res) } vech <- function(x) x[lower.tri(x,diag=TRUE)] setVech <- function(x,v) { ij <- lower.tri(x,diag=TRUE) x[ij] <- v x <- t(x) x[ij] <- v x } uvech <- function(x) x[upper.tri(x,diag=TRUE)] set_uvech <- function(x,v,symm=FALSE) { ij <- upper.tri(x,diag=TRUE) x[ij] <- v if(symm){ x <- t(x) x[ij] <- v } x } lambda2Mat <- function(lambda,m,d){ nlevs <- length(m) dd2 <- d*(d+1)/2 lambda <- split_(lambda,dd2) D <- lapply(d,diag) Map(set_uvech,D,lambda) } Psi2iSigma <- function(Psi,m){ iSigma <- mapply(mk.iSigma.k,Psi,m,SIMPLIFY=FALSE) blockDiag(iSigma) } mk.iSigma.k <- function(Psi,m){ Diagonal(m) %x% Psi } split_ <- function(x,d){ m <- length(x) n <- length(d) i <- rep(1:n,d) split(x,i) } mmclogit.control <- function( epsilon = 1e-08, maxit = 25, trace = TRUE, trace.inner = FALSE, avoid.increase = FALSE, break.on.increase = FALSE, break.on.infinite = FALSE, break.on.negative = FALSE ) { if (!is.numeric(epsilon) || epsilon <= 0) stop("value of epsilon must be > 0") if (!is.numeric(maxit) || maxit <= 0) stop("maximum number of iterations must be > 0") list(epsilon = epsilon, maxit = maxit, trace = trace, trace.inner = trace.inner, avoid.increase = avoid.increase, break.on.increase = break.on.increase, break.on.infinite = break.on.infinite, break.on.negative = break.on.negative ) } split_bdiag1 <- function(x,n){ m0 <- ncol(x) stopifnot(nrow(x)==m0) m <- m0%/%n i <- rep(1:m,each=n) j <- rep(1:m0) j <- split(j,i) y <- list() for(k in 1:m){ j.k <- j[[k]] y[[k]] <- x[j.k,j.k] } y } split_bdiag <- function(x,d){ m <- length(d) n <- ncol(x) s <- 1:m s <- rep(s,d) j <- 1:n j <- split(j,s) y <- list() for(k in 1:m){ j.k <- j[[k]] y[[k]] <- x[j.k,j.k] } y } se_Phi <- function(Phi,info.lambda){ d <- sapply(Phi,ncol) dd2 <- d*(d+1)/2 info.lambda <- split_bdiag(info.lambda,dd2) Map(se_Phi_,Phi,info.lambda) } block_kronSum <- function(A,m1,m2){ nr <- nrow(A) nc <- ncol(A) d1 <- nr%/%m1 d2 <- nc%/%m2 A <- as.array(A) dim(A) <- c(d1,m1,d2,m2) A <- aperm(A,c(2,4,1,3)) # dim = m1,m2,d1,d2 dim(A) <- c(m1*m2,d1*d2) B <- crossprod(A) # dim = d1*d2,d1*d2 dim(B) <- c(d1,d2,d1,d2) B <- aperm(B,c(1,3,2,4)) # dim = d1,d1,d2,d2 dim(B) <- c(d1*d1,d2*d2) return(B) } d.psi.d.lambda <- function(Lambda) { lapply(Lambda,d.psi.d.lambda.1) } d.psi.d.lambda.1 <- function(Lambda){ d <- ncol(Lambda) d_2 <- d*(d+1)/2 G <- array(0,c(d,d,d,d)) g <- rep(1:d,d*d*d) h <- rep(1:d,each=d,d*d) i <- rep(1:d,each=d*d,d) j <- rep(1:d,each=d*d*d) delta <- diag(d) G[cbind(g,h,i,j)] <- delta[cbind(g,j)]*Lambda[cbind(i,h)] + Lambda[cbind(i,g)]*delta[cbind(h,j)] dim(G) <- c(d*d,d*d) keep.lambda <- as.vector(upper.tri(Lambda,diag=TRUE)) G[,keep.lambda] } se_Phi_ <- function(Phi,info.lambda){ d <- ncol(Phi) Psi <- solve(Phi) Lambda <- chol(Psi) G <- d.psi.d.lambda.1(Lambda) vcov.lambda <- solve(info.lambda) vcov.psi <- G%*%tcrossprod(vcov.lambda,G) PhiPhi <- Phi%x%Phi vcov.phi <- PhiPhi%*%vcov.psi%*%PhiPhi se.phi <- sqrt(diag(vcov.phi)) matrix(se.phi,d,d,dimnames=dimnames(Phi)) } Lambda2log.det.iSigma <- function(Lambda,m){ res <- Map(Lambda2log.det.iSigma_1,Lambda,m) sum(unlist(res)) } Lambda2log.det.iSigma_1 <- function(Lambda,m){ dLambda <- diag(Lambda) if(any(dLambda < 0)){ Psi <- crossprod(Lambda) svd.Psi <- svd(Psi) dLambda <- svd.Psi$d/2 } 2*m*sum(log(dLambda)) } reff <- function(object){ b <- object$random.effects Phi <- object$VarCov nlev <- length(b) B <- list() for(k in 1:nlev){ d <- ncol(Phi[[k]]) B_k <- matrix(b[[k]],nrow=d) B_k <- t(B_k) colnames(B_k) <- colnames(Phi[[k]]) B[[k]] <- B_k } B } mclogit/R/formula-utils.R0000644000176200001440000000131114170652133015035 0ustar liggesusers# Deparse into a single string deparse0 <- function(formula) paste(trimws(deparse(formula)),collapse=" ") # Concatenate two formulae c_formulae <- function(formula,extra){ formula.deparsed <- deparse0(formula) extra.deparsed <- sub("~","+",deparse0(extra)) as.formula(paste(formula.deparsed, extra.deparsed), env=environment(formula)) } # Check if formula is_formula <- function(x)inherits(x,"formula") # Subtitute "|" with "+" random2formula <- function(r) { formula.deparsed <- deparse0(r$formula) gf <- paste(r$groups,collapse="+") as.formula(paste(formula.deparsed, gf,sep="+"), env=environment(r$formula)) } mclogit/R/saveInverse.R0000644000176200001440000000111514320404511014516 0ustar liggesuserssafeInverse <- function(x,tol=1e-7){ tryCatch(solve(x), error=function(e){ warning(e$message,call.=FALSE,immediate.=TRUE) warning("saveInverse: Using Moore-Penrose inverse",call.=FALSE,immediate.=TRUE) moore.penrose(x,tol=tol) }) } mach.eps <- .Machine$double.eps moore.penrose <- function(x,tol=mach.eps*max(dim(x))*max(abs(d))){ svd.x <- svd(x) d <- svd.x$d u <- svd.x$u v <- svd.x$v good <- abs(d) > tol id <- 1/d id[!good] <- 0 v %*% diag(id,nrow=length(id)) %*% t(u) } mclogit/R/emmeans.R0000644000176200001440000000157314036645075013701 0ustar liggesusers## Added by Russel V. Lenth ### emmeans support for mblogit and mmblogit models recover_data.mblogit <- function(object, ...) { rd <- get("recover_data.multinom", asNamespace("emmeans")) rd(object, ...) } emm_basis.mblogit <- function(object, trms, xlev, grid, mode = c("prob", "latent"), vcov., ...) { object$coefficients <- object$coefmat object$lev <- levels(object$model[[1]]) object$edf <- Inf # we have to rearrange the vcov elements in row-major order if(missing(vcov.)) vcov. <- vcov(object) perm <- matrix(seq_along(as.numeric(object$coefmat)), ncol = ncol(object$coefmat)) perm <- as.numeric(t(perm)) vcov. <- vcov.[perm, perm] emb <- get("emm_basis.multinom", asNamespace("emmeans")) emb(object, trms = trms, xlev = xlev, grid = grid, mode = mode, vcov. = vcov., ...) } mclogit/R/getSummary-mclogit.R0000644000176200001440000000667314020755202016036 0ustar liggesusersgetSummary.mclogit <- function(obj, alpha=.05, rearrange=NULL, ...){ smry <- summary(obj) N <- obj$N coef <- smry$coefficients varPar <- smry$varPar lower.cf <- qnorm(p=alpha/2,mean=coef[,1],sd=coef[,2]) upper.cf <- qnorm(p=1-alpha/2,mean=coef[,1],sd=coef[,2]) coef <- cbind(coef,lower.cf,upper.cf) colnames(coef) <- c("est","se","stat","p","lwr","upr") if(length(rearrange)){ coef.grps <- lapply(rearrange,function(ii){ if(is.character(ii) && !all(ii %in% rownames(coef))) stop("coefficient(s) ",dQuote(unname(ii[!(ii %in% rownames(coef))]))," do not exist") structure(coef[ii,], dimnames=list(names(ii),dimnames(coef)[[2]]) ) }) grp.titles <- names(rearrange) coef.grps <- do.call(memisc::collect,coef.grps) coef <- array(NA,dim=c( dim(coef.grps)[1] + NROW(varPar), dim(coef.grps)[2], dim(coef.grps)[3] )) coef[seq(dim(coef.grps)[1]),,] <- coef.grps if(length(varPar)) coef[dim(coef.grps)[1]+seq(nrow(varPar)),,1] <- varPar dimnames(coef) <- list( c(dimnames(coef.grps)[[1]],rownames(varPar)), dimnames(coef.grps)[[2]], grp.titles ) } VarPar <- NULL VarCov <- smry$VarCov se_VarCov <- smry$se_VarCov for(i in seq_along(VarCov)){ lv.i <- names(VarCov)[i] vc.i <- VarCov[[i]] vr.i <- diag(vc.i) cv.i <- vc.i[lower.tri(vc.i)] se_vc.i <- se_VarCov[[i]] se_vr.i <- diag(se_vc.i) se_cv.i <- se_vc.i[lower.tri(se_vc.i)] nms.i <- rownames(vc.i) nms.i <- gsub("(Intercept)","1",nms.i,fixed=TRUE) vrnames.i <- paste0("Var(~",nms.i,"|",lv.i,")") cvnames.i <- t(outer(nms.i,nms.i,FUN=paste,sep=":")) cvnames.i <- cvnames.i[lower.tri(cvnames.i)] if(length(cvnames.i)) cvnames.i <- paste0("Cov(~",cvnames.i,"|",lv.i,")") vp.i <- matrix(NA,nrow=length(vr.i)+length(cv.i),ncol=6) vp.i[,1] <- c(vr.i,cv.i) vp.i[,2] <- c(se_vr.i,se_cv.i) dimnames(vp.i) <- list(c(vrnames.i,cvnames.i), c("est","se","stat","p","lwr","upr")) VarPar <- c(VarPar,structure(list(vp.i),names=lv.i)) } phi <- smry$phi LR <- smry$null.deviance - smry$deviance df <- obj$model.df deviance <- deviance(obj) if(df > 0){ p <- pchisq(LR,df,lower.tail=FALSE) L0.pwr <- exp(-smry$null.deviance/N) LM.pwr <- exp(-smry$deviance/N) McFadden <- 1- smry$deviance/smry$null.deviance Cox.Snell <- 1 - exp(-LR/N) Nagelkerke <- Cox.Snell/(1-L0.pwr) } else { LR <- NA df <- NA p <- NA McFadden <- NA Cox.Snell <- NA Nagelkerke <- NA } ll <- obj$ll AIC <- AIC(obj) BIC <- AIC(obj,k=log(N)) sumstat <- c( phi = phi, LR = LR, df = df, #p = p, logLik = ll, deviance = deviance, McFadden = McFadden, Cox.Snell = Cox.Snell, Nagelkerke = Nagelkerke, AIC = AIC, BIC = BIC, N = N ) ans <- list(coef= coef) ans <- c(ans,VarPar) if(length(smry$ngrps)){ G <-as.integer(smry$ngrps) names(G) <- names(smry$ngrps) names(G) <- paste("Groups by",names(G)) ans <- c(ans,list(Groups=G)) } c(ans, list(sumstat=sumstat, call=obj$call, contrasts = obj$contrasts, xlevels = obj$xlevels)) } getSummary.mmclogit <- getSummary.mclogit mclogit/R/getSummary-mblogit.R0000644000176200001440000001137614024746157016046 0ustar liggesusersrbind_list <- function(x) do.call(rbind,x) getSummary.mblogit <- function(obj, alpha=.05, ...){ smry <- summary(obj) N <- obj$N coef <- smry$coefficients lower.cf <- qnorm(p=alpha/2,mean=coef[,1],sd=coef[,2]) upper.cf <- qnorm(p=1-alpha/2,mean=coef[,1],sd=coef[,2]) coef <- cbind(coef,lower.cf,upper.cf) ttl <- c("est","se","stat","p","lwr","upr") colnames(coef) <- ttl modcat <- colnames(obj$D) basecat <- rownames(obj$D)[rownames(obj$D)%nin%modcat] eqs <- paste0(modcat,"~") rn.coef <- rownames(coef) coef.grps <- lapply(eqs,function(eq){ ii <- grep(eq,rn.coef,fixed=TRUE) coef.grp <- coef[ii,,drop=FALSE] rownames(coef.grp) <- gsub(eq,"",rownames(coef.grp),fixed=TRUE) coef.grp }) if(getOption("mblogit.show.basecat",TRUE)) grp.titles <- paste(modcat,basecat,sep=getOption("mblogit.basecat.sep","/")) else grp.titles <- modcat names(coef.grps) <- grp.titles coef <- do.call(memisc::collect,coef.grps) VarPar <- NULL VarCov <- smry$VarCov se_VarCov <- smry$se_VarCov n.eq <- length(eqs) for(i in seq_along(VarCov)){ lv.i <- names(VarCov)[i] vc.i <- VarCov[[i]] se_vc.i <- se_VarCov[[i]] vp.i <- array(NA,c( nrow(vc.i), ncol(vc.i), 6 )) vp.i[,,1] <- vc.i vp.i[,,2] <- se_vc.i m.i <- ncol(vc.i) %/% n.eq d <- c(n.eq,m.i) dim(vp.i) <- c(d,d,6) vn.i <- colnames(vc.i) vn.i <- strsplit(vn.i,"~") vn.i <- unique(sapply(vn.i,"[",2)) dn <- list(eqs,vn.i) dimnames(vp.i) <- c(dn,dn,list(ttl)) vp.i.arr <- aperm(vp.i,c(4,2,3,1,5)) # vp.i <- lapply(eqs,function(eq){ # ii <- grep(eq,dn.4,fixed=TRUE) # browser() # vp.i.grp <- vp.i[,,,ii,,drop=FALSE] # nr.i.g <- nrow(vp.i.grp) # nc.i.g <- ncol(vp.i.grp) # dn1.i.grp <- dimnames(vp.i.grp)[[1]] # dn2.i.grp <- dimnames(vp.i.grp)[[2]] # dn2.i.grp <- gsub(eq,"~",dn2.i.grp,fixed=TRUE) # dn3.i.grp <- dimnames(vp.i.grp)[[3]] # dim(vp.i.grp) <- c(nr.i.g*nc.i.g,6) # rn.i.g.1 <- rep(dn1.i.grp,nc.i.g) # rn.i.g.2 <- rep(dn2.i.grp,each=nr.i.g) # #rn.i.g <- ifelse(dn1.i.grp == dn2.i.grp,"Var","Cov") # rn.i.g <- paste0(rn.i.g.1,",",rn.i.g.2) # rownames(vp.i.grp) <- rn.i.g # colnames(vp.i.grp) <- dn3.i.grp # vp.i.grp # }) vp.i_ <- matrix(list(NULL),n.eq,n.eq) for(j in 1:n.eq){ for(k in 1:n.eq){ vp.ijk <- vp.i.arr[,,j,k,] dim(vp.ijk) <- c(m.i^2,6) rn.i.1 <- rep(vn.i,m.i) rn.i.2 <- rep(vn.i,each=m.i) jk.1 <- rep(1:m.i,m.i) jk.2 <- rep(1:m.i,each=m.i) rownames(vp.ijk) <- paste0("VCov(~",rn.i.1,",","~",rn.i.2,")") rownames(vp.ijk)[1] <- paste0(grp.titles[j],": ",rownames(vp.ijk)[1]) rownames(vp.ijk) <- format(rownames(vp.ijk),justify="right") colnames(vp.ijk) <- ttl ii <- c(which(jk.1==jk.2),which(jk.1 < jk.2)) ii <- which(jk.1<=jk.2) vp.ijk <- vp.ijk[ii,,drop=FALSE] vp.i_[[j,k]] <- vp.ijk } } vp.i_ <- lapply(1:n.eq,function(j)do.call(rbind,vp.i_[,j])) vp.i <- list() #vp.i <- array(NA,c(dim(vp.i_[[1]]),n.eq),dimnames=c(dimnames(vp.i_[[1]]),list(grp.titles))) vp.i <- array(NA,c(dim(vp.i_[[1]]),n.eq),dimnames=c(dimnames(vp.i_[[1]]),list(NULL))) for(j in 1:n.eq) vp.i[,,j] <- vp.i_[[j]] VarPar <- c(VarPar,structure(list(vp.i),names=lv.i)) } phi <- smry$phi LR <- smry$null.deviance - smry$deviance df <- obj$model.df deviance <- deviance(obj) if(df > 0){ p <- pchisq(LR,df,lower.tail=FALSE) L0.pwr <- exp(-smry$null.deviance/N) LM.pwr <- exp(-smry$deviance/N) McFadden <- 1- smry$deviance/smry$null.deviance Cox.Snell <- 1 - exp(-LR/N) Nagelkerke <- Cox.Snell/(1-L0.pwr) } else { LR <- NA df <- NA p <- NA McFadden <- NA Cox.Snell <- NA Nagelkerke <- NA } ll <- obj$ll AIC <- AIC(obj) BIC <- AIC(obj,k=log(N)) sumstat <- c( phi = phi, LR = LR, df = df, #p = p, logLik = ll, deviance = deviance, McFadden = McFadden, Cox.Snell = Cox.Snell, Nagelkerke = Nagelkerke, AIC = AIC, BIC = BIC, N = N ) ans <- list(coef= coef) ans <- c(ans,VarPar) if(length(smry$ngrps)){ G <-as.integer(smry$ngrps) names(G) <- names(smry$ngrps) names(G) <- paste("Groups by",names(G)) ans <- c(ans,list(Groups=G)) } c(ans, list(sumstat=sumstat, call=obj$call, contrasts = obj$contrasts, xlevels = obj$xlevels)) } getSummary.mmblogit <- getSummary.mblogit mclogit/MD50000644000176200001440000000332514326453475012240 0ustar liggesusers9be298f9be2d4b6b409be78551a77f65 *DESCRIPTION 253457c7632c75e465a2a23d604b0a49 *NAMESPACE 5f46b25a049e81e86774e78cd294ff89 *R/AIC-mclogit.R 7b1e9ee3e3ef8d915b8cd2485db79b62 *R/anova-mclogit.R 93500a8be28832c880c99ea0bbfcbb24 *R/blockMatrices.R dacab808f8b9ce42192cf24ea874652a *R/emmeans.R fa959f9bebaf365d4cf273e33f4cebde *R/formula-utils.R e83609e43103c2645274bd60bd3badf0 *R/getSummary-mblogit.R a78012b2522420f4e4fcc38760d5fe85 *R/getSummary-mclogit.R a38bac836039c90f4637b1269c448964 *R/mblogit.R f814c3a2ac1bd0a7a0d953085bb68f4d *R/mclogit-dispersion.R 1f8f50a6db07fe26eb1190cc59642eac *R/mclogit-fit.R 3e06e00e546b9eb85b94d1b81a5e0f74 *R/mclogit.R 027a7d563d0039dae11ad13d24734598 *R/mmclogit-fitPQLMQL.R cf1424f2f8fe57773bdc1eb775e77a04 *R/saveInverse.R 457a0140aed815ca117ea94b2599e3b2 *R/zzz.R 0c6ad5b3446721162c89589e8077ab1c *build/partial.rdb 7ff125cb6371c22de9f418f468626741 *data/Transport.rda 8920f0fb2baa8640629a74bef8724019 *data/electors.rda ed465db40af36367e177732466dfb56b *demo/00Index 8d2478382a70e25be6cf209394748af2 *demo/mclogit.test.R 40dc38ba49744a8f05ca9463b8e54c28 *demo/test-mblogit-random-nonnested.R 36ae67dc84ef302f2ac1f439fbdf1543 *inst/ChangeLog 836e45b4216811309fd5e40d51e3ce7e *inst/NEWS.Rd c05ff442f7a2312ff8f3d6b5aa96ddeb *man/Transport.Rd 0fa123411869d0d1646b5b58b5a9ad6d *man/dispersion.Rd a32b2bef83b21794f54056f861ae7c7d *man/electors.Rd 63859c49419644624290e47c206e1087 *man/getSummary-mclogit.Rd 2dd38f9fb3d055115b64bb9d90e67ef4 *man/mblogit.Rd 4519c99161b73222b43bdd93c922db3f *man/mclogit.Rd 891577742ce9dd7e195f188568767175 *man/mclogit.fit.Rd d5ede3a30233cb659a85d69bced029da *man/mclogit_control.Rd d77d5ecc5744e8a3f939e856f16724ae *man/predict.Rd 1337900365bda2918af7aaa5cef8a210 *man/simulate.Rd mclogit/inst/0000755000176200001440000000000014326442450012671 5ustar liggesusersmclogit/inst/ChangeLog0000755000176200001440000001672114326442426014460 0ustar liggesusers2022-10-23: - Refactored MQL/PQL algorithm: Eliminated redundant code and adapted it to both 'nlm' and 'nlminb' 2022-10-16: - Fixed bug in MQL/PQL-objective function that led to false non-convergence and bias in variance parameter estimates 2022-10-12: - Support for starting values in random effects models - Support for restriction on random effects variances in multinomial baseline logit models 2022-10-09: - Improve handling of boundary values and singular information matrices 2022-10-07: - Remove spurious messages about missing starting values 2022-05-21: - Add checks of 'control=' argument of 'mclogit()' and 'mblogit()'. 2022-04-13: - Fixed bug in 'blockMatrix' and make it check for argument validity 2022-04-11: - Hotfix of prediction method 2022-04-10: - Fix handling of singular initial covariance matrices in PQLMQL_innerFit - Issue a warning if models with random effects are compared using anova - Fix predict methods for mmclogit models - Handle DOIs in documentation as required by new guidelines 2022-01-16: - Fix prediction with complicated terms in the model - Add some more demos 2021-08-13: - predict.mmclogit: create W-Matrix only when really needed 2021-07-13: - Include variance parameters in the computation of degrees of freedom 2021-06-03: - Be less zealous about group-level covariates constant in some choice sets. 2021-05-30: - Added support for vertical-bar syntax for responses of conditional logit models. 2021-05-27: - Added support for non-nested random effects. 2021-05-25: - Fixed serious bug in the handling of multilevel random effects models. - Detect some misspecified models with too many groups. 2021-04-17: - Merged pull request by Russel V. Lenth that adds support for "emmeans". 2021-04-04: - Apply patch suggested by Ilya Yalchyk to improve formula argument of 'mclogit()' and 'mblogit()'. 2021-03-19: - Last fixes for CRAN 2021-03-18: - Improved support 'mtable()' for multinomial logit models with random effects. 2021-02-21: - Fixed predictions from models with scaled independent variables etc. - 'summary()' now reports the number of groups per random effects level. 2021-01-28: - Another prediction fix. Do not refer to weights that are not needed. 2021-01-10: - Fixed prediction method also for mmclogit objects 2020-12-23: - Refactored computations - Fixed predictions from random-effects models where group indices are not a sequence of integers starting at 1. 2020-11-03: - Correct URLs in DESCRIPTION 2020-09-09: - Fix reference to weights in 'predict()' methods 2020-08-06: - Let 'mclogit'/'mblogit' handle empty responses (i.e. where counts sum to zero) correclty. - Make 'mclogit' complain about non-numeric responses 2020-07-17: - Documented prediction methods. - Improved flexibility of prediction methods. 2020-07-16: - Implemented reasonable 'predict' method for mmblogit and mmclogit objects. 2020-07-15: - Bugfix: Make 'update' work with missing 'dispersion=' argument. - Bugfix: Make 'vcov' work for objects without 'phi' component. - Add 'vcov' method for 'mmclogit' objects. 2020-06-27: - Documented 'simulate()' methods. 2020-06-11: - Implemented (approximate) REML estimator. 2020-06-07: - Added a 'simulate()' method for "mblogit" and "mclogit" models. 2020-05-24: - Adapt the package NAMESPACE file to explicitly export S3 methods as methods, even if they are export as functions, as newly required by R 4.0. 2020-05-23: - Added documentation of (over-)dispersion parameter estimation, rename 'overdispersion=' arguments into 'dispersion=' arguments. 2020-05-22: - Added support for estimation of (over-)dispersion parameters 2020-05-21: - Implemented MQL technique as an alternative to PQL estimation 2020-05-19: - Improve handling of numerical difficulties 2020-05-11: - Use a Cholesky-factor parameterisation to make sure that covariance matrices are positive (semi-)definite 2020-03-30: - Refactored the algorithm for fitting mixed-effects models 2020-01-09: - Document getSummary.mmclogit, getSummary.mmblogit - Make 'mblogit' handle matrix responses with zero-sum rows - Renamed 'residual.df' to 'df.residual' in results object of 'mclogit.fit' et al. (Fixes issue #4) 2019-10-23: - Merge pull request #3 from skyborla/fix-mblogit Fix mblogit for responses with only two levels - Export getSummary.mmclogit, getSummary.mmblogit 2019-04-20: - Let 'mmblogit' models inherit from class 'mblogit' 2019-02-04: - Merged pull request #2 from pmcharrison/ftt-fix: Fixed typo (ftt -> fit) 2018-09-26: - Fixed matrix column selection in predict.mclogit if there is only one predictor (also PR from skyborla) 2018-04-25: - Improved handling of with misspecified random effect structures. - Added documentation about new handling of misspecified models. 2017-10-25: - Fixed handling of dropped predictors in `predict.mclogit`. 2017-01-26: - Fixed some bugs in predict models for `mclogit` objects. - Made sure that dummy coding is used for response factors even if they are ordinal 2017-01-07: - Implemented random slopes for baseline logit models. 2017-01-05: - Implemented random slopes for conditional logit models. 2016-09-01: - Fixed `mclogit.fit()` and `predict.mclogit()` to work better without covariates. 2016-02-07: - Explicitely import package "methods" 2016-01-17: - Import `as` from package "methods". - Make sure `nobs` is defined in `mclogit.fit.rePQL`. 2016-01-16: - Updated `DESCRIPTION` file: Maintainer email address changed and no "This package" at start of package discriptions. 2015-10-08: - Fix display of number of observations - Drop redundant coefficients 2015-08-01: - Added row and column names to estimator result of `vcov()` - Make sure that scripts run with "mclogit" loaded by `devtools::load_all()` 2015-07-15: - mclogit, mclogit.fit: Added support for starting values. 2015-07-03: - predict.mblogit: 'contrasts.arg' not 'contast.arg' ... - predict-methods now should handle NAs in newdata arguments better. 2015-06-17: - Corrected handling of weights and standard errors of prediction. 2015-06-15: - 'getSummary' methods now return "contrasts" and "xlevels" components. - Fixed prediction method for 'mclogit' results. - Added 'fitted' and 'predict' methods for 'mblogit' results. 2015-01-25: - Added support for multinomial baseline logit models in form of 'mblogit' as a frontend to 'mclogit.fit' 2015-01-23: - Added URLs to DESCRIPTION file 2015-01-21: - Added `nobs` and `extractAIC` methods for `mclogit` objects, so that `drop1.default` should work with these. 2015-01-19: - Added call to result of `getSummary.mclogit`. 2015-01-18: - Cleanup of NAMESPACE file; added aliases to methods for `mclogit` objects so that users can see that they are present. - Export `mclogit.fit`, and `mclogit.fit.rePQL` to enable use by other packages. 2014-10-13: Simplified some namespace dependencies. Eliminated useless pseudo-R-squared statistics from getSummary.mclogit 2014-08-23: Added 'anova' methods 2014-03-10: Refactored code -- algorithms should be more transparent and robust now (hopefully!). mclogit without and with random effects can handle missing values now. Fixed predict method -- use of napredict; handles single indep-variable situation now. Fixed embarassing typo -- prior weights do work now (again?). Included AIC and BIC methods contributed by Nic Elliot mclogit/inst/NEWS.Rd0000644000176200001440000001322714242260456013742 0ustar liggesusers\name{NEWS} \title{\emph{mclogit} News} \encoding{UTF-8} \section{Version 0.9}{ \subsection{NEW FEATURES}{ \itemize{ \item It is now possible to estimate models with non-nested (e.g. crossed) random effects. Such models can be specified by providing a list of formulas as \code{random=} argument to the \code{mclogit()} or \code{mblogit()} function. \item The left-hand side of conditional logit models can now more conveniently specified using the vertical-bar (\code{|}) operator. } } \subsection{BUGFIXES}{ \itemize{ \item Singular initial covariance matrices no longer cause errors. \item A warning about unreliable results is issued if \code{anova()} is applied to models with random effects. } } \subsection{IMPROVEMENTS}{ \itemize{ \item \code{mclogit()} and \code{mblogit()} check whether the list passed as \code{control} is complete i.e. contains all the relevant named arguments. } } } \section{Version 0.8}{ \subsection{NEW FEATURES}{ \itemize{ \item It is now possible to use the MQL estimation technique as an alternative to PQL. \item As an alternative to extending a logit model with random effects, it is now possible to add an over-dispersion parameter to the model. \item In addition to approximate the ML estimator, MQL and PQL have a variant that approximates the REML estimator. \item There is now a \code{simulate()} method for objects returned by \code{mblogit()} or \code{mclogit()} (but only for those without random effects). \item Predictions from random-effects models estimated using the PQL technique now are now conditional on the random effects (unless requested otherwise). } } \subsection{BUGFIXES}{ \itemize{ \item \code{mclogit()} now handles empty responses (i.e. counts that sum to zero) correclty. \item \code{mclogit()} now flags non-numeric response vectors as an error. \item \code{predict()} now handles scaled independent variables correcty. } } \subsection{IMPROVEMENTS}{ \itemize{ \item \code{summary()} shows the number of groups per random effects level (if present). \item \code{mclogit()} and \code{mblogit()} with random effects now work with \code{formula=}-argumements passed in variables. } } } \section{Version 0.7}{ \subsection{IMPROVEMENTS}{ \itemize{ \item The algorithm for fitting random-effects models tended to stop prematurely returning the starting values obtained using a methods of moments. It has been completely refactored and proceeds similar to the PQL algorithm in Professor Brian Ripley's MASS package: Now an inner step, in which a linear mixed model is fitted to a working dependent variable is nested into outer step iterations, in which the the working dependent variable is updated. \item Also, the PQL algorithm no longer builds on starting values from a no-random-effects model, because surprisingly this makes the algorithm more stable and not less. As a consequence, the algorithm does a much better job at avoiding divergence or running into numerical difficulties. \item The PQL estimator for random-effects model uses a (inverse) Cholesky factor parametrisation, which makes sure that random-effects (Co-)Variance matrices are always positive (semi-)definite. } } } \section{Version 0.6}{ \subsection{NEW FEATURES}{ \itemize{ \item \code{mclogit} now complains about (i.e. throws an error exception) when the random effects structure cannot be estimated, e.g. because random effects are constant within choice sets and therefore drop out by the formation of conditional logits. } } \subsection{BUGFIXES}{ \itemize{ \item \code{mblogit} now handles responses with only two columns. \item \code{mblogit} now can deal with matrix responses that have rows that sum to zero. \item \code{mclogit} and \code{mblogit} now return a component named "df.residual" instead of "residual.df". } } } \section{Version 0.5}{ \subsection{NEW FEATURES}{ \itemize{ \item \code{mclogit} now supports conditional logit models with random slopes. \item \code{mblogit} now supports multinomial baseline models with random intercept and random slopes. } } \subsection{BUGFIXES}{ \itemize{ \item \code{predict} methods of objects created by \code{mclogit} and \code{mblogit} are better in handling missing data. } } } \section{Version 0.4}{ \subsection{NEW FEATURES}{ \itemize{ \item New \code{nobs} and \code{extractAIC} methods for \code{mclogit} objects, so that \code{drop1.default} should work with these. \item New function \code{mblogit} to fit multinomial baseline logit models. \item \code{mclogit} \code{mclogit.fit} now allow user-provided starting values. } } \subsection{BUGFIXES}{ \itemize{ \item \code{getSummary} methods now return "contrasts" and "xlevels" components. \item Fixed prediction method for \code{mclogit} results. \item Corrected handling of weights and standard errors of prediction. \item Matrices returned by the \code{mclogit} method of \code{vcov()} have row and column names. \item The number of observations is now displayed where it was not before. \item \code{nobs} is defined in \code{mclogit.fit.rePQL}. } } \subsection{USER-VISIBLE CHANGES}{ \itemize{ \item \code{mclogit.fit} and \code{mclogit.fit.rePQL} are exported to enable their use by other packages. } } }