qvcalc/0000755000176000001440000000000012464245000011550 5ustar ripleyusersqvcalc/NAMESPACE0000644000176000001440000000014710404066011012764 0ustar ripleyusersexport(qvcalc, worstErrors, indentPrint) S3method(print, qv) S3method(summary, qv) S3method(plot, qv) qvcalc/R/0000755000176000001440000000000012464223612011756 5ustar ripleyusersqvcalc/R/qvcalc.R0000644000176000001440000002611212464223612013354 0ustar ripleyusersqvcalc <- function(object, factorname = NULL, coef.indices = NULL, labels = NULL, dispersion = NULL, estimates = NULL, modelcall = NULL) { coef.indices.saved <- coef.indices if (!is.matrix(object)) { model <- object ## special case of an unstructured Bradley-Terry model ## more standard lm, glm, etc. objects if (is.null(factorname) && is.null(coef.indices)) { stop("arguments \"factorname\" and \"coef.indices\" are both NULL") } if (is.null(coef.indices)) { ## try to use factorname term.index <- which(attr(terms(model),"term.labels") == factorname) modelmat <- model.matrix(model) has.coef <- colnames(modelmat) %in% names(coef(model)) coef.indices <- which(attr(modelmat,"assign")[has.coef] == term.index) if (length(model$xlevels[[factorname]]) == length(coef.indices)){ ## factor has no constraint applied, eg if no intercept in model contmat <- diag(length(coef.indices))} else { contmat <- eval(call(model$contrasts[[factorname]], model$xlevels[[factorname]]))} # rownames(contmat) <- model$xlevels[[factorname]] ## not needed? if (is.null(estimates)) estimates <- contmat %*% coef(model)[coef.indices] covmat <- vcov(model, dispersion = dispersion) covmat <- covmat[coef.indices, coef.indices, drop = FALSE] covmat <- contmat %*% covmat %*% t(contmat) } else { k <- length(coef.indices) refPos <- numeric(0) if (0 %in% coef.indices) { ## there's a reference level to include refPos <- which(coef.indices == 0) coef.indices <- coef.indices[-refPos] } covmat <- vcov(model, dispersion = dispersion) covmat <- covmat[coef.indices, coef.indices, drop = FALSE] if (is.null(estimates)) estimates <- coef(model)[coef.indices] if (length(refPos) == 1) { if (length(estimates) != k) estimates <- c(0, estimates) covmat <- cbind(0, rbind(0, covmat)) names(estimates)[1] <- rownames(covmat)[1] <- colnames(covmat)[1] <- "(reference)" if (refPos != 1) { if (refPos == k){ perm <- c(2:k, 1) } else { perm <- c(2:refPos, 1, (refPos + 1):k) } estimates <- estimates[perm] covmat <- covmat[perm, perm, drop = FALSE] } } } return(qvcalc(covmat, factorname = factorname, coef.indices = coef.indices.saved, labels = labels, dispersion = dispersion, estimates = estimates, modelcall = model$call) ) } else { ## the basic QV calculation, on a covariance matrix if (inherits(object, "BTabilities")) { ## catch this special case first vc <- vcov(object) cf <- coef(object) if (is.null(factorname)) factorname <- attr(object, "factorname") if (is.null(modelcall)) modelcall <- attr(object, "modelcall") return(qvcalc(vc, factorname = factorname, labels = labels, dispersion = dispersion, estimates = cf, modelcall = modelcall)) } covmat <- object if (!is.null(labels)) rownames(covmat) <- colnames(covmat) <- labels n <- dim(covmat)[1] if (n <= 2) stop( "qvcalc works only for factors with 3 or more levels") simple.contrasts <- function(n, levelnames = 1:n){ result <- list() for (i in 1:(n-1)){ for (j in (i+1):n){ result[[paste(levelnames[i], levelnames[j], sep = ",")]] <- c(i, j)}} result } qvdesign <- function(n){ nrows <- choose(n, 2) m <- matrix(0, nrows, n) indices <- simple.contrasts(n) for (i in 1:nrows){ m[i, indices[[i]][1]] <- 1 m[i, indices[[i]][2]] <- 1} m } level <- qvdesign(n) contrast.variance <- function(contrast, covmat){ if (!(is.matrix(covmat) && (dim(covmat)[1] == dim(covmat)[2]))) stop("covmat must be a square matrix") n <- dim(covmat)[1] if (length(contrast) == n && sum(contrast) == 0) ## arbitrary contrast vector return(as.vector(contrast %*% covmat %*% contrast)) if (length(contrast) == 2 && all(contrast %in% 1:n)){ ## simple contrast specified as an index pair i <- contrast[1] j <- contrast[2] return(covmat[i,i] + covmat[j,j] - 2*covmat[i,j])} else stop("invalid contrast") } simple.contrast.variances <- function(n, covmat){ if (!is.null(rownames(covmat))) levelnames <- rownames(covmat) else levelnames <- 1:n sapply(simple.contrasts(n, levelnames), function(contrast){contrast.variance(contrast, covmat)}) } response <- simple.contrast.variances(n,covmat) if (any(response <= 0)) { stop("not all contrasts have positive variance") } else response <- log(response) expLinear <- structure(list( family = "expLinear", link = "exp", linkfun = function(mu) exp(mu), linkinv = function(eta) log(eta), variance = function(mu) rep(1, length(mu)), dev.resids = function(y, mu, wt) wt * ((y - mu)^2), aic = function(y, n, mu, wt, dev) sum(wt) * (log(dev/sum(wt) * 2 * pi) + 1) + 2, mu.eta = function (eta) 1/eta, initialize = expression({ n <- rep(1, nobs) mustart <- y}), validmu = function(mu) TRUE), class = "family") model <- glm(response ~ 0 + level, family = expLinear) qv <- coef(model) NAs <- rep(NA, length(qv)) if (!is.null(rownames(covmat))) names(qv) <- rownames(covmat) frame <- data.frame(estimate = NAs, SE = sqrt(diag(covmat)), quasiSE = sqrt(qv), quasiVar = qv, row.names = names(qv)) if (!is.null(estimates)) frame$estimate <- estimates relerrs <- sqrt(exp(- residuals(model))) - 1 ## The above formula was corrected in v0.8-9; it ## previously said 1 - sqrt(exp(residuals(model)), which is ## not what should be expected for "relative error" here. ## This corrected version agrees with the Biometrika paper. ## Thanks to Shaun Killingbeck for spotting this error in the ## previous version. names(relerrs) <- names(response) return(structure(list(covmat = covmat, qvframe = frame, dispersion = dispersion, relerrs = relerrs, factorname = factorname, coef.indices = coef.indices, modelcall = modelcall), class="qv"))} } worstErrors <- function(qv.object) { reducedForm <- function(covmat, qvmat){ nlevels <- dim(covmat)[1] firstRow <- covmat[1, ] ones <- rep(1, nlevels) J <- outer(ones, ones) notzero <- 2:nlevels r.covmat <- covmat + (firstRow[1]*J) - outer(firstRow, ones) - outer(ones, firstRow) r.covmat <- r.covmat[notzero, notzero] qv1 <- qvmat[1, 1] r.qvmat <- (qvmat + qv1*J)[notzero, notzero] list(r.covmat, r.qvmat)} covmat <- qv.object$covmat qvmat <- diag(qv.object$qvframe$quasiVar) r.form <- reducedForm(covmat, qvmat) r.covmat <- r.form[[1]] r.qvmat <- r.form[[2]] inverse.sqrt <- solve(chol(r.covmat)) evalues <- eigen(t(inverse.sqrt) %*% r.qvmat %*% inverse.sqrt, symmetric=TRUE)$values sqrt(c(min(evalues), max(evalues))) - 1 } indentPrint <- function(object, indent = 4, ...){ zz <- "" tc <- textConnection("zz", "w", local = TRUE) sink(tc) try(print(object, ...)) sink() close(tc) indent <- paste(rep(" ", indent), sep = "", collapse = "") cat(paste(indent, zz, sep = ""), sep = "\n")} print.qv <- function(x, ...){ print(x$qvframe) } summary.qv <- function(object, ...) { if (!is.null(object$modelcall)) cat("Model call: ", deparse(object$modelcall), "\n") if (!is.null(object$dispersion)) cat("Dispersion: ", object$dispersion, "\n") if (!is.null(object$factorname)) cat("Factor name: ",object$factorname,"\n") indentPrint(object$qvframe,...) if (!is.null(object$relerrs)){ minErrSimple <- round(100*min(object$relerrs), 1) maxErrSimple <- round(100*max(object$relerrs), 1) errors<-worstErrors(object) minErrOverall<-round(100*errors[1], 1) maxErrOverall<-round(100*errors[2], 1) cat("Worst relative errors in SEs of simple contrasts (%): ", minErrSimple, maxErrSimple, "\n") cat("Worst relative errors over *all* contrasts (%): ", minErrOverall, maxErrOverall, "\n") } } plot.qv <- function(x, intervalWidth = 2, ylab = "estimate", xlab = x$factorname, ylim = NULL, main = "Intervals based on quasi standard errors", levelNames = NULL, ...) { frame <- x$qvframe if (!is.null(levelNames)) { if (nrow(frame) != length(levelNames)) stop( "levelNames is not a vector of the right length" ) row.names(frame) <- levelNames } if (is.null(frame$quasiSE)) stop("Cannot plot, because there are no quasi standard errors") if (is.na(frame$estimate[1])) stop("No parameter estimates to plot") if (any(is.nan(frame$quasiSE))) stop(paste("No comparison intervals available,\n", "since one of the quasi variances is negative.", " See ?qvcalc for more.", sep = "")) faclevels <- factor(row.names(frame), levels = row.names(frame)) xvalues <- seq(along = faclevels) est <- frame$estimate se <- frame$quasiSE tops <- est + (intervalWidth * se) tails <- est - (intervalWidth * se) range <- max(tops) - min(tails) if (is.null(ylim)) ylim <- c(min(tails) - range/10, max(tops) + range/10) if (is.null(xlab)) xlab <- "factor level" plot(faclevels, frame$estimate, border = "transparent", ylim = ylim, xlab = xlab, ylab = ylab, main = main, ...) points(frame$estimate, ...) segments(xvalues, tails, xvalues, tops) invisible(x) } qvcalc/MD50000644000176000001440000000052212464245000012057 0ustar ripleyusers519e1447a7623fad549e778bd5583c20 *DESCRIPTION 755aeabf1a497103b963957370989667 *NAMESPACE c622353dd7bbfaecfe4b8f09a9683b06 *R/qvcalc.R 2b1549ea018f65892ce3b5378a0c0030 *man/indentPrint.Rd 109288b4bf5065d81f254f69f6913071 *man/plot.qv.Rd 74adae2966c9758cea57bbf615e04d81 *man/qvcalc.Rd fb57525cb3410ce1f24c199b0b461a99 *man/worstErrors.Rd qvcalc/DESCRIPTION0000644000176000001440000000073112464245000013257 0ustar ripleyusersPackage: qvcalc Version: 0.8-9 Date: 2015-02-03 Title: Quasi Variances for Factor Effects in Statistical Models Author: David Firth Maintainer: David Firth URL: http://warwick.ac.uk/qvcalc Description: Functions to compute quasi variances and associated measures of approximation error Suggests: relimp, MASS License: GPL-2 | GPL-3 Packaged: 2015-02-03 20:26:53 UTC; david NeedsCompilation: no Repository: CRAN Date/Publication: 2015-02-03 23:21:51 qvcalc/man/0000755000176000001440000000000012464227415012335 5ustar ripleyusersqvcalc/man/indentPrint.Rd0000644000176000001440000000114610377456203015124 0ustar ripleyusers\name{indentPrint} \alias{indentPrint} \title{Print with Line Indentation} \description{ Same as \code{\link{print}}, but adds a specified amount of white space at the start of each printed line } \usage{ indentPrint(object, indent=4, ...) } \arguments{ \item{object}{any printable object } \item{indent}{a non-negative integer, the number of spaces to insert} \item{\dots}{other arguments to pass to \code{\link{print}}} } \value{ \code{object} is returned invisibly } \author{David Firth, \email{d.firth@warwick.ac.uk}} \examples{ indentPrint("this indented by 10 spaces", indent=10) } \keyword{IO} qvcalc/man/worstErrors.Rd0000644000176000001440000000277512464224225015206 0ustar ripleyusers\name{worstErrors} \alias{worstErrors} \title{Accuracy of a Quasi-variance Approximation} \description{ Computes the worst relative error, among all contrasts, for the standard error as derived from a set of quasi variances. For details of the method see Menezes (1999) or Firth and Menezes (2004). } \usage{ worstErrors(qv.object) } \arguments{ \item{qv.object}{An object of class \code{qv}} } \value{ A numeric vector of length 2, the worst negative relative error and the worst positive relative error. } \references{ Firth, D. and Mezezes, R. X. de (2004) Quasi-variances. \emph{Biometrika} \bold{91}, 69--80. McCullagh, P. and Nelder, J. A. (1989) \emph{Generalized Linear Models}. London: Chapman and Hall. Menezes, R. X. (1999) More useful standard errors for group and factor effects in generalized linear models. \emph{D.Phil. Thesis}, Department of Statistics, University of Oxford. } \author{David Firth, \email{d.firth@warwick.ac.uk}} \seealso{\code{\link{qvcalc}}} \examples{ ## Overdispersed Poisson loglinear model for ship damage data ## from McCullagh and Nelder (1989), Sec 6.3.2 library(MASS) data(ships) ships$year <- as.factor(ships$year) ships$period <- as.factor(ships$period) shipmodel <- glm(formula = incidents ~ type + year + period, family = quasipoisson, data = ships, subset = (service > 0), offset = log(service)) shiptype.qvs <- qvcalc(shipmodel, "type") summary(shiptype.qvs, digits = 4) worstErrors(shiptype.qvs) } \keyword{regression} \keyword{models} qvcalc/man/qvcalc.Rd0000644000176000001440000001456712464227253014112 0ustar ripleyusers\name{qvcalc} \alias{qvcalc} \alias{summary.qv} \alias{print.qv} \title{Quasi Variances for Model Coefficients} \description{ Computes a set of quasi variances (and corresponding quasi standard errors) for estimated model coefficients relating to the levels of a categorical (i.e., factor) explanatory variable. For details of the method see Firth (2000), Firth (2003) or Firth and de Menezes (2004). Quasi variances generalize and improve the accuracy of \dQuote{floating absolute risk} (Easton et al., 1991). } \usage{ qvcalc(object, factorname = NULL, coef.indices = NULL, labels = NULL, dispersion = NULL, estimates = NULL, modelcall = NULL) } \arguments{ \item{object}{A model (of class lm, glm, etc.), or the covariance (sub)matrix for the estimates of interest, or an object of class \code{Btabilities}} \item{factorname}{Either \code{NULL}, or a character vector of length 1} \item{coef.indices}{Either \code{NULL}, or a numeric vector of length at least 3} \item{labels}{An optional vector of row names for the \code{qvframe} component of the result (redundant if \code{object} is a model)} \item{dispersion}{an optional scalar multiplier for the covariance matrix, to cope with overdispersion for example} \item{estimates}{an optional vector of estimated coefficients (redundant if \code{object} is a model)} \item{modelcall}{optional, the call expression for the model of interest (redundant if \code{object} is a model)} } \details{ If \code{object} is a model, then at least one of \code{factorname} or \code{coef.indices} must be non-\code{NULL}. The value of \code{coef.indices}, if non-\code{NULL}, determines which rows and columns of the model's variance-covariance matrix to use. If \code{coef.indices} contains a zero, an extra row and column are included at the indicated position, to represent the zero variances and covariances associated with a reference level. If \code{coef.indices} is \code{NULL}, then \code{factorname} should be the name of a factor effect in the model, and is used in order to extract the necessary variance-covariance estinmates. Ordinarily the quasi variances are positive and so their square roots (the quasi standard errors) exist and can be used in plots, etc. Occasionally one (and only one) of the quasi variances is negative, and so the corresponding quasi standard error does not exist (it appears as \code{NaN}). This is fairly rare in applications, and when it occurs it is because the factor of interest is strongly correlated with one or more other predictors in the model. It is not an indication that quasi variances are inaccurate. An example is shown below using data from the \code{car} package: the quasi variance approximation is exact (since \code{type} has only 3 levels), and there is a negative quasi variance. The quasi variances remain perfectly valid (they can be used to obtain inference on any contrast), but it makes no sense to plot `comparison intervals' in the usual way since one of the quasi standard errors is not a real number. } \value{ A list of class \code{qv}, with components \item{covmat}{the full variance-covariance matrix for the estimated coefficients corresponding to the factor of interest} \item{qvframe}{a data frame with variables \code{estimate}, \code{SE}, \code{quasiSE} and \code{quasiVar}, the last two being a quasi standard error and quasi-variance for each level of the factor of interest} \item{relerrs}{relative errors for approximating the standard errors of all simple contrasts} \item{factorname}{the factor name if given} \item{coef.indices}{the coefficient indices if given} \item{modelcall}{if \code{object} is a model, \code{object$call}; otherwise \code{NULL}} } \references{ Easton, D. F, Peto, J. and Babiker, A. G. A. G. (1991) Floating absolute risk: an alternative to relative risk in survival and case-control analysis avoiding an arbitrary reference group. \emph{Statistics in Medicine} \bold{10}, 1025--1035. Firth, D. (2000) Quasi-variances in Xlisp-Stat and on the web. \emph{Journal of Statistical Software} \bold{5.4}, 1--13. At \url{http://www.jstatsoft.org} Firth, D. (2003) Overcoming the reference category problem in the presentation of statistical models. \emph{Sociological Methodology} \bold{33}, 1--18. Firth, D. and de Mezezes, R. X. (2004) Quasi-variances. \emph{Biometrika} \bold{91}, 65--80. McCullagh, P. and Nelder, J. A. (1989) \emph{Generalized Linear Models}. London: Chapman and Hall. Menezes, R. X. (1999) More useful standard errors for group and factor effects in generalized linear models. \emph{D.Phil. Thesis}, Department of Statistics, University of Oxford. } \author{David Firth, \email{d.firth@warwick.ac.uk}} \seealso{\code{\link{worstErrors}}, \code{\link{plot.qv}}} \examples{ ## Overdispersed Poisson loglinear model for ship damage data ## from McCullagh and Nelder (1989), Sec 6.3.2 library(MASS) data(ships) ships$year <- as.factor(ships$year) ships$period <- as.factor(ships$period) shipmodel <- glm(formula = incidents ~ type + year + period, family = quasipoisson, data = ships, subset = (service > 0), offset = log(service)) shiptype.qvs <- qvcalc(shipmodel, "type") summary(shiptype.qvs, digits = 4) plot(shiptype.qvs) ## Quasi-variance summary for "ability" estimates in a Bradley-Terry model ## Requires the "BradleyTerry2" package \dontrun{ library(BradleyTerry2) example(baseball) baseball.qv <- qvcalc(BTabilities(baseballModel2)) print(baseball.qv) plot(baseball.qv, levelNames = c("Bal", "Bos", "Cle", "Det", "Mil", "NY", "Tor")) } ## Example of a negative quasi variance ## Requires the "car" package \dontrun{ library(car) data(Prestige) attach(Prestige) mymodel <- lm(prestige ~ type + education) library(qvcalc) type.qvs <- qvcalc(mymodel, "type") ## Warning message: ## In sqrt(qv) : NaNs produced summary(type.qvs) ## Model call: lm(formula = prestige ~ type + education) ## Factor name: type ## estimate SE quasiSE quasiVar ## bc 0.000000 0.000000 2.874361 8.261952 ## prof 6.142444 4.258961 3.142737 9.876793 ## wc -5.458495 2.690667 NaN -1.022262 ## Worst relative errors in SEs of simple contrasts (\%): 0 0 ## Worst relative errors over *all* contrasts (\%): 0 0 plot(type.qvs) ## Error in plot.qv(type.qvs) : No comparison intervals available, ## since one of the quasi variances is negative. See ?qvcalc for more. } } \keyword{models} \keyword{regression} qvcalc/man/plot.qv.Rd0000644000176000001440000000514412464204236014227 0ustar ripleyusers\name{plot.qv} \alias{plot.qv} \title{ Plot method for objects of class qv } \description{ Provides visualization of estimated contrasts using intervals based on quasi standard errors. } \usage{ \method{plot}{qv}(x, intervalWidth = 2, ylab = "estimate", xlab = x$factorname, ylim = NULL, main = "Intervals based on quasi standard errors", levelNames = NULL, ...) } \arguments{ \item{x}{ an object of class \code{"qv"}, typically the result of calling \code{\link{qvcalc}}} \item{intervalWidth}{ the half-width, in quasi standard errors, of the plotted intervals} \item{ylab}{ as for \code{\link{plot.default}}} \item{xlab}{ as for \code{\link{plot.default}}} \item{ylim}{ as for \code{\link{plot.default}}} \item{main}{ as for \code{\link{plot.default}}} \item{levelNames}{ labels to be used on the x axis for the levels of the factor whose effect is plotted } \item{\dots}{ other arguments understood by \code{plot} } } \details{ If \code{levelNames} is unspecified, the row names of \code{x$qvframe} will be used. } \value{ \code{invisible(x)} } \references{ Easton, D. F, Peto, J. and Babiker, A. G. A. G. (1991) Floating absolute risk: an alternative to relative risk in survival and case-control analysis avoiding an arbitrary reference group. \emph{Statistics in Medicine} \bold{10}, 1025--1035. Firth, D. (2000) Quasi-variances in Xlisp-Stat and on the web. \emph{Journal of Statistical Software} \bold{5.4}, 1--13. At \url{http://www.jstatsoft.org} Firth, D. (2003) Overcoming the reference category problem in the presentation of statistical models. \emph{Sociological Methodology} \bold{33}, 1--18. Firth, D. and Mezezes, R. X. de (2004) Quasi-variances. \emph{Biometrika} \bold{91}, 65--80. McCullagh, P. and Nelder, J. A. (1989) \emph{Generalized Linear Models}. London: Chapman and Hall. Menezes, R. X. (1999) More useful standard errors for group and factor effects in generalized linear models. \emph{D.Phil. Thesis}, Department of Statistics, University of Oxford. } \author{David Firth, \email{d.firth@warwick.ac.uk}} \seealso{ \code{\link{qvcalc}} } \examples{ ## Overdispersed Poisson loglinear model for ship damage data ## from McCullagh and Nelder (1989), Sec 6.3.2 library(MASS) data(ships) ships$year <- as.factor(ships$year) ships$period <- as.factor(ships$period) shipmodel <- glm(formula = incidents ~ type + year + period, family = quasipoisson, data = ships, subset = (service > 0), offset = log(service)) shiptype.qvs <- qvcalc(shipmodel, "type") summary(shiptype.qvs, digits=4) plot(shiptype.qvs) } \keyword{ models } \keyword{ hplot }