vcdExtra/0000755000176200001440000000000014471023412012030 5ustar liggesusersvcdExtra/NAMESPACE0000644000176200001440000000720514430460317013257 0ustar liggesusers# last modified 29 Oct 2014 by M. Friendly # - removed aperm, now that aperm.table is in base # - removed meanResiduals, which is now in gnm # + added HLtest & methods # + added CMHtest # + added functions in loglin-utilities # + added logseries functions # + added seq_mosaic # + added logLik.loglm, blogits # - removed as.array generic, now in base # + added cutfac # + now required to import: grid # - removed loddsratio (now in vcd) # - removed print.Kappa (now in vcd) # + import from ca; export mpcaplot # we are a vcd extension; do we need to import gnm here? import(vcd) import(gnm) import(grid) #import(rgl) importFrom(MASS, loglm) # add inports for all base packages importFrom("grDevices", "hsv") importFrom("stats", "as.formula", "deviance", "family", "fitted", "formula", "glm", "logLik", "loglin", "model.frame", "na.pass", "nobs", "pchisq", "poisson", "qnorm", "quantile", "reformulate", "residuals", "rstandard", "runif", "terms", "update", "xtabs") importFrom("utils", "data", "menu", "type.convert") importFrom("ca", "cacoord", "multilines") importFrom("grDevices", "rgb") importFrom("graphics", "abline", "plot", "points", "text") importFrom("stats", "update.formula") # for datasets vignette importFrom("here", "here") importFrom("readxl", "read_excel") importFrom("glue", "glue", "glue_collapse") importFrom("purrr", "map") importFrom("stringr", "str_split_1") importFrom("tidyr", "separate_longer_delim") importFrom("dplyr", "select", "rename", "mutate", "group_by", "summarise", "left_join", "relocate") # joy to the world export( expand.dft,expand.table,collapse.table, mosaic.glm, sieve.glm, assoc.glm, modFit, modFit.glm, modFit.loglm, GKgamma,print.GKgamma, glmlist, loglmlist, summarise, logLik.loglm, # print.Kappa, mosaic3d, split3d, range3d, center3d, Kway, Crossings, datasets, # loddsratio, # as.array, 
# aperm, meanResiduals, HLtest, HosmerLemeshow, CMHtest, conditional, joint, markov, mutual, saturated, loglin2formula, loglin2string, seq_loglm, dlogseries, plogseries, qlogseries, rlogseries, seq_mosaic, blogits, Summarise, LRstats, cutfac, mcaplot, update.xtabs, zero.test ) # basic methods for glm & loglm objects S3method(mosaic, glm) S3method(sieve, glm) S3method(assoc, glm) S3method(modFit, glm) S3method(modFit, loglm) S3method(mosaic, glmlist) S3method(mosaic, loglmlist) S3method(update, xtabs) #S3method(summarise, glm) #S3method(summarise, glmlist) #S3method(summarise, loglm) #S3method(summarise, loglmlist) S3method(Summarise, glmlist) S3method(Summarise, loglmlist) S3method(Summarise, default) S3method(LRstats, glmlist) S3method(LRstats, loglmlist) S3method(LRstats, default) S3method(coef, glmlist) S3method(logLik, loglm) S3method(print, GKgamma) #S3method(print, Kappa) # moved to vcd # mosaic3d related S3method(split3d, shape3d) S3method(split3d, list) S3method(mosaic3d, default) S3method(mosaic3d, loglm) # logoddsratio related methods -- moved to vcd #S3method(loddsratio, default) #S3method(coef, loddsratio) #S3method(confint, loddsratio) #S3method(dim, loddsratio) #S3method(dimnames, loddsratio) #S3method(print, loddsratio) #S3method(vcov, loddsratio) #S3method(as.matrix, loddsratio) #S3method(as.array, loddsratio) #S3method(as.data.frame, loddsratio) # aperm method removed, as this is now in stats #S3method(aperm, default) #S3method(aperm, table) S3method(print, HLtest) S3method(summary, HLtest) S3method(plot, HLtest) S3method(rootogram, HLtest) S3method(print, CMHtest) S3method(CMHtest,formula) S3method(CMHtest,default) vcdExtra/demo/0000755000176200001440000000000014422306401012752 5ustar liggesusersvcdExtra/demo/mosaic3d-demo.R0000644000176200001440000000572714422306401015534 0ustar liggesusers### mosaic3d-demo: proof-of-concept for exploring 3D mosaic plots ## ## split a 3D object along a given dimension, dim, into copies whose ## extent along that 
dimension are given by the proportions in vector p ## (rescaled to proportions if they are not already so). ## ## The objects are slightly separated along that dimension, allowing ## a total inter-object space = space ## split3d <- function(obj, p, dim, space=.10) { range <-range3d(obj) min <- range[1,] p <- p/sum(p) # assure proportions uspace <- space/(length(p)-1) # unit space between objects scales <- p * (1-space) shifts <- c(0, cumsum(p)[-length(p)])*diff(range[,dim]) result <- list() for (i in seq_along(p)) { xscale <- yscale <- zscale <- 1 xshift <- yshift <- zshift <- 0 if (dim == 1) { xscale <- scales[i] xshift <- shifts[i] + min[1]*(1-xscale) + (uspace * (i-1)) } else if (dim == 2) { yscale <- scales[i] yshift <- shifts[i] + min[2]*(1-yscale) + (uspace * (i-1)) } else if (dim == 3) { zscale <- scales[i] zshift <- shifts[i] + min[3]*(1-zscale) + (uspace * (i-1)) } result[[i]] <- translate3d(scale3d(obj, xscale, yscale, zscale), xshift, yshift, zshift) } result } range3d <- function(obj) { if (!"vb" %in% names(obj)) stop("Not a mesh3d or shape3d object") x <- with(obj, range(vb[1,]/vb[4,])) y <- with(obj, range(vb[2,]/vb[4,])) z <- with(obj, range(vb[3,]/vb[4,])) result <- cbind(x,y,z) rownames(result)<- c('min', 'max') result } label3d <- function(objlist, dim, text, offset=.1, adj=c(0.5, 1), ...) { ranges <- lapply(objlist, range3d) loc <- t(sapply(ranges, colMeans)) # positions of labels on dimension dim min <- t(sapply(ranges, function(x) x[1,])) # other dimensions at min values xyz <- min - offset xyz[,dim] <- loc[,dim] texts3d(xyz, texts=text, adj=adj, ...) 
} library(rgl) # level 1 open3d() # use transparent colors for the side walls of mosaic cubes crgb <- col2rgb(c("red", "gray90", "blue"))/255 clr <-rbind(crgb, alpha=0.5) col <- rgb(clr[1,], clr[2,], clr[3,], clr[4,]) #col <- c("#FF000080", "#E5E5E580", "#0000FF80") sl0 <- cube <- cube3d(alpha=0.3) sl1 <- split3d(sl0, c(.2, .3, .5), 1) shapelist3d(sl1, col=col) label3d(sl1, 1, c("A1", "A2", "A3")) # level 2 open3d() sl2 <- list() for (i in seq_along(sl1)) { p <- runif(1, .2, .8) sl2 <- c(sl2, split3d(sl1[[i]], c(p, 1-p), 2, space=.1)) } shapelist3d(sl2, col=col) label3d(sl1, 1, c("A1", "A2", "A3")) label3d(sl2[1:2], 2, c("B1", "B2")) # level 3 open3d() sl3 <- list() for (i in seq_along(sl2)) { p <- runif(1, .2, .8) sl3 <- c(sl3, split3d(sl2[[i]], c(p, 1-p), 3, space=.05)) } shapelist3d(sl3, col=col) label3d(sl1, 1, c("A1", "A2", "A3")) label3d(sl2[1:2], 2, c("B1", "B2")) #label3d(sl3[1:2], 3, c("C1", "C2")) label3d(sl3[1:2], 3, c("C1", "C2"), adj=rev(c(0.5, 1))) vcdExtra/demo/mosaic3d-hec.R0000644000176200001440000000127514422306401015341 0ustar liggesusers## mosaic3d-hec 2D and 3D visualizations of HairEyeColor data library(vcdExtra) # two-way mosaic displays HairEye <- margin.table(HairEyeColor, c(1,2)) HairEye mosaic(HairEye, shade=TRUE) # three-way mosaic displays structable(HairEyeColor) # mutual independence model mosaic(HairEyeColor, shade=TRUE) # joint independence of Hair*Eye with Sex mosaic(HairEyeColor, expected =~(Hair*Eye)+Sex) # observed frequencies, mutual independence mosaic3d(HairEyeColor) # expected frequencies under mutual independence mosaic3d(HairEyeColor, type="expected") # expected frequencies under joint independence mosaic3d(HairEyeColor, type="expected", expected =~(Hair*Eye)+Sex) vcdExtra/demo/Wong3-1.R0000644000176200001440000001041114422306401014225 0ustar liggesusers# Wong3-1 Political views, support for women to work and national welfare spending library(vcdExtra) # Data from Wong, R. 
(2010), Association Models, Los Angeles: Sage, Number 07-164 # Table 3.1, from the General Social Survey, 2006. # Questions: # polviews: Think of self as liberal or conservative # fefam: Better for men to work and women to tend home # natfare: National welfare spending: too little, about right, too much # Table 3.1 Freq<-c( 9, 5, 5, 1, 1, 6, 5, 1, 2, 2, 2, 1, 17,13, 7, 4, 13,22, 9, 1, 7,13, 6, 2, 8,14, 6, 0, 10,29,10, 0, 5,14, 6, 2, 20,38,24, 8, 23,72,34,10, 17,67,36,12, 4,21,12, 4, 7,30, 9, 1, 9,19,14, 2, 2, 9, 8, 3, 1,16,19, 2, 11,28,28,11, 0, 1, 5, 0, 2, 3, 3, 2, 2, 7, 6, 6) polviews<-gl(7,4*3) fefam<-gl(4,1,length=7*4*3) natfare<-gl(3,4,length=7*4*3) long.vnames <- list(set_varnames = c(polviews="Political views", fefam="Females should tend home", natfare="National welfare spending")) long.lnames <- list(polviews = c("Lib++", "2", "3", "Moderate", "5", "6", "Cons++"), fefam = c("Dis+", "Dis", "Agr", "Agr+"), natfare = c("--", "OK", "++") ) ############################################ Wong31 <- data.frame(Freq, polviews, fefam, natfare) Wong31.xtab <- xtabs(Freq ~ polviews+fefam+natfare, data=Wong31) dimnames(Wong31.xtab) <- long.lnames # Quick look at all pairwise associations pairs(Wong31.xtab, gp=shading_Friendly, diag_panel=pairs_diagonal_mosaic) ############################################ # Model 1 - Independence Model Wong31.O<-gnm(Freq~polviews+fefam+natfare,family=poisson, data=Wong31) summary(Wong31.O) mosaic(Wong31.O, main=paste("Independence model",modFit(Wong31.O)), labeling_args=long.vnames, set_labels=long.lnames, split_vertical=c(TRUE, FALSE, FALSE), labeling=labeling_residuals, suppress=2, gp=shading_Friendly) ############################################ # NB: add1 doesn't work with gnm() objects. 
Re-fit using glm() Wong31.O<-glm(Freq~polviews+fefam+natfare,family=poisson, data=Wong31) # consider all two-way terms add1(Wong31.O, ~.+(polviews + fefam + natfare)^2, test="Chisq") # same result with MASS::addterm addterm(Wong31.O, ~.+(polviews + fefam + natfare)^2, test="Chisq") # or, start with saturated model and drop terms Wong31.sat<-glm(Freq~polviews*fefam*natfare, family=poisson, data=Wong31) drop1(Wong31.sat, test="Chisq") ############################################ # Model 2 - Full Two-way Interaction Wong31.twoway <- update(Wong31.O, ~ .^2) summary(Wong31.twoway) mosaic(Wong31.twoway, main=paste("All two-way model", modFit(Wong31.twoway)), labeling_args=long.vnames, set_labels=long.lnames, split_vertical=c(TRUE, FALSE, FALSE), labeling=labeling_residuals, suppress=1, gp=shading_Friendly) ############################################ # Model 3 - Conditional Independence on polviews Wong31.cond1 <- glm(Freq~polviews * (fefam + natfare), family=poisson) summary(Wong31.cond1) mosaic(Wong31.cond1, main=paste("Cond1: ~P * (F+N)", modFit(Wong31.cond1)), labeling_args=long.vnames, set_labels=long.lnames, split_vertical=c(TRUE, FALSE, FALSE), labeling=labeling_residuals, suppress=1, gp=shading_Friendly) ############################################ # Model 4 - Conditional Independence on fefam Wong31.cond2 <- glm(Freq~polviews*fefam + fefam*natfare,family=poisson) summary(Wong31.cond2) mosaic(Wong31.cond2, main=paste("Cond2: ~F * (P+N)", modFit(Wong31.cond2)), labeling_args=long.vnames, set_labels=long.lnames, split_vertical=c(TRUE, FALSE, FALSE), labeling=labeling_residuals, suppress=1, gp=shading_Friendly) ############################################ # Model 5 - Conditional Independence on natfare Wong31.cond3<-glm(Freq~fefam*natfare+polviews*natfare,family=poisson) summary(Wong31.cond3) mosaic(Wong31.cond3, main=paste("Cond2: ~N * (F+N)", modFit(Wong31.cond3)), labeling_args=long.vnames, set_labels=long.lnames, split_vertical=c(TRUE, FALSE, FALSE), 
labeling=labeling_residuals, suppress=1, gp=shading_Friendly) anova(Wong31.O, Wong31.cond3, Wong31.cond2, Wong31.cond1,Wong31.twoway, Wong31.sat) vcdExtra/demo/housing.R0000644000176200001440000001115514422306401014554 0ustar liggesusers## housing.R Visualize models from example(housing, package="MASS") # These examples fit a variety of models to the data(housing), giving a 4-way # frequency table of 1681 individuals from the Copenhagen Housing Conditions # Survey, classified by their Type of rental dwelling, perceived Influence on # management of the property, and degree of Contact with other residents. The # response variable here is Satisfaction of householders with their present # housing circumstances. library(vcdExtra) library(MASS) data(housing, package="MASS") oldop <-options(contrasts = c("contr.treatment", "contr.poly")) ########################## # Poisson models for Freq, equivalent to loglinear models ########################## # Baseline model, with Satisfaction as a response house.glm0 <- glm(Freq ~ Infl*Type*Cont + Sat, family = poisson, data = housing) modFit(house.glm0) # labeling_args for mosaic() largs <- list(set_varnames = c( Infl="Influence on management", Cont="Contact among residents", Type="Type of dwelling", Sat="Satisfaction"), abbreviate=c(Type=3)) mosaic(house.glm0, labeling_args=largs, main='Baseline model: [ITC][Sat]') # reorder variables in the mosaic, putting Sat last mosaic(house.glm0, ~ Type+Infl+Cont+Sat, labeling_args=largs, main='Baseline model: [ITC][Sat]') # what terms need to be added? Consider main effects, interactions of Sat with each other MASS::addterm(house.glm0, ~. + Sat:(Infl+Type+Cont), test = "Chisq") # add all two way terms with Satisfaction house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl+Type+Cont)) # did it get better? 
anova(house.glm0, house.glm1, test="Chisq") # plot it mosaic(house.glm1, labeling_args=largs, main='Model [IS][TS][CS]', gp=shading_Friendly) # Same model, fit by iterative proportional scaling (house.loglm <- MASS::loglm(Freq ~ Infl*Type*Cont + Sat*(Infl+Type+Cont), data = housing)) # Can we drop any terms? MASS::dropterm(house.glm1, test = "Chisq") # Need to add any terms? MASS::addterm(house.glm1, ~. + Sat:(Infl+Type+Cont)^2, test = "Chisq") # add an interaction house.glm2 <- update(house.glm1, . ~ . + Sat:Infl:Type) LRstats(house.glm0, house.glm1, house.glm2) ########################## # Effect plots, for glm1 model ########################## library(effects) house.eff <-allEffects(house.glm1) # show the interactions of Infl, Cont and Type with Sat plot(house.eff, 'Infl:Sat', x.var='Sat', xlab="Satisfaction") plot(house.eff, 'Infl:Sat', x.var='Infl', xlab="Influence") # same plot in one panel, no std errors shown plot(house.eff, 'Infl:Sat', x.var='Sat', xlab="Satisfaction", multiline=TRUE) plot(house.eff, 'Cont:Sat', x.var='Sat', xlab="Satisfaction") plot(house.eff, 'Type:Sat', x.var='Sat', xlab="Satisfaction") ########################## # multinomial model ########################## library(nnet) # multinomial model, similar in spirit to house.glm1 (house.mult<- multinom(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)) # Do we need a more complex model? 
house.mult2 <- multinom(Sat ~ Infl*Type*Cont, weights = Freq, data = housing) anova(house.mult, house.mult2) # effect plots for multinomial model house.effm <- allEffects(house.mult) plot(house.effm, 'Infl', xlab='Influence on management', style="stacked", main="Multinomial: Infl effect plot") plot(house.effm, 'Cont', xlab='Contact among residents', style="stacked", main="Multinomial: Cont effect plot") plot(house.effm, 'Type', xlab='Type of dwelling', style="stacked", main="Multinomial: Type effect plot") ########################## # proportional odds model ########################## (house.plr <- polr(Sat ~ Infl + Type + Cont, data = housing, weights = Freq)) # Test proportional odds assumption by likelihood ratio test # NB: multinom() objects do not have a df.residual component, so we have # to use the difference in edf to get df for the test pchisq(deviance(house.plr) - deviance(house.mult), df = house.mult$edf -house.plr$edf, lower.tail = FALSE) # try more complex models house.plr2 <- stepAIC(house.plr, ~.^2) house.plr2$anova house.effp <- allEffects(house.plr) plot(house.effp, 'Infl', xlab='Influence on management', style="stacked", main="Proportional odds: Infl effect plot") plot(house.effp, 'Cont', xlab='Contact among residents', style="stacked", main="Proportional odds: Cont effect plot") plot(house.effp, 'Type', xlab='Type of dwelling', style="stacked", main="Proportional odds: Type effect plot") options(oldop) vcdExtra/demo/yaish-unidiff.R0000644000176200001440000000210614422306401015633 0ustar liggesusers# Yaish data: Unidiff model for 3-way table library(gnm) library(vcd) data(yaish) # Ignore orig==7 & dest == 7 with very few cases yaish <- yaish[,1:6,1:6] ## Fit mutual independence model. 
long.labels <- list(set_varnames = c(orig="Origin status", dest="Destination status", educ="Education")) mosaic(~orig + dest + educ, data=yaish, gp=shading_Friendly, labeling_args=long.labels) ## Fit conditional independence model mosaic(~orig + dest | educ, data=yaish, gp=shading_Friendly, labeling_args=long.labels) ## Fit the "UNIDIFF" mobility model across education levels ## unidiff <- gnm(Freq ~ educ*orig + educ*dest + Mult(Exp(educ), orig:dest), family = poisson, data = yaish, subset = (dest != 7 & orig != 7)) structable(round(residuals(unidiff), digits=2)) # can use mosaic.loglm, passing residuals mosaic(yaish[, 1:6, 1:6], residuals=residuals(unidiff), gp=shading_Friendly, labeling_args=long.labels) # what about mosaic.gnm? mosaic(unidiff, gp=shading_Friendly, labeling_args=long.labels) vcdExtra/demo/mental-glm.R0000644000176200001440000000456614422306401015145 0ustar liggesusers## Mental health data: mosaics for glm() and gnm() models library(gnm) library(vcdExtra) data(Mental) # display the frequency table (Mental.tab <- xtabs(Freq ~ mental+ses, data=Mental)) # fit independence model # Residual deviance: 47.418 on 15 degrees of freedom indep <- glm(Freq ~ mental+ses, family = poisson, data = Mental) deviance(indep) long.labels <- list(set_varnames = c(mental="Mental Health Status", ses="Parent SES")) mosaic(indep,residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, main="Mental health data: Independence") # as a sieve diagram mosaic(indep, labeling_args = long.labels, panel=sieve, gp=shading_Friendly, main="Mental health data: Independence") # fit linear x linear (uniform) association. 
Use integer scores for rows/cols Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) # column effects model (ses) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) mosaic(coleff,residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, suppress=1, gp=shading_Friendly, main="Mental health data: Col effects (ses)") # row effects model (mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) mosaic(roweff,residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, suppress=1, gp=shading_Friendly, main="Mental health data: Row effects (mental)") linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) # compare models anova(indep, roweff, coleff, linlin) AIC(indep, roweff, coleff, linlin) mosaic(linlin,residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, suppress=1, gp=shading_Friendly, main="Mental health data: Linear x Linear") ## Goodman Row-Column association model fits well (deviance 3.57, df 8) Mental$mental <- C(Mental$mental, treatment) Mental$ses <- C(Mental$ses, treatment) RC1model <- gnm(Freq ~ mental + ses + Mult(mental, ses), family = poisson, data = Mental) mosaic(RC1model,residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, suppress=1, gp=shading_Friendly, main="Mental health data: RC1 model") vcdExtra/demo/vision-quasi.R0000644000176200001440000000230714422306401015526 0ustar liggesusers# VisualAcuity data: Quasi- and Symmetry models library(vcdExtra) library(gnm) women <- subset(VisualAcuity, gender=="female", select=-gender) indep <- glm(Freq ~ right + left, data = women, family=poisson) mosaic(indep, residuals_type="rstandard", gp=shading_Friendly, main="Vision data: Independence (women)" ) quasi.indep <- glm(Freq ~ right + left + Diag(right, left), data = women, family = poisson) mosaic(quasi.indep, 
residuals_type="rstandard", gp=shading_Friendly, main="Quasi-Independence (women)" ) symmetry <- glm(Freq ~ Symm(right, left), data = women, family = poisson) # BUG FIXED mosaic(symmetry, residuals_type="rstandard", gp=shading_Friendly, main="Symmetry model (women)") quasi.symm <- glm(Freq ~ right + left + Symm(right, left), data = women, family = poisson) mosaic(quasi.symm, residuals_type="rstandard", gp=shading_Friendly, main="Quasi-Symmetry model (women)") # model comparisons: for *nested* models anova(indep, quasi.indep, quasi.symm, test="Chisq") anova(symmetry, quasi.symm, test="Chisq") # model summaries, with AIC and BIC models <- glmlist(indep, quasi.indep, symmetry, quasi.symm) LRstats(models) vcdExtra/demo/ucb-glm.R0000644000176200001440000000137014422306401014424 0ustar liggesusers# UCBAdmissions data: Conditional independence via loglm and glm library(vcd) data("UCBAdmissions") structable(Dept ~ Admit+Gender,UCBAdmissions) ## conditional independence in UCB admissions data mod.1 <- loglm(~ Dept * (Gender + Admit), data=UCBAdmissions) mod.1 # this is correct, except the Pearson residuals dont show that # all the lack of fit is concentrated in Dept A mosaic(mod.1, gp=shading_Friendly, labeling=labeling_residuals) library(vcdExtra) # using glm() berkeley <- as.data.frame(UCBAdmissions) mod.3 <- glm(Freq ~ Dept * (Gender+Admit), data=berkeley, family="poisson") summary(mod.3) # (BUG FIXED )the large residuals are all in Dept A mosaic(mod.3, residuals_type="rstandard", labeling=labeling_residuals) vcdExtra/demo/occStatus.R0000644000176200001440000001000214422306401015036 0ustar liggesusers# Occupational status data from Goodman (1979) and Duncan (1979) # Fit a variety of models. Compare mosaic using expected= to mosaic.glm # Occ status (1:8)-- professional, managerial, upper non-man, lower non-man, # ... 
unskilled library(gnm) library(vcdExtra) data(occupationalStatus, package="datasets") str(occupationalStatus) occupationalStatus # graphics::mosaicplot is the default plot method for a table plot(occupationalStatus, shade=TRUE) # define long labels for use in mosaics long.labels <- list(set_varnames = c(origin="origin: Son's status", destination="destination: Father's status")) mosaic(occupationalStatus, shade=TRUE, main="Occupational status: Independence model", labeling_args = long.labels, legend=FALSE) # the standard model of independence indep <- glm(Freq ~ origin + destination, family = poisson, data=occupationalStatus) # the same mosaic, using the fitted model mosaic(indep, main="Independence model", labeling_args = long.labels, legend=FALSE, gp=shading_Friendly) # fit the model of quasi-independence, ignoring the diagonal cells quasi <- gnm(Freq ~ origin + destination + Diag(origin,destination), family=poisson, data=occupationalStatus) #str(quasi$data) anova(quasi, test="Chisq") ## BUGLET (vcd): the diagonal cells should be considered 0 here--- outlined in black mosaic(occupationalStatus, expected=fitted(quasi), main="Quasi-independence model", labeling_args = long.labels, legend=FALSE, gp=shading_Friendly) ## using mosaic.gnm mosaic(quasi, main="Quasi-independence model", labeling_args = long.labels, legend=FALSE, gp=shading_Friendly, labeling=labeling_residuals) # symmetry model symmetry <- glm(Freq ~ Symm(origin, destination), family=poisson, data=occupationalStatus) # mosaic(occupationalStatus, expected=fitted(symmetry), main="Symmetry model", # gp=shading_Friendly, labeling=labeling_residuals, labeling_args = long.labels ) # using mosaic.glm --- OK mosaic(symmetry, main="Symmetry model", gp=shading_Friendly, labeling=labeling_residuals, labeling_args = long.labels ) quasi.symm <- glm(Freq ~ origin + destination + Symm(origin, destination), family=poisson, data=occupationalStatus) anova(quasi.symm) mosaic(occupationalStatus, expected=fitted(quasi.symm), 
main="Quasi-symmetry model") # model comparisons anova(independence, quasi, quasi.symm, test="Chisq") # compare symmetry to quasi summetry anova(symmetry, quasi.symm, test="Chisq") # association models # uniform association, aka linear x linear association Rscore <- as.vector(row(occupationalStatus)) Cscore <- as.vector(col(occupationalStatus)) uniform <- gnm(Freq ~ origin + destination + Rscore:Cscore, family=poisson, data=occupationalStatus) mosaic(uniform, main="Uniform association model", labeling_args = long.labels, legend=FALSE, gp=shading_Friendly, labeling=labeling_residuals ) RChomog <- gnm(Freq ~ origin + destination + Diag(origin, destination) + MultHomog(origin, destination), family=poisson, data=occupationalStatus) mosaic(RChomog, main="RC homogeneous model", labeling_args = long.labels, legend=FALSE, gp=shading_Friendly, labeling=labeling_residuals) # RC1 - heterogeneous association RC1 <- gnm(Freq ~ origin + destination + Diag(origin, destination) + Mult(origin, destination), family=poisson, data=occupationalStatus) mosaic(RC1, main="RC heterogeneous model", labeling_args = long.labels, legend=FALSE, gp=shading_Friendly, labeling=labeling_residuals) vcdExtra/demo/Wong2-3.R0000644000176200001440000001254014422306401014233 0ustar liggesusers# Wong2-3 Political views and support for women to work library(vcdExtra) # Data from Wong, R. (2010), Association Models, Los Angeles: Sage, Number 07-164 # Table 2.3A, from the General Social Survey, 1998-2000. 
# Questions: # polviews: Think of self as liberal or conservative # fefam: Better for men to work and women to tend home Freq<-c(39, 50, 18, 4, 140,178, 85, 23, 108,195, 97, 23, 238,598,363,111, 78,250,150, 55, 50,200,208, 74, 8, 29, 46, 21) polviews<- gl(7,4) fefam <- gl(4,1,length=28) # create better labels for levels in mosaic() ## but, this screws up parameter names: use set_labels instead #polviews <- factor(polviews, labels=c("Lib++", "2", "3", "Moderate", "5", "6", "Cons++")) #fefam <- factor(fefam, labels=c("Dis+", "Dis", "Agr", "Agr+")) # long.vnames <- list(set_varnames = c(polviews="Political views", fefam="Females should tend home")) long.lnames <- list(polviews = c("Lib++", "2", "3", "Moderate", "5", "6", "Cons++"), fefam = c("Dis+", "Dis", "Agr", "Agr+")) # numeric versions for U, R, C, RC models Rscore<-as.numeric(polviews) Cscore<-as.numeric(fefam) # make a data frame Wong23 <- data.frame(Freq, polviews, fefam, Rscore, Cscore) #################################### # do a correspondence analysis first, to see the row/category relations Wong23.xtab <- xtabs(Freq ~ polviews+fefam, data=Wong23) dimnames(Wong23.xtab) <- long.lnames library(ca) plot(ca(Wong23.xtab)) title(main="Political views and support for women to work", xlab="Dim 1 (90.8%)", ylab="Dim 2 (8.5%)") #################################### ## OK now, gives warning Wong23.O <- gnm(Freq~polviews+fefam, family=poisson, data=Wong23) # OK, with formula mosaic(Wong23.O, main=paste("Independence model", modFit(Wong23.O)), formula=~polviews+fefam, labeling_args=long.vnames, set_labels=long.lnames) #################################### # Uniform association model Wong23.U<-gnm(Freq~polviews+fefam+Rscore:Cscore,family=poisson,tolerance = 1e-12, data=Wong23) anova(Wong23.U) # OK, w/o formula mosaic(Wong23.U, main=paste("Uniform association", modFit(Wong23.U)), formula=~polviews+fefam, labeling_args=long.vnames, set_labels=long.lnames) # display standardized residuals mosaic(Wong23.U, 
formula=~polviews+fefam, main=paste("Uniform association", modFit(Wong23.U)), labeling_args=long.vnames, set_labels=long.lnames, residuals_type="rstandard", labeling=labeling_residuals, suppress=1) # compare with gp=shading_Friendly) mosaic(Wong23.U, formula=~polviews+fefam, main=paste("Uniform association", modFit(Wong23.U)), labeling_args=long.vnames, set_labels=long.lnames, residuals_type="rstandard", labeling=labeling_residuals, suppress=1, gp=shading_Friendly) #################################### # Model B - R: Row Effects Wong23.R<-gnm(Freq~polviews+fefam+Cscore:polviews,family=poisson, data=Wong23) anova(Wong23.R) mosaic(Wong23.R, formula=~polviews+fefam, main="Row effects model", labeling_args=long.vnames, set_labels=long.lnames, residuals_type="rstandard", labeling=labeling_residuals, suppress=1, gp=shading_Friendly) ##################################### # Model C - C: Column Effect Wong23.C<-gnm(Freq~polviews+fefam+Rscore:fefam,family=poisson, data=Wong23) anova(Wong23.C) mosaic(Wong23.C, formula=~polviews+fefam, main="Column effects model", labeling_args=long.vnames, set_labels=long.lnames, residuals_type="rstandard", labeling=labeling_residuals, suppress=1, gp=shading_Friendly) ##################################### # Model D - R+C: Row and Column Effect oldopt <- options(contrasts = c(factor="contr.treatment", ordered="contr.treatment")) Wong23.RplusC<-gnm(Freq~polviews+fefam+Rscore:Cscore+Cscore:polviews+Rscore:fefam, constrain=c(17,20),constrainTo=c(0,0), family=poisson,tolerance = 1e-12) anova(Wong23.RplusC) mosaic(Wong23.RplusC, formula=~polviews+fefam, main="Column effects model", labeling_args=long.vnames, set_labels=long.lnames, residuals_type="rstandard", labeling=labeling_residuals, suppress=1, gp=shading_Friendly) options(oldopt) ##################################### # Model E - RC: RC(1) model Wong23.RC1<-gnm(Freq~polviews+fefam+Mult(1,polviews,fefam), family=poisson,tolerance = 1e-12) mosaic(Wong23.RC1, formula=~polviews+fefam, main="RC(1) 
model", labeling_args=long.vnames, set_labels=long.lnames, residuals_type="rstandard", labeling=labeling_residuals, suppress=1, gp=shading_Friendly) ##################################### # LRstats the collection of models models <- list(Indep=Wong23.O, Uniform=Wong23.U, RowEff=Wong23.R, ColEff=Wong23.C, RplusC=Wong23.RplusC, RC1=Wong23.RC1) res <- lapply(models, residuals) boxplot(as.data.frame(res), main="Residuals from various models") aic <- t(as.data.frame(lapply(models, extractAIC))) colnames(aic) <- c("df", "AIC") aic # sort by df aic <- aic[order(aic[,1]),] plot(aic, type = "b", main="AIC plot") text(aic, labels=rownames(aic), pos=c(4,1,3,1,3,1)) ###################################### # compare models; they are not nested, so only some Chisq tests make sense anova(Wong23.O, Wong23.U, Wong23.R, Wong23.C, Wong23.RplusC, Wong23.RC1) anova(Wong23.O, Wong23.U, Wong23.R, Wong23.RplusC, test="Chisq") anova(Wong23.O, Wong23.U, Wong23.C, Wong23.RplusC, test="Chisq") vcdExtra/demo/00Index0000644000176200001440000000164214422306401014107 0ustar liggesusersmental-glm Mental health data: mosaics for glm() and gnm() models occStatus Occupational status data: Compare mosaic using expected= to mosaic.glm ucb-glm UCBAdmissions data: Conditional independence via loglm() and glm() vision-quasi VisualAcuity data: Quasi- and Symmetry models yaish-unidiff Yaish data: Unidiff model for 3-way table Wong2-3 Political views and support for women to work (U, R, C, R+C and RC(1) models) Wong3-1 Political views, support for women to work and national welfare spending (3-way, marginal, and conditional independence models) housing Visualize glm(), multinom() and polr() models from example(housing, package="MASS") mosaic3d-demo proof-of-concept for exploring 3D mosaic plots mosaic3d-hec 2D and 3D visualizations of HairEyeColor data yamaguchi-xie Yamaguchi data with models from Xie(1992) for homogeneous and log multiplicative layer effect models 
vcdExtra/demo/yamaguchi-xie.R0000644000176200001440000001345614422306401015640 0ustar liggesusers## Models for Yamaguchi (1987) data on social mobility in US, UK and Japan, following Xie (1992) ## These models reproduce the results in Table 1, appplied to the off-diagonal cells library(gnm) library(vcdExtra) data(Yamaguchi87) # create table form Yama.tab <- xtabs(Freq ~ Father + Son + Country, data=Yamaguchi87) # define labeling_args for convenient reuse in 3-way displays largs <- list(rot_labels=c(right=0), offset_varnames = c(right = 0.6), offset_labels = c(right = 0.2), set_varnames = c(Father="Father's status", Son="Son's status") ) # no association between F and S given country ('perfect mobility') # asserts same associations for all countries yamaNull <- gnm(Freq ~ (Father + Son) * Country, data=Yamaguchi87, family=poisson) LRstats(yamaNull) mosaic(yamaNull, ~Country + Son + Father, condvars="Country", labeling_args=largs, main="[FC][SC] Null [FS] association (perfect mobility)") ## same, with data in xtabs form #yamaNull <- gnm(Freq ~ (Father + Son) * Country, data=Yama.tab, family=poisson) #LRstats(yamaNull) #mosaic(yamaNull, ~Country + Son + Father, condvars="Country", # labeling_args=largs, # main="[FC][SC] Null [FS] association (perfect mobility)") # ignore diagonal cells, overall #yamaDiag0 <- gnm(Freq ~ (Father + Son) * Country + Diag(Father, Son), data=Yama.tab, family=poisson) #LRstats(yamaDiag0) # same, using update() yamaDiag0 <- update(yamaNull, ~ . + Diag(Father, Son)) LRstats(yamaDiag0) # ignore diagonal cells in each Country [Model NA in Xie(1992), Table 1] yamaDiag <- update(yamaNull, ~ . 
+ Diag(Father, Son):Country) LRstats(yamaDiag) mosaic(yamaDiag, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="[FC][SC] Quasi perfect mobility, +Diag(F,S)") # fit models using integer scores for rows/cols Rscore <- as.numeric(Yamaguchi87$Father) Cscore <- as.numeric(Yamaguchi87$Son) # cross-nationally homogeneous row effect associations (Xie, model R_o) yamaRo <- update(yamaDiag, ~ . + Father:Cscore) LRstats(yamaRo) mosaic(yamaRo, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="Model Ro: homogeneous row effects, +Father:j ") # cross-nationally log multiplicative row effect associations (Xie, model R_x) yamaRx <- update(yamaDiag, ~ . + Mult(Father:Cscore, Exp(Country))) LRstats(yamaRx) # cross-nationally homogeneous col effect associations (Xie, model C_o) yamaCo <- update(yamaDiag, ~ . + Rscore:Son) LRstats(yamaCo) mosaic(yamaCo, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="Model Co: homogeneous col effects, +i:Son") # cross-nationally log multiplicative col effect associations (Xie, model C_x) yamaCx <- update(yamaDiag, ~ . + Mult(Rscore:Son, Exp(Country))) LRstats(yamaCx) # cross-nationally homogeneous row + col effect associations I (Xie, model (R+C)_o) yamaRpCo <- update(yamaDiag, ~ . + Father:Cscore + Rscore:Son) LRstats(yamaRpCo) mosaic(yamaRpCo, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="Model (R+C)o: homogeneous, F:j + i:S") # cross-nationally log multiplicative row + col effect associations I (Xie, model (R+C)_x) yamaRpCx <- update(yamaDiag, ~ . 
+ Mult(Father:Cscore + Rscore:Son, Exp(Country))) LRstats(yamaRpCx) mosaic(yamaRpCx, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="Model (R+C)x: log multiplicative (Fj + iS) : Country") # cross-nationally homogeneous row and col effect associations II (Xie, model RC_o) yamaRCo <- update(yamaDiag, ~ . + Mult(Father,Son)) LRstats(yamaRCo) mosaic(yamaRCo, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="Model RCo: homogeneous RC(1)") # cross-nationally log multiplicative row and col effect associations II (Xie, model RC_x) yamaRCx <- update(yamaDiag, ~ . + Mult(Father,Son, Exp(Country))) LRstats(yamaRCx) mosaic(yamaRCx, ~Country + Son + Father, condvars="Country", labeling_args=largs, gp=shading_Friendly, main="Model RCx: log multiplicative RC(1) : Country") # cross-nationally homogeneous full two-way RxC association (Xie, model FI_o) yamaFIo <- update(yamaDiag, ~ . + Father:Son) LRstats(yamaFIo) # cross-nationally log multiplicative full two-way RxC association (Xie, model FI_x) yamaFIx <- update(yamaDiag, ~ . 
+ Mult(Father:Son, Exp(Country))) LRstats(yamaFIx) # compare models models <- glmlist(yamaNull, yamaDiag, yamaRo, yamaRx, yamaCo, yamaCx, yamaRpCo, yamaRpCx, yamaRCo, yamaRCx, yamaFIo, yamaFIx) LRstats(models) # extract models sumaries, consider as factorial of RC model by layer model BIC <- matrix(LRstats(models)$BIC[-(1:2)], 5, 2, byrow=TRUE) dimnames(BIC) <- list("Father-Son model" = c("row eff", "col eff", "row+col", "RC(1)", "R:C"), "Country model" = c("homogeneous", "log multiplicative")) BIC matplot(BIC, type='b', xlab="Father-Son model", xaxt='n', pch=15:16, cex=1.5, cex.lab=1.5, main="Yamaguchi-Xie models: R:C model by Layer model Summary") axis(side=1, at=1:nrow(BIC), labels=rownames(BIC), cex.axis=1.2) text(5, BIC[5,], colnames(BIC), pos=2, col=1:2, cex=1.2) text(5, max(BIC[5,])+10, "Country model", pos=2, cex=1.3) AIC <- matrix(LRstats(models)$AIC[-(1:2)], 5, 2, byrow=TRUE) dimnames(AIC) <- list("Father-Son model" = c("row eff", "col eff", "row+col", "RC(1)", "R:C"), "Country model" = c("homogeneous", "log multiplicative")) AIC matplot(AIC, type='b', xlab="Father-Son model", xaxt='n', pch=15:16, cex=1.5, cex.lab=1.5, main="Yamaguchi-Xie models: R:C model by Layer model Summary") axis(side=1, at=1:nrow(AIC), labels=rownames(AIC), cex.axis=1.2) text(5, AIC[5,], colnames(AIC), pos=2, col=1:2, cex=1.2) text(5, max(AIC[5,])+10, "Country model", pos=2, cex=1.3) vcdExtra/data/0000755000176200001440000000000014422306401012737 5ustar liggesusersvcdExtra/data/Titanicp.rda0000644000176200001440000001574214422306401015213 0ustar liggesusersM$W}v RHa$B B|#G6X3IV]^AVDY7Y>R)*+ȌoUݷ-9U]STuA?Wރs7?Y{ C?}GfwOw?O?7D~/:?i}wܥVN}N;]۲Z9Sǘ_zܩ>ZRSKT?N͟6vmm1u.suܺ%b{+dlk%}9uKSl*{m>ܸ強߶vg\/KqXߖ=w/KҒqK}1Vw'5S_z:Ʒ-i˩mv箣]]m;շa:XKߥ\;M]K]=w/ے1{;;6F85U%}%-=}}3i:,v;w9KmXRoɽd[{-5rw1ox:3nKܘ=D;zM]Ͼ.ѫ:y;vw޽'o4~ѽ vmuZM 7swӲ]RN}mj%wmm]xipھhɕ;5&m[Lm?4sm\6'stf]lI[OoK#\9y8ʛKaj /K_ҦZ?X.lmKln-鷩,'sfkdivKlΜ}mڧs}>\5휖.Y[r].i˥vʎ)î}9͜g:vd /^1Ŀ񵴯u]gI̹%vk]z2{6{mmt΍]v9ky4ssߥ_No2ɽwOk~2= c/>nϟ0? 
'[cIX~g,IڸX>7?:)XHy~\9+ca,o0.:.lqelW87e~<^^r|?ou#p,_r<&Ov1Ǖ?l;e˷0ɗdoOj{Yvv)r<4Q^8wlʸʸs}~<+gw5,zq%׻[EƱOƟ^M{s]u9/8˯dk,e\u)yRdc!5[oyw22~nOoOKLH}i?2+k]4ׁ[K{woyI_3z$^׃`ֿqd?9_qD)K]7m]|ŜoiiWiO^ M}[YX&e<˸ǾkJcǏۣ~x}a{vvɓxoδKK:tүRɱξ%M)#'-ۣ}/2q,K; fYIܾmvŬIq9qX~Or󡴋7q?e4u,^__ ~;'o<Ǿjvr^ό-SJ\3{K)%^Iƙc9׋{>}۔Gc)(שJYM)|?S8'=s/xfX7vs{]a}n~J޸v9OƥSs{uv)i{q/i}w鱔q+q6o^I%թyǗ~q-!sde(02T~ORθ^8Ǔɱfc0+q=<#;g#~7ַ;/aWH}~N27:/я7.KI|/e9O}NOƏ=j,o3}q>߾ug}亓h,dYK]AW;Wwo?gc+ ljVcٟ'ٱ=r].}4[#[QK?o2$^+<9έZ=_mdKI\f?듭o~oϮz.ǓX7)8:^{'gJ\c{Xc|弤ݬ{v\s>)> %vK;jy/#G_|:,ק}v4!oƭ{_/J׶Gso\}x>ä^{N/~~cXscz>\ʸ\~џ7US/yN17\3q,-85^9ҾMٿGl׎˾l'cߍms鬒v_{wɹyRq"#e{׉~`C߲Xߍl~}y~O'Ki_zao

Ɇ82ˑzGR߱#.T~ǿy9h>7nwƿ>27sM;,j‘~?w۝s46g;^e'(v2 v=ޡws>~Zt톉5g8>k;sD9?osy_ׇoνáË}9te]myhS\}n75{u(.u<j*AWoՎv}}ORۼM9SKt:eߜ}ieZt磋~QӾtC\i(wZՋλvPRo\:{˾?OMn?}оSwg,{(z7rU}4eo:l7]}Wgzq/߷iUޓϞu]a\벧a,/KG޻N:UOsyqދ^kz俙}_~w߿`u~JƥG~tqƃ{'wJsO>x/| ܽ_~S=qcNf2d&L*3my9| }.r+}{g{g{gzFgzFg{FgĞ{FgĞ{FgzFgzFg{Fg䞑{Fg䞑{FgQzFgQzFgQ{FgԞQ{FgԞQ{FgzFgzFg4ɸᎎuzflf0[%͑Hs9i4G#͑HyvcdExtra/data/Hoyt.rda0000644000176200001440000000115314422306401014352 0ustar liggesusersu]kQO%^xDDDĦ*RS*fYE//)_P>o`Zؾ3gf3sIsR<(c&˄_(.dcJ"] ?֛Y2xh8B/ x ao_+s}z?upk3Ke}YJڕef?!>wUqV?n5&6I|eOGn'W%)k+Aj]8R| 3tscSx[sA>C^9*¯T^U=m~k_5WO)|d)=c]vpľGݟ/׹m;e??WuxޯB_}{ly\?4GߏSok;p9Z߼k!tc7|(W-={sQZN=\ .6uwݮ'9ZfYBEŠU k%<&+]{Ҏ;>%/R,Ņvp<ՏSӞ 9>?o!yϞ7s y>g&c^vcdExtra/data/Heart.rda0000644000176200001440000000036714422306401014500 0ustar liggesusers r0b```b`bfb H020piVĢf> !8U 0 _ B{1@":*L B@r\ bh8sSPP yvT(/?/M obL-5 K-ȁ832KRa|vR5΂$0=5/%?9nTrNb1(F%I`awvcdExtra/data/Hauser79.RData0000644000176200001440000000050414422306401015262 0ustar liggesusers r0b```b`fd`b2Y#s1Gbiqj%0T %j b ,@J}1外YP Q?_'f-3}*reMI, ,-1$et̄3c,X0+&9?< 3 @r& n0@Pu :n=:>@;@.Y:j^>}+A\|%T,BWC3u :hKME+3,`!XZB48`Ss˕XV tcvcdExtra/data/Caesar.rda0000644000176200001440000000037314422306400014627 0ustar liggesusersP 0 "nZť(k`Mt|$@֫.}9Yͷ mA0Xn8B+@Oo1C/ۄ;*G8 ,u3f}gz'#ZƨgդKw҇Z.34ԈGJPgQ)e~OX@;72+ uA,#ED6U(& R\g-Z}(ixҥ?UvcdExtra/data/Vote1980.RData0000644000176200001440000000052214422306401015112 0ustar liggesusersN@ ǝ%i"U0 1#L :@SzJi". 
46J*P"_dǾs&Q8 @ 8 sY=_;K[hV1lF.?хNrTUV.KVN;ԋswXE63\KH#|" 6%UPeΐň1,X\N{Q?V|y2V&Z=/]@z䏑C@ޑG!v52P?>>UKiCr}R  ׂo &g > .F¢eCuȃ[Fm`'v~a_îص > {3r#7?Ğ|K0oOςG_B"O #N|ULJpZE/\ $DžՂDC~9qA~;3K?_F}] +_ÜH#R< M0:\ڣ_Q+~/>v><N/ $K9GE\xWO|Uy$7ȓWsvpODJSCAN"O Ofu >MMwoi!~{\iLWӯ`og?BͯS 5z]b@8uCp?o>O>̓wgu|}+áG@p9hEv_ٗHda|~GCt.q/ĸc`{O@sܫq{._Wy|NH~C y;wlx:#{̛}HKcNQs!_+TŜL'ME>uSϺ}Pƾ|KI|?i sJV|q+ckC{g杂 `zl5<ƚ>cl[M>>}DI7iCMpꐙ9q.ϔefA[ O۹m(Y6z=J 3i?@Oa0`.`Rc/^gƨ¹1 cC=1?GsFgjڌslX;d}S)*eg1?>)FTgn=.*_Kni)Eb{Gn9cI3v}&zf~*nNu{!Uz/G\tgt=wT{]g|xLBsνv`cx@@~TUsSxwݐRxݿ{}j7U-N;~r<]޶֮V=֛}R_ޙK!V S3ߡ8q|d; w+vcdExtra/data/WorkerSat.RData0000644000176200001440000000040114422306401015570 0ustar liggesusersR=o@ uWJHfVVԭV71jZ?!S0wGٺϵu_j_{\i?ScKt>5eLW-15ޭ*g^WG?ƾRz2jtxG/oѾ5EvZ-Ϛ]5-eǯڸEֹ^G-^#Vy->hقiqkx4Lgޒ+ݺ5lӒ-5;&1{<{j^X _SQ+op_}{4Z=KzztӓNQk+ztM,G1}]5+kB<ޡe(#}ӯy)yMfGcjmOsױ1ʳ.k#ҡ&}Xnۼ/圥55[?|yǮ{k (巷׶(_Kqվ(^OG[+~yڲ׽|kgT #z 5ADoeFSm[+^e==/o,Z+`mLV#NgIӳEy`i<~5٥h[ljǞQ=0j5Xp6-a{ǫ&':ӢxZBϔ8;ӃlS;3 =wOw"V6Oo[lov?D6mw$/v}Y3mio|Gly@oͱ sq-\|Xpܼ={wWsn@7dO1ϙ69$LS[{ڿ<+{fMt rl=GX43#׹Ao_ws8! '!ZaX$c(S8f=]s)01Ѭ$u whRՆSdKJ'x{Ů1k94H;QZwOmM{n@'}S|Cڛc($c9M˕-6A*7$1~CEiC#" ]I?j$~}E2v̓ɨR2vtDe~ ڀ\HiC;δ#)w$cAtn:b ? 
&bhV)g%DyKy-Q2%׃4Glj}`Iu +ɘɵ^tC<%uCm?3ؔJ{@:SCfFw.Nd3-&v䋅<ܒ'9,nqN4#9ԥ}?\+,_D7;~lKݓ;s)d4_rm9ousuSr^|4%?+M9;z';\\=з˳:#/~ ˋxb~rb1w~7Wk|~/ir_?r#^ҏ3WvcdExtra/data/DaytonSurvey.RData0000644000176200001440000000056314422306400016333 0ustar liggesusersTKN05N *$.%bQ` %B) $F%'mĂ(#p#J)X3϶|Z!VRMհzS>$]%X*(TuTلHY[*Z,B 8DQ3/RYsCNʦi m^TOLUYV޲rȯ Yg/jhjL ֒"3MnrF~9~/㽛ƯF>1׳'խ7A<[;T dq0[,.y'||20`E M[W<M)]4{e4;4Mab c3vcdExtra/data/JobSat.rda0000644000176200001440000000036514422306401014615 0ustar liggesusers ]= 0jC-v$XX#6_ Emɛyr6'yj۪Kj`ֶg@ Y'dᙥ=oX {}1HPN峠,)*],r?Ҟ7ܝ:?rbX9qdžR\ac\I~?jeIIiL)U.,U%%;<XM,vcdExtra/data/Heckman.rda0000644000176200001440000000040014422306401014767 0ustar liggesusers r0b```b`bf H020piv<f> Wj883@h(ՠ|(m)a@JPZJs@i( 5?9h뷀h@szGPsJf.bVP(b`@y@)aZvcdExtra/data/Cancer.rda0000644000176200001440000000033214422306400014617 0ustar liggesusers]PM0 - I4&P1Ń* !#IgM׏e~H 9+`&.^B[ki3>`+cNxt9ͶUll-&V3w\_e\\ MP.DgE7rCĶ5f'ўmk2a\zI./RLlFvcdExtra/data/Abortion.rda0000644000176200001440000000033114422306400015200 0ustar liggesusers r0b```b`bfb H020piǤ<f>!5 tNy@U@59%3H`g13sSRPP y6ĜT(FVcyd1G HY  (-$.-(X<"X 3j1kIb@vvcdExtra/data/Detergent.rda0000644000176200001440000000044114422306400015346 0ustar liggesusers]RMK@dj#=(Jmm~a<@M&}Q͛%σ܏|"rY\rzW "5ipAL{|~ ~gۦ~o+` 8>F:qW=4Q%S΍}|F f?->/},󠯥Ϣ& ulYn'Ա5o V[x^9f-fHpgY-j.?0MoyX˼`V!?׼O ZkvcdExtra/data/Vietnam.rda0000644000176200001440000000055014422306401015032 0ustar liggesusers ͓JA7{{ XYH K;5Dl HmP8sqoo67#fOQ!r$ b\gBf!."nGf-SezZ<-5Lġsa?Ma'K 6xw-KTOM3ڨVg- +o¨0؈;XNL?OuHhep|i.:e@: A@wW-y ܁% n=!x"Gp &W$}=tcϭR=$) )4]:N\ͦ-myм 5B vcdExtra/data/Bartlett.rda0000644000176200001440000000032714422306400015211 0ustar liggesusers r0b```b`bfb H020piĢԒf>!BPixBC TS2s/ v:,/17(% U+U gu,KrX\RS0ClEyh X|b`ӂ3JМ &a ̅|RK2$$aX Q!PvcdExtra/data/Accident.RData0000644000176200001440000000107414422306400015367 0ustar liggesusersUn@=k{! 
G9.$hJ- 4cO-(!$Z*| ih%@3ǰ (ř;w>ffւ@.G9 T)wx^a{)(y!Vb諳"r{:ٴ\sZ+'{J)I@]Y+QX*%QE % Zol;z.H6LueZڔ*fuMH1JW쫷3szTk."}+$n?j['e6D͙1/8AƉ'PDyR 6jO 1m>5)qYW)o[`2i?qh~gȱ4iW{Fy\J)aVo{-,aO=/~v0~| ƨR<;[nWv~݌U>JuKn|}0vTx ׫W'LL´_ۅ _0L~jQ=v6y+鹒*:7ƜN#>i^rF' ;(>*گᑮjApsZmˤfoq.bxxȈ3iMa'/|8u[\i KaoŽ{I,L?o[kTM8/ KG5q&oX1h.t&\DnpPoWGjatF/Ȳt-h [%I Zy[x]t5*L77;~i7PS~kǸ>yf7?3b9"8)݋)n1vf ;[iGa0ΑQp v| ̂$x|Lk0DWXeIqs0Iq3/y gyY~@svxEWkvO2sR fJS,ea e=U&;Gy%ςSaS\=4s̏#̗ރt>˴reYWy<ʹɽ#{ GY扬}A}Wy>2Ӕ1@RexmOh꡽i4}=L[;2JdϤc~:Dn;h(w'cy }:mQgi}k1h Gv-XFxq: e1P;1g8랤%җ&ihM\vD[4g7t+u췳,0+;gN6*yi}2xν$i 3+;?D6k}s~zR^?^GT`vd{Of,'5Fen{6?^Ӝ!Ǹ S݌O0Ҿg|d<.Tv $qmKC2_\ak~t-9Ttn *M~C Q仕ZG%k{/kuu5uglTG=wgˆNGEesԋ0z6a+ ݆m5$)PtuWkW=׌VzɩIRe_vnfuŲb-FQab\N]?Qgg;;65nPgnĢ0?[2 ?ˠKj!]E6cyl*G: *yOJoW$"O)̫W/GE];~sej0 {L y.v\*+抯Q3pVRS#jT*QHY*{ٸ]LnTΚJxΕژgRM'NjLR"G B:Y6VaBL gJJ1R(˗cLZ*s'K Tqm89خNSͰ"kWvy!YWK4nnoќ___i՜__K4rPΞלQ9xLsΥiΙB9(#ܨ%:朽6)9#'i+*k-]o @@Cꈍ>N3,x<`i0 43;[v^17gf; @;0w!pd~q,x| 4`/ v&&i6jN` O$@̋[J/ל__lmL<' ] 48C@w#ހ3mgv?#1=?IOvcdExtra/data/Yamaguchi87.RData0000644000176200001440000000100114422306401015732 0ustar liggesusers 픿/AqB$ZJD!h8sDY+h$F FT* JRrfmVBrf&x11iL+Q)jgxr-`LsKVKiIg;h[溙^Y-F8jET\7rn܎A.2`"W;"Oؖ٧輡bPҕϧO}/q]Ռ" Xs 7ĕ q}GV}QOCz[q~gǸ [XG2BEAİO g?k&))3,.X\F5Kg.))Wz[wWfjMKQc/9y ңFæ7Sn'uˀd

:"{0΋e^$YU R3}=R _f(Ӟ] t^*Tu`BQXφ lr=yV<)eV$MJԓq^Le!Z,TyF׊ǖ=q>@Q!:h2thъ]̶Fl4Ym]'||sWێw|h>;0>Cŗ¢#1l` ,L/\VF|xx=7;n=z 5n=(2U۰̹srѶ~:g!.ɺt-xsV Zb/NݖE6Ma8F}mtth$zHjhvu|d`}ܞjTީuB;yz\]cpa.pIp1pbᛳkE1&j2aa6 tD>K6x/Y#y8I%Aoqp<6?(|n8\x11sߥ>\llq}/_8{] 8}G0yZqIrys }?)GSeVK'| aqLBbgnޟ57"B>!E;$CA: vElk ݌ 8!5,а5k#"N:Oϙh [)|&pkmݾ404YO=qb~5Y_!X 7XG> _*&?f |#Η]A˒,Ū%{ާyqg(e;\cOeV.*1 ).Z$?ă"8Fu+qss~ I %"Vא?bp.vcdExtra/data/Donner.RData0000644000176200001440000000273414422306400015106 0ustar liggesusersWoE_I'v|@J RHH "MSi:{1 \rHPLng޼yo^^iqƜ2P/; q 05B"t*h4)o5.hJg:fѴ'5*X6f& c?JJm [W+%QCɽC鞘kql+Ņבf/Ee[MY.jF y$xΉ،ԗN ^Iodmӱ4#d&rBIay1:-t2=EEak^;#Ƣ}Qp6ntBđ#vJsx3"FW$2Ol?+Vns{&Kg;0?G4kq'`L E>g='y)98"c Ƴ~s,eϣeɳx?}Y~|~T4?L7{?7y1dW[߹|}\7| v]OΟy:H=>_fڻk1X^Op垱twI־<>d\{qouٓ&6#xidu`QלAbyO}$ḋStۇ Q [(B˻lY ,>\i;-g[JBtQ(8D3f$Fځ(^cY[:K^HS՘a䮵Su q^ͮqa0^FZw:rHO  YK6( ^]Ns9Lj`p<2dj`5ɎmQ [t'Mb ԍoۨ{f>pP/Y2W_|C]% 3G*Ecj k d2JF,KдUpSRq`6f,ak2NJ{{^rܪL$VT9KF=[%Ҩ׌/B]Y|PvVjqGMBz[UiySΙIHg >ILRI&?Ŗܕz.XvSVmx2zabv^}~cs`UIk& U$(dYIηTp/NZX =OX6 /"3aY/kY[-`N Wj3ۤB7ɜCd-R5[`x\mn{2S Vzm##Mu`SlG\EaKJ#B 2o6wH NwU\ U}UOZ'$W:J|V1NO86rvcdExtra/data/Dyke.rda0000644000176200001440000000043014422306400014317 0ustar liggesusers r0b```b`bfb H020p8.٩ |@C(8TL tւ{Bi4J@i(P? ړ3!Ҡ#TJG6SjF8a*6)@Y@AQs%zjA힟c3CY̑ Ch$fV:Xy9)PĔ̼t /k"!~ C.9'f#Y%I9 DvcdExtra/data/ShakeWords.RData0000644000176200001440000000104214422306401015723 0ustar liggesusersuOsQ')TEEUDڤ"H$4$e$$ŌUf쬬,x^%XXZى{wX|L2yrb

QɰUɘd3gkB [!kߚZsV2jMꟅp5I[rymC Q}fm4/f{9-^{ciTnt>/o؇{vcdExtra/data/Toxaemia.RData0000644000176200001440000000063614422306401015430 0ustar liggesusers唻JQg/Q EFd H@TD-E C<1M4RKQ|N#dȂZ|;̙s/;($H'4 HSC#NKZEcX:5#bDČH,]U奬֡F:Y-IJY%VBbŪNKp\vǿs6;c(,RfazCvR]w9+vrr\lHȃ P0e9nw@FʟsHr@k)=&q='d+=;?5:xYyp ,HG0x8vQ0isw*j2<nLD>1#! |,!{[KVQ`-"L541Ȍ!1 -n|?&uNVl2R]9߹fZm]+:U֝Y˴A6݈ab4?;`Pml&+.gp`P@wp#x7:`6xWz5Zv!ʥ7<5VSy̏>W :vcdExtra/data/CyclingDeaths.RData0000644000176200001440000000130314422306400016371 0ustar liggesusers aά]N$IJg$.I}8ZK{K$I$I$I$I$]$I$I$zZ͎Sif޹y=qĕ8.QDu뢝ٍ LapHdq[YH| &cHn 5׈G71q,r&~Y\m< 7(abX 8Ӹxgx/" _a84x@*@%@f@hvy=sP] ԁs ɜ xA ATZ T" U'U -M̃}Q@NibBXK3Q "& _p~^rriAbIf~TT-$#Ey90$&BT3(hvcdExtra/data/Gilby.rda0000644000176200001440000000046414422306400014500 0ustar liggesusers]N0/I[B! c&H0PMj:n<*T8Ē<$lHD.c ;8D{-|MqET,_"˷r× ԋKR'9x/a5ebi0ھW[(X0xR|r;B[X\JFWL|u3w6u4ʷͦ9eH ډ_ҫ<0wm_IgPsNZzeXamY9 C҉Zjk\sP'j#g,cmiGY.e7( @rlMh4=ӧ 'ءYK͢ސ!Q|&0t6Ӏ-וWn+RT\r' ϗ^ĩBwTo3Cr,9q2n_˴?ɦv.LDxGYW)"@l͡eɖ?`aYm!W4> Ef_mvcdExtra/data/Mice.RData0000644000176200001440000000044614422306401014535 0ustar liggesusersN0vPBAbĀڎ݊ baj±'/='w)CE/m]t$(!@*ė0Xd@trN\ Kv/ʜ}é"CE #逧$~K.IJ$P\\"wjbhf_pe \-GkpedltLmƻKWJl߹~i:727sOWj?ٺsGi+8/t~pN7Hsv4|^7ϸ5>v&?]8 ⴢ#6cL[Y`rwj=Ϩe-U-Ú @ k. Z67a.끻Pg1cjŬ=Fo?}BJ/͡^ l@9=Zi`==TA!D^G-.w,24skQy6lpTy;E ;x LWq vcdExtra/data/TV.rda0000644000176200001440000000107614422306401013764 0ustar liggesusers ]_LQϽ;3ںQEfe8Wnxs?v]':܇]&3~ >hRW)Gg m=r8wAA;ccڏ2,_'rF{C< o5{nx^z{5ؽ.sNL%{as;XLǻm߂ ؁'=6KyRaҤU |jطMG) FÖA7@7/M_D:;7ҳc~.b(?D dτK<-5Ԣ|gCv ^%Qն]R_(~Nf}Y!cvcdExtra/data/Burt.RData0000644000176200001440000000052514422306400014571 0ustar liggesusersTN0 HH,b`@ bƥgAy @Ȁ}w?7q 8Y:A7JMSMrvӪyE{! 
mU|(bvsNqSe7Єs,sLp<ց3e /Q0^&hZѻ@)JG4a#QXRMIڹ۪wed)vZIwzvh߱) nFP7O>wd/~H^(Q5 %]N?,Y|qASe\Jo<vcdExtra/data/HairEyePlace.RData0000644000176200001440000000052514422306401016151 0ustar liggesusers r0b```b`f`d`b2Y# 'H,rL ILN*i8$0Ck.-P3ҡPJ)@&ݓ+ t4Ԝt(]yj9P fԼP-Qw@x*T]]?\ R9 ҽP9OQP0@J3d)^$Ĭ (AUTZ ȳ8唦B٬>%PojJfi.LKbQ6T++L 05 `0A93K2Ra.vLJ-JIMC +`0{ f9ДavcdExtra/data/HospVisits.RData0000644000176200001440000000033014422306401015763 0ustar liggesusers r0b```b`fbd`b2Y# '/.,,)f``p:60VPZ JAi(&\ v (J8JsSNaȳ$VZXWaK-K-Blk 745h9j;X?L ήYB^F23yD.,HY,VveᏻE):H2 I{(S.8zXJ@(?k2kJs/t*cgli:MNH0ϭ%C19aLm+<{pgu:pG~UZRu.Zq vCg#>O3/+o;~ag~5ߔooEz+77777777777777qVvxa /?0{>?\>g2S:6?e޿{yW g ӳnq^偲Uy4:tRly土C>6a^K͸\HsSh娾{wXyU)u-cYszp枷x,GK]sI2τ:DVFO(چ9<3K께LTk"w;lIqu`x4w]CG;䒼~WkFTXY\P u;XTE 35D,QvIz;rFE3;.W̕Ueu:D'ߕ!?L;@ݪ.~>UB y.TxghGQѓAz.!q5~!W|R_igeMԡ}W{:\W/|Y :zEٯlԾ@Sdz[?ڋ wGѲ(vzua;:hw磺F߷y(C|&vEE3jPר3|D{֮+wiTCjGQ |wS: F3ڿgGwHt)H<$4#Dai'ZKl^MT%ˡ\r9& MZwY)ˑٚrximr9zjrY-]6*LN`*~Iu,˖%F1!œ)%. ivcdExtra/man/0000755000176200001440000000000014430460317012607 5ustar liggesusersvcdExtra/man/ShakeWords.Rd0000644000176200001440000000324214430460317015151 0ustar liggesusers\name{ShakeWords} \alias{ShakeWords} \docType{data} \title{ Shakespeare's Word Type Frequencies } \description{ This data set, from Efron and Thisted (1976), gives the number of distinct words types (\code{Freq}) of words that appeared exactly once, twice, etc. up to 100 times (\code{count}) in the complete works of Shakespeare. In these works, Shakespeare used 31,534 distinct words (types), comprising 884,647 words in total. Efron & Thisted used this data to ask the question, "How many words did Shakespeare know?" Put another way, suppose another new corpus of works Shakespeare were discovered, also with 884,647 words. How many new word types would appear? The answer to the main question involves contemplating an infinite number of such new corpora. 
} \usage{data(ShakeWords)} \format{ A data frame with 100 observations on the following 2 variables. \describe{ \item{\code{count}}{the number of times a word type appeared in Shakespeare's written works} \item{\code{Freq}}{the number of different words (types) appearing with this count.} } } \details{ In addition to the words that appear \code{1:100} times, there are 846 words that appear more than 100 times, not listed in this data set. } \source{ Bradley Efron and Ronald Thisted (1976). Estimating the Number of Unseen Species: How Many Words Did Shakespeare Know? \emph{Biometrika}, Vol. 63, No. 3, pp. 435-447, %\url{http://www.jstor.org/stable/2335721} } %\references{ %% ~~ possibly secondary sources and usages ~~ %} \examples{ data(ShakeWords) str(ShakeWords) plot(sqrt(Freq) ~ count, data=ShakeWords) } \keyword{datasets} \concept{one-way tables} vcdExtra/man/Toxaemia.Rd0000644000176200001440000000464414430460317014655 0ustar liggesusers\name{Toxaemia} \alias{Toxaemia} \docType{data} \title{Toxaemia Symptoms in Pregnancy} \description{ Brown et al (1983) gave these data on two signs of toxaemia, an abnormal condition during pregnancy characterized by high blood pressure (hypertension) and high levels of protein in the urine. If untreated, both the mother and baby are at risk of complications or death. The data frame \code{Toxaemia} represents 13384 expectant mothers in Bradford, England in their first pregnancy, who were also classified according to social class and the number of cigarettes smoked per day. } \usage{data(Toxaemia)} \format{ A data frame in frequency form representing a 5 x 3 x 2 x 2 contingency table, with 60 observations on the following 5 variables. 
\describe{ \item{\code{class}}{Social class of mother, a factor with levels \code{1} \code{2} \code{3} \code{4} \code{5}} \item{\code{smoke}}{Cigarettes smoked per day during pregnancy, a factor with levels \code{0} \code{1-19} \code{20+}} \item{\code{hyper}}{Hypertension level, a factor with levels \code{Low} \code{High}} \item{\code{urea}}{Protein urea level, a factor with levels \code{Low} \code{High}} \item{\code{Freq}}{frequency in each cell, a numeric vector} } } %\details{ %%% ~~ If necessary, more details than the __description__ above ~~ %} \source{ Brown, P. J., Stone, J. and Ord-Smith, C. (1983), Toxaemic signs during pregnancy. \emph{JRSS, Series C, Applied Statistics}, 32, 69-72 } \references{ Friendly, M. (2000), \emph{Visualizing Categorical Data}, SAS Institute, Cary, NC, Example 7.15. Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. Example 10.10. } \examples{ data(Toxaemia) tox.tab <- xtabs(Freq ~ class + smoke + hyper + urea, Toxaemia) ftable(tox.tab, row.vars=1) # symptoms by smoking mosaic(~smoke + hyper + urea, data=tox.tab, shade=TRUE) # symptoms by social class mosaic(~class + hyper + urea, data=tox.tab, shade=TRUE) # predictors mosaic(~smoke + class, data=tox.tab, shade=TRUE) # responses mosaic(~hyper + urea, data=tox.tab, shade=TRUE) # log odds ratios for urea and hypertension, by class and smoke \dontrun{ LOR <-loddsratio(aperm(tox.tab)) LOR } } \keyword{datasets} \concept{loglinear models} vcdExtra/man/CyclingDeaths.Rd0000644000176200001440000000327014430460317015621 0ustar liggesusers\name{CyclingDeaths} \alias{CyclingDeaths} \docType{data} \title{ London Cycling Deaths } \description{ A data frame containing the number of deaths of cyclists in London from 2005 through 2012 in each fortnightly period. 
Aberdein & Spiegelhalter (2013) discuss these data in relation to the observation that six cyclists died in London between Nov. 5 and Nov. 13, 2013. } \usage{data(CyclingDeaths)} \format{ A data frame with 208 observations on the following 2 variables. \describe{ \item{\code{date}}{a Date} \item{\code{deaths}}{number of deaths, a numeric vector} } } %\details{ %%% ~~ If necessary, more details than the __description__ above ~~ %} \source{ \url{https://www.data.gov.uk/dataset/cb7ae6f0-4be6-4935-9277-47e5ce24a11f/road-safety-data}, STATS 19 data, 2005-2012, using the files \code{Casualty0512.csv} and \code{Accidents0512.csv} } \references{ Aberdein, Jody and Spiegelhalter, David (2013). Have London's roads become more dangerous for cyclists? \emph{Significance}, 10(6), 46--48. } \examples{ data(CyclingDeaths) plot(deaths ~ date, data=CyclingDeaths, type="h", lwd=3, ylab="Number of deaths", axes=FALSE) axis(1, at=seq(as.Date('2005-01-01'), by='years', length.out=9), labels=2005:2013) axis(2, at=0:3) # make a one-way frequency table CyclingDeaths.tab <- table(CyclingDeaths$deaths) gf <- goodfit(CyclingDeaths.tab) gf summary(gf) rootogram(gf, xlab="Number of Deaths") distplot(CyclingDeaths.tab) # prob of 6 or more deaths in one fortnight lambda <- gf$par$lambda ppois(5, lambda, lower.tail=FALSE) } \keyword{datasets} \concept{one-way tables} vcdExtra/man/Detergent.Rd0000644000176200001440000000336614430460317015027 0ustar liggesusers\name{Detergent} \Rdversion{1.1} \alias{Detergent} \docType{data} \title{Detergent preference data} \description{Cross-classification of a sample of 1008 consumers according to (a) the softness of the laundry water used, (b) previous use of detergent Brand M, (c) the temperature of laundry water used and (d) expressed preference for Brand X or Brand M in a blind trial.} \usage{ data(Detergent) } \format{ A 4-dimensional array resulting from cross-tabulating 4 variables for 1008 observations. 
The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Temperature}\tab \code{"High", "Low"}\cr 2\tab \code{M_User}\tab \code{"Yes", "No"}\cr 3\tab \code{Preference}\tab \code{"Brand X", "Brand M"}\cr 4\tab \code{Water_softness}\tab \code{"Soft", "Medium", "Hard"}\cr } } %\details{ } \source{ % \cite{Fienberg:80 [p. 71]} Fienberg, S. E. (1980). \emph{The Analysis of Cross-Classified Categorical Data} Cambridge, MA: MIT Press, p. 71. } \references{ % \cite{RiesSmith:63} Ries, P. N. & Smith, H. (1963). The use of chi-square for preference testing in multidimensional problems. \emph{Chemical Engineering Progress}, 59, 39-43. } %\seealso{ } \examples{ data(Detergent) # basic mosaic plot mosaic(Detergent, shade=TRUE) require(MASS) (det.mod0 <- loglm(~ Preference + Temperature + M_User + Water_softness, data=Detergent)) # examine addition of two-way terms add1(det.mod0, ~ .^2, test="Chisq") # model for Preference as a response (det.mod1 <- loglm(~ Preference + (Temperature * M_User * Water_softness), data=Detergent)) mosaic(det.mod0) } \keyword{datasets} \concept{loglinear models} vcdExtra/man/seq_loglm.Rd0000644000176200001440000000737014422306403015063 0ustar liggesusers\name{seq_loglm} \alias{seq_loglm} %- Also NEED an '\alias' for EACH other topic documented here. \title{Sequential Loglinear Models for an N-way Table} \description{ This function takes an n-way contingency table and fits a series of sequential models to the 1-, 2-, ... n-way marginal tables, corresponding to a variety of types of loglinear models. } \usage{ seq_loglm(x, type = c("joint", "conditional", "mutual", "markov", "saturated"), marginals = 1:nf, vorder = 1:nf, k = NULL, prefix = "model", fitted = TRUE, ...) } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{x}{a contingency table in array form, with optional category labels specified in the dimnames(x) attribute, or else a data.frame in frequency form, with the frequency variable named \code{"Freq"}. } \item{type}{type of sequential model to fit, a character string. One of \code{"joint"}, \code{"conditional"}, \code{"mutual"}, \code{"markov"}, or \code{"saturated"}. } \item{marginals}{which marginal sub-tables to fit? A vector of a (sub)set of the integers, \code{1:nf} where \code{nf} is the number of factors in the full n-way table. } \item{vorder}{order of variables, a permutation of the integers \code{1:nf}, used to reorder the variables in the original table for the purpose of fitting sequential marginal models. } \item{k}{conditioning variable(s) for \code{type} = \code{"joint"} or \code{"conditional"}, or the Markov chain order for \code{type} = \code{"markov"} } \item{prefix}{prefix used to give names to the sequential models} \item{fitted}{argument passed to \code{loglm} to store the fitted values in the model objects} \item{\dots}{other arguments, passed down} } \details{ Sequential marginal models for an n-way table begin with the model of equal-probability for the one-way margin (equivalent to a \code{\link[stats]{chisq.test}}) and add successive variables one at a time in the order specified by \code{vorder}. All model types give the same result for the two-way margin, namely the test of independence for the first two factors. Sequential models of \emph{joint independence} (\code{type="joint"}) have a particularly simple interpretation, because they decompose the likelihood ratio test for the model of mutual independence in the full n-way table, and hence account for "total" association in terms of portions attributable to the conditional probabilities of each new variable, given all prior variables. 
} \value{ An object of class \code{"loglmlist"}, each of which is a class \code{"loglm"} object %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } \references{ These functions were inspired by the original SAS implementation of mosaic displays, described in the \emph{User's Guide}, \url{http://www.datavis.ca/mosaics/mosaics.pdf} } \author{ Michael Friendly } \note{ One-way marginal tables are a bit of a problem here, because they cannot be fit directly using \code{\link[MASS]{loglm}}. The present version uses \code{\link[stats]{loglin}}, and repairs the result to look like a \code{loglm} object (sort of). } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{loglin-utilities}} for descriptions of sequential models, \code{\link{conditional}}, \code{\link{joint}}, \code{\link{mutual}}, \dots \code{\link{loglmlist}}, } \examples{ data(Titanic, package="datasets") # variables are in the order Class, Sex, Age, Survived tt <- seq_loglm(Titanic) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{models} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Mobility.Rd0000644000176200001440000000266714430460317014701 0ustar liggesusers\name{Mobility} \Rdversion{1.1} \alias{Mobility} \docType{data} \title{Social Mobility data} \description{Data on social mobility, recording the occupational category of fathers and their sons. } \usage{data(Mobility)} \format{ A 2-dimensional array resulting from cross-tabulating 2 variables for 19912 observations. The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Son's_Occupation}\tab \code{"UpNonMan", "LoNonMan", "UpManual", "LoManual", "Farm"}\cr 2\tab \code{Father's_Occupation}\tab \code{"UpNonMan", "LoNonMan", "UpManual", "LoManual", "Farm"}\cr } } %\details{ } \source{ Falguerolles, A. de and Mathieu, J. R. (1988). 
\emph{Proceedings of COMPSTAT 88}, Copenhagen, Denmark, Springer-Verlag. % \cite{FeathermanHauser:78} Featherman, D. L. and Hauser, R. M. Occupations and social mobility in the United States. \emph{Sociological Microjournal}, 12, Fiche 62. Copenhagen: Sociological Institute. } %\references{ %} \seealso{ \code{\link{Glass}}, \code{\link{Hauser79}}, \code{\link{Yamaguchi87}} for other examples of mobility data. } \examples{ data(Mobility) Mobility # independence model MASS::loglm(~Father_Occupation + Son_Occupation, data = Mobility) vcd::mosaic(Mobility, shade=TRUE, legend = FALSE) } \keyword{datasets} \concept{square tables} \concept{mobility tables} \concept{ordinal variables} vcdExtra/man/Draft1970table.Rd0000644000176200001440000000541714430460317015476 0ustar liggesusers\name{Draft1970table} \alias{Draft1970table} \docType{data} \title{ USA 1970 Draft Lottery Table } \description{ This data set gives the results of the 1970 US draft lottery, in the form of a frequency table. The rows are months of the year, Jan--Dec and columns give the number of days in that month which fall into each of three draft risk categories High, Medium, and Low, corresponding to the chances of being called to serve in the US army. } \usage{data(Draft1970table)} \format{ The format is: 'table' int [1:12, 1:3] 9 7 5 8 9 11 12 13 10 9 ... - attr(*, "dimnames")=List of 2 ..$ Month: chr [1:12] "Jan" "Feb" "Mar" "Apr" ... ..$ Risk : chr [1:3] "High" "Med" "Low" } \details{ The lottery numbers are divided into three categories of risk of being called for the draft -- High, Medium, and Low -- each representing roughly one third of the days in a year. Those birthdays having the highest risk have lottery numbers 1-122, medium risk have numbers 123-244, and the lowest risk category contains lottery numbers 245-366. 
} \source{ This data is available in several forms, but the table version was obtained from \url{https://sas.uwaterloo.ca/~rwoldfor/software/eikosograms/data/draft-70} } \references{ Fienberg, S. E. (1971), "Randomization and Social Affairs: The 1970 Draft Lottery," \emph{Science}, 171, 255-261. Starr, N. (1997). Nonrandom Risk: The 1970 Draft Lottery, \emph{Journal of Statistics Education}, v.5, n.2 \url{https://jse.amstat.org/v5n2/datasets.starr.html} } \seealso{\code{\link{Draft1970}} } \examples{ data(Draft1970table) chisq.test(Draft1970table) # plot.table -> graphics:::mosaicplot plot(Draft1970table, shade=TRUE) mosaic(Draft1970table, gp=shading_Friendly) # correspondence analysis if(require(ca)) { ca(Draft1970table) plot(ca(Draft1970table)) } # convert to a frequency data frame with ordered factors Draft1970df <- as.data.frame(Draft1970table) Draft1970df <- within(Draft1970df, { Month <- ordered(Month) Risk <- ordered(Risk, levels=rev(levels(Risk))) }) str(Draft1970df) # similar model, as a Poisson GLM indep <- glm(Freq ~ Month + Risk, family = poisson, data = Draft1970df) mosaic(indep, residuals_type="rstandard", gp=shading_Friendly) # numeric scores for tests of ordinal factors Cscore <- as.numeric(Draft1970df$Risk) Rscore <- as.numeric(Draft1970df$Month) # linear x linear association between Month and Risk linlin <- glm(Freq ~ Month + Risk + Rscore:Cscore, family = poisson, data = Draft1970df) # compare models anova(indep, linlin, test="Chisq") mosaic(linlin, residuals_type="rstandard", gp=shading_Friendly) } \keyword{datasets} \concept{correspondence analysis} \concept{generalized linear models} \concept{ordinal variables} vcdExtra/man/print.Kappa.Rd0000644000176200001440000000227214430460317015270 0ustar liggesusers\name{print.Kappa} \alias{print.Kappa} %- Also NEED an '\alias' for EACH other topic documented here. 
\title{ Print Kappa } \description{ This is a replacement for the \code{print.Kappa} method in \code{vcd}, adding display of \code{z} values to the \code{vcd} version and optional confidence intervals. } \usage{ \method{print}{Kappa}( x, digits=max(getOption("digits") - 3, 3), CI=FALSE, level=0.95, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A Kappa object} \item{digits}{number of digits to print} \item{CI}{Include confidence intervals in the display?} \item{level}{confidence level} \item{\dots}{ Other arguments } } %\details{ % ~~ If necessary, more details than the description above ~~ %} \value{ Returns the Kappa object, invisibly. } \author{ Michael Friendly} \seealso{ \code{\link[vcd]{confint.Kappa}} } \examples{ data("SexualFun") Kappa(SexualFun) print(Kappa(SexualFun), CI=TRUE) # stratified 3-way table apply(MSPatients, 3, Kappa) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{htest} \keyword{category} vcdExtra/man/Donner.Rd0000644000176200001440000001027214430460317014325 0ustar liggesusers \name{Donner} \alias{Donner} \docType{data} \title{ Survival in the Donner Party } \description{ This data frame contains information on the members of the Donner Party, a group of people who attempted to migrate to California in 1846. They were trapped by an early blizzard on the eastern side of the Sierra Nevada mountains, and before they could be rescued, nearly half of the party had died. What factors affected who lived and who died? } \usage{data(Donner)} \format{ A data frame with 90 observations on the following 5 variables. 
\describe{ \item{\code{family}}{family name, a factor with 10 levels } \item{\code{age}}{age of person, a numeric vector} \item{\code{sex}}{a factor with levels \code{Female} \code{Male}} \item{\code{survived}}{a numeric vector, 0 or 1} \item{\code{death}}{date of death for those who died before rescue, a POSIXct} } } \details{ This data frame uses the person's name as row labels. \code{family} reflects a recoding of the last names of individuals to reduce the number of factor levels. The main families in the Donner party were: Donner, Graves, Breen and Reed. The families of Murphy, Foster and Pike are grouped as \code{'MurFosPik'}, those of Fosdick and Wolfinger are coded as \code{'FosdWolf'}, and all others as \code{'Other'}. \code{survived} is the response variable. What kind of models should be used here? } \source{ D. K. Grayson, 1990, "Donner party deaths: A demographic assessment", \emph{J. Anthropological Research}, \bold{46}, 223-242. Johnson, K. (1996). \emph{Unfortunate Emigrants: Narratives of the Donner Party}. Logan, UT: Utah State University Press. Additions, and dates of death from \url{http://user.xmission.com/~octa/DonnerParty/Roster.htm}. } \references{ Ramsey, F.L. and Schafer, D.W. (2002). \emph{The Statistical Sleuth: A Course in Methods of Data Analysis}, (2nd ed), Duxbury. Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. } \seealso{ \code{donner} in \pkg{alr3}, \code{\link[Sleuth2]{case2001}} in \pkg{Sleuth2}(adults only) provide similar data sets. 
} \examples{ # conditional density plots op <- par(mfrow=c(1,2), cex.lab=1.5) cdplot(factor(survived) ~ age, subset=sex=='Male', data=Donner, main="Donner party: Males", ylevels=2:1, ylab="Survived", yaxlabels=c("yes", "no")) with(Donner, rug(jitter(age[sex=="Male"]), col="white", quiet=TRUE)) cdplot(factor(survived) ~ age, subset=sex=='Female', data=Donner, main="Donner party: Females", ylevels=2:1, ylab="Survived", yaxlabels=c("yes", "no")) with(Donner, rug(jitter(age[sex=="Female"]), col="white", quiet=TRUE)) par(op) # fit some models (mod1 <- glm(survived ~ age + sex, data=Donner, family=binomial)) (mod2 <- glm(survived ~ age * sex, data=Donner, family=binomial)) anova(mod2, test="Chisq") (mod3 <- glm(survived ~ poly(age,2) * sex, data=Donner, family=binomial)) anova(mod3, test="Chisq") LRstats(glmlist(mod1, mod2, mod3)) # plot fitted probabilities from mod2 and mod3 # idea from: http://www.ling.upenn.edu/~joseff/rstudy/summer2010_ggplot2_intro.html library(ggplot2) # separate linear fits on age for M/F ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ x, alpha = 0.2, size=2, aes(fill = sex)) # separate quadratics ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ poly(x,2), alpha = 0.2, size=2, aes(fill = sex)) } \keyword{datasets} \concept{generalized linear models} \concept{logistic regression} vcdExtra/man/HairEyePlace.Rd0000644000176200001440000000336214430460317015375 0ustar liggesusers\name{HairEyePlace} \alias{HairEyePlace} \docType{data} \title{ Hair Color and Eye Color in Caithness and Aberdeen } \description{ A three-way frequency table crossing eye color and hair color in two places, Caithness and Aberdeen, Scotland. 
These data were of interest to Fisher (1940) and others because there are mixtures of people of Nordic, Celtic and Anglo-Saxon origin. One or both tables have been widely analyzed in conjunction with RC and canonical correlation models for categorical data, e.g., Becker and Clogg (1989). } \usage{data(HairEyePlace)} \format{ The format is: num [1:4, 1:5, 1:2] 326 688 343 98 38 116 84 48 241 584 ... - attr(*, "dimnames")=List of 3 ..$ Eye : chr [1:4] "Blue" "Light" "Medium" "Dark" ..$ Hair : chr [1:5] "Fair" "Red" "Medium" "Dark" ... ..$ Place: chr [1:2] "Caithness" "Aberdeen" } \details{ The hair and eye colors are ordered as in the original source, suggesting that they form ordered categories. } \source{ This data was taken from the \code{colors} data in \pkg{logmult}. } \references{ Becker, M. P., and Clogg, C. C. (1989). Analysis of Sets of Two-Way Contingency Tables Using Association Models. \emph{Journal of the American Statistical Association}, 84(405), 142-151. Fisher, R.A. (1940) The precision of discriminant functions. \emph{Annals of Eugenics}, 10, 422-429. } \examples{ data(HairEyePlace) # separate mosaics mosaic(HairEyePlace[,,1], shade=TRUE, main="Caithness") mosaic(HairEyePlace[,,2], shade=TRUE, main="Aberdeen") # condition on Place mosaic(~Hair + Eye |Place, data=HairEyePlace, shade=TRUE, legend=FALSE) cotabplot(~Hair+Eye|Place, data=HairEyePlace, shade=TRUE, legend=FALSE) } \keyword{datasets} \concept{ordinal variables} vcdExtra/man/Burt.Rd0000644000176200001440000000377214430460317014023 0ustar liggesusers\name{Burt} \alias{Burt} \docType{data} \title{ Burt (1950) Data on Hair, Eyes, Head and Stature } \description{ Cyril Burt (1950) gave these data, on a sample of 100 people from Liverpool, to illustrate the application of a method of factor analysis (later called multiple correspondence analysis) applied to categorical data. 
He presented these data initially in the form that has come to be called a "Burt table", giving the univariate and bivariate frequencies for an n-way frequency table. } \usage{data("Burt")} \format{ A frequency data frame (representing a 3 x 3 x 2 x 2 frequency table) with 36 cells on the following 5 variables. \describe{ \item{\code{Hair}}{hair color, a factor with levels \code{Fair} \code{Red} \code{Dark}} \item{\code{Eyes}}{eye color, a factor with levels \code{Light} \code{Mixed} \code{Dark}} \item{\code{Head}}{head shape, a factor with levels \code{Narrow} \code{Wide}} \item{\code{Stature}}{height, a factor with levels \code{Tall} \code{Short}} \item{\code{Freq}}{a numeric vector} } } \details{ Burt says: "In all, 217 individuals were examined, about two-thirds of them males. But, partly to simplify the calculations and partly because the later observations were rather more trustworthy, I shall here restrict my analysis to the data obtained from the last hundred males in the series." \code{Head} and \code{Stature} reflect a binary coding where people are classified according to whether they are below or above the average for the population. } \source{ Burt, C. (1950). The factorial analysis of qualitative data, \emph{British Journal of Statistical Psychology}, \bold{3}(3), 166-185. Table IX. } %\references{ %% ~~ possibly secondary sources and usages ~~ %} \examples{ data(Burt) mosaic(Freq ~ Hair + Eyes + Head + Stature, data=Burt, shade=TRUE) #or burt.tab <- xtabs(Freq ~ Hair + Eyes + Head + Stature, data=Burt) mosaic(burt.tab, shade=TRUE) } \keyword{datasets} \concept{correspondence analysis} vcdExtra/man/blogits.Rd0000644000176200001440000000535714430460317014553 0ustar liggesusers\name{blogits} \alias{blogits} %- Also NEED an '\alias' for EACH other topic documented here. 
\title{ Bivariate Logits and Log Odds Ratio } \description{ This function calculates the log odds and log odds ratio for two binary responses classified by one or more stratifying variables. It is useful for plotting the results of bivariate logistic regression models, such as those fit using \code{\link[VGAM]{vglm}} in the \pkg{VGAM} package. } \usage{ blogits(Y, add, colnames, row.vars, rev=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{Y}{ A four-column matrix or data frame whose columns correspond to the 2 x 2 combinations of two binary responses. } \item{add}{ Constant added to all cells to allow for zero frequencies. The default is 0.5 if \code{any(Y==0)} and 0 otherwise. } \item{colnames}{ Names for the columns of the results. The default is \code{c("logit1", "logit2", "logOR")}. If fewer than three names are supplied, the remaining ones are filled in from the default. } \item{row.vars}{ A data frame or matrix giving the factor levels of one or more factors corresponding to the rows of \code{Y} } \item{rev}{A logical, indicating whether the order of the columns in \code{Y} should be reversed.} } \details{ For two binary variables with levels 0,1 the logits are calculated assuming the columns in \code{Y} are given in the order 11, 10, 01, 00, so the logits give the log odds of the 1 response compared to 0. If this is not the case, either use \code{rev=TRUE} or supply \code{Y[,4:1]} as the first argument. } \value{ A data frame with \code{nrow(Y)} rows and \code{3 + ncol(row.vars)} columns } \references{ Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}.
} \author{ Michael Friendly } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[VGAM]{vglm}} } \examples{ data(Toxaemia) tox.tab <- xtabs(Freq~class + smoke + hyper + urea, Toxaemia) # reshape to 4-column matrix toxaemia <- t(matrix(aperm(tox.tab), 4, 15)) colnames(toxaemia) <- c("hu", "hU", "Hu", "HU") rowlabs <- expand.grid(smoke=c("0", "1-19", "20+"), class=factor(1:5)) toxaemia <- cbind(toxaemia, rowlabs) # logits for H and U logitsTox <- blogits(toxaemia[,4:1], add=0.5, colnames=c("logitH", "logitW"), row.vars=rowlabs) logitsTox } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/WorkerSat.Rd0000644000176200001440000000235114430460317015020 0ustar liggesusers\name{WorkerSat} \alias{WorkerSat} \docType{data} \title{Worker Satisfaction Data} \description{ Blue collar workers job satisfaction from large scale investigation in Denmark in 1968 (Andersen, 1991). } \usage{data("WorkerSat")} \format{ A frequency data frame with 8 observations on the following 4 variables, representing the 2 x 2 x 2 classification of 715 cases. \describe{ \item{\code{Manage}}{Quality of management, an ordered factor with levels \code{bad} < \code{good}} \item{\code{Super}}{Supervisor satisfaction, an ordered factor with levels \code{low} < \code{high}} \item{\code{Worker}}{Worker job satisfaction, an ordered factor with levels \code{low} < \code{high}} \item{\code{Freq}}{a numeric vector} } } %\details{ %%% ~~ If necessary, more details than the __description__ above ~~ %} \source{ Originally from \url{https://online.stat.psu.edu/stat504/lesson/10/} } \references{ Andersen, E. B. (1991) Statistical Analysis of Categorical Data, 2nd Ed., Springer-Verlag. 
} \examples{ data(WorkerSat) worker.tab <- xtabs(Freq ~ Worker + Super + Manage, data=WorkerSat) fourfold(worker.tab) mosaic(worker.tab, shade=TRUE) } \keyword{datasets} \concept{loglinear models} vcdExtra/man/Summarise.Rd0000644000176200001440000000630714422306403015045 0ustar liggesusers\name{Summarise} \alias{Summarise} \alias{Summarise.glmlist} \alias{Summarise.loglmlist} \alias{Summarise.default} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Brief Summary of Model Fit for glm and loglm Models } \description{ For \code{glm} objects, the \code{print} and \code{summary} methods give too much information if all one wants to see is a brief summary of model goodness of fit, and there is no easy way to display a compact comparison of model goodness of fit for a collection of models fit to the same data. All \code{loglm} models have equivalent glm forms, but the \code{print} and \code{summary} methods give quite different results. \code{Summarise} provides a brief summary for one or more models fit to the same dataset for which \code{logLik} and \code{nobs} methods exist (e.g., \code{glm} and \code{loglm} models). %This implementation is experimental, and is subject to change. } \usage{ Summarise(object, ...) \method{Summarise}{glmlist}(object, ..., saturated = NULL, sortby = NULL) \method{Summarise}{loglmlist}(object, ..., saturated = NULL, sortby = NULL) \method{Summarise}{default}(object, ..., saturated = NULL, sortby = NULL) } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{object}{ a fitted model object for which there exists a logLik method to extract the corresponding log-likelihood} \item{\dots}{ optionally more fitted model objects } \item{saturated}{ saturated model log likelihood reference value (use 0 if deviance is not available) } \item{sortby}{ either a numeric or character string specifying the column in the result by which the rows are sorted (in decreasing order)} } \details{ The function relies on residual degrees of freedom for the LR chisq test being available in the model object. This is true for objects inheriting from \code{lm}, \code{glm}, \code{loglm}, \code{polr} and \code{negbin}. } \value{ A data frame (also of class \code{anova}) with columns \code{c("AIC", "BIC", "LR Chisq", "Df", "Pr(>Chisq)")}. Row names are taken from the names of the model object(s). } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Achim Zeileis } %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[stats]{logLik}}, \code{\link[stats]{glm}}, \code{\link[MASS]{loglm}}, \code{\link{logLik.loglm}}, \code{\link{modFit}} } \examples{ data(Mental) indep <- glm(Freq ~ mental+ses, family = poisson, data = Mental) Summarise(indep) Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) # compare models Summarise(indep, coleff, roweff, linlin) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{models} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/HouseTasks.Rd0000644000176200001440000000430114430460317015165 0ustar liggesusers\name{HouseTasks} \alias{HouseTasks} \docType{data} \title{ Household Tasks Performed by Husbands and Wives } \description{ A 13 x 4 table of frequencies of household tasks performed by couples, either by the `Husband`, `Wife`, `Alternating` or `Jointly`. } \usage{data("HouseTasks")} \format{ The format is: 'table' int [1:13, 1:4] 36 11 24 51 13 1 1 14 20 46 ... - attr(*, "dimnames")=List of 2 ..$ Task: chr [1:13] "Breakfast" "Dinner" "Dishes" "Driving" ... ..$ Who : chr [1:4] "Alternating" "Husband" "Jointly" "Wife" } %\details{ %% ~~ If necessary, more details than the __description__ above ~~ %} \source{ This data set was taken from \code{\link[factoextra]{housetasks}}, a 13 x 4 data.frame. In this table version, the rows and columns were sorted alphabetically (and a typo was corrected). } %\references{ %% ~~ possibly secondary sources and usages ~~ %} \examples{ data(HouseTasks) ## maybe str(HouseTasks) chisq.test(HouseTasks) # mosaic plot, illustrating some tweaks to handle overlapping labels require(vcd) mosaic(HouseTasks, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) # use seriation package to permute rows & cols using correspondence analysis if(require(seriation)) { order <- seriate(HouseTasks, method = "CA") # the permuted row and column labels rownames(HouseTasks)[order[[1]]] colnames(HouseTasks)[order[[2]]] # do the permutation HT_perm <- permute(HouseTasks, order, margin=1) mosaic(HT_perm, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) } } \keyword{datasets} \concept{correspondence analysis} 
\concept{reordering values} vcdExtra/man/split3d.Rd0000644000176200001440000000605414430460317014465 0ustar liggesusers\name{split3d} \Rdversion{1.1} \alias{split3d} \alias{split3d.shape3d} \alias{split3d.list} \alias{range3d} \alias{center3d} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Subdivide a 3D Object } \description{ Subdivides a \code{shape3d} object or a list of \code{shape3d} objects into objects of the same shape along a given dimension according to the proportions or frequencies specified in vector(s). \code{split3d} is the basic workhorse used in \code{\link{mosaic3d}}, but may be useful in other contexts. \code{range3d} and \code{center3d} are utility functions, also useful in other contexts. } \usage{ split3d(obj, ...) \method{split3d}{shape3d}(obj, p, dim, space = 0.1, ...) \method{split3d}{list}(obj, p, dim, space = 0.1, ...) range3d(obj) center3d(obj) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{ A \code{shape3d} object, or a list composed of them } \item{\dots}{ Other arguments for split3d methods } \item{p}{ For a single \code{shape3d} object, a vector of proportions (or a vector of non-negative numbers which will be normed to proportions) indicating the number of subdivisions and their scaling along dimension \code{dim}. For a list of \code{shape3d} objects, a matrix whose columns indicate the subdivisions of each object. } \item{dim}{ The dimension along which the object is to be subdivided. Either an integer: 1, 2, or 3, or a character: "x", "y", or "z". } \item{space}{ The total space used to separate the copies of the object along dimension \code{dim}. The unit inter-object space is therefore \code{space/(length(p)-1)}.
} } \details{ The resulting list of \code{shape3d} objects is actually composed of \emph{copies} of the input object(s), scaled according to the proportions in \code{p} and then translated to make their range along the splitting dimension equal to that of the input object(s). } \value{ \code{split3d} returns a list of \code{shape3d} objects. \code{range3d} returns a 2 x 3 matrix, whose first row contains the minima on dimensions x, y, z, and whose second row contains the maxima. \code{center3d} returns a numeric vector containing the means of the minima and maxima on dimensions x, y, z. } \author{ Duncan Murdoch, with refinements by Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{mosaic3d}} \code{\link[rgl]{shapelist3d}} for the plotting of lists of \code{shape3d} objects. } \examples{ if (require(rgl)) { open3d() cube <- cube3d(alpha=0.4) sl1 <- split3d(cube, c(.2, .3, .5), 1) col <- c("#FF000080", "#E5E5E580", "#0000FF80") shapelist3d(sl1, col=col) open3d() p <- matrix(c(.6, .4, .5, .5, .2, .8), nrow=2) sl2 <- split3d(sl1, p, 2) shapelist3d(sl2, col=col) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{dplot} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Caesar.Rd0000644000176200001440000000515014430460317014275 0ustar liggesusers\name{Caesar} \alias{Caesar} \docType{data} \title{Risk Factors for Infection in Caesarian Births} \description{Data from infection from birth by Caesarian section, classified by \code{Risk} (two levels), whether \code{Antibiotics} were used (two levels) and whether the Caesarian section was \code{Planned} or not. The outcome is \code{Infection} (three levels).} \usage{ data(Caesar) } \format{ A 4-dimensional array resulting from cross-tabulating 4 variables for 251 observations. 
The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Infection}\tab \code{"Type 1", "Type 2", "None"}\cr 2\tab \code{Risk}\tab \code{"Yes", "No"} (presence of risk factors)\cr 3\tab \code{Antibiotics}\tab \code{"Yes", "No"} (were antibiotics given?)\cr 4\tab \code{Planned}\tab \code{"Yes", "No"} (was the C section planned?)\cr } } \details{ \code{Infection} is regarded as the response variable here. There are quite a few 0 cells here, particularly when \code{Risk} is absent and the Caesarian section was unplanned. Should these be treated as structural or sampling zeros? } \source{ % \cite{Fahrmeir:94} Fahrmeir, L. & Tutz, G. (1994). Multivariate Statistical Modelling Based on Generalized Linear Models New York: Springer Verlag, Table 1.1. } %\references{ %} \seealso{\code{\link[Fahrmeir]{caesar}} for the same data recorded as a frequency data frame with other variables.} \examples{ data(Caesar) #display table; note that there are quite a few 0 cells structable(Caesar) require(MASS) # baseline model, Infection as response Caesar.mod0 <- loglm(~Infection + (Risk*Antibiotics*Planned), data=Caesar) # NB: Pearson chisq cannot be computed due to the 0 cells Caesar.mod0 mosaic(Caesar.mod0, main="Baseline model") # Illustrate handling structural zeros zeros <- 0+ (Caesar >0) zeros[1,,1,1] <- 1 structable(zeros) # fit model excluding possible structural zeros Caesar.mod0s <- loglm(~Infection + (Risk*Antibiotics*Planned), data=Caesar, start=zeros) Caesar.mod0s anova(Caesar.mod0, Caesar.mod0s, test="Chisq") mosaic (Caesar.mod0s) # what terms to add? add1(Caesar.mod0, ~.^2, test="Chisq") # add Association of Infection:Antibiotics Caesar.mod1 <- update(Caesar.mod0, ~ . 
+ Infection:Antibiotics) anova(Caesar.mod0, Caesar.mod1, test="Chisq") mosaic(Caesar.mod1, gp=shading_Friendly, main="Adding Infection:Antibiotics") } \keyword{datasets} \concept{loglinear models} \concept{zero counts} vcdExtra/man/logseries.Rd0000644000176200001440000000670714430460317015104 0ustar liggesusers\name{logseries} \alias{Logseries} \alias{dlogseries} \alias{plogseries} \alias{qlogseries} \alias{rlogseries} %- Also NEED an '\alias' for EACH other topic documented here. \title{ The Logarithmic Series Distribution } \description{ The logarithmic series distribution is a long-tailed distribution introduced by Fisher et al. (1943) in connection with data on the abundance of individuals classified by species. These functions provide the density, distribution function, quantile function and random generation for the logarithmic series distribution with parameter \code{prob}. } \usage{ dlogseries(x, prob = 0.5, log = FALSE) plogseries(q, prob = 0.5, lower.tail = TRUE, log.p = FALSE) qlogseries(p, prob = 0.5, lower.tail = TRUE, log.p = FALSE, max.value = 10000) rlogseries(n, prob = 0.5) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x, q}{ vector of quantiles representing the number of events. } \item{prob}{ parameter for the distribution, \code{0 < prob < 1} } \item{log, log.p}{ logical; if TRUE, probabilities \code{p} are given as \code{log(p)} } \item{lower.tail}{ logical; if TRUE (default), probabilities are \eqn{P[X \le x]}{P[X <= x]}, otherwise, \eqn{P[X > x]}{P[X > x]}. } \item{p}{ vector of probabilities } \item{max.value}{ maximum value returned by \code{qlogseries} } \item{n}{ number of observations for \code{rlogseries} } } \details{ The logarithmic series distribution with \code{prob} = \eqn{p} has density \deqn{ p ( x ) = \alpha p^x / x } for \eqn{x = 1, 2, \dots}, where \eqn{\alpha= -1 / \log(1 - p)} and \eqn{0 < p <1}. Note that counts \code{x==0} cannot occur.
} \value{ \code{dlogseries} gives the density, \code{plogseries} gives the distribution function, \code{qlogseries} gives the quantile function, and \code{rlogseries} generates random deviates. %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } \references{ \url{https://en.wikipedia.org/wiki/Logarithmic_distribution} Fisher, R. A. and Corbet, A. S. and Williams, C. B. (1943). The relation between the number of species and the number of individuals in a random sample of an animal population. \emph{Journal of Animal Ecology}, 12, 42-58. } \author{ Michael Friendly, using original code modified from the \code{gamlss.dist} package by Mikis Stasinopoulos. } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[stats]{Distributions}} } \examples{ XL <-expand.grid(x=1:5, p=c(0.33, 0.66, 0.99)) lgs.df <- data.frame(XL, prob=dlogseries(XL[,"x"], XL[,"p"])) lgs.df$p = factor(lgs.df$p) str(lgs.df) require(lattice) mycol <- palette()[2:4] xyplot( prob ~ x, data=lgs.df, groups=p, xlab=list('Number of events (k)', cex=1.25), ylab=list('Probability', cex=1.25), type='b', pch=15:17, lwd=2, cex=1.25, col=mycol, key = list( title = 'p', points = list(pch=15:17, col=mycol, cex=1.25), lines = list(lwd=2, col=mycol), text = list(levels(lgs.df$p)), x=0.9, y=0.98, corner=c(x=1, y=1) ) ) # random numbers hist(rlogseries(200, prob=.4), xlab='x') hist(rlogseries(200, prob=.8), xlab='x') } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory.
\keyword{distribution} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/loglin-utilities.Rd0000644000176200001440000001615414430460317016402 0ustar liggesusers\name{loglin-utilities} \alias{loglin-utilities} \alias{conditional} \alias{joint} \alias{loglin2formula} \alias{loglin2string} \alias{markov} \alias{mutual} \alias{saturated} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Loglinear Model Utilities } \description{ These functions generate lists of terms to specify a loglinear model in a form compatible with \code{\link[stats]{loglin}} and also provide for conversion to an equivalent \code{\link[MASS]{loglm}} specification or a shorthand character string representation. They allow for a more conceptual way to specify such models by a function for their type, as opposed to just an uninterpreted list of model terms and also allow easy specification of marginal models for a given contingency table. They are intended to be used as tools in higher-level modeling and graphics functions, but can also be used directly. } \usage{ conditional(nf, table = NULL, factors = 1:nf, with = nf) joint(nf, table = NULL, factors = 1:nf, with = nf) markov(nf, factors = 1:nf, order = 1) mutual(nf, table = NULL, factors = 1:nf) saturated(nf, table = NULL, factors = 1:nf) loglin2formula(x, env = parent.frame()) loglin2string(x, brackets = c("[", "]"), sep = ",", collapse = " ", abbrev) } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{nf}{ number of factors for which to generate the model } \item{table}{ a contingency table used only for factor names in the model, typically the output from \code{\link[base]{table}} and possibly permuted with \code{aperm} } \item{factors}{ names of factors used in the model formula when \code{table} is not specified } \item{with}{ For \code{joint} and \code{conditional} models, \code{with} gives the indices of the factors against which all others are considered jointly or conditionally independent } \item{order}{ For \code{markov}, this gives the order of the Markov chain model for the factors. An \code{order=1} Markov chain allows associations among sequential pairs of factors, e.g., \code{[A,B], [B,C], [C,D]} \dots. An \code{order=2} Markov chain allows associations among sequential triples. } \item{x}{ For the \code{loglin2*} functions, a list of terms in a loglinear model, such as returned by \code{conditional}, \code{joint}, \dots } \item{env}{ For \code{loglin2formula}, environment in which to evaluate the formula } \item{brackets}{ For \code{loglin2string}, characters to use to surround model terms. Either a single character string containing two characters (e.g., \code{'[]'}) or a character vector of length two. } \item{sep}{ For \code{loglin2string}, the separator character string used for factor names within a given model term } \item{collapse}{ For \code{loglin2string}, the character string used between terms in the model string } \item{abbrev}{ For \code{loglin2string}, whether and how to abbreviate the terms in the string representation. This has not yet been implemented. } } \details{ The main model specification functions, \code{conditional}, \code{joint}, \code{markov}, \dots, \code{saturated}, return a list of vectors indicating the marginal totals to be fit, via the \code{margin} argument to \code{\link[stats]{loglin}}.
Each element of this list corresponds to a high-order term in a hierarchical loglinear model, where, e.g., a term like \code{c("A", "B")} is equivalent to the \code{\link[MASS]{loglm}} term \code{"A:B"} and hence automatically includes all low-order terms. Note that these can be used to supply the \code{expected} argument for the default \code{\link[vcd]{mosaic}} function, when the data is supplied as a contingency table. The table below shows some typical results in terms of the standard shorthand notation for loglinear models, with factors A, B, C, \dots, where brackets are used to delimit the high-order terms in the loglinear model. \tabular{llll}{ \strong{function} \tab \strong{3-way} \tab \strong{4-way} \tab \strong{5-way} \cr \code{mutual} \tab [A] [B] [C] \tab [A] [B] [C] [D] \tab [A] [B] [C] [D] [E] \cr \code{joint} \tab [AB] [C] \tab [ABC] [D] \tab [ABCD] [E] \cr \code{joint (with=1)} \tab [A] [BC] \tab [A] [BCD] \tab [A] [BCDE] \cr \code{conditional} \tab [AC] [BC] \tab [AD] [BD] [CD] \tab [AE] [BE] [CE] [DE] \cr \code{condit (with=1)} \tab [AB] [AC] \tab [AB] [AC] [AD] \tab [AB] [AC] [AD] [AE] \cr \code{markov (order=1)} \tab [AB] [BC] \tab [AB] [BC] [CD] \tab [AB] [BC] [CD] [DE] \cr \code{markov (order=2)} \tab [A] [B] [C] \tab [ABC] [BCD] \tab [ABC] [BCD] [CDE] \cr \code{saturated} \tab [ABC] \tab [ABCD] \tab [ABCDE] \cr } \code{loglin2formula} converts the output of one of these to a model formula suitable as the \code{formula} argument of \code{\link[MASS]{loglm}}. \code{loglin2string} converts the output of one of these to a string describing the loglinear model in the shorthand bracket notation, e.g., \code{"[A,B] [A,C]"}. } \value{ For the main model specification functions, \code{conditional}, \code{joint}, \code{markov}, \dots, the result is a list of vectors (terms), where the elements in each vector are the names of the factors. The elements of the list are given names \code{term1, term2, \dots}.
} \references{ These functions were inspired by the original SAS implementation of mosaic displays, described in the \emph{User's Guide}, \url{http://www.datavis.ca/mosaics/mosaics.pdf} } \author{ Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[stats]{loglin}}, \code{\link[MASS]{loglm}} } \examples{ joint(3, table=HairEyeColor) # as a formula or string loglin2formula(joint(3, table=HairEyeColor)) loglin2string(joint(3, table=HairEyeColor)) joint(2, HairEyeColor) # marginal model for [Hair] [Eye] # other possibilities joint(4, factors=letters, with=1) joint(5, factors=LETTERS) joint(5, factors=LETTERS, with=4:5) conditional(4) conditional(4, with=3:4) # use in mosaic displays or other strucplots mosaic(HairEyeColor, expected=joint(3)) mosaic(HairEyeColor, expected=conditional(3)) # use with MASS::loglm cond3 <- loglin2formula(conditional(3, table=HairEyeColor)) cond3 <- loglin2formula(conditional(3)) # same, with factors 1,2,3 require(MASS) loglm(cond3, data=HairEyeColor) saturated(3, HairEyeColor) loglin2formula(saturated(3, HairEyeColor)) loglin2string(saturated(3, HairEyeColor)) loglin2string(saturated(3, HairEyeColor), brackets='{}', sep=', ') } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{models} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Glass.Rd0000644000176200001440000000450414430460317014152 0ustar liggesusers\name{Glass} \alias{Glass} \docType{data} \title{British Social Mobility from Glass(1954)} \description{ Glass(1954) gave this 5 x 5 table on the occupations of 3500 British fathers and their sons. } \usage{data("Glass")} \format{ A frequency data frame with 25 observations on the following 3 variables representing a 5 x 5 table with 3500 cases. 
\describe{ \item{\code{father}}{a factor with levels \code{Managerial} \code{Professional} \code{Skilled} \code{Supervisory} \code{Unskilled}} \item{\code{son}}{a factor with levels \code{Managerial} \code{Professional} \code{Skilled} \code{Supervisory} \code{Unskilled}} \item{\code{Freq}}{a numeric vector} } } \details{ The occupational categories in order of status are: (1) Professional & High Administrative (2) Managerial, Executive & High Supervisory (3) Low Inspectional & Supervisory (4) Routine Nonmanual & Skilled Manual (5) Semi- & Unskilled Manual However, to make the point that factors are ordered alphabetically by default, Friendly & Meyer (2016) introduce this data set in the form given here. } \source{ Glass, D. V. (1954), \emph{Social Mobility in Britain}. The Free Press. } \references{ Bishop, Y. M. M. and Fienberg, S. E. and Holland, P. W. (1975). \emph{Discrete Multivariate Analysis: Theory and Practice}, MIT Press. Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. 
} \examples{ data(Glass) glass.tab <- xtabs(Freq ~ father + son, data=Glass) largs <- list(set_varnames=list(father="Father's Occupation", son="Son's Occupation"), abbreviate=10) gargs <- list(interpolate=c(1,2,4,8)) mosaic(glass.tab, shade=TRUE, labeling_args=largs, gp_args=gargs, main="Alphabetic order", legend=FALSE, rot_labels=c(20,90,0,70)) # reorder by status ord <- c(2, 1, 4, 3, 5) mosaic(glass.tab[ord, ord], shade=TRUE, labeling_args=largs, gp_args=gargs, main="Effect order", legend=FALSE, rot_labels=c(20,90,0,70)) } \keyword{datasets} \concept{square} \concept{mobility} \concept{reorder} \concept{square tables} \concept{mobility tables} \concept{reordering values} vcdExtra/man/AirCrash.Rd0000644000176200001440000000423414470737600014603 0ustar liggesusers\name{AirCrash} \alias{AirCrash} \docType{data} \title{ Air Crash Data } \description{ Data on all fatal commercial airplane crashes from 1993--2015. Excludes small planes (less than 6 passengers) and non-commercial (cargo, military, private) aircraft. } \usage{data("AirCrash")} \format{ A data frame with 439 observations on the following 5 variables. \describe{ \item{\code{Phase}}{phase of the flight, a factor with levels \code{en route} \code{landing} \code{standing} \code{take-off} \code{unknown}} \item{\code{Cause}}{a factor with levels \code{criminal} \code{human error} \code{mechanical} \code{unknown} \code{weather}} \item{\code{date}}{date of crash, a Date} \item{\code{Fatalities}}{number of fatalities, a numeric vector} \item{\code{Year}}{year, a numeric vector} } } \details{ \code{Phase} of the flight was cleaned by combining related variants, spelling, etc. } \source{ Originally from David McCandless, \url{https://informationisbeautiful.net/visualizations/plane-truth-every-single-commercial-plane-crash-visualized/}, with the data at \url{https://docs.google.com/spreadsheets/d/1OvDq4_BtbR6nSnnHnjD5hVC3HQ-ulZPGbo0RDGbzM3Q/edit?usp=drive_web}, downloaded April 14, 2015. 
} \references{ Rick Wicklin, \url{http://blogs.sas.com/content/iml/2015/03/30/visualizing-airline-crashes.html} } \examples{ data(AirCrash) aircrash.tab <- xtabs(~Phase + Cause, data=AirCrash) mosaic(aircrash.tab, shade=TRUE) # fix label overlap mosaic(aircrash.tab, shade=TRUE, labeling_args=list(rot_labels=c(30, 30, 30, 30))) # reorder by Phase phase.ord <- rev(c(3,4,1,2,5)) mosaic(aircrash.tab[phase.ord,], shade=TRUE, labeling_args=list(rot_labels=c(30, 30, 30, 30)), offset_varnames=0.5) # reorder by frequency phase.ord <- order(rowSums(aircrash.tab), decreasing=TRUE) cause.ord <- order(colSums(aircrash.tab), decreasing=TRUE) mosaic(aircrash.tab[phase.ord,cause.ord], shade=TRUE, labeling_args=list(rot_labels=c(30, 30, 30, 30))) library(ca) aircrash.ca <- ca(aircrash.tab) plot(aircrash.ca) } \keyword{datasets} \concept{reordering values} \concept{correspondence analysis} vcdExtra/man/Kway.Rd0000644000176200001440000001051714430460317014015 0ustar liggesusers\name{Kway} \Rdversion{1.1} \alias{Kway} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fit All K-way Models in a GLM } \description{ Generate and fit all 0-way, 1-way, 2-way, ... k-way terms in a glm. This function is designed mainly for hierarchical loglinear models (or \code{glm}s in the poisson family), where it is desired to find the highest-order terms necessary to achieve a satisfactory fit. Using \code{\link[stats]{anova}} on the resulting \code{\link{glmlist}} object will then give sequential tests of the pooled contributions of all terms of degree \eqn{k+1} over and above those of degree \eqn{k}. This function is also intended as an example of a generating function for \code{\link{glmlist}} objects, to facilitate model comparison, extraction, summary and plotting of model components, etc., perhaps using \code{lapply} or similar. } \usage{ Kway(formula, family=poisson, data, ..., order = nt, prefix = "kway") } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{formula}{ a two-sided formula for the 1-way effects in the model. The LHS should be the response, and the RHS should be the first-order terms connected by \code{+} signs. } \item{family}{ a description of the error distribution and link function to be used in the model. This can be a character string naming a family function, a family function or the result of a call to a family function. (See \code{\link[stats]{family}} for details of family functions.) } \item{data}{ an optional data frame, list or environment (or object coercible by \code{\link[base]{as.data.frame}} to a data frame) containing the variables in the model. If not found in data, the variables are taken from \code{environment(formula)}, typically the environment from which \code{glm} is called. } \item{\dots}{Other arguments passed to \code{glm}} \item{order}{Highest order interaction of the models generated. Defaults to the number of terms in the model formula.} \item{prefix}{Prefix used to label the models fit in the \code{glmlist} object.} } \details{ With \code{y} as the response in the \code{formula}, the 0-way (null) model is \code{y ~ 1}. The 1-way ("main effects") model is that specified in the \code{formula} argument. The k-way model is generated using the formula \code{. ~ .^k}. With the default \code{order = nt}, the final model is the saturated model. As presently written, the function requires a two-sided formula with an explicit response on the LHS. For frequency data in table form (e.g., produced by \code{xtabs}) the \code{data} argument is coerced to a data.frame, so you should supply the \code{formula} in the form \code{Freq ~ } \dots. } \value{ An object of class \code{glmlist}, of length \code{order+1} containing the 0-way, 1-way, ... models up to degree \code{order}. 
} %\references{ %%% ~put references to the literature/web site here ~ %} \author{ Michael Friendly and Heather Turner } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{glmlist}}, \code{\link{Summarise}} (soon to be deprecated), \code{\link{LRstats}} } \examples{ ## artificial data factors <- expand.grid(A=factor(1:3), B=factor(1:2), C=factor(1:3), D=factor(1:2)) Freq <- rpois(nrow(factors), lambda=40) df <- cbind(factors, Freq) mods3 <- Kway(Freq ~ A + B + C, data=df, family=poisson) LRstats(mods3) mods4 <- Kway(Freq ~ A + B + C + D, data=df, family=poisson) LRstats(mods4) # JobSatisfaction data data(JobSatisfaction, package="vcd") modSat <- Kway(Freq ~ management+supervisor+own, data=JobSatisfaction, family=poisson, prefix="JobSat") LRstats(modSat) anova(modSat, test="Chisq") # Rochdale data: very sparse, in table form data(Rochdale, package="vcd") \dontrun{ modRoch <- Kway(Freq~EconActive + Age + HusbandEmployed + Child + Education + HusbandEducation + Asian + HouseholdWorking, data=Rochdale, family=poisson) LRstats(modRoch) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{models} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Hoyt.Rd0000644000176200001440000000664314430460317014032 0ustar liggesusers\name{Hoyt} \Rdversion{1.1} \alias{Hoyt} \docType{data} \title{Minnesota High School Graduates} \description{Minnesota high school graduates of June 1930 were classified with respect to (a) \code{Rank} by thirds in their graduating class, (b) post-high school \code{Status} in April 1939 (4 levels), (c) \code{Sex}, (d) father's \code{Occupation}al status (7 levels, from 1=High to 7=Low). The data were first presented by Hoyt et al. (1959) and have been analyzed by Fienberg(1980), Plackett(1974) and others. 
} \usage{ data(Hoyt) } \format{ A 4-dimensional array resulting from cross-tabulating 4 variables for 13968 observations. The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Status}\tab \code{"College", "School", "Job", "Other"}\cr 2\tab \code{Rank}\tab \code{"Low", "Middle", "High"}\cr 3\tab \code{Occupation}\tab \code{"1", "2", "3", "4", "5", "6", "7"}\cr 4\tab \code{Sex}\tab \code{"Male", "Female"}\cr } } \details{Post high-school \code{Status} is natural to consider as the response. \code{Rank} and father's \code{Occupation} are ordinal variables.} \source{ % \cite{Hoyt-etal:59} % \cite{Fienberg:80 [pp.91-92]} % \cite{Plackett:74} % \cite{minn38{MASS}} Fienberg, S. E. (1980). \emph{The Analysis of Cross-Classified Categorical Data}. Cambridge, MA: MIT Press, p. 91-92. R. L. Plackett, (1974). \emph{The Analysis of Categorical Data}. London: Griffin. } \references{ Hoyt, C. J., Krishnaiah, P. R. and Torrance, E. P. (1959) Analysis of complex contingency tables, \emph{Journal of Experimental Education} 27, 187-194. } \seealso{ \code{\link[MASS]{minn38}} provides the same data as a data frame. } \examples{ data(Hoyt) # display the table structable(Status + Sex ~ Rank + Occupation, data=Hoyt) # mosaic for independence model plot(Hoyt, shade=TRUE) # examine all pairwise mosaics pairs(Hoyt, shade=TRUE) # collapse Status to College vs. 
Non-College Hoyt1 <- collapse.table(Hoyt, Status=c("College", rep("Non-College",3))) plot(Hoyt1, shade=TRUE) ################################################# # fitting models with loglm, plotting with mosaic ################################################# # fit baseline log-linear model for Status as response require(MASS) hoyt.mod0 <- loglm(~ Status + (Sex*Rank*Occupation), data=Hoyt1) hoyt.mod0 mosaic(hoyt.mod0, gp=shading_Friendly, main="Baseline model: Status + (Sex*Rank*Occ)") # add one-way association of Status with factors hoyt.mod1 <- loglm(~ Status * (Sex + Rank + Occupation) + (Sex*Rank*Occupation), data=Hoyt1) hoyt.mod1 mosaic(hoyt.mod1, gp=shading_Friendly, main="Status * (Sex + Rank + Occ)") # can we drop any terms? drop1(hoyt.mod1, test="Chisq") # assess model fit anova(hoyt.mod0, hoyt.mod1) # what terms to add? add1(hoyt.mod1, ~.^2, test="Chisq") # add interaction of Sex:Occupation on Status hoyt.mod2 <- update(hoyt.mod1, ~ . + Status:Sex:Occupation) mosaic(hoyt.mod2, gp=shading_Friendly, main="Adding Status:Sex:Occupation") # compare model fits anova(hoyt.mod0, hoyt.mod1, hoyt.mod2) # Alternatively, try stepwise analysis, heading toward the saturated model steps <- step(hoyt.mod0, direction="forward", scope=~Status*Sex*Rank*Occupation) # display anova steps$anova } \keyword{datasets} \concept{loglinear models} vcdExtra/man/Accident.Rd0000644000176200001440000000751714430460317014622 0ustar liggesusers\name{Accident} \alias{Accident} \docType{data} \title{ Traffic Accident Victims in France in 1958 } \description{ Bertin (1983) used these data to illustrate the cross-classification of data by numerous variables, each of which could have various types and could be assigned to various visual attributes. For modeling and visualization purposes, the data can be treated as a 4-way table using loglinear models and mosaic displays, or as a frequency-weighted data frame using a binomial response for \code{result} (\code{"Died"} vs. 
\code{"Injured"}) and plots of predicted probabilities. } \usage{data(Accident)} \format{ A data frame in frequency form (comprising a 5 x 2 x 4 x 2 table) with 80 observations on the following 5 variables. \describe{ \item{\code{age}}{an ordered factor with levels \code{0-9} < \code{10-19} < \code{20-29} < \code{30-49} < \code{50+}} \item{\code{result}}{a factor with levels \code{Died} \code{Injured}} \item{\code{mode}}{mode of transportation, a factor with levels \code{4-Wheeled} \code{Bicycle} \code{Motorcycle} \code{Pedestrian}} \item{\code{gender}}{a factor with levels \code{Female} \code{Male}} \item{\code{Freq}}{a numeric vector} } } \details{ \code{age} is an ordered factor, but arguably, \code{mode} should be treated as ordered, with levels \code{Pedestrian} < \code{Bicycle} < \code{Motorcycle} < \code{4-Wheeled} as Bertin does. This affects the parameterization in models, so we don't do this directly in the data frame. } \source{ Bertin (1983), p. 30; original data from the Ministere des Travaux Publics } \references{ Bertin, J. (1983), \emph{Semiology of Graphics}, University of Wisconsin Press. 
} \examples{ # examples data(Accident) head(Accident) # for graphs, reorder mode Accident$mode <- ordered(Accident$mode, levels=levels(Accident$mode)[c(4,2,3,1)]) # Bertin's table accident_tab <- xtabs(Freq ~ gender + mode + age + result, data=Accident) structable(mode + gender ~ age + result, data=accident_tab) ## Loglinear models ## ---------------- # mutual independence acc.mod0 <- glm(Freq ~ age + result + mode + gender, data=Accident, family=poisson) LRstats(acc.mod0) mosaic(acc.mod0, ~mode + age + gender + result) # result as a response acc.mod1 <- glm(Freq ~ age*mode*gender + result, data=Accident, family=poisson) LRstats(acc.mod1) mosaic(acc.mod1, ~mode + age + gender + result, labeling_args = list(abbreviate = c(gender=1, result=4))) # allow two-way association of result with each explanatory variable acc.mod2 <- glm(Freq ~ age*mode*gender + result*(age+mode+gender), data=Accident, family=poisson) LRstats(acc.mod2) mosaic(acc.mod2, ~mode + age + gender + result, labeling_args = list(abbreviate = c(gender=1, result=4))) acc.mods <- glmlist(acc.mod0, acc.mod1, acc.mod2) LRstats(acc.mods) ## Binomial (logistic regression) models for result ## ------------------------------------------------ library(car) # for Anova() acc.bin1 <- glm(result=='Died' ~ age + mode + gender, weights=Freq, data=Accident, family=binomial) Anova(acc.bin1) acc.bin2 <- glm(result=='Died' ~ (age + mode + gender)^2, weights=Freq, data=Accident, family=binomial) Anova(acc.bin2) acc.bin3 <- glm(result=='Died' ~ (age + mode + gender)^3, weights=Freq, data=Accident, family=binomial) Anova(acc.bin3) # compare models anova(acc.bin1, acc.bin2, acc.bin3, test="Chisq") # visualize probability of death with effect plots \dontrun{ library(effects) plot(allEffects(acc.bin1), ylab='Pr (Died)') plot(allEffects(acc.bin2), ylab='Pr (Died)') } #} \keyword{datasets} \concept{loglinear models} \concept{generalized linear models} \concept{logistic regression} 
vcdExtra/man/vcdExtra-package.Rd0000644000176200001440000001276514470437420016265 0ustar liggesusers\name{vcdExtra-package} \alias{vcdExtra-package} \alias{vcdExtra} \docType{package} \title{ Extensions and additions to vcd: Visualizing Categorical Data } \description{ % \if{html}{\figure{man/figures/logo.png}{options: align='right' alt='logo' width='100'}} This package provides additional data sets, documentation, and a few functions designed to extend the \code{vcd} package for Visualizing Categorical Data and the \code{gnm} package for Generalized Nonlinear Models. In particular, vcdExtra extends mosaic, assoc and sieve plots from vcd to handle glm() and gnm() models and adds a 3D version in \code{\link{mosaic3d}}. This package is also a support package for the book, \emph{Discrete Data Analysis with R} by Michael Friendly and David Meyer, Chapman & Hall/CRC, 2016, \url{https://www.routledge.com/Discrete-Data-Analysis-with-R-Visualization-and-Modeling-Techniques-for/Friendly-Meyer/9781498725835} with a number of additional data sets, and functions. The web site for the book is \url{http://ddar.datavis.ca}. In addition, I teach a course, \emph{Psy 6136: Categorical Data Analysis}, \url{https://friendly.github.io/psy6136/} using this package. } \details{ The main purpose of this package is to serve as a sandbox for introducing extensions of mosaic plots and related graphical methods that apply to loglinear models fitted using \code{glm()} and related, generalized nonlinear models fitted with \code{gnm()} in the \code{\link[gnm]{gnm-package}} package. A related purpose is to fill in some holes in the analysis of categorical data in R, not provided in base R, the \pkg{vcd}, or other commonly used packages. The method \code{\link{mosaic.glm}} extends the \code{\link[vcd]{mosaic.loglm}} method in the \pkg{vcd} package to this wider class of models. 
This method also works for the generalized nonlinear models fit with the \code{\link[gnm]{gnm-package}} package, including models for square tables and models with multiplicative associations. \code{\link{mosaic3d}} introduces a 3D generalization of mosaic displays using the \pkg{rgl} package. In addition, there are several new data sets, a tutorial vignette, \describe{ \item{vcd-tutorial}{Working with categorical data with R and the vcd package, \code{vignette("vcd-tutorial", package = "vcdExtra") }} } and a few functions for manipulating categorical data sets and working with models for categorical data. A new class, \code{\link{glmlist}}, is introduced for working with collections of \code{glm} objects, e.g., \code{\link{Kway}} for fitting all K-way models from a basic marginal model, and \code{\link{LRstats}} for brief statistical summaries of goodness-of-fit for a collection of models. For square tables with ordered factors, \code{\link{Crossings}} supplements the specification of terms in model formulas using \code{\link[gnm]{Symm}}, \code{\link[gnm]{Diag}}, \code{\link[gnm]{Topo}}, etc. in the \code{\link[gnm]{gnm-package}}. Some of these extensions may be migrated into \pkg{vcd} or \pkg{gnm}. 
A collection of demos is included to illustrate fitting and visualizing a wide variety of models: \describe{ \item{mental-glm}{Mental health data: mosaics for glm() and gnm() models} \item{occStatus}{Occupational status data: Compare mosaic using expected= to mosaic.glm} \item{ucb-glm}{UCBAdmissions data: Conditional independence via loglm() and glm()} \item{vision-quasi}{VisualAcuity data: Quasi- and Symmetry models} \item{yaish-unidiff}{Yaish data: Unidiff model for 3-way table} \item{Wong2-3}{Political views and support for women to work (U, R, C, R+C and RC(1) models)} \item{Wong3-1}{Political views, support for women to work and national welfare spending (3-way, marginal, and conditional independence models)} \item{housing}{Visualize glm(), multinom() and polr() models from \code{example(housing, package="MASS")}} } Use \code{ demo(package="vcdExtra")} for a complete current list. The \pkg{vcdExtra} package now contains a large number of data sets illustrating various forms of categorical data analysis and related visualizations, from simple to advanced. Use \code{data(package="vcdExtra")} for a complete list, or \code{datasets(package="vcdExtra")} for an annotated one showing the \code{class} and \code{dim} for each data set. } \author{ Michael Friendly Maintainer: Michael Friendly || (\href{https://orcid.org/0000-0002-3237-0941}{ORCID}) } \references{ Friendly, M. \emph{Visualizing Categorical Data}, Cary NC: SAS Institute, 2000. Web materials: \url{http://www.datavis.ca/books/vcd/}. Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. Meyer, D.; Zeileis, A. & Hornik, K. The Strucplot Framework: Visualizing Multi-way Contingency Tables with vcd \emph{Journal of Statistical Software}, 2006, \bold{17}, 1-48. Available in R via \code{vignette("strucplot", package = "vcd")} Turner, H. and Firth, D. 
\emph{Generalized nonlinear models in R: An overview of the gnm package}, 2007, \url{http://eprints.ncrm.ac.uk/472/}. Available in R via \code{vignette("gnmOverview", package = "gnm")}. } \keyword{ package } \seealso{ \code{\link[gnm]{gnm-package}}, for an extended range of models for contingency tables \code{\link[vcd]{mosaic}} for details on mosaic displays within the strucplot framework. } \examples{ example(mosaic.glm) demo("mental-glm") } vcdExtra/man/JobSat.Rd0000644000176200001440000000226414430460317014264 0ustar liggesusers\name{JobSat} \Rdversion{1.1} \alias{JobSat} \docType{data} \title{Cross-classification of job satisfaction by income} \description{ This data set is a contingency table of job satisfaction by income for a small sample of black males from the 1996 General Social Survey, as used by Agresti (2002) for an example. } \usage{data(JobSat)} \format{ A 4 x 4 contingency table of \code{income} by \code{satisfaction}, with the following structure: \preformatted{ table [1:4, 1:4] 1 2 1 0 3 3 6 1 10 10 ... - attr(*, "dimnames")=List of 2 ..$ income : chr [1:4] "< 15k" "15-25k" "25-40k" "> 40k" ..$ satisfaction: chr [1:4] "VeryD" "LittleD" "ModerateS" "VeryS" } } \details{ Both \code{income} and \code{satisfaction} are ordinal variables, and are so ordered in the table. Measures of association, visualizations, and models should take ordinality into account. } \source{ Agresti, A. Categorical Data Analysis John Wiley & Sons, 2002, Table 2.8, p. 57. 
} %\references{ % ~~ possibly secondary sources and usages ~~ %} \examples{ data(JobSat) assocstats(JobSat) GKgamma(JobSat) } \keyword{datasets} \concept{loglinear models} \concept{ordinal variables} vcdExtra/man/PhdPubs.Rd0000644000176200001440000000405414430460317014446 0ustar liggesusers\name{PhdPubs} \alias{PhdPubs} \docType{data} \title{ Publications of PhD Candidates } \description{ A data set giving the number of publications by doctoral candidates in biochemistry in relation to various predictors, originally from Long (1997). There is a large number of zero counts. Is there evidence for a separate group of non-publishers? } \usage{data(PhdPubs)} \format{ A data frame with 915 observations on the following 6 variables. \describe{ \item{\code{articles}}{number of articles published in the final three years of PhD studies} \item{\code{female}}{dummy variable for gender, coded \code{1} for female} \item{\code{married}}{dummy variable for marital status, coded \code{1} for married} \item{\code{kid5}}{number of young children, age 5 and under} \item{\code{phdprestige}}{prestige of the PhD department. The higher the number the more prestigious the program. } \item{\code{mentor}}{number of publications by the mentor in the preceding three years} } } \details{ In this version of the data set, \code{phdprestige} had been rounded to the nearest integer. A Stata version with the continuous values was subsequently found at \url{https://www.stata-press.com/data/lf2/couart2.dta} } \source{ Long, J. S. (1997). \emph{Regression Models for Categorical and Limited Dependent Variables}, Sage. Long, J. S. & Freese, J. (2006). \emph{Regression Models for Categorical Dependent Variables Using Stata}, 2nd Ed., Stata Press. 
} %\references{ %% ~~ possibly secondary sources and usages ~~ %} \examples{ data(PhdPubs) # very uninformative hist(PhdPubs$articles, breaks=0:19, col="pink", xlim=c(0,20), xlab="Number of Articles") library(vcd) rootogram(goodfit(PhdPubs$articles), xlab="Number of Articles") # compare with negative binomial rootogram(goodfit(PhdPubs$articles, type="nbinomial"), xlab="Number of Articles", main="Negative binomial") } \keyword{datasets} \concept{generalized linear models} \concept{zero counts} \concept{Poisson distributions} vcdExtra/man/collapse.table.Rd0000644000176200001440000000721214422306403015764 0ustar liggesusers\name{collapse.table} \alias{collapse.table} %- Also NEED an '\alias' for EACH other topic documented here. \title{Collapse Levels of a Table} \description{ Collapse (or re-label) variables in a a contingency table, array or \code{ftable} object by re-assigning levels of the table variables. } \usage{ collapse.table(table, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{table}{A \code{\link[base]{table}}, \code{\link[base]{array}} or \code{\link[stats]{ftable}} object} \item{\dots}{ A collection of one or more assignments of factors of the table to a list of levels } } \details{ Each of the \code{\dots} arguments must be of the form \code{variable = levels}, where \code{variable} is the name of one of the table dimensions, and \code{levels} is a character or numeric vector of length equal to the corresponding dimension of the table. } \value{ A \code{xtabs} and \code{table} object, representing the original table with one or more of its factors collapsed or rearranged into other levels. } %\references{ ~put references to the literature/web site here ~ } \author{Michael Friendly} %\note{ ~~further notes~~ % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link{expand.dft}} expands a frequency data frame to case form. 
\code{\link[base]{margin.table}} "collapses" a table in a different way, by summing over table dimensions. } \examples{ # create some sample data in table form sex <- c("Male", "Female") age <- letters[1:6] education <- c("low", 'med', 'high') data <- expand.grid(sex=sex, age=age, education=education) counts <- rpois(36, 100) data <- cbind(data, counts) t1 <- xtabs(counts ~ sex + age + education, data=data) structable(t1) ## age a b c d e f ## sex education ## Male low 119 101 109 85 99 93 ## med 94 98 103 108 84 84 ## high 81 88 96 110 100 92 ## Female low 107 104 95 86 103 96 ## med 104 98 94 95 110 106 ## high 93 85 90 109 99 86 # collapse age to 3 levels t2 <- collapse.table(t1, age=c("A", "A", "B", "B", "C", "C")) structable(t2) ## age A B C ## sex education ## Male low 220 194 192 ## med 192 211 168 ## high 169 206 192 ## Female low 211 181 199 ## med 202 189 216 ## high 178 199 185 # collapse age to 3 levels and pool education: "low" and "med" to "low" t3 <- collapse.table(t1, age=c("A", "A", "B", "B", "C", "C"), education=c("low", "low", "high")) structable(t3) ## age A B C ## sex education ## Male low 412 405 360 ## high 169 206 192 ## Female low 413 370 415 ## high 178 199 185 # change labels for levels of education to 1:3 t4 <- collapse.table(t1, education=1:3) structable(t4) structable(t4) ## age a b c d e f ## sex education ## Male 1 119 101 109 85 99 93 ## 2 94 98 103 108 84 84 ## 3 81 88 96 110 100 92 ## Female 1 107 104 95 86 103 96 ## 2 104 98 94 95 110 106 ## 3 93 85 90 109 99 86 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{manip} \keyword{attribute}% __ONLY ONE__ keyword per line vcdExtra/man/DaytonSurvey.Rd0000644000176200001440000000530014430460317015550 0ustar liggesusers\name{DaytonSurvey} \alias{DaytonSurvey} \docType{data} \title{Dayton Student Survey on Substance Use} \description{ This data, from Agresti (2002), Table 9.1, gives the result of a 1992 survey in Dayton Ohio of 2276 high school seniors on whether they had ever used alcohol, cigarettes and marijuana. } \usage{data(DaytonSurvey)} \format{ A frequency data frame with 32 observations on the following 6 variables. \describe{ \item{\code{cigarette}}{a factor with levels \code{Yes} \code{No}} \item{\code{alcohol}}{a factor with levels \code{Yes} \code{No}} \item{\code{marijuana}}{a factor with levels \code{Yes} \code{No}} \item{\code{sex}}{a factor with levels \code{female} \code{male}} \item{\code{race}}{a factor with levels \code{white} \code{other}} \item{\code{Freq}}{a numeric vector} } } \details{ Agresti uses the letters G (\code{sex}), R (\code{race}), A (\code{alcohol}), C (\code{cigarette}), M (\code{marijuana}) to refer to the table variables, and this usage is followed in the examples below. Background variables include \code{sex} and \code{race} of the respondent (GR), typically treated as explanatory, so that any model for the full table should include the term \code{sex:race}. Models for the reduced table, collapsed over \code{sex} and \code{race} are not entirely unreasonable, but don't permit the estimation of the effects of these variables on the responses. The full 5-way table contains a number of cells with counts of 0 or 1, as well as many cells with large counts, and even the ACM table collapsed over GR has some small cell counts. Consequently, residuals for these models in mosaic displays are best represented as standardized (adjusted) residuals. } \source{ Agresti, A. (2002). \emph{Categorical Data Analysis}, 2nd Ed., New York: Wiley-Interscience, Table 9.1, p. 362. 
} \references{ Thompson, L. (2009). \emph{R (and S-PLUS) Manual to Accompany Agresti's Categorical Data}, \url{http://www.stat.ufl.edu/~aa/cda/Thompson_manual.pdf} } \examples{ data(DaytonSurvey) # mutual independence mod.0 <- glm(Freq ~ ., data=DaytonSurvey, family=poisson) # mutual independence + GR mod.GR <- glm(Freq ~ . + sex*race, data=DaytonSurvey, family=poisson) anova(mod.GR, test = "Chisq") # all two-way terms mod.all2way <- glm(Freq ~ .^2, data=DaytonSurvey, family=poisson) anova(mod.all2way, test = "Chisq") # compare models LRstats(mod.0, mod.GR, mod.all2way) # collapse over sex and race Dayton.ACM <- aggregate(Freq ~ cigarette+alcohol+marijuana, data=DaytonSurvey, FUN=sum) Dayton.ACM } \keyword{datasets} \concept{generalized linear models} vcdExtra/man/HLtest.Rd0000644000176200001440000000701314430460317014302 0ustar liggesusers\name{HLtest} \alias{HosmerLemeshow} \alias{HLtest} \alias{plot.HLtest} \alias{print.HLtest} \alias{rootogram.HLtest} \alias{summary.HLtest} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Hosmer-Lemeshow Goodness of Fit Test } \description{ The \code{HLtest} function computes the classical Hosmer-Lemeshow (1980) goodness of fit test for a binomial \code{glm} object in logistic regression. The general idea is to assess whether or not the observed event rates match expected event rates in subgroups of the model population. The Hosmer-Lemeshow test specifically identifies subgroups as the deciles of fitted event values, or other quantiles as determined by the \code{g} argument. Given these subgroups, a simple chisquare test on \code{g-2} df is used. In addition to \code{print} and \code{summary} methods, a \code{plot} method is supplied to visualize the discrepancies between observed and fitted frequencies. } \usage{ HosmerLemeshow(model, g = 10) HLtest(model, g = 10) \method{print}{HLtest}(x, ...) \method{summary}{HLtest}(object, ...) \method{plot}{HLtest}(x, ...) \method{rootogram}{HLtest}(x, ...) 
} %- maybe also 'usage' for other objects documented here. \arguments{ \item{model}{A \code{glm} model object in the \code{binomial} family} \item{g}{Number of groups used to partition the fitted values for the GOF test.} \item{x, object}{A \code{HLtest} object} \item{\dots}{Other arguments passed down to methods} } %\details{ %%% ~~ If necessary, more details than the description above ~~ %} \value{ A class \code{HLtest} object with the following components: \item{table}{A data.frame describing the results of partitioning the data into \code{g} groups with the following columns: \code{cut}, \code{total}, \code{obs}, \code{exp}, \code{chi}} \item{chisq}{The chi-square statistic} \item{df}{Degrees of freedom} \item{p.value}{p value} \item{groups}{Number of groups} \item{call}{\code{model} call} %% ... } \references{ Hosmer, David W., Lemeshow, Stanley (1980). A goodness-of-fit test for multiple logistic regression model. \emph{Communications in Statistics, Series A}, 9, 1043-1069. Hosmer, David W., Lemeshow, Stanley (2000). \emph{Applied Logistic Regression}, New York: Wiley, ISBN 0-471-61553-6 Lemeshow, S. and Hosmer, D.W. (1982). A review of goodness of fit statistics for use in the development of logistic regression models. \emph{American Journal of Epidemiology}, 115(1), 92-106. } \author{ Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[vcd]{rootogram}} } \examples{ data(birthwt, package="MASS") # how to do this without attach? 
attach(birthwt) race = factor(race, labels = c("white", "black", "other")) ptd = factor(ptl > 0) ftv = factor(ftv) levels(ftv)[-(1:2)] = "2+" bwt <- data.frame(low = factor(low), age, lwt, race, smoke = (smoke > 0), ptd, ht = (ht > 0), ui = (ui > 0), ftv) detach(birthwt) options(contrasts = c("contr.treatment", "contr.poly")) BWmod <- glm(low ~ ., family=binomial, data=bwt) (hlt <- HLtest(BWmod)) str(hlt) summary(hlt) plot(hlt) # basic model BWmod0 <- glm(low ~ age, family=binomial, data=bwt) (hlt0 <- HLtest(BWmod0)) str(hlt0) summary(hlt0) plot(hlt0) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{htest} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Fungicide.Rd0000644000176200001440000000352614430460317015001 0ustar liggesusers\name{Fungicide} \alias{Fungicide} \docType{data} \title{ Carcinogenic Effects of a Fungicide } \description{ Data from Gart (1971) on the carcinogenic effects of a certain fungicide in two strains of mice. Of interest is how the association between \code{group} (Control, Treated) and \code{outcome} (Tumor, No Tumor) varies with \code{sex} and \code{strain} of the mice. Breslow (1976) used this data to illustrate the application of linear models to log odds ratios. } \usage{data(Fungicide)} \format{ The data comprise a set of four 2 x 2 tables classifying 403 mice, either Control or Treated and whether or not a tumor was later observed. The four groups represent the combinations of sex and strain of mice. The format is: num [1:2, 1:2, 1:2, 1:2] 5 4 74 12 3 2 84 14 10 4 ... - attr(*, "dimnames")=List of 4 ..$ group : chr [1:2] "Control" "Treated" ..$ outcome: chr [1:2] "Tumor" "NoTumor" ..$ sex : chr [1:2] "M" "F" ..$ strain : chr [1:2] "1" "2" } \details{ All tables have some small cells, so a continuity correction is recommended. } \source{ Gart, J. J. (1971). 
The comparison of proportions: a review of significance tests, confidence intervals and adjustments for stratification. \emph{International Statistical Review}, 39, 148-169. } \references{ Breslow, N. (1976), Regression analysis of the log odds ratio: A method for retrospective studies, \emph{Biometrics}, 32(3), 409-416. } \examples{ data(Fungicide) # loddsratio was moved to vcd; requires vcd_1.3-3+ \dontrun{ fung.lor <- loddsratio(Fungicide, correct=TRUE) fung.lor confint(fung.lor) } # visualize odds ratios in fourfold plots cotabplot(Fungicide, panel=cotab_fourfold) # -- fourfold() requires vcd >= 1.2-10 fourfold(Fungicide, p_adjust_method="none") } \keyword{datasets} \concept{odds ratios} vcdExtra/man/Vietnam.Rd0000644000176200001440000000333714430460317014507 0ustar liggesusers\name{Vietnam} \alias{Vietnam} \docType{data} \title{Student Opinion about the Vietnam War} \description{ A survey of student opinion on the Vietnam War was taken at the University of North Carolina at Chapel Hill in May 1967 and published in the student newspaper. Students were asked to fill in ballot papers stating which policy out of A,B,C or D they supported. Responses were cross-classified by gender/year. The response categories were: \describe{ \item{\code{A}}{Defeat North Vietnam by widespread bombing and land invasion} \item{\code{B}}{Maintain the present policy} \item{\code{C}}{De-escalate military activity, stop bombing and begin negotiations} \item{\code{D}}{Withdraw military forces Immediately} } } \usage{data(Vietnam)} \format{ A frequency data frame with 40 observations representing a 2 x 5 x 4 contingency table on the following 4 variables. 
\describe{ \item{\code{sex}}{a factor with levels \code{Female} \code{Male}} \item{\code{year}}{year of study, an ordered factor with levels \code{Freshmen}, \code{Sophomore}, \code{Junior}, \code{Senior}, \code{Grad student}} \item{\code{response}}{a factor with levels \code{A} \code{B} \code{C} \code{D}} \item{\code{Freq}}{cell frequency, a numeric vector} } } \details{ For some analyses, it is useful to treat \code{year} as numeric, and possibly assign grad students a value \code{year=7}. } \source{ Aitkin, M. et al., 1989, \emph{Statistical Modelling in GLIM} } \references{ Friendly, M. (2000), \emph{Visualizing Categorical Data}, SAS Institute, Cary, NC, Example 7.9. } \examples{ data(Vietnam) ## maybe str(Vietnam) ; plot(Vietnam) ... } \keyword{datasets} \concept{loglinear models} vcdExtra/man/expand.dft.Rd0000644000176200001440000000446314430460317015140 0ustar liggesusers\name{expand.dft} \alias{expand.dft} \alias{expand.table} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Expand a frequency table to case form} \description{ Converts a frequency table, given either as a table object or a data frame in frequency form, to a data frame representing individual observations in the table. } \usage{ expand.dft(x, var.names = NULL, freq = "Freq", ...) expand.table(x, var.names = NULL, freq = "Freq", ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A table object, or a data frame in frequency form containing factors and one numeric variable representing the cell frequency for that combination of factors. } \item{var.names}{A list of variable names for the factors, if you wish to override those already in the table} \item{freq}{The name of the frequency variable in the table} \item{\dots}{Other arguments passed down to \code{type.convert}. 
In particular, pay attention to \code{na.strings} (default: \code{na.strings=NA} if there are missing cells) and \code{as.is} (default: \code{as.is=FALSE}, converting character vectors to factors).} } \details{ \code{expand.table} is a synonym for \code{expand.dft}. } \value{ A data frame containing the factors in the table and as many observations as are represented by the total of the \code{freq} variable. } \references{ Originally posted on R-Help, Jan 20, 2009, http://tolstoy.newcastle.edu.au/R/e6/help/09/01/1873.html Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. } \author{ Mark Schwarz } %\note{ ~~further notes~~ % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{ \code{\link[utils]{type.convert}}, \code{\link[gnm]{expandCategorical}}} \examples{ library(vcd) art <- xtabs(~Treatment + Improved, data = Arthritis) art artdf <- expand.dft(art) str(artdf) # 1D case (tab <- table(sample(head(letters), 20, replace=TRUE))) expand.table(tab, var.names="letter") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} \keyword{array} % __ONLY ONE__ keyword per line vcdExtra/man/Dyke.Rd0000644000176200001440000000504514430460317013776 0ustar liggesusers\name{Dyke} \Rdversion{1.1} \alias{Dyke} \docType{data} \title{Sources of Knowledge of Cancer} \description{Observational data on a sample of 1729 individuals, cross-classified in a 2^5 table according to their sources of information (read newspapers, listen to the radio, do 'solid' reading, attend lectures) and whether they have good or poor knowledge regarding cancer. Knowledge of cancer is often treated as the response.} \usage{data(Dyke)} \format{ A 5-dimensional array resulting from cross-tabulating 5 variables for 1729 observations. 
The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Knowledge}\tab \code{"Good", "Poor"}\cr 2\tab \code{Reading}\tab \code{"No", "Yes"}\cr 3\tab \code{Radio}\tab \code{"No", "Yes"}\cr 4\tab \code{Lectures}\tab \code{"No", "Yes"}\cr 5\tab \code{Newspaper}\tab \code{"No", "Yes"}\cr } } %\details{ } \source{ Fienberg, S. E. (1980). \emph{The Analysis of Cross-Classified Categorical Data} Cambridge, MA: MIT Press, p. 85, Table 5-6. } \references{ Dyke, G. V. and Patterson, H. D. (1952). Analysis of factorial arrangements when the data are proportions. \emph{Biometrics}, 8, 1-12. Lindsey, J. K. (1993). \emph{Models for Repeated Measurements} Oxford, UK: Oxford University Press, p. 57. } %\seealso{ } \examples{ data(Dyke) # independence model mosaic(Dyke, shade=TRUE) # null model, Knowledge as response, independent of others require(MASS) dyke.mod0 <- loglm(~ Knowledge + (Reading * Radio * Lectures * Newspaper), data=Dyke) dyke.mod0 mosaic(dyke.mod0) # view as doubledecker plot Dyke <- Dyke[2:1,,,,] # make Good the highlighted value of Knowledge doubledecker(Knowledge ~ ., data=Dyke) # better version, with some options doubledecker(Knowledge ~ Lectures + Reading + Newspaper + Radio, data=Dyke, margins = c(1,6, length(dim(Dyke)) + 1, 1), fill_boxes=list(rep(c("white", gray(.90)),4)) ) # separate (conditional) plots for those who attend lectures and those who do not doubledecker(Knowledge ~ Reading + Newspaper + Radio, data=Dyke[,,,1,], main="Do not attend lectures", margins = c(1,6, length(dim(Dyke)) + 1, 1), fill_boxes=list(rep(c("white", gray(.90)),3)) ) doubledecker(Knowledge ~ Reading + Newspaper + Radio, data=Dyke[,,,2,], main="Attend lectures", margins = c(1,6, length(dim(Dyke)) + 1, 1), fill_boxes=list(rep(c("white", gray(.90)),3)) ) drop1(dyke.mod0, test="Chisq") } \keyword{datasets} \concept{loglinear models} vcdExtra/man/vcdExtra-deprecated.Rd0000644000176200001440000000126014422306403016747 0ustar 
liggesusers\name{vcdExtra-deprecated} \alias{vcdExtra-deprecated} \alias{summarise} %\alias{summarise.glm} %\alias{summarise.glmlist} %\alias{summarise.loglm} %\alias{summarise.loglmlist} % \title{Deprecated Functions in vcdExtra Package} % \description{ These functions are provided for compatibility with older versions of the \pkg{vcdExtra} package only. They are replaced by \code{\link{LRstats}}. } % \usage{ summarise(...) %summarise.glm(...) %summarise.glmlist(...) %summarise.loglm(...) %summarise.loglmlist(...) } % \arguments{ \item{\dots}{pass arguments down.} } % \details{ \code{summarise.*} have been replaced by \code{\link{LRstats}} functions. } vcdExtra/man/Mental.Rd0000644000176200001440000000325014430460317014316 0ustar liggesusers\name{Mental} \Rdversion{1.1} \alias{Mental} \docType{data} \title{Mental Impairment and Parents SES} \description{ A 6 x 4 contingency table representing the cross-classification of mental health status (\code{mental}) of 1660 young New York residents by their parents' socioeconomic status (\code{ses}). } \usage{data(Mental)} \format{ A data frame frequency table with 24 observations on the following 3 variables. \describe{ \item{\code{ses}}{an ordered factor with levels \code{1} < \code{2} < \code{3} < \code{4} < \code{5} < \code{6}} \item{\code{mental}}{an ordered factor with levels \code{Well} < \code{Mild} < \code{Moderate} < \code{Impaired}} \item{\code{Freq}}{cell frequency: a numeric vector} } } \details{ Both \code{ses} and \code{mental} can be treated as ordered factors or integer scores. For \code{ses}, 1="High" and 6="Low". } \source{ Haberman, S. J. \emph{The Analysis of Qualitative Data: New Developments}, Academic Press, 1979, Vol. II, p. 375. Srole, L.; Langner, T. S.; Michael, S. T.; Kirkpatrick, P.; Opler, M. K. & Rennie, T. A. C. \emph{Mental Health in the Metropolis: The Midtown Manhattan Study}, NYU Press, 1978, p. 289 } \references{ Friendly, M. 
\emph{Visualizing Categorical Data}, Cary, NC: SAS Institute, 2000, Appendix B.7. } \examples{ data(Mental) str(Mental) (Mental.tab <- xtabs(Freq ~ ses + mental, data=Mental)) # mosaic and sieve plots mosaic(Mental.tab, gp=shading_Friendly) sieve(Mental.tab, gp=shading_Friendly) if(require(ca)){ plot(ca(Mental.tab), main="Mental impairment & SES") } } \keyword{datasets} \concept{correspondence analysis} \concept{ordinal variables} vcdExtra/man/TV.Rd0000644000176200001440000000514314430460317013432 0ustar liggesusers\name{TV} \Rdversion{1.1} \alias{TV} \title{TV Viewing Data} \description{This data set \code{TV} comprises a 5 x 11 x 3 contingency table based on audience viewing data from Nielsen Media Research for the week starting November 6, 1995. } \usage{data(TV)} \format{ A 5 x 11 x 3 array of cell frequencies with the following structure: \preformatted{ int [1:5, 1:11, 1:3] 146 244 233 174 294 151 181 161 183 281 ... - attr(*, "dimnames")=List of 3 ..$ Day : chr [1:5] "Monday" "Tuesday" "Wednesday" "Thursday" ... ..$ Time : chr [1:11] "8:00" "8:15" "8:30" "8:45" ... ..$ Network: chr [1:3] "ABC" "CBS" "NBC" } } \details{ The original data, \code{tv.dat}, contains two additional networks: "Fox" and "Other", with small frequencies. These levels were removed in the current version. There is also a fourth factor, State transition (turn the television Off, Switch channels, or Persist in viewing the current channel). The \code{TV} data here includes only the Persist observations. } \source{ The original data, \code{tv.dat}, came from the initial implementation of mosaic displays in R by Jay Emerson (1998). Similar data had been used by Hartigan and Kleiner (1984) as an illustration. } \references{ Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. Emerson, John W. 
Mosaic Displays in S-PLUS: A General Implementation and a Case Study. \emph{Statistical Graphics and Computing Newsletter}, 1998, 9(1), 17--23, \url{http://www.stat.yale.edu/~jay/R/mosaic/v91.pdf} Hartigan, J. A. & Kleiner, B. A Mosaic of Television Ratings. \emph{The American Statistician}, 1984, 38, 32-35. } \examples{ data(TV) structable(TV) doubledecker(TV) # reduce number of levels of Time TV.df <- as.data.frame.table(TV) levels(TV.df$Time) <- rep(c("8:00-8:59", "9:00-9:59", "10:00-10:44"), c(4, 4, 3)) TV2 <- xtabs(Freq ~ Day + Time + Network, TV.df) # re-label for mosaic display levels(TV.df$Time) <- c("8", "9", "10") # fit mode of joint independence, showing association of Network with Day*Time mosaic(~ Day + Network + Time, data = TV.df, expected = ~ Day:Time + Network, legend = FALSE) # with doubledecker arrangement mosaic(~ Day + Network + Time, data = TV.df, expected = ~ Day:Time + Network, split = c(TRUE, TRUE, FALSE), spacing = spacing_highlighting, legend = FALSE) } \keyword{datasets} \concept{loglinear models} vcdExtra/man/GSS.Rd0000644000176200001440000000234314430460317013534 0ustar liggesusers\name{GSS} \alias{GSS} \docType{data} \title{General Social Survey-- Sex and Party affiliation} \description{ Data from the General Social Survey, 1991, on the relation between sex and party affiliation. } \usage{data(GSS)} \format{ A data frame in frequency form with 6 observations on the following 3 variables. \describe{ \item{\code{sex}}{a factor with levels \code{female} \code{male}} \item{\code{party}}{a factor with levels \code{dem} \code{indep} \code{rep}} \item{\code{count}}{a numeric vector} } } %\details{ % ~~ If necessary, more details than the __description__ above ~~ %} \source{ Agresti, A. \emph{Categorical Data Analysis}, 2nd E., John Wiley & Sons, 2002, Table 3.11, p. 106. 
} %\references{ % ~~ possibly secondary sources and usages ~~ %} \examples{ data(GSS) str(GSS) # use xtabs to show the table in a compact form (GSStab <- xtabs(count ~ sex + party, data=GSS)) # fit the independence model (mod.glm <- glm(count ~ sex + party, family = poisson, data = GSS)) # display all the residuals in a mosaic plot mosaic(mod.glm, formula = ~ sex + party, labeling = labeling_residuals, suppress=0) } \keyword{datasets} \concept{generalized linear models} vcdExtra/man/LRstats.Rd0000644000176200001440000000625714422306403014500 0ustar liggesusers\name{LRstats} \alias{LRstats} \alias{LRstats.glmlist} \alias{LRstats.loglmlist} \alias{LRstats.default} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Brief Summary of Model Fit for glm and loglm Models } \description{ For \code{glm} objects, the \code{print} and \code{summary} methods give too much information if all one wants to see is a brief summary of model goodness of fit, and there is no easy way to display a compact comparison of model goodness of fit for a collection of models fit to the same data. All \code{loglm} models have equivalent glm forms, but the \code{print} and \code{summary} methods give quite different results. \code{LRstats} provides a brief summary for one or more models fit to the same dataset for which \code{logLik} and \code{nobs} methods exist (e.g., \code{glm} and \code{loglm} models). %This implementation is experimental, and is subject to change. } \usage{ LRstats(object, ...) \method{LRstats}{glmlist}(object, ..., saturated = NULL, sortby = NULL) \method{LRstats}{loglmlist}(object, ..., saturated = NULL, sortby = NULL) \method{LRstats}{default}(object, ..., saturated = NULL, sortby = NULL) } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{object}{ a fitted model object for which there exists a logLik method to extract the corresponding log-likelihood} \item{\dots}{ optionally more fitted model objects } \item{saturated}{ saturated model log likelihood reference value (use 0 if deviance is not available) } \item{sortby}{ either a numeric or character string specifying the column in the result by which the rows are sorted (in decreasing order)} } \details{ The function relies on residual degrees of freedom for the LR chisq test being available in the model object. This is true for objects inheriting from \code{lm}, \code{glm}, \code{loglm}, \code{polr} and \code{negbin}. } \value{ A data frame (also of class \code{anova}) with columns \code{c("AIC", "BIC", "LR Chisq", "Df", "Pr(>Chisq)")}. Row names are taken from the names of the model object(s). } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Achim Zeileis } %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[stats]{logLik}}, \code{\link[stats]{glm}}, \code{\link[MASS]{loglm}}, \code{\link{logLik.loglm}}, \code{\link{modFit}} } \examples{ data(Mental) indep <- glm(Freq ~ mental+ses, family = poisson, data = Mental) LRstats(indep) Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) # compare models LRstats(indep, coleff, roweff, linlin) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{models} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/mosaic.glm.Rd0000644000176200001440000002112414422306403015123 0ustar liggesusers\name{mosaic.glm} \alias{mosaic.glm} \alias{sieve.glm} \alias{assoc.glm} %- Also NEED an '\alias' for EACH other topic documented here. \title{Mosaic plots for fitted generalized linear and generalized nonlinear models } \description{ Produces mosaic plots (and other plots in the \code{\link[vcd]{strucplot}} framework) for a log-linear model fitted with \code{\link[stats]{glm}} or for a generalized nonlinear model fitted with \code{\link[gnm]{gnm}}. These methods extend the range of strucplot visualizations well beyond the models that can be fit with \code{\link[MASS]{loglm}}. They are intended for models for counts using the Poisson family (or quasi-poisson), but should be sensible as long as (a) the response variable is non-negative and (b) the predictors visualized in the \code{strucplot} are discrete factors. } \usage{ \method{mosaic}{glm}(x, formula = NULL, panel = mosaic, type = c("observed", "expected"), residuals = NULL, residuals_type = c("pearson", "deviance", "rstandard"), gp = shading_hcl, gp_args = list(), ...) \method{sieve}{glm}(x, ...) \method{assoc}{glm}(x, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A \code{glm} or \code{gnm} object. The response variable, typically a cell frequency, should be non-negative. } \item{formula}{ A one-sided formula with the indexing factors of the plot separated by '+', determining the order in which the variables are used in the mosaic. A formula must be provided unless \code{x$data} inherits from class \code{"table"} -- in which case the indexing factors of this table are used, or the factors in \code{x$data} (or model.frame(x) if \code{x$data} is an environment) exactly cross-classify the data -- in which case this set of cross-classifying factors are used. 
} \item{panel}{Panel function used to draw the plot for visualizing the observed values, residuals and expected values. Currently, one of \code{"mosaic"}, \code{"assoc"}, or \code{"sieve"} in \code{vcd}.} \item{type}{A character string indicating whether the \code{"observed"} or the \code{"expected"} values of the table should be visualized by the area of the tiles or bars.} \item{residuals}{ An optional array or vector of residuals corresponding to the cells in the data, for example, as calculated by \code{residuals.glm(x)}, \code{residuals.gnm(x)}.} \item{residuals_type}{If the \code{residuals} argument is \code{NULL}, residuals are calculated internally and used in the display. In this case, \code{residuals_type} can be \code{"pearson"}, \code{"deviance"} or \code{"rstandard"}. Otherwise (when \code{residuals} is supplied), \code{residuals_type} is used as a label for the legend in the plot. } \item{gp}{Object of class \code{"gpar"}, shading function or a corresponding generating function (see \code{\link[vcd]{strucplot}} Details and \code{\link[vcd]{shadings}}). Ignored if shade = FALSE.} \item{gp_args}{A list of arguments for the shading-generating function, if specified.} \item{\dots}{ Other arguments passed to the \code{panel} function, e.g., \code{\link[vcd]{mosaic}} } } \details{ For both poisson family generalized linear models and loglinear models, standardized residuals provided by \code{rstandard} (sometimes called adjusted residuals) are often preferred because they have constant unit asymptotic variance. The \code{sieve} and \code{assoc} methods are simple convenience interfaces to this plot method, setting the panel argument accordingly. } %\note{ %In the current version, the \code{glm} or \code{gnm} object \emph{must} have been fit using %the \code{data} argument to supply a data.frame or table, rather than with variables %in the global environment. %} \value{ The \code{structable} visualized by \code{\link[vcd]{strucplot}} is returned invisibly. 
} %\references{ ~put references to the literature/web site here ~ } \author{Heather Turner, Michael Friendly, with help from Achim Zeileis} %\note{ %} \seealso{ \code{\link[stats]{glm}}, \code{\link[gnm]{gnm}}, \code{\link[vcd]{plot.loglm}}, \code{\link[vcd]{mosaic}} } \examples{ GSStab <- xtabs(count ~ sex + party, data=GSS) # using the data in table form mod.glm1 <- glm(Freq ~ sex + party, family = poisson, data = GSStab) res <- residuals(mod.glm1) std <- rstandard(mod.glm1) # For mosaic.default(), need to re-shape residuals to conform to data stdtab <- array(std, dim=dim(GSStab), dimnames=dimnames(GSStab)) mosaic(GSStab, gp=shading_Friendly, residuals=stdtab, residuals_type="Std\nresiduals", labeling = labeling_residuals) # Using externally calculated residuals with the glm() object mosaic.glm(mod.glm1, residuals=std, labeling = labeling_residuals, shade=TRUE) # Using residuals_type mosaic.glm(mod.glm1, residuals_type="rstandard", labeling = labeling_residuals, shade=TRUE) ## Ordinal factors and structured associations data(Mental) xtabs(Freq ~ mental+ses, data=Mental) long.labels <- list(set_varnames = c(mental="Mental Health Status", ses="Parent SES")) # fit independence model # Residual deviance: 47.418 on 15 degrees of freedom indep <- glm(Freq ~ mental+ses, family = poisson, data = Mental) long.labels <- list(set_varnames = c(mental="Mental Health Status", ses="Parent SES")) mosaic(indep, residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals) # or, show as a sieve diagram mosaic(indep, labeling_args = long.labels, panel=sieve, gp=shading_Friendly) # fit linear x linear (uniform) association. 
Use integer scores for rows/cols Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) mosaic(linlin, residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, suppress=1, gp=shading_Friendly, main="Lin x Lin model") ## Goodman Row-Column association model fits even better (deviance 3.57, df 8) if (require(gnm)) { Mental$mental <- C(Mental$mental, treatment) Mental$ses <- C(Mental$ses, treatment) RC1model <- gnm(Freq ~ ses + mental + Mult(ses, mental), family = poisson, data = Mental) mosaic(RC1model, residuals_type="rstandard", labeling_args = long.labels, labeling=labeling_residuals, suppress=1, gp=shading_Friendly, main="RC1 model") } ############# UCB Admissions data, fit using glm() structable(Dept ~ Admit+Gender,UCBAdmissions) berkeley <- as.data.frame(UCBAdmissions) berk.glm1 <- glm(Freq ~ Dept * (Gender+Admit), data=berkeley, family="poisson") summary(berk.glm1) mosaic(berk.glm1, gp=shading_Friendly, labeling=labeling_residuals, formula=~Admit+Dept+Gender) # the same, displaying studentized residuals; # note use of formula to reorder factors in the mosaic mosaic(berk.glm1, residuals_type="rstandard", labeling=labeling_residuals, shade=TRUE, formula=~Admit+Dept+Gender, main="Model: [DeptGender][DeptAdmit]") ## all two-way model berk.glm2 <- glm(Freq ~ (Dept + Gender + Admit)^2, data=berkeley, family="poisson") summary(berk.glm2) mosaic.glm(berk.glm2, residuals_type="rstandard", labeling = labeling_residuals, shade=TRUE, formula=~Admit+Dept+Gender, main="Model: [DeptGender][DeptAdmit][AdmitGender]") anova(berk.glm1, berk.glm2, test="Chisq") # Add 1 df term for association of [GenderAdmit] only in Dept A berkeley <- within(berkeley, dept1AG <- (Dept=='A')*(Gender=='Female')*(Admit=='Admitted')) berkeley[1:6,] berk.glm3 <- glm(Freq ~ Dept * (Gender+Admit) + dept1AG, data=berkeley, family="poisson") summary(berk.glm3) 
mosaic.glm(berk.glm3, residuals_type="rstandard", labeling = labeling_residuals, shade=TRUE, formula=~Admit+Dept+Gender, main="Model: [DeptGender][DeptAdmit] + DeptA*[GA]") # compare models anova(berk.glm1, berk.glm3, test="Chisq") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{hplot} \keyword{models} \keyword{multivariate} % __ONLY ONE__ keyword per line vcdExtra/man/CMHtest.Rd0000644000176200001440000002054414430460317014412 0ustar liggesusers\name{CMHtest} \alias{CMHtest} \alias{CMHtest.formula} \alias{CMHtest.default} \alias{Cochran Mantel Haenszel test} \alias{print.CMHtest} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Generalized Cochran-Mantel-Haenszel Tests } \description{ Provides generalized Cochran-Mantel-Haenszel tests of association of two possibly ordered factors, optionally stratified other factor(s). With strata, \code{CMHtest} calculates these tests for each level of the stratifying variables and also provides overall tests controlling for the strata. For ordinal factors, more powerful tests than the test for general association (independence) are obtained by assigning scores to the row and column categories. } \usage{ CMHtest(x, ...) \method{CMHtest}{formula}(formula, data = NULL, subset = NULL, na.action = NULL, ...) \method{CMHtest}{default}(x, strata = NULL, rscores = 1:R, cscores = 1:C, types = c("cor", "rmeans", "cmeans", "general"), overall=FALSE, details=overall, ...) \method{print}{CMHtest}(x, digits = max(getOption("digits") - 2, 3), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A 2+ way contingency table in array form, or a class \code{"table"} object with optional category labels specified in the dimnames(x) attribute. } \item{formula}{a formula specifying the variables used to create a contingency table from \code{data}. 
This should be a one-sided formula when \code{data} is in array form, and a two-sided formula with a response \code{Freq} if \code{data} is a data frame with a cell frequency variable. For convenience, conditioning formulas can be specified indicating strata. } \item{data}{either a data frame, or an object of class \code{"table"} or \code{"ftable"}. } \item{subset}{an optional vector specifying a subset of observations to be used. } \item{na.action}{a function which indicates what should happen when the data contain \code{NA}s. Ignored if \code{data} is a contingency table } \item{strata}{ For a 3- or higher-way table, the names or numbers of the factors to be treated as strata. By default, the first 2 factors are treated as the main table variables, and all others considered stratifying factors. } \item{rscores}{ Row scores. Either a set of numbers (typically integers, \code{1:R}) or the string \code{"midrank"} for standardized midrank scores, or \code{NULL} to exclude tests that depend on row scores. } \item{cscores}{ Column scores. Same as for row scores. } \item{types}{ Types of CMH tests to compute: Any one or more of \code{c("cor", "cmeans", "rmeans", "general")}, or \code{"ALL"} for all of these. } \item{overall}{ logical. Whether to calculate overall tests, controlling for the stratifying factors. } \item{details}{ logical. Whether to include computational details in the result } \item{\dots}{ Other arguments passed to default method. } \item{digits}{ Digits to print. } } \details{ The standard \eqn{\chi^2} tests for association in a two-way table treat both table factors as nominal (unordered) categories. When one or both factors of a two-way table are quantitative or ordinal, more powerful tests of association may be obtained by taking ordinality into account using row and or column scores to test for linear trends or differences in row or column means. 
The CMH analysis for a two-way table produces generalized Cochran-Mantel-Haenszel statistics (Landis et al., 1978). These include the CMH \bold{correlation} statistic (\code{"cor"}), treating both factors as ordered. For a given stratum, with equally spaced row and column scores, this CMH statistic reduces to \eqn{(n-1) r^2}, where \eqn{r} is the Pearson correlation between X and Y. With \code{"midrank"} scores, this CMH statistic is analogous to \eqn{(n-1) r_S^2}, using the Spearman rank correlation. The \bold{ANOVA} (row mean scores and column mean scores) statistics treat the columns and rows respectively as ordinal, and are sensitive to mean shifts over columns or rows. These are transforms of the \eqn{F} statistics from one-way ANOVAs with equally spaced scores and to Kruskal-Wallis tests with \code{"midrank"} scores. The CMH \bold{general} association statistic treats both factors as unordered, and gives a test closely related to the Pearson \eqn{\chi^2} test. When there is more than one stratum, the overall general CMH statistic gives a stratum-adjusted Pearson \eqn{\chi^2}, equivalent to what is calculated by \code{\link[stats]{mantelhaen.test}}. For a 3+ way table, one table of CMH tests is produced for each combination of the factors identified as \code{strata}. If \code{overall=TRUE}, an additional table is calculated for the same two primary variables, controlling for (pooling over) the \code{strata} variables. These overall tests implicitly assume no interactions between the primary variables and the strata and they will have low power in the presence of interactions. } \value{ An object of class \code{"CMHtest"}, a list with the following 4 components: \item{table}{A matrix containing the test statistics, with columns \code{Chisq}, \code{Df} and \code{Prob} } \item{names}{The names of the table row and column variables} \item{rscore}{Row scores} \item{cscore}{Column scores} If \code{details==TRUE}, additional components are included. 
If there are strata, the result is a list of \code{"CMHtest"} objects. If \code{overall=TRUE}, another component, labeled \code{ALL}, is appended to the list. } \references{ Stokes, M. E. & Davis, C. S. & Koch, G., (2000). \emph{Categorical Data Analysis using the SAS System}, 2nd Ed., Cary, NC: SAS Institute, pp 74-75, 92-101, 124-129. Details of the computation are given at: \url{http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_freq_a0000000648.htm } Cochran, W. G. (1954), Some Methods for Strengthening the Common \eqn{\chi^2} Tests, \emph{Biometrics}, 10, 417-451. Landis, R. J., Heyman, E. R., and Koch, G. G. (1978). Average Partial Association in Three-way Contingency Tables: A Review and Discussion of Alternative Tests, \emph{International Statistical Review}, \bold{46}, 237-254. Mantel, N. (1963), Chi-square Tests with One Degree of Freedom: Extensions of the Mantel-Haenszel Procedure, \emph{Journal of the American Statistical Association}, 58, 690-700. } \author{ Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[coin]{cmh_test}} provides the CMH test of general association; \code{\link[coin]{lbl_test}} provides the CMH correlation test of linear by linear association. 
\code{\link[stats]{mantelhaen.test}} provides the overall general Cochran-Mantel-Haenszel chi-squared test of the null that two nominal variables are conditionally independent in each stratum, assuming that there is no three-way interaction } \examples{ data(JobSat, package="vcdExtra") CMHtest(JobSat) CMHtest(JobSat, rscores="midrank", cscores="midrank") # formula interface CMHtest(~ ., data=JobSat) # A 3-way table (both factors ordinal) data(MSPatients, package="vcd") CMHtest(MSPatients) # also calculate overall tests, controlling for Patient CMHtest(MSPatients, overall = TRUE) # compare with mantelhaen.test mantelhaen.test(MSPatients) # formula interface CMHtest(~ ., data = MSPatients, overall = TRUE) # using a frequency data.frame CMHtest(xtabs(Freq~ses + mental, data = Mental)) # or, more simply CMHtest(Freq~ses + mental, data = Mental) # conditioning formulae CMHtest(Freq~right + left | gender, data = VisualAcuity) CMHtest(Freq ~ attitude + memory | education + age, data = Punishment) # Stokes etal, Table 5.1, p 92: two unordered factors parties <- matrix( c(221, 160, 360, 140, 200, 291, 160, 311, 208, 106, 316, 97), nrow=3, ncol=4, byrow=TRUE) dimnames(parties) <- list(party=c("Dem", "Indep", "Rep"), neighborhood=c("Bayside", "Highland", "Longview", "Sheffield")) CMHtest(parties, rscores=NULL, cscores=NULL) # compare with Pearson chisquare chisq.test(parties) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{htest} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Titanicp.Rd0000644000176200001440000000412314430460317014651 0ustar liggesusers\name{Titanicp} \alias{Titanicp} \docType{data} \title{Passengers on the Titanic} \description{ Data on passengers on the RMS Titanic, excluding the Crew and some individual identifier variables. } \usage{data(Titanicp)} \format{ A data frame with 1309 observations on the following 6 variables. 
\describe{ \item{\code{pclass}}{a factor with levels \code{1st} \code{2nd} \code{3rd}} \item{\code{survived}}{a factor with levels \code{died} \code{survived}} \item{\code{sex}}{a factor with levels \code{female} \code{male}} \item{\code{age}}{passenger age in years (or fractions of a year, for children), a numeric vector; age is missing for 263 of the passengers} \item{\code{sibsp}}{number of siblings or spouses aboard, integer: \code{0:8}} \item{\code{parch}}{number of parents or children aboard, integer: \code{0:6}} } } \details{ There are a number of related versions of the Titanic data, in various formats. This version was derived from \code{ptitanic} in the \pkg{rpart.plot} package, modifying it to remove the \code{Class 'labelled'} attributes for some variables (inherited from Frank Harrell's \code{titanic3} version) which caused problems with some applications, notably \code{ggplot2}. Other versions: \code{\link[datasets]{Titanic}} is the 4-way frequency table of all 2201 people aboard the Titanic, including passengers and crew. } \source{ The original R source for this dataset was compiled by Frank Harrell and Robert Dawson: \url{https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic.html}, described in more detail in \url{https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic3info.txt} For this version of the Titanic data, passenger details were deleted, survived was cast as a factor, and the name changed to \code{Titanicp} to minimize confusion with other versions. } %\references{ %%% ~~ possibly secondary sources and usages ~~ %} \examples{ data(Titanicp) ## maybe str(Titanicp) ; plot(Titanicp) ... 
} \keyword{datasets} \concept{loglinear models} \concept{logistic regression} vcdExtra/man/Cancer.Rd0000644000176200001440000000230114430460317014265 0ustar liggesusers\name{Cancer} \Rdversion{1.1} \alias{Cancer} \docType{data} \title{Survival of Breast Cancer Patients} \description{Three year survival of 474 breast cancer patients according to nuclear grade and diagnostic center.} \usage{ data(Cancer) } \format{ A 3-dimensional array resulting from cross-tabulating 3 variables for 474 observations. The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Survival}\tab \code{"Died", "Surv"}\cr 2\tab \code{Grade}\tab \code{"Malignant", "Benign"}\cr 3\tab \code{Center}\tab \code{"Boston", "Glamorgan"}\cr } } %\details { } \source{ % \cite{Lindsey:95 [p38]} % \cite{Whittaker:90} Lindsey, J. K. (1995). Analysis of Frequency and Count Data Oxford, UK: Oxford University Press. p. 38, Table 2.5. Whittaker, J. (1990) Graphical Models in Applied Multivariate Statistics New York: John Wiley and Sons, p. 220. } %\references{ % \cite{Morrison etal} %} %\seealso { } \examples{ data(Cancer) MASS::loglm(~Survival + Grade + Center, data = Cancer) vcd::mosaic(Cancer, shade=TRUE) } \keyword{datasets} \concept{loglinear models} \concept{logit models} vcdExtra/man/datasets.Rd0000644000176200001440000000516114430460317014711 0ustar liggesusers\name{datasets} \alias{datasets} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Information on Data Sets in Packages } \description{ The \code{\link[utils]{data}} function is used both to load data sets from packages, and give a display of the names and titles of data sets in one or more packages, however it does not return a result that can be easily used to get additional information about the nature of data sets in packages. The \code{datasets()} function is designed to produce a more useful summary display of data sets in one or more packages. 
It extracts the \code{class} and dimension information (\code{dim} or \code{length}) of each item, and formats these to provide additional descriptors. } \usage{ datasets(package, allClass=FALSE, incPackage=length(package) > 1, maxTitle=NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{package}{a character vector giving the package(s) to look in} \item{allClass}{a logical variable. Include all classes of the item (\code{TRUE}) or just the last class (\code{FALSE})?} \item{incPackage}{include the package name in result?} \item{maxTitle}{maximum length of data set Title} } \details{ The requested packages must be installed, and are silently loaded in order to extract \code{class} and size information. } \value{ A \code{data.frame} whose rows correspond to data sets found in \code{package}. The columns (for a single package) are: \item{Item}{data set name, a character variable} \item{class}{the object class of the data set, typically one of \code{"data.frame"}, \code{"table"}, \code{"array"} ...} \item{dim}{an abbreviation of the dimensions of the data set, in a form like \code{"36x3"} for a data.frame or matrix with 36 rows and 3 columns.} \item{Title}{data set title} } %\references{ %%% ~put references to the literature/web site here ~ %} \author{ Michael Friendly, with R-help from Curt Seeliger } \note{ In Rmd documents, \code{datasets("package") |> knitr::kable()} can be used to create a more pleasing display. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[utils]{data}}, \code{\link[knitr]{kable}} } \examples{ datasets("vcdExtra") # datasets(c("vcd", "vcdExtra")) datasets("datasets", maxTitle=50) # just list dataset names in a package datasets("vcdExtra")[,"Item"] datasets("vcd")[,"Item"] } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{package} \keyword{data}% __ONLY ONE__ keyword per line vcdExtra/man/zero.test.Rd0000644000176200001440000000452114430460317015035 0ustar liggesusers\name{zero.test} \alias{zero.test} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Score test for zero inflation in Poisson data } \description{ Carries out a simple score test (van den Broek, 1995) for excess zeros in an otherwise Poisson distribution of counts. It gives a \eqn{\chi^2_1} statistic on one degree of freedom. } \usage{ zero.test(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A vector of non-negative counts, or a one-way frequency table of such counts. } } \details{ The test first calculates the rate estimate from the mean, \eqn{\hat{\lambda} = \bar{x}}. The number of observed zeros, \eqn{n_0}, is then compared with the expected number, \eqn{n \hat{p}_0}, where \eqn{\hat{p}_0=\exp[-\hat{\lambda}]}. Then the test statistic is calculated by the formula: \deqn{\frac{(n_0 - n\hat{p}_0)^2}{n\hat{p}_0(1-\hat{p}_0) - n\bar{x}\hat{p}_0^2}} This test statistic has a \eqn{\chi^2_1} distribution. } \value{ Returns invisibly a list of three elements: \item{\code{statistic}}{The value of the score test statistic} \item{\code{df}}{Degrees of freedom of the test statistic} \item{\code{pvalue}}{Upper tail p-value} } \references{ The original R code came from a Stackexchange question, \url{https://stats.stackexchange.com/questions/118322/how-to-test-for-zero-inflation-in-a-dataset} Van den Broek, J. (1995). A Score Test for Zero Inflation in a Poisson Distribution. \emph{Biometrics}, \bold{51}(2), 738-743. \url{https://www.jstor.org/stable/2532959} Yang, Zhao, James W. Hardin, and Cheryl L. Addy (2010). Score Tests for Zero-Inflation in Overdispersed Count Data. \emph{Communications in Statistics - Theory and Methods} \bold{39} (11) 2008-2030. 
\doi{10.1080/03610920902948228} } \author{ Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ %\seealso{ %%% ~~objects to See Also as \code{\link{help}}, ~~~ %} \examples{ # synthetic tests zero.test(rpois(100, 1)) zero.test(rpois(100, 5)) # add some extra zeros zero.test(c(rep(0, 20), rpois(100, 5))) # Articles by Phd candidates data(PhdPubs, package="vcdExtra") zero.test(PhdPubs$articles) phd.tab <- table(PhdPubs$articles) zero.test(phd.tab) } \keyword{htest} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/figures/0000755000176200001440000000000014470742322014256 5ustar liggesusersvcdExtra/man/figures/logo.png0000644000176200001440000004500014422306403015714 0ustar liggesusersPNG  IHDRXٹptRNSTC pHYs.#.#x?v IDATxw@SA⬣GժuQZGZWmj{UZĽֽpPQHxk^!8㞣PU!Z  9[%)N_F$A鄁v!E$ 8 X8 >i. {` `C00 0JJn cDJO0.qar|+X'[+ -f\} `Ңe@0a UiMD01 0JI1d pDlF6H HMWY@br*6Hl@1"@D$Ysh#`g c+y;2Q։V  D톁6HGh#`K%_`h%22 t\!!  !DҏKX H@:qƁ> ~Da`5# ND҄X!"Gx4`Ua`VHyG0Xf@+ah% U@sB$DAZ#a'2A:cVEX"# 4E$ )KaX iH@hX3 H D l"-"`.LB60Z 04JD#  0oLX" 0C8 0@GF0DC`] V€Gh. D=#  01  HH+4`  v0@ la" Z6fJV" h.8 "HOC!l aK!c 6<1 h%փ}#aD0+"J.+a%"8`gB vHl`jyJy;DU;@+ H`EDpma,!p( 0Z 0#`@ i0t5#y$u"a`6D@J փH0L ̃z4,HH-0/@!,-LDi0,VB@z K!`CZc,!y`%`HkB։B! # Ǡ^]?{644..-cFo?/`#t.a~>2e`mX ü-]Bn o{Ԍos~L ҉TA$$!*2U@RL` $/*2U@sIJycaaCR3QJ `)]f'DG;{w_sFb˾~\w"DBW9rL_𧷏愈Cv?}2obbb&^iϝt;? KB-v(Z$No`HA5ڶY$>>K=̛/Jmώwn|>61FX<Ǧ LeQU? ׶^6zgOs7S\O4+x$ɣGjԩg;s4_ƾ}§Uό;lЍWRZgO k64mh;e+TJoZpwڗT avCzIǀ!vo߲{OJļ ya`% ^6yoYU!֨%k6T*#t ]\]4mtix)ژ3#둃VխS>֮4/=Sׯi6:jj˜'#Zݙ'A{>i_~;CBoL&?{wh{Mk&Dy6 ''FͿ/7!>w&mNaZu]䏱 }:B+1=zػccp Ԣm{.]wvnZ*!!AS>.% S&|AjIB!Bzb@¦TNc~L<9f0^$GO&_y(xpnУ)z)ҬU$.Ypޒy?t( e'!}< 6ͽS囓k(kNaٚnSmڽ; `Ӡׯ^ZP}-JU%vIj#XMT{ I)_tʏ׽#$>>ݿ(s[⧉]rdeBxxz~y5k,\4Kl2fzy໷n^rڕ˵5LVo^/>)Uqe+̓+!Ai]t25ϹlѶKCuuh_Ȕ9KbWں1&Fwcޮn:W3pvɒ0^ڞ99s6Xޠi?PdR*+Q2G\Y{x8;;GGEz?rH=6% G8Ȫ?PBu; 4w#u *|_9{t ֹg_zk&Uo$;NסoԐgNg;l$qmBB:z:5MSrb%ݜik/c4>tK$U*U&uur.sv2fnW? 
cDsTYD̙zzyEFsuuK(iov'd~OnO)wrrڴRdȳ`#6Hug)|pnУFÑ{tJ5oO5]{0u1{ AbWn+<Jz̘Zq} 8t@ӡ[O٥drwn ʝK!<B̕l! *2}RfT*Ϧ:qVq3;qNabs9WqV}}mx!+0}R*gsЭN᫗/%25`Ӛ:%>2lɄjZ??g<Fr|1-FC5AF;:XV#6S:2gj(cng{  Ř LnjYٴc_qz97qs1VneU쵹`ua pQ:;:r@79+U5<DVڻ[K/v߸YReu!kzxzC7])ܸZӪ}g*h~]{~9(~aX۴oZ`HH@ ƾ?kԤEk?kW~ }xoCs~$i~SyҺC(ՙs2uiZ ǧa3ǘɬaap4~$k kJhmdciSQϞ)]6_:'JΜ8є9k֪J^/˞h!O/PȌ6m;utЃ{BlY;m^F`]n~ߏò DBj@wtToQ-J@׭g1WȘZ,(Cƌ;Nڲnsg.T(3 4_?mоϞZ= KAöixnɔZGYt^yGLJM]\buS!::*0>WVU$2)>ܹuC_(Ppuuָ^p^FMX%m\?wO*Z: /Y*wޏ2dֿ{L8>jq2i;_HB}5؛d27wu=ѫDب1^ROJBt;P䐧 ِ?/YljߵG22gT:yeȘ+O*|nܮsv6)JVϙKP(*2ܩ%>>ש\ȼO|W{|gݿP}^[ՕK_%V~J-2@eHi A)&zϚ;:y4Q~ɮN*!;z*"݀93tJJ+P~ 7_DϜ/lд*BҬ6 5nJ燩w"^i,TD𖡇Lfpr`_DxsRQҼM{‡NbB=ũ[֯6-,n SN5J)WNoi ܼn̵9GXs)kbg,Fq?rPgw_{$V^DI͟&h֪ NkԿDG\NaBԝoMlMnSSY f}Bxy]{3gXgP 8JU}''n."mYf!u0$ pѢ?/(eʥ͕J>_ sj$=4jЛW=GPtwMhd͖AiA$B4m48⹺BkE녫6'iWFX@©y檏#H`0{Űuأ^<%旽c^h OǏdwT(~TL?F/9uucvg,PP"E+Q\-O&q|ƾU*g_ը۰Wކ}˗A&=.V|n 8r}0""<&:+C_Pj5_[DɗCED:9;{ye͞99]R?otܱ?W*~ .ZHŊɵHډ˷L`P*^& ݐ_z)K9|?CF3l6o""""Uqnnnn>2e={YLhR"=482]tTTllg֬rʝ=GN;ژq?`rOii K v<[ )PJ 9RX9H  ڑe-)6@J`b+`߈HȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȈHȜ-]ߴ񣤃@DcAɪu^vt0tϤ` S,[)Z  c vH3p[6INռ|U۶m??hMR3qC:htp`-RHc2" #2[7>fC殏 H%[m}h%dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@l Hԋ?o`q2" #2+(!X2 Z H2" :%֖|8HȈHȈHȈHȈHX:Nϟ4p@$n( IDATZ  c /^/#Y:f4Ixtp,3dJžJȈ#XM`Ӑ|I`fqn|8JȈHKiCXJȈHȈHȈHȈHȈ-`aNϟoٚKH8o,tz٧m[yydv@mv^2" #2P?~8dy07S?t;8 kdi(Z  c vhĄ)D+ #2" #2" #2" #2" e;4m( 0003 [6V@F$dD@X !޸!L p˗[ 1`j~8 } h%dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@F$dD@lIB|++3;f;0ϑW) |w#p0Á~&p# % 1?rIcO/j|ZBlR"E h.!7]]h޹S'Ԩ]w...RJ@"GZR*Rt'j,XP:PB5q&v%M7Q@@@Vˢ4ߦ1?%4?B$_Ǭi6MHHg=<<Μx#͙հQ\%@V] +]D=trrԣO>݅QQykW7'5a #Zeʐ1GBOv]Ky }9f/ BR]r9}*cڧ'2׌1 v󻈈%fkoB*>`gBe(WA?G3gگ֬T*\@RalF*aDžѭMs<GLk.8dʬVfOUSxȡ?<Iy R  
z`7>_`Oxk6m1ɚέ^™SBiܤ)u61S"AB0pjJE6? _yWg.;s҅ b EK~ոy2#A"ϞG5n7J*3e>>ݩEW/_(Uf򵦭I-tL*x-pvv_ǏB$@op!čWeS8HeTя.ݾy_v J័0{׮fϱa.2m'66v@Vh-If_H/= ;(!~W\iYJAsuu *|ZMn޻}k?.=)s MN-FEF6k2oqaH ʚeKϝV+|Ze꼺yyӧT*77wBH rZ?g.W*mܞfU$"AB0gO9[fБI}Dm5uR\\\|ۭu'A*W`i*ca 1XA 0{wlBkܬU:y XpXϰAi &(!R)T88 !>Z]ZiXy*ꔫTY1)Sj *^"".=ݳ]H/DGKo߆\k`!DTdbȈ1JSA[֭JQqN !XDVm#{b؀ޏ}Zz7:W(_"EI% nѦ]J֨im% $ # LQh1!Dȳ.:oٚ&H/9;;WRMyH s !Z 5neyHJ0X +aHG111[4el~%˔>CB7>Y\R޽-(^P3 4ˮKa2D~$ݽZ:zx!SϞ:}Br4i.ksgJK&i⟔񷳭"i\x@:н㍫W+|gggMyLL]$#Wޏܸ.W7mHD`mikOHH4|t.ݵ_}xmƾBxxz6oݮgA^^ɾ6/+ i۵=CƌvɔY՘* ,$DJ44.$ǟ9pMO/>W`'~SQQ[;2=J#AB0H֥g']ȳҗ eNCȚeΞT*q&@bՑ HI:yC7+P/9 hѦ4 veWdeJojy ?$}7vsjԮ+J-!Ĵy+Uص'n eܙĎ@)V@i ҿإGޖ&T*RTGM"תg͗ߘ{}Hl#$T*.Z^fB?BdΒEzsM~00k6~C5h"ww17u0R$HjiH#vϙn Sq?7}\\];ӭOOѡ@XA 0 "<|͗. {*Uv܄\y !._!jǴ={>BsohY w 3]Z6 zי?Tr̩!Voۣ)4~԰ !VvQܼvUѭO>_ 5v8A?`O^zpŠU>􌎊yB!{¹I )]v޲5͙҇ϥqqq5jؽwRej#OP*j[A"$n}F,>>}/_^[0FJж}Ӻ M{?Hi'A  X3!cݢy@V !rHmM5#8b$HX H#}}]D35aBLtN~iSg.^ަS7V:W ..n)9d}BEi4ӍB߼^2eɺ&sH[T9K_BmwBooϞ<dr%JJ %8p3Ra仈׮i.}^]B7uZu4 hc 1]Nvn$~x]D0J5}A\\56k2ftuqNsvqqqqvҢ93(xԉ#ժ:t-#d` yѣ|Ckm Gn$TR|Մ1Ûh}MkV}\SkֶsssoԼ-WH*ϑsca`.Df6A0ݾqm݊?/]8j_2gZf*]8sjߪ֨)u= ~u,Y5WʓsϾ.<~Ν{5`h:;oL\5oko⒚.\@="</*2rC5uRe !? ѶJWB7nѲC^9rּÞ[اRJ.ss_fG$$`M^W/5o6myu_6[|KmX\zXtݠQ} *bM 4B$$`fⅳǪ Yfy5"n┄3'֯*W1l 9$T#1QH_6 !R?{"8gkW E5g/Yx ʝV4*vŒ 3.+fjWV-^ԂM[ך0HR0B=m P*BOJt2b浫wLMY"= )F0vhf_!>a/=EA$;z?z+OޯZG '_V{O|2exE I=z8񻑷o^3F[S `BܶTr&N mX;cB'M׸+C}^Iܻsʖ:gIN#TW/3g"<<{ov5 !.;3ap ;ݭwܔ. ̌ų=,<,T_?DGEu;! {vlyRYRu5^( 4&ؖwW8~7 ,Թg j$gмM{!ĚeKm=9rv_ i`Mx~==֔(/ܳd䟷mX۲}oF/uk4IPͺ ~m&0N2k:aW`6hm3~)&&F߾KV:S  @*Jlߴn/? !~9kktȡq !.F߿{'BJQma,`h޺]B+-^T=A|\0w *bLh-",`i}]۵_C!D6_? 
SrtZ2I+ qGyp۴wuu3xl͙Qni!w-B4unJ7c,Z0sylϝ>))R˽2<n۹g~C=u|oDž.XN3, օ`@*;}Ǒ"#ȕvFqq6mԼ $vٿY/' !W2۹?C<#`3GrXP%...wn^pԆ| !jk("S,BlZ2IPbnnaoCڶIrp?!D?Z҅7>bM–H `w#˔8۱j5xzyz`%oPjLṖlRV2s>/a" v@VգӍk۹?WΚ>)RsgLyۆ^^vϜըA}$8+ʔq¹)L*զT*=BB8;; !լS*BO>"#͝aB}ROO"ްzd܈!C{{v%q_B0#(N>~e*roL Bݹ)R@`xDGE-3K&tqukӼzxazkgϞCHqm !-"$D* zPEێ.ڬU\yY%6A%?,(!!a_;ΞK!D:fd\5'\}޿qqu4lT&sGkաuj <,p;{k-ZjcC  IliR^z9n߸&pߎ)[9ʕsö_}?3{ IDAT&| #,Bq턄}|޺9|`!D,YsB,gdJp 4ҙZաՃ{w3eβyߑv?\bٟ͕̉;o͗1WQ\?|'ںiêB$TYS>l|3"vor_gΓ~ΗΟ_O!cv}fȳN7m;w+[oLJ~֣{K[0^v8}y;+Z:U~۩SZƐRDbjZț@-Kώ ~3]DsT*զ5+;6ocz!yxx ! !}2M2}”>2߿{CB߼624`:Hu ##ߵUڤLp5+N#!D;FRە/ ӭxR|54LF$@֭X`4''v|":*Sy*R-}RJh>~}B_8  &^[ 8?G˔(7}J\\BP 9VP< ^r᳧ңmeС[vyS0/(uV#uggG{kR|=ciǍ:{ժ6,]0'] K1~u4NMFFzsBDxjc4U;-vprr}ҏ:*+ !ųStGHDL R##sݦSWsf>~l/!}hڤרy&yS®jHȄ$߲IS&V\}מB!rɛPp߹}=EzH-Cƌ_ ܔn"X]NT]2'nnBgO$vsB|;v>V 8&/!~t% ?Cˁ y7{vcf VVɞ 7BTZMzy̩IN}L:V*r9D/5H Oc4F"K(7_EFBu;@qOѤZZtwWgQRH#<4dO0\/{(ߴysʭySO/ZhBQ=u\QdkFGG;x IɿWUțNDҜæ5+ϘR2g:mBEƌ8okL*4nTڗ(Yz ~l̽X3"ʃ?&<쭧i+T*ذjS{>o^yr*s ֏H@CfG8~\8wZM2=5rְ /#XR9s_vFvE+'mY6V,Ú,[x ZݦcgNy%#0FOlK`81#ÕJZ޼QtRjl_cZ?GZlXl.DGE>P^Cwwwc' `X cDma{X0X/kn.KDa;F$XJKG$fX@HI` 8ؤ II0Q<ۖH=0W0ppDGj0;'L@8[9I ;@1 v+->7Z gfl.pD_*0 (L8?8 Z p868,"`0ppDhc, #2" #+C?d>IENDB`vcdExtra/man/Hauser79.Rd0000644000176200001440000001244114430460317014507 0ustar liggesusers\name{Hauser79} \alias{Hauser79} \docType{data} \title{ Hauser (1979) Data on Social Mobility } \description{ Hauser (1979) presented this two-way frequency table, cross-classifying occupational categories of sons and fathers in the United States. It is a good example for exploring a variety of models for square tables: quasi-independence, quasi-symmetry, row/column effects, uniform association, etc., using the facilities of the \pkg{gnm}. } \usage{data(Hauser79)} \format{ A frequency data frame with 25 observations on the following 3 variables, representing the cross-classification of 19912 individuals by father's occupation and son's first occupation. 
\describe{ \item{\code{Son}}{a factor with levels \code{UpNM} \code{LoNM} \code{UpM} \code{LoM} \code{Farm}} \item{\code{Father}}{a factor with levels \code{UpNM} \code{LoNM} \code{UpM} \code{LoM} \code{Farm}} \item{\code{Freq}}{a numeric vector} } } \details{ Hauser's data was first presented in 1979, and then published in 1980. The name of the dataset reflects the earliest use. It reflects the "frequencies in a classification of son's first full-time civilian occupation by father's (or other family head's) occupation at son's sixteenth birthday among American men who were aged 20 to 64 in 1973 and were not currently enrolled in school". As noted in Hauser's Table 1, "Counts are based on observations weighted to estimate population counts and compensate for departures of the sampling design from simple random sampling. Broad occupation groups are upper nonmanual: professional and kindred workers, managers and officials, and non-retail sales workers; lower nonmanual: proprietors, clerical and kindred workers, and retail sales workers; upper manual: craftsmen, foremen, and kindred workers; lower manual: service workers, operatives and kindred workers, and laborers (except farm); farm: farmers and farm managers, farm laborers, and foremen. density of mobility or immobility in the cells to which they refer." The table levels for \code{Son} and \code{Father} have been arranged in order of decreasing status as is common for mobility tables. } \source{ R.M. Hauser (1979), Some exploratory methods for modeling mobility tables and other cross-classified data. In: K.F. Schuessler (Ed.), \emph{Sociological Methodology}, 1980, Jossey-Bass, San Francisco, pp. 413-458. Table 1. } \references{ Powers, D.A. and Xie, Y. (2008). \emph{Statistical Methods for Categorical Data Analysis}, Bingley, UK: Emerald. 
} \examples{ data(Hauser79) str(Hauser79) # display table structable(~Father+Son, data=Hauser79) #Examples from Powers & Xie, Table 4.15 # independence model mosaic(Freq ~ Father + Son, data=Hauser79, shade=TRUE) hauser.indep <- gnm(Freq ~ Father + Son, data=Hauser79, family=poisson) mosaic(hauser.indep, ~Father+Son, main="Independence model", gp=shading_Friendly) # Quasi-independence hauser.quasi <- update(hauser.indep, ~ . + Diag(Father,Son)) mosaic(hauser.quasi, ~Father+Son, main="Quasi-independence model", gp=shading_Friendly) # Quasi-symmetry hauser.qsymm <- update(hauser.indep, ~ . + Diag(Father,Son) + Symm(Father,Son)) mosaic(hauser.qsymm, ~Father+Son, main="Quasi-symmetry model", gp=shading_Friendly) # numeric scores for row/column effects Sscore <- as.numeric(Hauser79$Son) Fscore <- as.numeric(Hauser79$Father) # row effects model hauser.roweff <- update(hauser.indep, ~ . + Father*Sscore) LRstats(hauser.roweff) # uniform association hauser.UA <- update(hauser.indep, ~ . + Fscore*Sscore) LRstats(hauser.UA) # uniform association, omitting diagonals hauser.UAdiag <- update(hauser.indep, ~ . + Fscore*Sscore + Diag(Father,Son)) LRstats(hauser.UAdiag) # Levels for Hauser 5-level model levels <- matrix(c( 2, 4, 5, 5, 5, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 5, 5, 4, 1 ), 5, 5, byrow=TRUE) hauser.topo <- update(hauser.indep, ~ . + Topo(Father, Son, spec=levels)) mosaic(hauser.topo, ~Father+Son, main="Topological model", gp=shading_Friendly) # RC model hauser.RC <- update(hauser.indep, ~ . + Mult(Father, Son), verbose=FALSE) mosaic(hauser.RC, ~Father+Son, main="RC model", gp=shading_Friendly) LRstats(hauser.RC) # crossings models hauser.CR <- update(hauser.indep, ~ . + Crossings(Father,Son)) mosaic(hauser.CR, ~Father+Son, main="Crossings model", gp=shading_Friendly) LRstats(hauser.CR) hauser.CRdiag <- update(hauser.indep, ~ . 
+ Crossings(Father,Son) + Diag(Father,Son)) LRstats(hauser.CRdiag) # compare model fit statistics modlist <- glmlist(hauser.indep, hauser.roweff, hauser.UA, hauser.UAdiag, hauser.quasi, hauser.qsymm, hauser.topo, hauser.RC, hauser.CR, hauser.CRdiag) sumry <- LRstats(modlist) sumry[order(sumry$AIC, decreasing=TRUE),] # or, more simply LRstats(modlist, sortby="AIC") mods <- substring(rownames(sumry),8) with(sumry, {plot(Df, AIC, cex=1.3, pch=19, xlab='Degrees of freedom', ylab='AIC') text(Df, AIC, mods, adj=c(0.5,-.5), col='red', xpd=TRUE) }) } \keyword{datasets} \concept{square tables} \concept{mobility tables} \concept{ordinal variables} vcdExtra/man/Cormorants.Rd0000644000176200001440000000627314430460317015235 0ustar liggesusers\name{Cormorants} \alias{Cormorants} \docType{data} \title{ Advertising Behavior by Males Cormorants } \description{ Male double-crested cormorants use advertising behavior to attract females for breeding. In this study by Meagan McRae (2015), cormorants were observed two or three times a week at six stations in a tree-nesting colony for an entire season, April 10, 2014-July 10, 2014. The number of advertising birds was counted and these observations were classified by characteristics of the trees and nests. The goal is to determine how this behavior varies temporally over the season and spatially, as well as with characteristics of nesting sites. } \usage{data("Cormorants")} \format{ A data frame with 343 observations on the following 8 variables. 
\describe{ \item{\code{category}}{Time of season, divided into 3 categories based on breeding chronology, an ordered factor with levels \code{Pre} < \code{Incubation} < \code{Chicks Present}} \item{\code{week}}{Week of the season} \item{\code{station}}{Station of observations on two different peninsulas in a park, a factor with levels \code{B1} \code{B2} \code{C1} \code{C2} \code{C3} \code{C4}} \item{\code{nest}}{Type of nest, an ordered factor with levels \code{no} < \code{partial} < \code{full}} \item{\code{height}}{Relative height of bird in the tree, an ordered factor with levels \code{low} < \code{mid} < \code{high}} \item{\code{density}}{Number of other nests in the tree, an ordered factor with levels \code{zero} < \code{few} < \code{moderate} < \code{high}} \item{\code{tree_health}}{Health of the tree the bird is advertising in, a factor with levels \code{dead} \code{healthy}} \item{\code{count}}{Number of birds advertising, a numeric vector} } } \details{ Observations were made on only 2 days in weeks 3 and 4, but 3 days in all other weeks. One should use log(days) as an offset, so that the response measures rate. \code{Cormorants$days <- ifelse(Cormorants$week \%in\% 3:4, 2, 3)} } \source{ McRae, M. (2015). Spatial, Habitat and Frequency Changes in Double-crested Cormorant Advertising Display in a Tree-nesting Colony. Unpublished MA project, Environmental Studies, York University. } %\references{ %%% ~~ possibly secondary sources and usages ~~ %} \examples{ data(Cormorants) str(Cormorants) if (require("ggplot2")) { print(ggplot(Cormorants, aes(count)) + geom_histogram(binwidth=0.5) + labs(x="Number of birds advertising")) # Quick look at the data, on the log scale, for plots of `count ~ week`, # stratified by something else. 
print(ggplot(Cormorants, aes(week, count, color=height)) + geom_jitter() + stat_smooth(method="loess", size=2) + scale_y_log10(breaks=c(1,2,5,10)) + geom_vline(xintercept=c(4.5, 9.5))) } # ### models using week fit1 <-glm(count ~ week + station + nest + height + density + tree_health, data=Cormorants, family = poisson) if (requireNamespace("car")) car::Anova(fit1) # plot fitted effects if (requireNamespace("effects")) plot(effects::allEffects(fit1)) } \keyword{datasets} \concept{generalized linear models} \concept{Poisson distributions} vcdExtra/man/Mice.Rd0000644000176200001440000000313414430460317013754 0ustar liggesusers\name{Mice} \alias{Mice} \docType{data} \title{Mice Depletion Data} \description{ Data from Kastenbaum and Lamphiear (1959). The table gives the number of depletions (deaths) in 657 litters of mice, classified by litter size and treatment. This data set has become a classic in the analysis of contingency tables, yet unfortunately little information on the details of the experiment has been published. } \usage{data("Mice")} \format{ A frequency data frame with 30 observations on the following 4 variables, representing a 5 x 2 x 3 contingency table. \describe{ \item{\code{litter}}{litter size, a numeric vector} \item{\code{treatment}}{treatment, a factor with levels \code{A} \code{B}} \item{\code{deaths}}{number of depletions, a factor with levels \code{0} \code{1} \code{2+}} \item{\code{Freq}}{cell frequency, a numeric vector} } } %\details{ %%% ~~ If necessary, more details than the __description__ above ~~ %} \source{ Goodman, L. A. (1983) The analysis of dependence in cross-classifications having ordered categories, using log-linear models for frequencies and log-linear models for odds. \emph{Biometrics}, 39, 149-160. } \references{ Kastenbaum, M. A. & Lamphiear, D. E. (1959) Calculation of chi-square to test the no three-factor interaction hypothesis. \emph{Biometrics}, 15, 107-115. 
} \examples{ data(Mice) # make a table ftable(mice.tab <- xtabs(Freq ~ litter + treatment + deaths, data=Mice)) #library(vcd) vcd::mosaic(mice.tab, shade=TRUE) } \keyword{datasets} \concept{loglinear models} \concept{ordinal variables} vcdExtra/man/modFit.Rd0000644000176200001440000000346314430460317014326 0ustar liggesusers\name{modFit} \Rdversion{1.1} \alias{modFit} \alias{modFit.loglm} \alias{modFit.glm} %- Also NEED an '\alias' for EACH other topic documented here. \title{Brief Summary of Model Fit for a glm or loglm Object} \description{ Formats a brief summary of model fit for a \code{glm} or \code{loglm} object, showing the likelihood ratio Chisq (df) value and or AIC. Useful for inclusion in a plot title or annotation. } \usage{ modFit(x, ...) \method{modFit}{glm}(x, stats="chisq", digits=2, ...) \method{modFit}{loglm}(x, stats="chisq", digits=2, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A \code{glm} or \code{loglm} object} \item{\dots}{Arguments passed down} \item{stats}{One or more of \code{chisq} or \code{aic}, determining the statistics displayed.} \item{digits}{Number of digits after the decimal point in displayed statistics.} } %\details{ %%% ~~ If necessary, more details than the description above ~~ %} \value{ A character string containing the formatted values of the chosen statistics. 
} %\references{ %%% ~put references to the literature/web site here ~ %} \author{Michael Friendly} %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{Summarise}} (soon to be deprecated), \code{\link{LRstats}} } \examples{ data(Mental) require(MASS) (Mental.tab <- xtabs(Freq ~ ses + mental, data=Mental)) (Mental.mod <- loglm(~ses + mental, Mental.tab)) Mental.mod modFit(Mental.mod) # use to label mosaic() mosaic(Mental.mod, main=paste("Independence model,", modFit(Mental.mod))) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{utilities} \keyword{models}% __ONLY ONE__ keyword per line vcdExtra/man/Draft1970.Rd0000644000176200001440000000447714430460317014473 0ustar liggesusers\name{Draft1970} \alias{Draft1970} \docType{data} \title{ USA 1970 Draft Lottery Data } \description{ This data set gives the results of the 1970 US draft lottery, in the form of a data frame. } \usage{data(Draft1970)} \format{ A data frame with 366 observations on the following 3 variables. \describe{ \item{\code{Day}}{day of the year, 1:366} \item{\code{Rank}}{draft priority rank of people born on that day} \item{\code{Month}}{an ordered factor with levels \code{Jan} < \code{Feb} \dots < \code{Dec}} } } \details{ The draft lottery was used to determine the order in which eligible men would be called to the Selective Service draft. The days of the year (including February 29) were represented by the numbers 1 through 366 written on slips of paper. The slips were placed in separate plastic capsules that were mixed in a shoebox and then dumped into a deep glass jar. Capsules were drawn from the jar one at a time. The first number drawn was 258 (September 14), so all registrants with that birthday were assigned lottery number \code{Rank} 1. The second number drawn corresponded to April 24, and so forth. 
All men of draft age (born 1944 to 1950) who shared a birthdate would be called to serve at once. The first 195 birthdates drawn were later called to serve in the order they were drawn; the last of these was September 24. } \source{ Starr, N. (1997). Nonrandom Risk: The 1970 Draft Lottery, \emph{Journal of Statistics Education}, v.5, n.2 \url{http://jse.amstat.org/v5n2/datasets.starr.html} } \references{ Fienberg, S. E. (1971), "Randomization and Social Affairs: The 1970 Draft Lottery," \emph{Science}, 171, 255-261. \url{https://en.wikipedia.org/wiki/Draft_lottery_(1969)} } \seealso{\code{\link{Draft1970table}} } \examples{ data(Draft1970) # scatterplot plot(Rank ~ Day, data=Draft1970) with(Draft1970, lines(lowess(Day, Rank), col="red", lwd=2)) abline(lm(Rank ~ Day, data=Draft1970), col="blue") # boxplots plot(Rank ~ Month, data=Draft1970, col="bisque") lm(Rank ~ Month, data=Draft1970) anova(lm(Rank ~ Month, data=Draft1970)) # make the table version Draft1970$Risk <- cut(Draft1970$Rank, breaks=3, labels=c("High", "Med", "Low")) with(Draft1970, table(Month, Risk)) } \keyword{datasets} \concept{linear models} vcdExtra/man/mosaic.glmlist.Rd0000644000176200001440000001267014422306403016025 0ustar liggesusers\name{mosaic.glmlist} \alias{mosaic.glmlist} \alias{mosaic.loglmlist} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Mosaic Displays for \code{glmlist} and \code{loglmlist} Objects } \description{ This function provides a convenient interface for viewing mosaic displays associated with a collection of glm models for frequency tables that have been stored in a \code{glmlist} or \code{loglmlist} object. You can plot either selected models individually, or mosaics for all models in an array of viewports. } \usage{ \method{mosaic}{glmlist}(x, selection, panel=mosaic, type=c("observed", "expected"), legend=ask | !missing(selection), main=NULL, ask=TRUE, graphics=TRUE, rows, cols, newpage=TRUE, ...) 
\method{mosaic}{loglmlist}(x, selection, panel=mosaic, type=c("observed", "expected"), legend=ask | !missing(selection), main=NULL, ask=TRUE, graphics=TRUE, rows, cols, newpage=TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a \code{glmlist} or \code{loglmlist} object } \item{selection}{ the index or name of one \code{glm} or \code{loglm} object in \code{x}. If no selection is specified, a menu of models is presented or all models are plotted. } \item{panel}{ a \code{\link[vcd]{strucplot}} panel function, typically \code{\link[vcd]{mosaic}} or \code{\link[vcd]{sieve}} } \item{type}{ a character string indicating whether the \code{"observed"} or the \code{"expected"} values of the table should be visualized } \item{legend}{ logical: show a legend for residuals in the mosaic display(s)? The default behavior is to include a legend when only a single plot is shown, i.e., if \code{ask} is \code{TRUE} or a \code{selection} has been specified. } \item{main}{ either a logical, or a vector of character strings used for plotting the main title. If main is a logical and \code{TRUE}, the name of the selected glm object is used. } \item{ask}{ logical: should the function display a menu of models, when one is not specified in \code{selection}? If \code{selection} is not supplied and \code{ask} is \code{TRUE} (the default), a menu of model names is presented; if \code{ask} is \code{FALSE}, mosaics for all models are plotted in an array. } \item{graphics}{ logical: use a graphic dialog box when \code{ask=TRUE}? } \item{rows,cols}{ when \code{ask=FALSE}, the number of rows and columns in which to plot the mosaics. } \item{newpage}{ start a new page? (only applies to \code{ask=FALSE}) } \item{\dots}{ other arguments passed to \code{\link{mosaic.glm}} and ultimately to \code{\link[vcd]{mosaic}}. } } \details{ Most details of the plots produced can be controlled via \dots arguments as shown in some of the examples below. 
In particular, with \code{panel=sieve} you need to also pass \code{gp=shading_Friendly} to get a color version. } \value{ Returns the result of \code{\link{mosaic.glm}}. %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } \references{ David Meyer, Achim Zeileis, and Kurt Hornik (2006). The Strucplot Framework: Visualizing Multi-Way Contingency Tables with vcd. \emph{Journal of Statistical Software}, 17(3), 1-48. % \url{https://www.jstatsoft.org/v17/i03/}, \doi{10.18637/jss.v017.i03}, available as \code{vignette("strucplot", package="vcd")}. } \author{ Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{glmlist}}, \code{\link{loglmlist}}, \code{\link{Kway}}, \code{\link{mosaic.glm}}, \code{\link[vcd]{mosaic}}, \code{\link[vcd]{strucplot}}, for the many parameters that control the details of mosaic plots. } \examples{ data(JobSatisfaction, package="vcd") # view all pairwise mosaics pairs(xtabs(Freq~management+supervisor+own, data=JobSatisfaction), shade=TRUE, diag_panel=pairs_diagonal_mosaic) modSat <- Kway(Freq ~ management+supervisor+own, data=JobSatisfaction, family=poisson, prefix="JobSat") names(modSat) \dontrun{ mosaic(modSat) # uses menu, if interactive() } mosaic(modSat, "JobSat.1") # model label mosaic(modSat, 2) # model index # supply a formula to determine the order of variables in the mosaic mosaic(modSat, 2, formula=~own+supervisor+management) mosaic(modSat, ask=FALSE) # uses viewports # use a different panel function, label the observed values in the cells mosaic(modSat, 1, main=TRUE, panel=sieve, gp=shading_Friendly, labeling=labeling_values) data(Mental) indep <- glm(Freq ~ mental+ses, family = poisson, data = Mental) Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) 
roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) # assign names for the plot labels modMental <- glmlist(Indep=indep, ColEff=coleff, RowEff=roweff, `Lin x Lin`=linlin) mosaic(modMental, ask=FALSE, margins=c(3,1,1,2), labeling_args=list(abbreviate_labs=5)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{hplot} vcdExtra/man/Heart.Rd0000644000176200001440000000207114430460317014141 0ustar liggesusers\name{Heart} \Rdversion{1.1} \alias{Heart} \docType{data} \title{Sex, Occupation and Heart Disease} \description{Classification of individuals by gender, occupational category and occurrence of heart disease} \usage{data(Heart)} \format{ A 3-dimensional array resulting from cross-tabulating 3 variables for 21522 observations. The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Disease}\tab \code{"Disease", "None"}\cr 2\tab \code{Gender}\tab \code{"Male", "Female"}\cr 3\tab \code{Occup}\tab \code{"Unempl", "WhiteCol", "BlueCol"}\cr } } %\details{ } \source{ % \cite{Karger, 1980} Karger, (1980). } %\references{ %} %\seealso{ } \examples{ data(Heart) str(Heart) # Display the frequencies for occupational categories. # Each row is a 2 x 2 table vcd::structable(Disease + Gender ~ Occup, data=Heart) # display as fourfold plots vcd::cotabplot(~ Disease + Gender | Occup, data=Heart, panel = cotab_fourfold) } \keyword{datasets} \concept{2 by 2 tables} vcdExtra/man/Mammograms.Rd0000644000176200001440000000216414430460317015201 0ustar liggesusers\name{Mammograms} \alias{Mammograms} \docType{data} \title{Mammogram Ratings} \description{ Kundel & Polansky (2003) give (possibly contrived) data on a set of 110 mammograms rated by two readers. } \usage{data(Mammograms)} \format{ A frequency table in matrix form. The format is: num [1:4, 1:4] 34 6 2 0 10 8 5 1 2 8 ... 
- attr(*, "dimnames")=List of 2 ..$ Reader2: chr [1:4] "Absent" "Minimal" "Moderate" "Severe" ..$ Reader1: chr [1:4] "Absent" "Minimal" "Moderate" "Severe" } \source{ Kundel, H. L. & Polansky, M. (2003), "Measurement of Observer Agreement", \emph{Radiology}, \bold{228}, 303-308, Table A1 } %\references{ %%% ~~ possibly secondary sources and usages ~~ %} \examples{ data(Mammograms) B <- agreementplot(Mammograms, main="Mammogram ratings") # agreement measures B Kappa(Mammograms) ## other displays mosaic(Mammograms, shade=TRUE) sieve(Mammograms, pop = FALSE, shade = TRUE) labeling_cells(text = Mammograms, gp_text = gpar(fontface = 2, cex=1.75))(as.table(Mammograms)) } \keyword{datasets} \concept{observer agreement} \concept{ordinal variables} vcdExtra/man/Depends.Rd0000644000176200001440000000347014430460317014464 0ustar liggesusers\name{Depends} \alias{Depends} \docType{data} \title{ Dependencies of R Packages } \description{ This one-way table gives the type-token distribution of the number of dependencies declared in 4983 packages listed on CRAN on January 17, 2014. } \usage{data(Depends)} \format{ The format is a one-way frequency table of counts of packages with 0, 1, 2, ... dependencies. 'table' int [1:15(1d)] 986 1347 993 685 375 298 155 65 32 19 ... - attr(*, "dimnames")=List of 1 ..$ Depends: chr [1:15] "0" "1" "2" "3" ... } %\details{ %%% ~~ If necessary, more details than the __description__ above ~~ %} \source{ Using code from \url{https://blog.revolutionanalytics.com/2013/12/a-look-at-the-distribution-of-r-package-dependencies.html} } %\references{ %%% ~~ possibly secondary sources and usages ~~ %} \examples{ data(Depends) plot(Depends, xlab="Number of Dependencies", ylab="Number of R Packages", lwd=8) # what type of distribution? # Ord_plot can't classify this! 
Ord_plot(Depends) \dontrun{ # The code below, from Joseph Rickert, downloads and tabulates the data p <- as.data.frame(available.packages(),stringsAsFactors=FALSE) names(p) pkgs <- data.frame(p[,c(1,4)]) # Pick out Package names and Depends row.names(pkgs) <- NULL # Get rid of row names pkgs <- pkgs[complete.cases(pkgs[,2]),] # Remove NAs pkgs$Depends2 <-strsplit(pkgs$Depends,",") # split list of Depends pkgs$numDepends <- as.numeric(lapply(pkgs$Depends2,length)) # Count number of dependencies in list zeros <- c(rep(0,dim(p)[1] - dim(pkgs)[1])) # Account for packages with no dependencies Deps <- as.vector(c(zeros,pkgs$numDepends)) # Set up to tabulate Depends <- table(Deps) } } \keyword{datasets} \concept{one-way tables} vcdExtra/man/mosaic3d.Rd0000644000176200001440000001662014422306403014601 0ustar liggesusers\name{mosaic3d} \Rdversion{1.1} \alias{mosaic3d} \alias{mosaic3d.default} \alias{mosaic3d.loglm} %- Also NEED an '\alias' for EACH other topic documented here. \title{ 3D Mosaic Plots } \description{ Produces a 3D mosaic plot for a contingency table (or a \code{\link[MASS]{loglm}} model) using the \code{\link[rgl]{rgl-package}}. Generalizing the 2D mosaic plot, this begins with a given 3D shape (a unit cube), and successively sub-divides it along the X, Y, Z dimensions according to the table margins, generating a nested set of 3D tiles. The volume of the resulting tiles is therefore proportional to the frequency represented in the table cells. Residuals from a given loglinear model are then used to color or shade each of the tiles. This is a developing implementation. The arguments and details are subject to change. } \usage{ mosaic3d(x, ...) \method{mosaic3d}{loglm}( x, type = c("observed", "expected"), residuals_type = c("pearson", "deviance"), ...) 
\method{mosaic3d}{default}( x, expected = NULL, residuals = NULL, type = c("observed", "expected"), residuals_type = NULL, shape = rgl::cube3d(alpha = alpha), alpha = 0.5, spacing = 0.1, split_dir = 1:3, shading = shading_basic, interpolate=c(2,4), zero_size=.05, label_edge, labeling_args = list(), newpage = TRUE, box=FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A \code{\link[MASS]{loglm}} model object. Alternatively, a multidimensional \code{array} or \code{table} or \code{\link[vcd]{structable}} of frequencies in a contingency table. In the present implementation, the dimensions are taken in sequential order. Use \code{\link[base]{aperm}} or \code{\link[vcd]{structable}} to change this. } \item{expected}{optionally, for contingency tables, an array of expected frequencies of the same dimension as \code{x}, or alternatively the corresponding loglinear model specification as used by \code{\link[stats]{loglin}} or \code{\link[MASS]{loglm}} (see \code{\link[vcd]{structable}} for details).} \item{residuals}{optionally, an array of residuals of the same dimension as \code{x} (see details).} \item{type}{a character string indicating whether the \code{"observed"} or the \code{"expected"} frequencies in the table should be visualized by the volume of the 3D tiles. } \item{residuals_type}{a character string indicating the type of residuals to be computed when none are supplied. If residuals is \code{NULL}, \code{residuals_type} must be one of \code{"pearson"} (default; giving components of Pearson's chi-squared), \code{"deviance"} (giving components of the likelihood ratio chi-squared), or \code{"FT"} for the Freeman-Tukey residuals. The value of this argument can be abbreviated. } \item{shape}{The initial 3D shape on which the mosaic is based. Typically this is a call to an rgl function, and must produce a \code{shape3d} object. 
The default is a "unit cube" on (-1, +1), with transparency specified by \code{alpha}.} \item{alpha}{Specifies the transparency of the 3D tiles used to compose the 3D mosaic.} \item{spacing}{A number or vector giving the total amount of space used to separate the 3D tiles along each of the dimensions of the table. The values specified are re-cycled to the number of table dimensions.} \item{split_dir}{A numeric vector composed of the integers \code{1:3} or a character vector composed of \code{c("x", "y", "z")}, where \code{split_dir[i]} specifies the axis along which the tiles should be split for dimension \code{i} of the table. The values specified are re-cycled to the number of table dimensions.} \item{shading}{A function, taking an array or vector of residuals for the given model, returning a vector of colors. At present, only the default \code{shading=shading_basic} is provided. This is roughly equivalent to the use of the \code{shade} argument in \code{\link[graphics]{mosaicplot}} or to the use of \code{gp=shading_Friendly} in \code{\link[vcd]{mosaic}}. } \item{interpolate}{a vector of interpolation values for the \code{shading} function.} \item{zero_size}{ The radius of a small sphere used to mark zero cells in the display. } \item{label_edge}{A character vector composed of \code{c("-", "+")} indicating whether the labels for a given table dimension are to be written at the minima (\code{"-"}) or maxima (\code{"+"}) of the \emph{other} dimensions in the plot. The default is \code{rep( c('-', '+'), each=3, length=ndim)}, meaning that the first three table variables are labeled at the minima, and successive ones at the maxima. } \item{labeling_args}{ This argument is intended to be used to specify details of the rendering of labels for the table dimensions, but at present has no effect. 
} \item{newpage}{logical indicating whether a new page should be created for the plot or not.} \item{box}{ logical indicating whether a bounding box should be drawn around the plot. } \item{\dots}{Other arguments passed down to \code{mosaic.default} or 3D functions.} } \details{ Friendly (1995), Friendly (2000, Sect. 4.5) and Theus and Lauer (1999) have all used the idea of 3D mosaic displays to explain various aspects of loglinear models (the iterative proportional fitting algorithm, the structure of various models for 3-way and n-way tables, etc.), but no implementation of 3D mosaics was previously available. For the default method, residuals, used to color and shade the 3D tiles, can be passed explicitly, or, more typically, are computed as needed from observed and expected frequencies. In this case, the expected frequencies are optionally computed for a specified loglinear model given by the \code{expected} argument. For the loglm method, residuals and observed frequencies are calculated from the model object. } \value{Invisibly, the list of \code{shape3d} objects used to draw the 3D mosaic, with names corresponding to the concatenation of the level labels, separated by ":".} \references{ Friendly, M. (1995). Conceptual and Visual Models for Categorical Data, \emph{The American Statistician}, \bold{49}, 153-160. Friendly, M. \emph{Visualizing Categorical Data}, Cary NC: SAS Institute, 2000. Web materials: \url{http://www.datavis.ca/books/vcd/}. Theus, M. & Lauer, S. R. W. (1999) Visualizing Loglinear Models. \emph{Journal of Computational and Graphical Statistics}, \bold{8}, 396-412. 
} \author{Michael Friendly, with the help of Duncan Murdoch and Achim Zeileis} %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[vcd]{strucplot}}, \code{\link[vcd]{mosaic}}, \code{\link[graphics]{mosaicplot}} \code{\link[stats]{loglin}}, \code{\link[MASS]{loglm}} for details on fitting loglinear models } \examples{ # 2 x 2 x 2 if(requireNamespace("rgl")){ mosaic3d(Bartlett, box=TRUE) # compare with expected frequencies under model of mutual independence mosaic3d(Bartlett, type="expected", box=TRUE) # 2 x 2 x 3 mosaic3d(Heart, box=TRUE) } \dontrun{ # 2 x 2 x 2 x 3 # illustrates a 4D table mosaic3d(Detergent) # compare 2D and 3D mosaics demo("mosaic-hec") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{hplot } %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/logLik.loglm.Rd0000644000176200001440000000610714422306403015430 0ustar liggesusers\name{logLik.loglm} \alias{logLik.loglm} %- Also NEED an '\alias' for EACH other topic documented here. \title{Log-Likelihood of a loglm Object} \description{ Calculates the log-likelihood value of the \code{loglm} model represented by \code{object} evaluated at the estimated coefficients. It allows the use of \code{\link[stats]{AIC}} and \code{\link[stats]{BIC}}, which require that a \code{logLik} method exists to extract the corresponding log-likelihood for the model. } \usage{ \method{logLik}{loglm}(object, ..., zero=1E-10) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{A \code{loglm} object} \item{\dots}{For compatibility with the S3 generic; not used here} \item{zero}{value used to replace zero frequencies in calculating the log-likelihood} } \details{ If cell frequencies have not been stored with the \code{loglm} object (via the argument \code{keep.frequencies = TRUE}), they are obtained using \code{update}. 
This function calculates the log-likelihood in a way that allows for non-integer frequencies, such as the case where 0.5 has been added to all cell frequencies to allow for sampling zeros. If the frequencies still contain zero values, those are replaced by the value of \code{zero}. For integer frequencies, it gives the same result as the corresponding model fit using \code{\link[stats]{glm}}, whereas \code{\link[stats]{glm}} returns \code{-Inf} if there are any non-integer frequencies. } \value{ Returns an object of class \code{logLik}. This is a number with one attribute, \code{"df"} (degrees of freedom), giving the number of (estimated) parameters in the model. } %\references{ %%% ~put references to the literature/web site here ~ %} \author{Achim Zeileis} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[MASS]{loglm}}, \code{\link[stats]{AIC}}, \code{\link[stats]{BIC}} } \examples{ data(Titanic, package="datasets") require(MASS) titanic.mod1 <- loglm(~ (Class * Age * Sex) + Survived, data=Titanic) titanic.mod2 <- loglm(~ (Class * Age * Sex) + Survived*(Class + Age + Sex), data=Titanic) titanic.mod3 <- loglm(~ (Class * Age * Sex) + Survived*(Class + Age * Sex), data=Titanic) logLik(titanic.mod1) AIC(titanic.mod1, titanic.mod2, titanic.mod3) BIC(titanic.mod1, titanic.mod2, titanic.mod3) # compare with models fit using glm() titanic <- as.data.frame(Titanic) titanic.glm1 <- glm(Freq ~ (Class * Age * Sex) + Survived, data=titanic, family=poisson) titanic.glm2 <- glm(Freq ~ (Class * Age * Sex) + Survived*(Class + Age + Sex), data=titanic, family=poisson) titanic.glm3 <- glm(Freq ~ (Class * Age * Sex) + Survived*(Class + Age * Sex), data=titanic, family=poisson) logLik(titanic.glm1) AIC(titanic.glm1, titanic.glm2, titanic.glm3) BIC(titanic.glm1, titanic.glm2, titanic.glm3) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
\keyword{models} \keyword{htest}% __ONLY ONE__ keyword per line vcdExtra/man/cutfac.Rd0000644000176200001440000000567614430460317014361 0ustar liggesusers\name{cutfac} \alias{cutfac} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Cut a Numeric Variable to a Factor } \description{ \code{cutfac} acts like \code{\link[base]{cut}}, dividing the range of \code{x} into intervals and coding the values in \code{x} according in which interval they fall. However, it gives nicer labels for the factor levels and by default chooses convenient breaks among the values based on deciles. It is particularly useful for plots in which one wants to make a numeric variable discrete for the purpose of getting boxplots, spinograms or mosaic plots. } \usage{ cutfac(x, breaks = NULL, q = 10) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector which is to be converted to a factor by cutting } \item{breaks}{ either a numeric vector of two or more unique cut points or a single number (greater than or equal to 2) giving the number of intervals into which \code{x} is to be cut. } \item{q}{ the number of quantile groups used to define \code{breaks}, if that has not been specified. } } \details{ By default, \code{\link[base]{cut}} chooses breaks by equal lengths of the range of \code{x}, whereas \code{cutfac} uses \code{\link[stats]{quantile}} to choose breaks of roughly equal count. } \value{ A \code{\link[base]{factor}} corresponding to \code{x} is returned } \references{ Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. 
} \author{Achim Zeileis} %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[base]{cut}}, \code{\link[stats]{quantile}} } \examples{ if (require(AER)) { data("NMES1988", package="AER") nmes <- NMES1988[, c(1, 6:8, 13, 15, 18)] plot(log(visits+1) ~ cutfac(chronic), data = nmes, ylab = "Physician office visits (log scale)", xlab = "Number of chronic conditions", main = "chronic") plot(log(visits+1) ~ cutfac(hospital, c(0:2, 8)), data = nmes, ylab = "Physician office visits (log scale)", xlab = "Number of hospital stays", main = "hospital") } %\donttest{ %# countreg not yet on CRAN %if (require(countreg)) { %data("CrabSatellites", package = "countreg") % %# jittered scatterplot %plot(jitter(satellites) ~ width, data=CrabSatellites, % ylab="Number of satellites (jittered)", xlab="Carapace width", % cex.lab=1.25) %with(CrabSatellites, lines(lowess(width, satellites), col="red", lwd=2)) % %# boxplot, using deciles %plot(satellites ~ cutfac(width), data=CrabSatellites, % ylab="Number of satellites", xlab="Carapace width (deciles)") %} } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Heckman.Rd0000644000176200001440000000512014430460317014442 0ustar liggesusers\name{Heckman} \Rdversion{1.1} \alias{Heckman} \docType{data} \title{Labour Force Participation of Married Women 1967-1971} \description{1583 married women were surveyed over the years 1967-1971, recording whether or not they were employed in the labor force. The data, originally from Heckman & Willis (1977) provide an example of modeling longitudinal categorical data, e.g., with markov chain models for dependence over time. } \usage{ data(Heckman) } \format{ A 5-dimensional \eqn{2^5} array resulting from cross-tabulating 5 binary variables for 1583 observations. 
The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{e1971}\tab \code{"71Yes", "No"}\cr 2\tab \code{e1970}\tab \code{"70Yes", "No"}\cr 3\tab \code{e1969}\tab \code{"69Yes", "No"}\cr 4\tab \code{e1968}\tab \code{"68Yes", "No"}\cr 5\tab \code{e1967}\tab \code{"67Yes", "No"}\cr } } \details{ Lindsey (1993) fits an initial set of logistic regression models examining the dependence of employment in 1971 (\code{e1971}) on successive subsets of the previous years, \code{e1970}, \code{e1969}, \dots \code{e1967}. Alternatively, one can examine markov chain models of first-order (dependence on previous year), second-order (dependence on previous two years), etc. } \source{ % \cite{Lindsey:93 [p. 185]} Lindsey, J. K. (1993). \emph{Models for Repeated Measurements} Oxford, UK: Oxford University Press, p. 185. } \references{ % \cite{HeckmanWillis:77} Heckman, J.J. & Willis, R.J. (1977). "A beta-logistic model for the analysis of sequential labor force participation by married women." 
\emph{Journal of Political Economy}, 85: 27-58 } %\seealso{ } \examples{ data(Heckman) # independence model mosaic(Heckman, shade=TRUE) # same, as a loglm() require(MASS) (heckman.mod0 <- loglm(~ e1971+e1970+e1969+e1968+e1967, data=Heckman)) mosaic(heckman.mod0, main="Independence model") # first-order markov chain: bad fit (heckman.mod1 <- loglm(~ e1971*e1970 + e1970*e1969 +e1969*e1968 + e1968*e1967, data=Heckman)) mosaic(heckman.mod1, main="1st order markov chain model") # second-order markov chain: bad fit (heckman.mod2 <- loglm(~ e1971*e1970*e1969 + e1970*e1969*e1968 +e1969*e1968*e1967, data=Heckman)) mosaic(heckman.mod2, main="2nd order markov chain model") # third-order markov chain: fits OK (heckman.mod3 <- loglm(~ e1971*e1970*e1969*e1968 + e1970*e1969*e1968*e1967, data=Heckman)) mosaic(heckman.mod3, main="3rd order markov chain model") } \keyword{datasets} \concept{loglinear models} vcdExtra/man/Bartlett.Rd0000644000176200001440000000377114430460317014667 0ustar liggesusers\name{Bartlett} \Rdversion{1.1} \alias{Bartlett} \docType{data} \title{Bartlett Data on Plum Root Cuttings} \description{In an experiment to investigate the effect of cutting length (two levels) and planting time (two levels) on the survival of plum root cuttings, 240 cuttings were planted for each of the 2 x 2 combinations of these factors, and their survival was later recorded. Bartlett (1935) used these data to illustrate a method for testing for no three-way interaction in a contingency table.} \usage{ data(Bartlett) } \format{ A 3-dimensional array resulting from cross-tabulating 3 variables for 960 observations. The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1 \tab \code{Alive} \tab \code{"Alive", "Dead"}\cr 2 \tab \code{Time} \tab \code{"Now", "Spring"}\cr 3 \tab \code{Length} \tab \code{"Long", "Short"}\cr } } %\details { } \source{ % \cite{Hand-etal:94 [p.15 #19]} Hand, D. and Daly, F. and Lunn, A. D. and McConway, K. J. and Ostrowski, E. 
(1994). \emph{A Handbook of Small Data Sets}. London: Chapman & Hall, p. 15, # 19. } \references{ % \cite{Bartlett:35} Bartlett, M. S. (1935). Contingency Table Interactions \emph{Journal of the Royal Statistical Society}, Supplement, 1935, 2, 248-252. } %\seealso { } \examples{ data(Bartlett) # measures of association assocstats(Bartlett) oddsratio(Bartlett) # Test models ## Independence MASS::loglm(formula = ~Alive + Time + Length, data = Bartlett) ## No three-way association MASS::loglm(formula = ~(Alive + Time + Length)^2, data = Bartlett) # Use woolf_test() for a formal test of homogeneity of odds ratios vcd::woolf_test(Bartlett) # Plots fourfold(Bartlett, mfrow=c(1,2)) mosaic(Bartlett, shade=TRUE) pairs(Bartlett, gp=shading_Friendly) } \keyword{datasets} \concept{2 by 2 tables} \concept{loglinear models} \concept{homogeneity of association} \concept{odds ratios} vcdExtra/man/Geissler.Rd0000644000176200001440000000475114430460317014662 0ustar liggesusers\name{Geissler} \alias{Geissler} \docType{data} \title{Geissler's Data on the Human Sex Ratio} \description{ Geissler (1889) published data on the distributions of boys and girls in families in Saxony, collected for the period 1876-1885. The \code{Geissler} data tabulates the family composition of 991,958 families by the number of boys and girls listed in the table supplied by Edwards (1958, Table 1). } \usage{data(Geissler)} \format{ A data frame with 90 observations on the following 4 variables. The rows represent the non-NA entries in Edwards' table. \describe{ \item{\code{boys}}{number of boys in the family, \code{0:12}} \item{\code{girls}}{number of girls in the family, \code{0:12}} \item{\code{size}}{family size: \code{boys+girls}} \item{\code{Freq}}{number of families with this sex composition} } } \details{ The data on family composition was available because, on the birth of a child, the parents had to state the sex of all their children on the birth certificate. 
These family records are not necessarily independent, because a given family may have had several children during this 10 year period, included as multiple records. } \source{ Edwards, A. W. F. (1958). An Analysis Of Geissler's Data On The Human Sex Ratio. \emph{Annals of Human Genetics}, 23, 6-15. } \references{ Friendly, M. and Meyer, D. (2016). \emph{Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}. Boca Raton, FL: Chapman & Hall/CRC. \url{http://ddar.datavis.ca}. Geissler, A. (1889). \emph{Beitrage zur Frage des Geschlechts verhaltnisses der Geborenen} Z. K. Sachsischen Statistischen Bureaus, 35, n.p. Lindsey, J. K. & Altham, P. M. E. (1998). Analysis of the human sex ratio by using overdispersion models. \emph{Journal of the Royal Statistical Society: Series C (Applied Statistics)}, 47, 149-157. } \seealso{ \code{\link[vcd]{Saxony}}, containing the data for families of size 12. } \examples{ data(Geissler) str(Geissler) # reproduce Saxony data, families of size 12 Saxony12 <- subset(Geissler, size==12, select=c(boys, Freq)) rownames(Saxony12)<-NULL # make a 1-way table xtabs(Freq~boys, Saxony12) # extract data for other family sizes Saxony11 <- subset(Geissler, size==11, select=c(boys, Freq)) rownames(Saxony11)<-NULL Saxony10 <- subset(Geissler, size==10, select=c(boys, Freq)) rownames(Saxony10)<-NULL } \keyword{datasets} \concept{binomial distributions} vcdExtra/man/Yamaguchi87.Rd0000644000176200001440000000774214430460317015176 0ustar liggesusers\name{Yamaguchi87} \alias{Yamaguchi87} \docType{data} \title{Occupational Mobility in Three Countries} \description{ Yamaguchi (1987) presented this three-way frequency table, cross-classifying occupational categories of sons and fathers in the United States, United Kingdom and Japan. This data set has become a classic for models comparing two-way mobility tables across layers corresponding to countries, groups or time (e.g., Goodman and Hout, 1998; Xie, 1992). 
The US data were derived from the 1973 OCG-II survey; those for the UK from the 1972 Oxford Social Mobility Survey; those for Japan came from the 1975 Social Stratification and Mobility survey. They pertain to men aged 20-64. } \usage{data(Yamaguchi87)} \format{ A frequency data frame with 75 observations on the following 4 variables. The total sample size is 28887. \describe{ \item{\code{Son}}{a factor with levels \code{UpNM} \code{LoNM} \code{UpM} \code{LoM} \code{Farm}} \item{\code{Father}}{a factor with levels \code{UpNM} \code{LoNM} \code{UpM} \code{LoM} \code{Farm}} \item{\code{Country}}{a factor with levels \code{US} \code{UK} \code{Japan}} \item{\code{Freq}}{a numeric vector} } } \details{ Five status categories -- upper and lower nonmanuals (\code{UpNM}, \code{LoNM}), upper and lower manuals (\code{UpM}, \code{LoM}), and \code{Farm}) are used for both fathers' occupations and sons' occupations. Upper nonmanuals are professionals, managers, and officials; lower nonmanuals are proprietors, sales workers, and clerical workers; upper manuals are skilled workers; lower manuals are semi-skilled and unskilled nonfarm workers; and farm workers are farmers and farm laborers. Some of the models from Xie (1992), Table 1, are fit in \code{demo(yamaguchi-xie)}. } \source{ Yamaguchi, K. (1987). Models for comparing mobility tables: toward parsimony and substance, \emph{American Sociological Review}, vol. 52 (Aug.), 482-494, Table 1 } \references{ Goodman, L. A. and Hout, M. (1998). Statistical Methods and Graphical Displays for Analyzing How the Association Between Two Qualitative Variables Differs Among Countries, Among Groups, Or Over Time: A Modified Regression-Type Approach. \emph{Sociological Methodology}, 28 (1), 175-230. Xie, Yu (1992). The log-multiplicative layer effect model for comparing mobility tables. \emph{American Sociological Review}, 57 (June), 380-395. 
} \examples{ data(Yamaguchi87) # reproduce Table 1 structable(~ Father + Son + Country, Yamaguchi87) # create table form Yama.tab <- xtabs(Freq ~ Son + Father + Country, data=Yamaguchi87) # define mosaic labeling_args for convenient reuse in 3-way displays largs <- list(rot_labels=c(right=0), offset_varnames = c(right = 0.6), offset_labels = c(right = 0.2), set_varnames = c(Son="Son's status", Father="Father's status") ) ################################### # Fit some models & display mosaics # Mutual independence yama.indep <- glm(Freq ~ Son + Father + Country, data=Yamaguchi87, family=poisson) anova(yama.indep) mosaic(yama.indep, ~Son+Father, main="[S][F] ignoring country") mosaic(yama.indep, ~Country + Son + Father, condvars="Country", labeling_args=largs, main='[S][F][C] Mutual independence') # no association between S and F given country ('perfect mobility') # asserts same associations for all countries yama.noRC <- glm(Freq ~ (Son + Father) * Country, data=Yamaguchi87, family=poisson) anova(yama.noRC) mosaic(yama.noRC, ~~Country + Son + Father, condvars="Country", labeling_args=largs, main="[SC][FC] No [SF] (perfect mobility)") # ignore diagonal cells yama.quasi <- update(yama.noRC, ~ . + Diag(Son,Father):Country) anova(yama.quasi) mosaic(yama.quasi, ~Son + Father, main="Quasi [S][F]") ## see also: # demo(yamaguchi-xie) ## } \keyword{datasets} \concept{square tables} \concept{mobility tables} \concept{ordinal variables} vcdExtra/man/glmlist.Rd0000644000176200001440000000644214422306403014553 0ustar liggesusers\name{glmlist} \Rdversion{1.1} \alias{glmlist} \alias{loglmlist} \alias{coef.glmlist} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Create a Model List Object } \description{ \code{glmlist} creates a \code{glmlist} object containing a list of fitted \code{glm} objects with their names. \code{loglmlist} does the same for \code{loglm} objects. 
The intention is to provide object classes to facilitate model comparison, extraction, summary and plotting of model components, etc., perhaps using \code{\link[base]{lapply}} or similar. There exists a \code{\link[stats]{anova.glm}} method for \code{glmlist} objects. Here, a \code{coef} method is also defined, collecting the coefficients from all models in a single object of type determined by \code{result}. } \usage{ glmlist(...) loglmlist(...) \method{coef}{glmlist}(object, result=c("list", "matrix", "data.frame"), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{One or more model objects, as appropriate to the function, optionally assigned names as in \code{list}.} \item{object}{a \code{glmlist} object} \item{result}{type of the result to be returned} } \details{ The arguments to \code{glmlist} or \code{loglmlist} are of the form \code{value} or \code{name=value}. Any objects which do not inherit the appropriate class \code{glm} or \code{loglm} are excluded, with a warning. In the \code{coef} method, coefficients from the different models are matched by name in the list of unique names across all models. } \value{ An object of class \code{glmlist} or \code{loglmlist}, just like a \code{list}, except that each model is given a \code{name} attribute.} %\references{ %%% ~put references to the literature/web site here ~ %} \author{ Michael Friendly; \code{coef} method by John Fox } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ The function \code{\link[Hmisc]{llist}} in package \code{Hmisc} is similar, but perplexingly more general. The function \code{\link[stats]{anova.glm}} also handles \code{glmlist} objects. \code{\link{LRstats}} gives LR statistics and tests for a \code{glmlist} object. 
} \examples{ data(Mental) indep <- glm(Freq ~ mental+ses, family = poisson, data = Mental) Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) # use object names mods <- glmlist(indep, coleff, roweff, linlin) names(mods) # assign new names mods <- glmlist(Indep=indep, Col=coleff, Row=roweff, LinxLin=linlin) names(mods) LRstats(mods) coef(mods, result='data.frame') #extract model components unlist(lapply(mods, deviance)) res <- lapply(mods, residuals) boxplot(as.data.frame(res), main="Residuals from various models") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{utilities} \keyword{models}% __ONLY ONE__ keyword per line vcdExtra/man/Asbestos.Rd0000644000176200001440000000371514430460317014667 0ustar liggesusers\name{Asbestos} \alias{Asbestos} \docType{data} \title{ Effect of Exposure to Asbestos } \description{ A two-way contingency table formed from the cross-classification of the number of years of occupational exposure to asbestos and the diagnosed severity of asbestosis of 1117 New York workers. Asbestosis is a chronic lung disease that results in the lung tissue being scarred due to contact with the fibers, which can lead to severe breathing difficulties. } \usage{data("Asbestos")} \format{ The format is: num [1:5, 1:4] 310 212 21 25 7 36 158 35 102 35 ... - attr(*, "dimnames")=List of 2 ..$ exposure: chr [1:5] "0-9" "10-19" "20-29" "30-39" ... ..$ grade : chr [1:4] "None" "Grade 1" "Grade 2" "Grade 3" } \details{ \code{exposure} and \code{grade} should be regarded as ordered factors. Beh and Lombardo (2022) use this data to illustrate a polynomial biplot for ordered categories. 
The data summarized here was studied by Beh and Smith (2011) and comes from the original data collected and published by Selikoff (1981) who examined the link between asbestos exposure and asbestosis severity in 1963. } \source{ Beh, E. J. & Lombardo, R. (2022). Features of the Polynomial Biplot for Ordered Contingency Tables, \emph{Journal of Computational and Graphical Statistics}, 31:2, 403-412, DOI: 10.1080/10618600.2021.1990773, Table 1. } \references{ Beh, E. J., and D. R. Smith (2011b), Real World Occupational Epidemiology, Part 2: A Visual Interpretation of Statistical Significance, \emph{Archives of Environmental & Occupational Health}, \bold{66}, 245-248. Selikoff, I. J. (1981), Household Risks With Inorganic Fibers, \emph{Bulletin of the New York Academy of Medicine}, \bold{57}, 947-961. } \examples{ data(Asbestos) # mosaic plot vcd::mosaic(Asbestos, shade=TRUE, legend=FALSE) # do the correspondence analysis library(ca) Asbestos.ca <- ca(Asbestos) plot(Asbestos.ca, lines=TRUE) } \keyword{datasets} vcdExtra/man/GKgamma.Rd0000644000176200001440000000430514430460317014404 0ustar liggesusers\name{GKgamma} \alias{GKgamma} \alias{print.GKgamma} %- Also NEED an '\alias' for EACH other topic documented here. \title{Calculate Goodman-Kruskal Gamma for ordered tables} \description{ The Goodman-Kruskal \eqn{\gamma}{gamma} statistic is a measure of association for ordinal factors in a two-way table proposed by Goodman and Kruskal (1954). } \usage{ GKgamma(x, level = 0.95) %\method{print}{GKgamma}{x, digits = 3, ...} } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A two-way frequency table, in matrix or table form. 
The rows and columns are considered to be ordinal factors} \item{level}{Confidence level for a significance test of \eqn{\gamma \ne 0}{gamma != 0}} % \item{digits}{Number of digits printed by the print method} % \item{...}{Other arguments} } %\details{ % ~~ If necessary, more details than the description above ~~ %} \value{ Returns an object of class \code{"GKgamma"} with 6 components, as follows % If it is a LIST, use %\describe{ \item{gamma}{The gamma statistic} \item{C}{Total number of concordant pairs in the table} \item{D}{Total number of discordant pairs in the table} \item{sigma}{Standard error of gamma } \item{CIlevel}{Confidence level} \item{CI}{Confidence interval} % } } \references{ Agresti, A. \emph{Categorical Data Analysis}. John Wiley & Sons, 2002, pp. 57--59. Goodman, L. A., & Kruskal, W. H. (1954). Measures of association for cross classifications. \emph{Journal of the American Statistical Association}, 49, 732-764. Goodman, L. A., & Kruskal, W. H. (1963). Measures of association for cross classifications III: Approximate sampling theory. \emph{Journal of the American Statistical Association}, 58, 310-364. } \author{Michael Friendly; original version by Laura Thompson} %\note{ ~~further notes~~ % % ~Make other sections like Warning with \section{Warning }{....} ~ %} \seealso{\code{\link[vcd]{assocstats}}, \code{\link[vcd]{Kappa}}} \examples{ data(JobSat) GKgamma(JobSat) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{htest} \keyword{category} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Gilby.Rd0000644000176200001440000000357314430460317014154 0ustar liggesusers\name{Gilby} \Rdversion{1.1} \alias{Gilby} \docType{data} \title{Clothing and Intelligence Rating of Children} \description{Schoolboys were classified according to their clothing and to their teachers' rating of "dullness" (lack of intelligence), in a 5 x 7 table originally from Gilby (1911). 
Anscombe (1981) presents a slightly collapsed 4 x 6 table, used here, where the last two categories of clothing were pooled as were the first two categories of dullness due to small counts. Both \code{Dullness} and \code{Clothing} are ordered categories, so models and methods that examine their association in terms of ordinal categories are profitable. } \usage{ data(Gilby) } \format{ A 2-dimensional array resulting from cross-tabulating 2 variables for 1725 observations. The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1\tab \code{Dullness}\tab \code{"Ment. defective", "Slow", "Slow Intell", "Fairly Intell", "Capable", "V.Able"}\cr 2\tab \code{Clothing}\tab \code{"V.Well clad", "Well clad", "Passable", "Insufficient"}\cr } } %\details{ } \source{ Anscombe, F. J. (1981). \emph{Computing in Statistical Science Through APL}. New York: Springer-Verlag, p. 302 } \references{ % \cite{Gilby & Pearson 1911, from Anscombe 1981, p 302} Gilby, W. H. (1911). On the significance of the teacher's appreciation of general intelligence. \emph{Biometrika}, 8, 93-108 (esp. p. 94). [Quoted by Kendall (1943,..., 1953) Table 13.1, p 320.] } %\seealso{ } \examples{ data(Gilby) # CMH tests treating row/column variables as ordinal CMHtest(Gilby) mosaic(Gilby, shade=TRUE) # correspondence analysis to see relations among categories if(require(ca)){ ca(Gilby) plot(ca(Gilby), lines=TRUE) } } \keyword{datasets} \concept{correspondence analysis} \concept{ordinal variables} vcdExtra/man/Abortion.Rd0000644000176200001440000000366414470702554014672 0ustar liggesusers\name{Abortion} \Rdversion{1.1} \alias{Abortion} \docType{data} \title{Abortion Opinion Data} \description{Opinions about abortion classified by gender and SES} \usage{ data(Abortion) } \format{ A 3-dimensional array resulting from cross-tabulating 3 variables for 1100 observations. 
The variable names and their levels are: \tabular{rll}{ No \tab Name \tab Levels \cr 1 \tab \code{Sex} \tab \code{"Female", "Male"}\cr 2 \tab \code{Status} \tab \code{"Lo", "Hi"}\cr 3 \tab \code{Support_Abortion} \tab \code{"Yes", "No"}\cr } } \details{ \code{Support_Abortion} is a natural response variable. The combinations of \code{Sex} and \code{Status} represent four independent samples, having fixed \code{Sex}-\code{Status} marginal totals. There were 500 females and 600 males. Within the female group, 250 of low status and 250 of high status were sampled. Similarly for the males, with 300 in each of the low and high status sub-groups. This is an example of a product-multinomial sampling scheme. the \code{Sex:Status} association must be included in any loglinear model where the goal is to determine how attitude toward abortion depends on the others. Alternatively, a logit model for abortion support may provide a simpler analysis. } \source{ % \cite{Christensen:90 [p. 92]} Christensen, R. (1990). \emph{Log-Linear Models}, New York, NY: Springer-Verlag, p. 92, Example 3.5.2. Christensen, R. (1997). \emph{Log-Linear Models and Logistic Regression}, New York, NY: Springer, p. 100, Example 3.5.2. } %\references{ %} %\seealso { } \examples{ data(Abortion) ftable(Abortion) mosaic(Abortion, shade=TRUE) # stratified by Sex fourfold(aperm(Abortion, 3:1)) # stratified by Status fourfold(aperm(Abortion, c(3,1,2))) } \keyword{datasets} \concept{loglinear models} \concept{logit models} \concept{2 by 2 tables} vcdExtra/man/mcaplot.Rd0000644000176200001440000000712714430460317014544 0ustar liggesusers\name{mcaplot} \alias{mcaplot} %- Also NEED an '\alias' for EACH other topic documented here. \title{Simple and enhanced plot of MCA solutions} \description{ This function is intended as an alternative to \code{\link[ca]{plot.mjca}} for plotting multiple correspondence analysis solutions. It provides more flexibility for labeling factor levels and connecting them with lines. 
It does not support some features of \code{plot.mjca} (centroids, supplementary points, arrows, etc.) } \usage{ mcaplot(obj, map = "symmetric", dim = 1:2, col = c("blue", "red", "brown", "black", "green3", "purple"), pch = 15:20, cex = 1.2, pos = 3, lines = TRUE, lwd = 2, legend = FALSE, legend.pos = "topright", xlab = "_auto_", ylab = "_auto_", rev.axes = c(FALSE, FALSE), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obj}{An \code{"mjca"} object} \item{map}{Character string specifying the map type, i.e., the scaling applied to coordinates for different types of MCA representations. Allowed options include: \code{"symmetric"} (default), \code{"rowprincipal"}, \code{"colprincipal"}, \code{"symbiplot"}, \code{"rowgab"}, \code{"colgab"}, \code{"rowgreen"}, \code{"colgreen"}. See \code{\link[ca]{mjca}} for details. } \item{dim}{Dimensions to plot, an integer vector of length 2} \item{col}{Vector of colors, one for each factor in the MCA} \item{pch}{Vector of point symbols for the category levels, one for each factor} \item{cex}{Character size for points and level labels} \item{pos}{Position of level labels relative to the category points; either a single number or a vector of length equal to the number of category points.} \item{lines}{A logical or an integer vector indicating which factors are to be joined with lines using \code{\link{multilines}}} \item{lwd}{Line width(s) for the lines} \item{legend}{Logical; draw a legend for the factor names?} \item{legend.pos}{Position of the legend in the plot, as in \code{\link[graphics]{legend}}} \item{xlab,ylab}{Labels for horizontal and vertical axes. 
The default, \code{"_auto_"} means that the function auto-generates a label of the form \code{"Dimension X (xx.x \%)"} } \item{rev.axes}{A logical vector of length 2, where TRUE reverses the direction of the corresponding axis} \item{\dots}{Arguments passed down to \code{plot}} } %\details{ %% ~~ If necessary, more details than the description above ~~ %} \value{ Returns the coordinates of the category points invisibly %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Michael Friendly } %\note{ %% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[ca]{mjca}}, \code{\link[ca]{plot.mjca}} \code{\link[ca]{cacoord}} returns CA and MCA coordinates, \code{\link[ca]{multilines}} draw multiple lines according to a factor, } \examples{ require(ca) data(Titanic) titanic.mca <- mjca(Titanic) mcaplot(titanic.mca, legend=TRUE, legend.pos="topleft") data(HairEyeColor) haireye.mca <- mjca(HairEyeColor) mcaplot(haireye.mca, legend=TRUE, cex.lab=1.3) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{hplot} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/ICU.Rd0000644000176200001440000001213214430460317013515 0ustar liggesusers\name{ICU} \alias{ICU} \docType{data} \title{ICU data set} \description{ The ICU data set consists of a sample of 200 subjects who were part of a much larger study on survival of patients following admission to an adult intensive care unit (ICU), derived from Hosmer, Lemeshow and Sturdivant (2013) and Friendly (2000). The major goal of this study was to develop a logistic regression model to predict the probability of survival to hospital discharge of these patients and to study the risk factors associated with ICU mortality. 
The clinical details of the study are described in Lemeshow, Teres, Avrunin, and Pastides (1988). This data set is often used to illustrate model selection methods for logistic regression. } \usage{data(ICU)} \format{ A data frame with 200 observations on the following 22 variables. \describe{ % \item{\code{id}}{Patient id code, a numeric vector} \item{\code{died}}{Died before discharge?, a factor with levels \code{No} \code{Yes}} \item{\code{age}}{Patient age, a numeric vector} \item{\code{sex}}{Patient sex, a factor with levels \code{Female} \code{Male}} \item{\code{race}}{Patient race, a factor with levels \code{Black} \code{Other} \code{White}. Also represented here as \code{white}.} \item{\code{service}}{Service at ICU Admission, a factor with levels \code{Medical} \code{Surgical}} \item{\code{cancer}}{Cancer part of present problem?, a factor with levels \code{No} \code{Yes}} \item{\code{renal}}{History of chronic renal failure?, a factor with levels \code{No} \code{Yes}} \item{\code{infect}}{Infection probable at ICU admission?, a factor with levels \code{No} \code{Yes}} \item{\code{cpr}}{Patient received CPR prior to ICU admission?, a factor with levels \code{No} \code{Yes}} \item{\code{systolic}}{Systolic blood pressure at admission (mm Hg), a numeric vector} \item{\code{hrtrate}}{Heart rate at ICU Admission (beats/min), a numeric vector} \item{\code{previcu}}{Previous admission to an ICU within 6 Months?, a factor with levels \code{No} \code{Yes}} \item{\code{admit}}{Type of admission, a factor with levels \code{Elective} \code{Emergency}} \item{\code{fracture}}{Admission with a long bone, multiple, neck, single area, or hip fracture? 
a factor with levels \code{No} \code{Yes}} \item{\code{po2}}{PO2 from initial blood gases, a factor with levels \code{>60} \code{<=60}} \item{\code{ph}}{pH from initial blood gases, a factor with levels \code{>=7.25} \code{<7.25}} \item{\code{pco}}{PCO2 from initial blood gases, a factor with levels \code{<=45} \code{>45}} \item{\code{bic}}{Bicarbonate (HCO3) level from initial blood gases, a factor with levels \code{>=18} \code{<18}} \item{\code{creatin}}{Creatinine, from initial blood gases, a factor with levels \code{<=2} \code{>2}} \item{\code{coma}}{Level of unconsciousness at admission to ICU, a factor with levels \code{None} \code{Stupor} \code{Coma}} \item{\code{white}}{a recoding of \code{race}, a factor with levels \code{White} \code{Non-white}} \item{\code{uncons}}{a recoding of \code{coma}, a factor with levels \code{No} \code{Yes}} } } \details{ Patient ID numbers are the rownames of the data frame. Note that the last two variables, \code{white} and \code{uncons}, are recodings of \code{race} and \code{coma}, respectively, to binary variables. } \source{ M. Friendly (2000), \emph{Visualizing Categorical Data}, Appendix B.4. SAS Institute, Cary, NC. Hosmer, D. W. Jr., Lemeshow, S. and Sturdivant, R. X. (2013) \emph{Applied Logistic Regression}, NY: Wiley, Third Edition. } \references{ Lemeshow, S., Teres, D., Avrunin, J. S., Pastides, H. (1988). Predicting the Outcome of Intensive Care Unit Patients. \emph{Journal of the American Statistical Association}, 83, 348-356. 
} \examples{ data(ICU) # remove redundant variables (race, coma) ICU1 <- ICU[,-c(4,20)] # fit full model icu.full <- glm(died ~ ., data=ICU1, family=binomial) summary(icu.full) # simpler model (found from a "best" subsets procedure) icu.mod1 <- glm(died ~ age + sex + cancer + systolic + admit + uncons, data=ICU1, family=binomial) summary(icu.mod1) # even simpler model icu.mod2 <- glm(died ~ age + cancer + admit + uncons, data=ICU1, family=binomial) summary(icu.mod2) anova(icu.mod2, icu.mod1, icu.full, test="Chisq") ## Reproduce Fig 6.12 from VCD icu.fit <- data.frame(ICU, prob=predict(icu.mod2, type="response")) # combine categorical risk factors to a single string risks <- ICU[, c("cancer", "admit", "uncons")] risks[,1] <- ifelse(risks[,1]=="Yes", "Cancer", "") risks[,2] <- ifelse(risks[,2]=="Emergency", "Emerg", "") risks[,3] <- ifelse(risks[,3]=="Yes", "Uncons", "") risks <- apply(risks, 1, paste, collapse="") risks[risks==""] <- "(none)" icu.fit$risks <- risks library(ggplot2) ggplot(icu.fit, aes(x=age, y=prob, color=risks)) + geom_point(size=2) + geom_line(size=1.25, alpha=0.5) + theme_bw() + ylab("Probability of death") } \keyword{datasets} \concept{logistic regression} \concept{generalized linear models} vcdExtra/man/seq_mosaic.Rd0000644000176200001440000000637514422306403015230 0ustar liggesusers\name{seq_mosaic} \alias{seq_mosaic} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Sequential Mosaics and Strucplots for an N-way Table } \description{ This function takes an n-way contingency table and plots mosaics for series of sequential models to the 1-, 2-, ... n-way marginal tables, corresponding to a variety of types of loglinear models. } \usage{ seq_mosaic(x, panel = mosaic, type = c("joint", "conditional", "mutual", "markov", "saturated"), plots = 1:nf, vorder = 1:nf, k = NULL, ...) } %- maybe also 'usage' for other objects documented here. 
\arguments{ \item{x}{a contingency table in array form, with optional category labels specified in the dimnames(x) attribute, or else a data.frame in frequency form, with the frequency variable named \code{"Freq"}.} \item{panel}{a \code{\link[vcd]{strucplot}} panel function, typically \code{\link[vcd]{mosaic}} or \code{\link[vcd]{sieve}}. NOT yet implemented. } \item{type}{type of sequential model to fit, a character string. One of \code{"joint"}, \code{"conditional"}, \code{"mutual"}, \code{"markov"}, or \code{"saturated"}.} \item{plots}{which marginal sub-tables to plot? A vector of a (sub)set of the integers, \code{1:nf} where \code{nf} is the number of factors in the full n-way table.} \item{vorder}{order of variables, a permutation of the integers \code{1:nf}, used to reorder the variables in the original table for the purpose of fitting sequential marginal models. } \item{k}{conditioning variable(s) for \code{type} = \code{"joint"}, \code{"conditional"} or Markov chain order for \code{type} = \code{"markov"} } \item{\dots}{other arguments passed to \code{\link[vcd]{mosaic}}. } } \details{ This function produces similar plots to the use of \code{\link{mosaic.loglmlist}}, called with the result of \code{\link{seq_loglm}}. } \value{ None. Used for its side-effect of producing plots %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% ... 
} \references{ These functions were inspired by the original SAS implementation of mosaic displays, described in the \emph{User's Guide for Mosaics}, \url{http://www.datavis.ca/mosaics/mosaics.pdf} } \author{ Michael Friendly } %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{loglin-utilities}} for descriptions of sequential models, \code{\link{conditional}}, \code{\link{joint}}, \code{\link{mutual}}, \dots \code{\link{loglmlist}}, \code{\link{mosaic.loglmlist}}, \code{\link{seq_loglm}} \code{\link{mosaic.glm}}, \code{\link[vcd]{mosaic}}, \code{\link[vcd]{strucplot}}, for the many parameters that control the details of mosaic plots. } \examples{ data(Titanic, package="datasets") seq_mosaic(Titanic) # models of joint independence, Survived last seq_mosaic(Titanic, type="condit") seq_mosaic(Titanic, type="mutual") # other panel functions and options: presently BUGGED \dontrun{ seq_mosaic(Titanic, type="mutual", panel=sieve, gp=shading_Friendly, labeling=labeling_values) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{hplot} vcdExtra/man/update.xtabs.Rd0000644000176200001440000000322014430460317015475 0ustar liggesusers\name{update.xtabs} \alias{update.xtabs} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Update method for an \code{xtabs} object } \description{ Provides an \code{update} method for \code{"xtabs"} objects, typically by removing terms from the formula to collapse over them. } \usage{ \method{update}{xtabs}(object, formula., ..., evaluate = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{An existing \code{"xtabs"} object} \item{formula.}{Changes to the formula; see \code{\link[stats]{update.formula}} for details} \item{\dots}{Additional arguments to the call, or arguments with changed values. 
} \item{evaluate}{If \code{TRUE}, evaluate the new call else return the call} } %\details{ %%% ~~ If necessary, more details than the description above ~~ %} \value{ If \code{evaluate == TRUE}, the new \code{"xtabs"} object, otherwise the updated call } %\references{ %%% ~put references to the literature/web site here ~ %} \author{Michael Friendly} %\note{ %%% ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[stats]{update.formula}} for details on updates to model formulae \code{\link[base]{margin.table}} does something similar, \code{\link{collapse.table}} collapses category levels } \examples{ vietnam.tab <- xtabs(Freq ~ sex + year + response, data=Vietnam) update(vietnam.tab, formula = ~ . -year) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{models} %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line vcdExtra/man/Alligator.Rd0000644000176200001440000000430514430460317015016 0ustar liggesusers\name{Alligator} \alias{Alligator} \docType{data} \title{ Alligator Food Choice } \description{ The Alligator data, from Agresti (2002), comes from a study of the primary food choices of alligators in four Florida lakes. Researchers classified the stomach contents of 219 captured alligators into five categories: Fish (the most common primary food choice), Invertebrate (snails, insects, crayfish, etc.), Reptile (turtles, alligators), Bird, and Other (amphibians, plants, household pets, stones, and other debris). } \usage{data(Alligator)} \format{ A frequency data frame with 80 observations on the following 5 variables. 
\describe{ \item{\code{lake}}{a factor with levels \code{George} \code{Hancock} \code{Oklawaha} \code{Trafford}} \item{\code{sex}}{a factor with levels \code{female} \code{male}} \item{\code{size}}{alligator size, a factor with levels \code{large} (>2.3m) \code{small} (<=2.3m)} \item{\code{food}}{primary food choice, a factor with levels \code{bird} \code{fish} \code{invert} \code{other} \code{reptile}} \item{\code{count}}{cell frequency, a numeric vector} } } \details{ The table contains a fair number of 0 counts. \code{food} is the response variable. \code{fish} is the most frequent choice, and often taken as a baseline category in multinomial response models. } \source{ Agresti, A. (2002). \emph{Categorical Data Analysis}, New York: Wiley, 2nd Ed., Table 7.1 } %\references{ %%% ~~ possibly secondary sources and usages ~~ %} \examples{ data(Alligator) # change from frequency data.frame to table allitable <- xtabs(count ~ lake + sex + size + food, data=Alligator) # Agresti's Table 7.1 structable(food ~ lake + sex + size, allitable) plot(allitable, shade=TRUE) # mutual independence model mosaic(~ food + lake + size, allitable, shade=TRUE) # food jointly independent of lake and size mosaic(~ food + lake + size, allitable, shade=TRUE, expected = ~lake:size + food) if (require(nnet)) { # multinomial logit model mod1 <- multinom(food ~ lake + size + sex, data=Alligator, weights=count) } } \keyword{datasets} \concept{loglinear models} \concept{multinomial models} \concept{zero counts} vcdExtra/man/Crossings.Rd0000644000176200001440000000352414430460317015054 0ustar liggesusers\name{Crossings} \alias{Crossings} %- Also NEED an '\alias' for EACH other topic documented here. \title{Crossings Interaction of Factors} \description{ Given two ordered factors in a square, n x n frequency table, \code{Crossings} creates an n-1 column matrix corresponding to different degrees of difficulty in crossing from one level to the next, as described by Goodman (1972). 
} \usage{ Crossings(...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{ Two factors } } %\details{ %% ~~ If necessary, more details than the description above ~~ %} \value{ For two factors of \code{n} levels, returns a binary indicator matrix of \code{n*n} rows and \code{n-1} columns. } \references{ Goodman, L. (1972). Some multiplicative models for the analysis of cross-classified data. In: \emph{Proceedings of the Sixth Berkeley Symposium on Mathematical Statistics and Probability}, Berkeley, CA: University of California Press, pp. 649-696. } \author{Michael Friendly and Heather Turner} %\note{ %% ~~further notes~~ %} \seealso{ \code{\link[stats]{glm}}, \code{\link[gnm]{gnm}} for model fitting functions for frequency tables \code{\link[gnm]{Diag}}, \code{\link[gnm]{Mult}}, \code{\link[gnm]{Symm}}, \code{\link[gnm]{Topo}} for similar extensions to terms in model formulas. } \examples{ data(Hauser79) # display table structable(~Father + Son, data=Hauser79) hauser.indep <- gnm(Freq ~ Father + Son, data=Hauser79, family=poisson) hauser.CR <- update(hauser.indep, ~ . + Crossings(Father,Son)) LRstats(hauser.CR) hauser.CRdiag <- update(hauser.indep, ~ . + Crossings(Father,Son) + Diag(Father,Son)) LRstats(hauser.CRdiag) } \keyword{models} \keyword{manip} vcdExtra/man/HospVisits.Rd0000644000176200001440000000277614430460317015225 0ustar liggesusers\name{HospVisits} \alias{HospVisits} \docType{data} \title{ Hospital Visits Data } \description{ Length of stay in hospital for 132 schizophrenic patients, classified by visiting patterns, originally from Wing (1962). } \usage{data("HospVisits")} \format{ A 3 by 3 frequency table, with format: table [1:3, 1:3] 43 6 9 16 11 18 3 10 16 - attr(*, "dimnames")=List of 2 ..$ visit: chr [1:3] "Regular" "Infrequent" "Never" ..$ stay : chr [1:3] "2-9" "10-19" "20+" } \details{ Both table variables can be considered ordinal. The variable \code{visit} refers to visiting patterns recorded hospital. 
The category labels are abbreviations of those given by Goodman (1983); e.g., \code{"Regular"} is short for \dQuote{received visitors regularly or patient went home}. The variable \code{stay} refers to length of stay in hospital, in year groups. } \source{ Goodman, L. A. (1983) The analysis of dependence in cross-classifications having ordered categories, using log-linear models for frequencies and log-linear models for odds. \emph{Biometrics}, 39, 149-160. } \references{ Wing, J. K. (1962). Institutionalism in Mental Hospitals, \emph{British Journal of Social and Clinical Psychology}, 1 (1), 38-51. } \seealso{\code{\link[ca]{ca}}} \examples{ data(HospVisits) mosaic(HospVisits, gp=shading_Friendly) if(require(ca)){ ca(HospVisits) # surprisingly 1D ! plot(ca(HospVisits)) } } \keyword{datasets} \concept{correspondence analysis} \concept{ordinal variables} vcdExtra/man/Vote1980.Rd0000644000176200001440000000312714430460317014340 0ustar liggesusers\name{Vote1980} \alias{Vote1980} \docType{data} \title{ Race and Politics in the 1980 Presidential Vote } \description{ Data from the 1982 General Social Survey on votes in the 1980 U.S. presidential election in relation to race and political conservatism. } \usage{data(Vote1980)} \format{ A frequency data frame representing a 2 x 7 x 2 table, with 28 observations on the following 4 variables. \describe{ \item{\code{race}}{a factor with levels \code{NonWhite} \code{White}} \item{\code{conservatism}}{ a factor with levels \code{1} \code{2} \code{3} \code{4} \code{5} \code{6} \code{7}, \code{1}=most liberal, \code{7}=most conservative} \item{\code{votefor}}{a factor with levels \code{Carter} \code{Reagan}; \code{Carter} represents Jimmy Carter or other.} \item{\code{Freq}}{a numeric vector} } } \details{ The data contains a number of sampling zeros in the frequencies of NonWhites voting for Ronald Reagan. } \source{ Clogg, C. & Shockey, J. W. (1988). In Nesselroade, J. R. & Cattell, R. B. (ed.) 
Multivariate Analysis of Discrete Data, \emph{Handbook of Multivariate Experimental Psychology}, New York: Plenum Press. } \references{ Agresti, A. (1990) \emph{Categorical Data Analysis}, Table 4.12 New York: Wiley-Interscience. Friendly, M. (2000) \emph{Visualizing Categorical Data}, Example 7.5 Cary, NC: SAS Institute. } \examples{ data(Vote1980) fourfold(xtabs(Freq ~ race + votefor + conservatism, data=Vote1980), mfrow=c(2,4)) } \keyword{datasets} \concept{loglinear models} \concept{zero counts} vcdExtra/DESCRIPTION0000644000176200001440000000505214471023412013540 0ustar liggesusersPackage: vcdExtra Type: Package Title: 'vcd' Extensions and Additions Version: 0.8-5 Date: 2023-08-19 Language: en-US Authors@R: c( person(given = "Michael", family = "Friendly", role=c("aut", "cre"), email="friendly@yorku.ca", comment = c(ORCID = "0000-0002-3237-0941")), person(given = "Heather", family = "Turner", role="ctb"), person(given = "David", family = "Meyer", role="ctb"), person(given = "Achim", family = "Zeileis", role="ctb", comment = c(ORCID = "0000-0003-0918-3766")), person(given = "Duncan", family = "Murdoch", role="ctb"), person(given = "David", family = "Firth", role="ctb"), person(given = "Matt", family = "Kumar", role="ctb"), person(given = "Shuguang", family = "Sun", role="ctb") ) Maintainer: Michael Friendly Depends: R (>= 3.5.0), vcd, gnm (>= 1.0-3), grid Suggests: gmodels, Fahrmeir, effects, VGAM, plyr, lmtest, nnet, ggplot2, Sleuth2, car, lattice, stats4, rgl, AER, coin, Hmisc, knitr, rmarkdown, seriation, Imports: MASS, grDevices, stats, utils, ca, dplyr, glue, here, purrr, readxl, stringr, tidyr (>= 1.3.0) Description: Provides additional data sets, methods and documentation to complement the 'vcd' package for Visualizing Categorical Data and the 'gnm' package for Generalized Nonlinear Models. In particular, 'vcdExtra' extends mosaic, assoc and sieve plots from 'vcd' to handle 'glm()' and 'gnm()' models and adds a 3D version in 'mosaic3d'. 
Additionally, methods are provided for comparing and visualizing lists of 'glm' and 'loglm' objects. This package is now a support package for the book, "Discrete Data Analysis with R" by Michael Friendly and David Meyer. License: GPL (>= 2) URL: https://friendly.github.io/vcdExtra/ BugReports: https://github.com/friendly/vcdExtra VignetteBuilder: knitr, rmarkdown LazyLoad: yes LazyData: yes NeedsCompilation: no Packaged: 2023-08-21 20:10:59 UTC; friendly Author: Michael Friendly [aut, cre] (), Heather Turner [ctb], David Meyer [ctb], Achim Zeileis [ctb] (), Duncan Murdoch [ctb], David Firth [ctb], Matt Kumar [ctb], Shuguang Sun [ctb] Repository: CRAN Date/Publication: 2023-08-22 03:10:02 UTC vcdExtra/build/0000755000176200001440000000000014470742320013134 5ustar liggesusersvcdExtra/build/vignette.rds0000644000176200001440000000065614470742320015502 0ustar liggesusersR=o04R|USJ'@UTU ]ccbT ~J߳r!IToxi`jhF;fMߓأ]nxb1s,."14Wk{lԂĬ`wɘm'2IxVPo> %\VignetteIndexEntry{Creating and manipulating frequency tables} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut01-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` R provides many methods for creating frequency and contingency tables. Several are described below. 
In the examples below, we use some real examples and some anonymous ones, where the variables `A`, `B`, and `C` represent categorical variables, and `X` represents an arbitrary R data object. ## Forms of frequency data The first thing you need to know is that categorical data can be represented in three different forms in R, and it is sometimes necessary to convert from one form to another, for carrying out statistical tests, fitting models or visualizing the results. Once a data object exists in R, you can examine its complete structure with the `str()` function, or view the names of its components with the `names()` function. ### Case form Categorical data in case form are simply data frames containing individual observations, with one or more factors, used as the classifying variables. In case form, there may also be numeric covariates. The total number of observations is `nrow(X)`, and the number of variables is `ncol(X)`. ***Example***: The `Arthritis` data is available in case form in the `vcd` package. There are two explanatory factors: `Treatment` and `Sex`. `Age` is a numeric covariate, and `Improved` is the response--- an ordered factor, with levels `r paste(levels(Arthritis$Improved),collapse=' < ')`. Excluding `Age`, this represents a $2 \times 2 \times 3$ contingency table for `Treatment`, `Sex` and `Improved`, but in case form. ```{r, case-form} names(Arthritis) # show the variables str(Arthritis) # show the structure head(Arthritis,5) # first 5 observations, same as Arthritis[1:5,] ``` ### Frequency form Data in frequency form is also a data frame containing one or more factors, and a frequency variable, often called `Freq` or `count`. The total number of observations is: `sum(X$Freq)`, `sum(X[,"Freq"])` or some equivalent form. The number of cells in the table is given by `nrow(X)`. 
***Example***: For small frequency tables, it is often convenient to enter them in frequency form using `expand.grid()` for the factors and `c()` to list the counts in a vector. The example below, from [@vcd:Agresti:2002] gives results for the 1991 General Social Survey, with respondents classified by sex and party identification. ```{r, frequency-form} # Agresti (2002), table 3.11, p. 106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) GSS names(GSS) str(GSS) sum(GSS$count) ``` ### Table form Table form data is represented by a `matrix`, `array` or `table` object, whose elements are the frequencies in an $n$-way table. The variable names (factors) and their levels are given by `dimnames(X)`. The total number of observations is `sum(X)`. The number of dimensions of the table is `length(dimnames(X))`, and the table sizes are given by `sapply(dimnames(X), length)`. ***Example***: The `HairEyeColor` is stored in table form in `vcd`. ```{r, table-form1} str(HairEyeColor) # show the structure sum(HairEyeColor) # number of cases sapply(dimnames(HairEyeColor), length) # table dimension sizes ``` ***Example***: Enter frequencies in a matrix, and assign `dimnames`, giving the variable names and category labels. Note that, by default, `matrix()` uses the elements supplied by *columns* in the result, unless you specify `byrow=TRUE`. ```{r, table-form2} # A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction JobSat <- matrix(c( 1, 2, 1, 0, 3, 3, 6, 1, 10,10,14, 9, 6, 7,12,11), 4, 4) dimnames(JobSat) = list( income = c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS") ) JobSat ``` `JobSat` is a **matrix**, not an object of `class("table")`, and some functions are happier with tables than matrices. 
You can coerce it to a table with `as.table()`, ```{r, table-form3} JobSat <- as.table(JobSat) str(JobSat) ``` ## Ordered factors and reordered tables {#sec:ordered-factors} In table form, the values of the table factors are ordered by their position in the table. Thus in the `JobSat` data, both `income` and `satisfaction` represent ordered factors, and the *positions* of the values in the rows and columns reflect their ordered nature. Yet, for analysis, there are times when you need *numeric* values for the levels of ordered factors in a table, e.g., to treat a factor as a quantitative variable. In such cases, you can simply re-assign the `dimnames` attribute of the table variables. For example, here, we assign numeric values to `income` as the middle of their ranges, and treat `satisfaction` as equally spaced with integer scores. ```{r, relevel, eval=FALSE} dimnames(JobSat)$income <- c(7.5,20,32.5,60) dimnames(JobSat)$satisfaction <- 1:4 ``` For the `HairEyeColor` data, hair color and eye color are ordered arbitrarily. For visualizing the data using mosaic plots and other methods described below, it turns out to be more useful to ensure that both hair color and eye color are ordered from dark to light. Hair colors are actually ordered this way already, and it is easiest to re-order eye colors by indexing. Again `str()` is your friend. ```{r, reorder1} HairEyeColor <- HairEyeColor[, c(1,3,4,2), ] str(HairEyeColor) ``` This is also the order for both hair color and eye color shown in the result of a correspondence analysis (\@ref(fig:ca-haireye)) below. With data in case form or frequency form, when you have ordered factors represented with character values, you must ensure that they are treated as ordered in R. Imagine that the `Arthritis` data was read from a text file. By default, the levels of `Improved` will be ordered alphabetically: `Marked`, `None`, `Some` --- not what we want. In this case, the function `ordered()` (and others) can be useful.
```{r, reorder2, echo=TRUE, eval=FALSE} Arthritis <- read.csv("arthritis.txt",header=TRUE) Arthritis$Improved <- ordered(Arthritis$Improved, levels=c("None", "Some", "Marked") ) ``` The dataset `Arthritis` in the `vcd` package is a data.frame in this form. With this order of `Improved`, the response in this data, a mosaic display of `Treatment` and `Improved` (\@ref(fig:arthritis)) shows a clearly interpretable pattern. The original version of `mosaic` in the `vcd` package required the input to be a contingency table in array form, so we convert using `xtabs()`. ```{r} #| Arthritis, #| fig.height = 6, #| fig.width = 6, #| fig.cap = "Mosaic plot for the `Arthritis` data, showing the marginal model of independence for Treatment and Improved. Age, a covariate, and Sex are ignored here." data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ``` Several data sets in the package illustrate the salutary effects of reordering factor levels in mosaic displays and other analyses. See: * `help(AirCrash)` * `help(Glass)` * `help(HouseTasks)` The [`seriation`](https://CRAN.R-project.org/package=seriation) package now contains a general method to permute the row and column variables in a table according to the result of a correspondence analysis, using scores on the first CA dimension. ### Re-ordering dimensions Finally, there are situations where, particularly for display purposes, you want to re-order the *dimensions* of an $n$-way table, or change the labels for the variables or levels. This is easy when the data are in table form: `aperm()` permutes the dimensions, and assigning to `names` and `dimnames` changes variable names and level labels respectively. We will use the following version of `UCBAdmissions` in \@ref(sec:mantel) below.
^[Changing `Admit` to `Admit?` might be useful for display purposes, but is dangerous--- because it is then difficult to use that variable name in a model formula. See \@ref(sec:tips) for options `labeling_args` and `set_labels` to change variable and level names for displays in the `strucplot` framework.] ```{r, reorder3} UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") # display as a flattened table stats::ftable(UCB) ``` ## `structable()` {#sec:structable} For 3-way and larger tables the `structable()` function in `vcd` provides a convenient and flexible tabular display. The variables assigned to the rows and columns of a two-way display can be specified by a model formula. ```{r, structable} structable(HairEyeColor) # show the table: default structable(Hair+Sex ~ Eye, HairEyeColor) # specify col ~ row variables ``` It also returns an object of class `"structable"` which may be plotted with `mosaic()` (not shown here). ```{r, structable1,eval=FALSE} HSE <- structable(Hair+Sex ~ Eye, HairEyeColor) # save structable object mosaic(HSE) # plot it ``` ## `table()` and friends {#sec:table} You can generate frequency tables from factor variables using the `table()` function, tables of proportions using the `prop.table()` function, and marginal frequencies using `margin.table()`.
For these examples, create some categorical vectors: ```{r, table-setup} n=500 A <- factor(sample(c("a1","a2"), n, rep=TRUE)) B <- factor(sample(c("b1","b2"), n, rep=TRUE)) C <- factor(sample(c("c1","c2"), n, rep=TRUE)) mydata <- data.frame(A,B,C) ``` These lines illustrate `table`-related functions: ```{r, table-ex1} # 2-Way Frequency Table attach(mydata) mytable <- table(A,B) # A will be rows, B will be columns mytable # print table margin.table(mytable, 1) # A frequencies (summed over B) margin.table(mytable, 2) # B frequencies (summed over A) prop.table(mytable) # cell proportions prop.table(mytable, 1) # row proportions prop.table(mytable, 2) # column proportions ``` `table()` can also generate multidimensional tables based on 3 or more categorical variables. In this case, you can use the `ftable()` or `structable()` function to print the results more attractively. ```{r, table-ex2} # 3-Way Frequency Table mytable <- table(A, B, C) ftable(mytable) ``` `table()` ignores missing values by default. To include `NA` as a category in counts, include the table option `exclude=NULL` if the variable is a vector. If the variable is a factor you have to create a new factor using `newfactor <- factor(oldfactor, exclude=NULL)`. ## `xtabs()` {#sec:xtabs} The `xtabs()` function allows you to create cross-tabulations of data using formula style input. This typically works with case-form data supplied in a data frame or a matrix. The result is a contingency table in array format, whose dimensions are determined by the terms on the right side of the formula. ```{r, xtabs-ex1} # 3-Way Frequency Table mytable <- xtabs(~A+B+C, data=mydata) ftable(mytable) # print table summary(mytable) # chi-square test of independence ``` If a variable is included on the left side of the formula, it is assumed to be a vector of frequencies (useful if the data have already been tabulated in frequency form).
```{r, xtabs-ex2} (GSStab <- xtabs(count ~ sex + party, data=GSS)) summary(GSStab) ``` ## Collapsing over table factors: `aggregate()`, `margin.table()` and `apply()` It sometimes happens that we have a data set with more variables or factors than we want to analyse, or else, having done some initial analyses, we decide that certain factors are not important, and so should be excluded from graphic displays by collapsing (summing) over them. For example, mosaic plots and fourfold displays are often simpler to construct from versions of the data collapsed over the factors which are not shown in the plots. The appropriate tools to use again depend on the form in which the data are represented--- a case-form data frame, a frequency-form data frame (`aggregate()`), or a table-form array or table object (`margin.table()` or `apply()`). When the data are in frequency form, and we want to produce another frequency data frame, `aggregate()` is a handy tool, using the argument `FUN=sum` to sum the frequency variable over the factors *not* mentioned in the formula. ***Example***: The data frame `DaytonSurvey` in the `vcdExtra` package represents a $2^5$ table giving the frequencies of reported use ("ever used?") of alcohol, cigarettes and marijuana in a sample of high school seniors, also classified by sex and race. ```{r, dayton1} data("DaytonSurvey", package="vcdExtra") str(DaytonSurvey) head(DaytonSurvey) ``` To focus on the associations among the substances, we want to collapse over sex and race. The right-hand side of the formula used in the call to `aggregate()` gives the factors to be retained in the new frequency data frame, `Dayton.ACM.df`.
```{r, dayton2} # data in frequency form # collapse over sex and race Dayton.ACM.df <- aggregate(Freq ~ cigarette+alcohol+marijuana, data=DaytonSurvey, FUN=sum) Dayton.ACM.df ``` When the data are in table form, and we want to produce another table, `apply()` with `FUN=sum` can be used in a similar way to sum the table over dimensions not mentioned in the `MARGIN` argument. `margin.table()` is just a wrapper for `apply()` using the `sum()` function. ***Example***: To illustrate, we first convert the `DaytonSurvey` to a 5-way table using `xtabs()`, giving `Dayton.tab`. ```{r, dayton3} # in table form Dayton.tab <- xtabs(Freq ~ cigarette+alcohol+marijuana+sex+race, data=DaytonSurvey) structable(cigarette+alcohol+marijuana ~ sex+race, data=Dayton.tab) ``` Then, use `apply()` on `Dayton.tab` to give the 3-way table `Dayton.ACM.tab` summed over sex and race. The elements in this new table are the column sums for `Dayton.tab` shown by `structable()` just above. ```{r, dayton4} # collapse over sex and race Dayton.ACM.tab <- apply(Dayton.tab, MARGIN=1:3, FUN=sum) Dayton.ACM.tab <- margin.table(Dayton.tab, 1:3) # same result structable(cigarette+alcohol ~ marijuana, data=Dayton.ACM.tab) ``` Many of these operations can be performed using the `**ply()` functions in the [`plyr`]( https://CRAN.R-project.org/package=plyr) package. For example, with the data in a frequency form data frame, use `ddply()` to collapse over unmentioned factors, and `plyr::summarise()` as the function to be applied to each piece. ```{r, dayton5} library(plyr) Dayton.ACM.df <- plyr::ddply(DaytonSurvey, .(cigarette, alcohol, marijuana), plyr::summarise, Freq=sum(Freq)) Dayton.ACM.df ``` ## Collapsing table levels: `collapse.table()` A related problem arises when we have a table or array and for some purpose we want to reduce the number of levels of some factors by summing subsets of the frequencies. 
For example, we may have initially coded Age in 10-year intervals, and decide that, either for analysis or display purposes, we want to reduce Age to 20-year intervals. The `collapse.table()` function in `vcdExtra` was designed for this purpose. ***Example***: Create a 3-way table, and collapse Age from 10-year to 20-year intervals. First, we generate a $2 \times 6 \times 3$ table of random counts from a Poisson distribution with mean of 100. ```{r, collapse1} # create some sample data in frequency form sex <- c("Male", "Female") age <- c("10-19", "20-29", "30-39", "40-49", "50-59", "60-69") education <- c("low", 'med', 'high') data <- expand.grid(sex=sex, age=age, education=education) counts <- rpois(36, 100) # random Possion cell frequencies data <- cbind(data, counts) # make it into a 3-way table t1 <- xtabs(counts ~ sex + age + education, data=data) structable(t1) ``` Now collapse `age` to 20-year intervals, and `education` to 2 levels. In the arguments, levels of `age` and `education` given the same label are summed in the resulting smaller table. ```{r, collapse2} # collapse age to 3 levels, education to 2 levels t2 <- collapse.table(t1, age=c("10-29", "10-29", "30-49", "30-49", "50-69", "50-69"), education=c(" mutate(sibspF = case_match(sibsp, 0 ~ "0", 1 ~ "1", 2:max(sibsp) ~ "2+")) |> mutate(sibspF = ordered(sibspF)) |> mutate(parchF = case_match(parch, 0 ~ "0", 1 ~ "1", 2:max(parch) ~ "2+")) |> mutate(parchF = ordered(parchF)) table(Titanicp$sibspF, Titanicp$parchF) ``` `car::recode()` is a similar function, but with a less convenient interface. The [`forcats`]( https://CRAN.R-project.org/package=forcats) package provides a collection of functions for reordering the levels of a factor or grouping categories according to their frequency: * `forcats::fct_reorder()`: Reorder a factor by another variable. * `forcats::fct_infreq()`: Reorder a factor by the frequency of values. * `forcats::fct_relevel()`: Change the order of a factor by hand. 
* `forcats::fct_lump()`: Collapse the least/most frequent values of a factor into “other”. * `forcats::fct_collapse()`: Collapse factor levels into manually defined groups. * `forcats::fct_recode()`: Change factor levels by hand. ## Converting among frequency tables and data frames As we've seen, a given contingency table can be represented equivalently in different forms, but some R functions were designed for one particular representation. The table below shows some handy tools for converting from one form to another. | **From this** | | **To this** | | |:-----------------|:--------------------|:---------------------|-------------------| | | _Case form_ | _Frequency form_ | _Table form_ | | _Case form_ | noop | `xtabs(~A+B)` | `table(A,B)` | | _Frequency form_ | `expand.dft(X)` | noop | `xtabs(count~A+B)`| | _Table form_ | `expand.dft(X)` | `as.data.frame(X)` | noop | For example, a contingency table in table form (an object of class `"table"`) can be converted to a data.frame with `as.data.frame()`. ^[Because R is object-oriented, this is actually a short-hand for the function `as.data.frame.table()`.] The resulting `data.frame` contains columns representing the classifying factors and the table entries (as a column named by the `responseName` argument, defaulting to `Freq`). This is the inverse of `xtabs()`. ***Example***: Convert the `GSStab` in table form to a data.frame in frequency form. ```{r, convert-ex1} as.data.frame(GSStab) ``` ***Example***: Convert the `Arthritis` data in case form to a 3-way table of `Treatment` $\times$ `Sex` $\times$ `Improved`. Note the use of `with()` to avoid having to use `Arthritis$Treatment` etc. within the call to `table()`.^[`table()` does not allow a `data` argument to provide an environment in which the table variables are to be found.
In the examples in \@ref(sec:table) I used `attach(mydata)` for this purpose, but `attach()` leaves the variables attached to the search path, while `with()` just evaluates the `table()` expression in a temporary environment of the data.] ```{r, convert-ex2} Art.tab <- with(Arthritis, table(Treatment, Sex, Improved)) str(Art.tab) ftable(Art.tab) ``` There may also be times that you will need an equivalent case form `data.frame` with factors representing the table variables rather than the frequency table. For example, the `mca()` function in package `MASS` only operates on data in this format. Marc Schwartz initially provided code for `expand.dft()` on the R-help mailing list for converting a table back into a case form `data.frame`. This function is included in `vcdExtra`. ***Example***: Convert the `Arthritis` data in table form (`Art.tab`) back to a `data.frame` in case form, with factors `Treatment`, `Sex` and `Improved`. ```{r, convert-ex3} Art.df <- expand.dft(Art.tab) str(Art.df) ``` ## A complex example {#sec:complex} If you've followed so far, you're ready for a more complicated example. The data file, `tv.dat`, represents a 4-way table of size $5 \times 11 \times 5 \times 3$ where the table variables (unnamed in the file) are read as `V1` -- `V4`, and the cell frequency is read as `V5`. The file, stored in the `extdata` directory of `vcdExtra`, can be read as follows: ```{r, tv1} tv.data<-read.table(system.file("extdata","tv.dat", package="vcdExtra")) head(tv.data,5) ``` For a local file, just use `read.table()` in this form: ```{r, tv2,eval=FALSE} tv.data<-read.table("C:/R/data/tv.dat") ``` The data `tv.dat` came from the initial implementation of mosaic displays in R by Jay Emerson. In turn, they came from the initial development of mosaic displays [@vcd:Hartigan+Kleiner:1984] that illustrated the method with data on a large sample of TV viewers whose behavior had been recorded for the Nielsen ratings.
This data set contains sample television audience data from Nielsen Media Research for the week starting November 6, 1995. The table variables are: * `V1`-- values 1:5 correspond to the days Monday--Friday; * `V2`-- values 1:11 correspond to the quarter hour times 8:00PM through 10:30PM; * `V3`-- values 1:5 correspond to ABC, CBS, NBC, Fox, and non-network choices; * `V4`-- values 1:3 correspond to transition states: turn the television Off, Switch channels, or Persist in viewing the current channel. We are interested just in the cell frequencies, and rely on the facts that (a) the table is complete--- there are no missing cells, so `nrow(tv.data)` = `r nrow(tv.data)`; (b) the observations are ordered so that `V1` varies most rapidly and `V4` most slowly. From this, we can just extract the frequency column and reshape it into an array. [That would be dangerous if any observations were out of order.] ```{r, tv3} TV <- array(tv.data[,5], dim=c(5,11,5,3)) dimnames(TV) <- list(c("Monday","Tuesday","Wednesday","Thursday","Friday"), c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", "9:45","10:00","10:15","10:30"), c("ABC","CBS","NBC","Fox","Other"), c("Off","Switch","Persist")) names(dimnames(TV))<-c("Day", "Time", "Network", "State") ``` More generally (even if there are missing cells), we can use `xtabs()` (or `plyr::daply()`) to do the cross-tabulation, using `V5` as the frequency variable. Here's how to do this same operation with `xtabs()`: ```{r, tv3a,eval=FALSE} TV <- xtabs(V5 ~ ., data=tv.data) dimnames(TV) <- list(Day = c("Monday","Tuesday","Wednesday","Thursday","Friday"), Time = c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", "9:45","10:00","10:15","10:30"), Network = c("ABC","CBS","NBC","Fox","Other"), State = c("Off","Switch","Persist")) # table dimensions dim(TV) ``` But this 4-way table is too large and awkward to work with. Among the networks, Fox and Other occur infrequently.
We can also cut it down to a 3-way table by considering only viewers who persist with the current station. ^[This relies on the fact that indexing an array drops dimensions of length 1 by default, using the argument `drop=TRUE`; the result is coerced to the lowest possible dimension.] ```{r, tv4} TV2 <- TV[,,1:3,] # keep only ABC, CBS, NBC TV2 <- TV2[,,,3] # keep only Persist -- now a 3 way table structable(TV2) ``` Finally, for some purposes, we might want to collapse the 11 times into a smaller number. Half-hour time slots make more sense. Here, we use `as.data.frame.table()` to convert the table back to a data frame, `levels()` to re-assign the values of `Time`, and finally, `xtabs()` to give a new, collapsed frequency table. ```{r, tv5} TV.df <- as.data.frame.table(TV2) levels(TV.df$Time) <- c(rep("8:00", 2), rep("8:30", 2), rep("9:00", 2), rep("9:30", 2), rep("10:00",2), "10:30" ) TV3 <- xtabs(Freq ~ Day + Time + Network, TV.df) structable(Day ~ Time+Network, TV3) ``` We've come this far, so we might as well show a mosaic display. This is analogous to that used by @vcd:Hartigan+Kleiner:1984. ```{r tv-mosaic1, fig.height=6, fig.width=7} mosaic(TV3, shade = TRUE, labeling = labeling_border(rot_labels = c(0, 0, 0, 90))) ``` This mosaic display can be read at several levels, corresponding to the successive splits of the tiles and the residual shading. Several trends are clear for viewers who persist: * Overall, there are about the same number of viewers on each weekday, with slightly more on Thursday. * Looking at time slots, viewership is slightly greater from 9:00 - 10:00 overall and also 8:00 - 9:00 on Thursday and Friday. From the residual shading of the tiles: * Monday: CBS dominates in all time slots.
* Tuesday: ABC and CBS dominate after 9:00 * Thursday: is largely an NBC day * Friday: ABC dominates in the early evening # References vcdExtra/vignettes/mobility.Rmd0000644000176200001440000003507114422306403016342 0ustar liggesusers--- title: "Mobility tables" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib", "vignettes.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Mobility tables} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/mobility-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # colorize text colorize <- function(x, color) { if (knitr::is_latex_output()) { sprintf("\\textcolor{%s}{%s}", color, x) } else if (knitr::is_html_output()) { sprintf("<span style='color: %s;'>%s</span>", color, x) } else x } ``` ## Social mobility Social mobility is an important concept in sociology, and its study has led to a wide range of developments in categorical data analysis in what are often called _mobility tables_. The idea is to study the movement of individuals, families, households or other categories of people within or between social strata in a society, across time or space. This refers to a change in social status relative to one's current social location within a given society. Using survey data, the most frequent examples relate to changes in income or wealth, but most often this is studied via classification in occupational categories ("professional", "managerial", "skilled manual", ...). Most often this is studied _intergenerationally_ using the occupational categories of fathers and sons. Mobility tables are nearly always _square_ tables, with the same categories for the row and column variables.
As such, they nearly always exhibit positive associations along the diagonal cells. What is of interest are specialized models, intermediate between the null model of independence and the saturated model. ### Models These models include important special cases: - **quasi-independence**: Ignoring diagonal cells, are the row and column variables independent? - **symmetry**: Are associations above the diagonal equal to the corresponding ones below the diagonal? - **row effects, col effects, linear x linear**: Typically, the factors in such tables are ordinal. To what extent can the models be simplified by assigning integer scores to the row, column categories or both? - **multiplicative RC**: RC models attempt to estimate the scores for the row and column categories. - **topological models**: It is possible that the associations among occupational categories exhibit consistent patterns according to their nature. These models allow specifying a theoretically interesting pattern. - **crossings models**: assert that there are different difficulty parameters for crossing from one category to the next and associations between categories decrease with their separation. While standard loglinear models can be fit using `MASS::loglm()`, these models require use of `stats::glm()` or `gnm::gnm()`, as I illustrate below. ## Hauser data This vignette uses the `vcdExtra::Hauser79` dataset, a cross-classification of 19,912 individuals by father's occupation and son's first occupation for U.S. men aged 20-64 in 1973. The data comes from @Hauser:79 and has also been analysed by @PowersXie:2008. The discussion draws on @FriendlyMeyer:2016:DDAR, Ch. 10. ```{r hauser-data} data("Hauser79", package="vcdExtra") str(Hauser79) (Hauser_tab <- xtabs(Freq ~ Father + Son, data=Hauser79)) ``` As can be seen, `Hauser79` is a data.frame in frequency form.
The factor levels in this table are a coarse grouping of occupational categories, so: - `UpNM` = professional and kindred workers, managers and officials, and non-retail sales workers; - `LoNM` = proprietors, clerical and kindred workers, and retail sales workers; - `UpM` = craftsmen, foremen, and kindred workers; - `LoM` = service workers, operatives and kindred workers, and laborers (except farm); - `Farm` = farmers and farm managers, farm laborers, and foremen. ### Load packages ```{r load} library(vcdExtra) library(gnm) library(dplyr) ``` ### Mosaic plots `Hauser_tab` is a `table` object, and the simplest plot for the frequencies is the default `plot()` method, giving a `graphics::mosaicplot()`. ```{r mosaicplot} plot(Hauser_tab, shade=TRUE) ``` The frequencies are first split according to father's occupational category (the first table dimension) and then by sons' occupation. The most common category for fathers is lower manual, followed by farm. `mosaicplot()`, using `shade=TRUE` colors the tiles according to the sign and magnitude of the residuals from an independence model: shades of `r colorize("positive", "blue")` for positive residuals and `r colorize("negative", "red")` red for negative residuals. `vcd::mosaic()` gives a similar display, but is much more flexible in the labeling of the row and column variable, labels for the categories, and the scheme used for shading the tiles. Here, I simply assign longer labels for the row and column variables, using the `labeling_args` argument to `mosaic()`. ```{r mosaic1} labels <- list(set_varnames = c(Father="Father's occupation", Son="Son's occupation")) mosaic(Freq ~ Father + Son, data=Hauser79, labeling_args = labels, shade=TRUE, legend = FALSE) ``` ### Fitting and graphing models The call to `vcd::mosaic()` above takes the `Hauser79` dataset as input. Internally, it fits the model of independence and displays the result, but for more complex tables, control of the fitted model is limited. 
Unlike `mosaicplot()` and even the [`ggmosaic`]( https://CRAN.R-project.org/package=ggmosaic) package, `vcdExtra::mosaic.glm()` is a `mosaic` **method** for `glm` objects. This means you can fit any model, and supply the model object to `mosaic()`. (Note that in `mosaic()`, the `formula` argument determines the order of splitting in the mosaic, not a loglinear formula.) ```{r indep} hauser.indep <- glm(Freq ~ Father + Son, data=Hauser79, family=poisson) # the same mosaic, using the fitted model mosaic(hauser.indep, formula = ~ Father + Son, labeling_args = labels, legend = FALSE, main="Independence model") ``` ## Quasi-independence Among the most important advances from the social mobility literature is the idea that associations between row and column variables in square tables can be explored in greater depth if we ignore the obvious association in the diagonal cells. The result is a model of _quasi-independence_, asserting that fathers' and sons' occupations are independent, ignoring the diagonal cells. For a two-way table, quasi-independence can be expressed as $$ \pi_{ij} = \pi_{i+} \pi_{+j} \quad\quad \mbox{for } i\ne j $$ or in loglinear form as: $$ \log m_{ij} = \mu + \lambda_i^A + \lambda_j^B + \delta_i I(i=j) \quad . $$ This model effectively adds one parameter, $\delta_i$, for each main diagonal cell and fits those frequencies perfectly. In the [`gnm`]( https://CRAN.R-project.org/package=gnm) package, `gnm::Diag()` creates the appropriate term in the model formula, using a symbol in the diagonal cells and "." otherwise. ```{r Diag} # with symbols with(Hauser79, Diag(Father, Son)) |> matrix(nrow=5) ``` We proceed to fit and plot the quasi-independence model by updating the independence model, adding the term `Diag(Father, Son)`. ```{r quasi} hauser.quasi <- update(hauser.indep, ~ . 
+ Diag(Father, Son)) mosaic(hauser.quasi, ~ Father+Son, labeling_args = labels, legend = FALSE, main="Quasi-independence model") ``` Note that the pattern of residuals shows a systematic pattern of `r colorize("positive", "blue")` and `r colorize("negative", "red")` residuals above and below the diagonal tiles. We turn to this next. ### Symmetry and quasi-symmetry Another advance from the social mobility literature was the idea of how to test for _differences_ in occupational categories between fathers and sons. The null hypothesis of no systematic differences can be formulated as a test of **symmetry** in the table, $$ \pi_{ij} = \pi_{ji} \quad\quad \mbox{for } i\ne j \quad , $$ which asserts that sons are as likely to move from their father's occupation $i$ to another category $j$ as they were to move in the reverse direction, $j$ to $i$. An alternative, "Upward mobility", i.e., that sons who did not stay in their father's occupational category moved to a higher category on average would mean that $$ \pi_{ij} < \pi_{ji} \quad\quad \mbox{for } i\ne j $$ Yet this model is overly strong, because it also asserts **marginal homogeneity**, that the marginal probabilities of row and column values are equal, $\pi_{i+} = \pi_{+i}$ for all $i$. Consequently, this hypothesis is most often tested as a model for **quasi-symmetry**, that also ignores the diagonal cells. Symmetry is modeled by the function `gnm::Symm()`. It returns a factor with the same labels for positions above and below the diagonal. ```{r symm} with(Hauser79, Symm(Father, Son)) |> matrix(nrow=5) ``` To fit the model of quasi-symmetry, add both `Diag()` and `Symm()` to the model of independence. ```{r qsymm} hauser.qsymm <- update(hauser.indep, ~ . 
+ Diag(Father,Son) + Symm(Father,Son))
```

To compare the models so far, we can use `anova()` or `vcdExtra::LRstats()`:

```{r anova1}
anova(hauser.indep, hauser.quasi, hauser.qsymm, test="Chisq")

LRstats(hauser.indep, hauser.quasi, hauser.qsymm)
```

This `hauser.qsymm` model represents a huge improvement in goodness of fit.
With such a large sample size, it might be considered an acceptable fit.

But this model of quasi-symmetry still shows some residual lack of fit.
To visualize this in the mosaic, we can label the cells with their standardized residuals.

```{r qsymm-mosaic}
mosaic(hauser.qsymm, ~ Father+Son,
       labeling_args = labels,
       labeling = labeling_residuals,
       residuals_type ="rstandard",
       legend = FALSE,
       main="Quasi-symmetry model")
```

The cells with the largest lack of symmetry (using standardized residuals) are those for the
upper and lower non-manual occupations, where the son of an upper manual worker is less likely
to move to lower non-manual work than the reverse.

### Topological models

It is also possible that there are more subtle patterns of association one might want to model,
with specific parameters for particular combinations of the occupational categories
(beyond the idea of symmetry).
@Hauser:79 developed this idea in what are now called **topological** models or **levels** models,
where an arbitrary pattern of associations can be specified, implemented in `gnm::Topo()`.

```{r topo-levels}
# Levels for Hauser 5-level model
levels <- matrix(c(
      2,  4,  5,  5,  5,
      3,  4,  5,  5,  5,
      5,  5,  5,  5,  5,
      5,  5,  5,  4,  4,
      5,  5,  5,  4,  1),
      nrow = 5, ncol = 5,
      byrow=TRUE)
```

```{r topo-mosaic}
hauser.topo <- update(hauser.indep, ~ .
+ Topo(Father, Son, spec=levels))

# mosaic of the topological model, labelling each cell with its standardized residual
mosaic(hauser.topo, ~Father+Son,
       labeling_args = labels,
       labeling = labeling_residuals,
       residuals_type ="rstandard",
       legend = FALSE,
       main="Topological model")
```

Comparing models, we can see that the model of quasi-symmetry is the best
so far, using AIC as the measure:

```{r}
# summarize all models fitted so far, sorted by AIC
LRstats(hauser.indep, hauser.quasi, hauser.qsymm, hauser.topo, sortby = "AIC")
```

## Ordinal tables

Because the factors in mobility tables are ordered, another path to simplifying the saturated model
is to consider assigning numerical scores (typically consecutive integers) to the categories.

When both variables are assigned scores, this gives the **linear-by-linear model**,

$$
\log ( m_{ij} ) = \mu + \lambda_i^A + \lambda_j^B + \gamma \: a_i b_j \quad ,
$$

where $a_i$ and $b_j$ are the row and column numeric scores.
This model is also called the model of **uniform association** [@Goodman:79]
because, for integer scores, $a_i=i$, $b_j=j$,
this model has only one extra parameter, $\gamma$, which is the common local log odds ratio.
The independence model is the special case, $\gamma=0$. In contrast, the saturated model,
allowing general association $\lambda_{ij}^{AB}$, uses $(I-1)(J-1)$ additional parameters.

For square tables, like mobility tables, this model can be amended to include a diagonal term, `Diag()`.

```{r scores}
# numeric scores for each variable
# (for a factor, as.numeric() gives the integer level codes)
Sscore <- as.numeric(Hauser79$Son)
Fscore <- as.numeric(Hauser79$Father)

# show the data alongside BOTH score columns (father and son)
Hauser79 |> cbind(Fscore, Sscore) |> head()
```

To fit this model, I use `Fscore:Sscore` for the linear x linear association and
add `Diag(Father, Son)` to fit the diagonal cells exactly.

```{r hauser-UAdiag}
hauser.UAdiag <- update(hauser.indep, . ~ .
+ Fscore : Sscore + Diag(Father, Son)) LRstats(hauser.UAdiag) ``` In this model, the estimated common local log odds ratio---the coefficient $\gamma$ for the linear-by-linear term `Fscore:Sscore`, is given by: ```{r} coef(hauser.UAdiag)[["Fscore:Sscore"]] ``` For comparisons not involving the diagonal cells, each step down the scale of occupational categories for the father multiplies the odds that the son will also be in one lower category by $\exp (0.158) = 1.172$, an increase of 17%. But this model does not seem to be any improvement over quasi-symmetry. From the pattern of residuals in the mosaic, we see a number of large residuals of various signs in the lower triangular, where the son's occupation is of a higher level than that of the father. ```{r UAdiag-mosaic} mosaic(hauser.UAdiag, ~ Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Uniform association + Diag()") ``` ## Model comparison plots Finally, for comparing a largish collection of models, a model comparison plot can show the trade-off between goodness-of-fit and parsimony by plotting measures like $G^2/df$, AIC, or BIC against degrees of freedom. The plot below, including quite a few more models, uses a log scale for BIC to emphasize differences among better fitting models. (The code for this plot is shown on p. 399 of @FriendlyMeyer:2016:DDAR). ![](fig/hauser-model-plot.png){width=80%} ## References vcdExtra/vignettes/vcdExtra.bib0000644000176200001440000000611214422306403016276 0ustar liggesusers@ARTICLE{Cohen:60, author = {J. 
Cohen}, title = {A coefficient of agreement for nominal scales}, journal = {Educational and Psychological Measurement}, year = {1960}, volume = {20}, pages = {37--46}, owner = {Michael}, timestamp = {2009.01.21} } @BOOK{Agresti:2013, title = {Categorical Data Analysis}, publisher = {Wiley-Interscience [John Wiley \& Sons]}, year = {2013}, author = {Agresti, Alan}, series = {Wiley Series in Probability and Statistics}, address = {New York}, edition = {Third}, isbn = {978-0-470-46363-5}, lccn = {QA278.A353 2013} } @Article{Landis-etal:1978, author = {Landis, R. J. and Heyman, E. R. and Koch, G. G.}, journal = {International Statistical Review}, title = {Average Partial Association in Three-way Contingency Tables: A Review and Discussion of Alternative Tests}, year = {1978}, pages = {237--254}, volume = {46}, bibdate = {Tuesday, December 24, 2013 09:32:18}, } @article{Bangdiwala:87, Author = {Shrikant I. Bangdiwala}, Journal = {Proceedings of the {SAS} User's Group International Conference}, Pages = {1083--1088}, Title = {Using {SAS} Software Graphical Procedures for the Observer Agreement Chart}, Volume = {12}, Year = {1987}} @article{Goodman:79, Author = {L. A. 
Goodman}, Journal = {Journal of the American Statistical Association}, Pages = {537--552}, Title = {Simple models for the analysis of association in cross-classifications having ordered categories}, Volume = {74}, Year = {1979}} @Article{effects:1, title = {Visualizing Fit and Lack of Fit in Complex Regression Models with Predictor Effect Plots and Partial Residuals}, author = {John Fox and Sanford Weisberg}, journal = {Journal of Statistical Software}, year = {2018}, volume = {87}, number = {9}, pages = {1--27}, doi = {10.18637/jss.v087.i09}, } @Article{effects:2, title = {Effect Displays in {R} for Generalised Linear Models}, author = {John Fox}, journal = {Journal of Statistical Software}, year = {2003}, volume = {8}, number = {15}, pages = {1--27}, doi = {10.18637/jss.v008.i15}, } @article{FriendlyKwan:02:effect, Author = {Michael Friendly and Kwan, E.}, Comment = {\url{http://datavis.ca/papers/effect.pdf}}, Journal = {Computational Statistics and Data Analysis}, Mrnumber = {MR2005451}, Number = {4}, Pages = {509--539}, Title = {Effect Ordering for Data Displays}, Url = {http://authors.elsevier.com/sd/article/S0167947302002906}, Volume = {43}, Year = {2003}, Bdsk-Url-1 = {http://authors.elsevier.com/sd/article/S0167947302002906} } @book{FriendlyMeyer:2016:DDAR, Author = {Michael Friendly and Meyer, David}, Title = {Discrete Data Analysis with {R}: Visualization and Modeling Techniques for Categorical and Count Data}, Address = {Boca Raton, FL}, Isbn = {978-1-4987-2583-5}, Publisher = {Chapman \& Hall/CRC}, Year = {2016} } @book{Glass:54, Address = {Glencoe, IL}, Author = {Glass, D. 
V.}, Publisher = {The Free Press}, Title = {Social Mobility in Britain}, Year = {1954}} vcdExtra/vignettes/datasets.Rmd0000644000176200001440000001024414422306403016315 0ustar liggesusers--- title: "Datasets for categorical data analysis" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Datasets for categorical data analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/datasets-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") ``` The `vcdExtra` package contains `r nrow(vcdExtra::datasets("vcdExtra"))` datasets, taken from the literature on categorical data analysis, and selected to illustrate various methods of analysis and data display. These are in addition to the `r nrow(vcdExtra::datasets("vcd"))` datasets in the [vcd package](https://cran.r-project.org/package=vcd). To make it easier to find those which illustrate a particular method, the datasets in `vcdExtra` have been classified using method tags. This vignette creates an "inverse table", listing the datasets that apply to each method. It also illustrates a general method for classifying datasets in R packages. ```{r load} library(dplyr) library(tidyr) library(readxl) ``` ## Processing tags Using the result of `vcdExtra::datasets(package="vcdExtra")` I created a spreadsheet, `vcdExtra-datasets.xlsx`, and then added method tags. 
```{r read-datasets}
dsets_tagged <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"),
                           sheet="vcdExtra-datasets")

dsets_tagged <- dsets_tagged |>
  dplyr::select(-Title, -dim) |>
  dplyr::rename(dataset = Item)

head(dsets_tagged)
```

To invert the table, we need to split the tags into separate observations, then collapse the rows for the same tag.

```{r split-tags}
dset_split <- dsets_tagged |>
  tidyr::separate_longer_delim(tags, delim = ";") |>
  dplyr::mutate(tag = stringr::str_trim(tags)) |>
  dplyr::select(-tags)

#' ## collapse the rows for the same tag
tag_dset <- dset_split |>
  arrange(tag) |>
  dplyr::group_by(tag) |>
  dplyr::summarise(datasets = paste(dataset, collapse = "; ")) |>
  ungroup()

# get a list of the unique tags
unique(tag_dset$tag)
```

## Make this into a nice table

Another sheet in the spreadsheet gives a more descriptive `topic` corresponding to each tag.

```{r read-tags}
tags <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"),
                   sheet="tags")
head(tags)
```

Now, join this with the `tag_dset` created above.

```{r join-tags}
tag_dset <- tag_dset |>
  dplyr::left_join(tags, by = "tag") |>
  dplyr::relocate(topic, .after = tag)

tag_dset |>
  dplyr::select(-tag) |>
  head()
```

### Add links to `help()`

We're almost there. It would be nice if the dataset names could be linked to their documentation.
The function below is designed to work with the `pkgdown` site. There are different ways this can be
done, but what seems to work is a link to `../reference/{dataset}.html`.
Unfortunately, this won't work in the actual vignette.

```{r add-links} add_links <- function(dsets, style = c("reference", "help", "rdrr.io"), sep = "; ") { style <- match.arg(style) names <- stringr::str_split_1(dsets, sep) names <- dplyr::case_when( style == "help" ~ glue::glue("[{names}](help({names}))"), style == "reference" ~ glue::glue("[{names}](../reference/{names}.html)"), style == "rdrr.io" ~ glue::glue("[{names}](https://rdrr.io/cran/vcdExtra/man/{names}.html)") ) glue::glue_collapse(names, sep = sep) } ``` ## Make the table {#table} Use `purrr::map()` to apply `add_links()` to all the datasets for each tag. (`mutate(datasets = add_links(datasets))` by itself doesn't work.) ```{r kable} tag_dset |> dplyr::select(-tag) |> dplyr::mutate(datasets = purrr::map(datasets, add_links)) |> knitr::kable() ``` Voila! vcdExtra/vignettes/fig/0000755000176200001440000000000014422306403014605 5ustar liggesusersvcdExtra/vignettes/fig/tut02-agreesex-1.png0000644000176200001440000002243114470742317020245 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:f::::f:::::::f:::ff:f:f::ff:f:f::f:ffff:ffffffffffffuuu::ff:ffffې۶ېff:ff:f۶ې۶ې:ېf۶f۶۶ې۶fې۶D;D pHYsod IDATx u}H!3 psiBJRȺɆfL-/$-vϑ\Ye9)`L;5@P XAk (` 5@P XAk (` 5@P XAk (` 5:9p?$I~15$p : +!LP}$n҇[?ZeA4+kɕa篧WWu\}AthVmg}-@P'o>B[^PVr yd*Y-uW}T#@ms ]h' ꫩaAgSZAԂ]'O6 ) DjA\_q^o 9w>~U +L:=z{}N)z4ӻНqK띶7S~YV~¶¡հKP{kM@P'=;iO-e^{~BP7!>V}`Mɛ zyb~BP7f'4>auڎ}AWگOꦩQG89s }cCw܂}mk:ޗuXN\vZ+P>fWw%Wq4BV}vjATj;*'#?o7 uA4mLw;z?~[ԧvW0Zx-@P'm'F:sz5t~'vr^}ek7z!:z?#I$;o?h_hV?IQ9Ak (` 5@P XAk (` zxZ_rq2y_ih.+.MN̻8=p 1ȅ Aؽ}A<;=@קUɇ pmUErY6o\+?ԝ:y"A% MW \oNy/P ],3w*ApuGW\AsKalg9=4/4HeG \xx΂:sW^/Ck_`6qTPA>Ƭ?b>r( wMfYsQQ/@ nNڟ[<( KwMZ:O[2 I5鮣i?.8:UЉA>z9 AMf@Cs5@PV@P jJ@<8FO~;%ZQp΍^kΦhQNsnp΍N ?! `$p΍>*?! `#h'5\&s#Ԅsnp΍j~BPιAPOj97p΍v&s# Ԅsnp΍ȂZ~BPιWPOj97 p΍&s# N?! 'Oj97 '5p΍H AM8F@A&s# ~BPιC!?!  蠟Ԅsn :'5䂎 AM8Fc~BPι@,訟Ԅsn :'-=Oնb5N-F SEq7`9N A;ʍ:A AEN{+7r'-2ˍ*A A!œ :Oj97h'5$'! 
Ar5\27:=Oڒ&^-9'm hxA6w8l?!hۋ}xFNXAp΍swg/T#\En C :OZVA[ :Oj97B :Oj97 :Oj97:Oj97Oj97BOZ 3t$}ˍOS4ay 3 ąjHܨ#7?CKPӏr#Nj쎃6xp΍z+aso<3 o0Yċ>UF-A·E g>fw_%ܨY.W?Ges= / o^;-o^XP~TK1[n,Գ,R!VbA} AbA!^LP~BPI*&!$GPP~BP ιD~BP ι@~B;4TPIRwFlA A[;/h ?!hDQ '6Dܨ'h0?!h+xF,A A%eFA Akz (!hH?!h z ' fל-hX?!hQ-n<9 Oj974Ԃsn'hp?!h}E'ڠ 'J'8js2 yd8 0*ɛreriEO ˒'HNE`bbY5ӊO'OԠ O'h6L)>T^/7Z&OڐcT0^|R?!A A-8FXs#`~BP ι0\|r?!,>Ԃsn ?Ԃsn ?Ԃsn?Ԃsn?Ԃsn?Ԃsn?ԂsnOj97ŏ'GZp΍Gc A-8F]~BP ι`?Ԃsnŏ'Fc A-8F@s#S~BP ιԂsnh'mY A-8F@S|~BP ιPԂsnT'ls#@Ԃsn3Zp΍Ԃsn$s# 'ssΘ4ߜF%vssQ>݅[8yn{h cT݈GV|jmOP'%$G(8 AD=lOaAY1| q}8Owc>=4'A+?!hǝ`za=㶇#h'uL{=4A?!he#u0Knwc>=4A[?!hYrE0暾kTƉ*7?~I *]i/^Fzz 8In{hłvѢw~1 FOy}LUӔ[nc,FK~2}C62AM?!hĨ}NۢI6Ɖz LkT?qCc,'5TgcIA35\m1_P4JԲn=2Oj=4lA~B(Q!pCc'uX3Uw'UlQ a>?!h9C!m{Ɖ棾^|]*1'ոXowtO%ꠠ۵ :'o2C\n{hFǺja?!hĨ3F~dq|ǡ1tO)MO'I4B?-訟4NtΠ}^/t\lq?!hirw'M笧j'U,Nȿuo}?!hGb:sC#$?9 zx*kK5.WđXjdZYu(ҠvCWI'_As5.z'yJAXf2O=ĭu>yM0U[A-FctAP :4oTq|(>4m1EPq+T5IJke3u]om1APy+$]!*_AE-/Ϯ zss1*huثTP RDd. n{h1A_6TrzO~5An{hAk_]Q>qewzcXW"NuR}O~^5kl 5rCc A9G+wTL rCc  A9G /w})#=4F?!(#Uv8=4F]?!(먩n}AOPO:aܫ|h R5[1z5L*&I&7]!D5 m\-'euWxx@PNQ97.A A#F\8]'?!hĨ~vO+ս\ux4RTt(Ux4NT,a?=nKqsjV}Sb?!hj:Hyj:]ݎ ADȭ14A= ADUNV}tݙ"% O%It#|>T;C1A AD-{lUqo7@|^ⶇƨ'k1ZV´/_嶇ƨ(_.~p*yqgezQ ;6w_@ˎ:p;nA_zQ>ߋ߫!FAտrV ^l~3I}m<fꌜ4AV j/}Ӆf;_n_ {r+*?WLC3s"N^Qu+.2BT"ĭl}tC8h^|XFM 3U#/X<,fl?CSAsw/oOz=Qּ A?mK@P/+,SٶMP[ j~2ۖ޹zA?+hAu?!GYt3vuM:'ݓ..'CsD/NW Oz-Q]8Y,?!DT_PK'5%i ^K6%I ^Kԑ^%) ^KԱˍk 'gdI^x+>S"zu\|ھwOzQ~`1陾h=GAqw^!?9m@ݖ~rږ!XEfM :'mʽ :'mjL1?YoK@zazE1^l[zЭLA1'mw8lfs ~ږXڪ K惆sJ.eV/͒"AtOF2i7-b-CzA!(I~BPZxf 'AԆ-E1.ܜ:lT06;=RfBϐ6F/h'mzٓWbHou-*K A5gc*]TftҼ(^=qt!_&P^z򲗤6q߽3/HkT]T+{SBrh]C|hLA:ӷNP,wDAM?!h[jqbLy}YxZ~BPmnq^ߤNŗmh~BЦxBlu:;u A1Y&.?!(# 뇆&i!h'uA롵 ?!ڹ!qf\P'uA~RNR'uAMn  5!Y.ڠ T&tvn hю3~BP7 fw_'uAe?o,Sn :xJG%'uAl&q;DBAm?!Z-!'uAɫ9Mn IxA~BP7$n?! 
ڝN_O A 'uAi A@PA =%|/J(~BPJ)~BPJ6\?!( % ?!( $?!( #,?!( "mtZ"{?},uY>ϵ:QD,t+Һwn{G3+;y泩(t7ğAԺa~|Wy}wj z^gO]nJ?,ֶSߔȿ,ۺM1;r0IDATo6WOq=WCAXj7C@ݰ#3R5\=FSe2><ն(]SL=pMwFƪS9z_?wzz},cSj.v->^Nؑa/XeST跋  ,Mtxkqj$Id>oN &zU5.Nyj#<U[ wz;;zNf*h[u`ʭ*?ApŤR#p *ɂ[`?@B*[`_@P XAk (` 5@P XAk (` ~8l򤽭\ȧ~'H.]{/TPY *HG 'y[֬Lˏ<A=7ucMem}8 ꉶZ:bz]Ip~jU٪^ʥ=O~1>K'I&5:iV]c@P_vV΅c$cq (j-XhA'_/rE\S533)cUv͚C;zP極JA] `\5>ji)x,|#Ư=n 7P1T2!| ?R9ZI6pC^,ꏲc9lnQ776(FgAQ:M5ŧ8G/K>H㠪 E]:Tg'g6{LR~(`~#>ac+>@P"j˶(ވL|c/ XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5?<37IENDB`vcdExtra/vignettes/fig/tut04-housetasks-ca-plot-1.png0000644000176200001440000002262014470742315022170 0ustar liggesusersPNG  IHDR@e$uePLTE:f2::ff  !((f(*2::::f::2::::::::X:f:{:|::IIIIfIff:ffffff:fXffffffff|f2::::IXffI:Xfff||ې:ې۶f:f::::f:Xff:fff:fې۶f> pHYsod IDATx흍YǽfsBI`[ 5pmk…B]v7uM]gF%{F43#Ɩ%yQcFmE@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (` 5@P XAk (`cAG-A&: ZÊ (Rhn pt%9A0S9>_=n͙f8g6>GG!x&\gO:(BIq,|亱F+OTS O;x-$R=4.rf8gNV>U%9xǢvptTT0E$ }:łAҊ?e)'*cG3rĥxFt&l}EAT9 :ZN">TÛ,*G)N&i]q"(I\e4$7_ hQ1coD9rRzAJhH[w:EY$q֙,J1rfc 3նMHQaEm w&M`" |(PR*(0$ ː^: Bʙ&lw]r鳕\~-ć%]3uȐ2DP$AE*9yQă28 !W cWy嶦im h'@揔YZ%RN)VvvCJnaq.[TDkAY'a)ֻ3 TNAv:8K h?Tзo('vna~zhZjp)(N :=?w)\VM۸.4ā~ѯ ^rBa Jm.t!|3^<ع`)(? )5L0i?o|{%~WHrЛ?(/+hzw;AOta(|?Wޙ\*AWtWot 研Gw5[_T+:A϶bf;yTipXvX6#?} *GgQ\L*+T^d/ZƩNP*d˯nĸtGz蓙hl'_+H6QMxk&JPN~vU( !ħB\T,n,*w}+YyAu,(U4Me Qo+^7vDc\Ophzv1W2T҅&vE{\DDEAH텟xXĈ+AgZA'1IA{a3s/hOĝ$3-LF/~taapZY't{o{@PT< ~és,hQ/owv;o{@PT:7"e ˤVY>7" mT>Yt ٸ& qcͨiA{'-r$h>+>Usy2Gf! 
Z"w:EbtxT"3f 4)hEѦIA{'-‰6r5j9A'-…6A2]DШLAMTt0<"z9Ĭ2w:GajD'-P+pL1A{g74c_;PCS^ ~AK֧sGE$hCE|HV*7L AM+^!ZuZDg-5@"xFx A~ AK&KgAK(G4!Y]_L Z.FnG!h I.~2(SKkE:%]TmZnR]ͥ.uy{xm`jmW2[!$DnZ}M:~^L>DѼHPOߖ~хKeЗƔL|._6"=u+=Y0OE/NϢǣyȗp|44^}-^jV#:BoݎsETPt)v$Òz?ˇD^))kS|gu: [ 3CQ\Ot|^_Н-$'I_²Q~EJ( 2R:sBP݆AcAjLDVz*!&$?죏7T4 XdOA.{*i(~e[:so`/e44(|Z/V|,X}\3 ԊTȓb~+?=巨PNa$P񻨎vBV6yA(৳yJ J8${eOЩnkȵA[osлg~*;nb uHFTܰR7RVиqECY-M掯S9x؋a=,+(E+^hZ: ^<e_hCV6zsVY>|,(yTu3=5#h>Q%?۫"16KpКsSZO uV> ]((%h4KjqF?ͳzQ/תǙҠȪC%ۥ% W, ?g|:_<#?ԸEQ=+:l!nq)6QfGǙziGx)jaA3ힺ[gZhh%A) 5Վd'9(ܓ'( /LjwK hJ 7Ԩ 2jAYaˍugs5Շty"5TYZ5eYug.Tœ]Xm[KzѓY렬#^T_ IW=؊?"gކTfyc!3\ j"]sj)hF@tp"hA A%բۙ:#L]jH4 eݮ9y 4~6nR;AОZSt-9CˉIzM?4O{Q3$h:iwou:ŝ3'(ί :2 ?,KA4Wr?GC=6\~Aq8.ը !YK ;@YA'$5  Ib,D#;35 j9 AnFR%J n~n=HPcC "7xzgLMѿ%6T@0U?E<dL(ͣ~F5R9o#8o(/($ HFsuyo߀ը(hQd]i4\x~LE@P+* z0OmrzE<%(L,v"Zd&:VfLO 7 8 h~f?SA}SUP6O3mZjE<2ziݐ-`Bms͙B@PT~A[APFwET@P@P+ h um6D=)YCP=7 - m]CꁠўiA@P@P+ h+{KTmZOn.DxA4faff Uo!/n.S-fzA`$^-*L A3 Mqoax/sû0|E!harW4BQ-!/Ń4wq O$q'TkQ?'~>_z,@?hKĆy'QCPT .9h'( j!U93uМP#m AaHn[J{Ym9A["7<$-7y<\iXߐJx9o $%0^)g.hA'\ y8 w o,?s\V.FTG ABP=><"ALz ( V@P@P+ЊgAA@PߠZAA@P&@P=7(⭀V@P&@P= T x+ o  TeA}" jeAA@PߠZAA@P&@P=7(⭀){AA@P.@P-7(⭀V@P.@P- T  x+ o  T A}SzZAA@P.@P-7(⭀V@P.@P- T M"~uzԻda:plRd5ua+AApb"`]7 yAϢǣy,(-53Z(΢Mtw!7M">ܐBXPZxb~=#YM:|3 d\<G!(E.O䪻g 7:([۰S)׊V] _MRW߬y$rwh%@P&ŋb4}TOTvz>NOۧszg Asy7Ac5 j›)PCrPߠj'AEh,JKug ?f> @?(` p |+~8g䠀5aAG)N9S/ p $[ ?9 h:p P ?FQC\gP3<y ^Ӄyw!A72==gp׋{z4 h]/A<^될0==  XAk (` 5@P XAk (` a(ň:[MbIr /r%OybA~Ź$_\^חOVmXd_l> 9wx__>A'Xn.F/_̹$nlr/ 7Aߩ*QlI\BٌW%$.}lIDAT`58 OV\s~7K4 ::j IНmwx ] gA꣓և'Xīdm|M7j2=M6}%~Iև'qHm)/u?b6Yn&Ótt,py*^;O&鸣>ۊwr*|k.NBw%}T.9~tr`(([ (` 5@P XAk (` 5@P XAk (` 5@P XAk (` F(Qh}^6"Qᖙb6j+ Z,CE[.OFRmNlQ,O%`!0t٨\g=QFZ[ʣԈECEvB8]qE@O'YЀFbAIiyg2f*stB9,yQ3K]AǷB!XPzU2AߨT&铙>,i)B,] W*" rnA$ /DxHqF}G[BdebfBڂS;O֪FR6=y =A'QCegԉ2i[fW&)f1f[YE*kC+Ⳃ&A˴Ut8Mڀ<{4KRtzH3VDP߃ OiOԺq}.J(uP' W[e8m)i*l'E,}UY @AWe ͯ_24*Kdh~U>,}UY @AWe ͯ_24*Kdh~U>ȤtxxOkA& mOF^SsVY 3sYmKŽafR *g t~*z\QWxYrP?;*65y} 2x\Ռ@Ou -) 2 qsyɣ¹RQ-,~kߝ޵@{yi 
7\HU>]|wyyK(jw _Opz @գo6|=Z\ :@Ͼti=X:|ѷev(>ЈUل]|jyWp3~vX֏6J1"@o7r_3vFu ePQ@u"pMj;q:Mu@T8$@ ;My .B!:h5GVGaȣ0h= hku woehPy P0oΆs% *_?0k/KX:n }߉ndP`sPU>dR}kFf*Kd *;IZL9WY S ɇ q2%JY5U j@=YV { "dUXE2xɪ&dޓUM`'*{OV5U7UFi@8śUXi  F!JSeL@LEthCBĀ(M2P*Kd24U>&@i,}eTY ЀA&+!JSeLf@BZ(M2c4U>ȌheTY 3AfD@oZXIVY 3* 7`-lU>Ȍ H!*Kd$'@T2 hNYeL$A@&SsVY S !,}) QU>Ȕ(cwj*KdJD uNYeL\:v欲AH@AADUY S& ԜU> (`*Kdd{A؝5 Gޚp4)yrB*KdJduE؝]Q?v欲AfzAf ,n0v欲Ay@CDUY c v9[2ݩ9,}1$ $wK;5g2ηIhE*Kd@lCr@{zݩ9,}= RRcNYe郌a cwj*Kd )in1z欲A&D -wd2iD)MTep$]2 ? # $]2)?LFX@N @8ŰÂI M_Y*rhb. dIhtU>B IRUY Av q@; JuWIVY ;A(BA& MWe*dzb.!$),}}]X{;#w =&YeW2 hHhtU>Ȍris!{ʯAfk#.SsVY 3pɼ{ʯAfshw*KdƼmNYęR :81x>U BZ@s52cߡL.HH EjSsVY 3> 7i FBݧ"AFRί @8ޅO%::5g2e2$G欲A@=&;5g2BmSsVY S PNYeLQXjb٩9,}) CP>@۪(,}) ЎJ0aUn *KdD .y(,})A @ݤAH@AA%rTPUY S&оD'.~U?xyTieUUd}eqeNtsϏ[/ -@ W&PTm Yϫ8*?4Au^IB?zym{r@1'0u2jov?H"bCAI@m% Zl0P'ztosp'BA N>TB-ˣb~@lAP' 'Kb]A<[-:) A!d8D-@C_[t $l8AS q*Q-!| 4DҴZP((HbZAS  e;OT>r RFP+HΊ)OPD;O" Y3Pd\AS POM;3Oh@]AFT>b2kg)P[];3OXZ ۙT> lgRP hKȴ])Iv&ONRsXPH$E;O$NsPH$I;OdNWPH$I;Oig)tzf;;O4T>RRⴳCPHHN])< Ol9ܤ/| $m瞠)@e ~@DZ). *#ZNYs]@T:hM\QPi;ˤ,yS&@TJUd2 R '!vޕsL"5E$QDiN_Q)АI;w؂JBf4I V閕 xbyX@T%d% #ԧ >vv1b 4u${Arot'wjPи\QӃO/=\~sKl}sI4Ѣ{6  *6_>r_On3TEM㝻¼=@rAN h ոnMPNК;tТ~AYmž΋fV5Tc PDHqA]z}ci5)ן[Uo0֎aT\DuOPb@%5cĻjɻ[ {`ΈЊsnӊ:#(!Ā K'A5U oc#$HFSIѧ>|R9j 6d /$4vqZ,dT;+2( ']$#theh@v{V@BJ@1) $Vh H(Haր/M<Юy1yh :67ѳq(9P9 A@A(k> pB iP RR@2c(,;ߤ-z hAmȢIf&NiBAW5@'܏X£P vmSUMвpx":UPUMP'_iB (f~M"AD1>gBр$@Tq_Kw݄ I;uSd_1N q+ $T|ڹ.WͣGGG_PB&s34A@UO>@W/i>;=l1) | 1Oߝr~~}B ; tVЄ5W,?j8z Ity}ZP_-/TWq'-By99o:@On-_hlaGGoo\|vOэ@afbAQ 4|~pYk5Qв}*h74yv}"P :U%hV#PsuaPsntqQFwdd@Ƚ甬S5 < [Z0-@mMpq}qv]tD< @4 RS傦 yٯ-@ |z7+QO=<:2< !߱[N 0j;ua{w/ﮏ†t(Î{ N uڀ>= LEh@RA (tPXP\+%&61?*z$&$aJ+ Q I&T";ߤ@"B`v*_оi&$4 }4*ր鑲St+鑲SxSp_CK٩_^z2)D99oټSC& / 77r lߩA |?j؄/ $ !i;5 hz't-"鑴S hgt4A@LZ# HI(a6q  F\C[1PD%M(P m-Τu.w#B: bdL@[vzq$IW122 S{i  FF$"P[1R Ɇ,= b@|B%iUܓi8ULAg֝Z1R &,Rʹ @L *ЙlyS݉@&UL"mUЙhyF@fU!B(ULAu<@U^BM؂\'l)h! 
~:UlAO vU\APԢ7* CACHGAY@y(@#PٝPQ@y(DD@y(DZS"(@|B)S(@\BW(#@}!$P f8@y(Z,NB7j  wu~Ѐ-0!R4KŨe H,@(}ͼHh>TJqc<߮4Eb#24!ΊA nBin@6(D6_ rZLtoW R P;1 "{@o' P%@P]B oO>@ k_( 4u M(9Q) ,c h( EQ{@4'ϩ- AH@9UUi94T2e UI9=?2e |Uh!,cK~OA[e%hEAJH9˘_(2%@m-  !,c>ADRVet"8?1I[1(h  (0IEA[eh h@E(n bRPV rP`2UvNu$d>)- ?E EPVet>$S(hL-J"k3@R *@a @b *@a"@r * @a@ *@a2 ZlHGAYPߐ-P mU4SلL.Gl  5eU4)ZjBtyo˗GGG__EAY h!4I@W/]xy(h!ϰ]xp׌_rF ʪH@ B6tfYzTmҚ-ry+<ץGH[6Y:؍v գ{Vl7 KUQ#B5G  4U11gt䆼Z*n: >O VSBB ϽToVGak?69oe4@BBe h}?sW1((BƟ{  (Q((b v7W8,U#!"@ySYn(OkR&8PL H!M"ĹPjtx z3t@6|vx3wzz(Of='K@^B{$vPm PyIͶ S+S bvܥu'T4D4rg B[$w Sbj@LB@[͙=%>Ѐː JQ5, c hE@op]&o[@>MT)+ P!i*oSP7Hԧ)e@hv߻ AYRҝ*?<==y7zHDUZw֤#y^m0~{12¢v*kaeeI(((^(!H-(8vS4O ysxvkb/2@#nI2%bd4B#i/s >ȤFt?( yP/{ (>PhllBgb &f>,}UY @AWe ͯ_24*Kdh~U>,}UY @AWe ͯ_24*Kdh~U>,}UY @AWe ͯ5Nw J˚3 0**T $*D@['o-!®}8cCZ&Pw Q}l{Yiͳgܳ1>:a׍8.z!ˌ ix@'rU#B]|/&,kC%Pw uiy/՛, M#5gCeR#{W-*䘋d*>P/5^չg9ԝB#aV~-,jL3@Ax灪|}灐!Q $*DB@HT㝃ktcԲ>޹\[O?=Q_^e_M×գNQw&fp鋃&^e}sa=mazWywm S={=T@_\{ۉDA4 hj>0~c@2Tos@&GB@HT !Q $*D?U ߭kIENDB`vcdExtra/vignettes/fig/tut01-tv-mosaic-1.png0000644000176200001440000003212714422306403020333 0ustar liggesusersPNG  IHDRs?PLTE*:f f*2Q:*:|::IIIXff ! *:1::::f::::ff::IJoXf:Xff:ffffffff{ {:::fffffffې?j|!ې2ې:䕥(:fQې 0 pHYsod IDATx {vz&CLr0x!niи0/B*TiK/ b5GA>PjPjPjPjPjPjPjPjPjPjPjPjPMm#` zrt ,j6㳿oc3X>ޭ Rmf5z7n*GbMӲ*G@{Q>m r#@MY:oGv\-ҧޖ)"j D(,, U- U- U- U- U- U- U|@;u\o>w~4h|N C"Ip7\37\w(OVR MOj{õͽ1=qQSvDԔY0P Ozgχn)ߡ߳ MD0 j{%}1SθΎvvԴY(I+ OCۦTMSBpIsw̶?(mGFMyA$]= <,TdUдOQ>p(Bp s_̦?s5Gt5i揶eڎ^8W]5߹r#jM:$X@5s;fOi3(P7jB>ڱ_¡Zpisgt,ǎ*T{_!~ɣGʩfiB" $ipisg̖u nW޶#"=VJPqtSEz5zᴝG)f̽ޘI瞸4eGFmEF{6itt:F@dn:uS{ÄϱpDn 6L9%);*j,m ŋ<6#y\T^r6>E[9cB̽ޘ 瞸v);"j,mdjPjPjPjPjPjPjPjPjPjPjPj4o#^ t;5Mt{PP>t0\֮`c QǝޡV4Q~#-`In}_-{̅7h"K7{Kq+O+juXU% @4YPa@h(dP @/n8@Ю9AsX hļdNzP;0lS;?@h%hmT'Mo F:]Kߺa#Pt;Il^d@;@_MVXR}kP;$@ lrb't}?u)?2zs_ }^IK {NNrX;rzG(߶ayz 9;zJ6>IPP^>7T^>\1PJ]n'겧i? 
@Ǣsz:@qh@erBmtm H^hK?TEh:>Sڠs:r& Z=sQV%p ܋ߪZÍ:Ja(+)st6D[4Q*`~n/@[b#~$QzBE& 1 : G9;*P)\W彴0K{⇤6(yd6W5Сh@O만W)Q:21l-qb;hҿ* g*hQ7g$: 6AMYxOP^S\aNRM!>^NS8#}{Qt_摷{r޺8Sܹy2.*zߜNCjEYYyZgwwqh&crw9 OzB7T;wE%ulq@Orj{Ug CO!lTX=@5T+ADn,ֱ#?2y?P/w @۠ݲÚJ7ЊΆr3!D;9nz}za9I7L&n"Q_-Q 71N˃N9>]G_9^Nmͨ;hg@Lk"lbf*K*J<:Iqɀ[) (YzQ PObpCb`'IjUP ڈ3@6D7rT{<:P0*E/X:I MhӤhifUPL6$6e-K펀zY ꜄Vt:sfU+ay3d):b3ۣ:JgQVfԑ5f^Ж\} ;*߫9vu<D7ڳBbuF?*M&Ia~t:=)&1c9P#6 h; (=G(& %ڊd۲hP@s:5$]9w#7|P7K~"4=P:;ʄD=]p݈n4/o$e &&6}7;j-PY' *%j:fTgG .M<fmRVhcD]L_ZPe5 mQvB61Kp@5%<%jkT~(Up@74Y:widҰeIiCSt˛54в3϶(taPjPj]aPPPPPPPPPPPPPPժPeĽA1)KʧkECUwoFiN WjO5cS5uxJߜՀɽzza캫u`ў}0hMC~^ <.#Tcwk@4L5)xgGPdP)f1UL;?'P'M1Sb @u4Y8j>hj /'R'jvݛ R'RM|z!')fqP]T @STc?)Pq @tj~:ZmP?4L5IgŏqGhBk@h*T՟!} Clj?Ё`T!m~RL54YGA}j|1@Yix,/:W8 PfJU:k8t GӻB+`&62*)E.y@o? oOo:MC7uf ƵA~$u]x;UFW㚀~7 ӯ?H7*cxSq@ٍ~qJtzWW6h7? ƘžP fjU:@m:U.sI+J>c sN;|3MnD{!* L3pNRq@M%scO3A{}DF@W|F@q!zU\ yD=SFy#MGAYڡ @ct@iSEjD ـtД#gcNFD UǟEˆ36g:dF2rFP[G^e39|,i@bڎpv4Y3! *S߃9~;Vҿ?/*:YvEC.RЂL ̫tTk)'Dő0t!c7c:?ݹ!eGu'tP@WbeRdO/$O;ג8dK6t|PtEХe9(=az@m_4', җ 3 @ڠ]. |^4+);~ll @ @ 9q{Λ\ dHҤ tQ%hOn@@5@2lQN?;gPcKzD=[zҡNZXGj̨!tax}*U],h]rqtE>H3Y>>nyj . @Sg! @UХ=D17'h1PLDmKF?-<(@ǒ4y߆J \ȶ^~Jwva%F_{\]ަڕ$]Wtsƶ7g@D|"8Ь'@oFڧ7l~rtKŃ'30[ mٰw]I4\l!m@ Je鶎R_IG.'y~⭎crՐrY_q|1/U|[fʷ{!uC0к@}/7a m[??)g by5@b; .BjU)@jtҪItu L*N],>tQdNzouu)vM˶џ ކk ]XZZ ]WZZZZZZZZZZZZZZZ,|1|7>;JŎS @h#mZ@!>~zë(=Щ;%dX1;Hz~2gĮwVw6-F=e'ڠ6UŀSy(2y7b;Sy@χ/za@'tJG:՘F :SqLԞA̹N$~s8:r:[;h/P$IR^X/| [ @G l]zwz@/Z9$*oA{P,M㹑FԞOot@3eS횈L{W,fN&jϺ#]$&6MAymDn(@E㹊^|{$Nԛ{NʌوWW7εŐ"Ӧ)8Թ6tnJ/TSt,?v7ȶRI@s:+!(IJOV<(țy|GN jW@"Q/lUf"`2X;yI>U[ -"ȠUh$IjXpZs]^{vTr|Ӌ&l PC5ucDt,28nI^Ξж4;G4#z:jloѽ/]TFހ6^zX'I_@ ʨq],&eWSO$< ()]KϦ۱jTm ZbmQDl:=њLcUY_%` Sp:) JH[*Q :{xg742aCVՔ=t)A`ѽx"M#UՀ*h\'~_@[y2ma!Cͳر}iQaPE}21߉Yca~ qy@^#s) s#ϒ^V5"T\t"aFr^5Ra~hPZ&;=4o>h3eoH٣+;˶GϡًT $Ps&UZ-}e4 €:Yl^9*T XD"]WPz߽2G7 :7M7ޡ uG#4MjڋK[ۏ/5lw$5z! 
G$6}4t, h` c-*Jзy?!)]K3 MHkQ<(-4"S,MhMãm@M`H2E Z^nPz pzKV~3Yj R#IzG`o8r$igU< l.hiAUi@À'eDLl;jU-ДTU|Jo}sZx@{G[ߜV4з[ߜV4ЮQ8Dn}9ULMPr:ΓJ#Qo~LT%yPbK&hl[\^C eh(>a!Q2&w4Epٖm ^ڭ!3po:TK>6ܢN3t3ny@RhXYrqxZLԧs@=ra:-zHyPXNb9=r.;X6ӕ;@)Fo Pi53W#[zP2=" Egl;h'1f.w;hx@)[J|l@a o:ZmPZ-^<]>2@GҀV2G @)@O+JjT YcՋRCR"PJQ9:ѷl3"n8MT="Zt@ (^f4>3:H[ xyqz}U[ T Ӵ7?ocRKЮɛ6oX/WPg\fLh:u$h"6(CT8 H֪=u.*w;1YM`}XI i IDAT|.]lOd0r:J:/RP6/];!kD~[thnq[HZN@O呀?OUIR7tlM|3>gZ#4xo@#=g~N\M{zZfj.(i<"7mcl+tjԿ{XP$c{}3z =/N (3STypw]@򔜩 DqP9{5}ꙥ &TOX"u/X۠튵AwSH POwO_~P*u5-"PYA7_-o;w? r;f+IPNM:^Ժv"X4-@o:@C3b9L7WIeyP֑=Cre @3%6mG8cc?+Y;c*8cfT^&M9ެ0hd4G %(D@#9JihltyLYQ5Q5Q5"lPUE @n wʔf`wԹB l8h=ݍAab6S^'t,Ip͔y} 8ARl [.o%aUq$⤀~ڀf6?Հ>n1Qc|<. *Ӑc7ˉ0hd4QӪN1pu@6Y5r='nMZ 贳nh7U:he8-A'/AOLO\1Z(yhv+ ]XZ*<DB5 BU BU BU BU BU BU BU BU BU BU BU BU BU BU BU@kvl\0^g3$꺡]\XmWN/_Er{ʘ]~]XLX[0ե\kgkڔXi 񜾰kʅ8u^Gk@cje*5m @ @)kf;?Rִ)v$iS,HJYӦ'㗓hwލ훦BOT1YPMLT1Yvn{PZ'S%(kyA/*_۝POB/&SU<-8aȡ?c {(YE}r @іtl+G(d  @}ilа(F?C49?D)fgG4N'F\φ SOOyɯps@el"bx2S'x6kNJ-ƛ #~tڨolJ7׋1*9ԉ=ـ̓Ru'>L_B6W+%|k"")Xop_u@P耚H&rt-uϿuwef`^L3k> I.|йiO$ZS-DOސa h7 ŜՀM ;ivܚeG@5g1H< EֲSF'45Q?dЎǽ2 @W(=zjYvn D}}s0OL3pNRFUy@[euyRG4i*(Ku17GPFTP b.'WQ .%h-lIf_m&f Mlt!ِ86z[fzJ (_Fm+dF#oh~yq:o$itbtZ5[2l9kW2&G{1''.o ({-Qǣ=`d`R}K:5+5ߣ;%[wS9r2]S_xqʳXQJPyċ@[&Im _Z(WoG%bWƽ#^v9P3ejI#nvDe$Jw!Is~92Z'~̀.Pßb!VNU6cel,j#GAPegUkB):Iz~[FXT>ދژɵmEY:^mX!rV}0buyШi=\kMLS_MT4m,~':-Z/*jHqTMB{We)*7 Ӎq!qkT&yyo~LbzbϸNo mP_i'UP.uQO~-& M"P;4c #;)rf*_r)ЏJT[:O2/;RLTN=~~!CM*p !w9@+дbyךq'v[j 14 hrҼM2/͓w웴4ӕ@K$Px΍Tod;<6d>;Nhc1М#+NmYs@oW]$FHϮ{*_G\QP@tuz[(ZV- @ m׽(%\S\Fi3@7r̀7Q־Pxj(taPjPja2E BU BU BU BU BU BU BU BU BU BU+&OӐ3>5o-eKuJφkqiw|\*gَd䳩KNېߖ#Oo aKuJ^7ef,O5k|lپ|6)PEs+nnϴvgiՠҶ[48 Iq_V(7p/l fnڔ< BDI[ڭVz?5'O8\pjwqs^ ql/ ;H9e kQ7DQ?ƹߨ.#=֐חp}b̕@?P10O43MuTU9sGY;A:Ҷ翻;'8tIq80\jTg;w߹l:@N>,Ŕumˬ٦kMvSaDTUU-['hVgn-mkWˍ qxmӌM֨Q""m>ڙ괦v'᣾h?:y!ݺ.l?k_8Mc4uz$ɒg7;H9etHx]ʓ4|&n]Zx~;͵/uRNDQ~\h³X,iF嶩M+S!Z0wQnz:k8lӌDkns"QJ^پ|6i@ u?­$R(Iqx.2ԩ4+=מta}sjھ|6u E@@@@@@@uڋV"Z|ɫ jꦹ=#j"]U5 ?h%L).7z5@5*yW &7GZjz *'j㥤lu>ӫ@kH;U@UO-зO/|& 
@k;yEPUSi$q*38-o|۠ͫpg^Ћ?@TW/O/#? 55IgYPjPjPjPjPjPjPjPjPjPjPjPj 1IENDB`vcdExtra/vignettes/fig/hauser-model-plot.png0000644000176200001440000012235514422306403020664 0ustar liggesusersPNG  IHDR{ pHYs+tIME *m  IDATxw|Wr 0ق`:QkE*bZW[+U,VnIEVu"AHIHB0M a݄Gsw9 H$ItI$IRd $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$I:V.YzW |CT$I.@$I ϦᬳC]E $I$)$ $I0J$IҁPASM~0yE%,͏\ͪ.qzk]ǒu^1=ȸsK7W|%do7 `E/Y'bSvEᘻ)L <}ų+? {)X[(xꎼ{{rT$I|+0&M7PJYl~swSN_htob)4zZ+x0,%giͷnfzyF;Ѱe64})5J$Iҡ6oLܶ zC:lݾڷo35 @Q䴲WI~6"+x)lzұ>CDlKፉt4;ʶD=^0c ') ț &wr^Tv(|~*0Ij $IC)My9C4l|e!c%kL9/}2 `Io4g'D 9-]-))3+8t!$I, w!;9!hDFb oBm/Q$ 뵖c$mVuw _mw;9n\@TUƦFI$I:751E!$I$T$IRI$IRHH%I$I!/$I]Et'qT$IRI$IRHdW$Ijٳg3oY`͚I.ҥ 111!n $IT,Y[o3z!,EH9~~v)\+I$I5O嗧gά?,`]z.hWW5RI$I!,Y.#6 {1 @*I$I5'%&)naI;إ$I$V-?|BAI795յ? $ITLz*UɣP L9` $ITlquY[b $I!o R$I$qutsH%I$D!tVx=T$Ij2~UkjV]zplb $IȔzԩ̖аxT$Ij iظo `cǿH֭Mq@*I$I5HӦxр 9WaeU .B'y X2$I$bׯo!7'gc{iӦ7DZ$I$IPׯ_g.ٕ$I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$DD .,=9 E)v1iY6m߉N-$I$i?2g<89o%Pn2KءQ$IC;0LijgjOj4N6\q\1}!w6|$ITc` ys`#Wtn7>Y)Εcxh&>t)!&Mkނ+I$IfZ(Lb^q"=ʄQ6 0+`*I$IZHY$ּU؄VM0 $I/jq 6!fef6UZ5k @\t-%I$i?4@랽Hf}6WnXp_~ s̭J$I]v$_xK]U\@iDrr(L.gbcW^i"I$Iw-IO-?b}%Í=SEˮ$IP,Ri"rVg_,^0gmo 'kIjtѓ[\H%I$TRI$I!9$I:l,=9 E)v1iY6m߉N-$I$i?2g<89o%Pn2=v%I$i@\ôf33)Aѫ=8)fG,&sZ2Ç{p=~ćH$IRUɛɼzs nϢULp3sF33H 4M$I$MY.K?ļDz E M3l86oOaVV.T$IK8.# H$y"02, DAak7H%I$i_@AlBln9#k@ ZJ$I~i*={>_m(ܰ&2 tEU[$I$mSH=!7>][$)NQY]6@7 "=E$IE-[*[FJ 4?￙{TDo]I$IYRE䬜ϿsY`Hg"@j7NlגԎޣ'=:"5J$I a$y}k $Ito +NUf_EO$IP1 gt,,XBzrodŘF:fж}':H$Id*Z˜瘖&o)Csd,ؕ$I}Sip !̤8F߫qF6jH㤘a(5kX N+=r jw I$I5V-nj$o _m(ܰ&2 tEU[$I$mSH=!7>][$)NQY]6@7 "=E$IE-[*[FJ 4?￙{TDo]I$IYRE䬜ϿsY`Hg"@j7NlגԎޣ'=:"5J$I  $IBsH%I$I5RuYM >|o*g/eM^8RӡGoN93)hɮ$I$Dxn>K^/I;_ˎ1]Ă #/`q7q~DC]J$I "ΐD>M~Et;1v4I~9zȟ/΅ W-bB]$I$Hm]cAq*g>4Z7ych? 
FOҠ:LJ$IR F,೹9r&NmV U2%I$3&ba˟ KlB&QPڍRI$I݄7m)ز =哻 mW$IiVM~)sbSx.L/o7H$ogZvF!Y$Ij>QII>6EƼDzdֱ$I$ZHi}38_%K1 4E3֕2o.l#bӁx ŃQI$I78a$6mMt©gӏe!uY EOӴ~} Fb$I‚`{TS5oC]$IZfM$I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$I!a $I$T$IRI$IRHH%I$$Ey=td&2cV=Vuȏ${ב{穥T>^w@֡c $IT3-}3A UT$IR O)&%*x-dl ua$I$Msω<*;iE+SH%I$<˾$oQ>Dw#u{S֗8UT$IR w./zZ\"E_TYRI$I5Y0H!o)'wZ 0翴Q a $ITPFi|>ŏD9\U]oF$Z(d釬z2t,gg]LGW0yE!-ZڋpUmHJs!'W8U{ΐJ$UK_`Fe%)Q@>H֨黎ԜHͩZ 'Ф8{EYEx/ɠ|)v#ZN[sh7@m*~|5y2(<HՆI$N3?-CǮ'qKm"~Hr2sSM0h(ΟKM$4>b7|h7ZldlYt'=f\@ިXr.IbZy?:L$))XDFJr)1H"O'èj]M &wuyq.$?ț8ro2͊ܨ:3Ij$숺{Kx#d5yoiDF+߱v٣D^܇ʤIS֗IZM`Ikނ+ İɸi'K)Ql3 !y@}3JNC K&r59 ]=VBc^o"y.[ %to,꽿sϭ͜–ir%߰~w@+bF^v9~G@T#P-t{;y- iul><&7W$I$oj֎Dz!Kibۃ(s7cپr9/Sh2. '{t.?CN'fۗ=q럚H&GIeje O; ǑW~N&|PLi]w[ew8tbZI޲MEqWU7ΐ!$ϐЖ{Ym]Jx6tSJf ^6w)gU 8w'| ]M֨w6 yn/kq5꣟QgW)!{Móvnʲc4ls ًE3Y%e"5~Β79IT3 FYmJĥQJ(&9ãI}J\q'|JP͈=d6O-w"q\֬}{oy:iGl׹}u?4c[>L'ׯn/L^mJxr=E*@*Ij% {ڏܳ=M΢|;qlGXyϱ6dѽIpڙĦ55HM ՅIsiɬ҃{Ç{H%IR Ne.$릛7;@RroL 2ݛm[2=w6]e umyig8m֏KᨡdmߞLT\8iG"O7lڴ vq\Ϟn:@2J-<O5#w)G`hIKUOgöDM!I]2 Ed+ )s-Ycʹ-dLL դL|}7\'t9gj0(;qҴiU@*Ij6$9:}?y9/,Lks6Xrs|O/}fWحOkBm/uL{1v\ky# evģ$(4é3g2f/, ~-Xbyː$ISѝ IDAT ?޿\ES/!?թ"E$IZ>;n(*Fl1G W@*I$)y 4>~`$yyy$"RI$I!`W$]m }^$IoɮrgҖG_̛wѡc $IZaaDZ-$I:NVyn:ףC@*I$)N:,Vyn ֭FI:D $IB۱hָnfIÁ\1* RI$I!U%QKZvi: $IB_?RBijFs$IUV1HLLknt֍뇸:srNbĝed7.5& $I޻K_JdJb{t~)Νfɒ%цqGˠLX0r?+U/i[tP!Iaaժ\saUui<=ACZk{H%I1Xf^,w{EitX2J$yyotr*杗3c]$ $Ie?;z> ywVItX3J$g2}v:]$=AIwRI$`ݺ҅U}KZc_3Y:{s,!=}97?F:fж}':H$鰳UEk3YҷTx?Œ(z}JL$IRլY `F++G$Wip !̤8F߫qF6jH㤘a(5kX N+=r jw Ixͩ>ҵ}M,I:n?Lɛɼzs nϢULp3sF33H 4F+IJ}$_;©^8 ?x6[$:,R"rV.fo7[.^_hC颖@j7NlגԎԡ#thOj5Lb^q"=ʄQ6 /=Y'Ah$pqxi r1J>k Ȭ´+SQ M({]}/<$>E|ed5o@dF%6U(`"0Jtظٰ!0du\i}t88Ig{O3iEY@*m[ѼQ4=m9p3k2ֲvb}.``ޱǯJtɥ&YMod$mg=I'111cӜu}zOIn҈{S At('a{C9+3/z\/3ҷ7pA3{GV$WRI$Ipj"$IP8.HQzr?xU$ITST1E1_{| I}  6OK^爡ø_+Ϛܽ 
gt,,XBzrodŘF:fж}':H{G%I$&B* cʿǙ;V1|Isſ[РҢ̙,~i[*mbI?o>GVk$I$fd -&{S\5_+nBetQov֠W?ͨSRoh a}W3䙙{ ՞F iSӥl2׬%c7|8aWL_G0M|(߁$I$X yu7㔿?ŃV]n oIV #9 L{ e> iw$Wyٙq:4n֚Ǟ/>+ǼC &nI$I*Nc:1{ `yeS,?J\~2yʼn>* hJaù{ v$ItXD -f,'nvYeR[t>ʵ[AϺ, Ȱ&jh $I}Q@xk!DUD gi[ q@6+3aY EWȒ$ITU"M[/Xr~:VYkX@}) ًd6?cBF1ܸ*s$Im*HZu@!߽:Y{ n|;s 3FBIS);w2,Y9(-\r۬t -ҎF$I/*5}֬Jqc硎<~]ޝ,~39~tƁ-g?b[-ћJ 4?￙{&W#m$I$zڰO d/sg 'lyPȆi;cVCqI'bn-,\_0׳x,>ζ@j7NlגԎޣ'=:"ٽ$I_‚`%dg}^wr_NFbY*y}k $It@z gt,,XBzrodŘF:fж}':HZCRI$IY:(Z˜瘖&o)CsdM$I$iH'+<{X4#w5CIq W ^ImԐI1;fQ6k֒>0W{+/#&>@$Ijݖ}ٓnr6Hޜ'xH;Ǎ`pĽxb߇s9$3AJKv%I$fZ|vI.K?ļDz E M3l86oOaVV.T$IK]HwӾyhSxIkހ* KlB&QڍEРZNK$IR[ #"׊m&YM7(ȚH .O2K$I~i*={>_m(ܰ&2 tEU[$I$mSŵE^7>YEŝѱZh {09Cnx.9w W]s}IR;>23Yl3'n\yEV’$ITeb b*9Xͻ |Ǥco) WJ$I "SL:nci8ڇDRmSRa8座8LK= f;ڍ۵$cgIέHv$I$Jxť@z.4_ɫW^12 ?,7GCn;_#VF߬']Љ.u-$IT;Tb/H޲|Q agҭ4NXD;?z4Ssn>@$IT#U"9s-9hOڴ%|dÎiu\yl2ڡj%oHg77 ~C>{,p%_:5 J$IRSQ4Ӏu5!|(olI:P'ʪt (ZŴGqz>uEӸWYPciT$IE%i8-6h`tvIRFPE?Ђ7!C:i|(î5d7p<[*I$I' M[Њf}-wNgɩOw[vPD*Af]̤WxUnWJ%I$逫Ԓ=8S,1__Rz!kSy-Io0)TrY,'VeذTN~u:׽݃oc✐V+I$I!<#LE8g'8r ~镌j^8Y>2;)uvHKF7ǘYTH$IE%EРߟD&I}Wt%PȄ#NϠy&Z&[޲%aoR,.Ŷa1]7 毆]O ۗV?<콙eR+6,nc˸Y O )[$IjRˆoFci3"'1-TG6At:4}&oN:&q82$g 1 c%I$T_L]|qEKy몁$&Kjְ~+hXɤ;\cso^ߑXD(Fr?}۪5-R+;,#Gsw]Lפ!S$I?a`pGlәxL~_nzClp 3yx޸XbR(g ˗Ԯ%]ּKW,i $Ij"!-Y:fBA<wsY? at~, \U_aD7uV!$ITSU"i"I~/Rvh?M`B*8 J$I/A rs)$AJ=m,G4IͫH%I$IP@Nl$bUZº7IJ2$IZR4U ؉|ao3A }ȋ)itoK|%I$IQ5å 9nXBy ٴ7|skI$IOU0[!ʱNĎmhVPBnw,r LF'cG]Dx<Px dr!y?;ގ!aIٶ!ʵlI$IRMW@xk!DѠ^N6Э+L^I$IRP@FT\1[B {t層M7_4aR/Z$ITU"Hl~4m07(fU@[Nt'* c Dz~9bz<>O"I$I:b6.xy/|)%%I$I^X0 2My%-W<͛{WzI+i7 ^z🼺 fCgv\mּKW,u$Ij"[caz1qIGwм}/ܕck:5qtޚa?\:z$I$|Z[;ro:4nFDJ_n Zۮ$ſ Yk$I$pV@Z̺3QO>]haӋSa˔y|_(H=61w:$I^%i⭅ *r+|JS$IrT"HJiF]63Y׀$o| ǵ e)/Ed XL;}%I$IX%raQ00%@ợoɩ0)ʘ̨۞a9 n4.[_J1Gq|FRI$I*7X."^Lc gv9W<Z0!\es>^bb"uI[9z)^ Z]NNmI$IʝC ̒r f;ŒzM㶁tky\3 3 * zpIKr̛e^K422dŮͼzEKViڪdF 0c@p| D4r^{5LcDDL^  !"wX[p{? 
Q&MκWp6 HEDDsMDDrwk)Uy8y^ݯT`O֟_Ѫ?ErnӒÝFr vG\ύrPEaDrV~HN7şYJ3_v\."k}"b:DD:r!sĻWVSǻ0fPsM`9!N.[z|U,5jy״'a:F(Tr=|j X&ojA*%""s˄>x'/s~E'63DM] >grZSIܶt=Iwuuдy+Z5."R yU,Ӏw|ϴ?gçniz咒ia(|?!]:`4o"cc%\7[|\ À ,)ܯ:Eg/¨) zup,{J-""՝[EI /2qzԍN]hĞ}]c"k{ U֑=+ؐt֜~Gԍc2i8]."XFtpm .e{("t+61 6D`~y HVl+a^ |$x&fm3k`]^%{Lg!GHd,Oƶfy]V~2}2Ey,>9%/>˿Mf<0FuaĶhLu %vO{6ii9}헵,o=j |?*ǵ8ь|s yz?>U6vڳH=B_zb}lʹ%0*@H%0a~b ݊Ve˾;Ɓ7B |wZWx0&|R/hp3OX|hvM~Ԋj\Jn$a7Fr57-d'yoz+ٍ18CTIsR/˯-ռ)!yp)a9iٮ3ߟMr"LKC4[d^'cG8h# ۾ l+tpZ`aZ`myEhGFrvd0NNM`&X7i\sX2;7/?"ࡶ73rN6}=~,AhszIܵXT|+D߮淼`:$RèztOF߰uҍDA@C'SL`9R & !7$,+y2ggoP)-kYJc1?eA#|<ޱj""rVeLRoE6>&=_0<7,랩9aUci@01 jQ\躾LJHEBYkq,@jIh_|.(|IJ㥍Av<birhQU$%E12m,90 s#XגPN|繊?›.+A8K[kF|8R3aP0 v,"b ;kcBFƹ.YkvR#qdDv%pX KZSBoEn@Ǔl_3MƂ@8>GU.M}R8sj9 J;AkO%w{&ORe9mOg#SKEcŷ?fWLSjEDZޓpgbU-ayѱ|ED%1e3~E$"}l'yz%&{ASSJ%W CFU͝OVOקl>{JZac*݁cW,X?ƺ&sGpPNǖ<]HHU"Um6m ;?@$5ZG3SDDǽaٰ7_xGvQg%6Frףәl\":FV4/"ο-!ި|nz1'1_$i6-9ܦGG3?s+ǐ!ث3[oĶ9&*Xow⽉8e嫤~E?ǂI_ӿb9,~#QR+bn:irT6įIbkx -EDDe**žnH;;`ݫЙ.5 ·";u+(u%*ÿ=a'8#G|DvXFv#xLwf81 AHU?dLѥ[vcJnxyVHE8Tei ;v"|c~:^!-&al߉[n7q0v=]#ʿO FHsID(~ÝkeߟI>s=)oN;tc[y1>h$"""""r>ܦdppZQR~M>ρd򫀦V{?5٫;aspA{1Ft :""""""-qb|_Z s IญSvDžyDsø7fN6}#?o][Y9]cT[4kDTִБ Q H0{8ί0/SU;&~+ nEkELEHߴ]p%3֑OаY=$`kd\c]E|ln~g!5G7jTJ+(7gCө9Qujj. ,R^e_=v3m3 mW "Ç۶-m۶%,,í9?^NYYay6l!06qiT7:!&5t76Cdcy2NAtb>䷐6 ?/!/2A*"Ubaƴ'yorUhmZOtlP+EDD,Rr Wzq+d@8ծ,u] |w:L][ÛS#TQ19==v& A6zj|M!-@Rp^ 9֊ߗ_|7qo`-aCh׺ue6M.aۇSt}( =C￸ j]{oqu" qN}O=cYq`o49cNzERdB I%!>]@*%ڷo Nʙ0iۦ5שTDD*Q53mwK}r$-{]٤%`Oysr#})6KOOeӶ$2F9lJi\͛p0zZ6x#aX*i""R:g[1g2R;SUl k8W1w*0cİ[r(9|icђwr/""rHؓ=lݍ8w^L$H4Hzpx9Zx$>~npOߞ=S?'x44x%&==_{KpgTDDʠ3o ʼn@-bkF.\6k۷#'W~ɞ71yNa~~{c틖1[ʖ;wdؓCgyέ4p,/R)t~r o>ЁzyU""r@L^qΫEgۚP<-3d\>/Yly"nJgλy(ff䛩xa! 
m*R "nJOy@`?7[Ы޸rm˚+?z8| \{W:I`Hd.H>yO>D&@F3DF7%d6p0X6@ޣAvbBF\|2&\J~G2]ԗ.l)""""""UxxxDE6{>^2}; -V>"""""""(=ww~d^[N&HuHi@=+u(nF7EDDDDDD5P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P P t 5m[پkIIؓ`1WRq6oE\ *{ W,o!d9ȏ0ep^hezRQ6<7on!CҧSs&"\6YM!y/|t1˾x{6fڒg$@DDDDDt:ng8lmg[MLY ̓i?7&p߂͊nҘ Iw0<} 6+^CIUHDDDDDZEDDDDD|T*.Y=g%cD*UeWDDDDDUqNfH)l}sЂLu5ԉew|?:>h9a#F D"xL{!p^%L7B9Uq) ӚS_㷼`: Nh$""Reco\2ލ7zy{*HRw䏃طo;7g1.>4 y; YxGS?*ro!X>p1C܈"ՋmF{`6!N.[z|U,5jW_""n;ziU1:bʉC~@\qQˎ;t5ʆx\̿U{ӀEDNށ$gԢL4o>vH얄O܍VD.y+4 eD'iߦNl%ODD@KBH0a~za!o}qE- !rpH+wP%~ Ƽm_"l-_Hr%""HED.%!Na߲SsMK<ʼng!tpΤs4Z}t/䕋.S?l.qWtkjfK~uzƸ'?xwy6k-eH y:  "F"RU%!Nrz~ g˄oj!X\kI'.1r^x8-,ȐmD.}spc[],Y 3/"RZ` e&tYwc>G[q,EƖmeOTD9O/ \Wʒ9Z `0/\4}H9Y˓]כgjs=a^l:r nH ܿKϪO9YBd9p啮c*e7goc]4dWD@\¹;`L]mE4dWy1Dl=X-3vn[E+zpkE`䬛Y^( eǾrG'u;k8b6`hݗK\;^0?1sna9{zy,`s9f,^#"2!S?~kTXFt%yC͋J/o;1` ]XDҤRO:VNr[=0('riX==Qt:b"$R 80X7`yj<٤#sn)30zA=H.3ɸu$i+pc(8F]0}IG"R=TD@y;-8ɘKBw+T GqfQE.r6ZNػZ0׀=*r""k27fdUtOIw@cߝZ l',]*h!}EzzTɣ jBWs m>քaLjf"sw`YTE8&^*'6'||w\撕9"RTDKaIL_he~OƄw\A(D@3m߂؁f ̊mHeJ;;ƁE` K\̑[gDSVM3ޑ5_ s}1DZX[_?h;O'{,'Ӥ~""UnpT%!BB]w%ދjO%w{YYc_0 &ٱQ?CwtsqGZ\$vkJ1a1YʍQxW9{ƛ;FQ#`%i.T# ""UvIpBY㪶[;8U,ż8#빓'pX0@TDE3/`nd{i?!'PX,/r[b +`ww]4rg!h!LJck:=,a&NLO溴r\HEd)ӡ4Ťŏvmς6lĴX\?e+82 ^C9` &84j@D$ U)[Vqb`io+dm9"kLgLCI$g+tjYxlſ\Cn3Vs~<r6-kdb[1ׁ䩋\ρ#qݙ疧8>x} ׵po) &m_oPʵEȿ=a!h@S`7YɸslHrVג ޥn\-!ސ|nU* M g޻=G< ķXEܳGun;Ll;5DhP|#^~à1w H0` qUҶrcݠ֙&;??`9 >DZq,m,2U|TrRD*)G9:d//Ӱq>#n'e?%-v2*oZ둤"K,qk6}YBNŲZ(I{ u왯gGd?8]8U g7YcICמ g\{=M"e߄$DQNKZ|0A8-߾5G>^O]0s¿ e@SgvƯGVÚY]'Qg  h֞%ԙ/% *i"k]?b 8~2anwGrbEÜpֳS \sܕIg$OprFz Id=;:Oȸxc)8vnl$Id֤xXV%hQҵPxS|',$x8Gڝϒk]KIȗ s-ȝ9X&Ov}]8\a}q.+K=o/c^!׺ߌԮprwl KBUD?%!"s%݀!:#}¾c8w#%w%.e N) Bڛnڷ2ExR~%md>}%gŭ㛟yqEK]7ϸqG,|Bֆ-6'{hߛ&T+!jra '\ p9%˺4 = hNX7`z<ùx,mw}Ym-iڼw]:uKdM܉aNv ı?Oֽ`يx/8e/"R^x7->Rǯa,"!例p&*v2'bmM2-GsHcxRhI#w^ؑ8^pl"gqwڍH{wS9yɫp}!|*v\CZ8~ 7%`Hw7~x~ĺ J%-yAs 0uǿk}'"/6$q?47KV*CNj)Xm8)WY!B"E2 Hv|\E>q`0&%.+#RhM$=A!}tl?"{Gd;GS|_EXlpɗhM7ܶE 
Q>oxa"wc93Ol0h&ƔwȥO{US;WF%%5`lvS|ʯ9ynG1&ёgn"!kZ7`7E&'ת/ q6߻30#|B/} E^GȜ%g^"I)ż8u8s5B•DN?HDzW*%GF`}0E.Fc89bvc5I8>#:.1%|A,b۳Ƙ(ps)/"UCjrlpnb5q[QDyӘ>]>1ʻF.ϻHg;Uגd.jK d *rE ;qs_EDDDА*˹OWK`ۺ+vc9ԹܖɯXt-J*"""""r1_|4r=L;A^i땝^\ Fҗ1w8KΑgMݳuK*"""""rH"K*K w o4WaN)pC,~rmg?l׵pu7w,S̨juſk89_+.}"=ֵ;Tuu- xX]ǁq_VEDDǜz[~*!"RHC}n:ir$d}^_Amւ˾]|Llk_؄9k5q=V J}UC(j-AW]>3ܥ]c`wO&`76yaw-n! -޳ ʤQ%(j/Ent ϼEnç[ g0*`0`&\Gtn+ţI @xHb4,!gr$at]FEDDDD(nH!,^"{J`n\TԨ+2D'߁y""""""Nګ yA _",ۻ(/J~#vfj'I˭M[;y*u*a[f[ZY&nJ}5J<,+)7f\<NQ**=Ge퐚Ů*ۮM;n-]ua.AԢU2vDY欑xi_M7KoxQrrrҋW&J$%IMW˖- >pF9k:$ ߪm+{ hIޝSsY"=P T1F#jClש?d7i%< [-z"GܧOmpیhmlޚ?1ePƓ:Šj"I2I-j-)wry]X[8W1~^u޳_7.ODl֖J bTR;g:HP17:v#|d닓vG:L?eNwB=_gNRϧ3_I_lڨb#4hHHmUqwB=rpGI'|5Iҭe?s[qZ~j͐ i4=OMI _|UsHF'WP _G&|xv/ԛMPIZwM6RQ.`gϢ{ЍRR2Y"6&7\JC))$I!!w TmdfWjS>Rꑒ*ͯ%We쮱*K[Ͷ+.Q[/+$)W2շw %RNPv ~&7ZѬCH> 156-^nod۵B#ew9v@IRZ\^aL]}wYRaf3o-",hHHrZxY] %Uȫբ(c(YRpЂf.M.JLV]̐T?p IDATz6 2q]kʹ;~@5y{5U* F9}$ޒ^\m RhZSWt*˧sT{O)DnOU`ש򞏫>o[tɩeѫgt_VM?%έ1GܬLfjhX{P]$cjbRpE!xySW_}CىZ_-7JF%6t tG $iGK-|.BF9 -OrgAN:Fܕ/"I^M5Yw& (_6?U~X2.= :]Ye_EM%)]Nd_ֹ2Oo4ғ-ܴZ7SS_Ҽ #]T]+ (YG/u*9[X[kN`*Lm>I.X;sϭ?ɲd+8Yfi}]K~AߤbB jYKwX5+y0ΑjN.VN7^re[p.IvѓyJ`+>wڙ)9*xkVk+\iKL}P qj6.]1jy 𖔻]9{Δ,ú<>;oF-]E ZhK/C>K {mhv+uQPKkVd+Iٗ 39H?g$ɦIcT]uX  V-F:0X([TNb`m;zvL7J85Z{UXjB y-DRk&jD.ӌٳeK-\)Ԭ>.˯Q`oZ>%]RGԃdԃxmh|HHð{s&(O%?da\ݎ3uk&k*h2Nq0|PFeԩY+ p )x Btϸ'&ꣵ '=zf4%'mTΝ8o6E>K: K&ԀJоyt8%]5Rmx{3~QT\ZԻwV%|4}:(88X~~~Lklrߕ%Unyp5%0ս \H.&5jhrrr܄ϦTͺ󔩚X3Hj_l14R-[?濥9^UGfjw.~dG i#@ )hϞ=矋7Lj$=؂\Xn͛...]5EC ݻW%IW\qzVhhhZ \Zl1~ )\vH3NԮK~TNW*n˺f@7=fΜ9q4&!Evܦ!A,=_tdÚOa4lF.ǐjeUI-]n0~*Rԩe^RKduifN&Oӎ01~]vQZ)ϵi)bϞ: @@B }cowQ&ZkuFf9_:C!!E%TIMj7 )*ʷS Thkk7 )*v$w+U=k= )*wc{y5W@l@BJMw]ٱ2$GW_4|$e_VUW nѲ'8  )Թsf=PDy0mؼI7_hȼv P3A:|h|WffRRRmmwܹs|?Q\i-ƂKÍ@#@]!HH !`R!HH !`R!HH !vnۍ SP@;jth$Om)6ju !) 
AB 0 )$C AB 0 )$C AB 0 )$C eFB 0 )$C AB 0 )$C AB 0 )$C AB 0 )$C AB 0 )$p-#EkƪW@0T˶*b3:44D4m]ǯ-Ox܇ښetdh0l"<-2mEWT֥(JpN%:߿4dSPY WeJsnN$k.Ku}:5cǽgE6YR5np=;?I֢2feUj,E>ZRu.#!k>E+Yzz&9vTա/>PLd/"EKDqV)o] Jqp;jü?)غQ#*a%-Q3GWll7dH!#fֵ%5,Y7#j:^PlRTm_ 1˚7eu]겔ƌZ\e׉-W)mMMx= v,R WikkլSۺ˧:IJ}k G oH5eSx+Ug4%tp44Lx,%oTų|JI*K)2̿Y1YKu'-u-Դ)XV=}3eIhwLF%ۉO4E(JpNk~q5eY&=2޳HHQ5+;|^[Wz:D̲ Y҂ѣ/= Zgեg%b׉_誋ZafI2+x-1W#&)vB좁} m[vOB[igTlx2^Ze)\/;cل)ٚ7>t(Ap&ɒ&QZ0{; -2Y!Fݳ1 CfڤEGLnFFp].ޣY*M UC͒uj+YϕU%fDE_h}q4tS\ٴZՈ]},헤4@ ]4K+xRTehaYFȌpW"{\HFtl'3[PG7f;<}Q5mXɺI$2t  X3 USX@uJ.[d~T<9\cMY)[²037Z>qE&/֒#je\j}T| e;yzJRQ^;ZjNy*GzD2 1߰ mXUFQaJi_wn(T$~wiT\iR,i鋔0=6,*>S0r^+e)ܓJ];lǵvJb4VeuˊYS/Jyw^zviHjjthL ,ҘQ5$/Y. WUԼ}Z3Z+cJA1/**I{zjM,o6 ,{ 5RP}؁,\K<$Jgzz<_s|^6/Utd!#,Qf/pI>ZŌTpv#5oM#50ZLYPRmynۍp7a!HH !`R!HH !`R!HH !`R!HH !`R!HH !`R!HH !`R!HH !`R!HH !`R!HH !|e$WP@`qvk,FaISrXrRR,}֩Sk=W~ TP@Ohjjt&CdIiz{cP]T-9L{N),r^"rI Qrs@u  R@ui%M[7{3(9e=~ZN@vdZ%Rꅁ\^eA'dV dmo @鮁&iQbE5~':a3:0T(IP w3ǭ+$2x4V=8KnxW9ktHmkkC+]]R@1'!M")Eo٣r;J(1icb)aՖuo+zu]3ikZpO~ JTFzzE+5em}V\ϔf~S-#E]Nu)(utͰ:+IghDN4\ HA-ֳĄF ~3.N.q %M[9M5T˶:Ձ,m]V~1:䧰2ǿ#c&:j $YK+i>YOC[QMphUj,E'(2ʓ)/k_e*lIl.{^[_՘QZW(v )$cr8G'S֤@z:ǭy$)K˦遘%\Q {9?;&LVAoߧ1v{>;P'/'/:it]ݺD%=qDv=~r :>`u=n׹b\{ߢHQ!]' ՠ!d?t;\WqHH.^Bj?`5ޭHC9Ea]g)x~nG}o/Ine]yBZK{bvY[nwۖ IԹ/qu$G/$'e|nA"s=bw%HH=:K_ϕ)H鄴&9! 
hOL+U;{ l)/αZ::1z3 ;Ȧd̑豰oO,YӔ̂YK2yD#B˖5uISTO !2k[M:;9_Yw;K}]y۔z}*FD(g[|ה$Y?aȞwmwt5߯)ckB}XeY{?[t]\ݷ1S@u)Sak|-c9Z T'fp+?k]رeyJx&V]7uJa/Z۲z\V/m?UӶu9VL#=aFO)Hzz yv0*GNg;3@*i@IJ.ђ]<:Gki7HnhIk|mEK:@?qT)d %p긾=#ږ5&e%Im6iśIZ8ynﭠ!3_r%r[9IR|UAϓz?g |Ah!ԡ?Ӡy͈+\?:%Yu:+Wcc T#Z)\a8BmJ5s~%W(fAZD[K:d*걌9k -cA'dV[S NR~=bk_ ntJYZk[Mʖ䯰{"tߤ^mX2Y̒olUFU-ZIR:)jV;A˳ Mպ*Y/:L |AHHu&˞M`cш]PIDAT:uJjMcq#e0]=KᓲSHj]jTθEA5c|-UҮ:QPO\:ZdEN-ue{O@eupsf;OJ |1HHu–EH2+x =-Z}GWPuoYjAKW{hdMo-whŪՌ&դ,ɺf(ov](I2GAU1&ݪfIڭw(vb^!ɬTGGF]ԭ"zU9׺*|$ZeH5q7(ʱc8Ŏ-cF0IJJ+j:D͛ؤ )d^*.S4Ӵ2R]MeMw 1-Rb"KذP'ޠ'!J]>^c^xi),}ƅO&-F3:H2u\9e|ѳ*0݀7K&U9|ޞ]˱&gP}JwhcUMbpL(K17x̭?bnY/ZW(f Ŕ)A1/* 5WYH8O׈ew ZŠ |ռy*8g4}:Vې7?es4oNYg%ʆTw4we݀73/n7:\FݫgTg,j~J~?lΚ_@_~t403J5[U govdvHFH< w4~JN+^$ŖqS4j~ܚ;,.b6t{)r?N "!4P6YҶf n6u Ƥ1x4R!0K)Y=IENDB`vcdExtra/vignettes/fig/mobility-quasi-1.png0000644000176200001440000001646314470742314020443 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:ffff::::f:::::::f:::ff:f:f:::Joff:fff:f::f:ffff:fffffffffff:::ff:fffې۶ېff::ff۶ې۶۶۶?jې:ېf۶f۶۶۶ې۶䕥fې۶hԷ" pHYsodIDATx u@n[7ʭ?lQ6r* -9պے)c\p>I$ 3`lu (AA4 AP hD LNз?ï<`.riwZ݃ aMo#(%*٧lHӻTAPLJeudd*I ^eoVu-myjOڷ_D>UeTN~#|uÎ_f?o'G?MťB}<*pJ^]q3ޔVnJ۪0%AW Tޚi7BGswy&&N !ߞV./\9/4MstekSZCRef`V 'Qͼ\Q fo72mp'፥Bκ)*} ]V.eҴUZCGَY-JhUm5+N^5`p^.>Y)X⺅]NY1-A9Z؝6hk>AϒouAuY2\?t8za,X9RQƺ"8-*NjLvh/h/_imr_o$vD66(+̥B^\,8Elv"LHVC=/UZ͉)ՍK;gsku _仞 ZTmډ0-A;IDYن?lGMvmR[5% ZN;$hvsNaVR.6ª^ke,jX/[H+ًov"LJb"?3Ry)L˖5h6"j$6l4v gln*5A[4nd'u/&OVR>xex|@, W-d0v rlnJgڤ4B"8 ԣ }ۖ&gə'?Ǫ^Nب#Sk{ u ҽGyqiP+{GjzmN >Wdb‚A;AN k (AA4 AP hD  (AA4 AP hD  (A ^σ橚gr&(eV攜E4\ -}wݞeT,(AK`\Ap$ -=ުof5Jmp:Y9~>NIϞt׸M2hgzGgAJe,'+%mEI4R(' $ hX5w:^u2L灍ۡeQ9Ï^ns3645/vL,MS$|Ó( zr K ˻%&5O,>K-^οr2 4Ajg5UMT 5\jt@ -YY$s/J]GUYBM.{nEMO5*8~N zE]L~-̯ (AA4 9AÅLmyr]L).37upb˥TL=ؐ A4yo^Gngg:AՆ޻&^?(vېAmQM7}7ϦGN's):ӆٙ{P!A .?k=l(z:}v{~zVeS/W)* h/ET\;ToHw ٳ3jC, " C4$nGIez\t^Dp ٷ3]rCjNt0?eAu{Kܼ_r^Ф!{vTnHmFzW-&lKdpU iCL=ܐ yJmCpNjTQ]Τ㫖,xz6dt܃ RA[t(⛅Tn_ 5P[AK5eCL=ސ 6r@Ёq^69 *ڙ9 (( AP hD  (fTA3nt f #ބQNfne87+$OAm@P h/6  Am@P8I<[(8u "hnU&6gS[T jwڄ+czIE&'V[T 
jUgtGPm]r5q7"h/rrAm&)Mp&jAm@P8I~{!AyE&&jRNu"h'\&_MM8Ly4"hΒW]ڄfS'64?urAm\"[EfچS@P h/6H("hΒV Ap<\acAp6P: k5(v]4>չ*DPmݱzE&A{q|.zuwMN8(v¹IB. h/6{ K Amé^ mjA{A^ mjfgjG c&hAA4 AP h<ߊ;eclHz5@F@q<%V7;z1.{Ap4rAWɫڈ˝!(64*0ڀ9$3_AwA`>A WɗAAp|UxEF879"hZ-"h3/KgAp~E6Uir+ sn'nE> @ie+ だ'Wܞ&m # MI_-Jl7qSЭۛMS UPz\<Aoh͜Nt|t :"ՙ/T"Mr}x.kU_?*K(XAG49T8>-PP$Y<0/:Gep"." >&Q8<0}oJu*JA۷_?XǃNJe{s)L x,֡*4Xq\Ӌ5T:.Fۯ~(|RA%rf2FX"D\/O>|%<0.*93IN"̡ I /;"̡ ž9o,͸$\ EPx h8;l^IbT<Z3y؊3I9|A>ѯg.FT<>zj9wBo! rUMrG'B9|A$7WA 3Aç1>4.zAc~6Hn`qy`4j8'Q8<0>o{ԘF 8x hzAFr+ sF}_aGA͹%Q8<0/h~̩ADtf~탣g9Kn`qy`_*(XAG͓(XAG}r+ yr+ s~!ϋwy`<Ism #  ." PyG9qFPNu:<0ϰ1mP7:IDtQ=JmF 8"h6 V@ǑAQ"@e J'M9|A7[5P0ムmIAb0ۑfgC-8;$h4aqwV߱?pC.R 3ZFQ3';. zAeበTvAeበT~[/GPax#hˁzG $GPYx#At?lLE<#͚ EPa"28VMI@TAP#9A=:ķxA먇A)9Ar&ijx#vi*mAeFrTj$GPY Ae፠oժ* _]G?yXdbx"h>}͜SAU"E&7]2qD2jSL_<fL6=|ti7"A[&GPY y0T,9A7WWԠ A l[$GPYx#hc< Azqu8 AU?)SA* OmAeᑠWo1ԄFПя3EPuO.f>?zM O=ϣW194AY .G;EPax"~'9;Ei\Y'n4#,|tyy&~sf AᏠլYT>0棝^|^y/./QG* O=5BP_>A፠#˪T]KCT^zsuvqpg&fsE@P#9A7~CMxRAበ㸋׃sH Ag>yC|׊"0|T_k)x{5xx!'5߾?^Ֆ_^%Bt x"hM x"h8ڎL _snr'ޞ>}^dR=iA^*o]<A%㉠ͿΫ۠*_ U!d|4/*>EPx%h}׈"0|4?Y^* om$Tj$GPY Aeበ/ުxCPn;,j$GPY h* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5߁~Ω{N.ޘ14  (AA4{VlM`W\T05yqϦ;PVH] (v"mzj۱^ (v"mzj۱^ (v"mzj۱^ (v"mzj۱^ (v"mzj۱^ (v"mzj۱^ (v"mzᇠz"W J.Nw#(6*EErEՠgAp|nrE :TZA:I#IN h=*ŏ Ctdz#8K.6> IN h=:2ZWԏ ,zűyE&uζx"h.A7ᅧApy&K5Az%_Ex]ɗ'!(68 h=:2Z䕗2mTPE&tdAGAAБAztdG\ZAa~dAGAAБAztdAGAAБAztdAGAAБAztdAGAAБAztdAGAAБAztdAGAAБAztdAGAAБAztdqՊuZ Ҷ?՘@P hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 2eIENDB`vcdExtra/vignettes/fig/tut05-cdplot-1.png0000644000176200001440000001020714470742306017726 0ustar liggesusersPNG  IHDRJ NlPLTE:f:::f::::f::::MMMff:ff:f::fffېffېې:fېTd pHYsodIDATx흉vENlM,n&i: PX=gz`@,}D `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `p( `0oUǏ)z`(oCq:lJ`TC t>~) QiDgňg'A썂`p(qC!EǾRy;/G#%ZoVRp0`p( `p( `p( `p( `p( `p( `p( `p( `p( `p( zPJ9;XX`, [w]%uB!x,F08mx) hpY&t8e/N 
hp(X(8WU?VP堚C9cz× YG!68o#\a? `p( `p( `lRRpT(#sKTKtv)/c2`D_ dKhT~){7O~+{J nޛǏq+S gI`(Xz%:tC+f2kϧXiO^p5[T3 ^NIͪXG5 ~/ߥoQzm1}MR'8^6Xp?kl8 -- u K˯V)[CO(3 }mט$4T(HpT6w!~ mpʵçVzWEk\[xT#8_ˁ/t\jüwcq}6k^K QARKd*t\ː)XqxrE-pk[Ex@*~gt\0#xdZd ^#̢EZ'JyJ;~e_6h\ڲ'*ie{'{l$YgR~_SwESw ;'*7mBi$)`$~8n=: _~pM2,>tȢConHԯCm 8}6iN~]:: yӗ9-S4^-'-xEO, &vˠwX Ɯ̢u|݉w< i_8ݙ'&Y3.-*"xoi~aJ,/fuw'Ԇ6~x_`&Y3 _(6%YM$k{Vz% ϧ,UќUyV_dM:U#  blp§`.+^T;.S_ fwALe_م#o=my}]X`~}^. (y-vJӕ}ٙPwrim ]2"}ڡW.},}^|[A/(uކeZ bŷ`T6i^_~{J&l:`+:|oE&/+ w*()Xn[C4u1[`5%MPp}IlUf75v{Y9}:=%8o[87%waoڒtJy&at~gK[0izՓhzmo #XV~@cW/8_|sxrNl)U,8g7T*nm[=qWxBYz# N"Y*8fw3yKLh($ o /E//`/+(xj +mD뭹oЙ vl\D3ڽ*޿Nk106!,3V?\kEׇ넶 8`zÐE-TJ?n5 fv8Ga+FRLd\ELۈv8%!hGQoNimzwY^NiW~D;zÐO?zE0ג<S5Y^{2u!}D$.z]I[0:`WAi _?$*/LHR0#A}""x,0vY0zE,>û8o8g`qmfG.ɘ^`͎6~%&u&~z!~0 `UGWskG.zxOGኵs$Iu+S`,J@ #++p<-3^Y@L!wA,w$+\u[팎7^ 3:.AW\/6OȢp&YL,u)8ѦƾR} _lK ~ѳj)8&bRpX,&Yq_t/hEǾR/z<踈҇'݌7^ [5R7'+* E$8fc#Y?޵w)K.go*gm-(&]F}Pyr7i_ᔻIL݆*e&{/NG[2` 6KZfeC/r|NTy$-~>|F1BJ7·J$. =E=M`p( `p nZyt*spϧa$ȥv芾zx8ٙZ9[[h7cn|>x5jMxpW%PMOA?]W=;|W}x(&jtoyEc1 *绺죘K3Y7 v,ۥ{r-񿣏rϺSp=ngT%s:f~veu.+vH}f)h6O( VU[jk; =a"L}vtS'\\iwa%+E +]QQ%Rm0Z)^UMM_3)eUQ7}(;xw lEe^^՞W?'M*n0MTviŒ,q}eDgV~G7[QCKVe<}]iy__/U!3k3=ܥmO*B4sJ=_3Cg[Wm)Ve9G e'*i K=k4B4gU늃ñ<$W{cD3ds6Uk+*C:lB@F%]|oj$?2+:Y[(r"PzR 0u$?%*jtU ZB^i 4 sU±7I/NЍgmƞZrUYʉgȻ}RnTW+k%-ZW#I@g -fM8]I/챵WEk]]\W٦<{aYr'yXu^yje͔6vTk$8P5K8E>R^7l'χT~Ɓ7^ziƁNJtDc(ߋ;q9O\ 䯋zq*WUތ C&Y^u&@1(6 @Ĵ }L $c"P1m@ 1diA @LH DbB 1W,*G= >AdL}"h?}.ԉF r62HSxyl˱,@C;p&\@2e  2@3*0rvl;'hKJRVM>ԹJVg+Psxx@6A+gӿa $L=l yaP&(2Zקh@A>18hrR'M}*4 nD߼ܾE)&Æ-9i1|L8}H"[UB@@>wU3ZC'L@TYu.:6imrS(l[wS P?^bq#' T$: 6HbȓsD> B Oޝ4zyAYq"*΅;=~6_ BDC[FPtUG $4H.\~7j7 i(4=D׎dsA$V4%܌O66^@ω@Zp~׍HP *hsm>-i(4e D'"\qk-΍a ? 
m@C'*4z}ȷ!@S@sf~@~@H1Q[=^ /Z_m=ox@ИN qK3U 'r@ $v#x<J:>4&ds*,YǙ:-i:ԟnJM5d 7Rn,lξMS/մl@ڇ9v@uڐj)14{L<NhR%48mzj?@m_@Pgq4@h-߽w'G8Uwy,>==%_~lUsV篥+6=VU>zw[hޭ ks@'j:+E}~Fkz.xP,v-om>'pI>-B}&wgV <܈L QseLf>x]dԱJ[a 4̤cKEj;i.yp 1$=6Hi^y3p*# Ɩ |OӮ $95FZEKb; K wl}2f=xfl!ǜTjZCPZS;g'ˬJW6pN'\M+4mG~{љ"ˤ‰#@ !j,(뼧 ^|P|Vy#7ȍfl@}4JӓD w=k&,OUQF'C+< *OZyuyBUm\ː@s\gƄ{M< J}S@\Q4+ [dW#Pu@^@%^oW`wNa$z^59+ $(0βT#d&Hmo 1eNj`&he!P=nYF8(E\ zݝ&hr4aMʠHe%6 0hz.qa d3ZC\4pu5P PA$P1 >^@a@+ @u]gAu\:)\Kw!PV͟ Mm֗j%' m |H] ՉBK'/d|qz ^JLQh|h|89d 5PDɔ+n Q]!Lꟙ8YZ ™A틵@=Kh@G^ urkrz83Vd@6]'r CpVȆ+iD%P]#.@F >y2@\*΅Ȍkv#r o? T3*aMB Cl7'j\f 1 pD\.b3p6PZmj5!\Euż.Kh@\8EKt@ѧAG\.b!&ij)hPh|V. ]X&@xHMN@-!6'Sݟ:_7DچU݇@(7cF m{ai;:^X86qHS@K'^.\XL.f7w:ߍ$PL@f [[Au,Pq@S!6Amm`ȝmmFQH{қz&_g.M\ w:tzQ{Ք@0@>\M fjM@ gYVMzM#(e~25F~TMȐ ;8ic2¦6(_R6z C[t6Ψ'Pс`+lNޗhqy ;,aSA m@́Luk7LE 6潰H.pO;ϴ ?[y@sB m=)Pa.ᄋhr% 4⁄ꝩh8|A/Ty"*i<i#Ј4yF=ў%x@@@>R* RGY?{h?@/ ~Fhd@h_p޾{ZvΓA un/7:hNHKZS>kldMHU3`kGe 4e'@^l>z 4B u; 4R'zZ*PuQ<A ub>6B u"R'n&W ԉV /H i~cCzUiU@;M_vh??g@ N䱞RGv>g4>@qM)> yW@NxoHhC̀rO3Em7Ȧwqg9$̀t(r Q̀t{{F $hm vm ?4n|p@KȞǁ@IAy Q@ f D?4$+ `Av QP5* Pǁ4AT f@ֈ^f@ MwP g =LUh>̀p h f @A@xh f n|sG@73 =X!P\X #s'@ _vt;Z!Pd c] E@C c@(*@Z!Pt(o-J<1{{.G  C[R(>n,켷p@:?J7zIRpUEƍi uEV/:mQ0QdyIVS(8@1=zr( Oa>s+4UxQhDoǡDžCQƋ@3@3B tT '6P e)!R(R^w"@@^@"DSx\E @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @"Dx {4ݘp)cwoߏ @.v(ږ!Ph ]QюGfUSvo~<_uw?;2mܧ/5k዇gɋek`2 txsRHM ŰE 2_8iE L`S4H,:bhjhw5MunFL@@Q+ԒCX5P^.EGVw]4ӈRt`uE Mҍlm'P/ҍZWgĀڻ&AڕChSe\28 "D @ @ @"D @ @ @"D @ @ @"D PŔS\T1abIuq 0#  PEcNp|Nµ;]&vo_KӥsUgKJ'IDxtdYe6wc*vC6_Z~榥>jb7o>6//߻IUNccA"[@FM J[1@  @ @"D @ @ @"D @ @ @"O)_IENDB`vcdExtra/vignettes/fig/tut02-doubledecker-1.png0000644000176200001440000001525014470742317021073 0ustar liggesusersPNG  IHDR%PLTE:f:::f:fff::::f:::f:f::MMMff:fff:ffff:ffffff:::ff:fff::fې۶ې:ېf۶f۶ېfې۶Uu pHYsodIDATx흍yegoI'۴k|ܘlZ/ec/@ ͜9s~], 3H xZf-@Y Ьh4kZf-@Y Ьh4kZ!IjiOsSxPrG(CW}w4@dDq%mLm:~867u `UWwׇ Gb@s' ͧino\7_6?O+_W'\VŢ}~_5'*&pQԋUEU?@t@}gO} P= \~Oj@_$^Z͊<i$U_/sĴXOo},1oˢI$vcLQ 
Erhgj]vEyFaY]=`{rw[6.`)C*e^Z_?n77]`)CМ*{S{{nc1Es6՚^_qv ƙ9'Ҿݾ5{yi-Cݢ><Ƙ9//wIDa{ƢQީƘ9/oj':ݾPFaL,bm3-y͘"^홟>S?^ \P9ʹ6cl{QD Ƙ9*Л@[htcLQ@zM- e:^1(~ &څ2@|S?^zo BN@`)CH vL' _0!W@P/cҫ@om]( z1EsU7.z= Ƙ9*Л@[htcLQ@zM- e:^1(~ &څ2@|S?^zo BN@`)CH vL' _0!W@P/cҫ@om]( z1EsU7.z= Ƙ9*Л@[htcLQ@zM RE@zMgp8@`1 Dő/ _[8sU78z= b'6r &@@.ж5G@ +u.qi 㚣)JS[keco -:jN@Z69/9/zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO 6'.@&.@/zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n;2W2zRBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =t[p @-RBO \)'n .H@J K %m%zH =tۋ I٫Cǯ @H =t[p @-RBO + U@om H!x3bmKC@|H@J @Hv/wN߯ͷ]?` A `uP\^W+MQ Qsuf @z_OrOh]TTٿ~@I N]7в|M%'z_ +[hʍkzU٢jg}W%"h =vMu`w*c1'%}[ipg9"Pz -0x>or>@RBO , @-RBO \)'n .H@J KA&U=MqAX]LCm#t<jy=ou/O_󱊵ب 9p{)JH;B1`[yGďG=(5my2:eVʕ+Ig>q ,1;S.wbXqfӠ?P=z_&P-޶װm+: pLIW FP4V< u*LPK{<ƃ;BxJP껋iGYw^>q8(Owq,|iG`tXf{/I:x47Lo=SA/"Yim=:~7W_| $S=A+kčr4#|o#8[zmr-UwQ`/Ix>LZƵ;O @5?ބU0sRPA,08ډ_B+q2h5N?W~Q x[|dHVhȳ8ۙ8¹X|'8jd<jFwc`VsP};*v//08<}~ќgh# p,vߢNPh<, t.@G_x{5'c x==;zm|'8`{J}k8 z!: Npy4P]7pv k+x ? ڼ6C}yϐ7 mŋ08pbB^)o"NǼG!,{|om>}WN?0n{wk;希wE^->ZPZf-@Y Ьh4kZf-@Y Ьh4kZf-@Y Ьh4kZf-@\w|s+IIENDB`vcdExtra/vignettes/fig/tut04-glass-mosaic2-1.png0000644000176200001440000002172514470742315021113 0ustar liggesusersPNG  IHDR@e$uVPLTE:Nf :::f:KXff(Q2::::f:::::::f:::ff:f:f:::JoQVdff:fff:f::f:ffff:fffffffffffffgxxx{1|{:1:::ff:fff:fې۶ې⤼öff:f:f۶ې۶۶۶ې:ېf۶f۶۶۶ې۶ۼm䕥fې۶)a pHYsod IDATx{#Ya 1Jsi..64nI" &M]&ldSO6mjhEiœG#霙3>O%7_wnco.AkAkAkAk4'FW{_=HN%BG;AМA"7v+OgjI=|+ ή$ (y;AP' hN]Ps 4aM*H7o'^(}ӗ黷X*x9>'Ap/q24{O2Ao_imyU]8}ʼnyM'te@М*> n7e*|ɲVTSh*^ǟ&X_ i4/fVMM_݅.foW^)WMU(GS#ٗ055L/J#+^i4*Ԫ{* Z(eYf2HAМ*>) sή h*OR5,h6. 
#.޵M-YU4Ei4B-]A hN[)lR?/Zr AK hAXfA+D:O6jH =U'FߺGD*S{ ׫:[R{<ձ#8--u6Q.$?& hD7nl'~=vg1[EԶj >AmlRR~v $OIEQgsɣwnVLʢ엲-f)铹ftݙWStФ>}P4DZBJ"KƏ]avt1=(;rn*&x=|vWmm@eO."='_^HEAO$4:;AQP_"h5o$(ODAZ,AE_8w-g|~^U=9I_ dxϨO1 Z::4푛z@<*OO J5R?H\qpᅭ&iObjGQ dw#z *YMMx7 +l4A-~u45YRC;QPY/Dztaڌo[Dl-A*$]~֏ZxmөD|_ VmfWWDI3MPʺrg73 {3;Cq~QpiR5N[3={r_a"Wb!}Vh,~ h~G00 P#Widj ҟ8Mx( -Z]Y?LGd۵ePu"1V|%b(E̛V.w:$6 Ә'<mZއÙTu|>Fl¼n3Hmz0D}R)Sb޾eVw}Dt3 .:q>{d 7nfP#W؊trZ0O㖚Z='ieZZuHW>Jڬ+ ۝VI*M- R0*i좖ʂ/L׆d}XWY_+EtE>In]f[#J޳ HH; 7]<+VR< 2~f\@sW}CCdK=Q ewYnNS71t,y ?lw'\˴ncU.z$x-FV*HRq],ڊ)PA 누ev bOG}ekE. O@^1%BOVVd Q[Kɍj)fvrOG ߸[q O OI ث syC.LqVW=?5}b#n.ԧ]vWTФ;# {޼I(~2Q!h\EAҰ_t[GܫWC#;jwlGۊo4mlg]8[2n!J)5ѥ)G[/d~Z'Ft=$$-ގ%D^H YŠst(h=xw7rCݮ 9;ޢf?j_d}CۂC_{ 9c+9P1!9ŴcBPs1i9O [2AAPbtUޓAAPrE[:MCPsha" B7݅F'AIbAw5IOP(GPm) B+#(E¹-t x18"h ]ΨF[FPm}-IzA*-8.O;W:r7Lg:!kЀOGԐ_O%,0RlEw;AMC`Nvp"OH*{g1ų$(nzc]8<^w!6?wr' \8bÑM$anϜ`_ EDTaRp5AmH-/۞Yp5ATtG?h ennAMAPkoVsj "Qtgp5AmN@PT>-8A?|g<;T@PkD$JPk 5@{AMAP~nO#)U[p5Am/I$$Qoz9p'pt zGp5A-%AP/ǴAm6?I`vyp5A}N= 1jhakp5AmNh j z|b E>jx&7^T+AMAP|5Z#1j$5A-A^#)Kp5A-~;#)j5{3YA-?x<<->,ڇN M7Z$$Q[A- zۤz5A{ u_GPS&Ih=7@PN="b뛍HԓfZ5IAP38~ĪN{ MXoʗm 8!Qo z MH[A-B>jAPڄDu*$m!QoWGj޻=(XwfGHAHR~8d銠 -4Bz -8 j 5AtߺClROj # NӧFﶞ UV77(j v7g 1˗6ѹ j v@xtGYzAA/6Dtgk1ҹj Zo! 
뉘Ǵg=y5Amξ3r|1I/#)j7/~Unz0 E40f +z2x6*n}ot&uq󰼆/γGPc&hZ~V#9jUAAP,BAAP[lk IO5Am!mhXQnwj Zc5 ?q$&,ZDo^"Pϰ7~ Eǟ4m M}|>/KZOI2 ED?IJM55,DFPc"A0'muKԸj't*_/JttdB@P ],b.#)jx6xrv~16ՏBPc"IR|]|B*I3/!1j#S9fB-oj D&_TM[j $D [U#)jǏH|Wyije5AvGPc&H^M؉yL '* 1j4Yj Zd= }4Bri<186 Fj>h^^AMAP[d$AmN@PT>-8ѽzp5Amzxw큠ֈ6(%5|K=i EoHej $~_ζIڄvAP'gs< Q4W.AMAP-:IrL&j#}Æj Z[#)Kp5Am!F*8DPc:AmN@P[$n~>c]E,;vAGPSj AMA^#)Kp5A{ h/%8GPSj AMA^#)Kp5A{ h/%8GPSj AMA^#)Kp5A{ h/%8GPSj AMA^#)Kp5A{ h/%8 O APAP{-=G>=|]I\%66~#޾3tAAkAAkAAkAAkAAkAAkAAkAAkNKEp6 CGA\:NKЄx St9N 0ήtz# 81Aj.Ω U7~qA TS Japj^|Ӌ'&bG!pR4}0:NIKW\?ep$hx@Gg28bH\~ A) ]J" 8%A1Md9V=y$ht˱#8AWOSG!p* |:A‰kl:A‰˱^7Dkx'!rGAIyY $Mx\#@8A7z TGZOf`E;:NDP<4ܧI8UÜQP-ÜLFAp d;'"h- MYP' hynj KH<4M7E~ @8~AדzאG!puwA *XO4HG!p"6G!pA칣IDATe) C]D 4_͙G!pFj),G!p.bftr 98c> C]gm&C Y噠:8#4~.Kۚ: XPɢ) C݈^ot I)T# 8AkSAUpG+h<{o\ Ch]):Vm;Np9ZA20NG!pԂn'c:AAkAAkAAkAAkAAkAAkAAkAAkNX l}f =޾>5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^QZ)AIENDB`vcdExtra/vignettes/fig/tut04-glass-mosaic1-1.png0000644000176200001440000002376714470742315021122 0ustar liggesusersPNG  IHDR@e$uDPLTE*:[f:::f:[ff5P::::f:::::::f:::ff:f:f:::?_?oJoJoff:fff:f::f:ffff:fffffffffffffx:::ff:fff:ffې۶ېff:f:f۶ې۶۶۶ې:ېf۶f۶۶۶ې۶m䕥fې۶Tc2 pHYsod IDATxkyzUoXmVnIa-nc,lH \%`ek70ۀi (p i (p %]llU9E3 D'6S4d,7̾g4^K&4O~44en/m&(hBAfzJrUٻ7鋯įyf 1=AyԘLP޿-pyWIɪ;c8xv_4IB4v ׂ꼬X+B u(?(J.N\_oIZeHHR{efsw}b$翔_>gg0$ŧgz,xf;^ ^f҇hQt}tZъ)Jw_xv f3vYl&7P0yq6yf:]3J @P u*A%U (p i (p i (p i (p i (p ϴ=\)~K´6D8Z V*h!m)!h ݛW/> /4UBe՚{?|;7]Srfz!t%[Ղ#RzWW[zFfg9h5ȁl$!l0-])]aZا?MO A Z@J"Vď]wA)t1H+kdx!\/iZYAIltD*L/}.MAOCAlXlfcz(PLA~u;<uҒ7 ΛE|v@j^` IvoH/[?%cd):-5vPMoʲ9ٕީ(\L=J:I^lR3_QKYoA&NR*lTh|4\{>&/MdWeΆ.2iBiQK޾a:VF{9D!hHoy&?T'e$]g>-6R3,txZEP;<8N%PԳU/YIt2T-MUBE3-6EsZYRl$Pow~Y(ky'e삊vω{>,G;\̛Ŧbv*-n* B~wdf3ґ+-sx,G?MR&*|/U;UA]t[*Ϫ0BAEHȖZ9~&.KU1b3+-gL⩒DwfwT D%k=gjSd=QҌ:{M>qUA:٥wz'Teu;U#gwԭz0nsQgsCPݿl]uREDW\*/X>35B4kZ_5J e;*lK9gHsF{>(ikƤwȾC%݊T{v}Z=Ǡm+]5'{lM+e|F*_dw5ϝN n@?Ik|`rT/0@"S[.Fq3疗j;OgvjyMb3Q$xQ}n_I 
o9筋cy"%Fo_"q9D`g~f(?&fKN1n:n9Q+SwU+kȗj YET`ɨty۳/}O iZ8yJNVխBlf? E_I%WMQDꞔ .˜7~$XƵO7SRpD꿠=x'I5IK& .(5hՎRj@f#8kK>ζ9ƒXi}v)qAUH˻Xsu}.'4H:L;7fM◛3cxAsH61|X:A!h߾u YBP 9 B&61tp~ (mb/>aTe A8A|V'M < A6gu!A‹1/ A4 ^!(mbXAru1T?&g*BQ:?B& _bҮz A6z+선?&tF!(mbkVZ1 B& .W +APpY5g (mb1"_ B&AU|pzڞႋ}R1ejEoA!h H*xh7M ~Z A(x61LpcBSKiA',hm!4S4_R@) dZJ:qAf61Lps.cc A$$N tfSAf61Hp&B&AkF㭦>t-?&tzH&M <_l2APĐ3LA!hJK@M& A=xe2Bz^"mKS͎"mI9hi) A!huBPD˚ʪSB&z .sl9 Bfz |1#}v-=EyMkh$A$x-xq;SNq}zvVJ^0CL炆>@CdrOP5ED#Ld@ 6*1ЂxCPcJk;JtnOpiAPR:Tc30Z"Je+`?nù=4mx=W Tc큠Pc큠#,1A@О3FjS ~j+ ݾqeqS[iA-xs=*rB@~-)4J:mAógAemWOeJnm?tjRhƗ6:tvm폀ެ]9}*=qùp &@d#(N%*F8E+ #"e рG "(r`$ncAž3Jj&-c?dn&0S!߶W/맳BaG{Ti"AP$Tv^PY] $!->JepJ }jW4Iȩ໠!$J \ $!{sVCP$Tv\CD (Mr*;x.hbUAIScA ]$!oY$!$PJ _zgu>AIS[AUYĔ$IȩૠZ>k$ 9|t;-[JxJ mJ */upJ> U& 9<h'AP$Tv_Ѓ!(Ir*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;@PNr*;'h䇇W!(Ir*;'h쟠CP$TvNWwH!(Ir*;x(hS֙$IȩI0j$!n7M!(Ir*;x' z ( :0:+@P$TvP=^76!(Mr*;'v.ΦHlm$IȩIuu'MȽ74Iȩz$~@{e& 9&%vI[& 9TNE R& 9{U 4BP$TvOh~^NrHAiSCA׵}%/3RJ {6$ъg $!_7,rJc`AB $!_ vl|Uz]#&AIS;AR?$ $$!_Ə/?޾\=^4IȩWr`D\ (Y5jW'OgHgy AiS3Ay5oK1i;Z& 94c줗!(Ir*;'h@67I5upJ z`0>TIȩrsrSs:}ASCAys}vEz^oyIBNe]lQ~RCP$TvNPo5EJ%<%JBNee_YCP$TvOPɋ|NiA,8%IBNe/7E_<@!(Ur*;x(zhʟ8&,?(Iȩ&# ϥ)& 9tdLv T$!V/Wrѻp<IBNeײ3maΐUg@P$TvNכ{ga;/ @P$TvNl|0)lASa &5L* 9os& 94RC? 
AIS;Aע) @'JBNe5fdSkauJ 6=IBQDi,y$Iȩ9 TzՎPJ *:zS,VAIS?A ┕x] AiS;AF1vk4IȩୠdYT%!wAISSA5$Iȩ{a$ 9|t{O@+WCP$TvLPLۗVwB$Iȩa$JzpBRږ>8%IBNeǏ0TA衈OB uAi2[$!gf|uq!(Ir*;&g xJ $_ AISCA$IȩA!h;IȩA!h;IȩA!h;IȩA!h;Iȩࡠ,47Q8$!FezJ ٻa] F4-0JAg_gWXJbȝxXF/lk4z 4Q?0F$[3AupJ ڼbC$!_&9A (AeR!(Ir*;x(h2$!w&lK4Iȩ$PgJ 4 ]&J 휝}Jb$IAiٳob FOpAGF_|p9#5@PAٛثJ񗲟 7AiǓ'OW4Iȩ࣠=!(Mr*;'hZ#-IztBJ$G p-OQ8z(:=f:WB|uomlF}ظ}mǨ^\n>ZsHĶOm<R_?OuسWsnП=YbjY߻^` ATپ0]9(mV~Y;i;Ft~!(%b4Ln&Ϻ_` AT砗?yzg/0d%3Y A!/&RW`/0tr]64$[Ivz%#~b/0dKr&/W%D!(mG_if#PE {ufVú_` A7JtwT~>PNf B#yKqtS~!(GFiNl/,X A!( :BP *qcCPJI$̛aAz*CЯ7ⷕmY A!(Ï{CPJ㰠ae~!(qYBP{2x A!Uv2x A!%ꂵ rզ7bD#)cxx3ʺ_` Am,˹@v <<{d_` Am2MIDAT_U[ FY A!MX A!\^;/ yfʁ$xf=~!(NG^P A"x:U9BA1Dpdo!E@v +3Aup:|4fgt x,hRt/Bqౠ0RMDI_-:& (Ey: he* Aǀ/K!V<, Aǀn݀h$o~Кv*8^ HGkO:|t @P4: NA!@P4: NA!@P4: NA!@P4: NA!LX:;p鿁P_)NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48 NA@P48jIENDB`vcdExtra/vignettes/fig/tut04-Arthritis1-1.png0000644000176200001440000001711114470742315020473 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:ff::::f:::::::f:ff:f:f::Joff:f:f::ffff:fffffffff::fff:fېې۶ېff:f:ff۶ې۶۶۶?jې:ېf۶f۶۶۶۶䕥fې۶F pHYsodIDATxym(1vS(% m.WqKd1|ӲdȚW%ch>` AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD#T*zhپ"k;*޽W(~G5'ԄrC}ҨYmv+h/B*hҮɫ8KP s,v#S}QrC۩6sMyA՟W-")@$cݶlGAСdeZ/Y=ܔ5峬ڦս?~|G}4}H|i뷣Zmj*hbk>&:^cͥNyM.w2*A[ΝmtUmS[ٞY6W!@?~ﯮo'mtP_(Y)d?եaRb ~oϼaC*ng~QQ9V"ϭ+mBZM'7vzZ.zչ7}A^{iCO+ogI̾y\ޱTJ;e%cSϜŴћI2~ I ktA=B@PmO4jkã:[6B@PmO.\PhD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (#>;<n aA8rfWEP nY]}~~]1ۺ}e>fE7/>Xg?^foڝ6g^/CAlQt{m.~u4ǔ{!@P-Okf?^nV*7ͤ7"{Ud*;E7/q ՂfʔRl#6; AP]~&{z@A'AKAo4u 2v' OwO^nlZTts1 ZTm93S2rEMj- hM/tm~=omAt׺NA^/B (AA4 AP hD3Am[lq1wS^䔦ˌO7Ǚ X!\AМ .^4P_t4nzv؜#}>e>z[?>GP9h9^rُjE.{6p VЕ yJϠʭ?Z&2.\TuS$"J.+)|j}!lHb-hoFِ4)URϠ6[S]% jwE,d6d? Z}?4ߚ h S?9.Ϛ;_l%3SAe4eͼiˠjCȕZW$2- j2U Ur{k.i2N=Nf-/6Y ,ߪ_0SAucCA @[{!Av*. 
(AA4 AP hD Ôprb$ (f, ͕Y~kVT5jğc{lV1DP'V̊qMm/PMטmԓٛA@P#r jra E j CCb|M} h-hQ}o꧗-ת:5hgPDAPEPJA'A5-C1ԩhg}AiAPj3:1, AA>4(6(@mƞNA@Ѓ  * i3Sy7]t-4:ڥ A++BDAhAE&Tť!҃K9}\wyO=cy>QyfM4q j~v;q֪uk'NRnc57!h.h4Yܨj{JtW&Zt-zھvPeeieU|U27 *A} hP AA>4(A@!Uy/ی} jz|R{=刧>7̄M$Acs͚ h:j AO ]!$zB~Wi'A L'A T&AT4 :@PW SML^U=>$RhAЁqA>AE*EP (AT4AP tY'"6&hc~eWpE m#:H̄Wٙ A{A!Ѳ{%C"6ڣ*h컞+?&*Y+"2.O} Z/"2u f؜_p)a #E} vLt\EP (AT4AP "hAE*EP (AT4AP "hAE*EP (AT4AP "hAE*EP @AU>v()a 0AU>W$bӂa&_@ A@!6WA@ Qy} h]낦$A}M=H z"h?.h'΂&~+qg8:RhAPWAOf%4zHnˌ!脄t\No{e6r! h՝4EP hyZ_A'A;1eF#Nvjo:!)i (AOP=%*VHMsAO>:!ER)iXAgƘ m2oA/Tuh4Gݥmt^qFK=)i (Z ^D,Y՛)jn h?"9TsAϪG}(@.AP vC->0)i (AKA|tVg>&NC A^A'AH$!j:RhAt{23NvW4BO)i zhP= @PHMsAT4AP "hAE*EP (Ag,÷ i{_4Gt΂z)i WlPnF m#:cAahT4GtނnVnuE m#:wA>_Q$4Gtn'N xBĤ#6/coЦ93TU[QIl~\7HMsA-fe/ilV:MA%%-E%ݾ&4Gtނnxd"6Y%GE m#:wAբrpV3O"6;(DU_e7*-0v^[E AUIzc"6HMsA-L:%ZaAV})i ]/tg>CtB$nL}#-ofe]9WDTܳ[||C>.AHzDݤ/&jG HMsAfBּR3 ~o_8:Zq#u"\ zn^89XY[^ڕ c!V_89X_}#6+fEf&S >k^-۷_>G AЂb%nEʠo*`ʫ$)i ̛TJ^ ˡFgrF#蔄,Wpw MhAoO?~=":_A/U]LTZɷ/=tBhkG A:D m#:{A}L@;qE${z3A 6&RhAy gȇ<dJ0DϾHTZ@%I"ZUA'A $IAK^QI0X"ZAPHMsAg, Ɛ9 ;5l>|:D m#AP "hAE*EP (AT4AP "hAE*EP (AT4AP "hAE"\10@P "hAE*={ACWƝ10!AT4AP "hAE*EP  YE7)a ^=$7ղ)a _2AwIm$bӂ "A:!ډ)&]-R(zL@ЃhAmWU$AJ7w" aҫJ坍:!AEEc1u4GAE*EP (AT4AP "hR+&^AP "h ]qƤ>r)hBj@ & \DHMsA 6\CA{A!6Q:[}qhAP)=P8GtBu4j-W⤙ +mC F_G9&ݝFsԇ24n uy|19h(D(OU3P 1Sp͉B _uT}[wt\NvyH AOЬ<6p:!څjfR @.TC` hyJC}0B ZtX֍ M',6 e':LJ8ӷ ͕B aԒCvX6ϑ<&OT1] i *oМmMQ AjY=bh}8NAm%IzDtN Awqw+QIHMsAò,©>~A'DHİc TO*gv>t eeYI|ԙkD m#S;nD m#Ӵ EA A.zP CЦ9AA>4('$=[|(1mF Bƚ9UwTdLR( rEMA'APO6? h8ƹ7XDy4SbScL&vT(2IԌ [|8FTu6E|k֬ h̤+d!ן'!脌%rWSNVPFV5|Z B;h-h~WW(ߩW5Gv}6n8/b7tE]=3 ^gT>ٸz%K/nVIC  ZLY}e9-Nifjkq@ K,.HrywjZe<:'*{tJE`iWoQLFm^ğ4:!crZzA%)ڇP &miC ?hqԘ7D m#-hZm,^0w !脌tYU|_(POGX$q8Lo:9qhM>SmE\Y4Q\t}t  B5=Q? 
+L(_Pui09j[x (tg3{iGȟ (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 =1ןIENDB`vcdExtra/vignettes/fig/tut02-fourfold1-1.png0000644000176200001440000003075514470742317020353 0ustar liggesusersPNG  IHDRPLTE:f:f:f::f:::::::f:::ff:f:f::ff:fff:f::f:ff:ffff:ffffffffffffffff::::ff:fff:fېېېff:fff:ff۶ې۶۶۶ې:ېfې۶f۶۶ې۶fې۶o+ pHYsod IDATx 6fډ]ggleNOd3Mحgs&mFV59 z_[A<88iQD,V@C`Lf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%hVPbYAf%K!1_[0T?I=j1-<z7nuw74\:,c'1I<z"&}ܻ>^Gwݿϒ7va(zw{4:|◣{5!ll?dw,_槯>8o>LQ[}??Z=vGI?-׏o%o 0ä!,.FV>xdL)Nm(L[܌B0+^d ɇ~`m7L&hCh(>Iڐ6dnXh7-+IڐZ&'w~s%LjϷ)⥮8XᵋC{%|dbqHYO'KCebuj|vg?8#ЅԵSqhL,:o=NPr{zq6ǿVpu}~0w2I%;_ߩx{L,VцCdiL|Z$$mG*iBtwbUgVi N(S`ه. ␲6'KCrѝR:28Q&q7PS'PzmX2#CzCu ͣcX+;c \ʑQ-fϵ s&C`ɌzwLri+@XdtL;z'-{-+ s( @4g gل6Y(F[+@hY! Lc1Fkm\̚\ 7J5MuN,sA@HW4\a3ZU7T맪FosӦrN5%bBtBsez󴮜/sՓ΁ C.ZioY2NMWezNګB-UXъLV+t Mz,PզuI Y7ݫK˩#ҦL3Hc^з,LTN…5sh B &Ά36'~dSQaYDHs%<#@c.b 0h'슺|8r%rM(yFO{UOKIBGIuu~!hP\7s3hYƑ~_d&,`, 1!Z#II7b49T@t-x BnІs$@~ZHhUCѰ)G>@s%&L"kԤ,2$ؐeQ(zP m" 9gY&4Abt.SBI! M8~,x"hb`k~M~݀EdhXф ҧf sit?! Уdi5Mhhc GO 4XsC4#@4qK-ML:(4B\Hq4:!Z S:"^#"l^"$$ȥLG ޜD/ФBҍ?xab)&{3h&deYp@@S&yDѲ45Mz!j{M#Z ,014Z)՚Ъ""]%"FP3B+ji^b 5ӭvDL] AgY~(R3:E񾕸t囉ʼIa=Lq3pfvvq3DH`s`ґGdRVfɟ9tЎO^o_&?oi|2] wE} cO h&/_rOWA;3Ե\n& Dn&ӏ7tct8YB-YtF{T:hIzۇjYgf2=܋.>P猓!2q} 3p6_ ,ԳySm 㾜FtOgA/9diL=zjA}dпe%YQ˻C:'gB~C'msj/wW'GGh>?9|pr]V'_eEpM|؟⃂3N˧m`V0@@.nF!/L2ІC?0жQ&|` !5b±|> (LX@ J~>ک]^nmm5^S(L౛zyvoɫEa/%E I?ؤ/W,_-W~Çt7vg6n (zw{t7}ܹX$&Gݜ?;{I=v_@'ɷ~xΔgN_}|q>K6oc7d(Yt]l헳dc74 J 4+(1Ь@ J 4+(1Ь@ J 4+(1Ь@ J 4+(1Ь@ J 4+(1Ь@^})i=+jo,!{w_;Zn_[DcS?ۣj/zDn~7d"a7n)ЄGk2/{˻3W<'3tvu7 |{M`IfidVxtҎ WW3q/>&釸O%tVK9Fn2+<~>o0"XXHKɬ=z!O$?/Io>⧢~x/YIw<2VE`@&}Ib@ J 4+(1Ь!F,ۃthI<'/~9:Y><뗳I<@(^À)`I aQ&|` ! 
f X$mH>m`V0@R߆W +ƣe`V0@RgmO^o_^O&#vfy>4u-?NdIIYCqß;j~8U&ߎ@O牿I_o%8?f29Tcߡh$٥8ۇ', l[RP-M#!Ò <M"#eiLNZm?ťXޟ?89@>i7«kI<~LSHrȓc)G1-SM;ve~7Β9u2iz~| _:A/KmZ~w[4ܡOlHDwMjK&ьV=dɒZBLs!z,pIߎf4?Yg&iQlx.{|\o"wo5diTgY^hM7+ӆ dE rKk 6Qю75Nu呉FH) 5MNګ45RH5Yv=6_ڭSz?Lig&dnӎ:D盆DHk1 tsf$/;^#2 hՐV KEH 6_(2WJa/ǹ9YfZ.6Z7A\َϣeab>4X?:]ڜCi&iҌy̚z,LlsfI?TJJ0 =h&s06$xґ*0xMx& ts (ynfL ϐQtmѲoL768B'?<-TG @!ZՐN40Џο}~`MѨ,z2p~DA MO$@KL:9*}&4ԌC'@9G".ܔW!#gTS,TG>%${DrECѰ2ơX#}uO\\,Ih;9Pz#9Hp%&h곭v꧗qIp3) #4=t82.4(ПI6J r 5-ڷb i6L8M::7 [lHgNh D3Ѕ )f?r\濢_$3%4&< IfV$:hsBZ'tY!!-kXl д rIh*:<+*zĤ  O^[0#4-5B]၎.27 B *=hI7r4"Pt 4n'(4jґhn_l!:[@8|q &7ЂF=nnը5?V AGW+VMѠ@Y Jأxڤ!hP,VȒSQ\`QT 6Nk,nUjnMV Kb1ezK )x F;ҍhnM\E' |U;0!7(4=NI>tFJ sjH7x.,zݠt XR4AwH7Y2)0Yڬ h%&]hP pM?11!@&C(&5ZZhMAI78BGpZ!sy&@Vxt-1@1VrG1j%UGW@ȀM:@nճBZ?nah;<@<@$L@#t>[0@=e26NZِgFM:"uoNZMH\ˡUj h&hX Aia"}V{`ꗙIn?U;eC:)ڤ5RhhuE?4 urhhGh "!hPNl"Ihk7')=B@kȁL9t6V$8Q$rh,ЗjH$K Dfh$ :zPfLkݎ3q&sOFZِN4,htB4_LQShU8A5rrhuC@g)sI]AIƑmxbCb#chuRd@3BC:Р@J=DS"BWb eOfL#頮 @!T'eɽDDg<@Z=o%\#ZҡH8F̒sa%O:D+:Ԥ=\IgI/K΅,EEVCϦϷ [:ӹ${J$ZQYr-IC1 I-& FM,KTG Q!ZX?4x4hR羉~D"Z1)x3KZ釒giahъTs3׈'_hY6Q=1xr.<@lA(㳁D@4Yr*ETh*75ѽx6SFx9K.%bC4~<KOH_Ouhx+wHxig#)Gz҃D_y xtGϒ;h3DyεkxTM闍-Е}TWYr,92Bo:CT;"?4*؉+)>ȡH}h۫gCef(La;=-ɲTxde*nDžfJCt5na8(ڸTpVx%ـb{Z>mT Gz?D.NP쫗T{ %yQgbx6hHйtp9KNx+k22yζ'7fՓ~!.`ɅD)l8-z385 /4>,լ ~:kV3V\"i6lR4ګXbaYG&jz40 bdFŐÙϋ p݌z,L4ں2gJv4U6%BTAi>k'v5,/FRj.n,X8*unM޵zWf%M[Neyh=z"!GDEs9X&*.jq7#M-.XtѲjdrxٵ8Yg O^Cv*V+K2>IjzԱ8#:x>xBz|%*I:g, 6,9yaux~ԭ8(١<o!!IIq4q-[ыc>zh(Bp^?]ϖ]C{$U`sʒ{]zqLa{.=!o.IL懤>U]I+Kg:xKl:8*"MxzG$ЛCJ+)>09ceɽ|X>uh(>Iڐ|vq3 xa6$]܌B0+^d I +W˻Som]-cSwSoe]Yn^e{]dg .Ю.I,+@K=}M]u vu N!U}z겭PKV#ҟ0WKb?hW=; B@!kP(Zb- 1X]fm~fnDdLgeE h!+4Yx!>~0;S+r&(jVZeE qSoeAn4q*TGDLQն ,K2CYJE5+X1Կ Ӽnj$#g7i镮GWf0;hir&(iZeZTs3L,I1nږavԫ`ptK(*uE5$, *b&-yU;iGuDuL @-EmPb&)֘&+uZjr&)jk'iOV"FFLFH^;i'Ii"RF[> m1+A N")d&+TIfr-fb8-;yeUv+A&uۦj$qԻ̐3gڵm͂Zfup;Z3\}J Ed]f(XX B@!kP(Zb- X B@!k 4Q9d|R(;EhsxkjW^2 @ zr0*Q2'G Ы%3 @ zrUGIeD'G / P%<4zr0+@s}j cs Pҝԓd9_mRV:uL]n$I%z/ԉjs'G sT%:L6˄т"H\"^Ȏ_*"aYx'%1F'5i}NT&gy 
.;u2t1$Z676ɵ,I=+3^~~k&ܪhfR5}浧fP(ʁPNv-JQ2|UD6MZ} @aDq*yB JQ2|-V&-7N]Q hÜJ𜙀Vct(\c!dQ %5i"m=I*9Z@XVm5,#Y|2@ Z=Can&e&SG4 n+T{W : K E1bJfj(_ ڬ puFV#Okj<:Ȼ(JV*ڎ0]A$ƟB%cڝ5RN-^9*!: mBiWŻfpUutE)c kz5)/PJ_աu 23mvM 3?Ƌ N hT Jrݾ -Fa^%˟.6ݨ=Jc8!c]Tq qwUQw)ۢY:vJ:ЮS;d͓Xz9?_E^gI5O@GV^סu$4u:r-(I.O&z]ckIZ4Am3<9f{Ǿulp P# T5O^A#%L_/PgdL`$7B`(3d|@##J`Mow@Eb TQ^^DJU 4dɾAY V'_Гn9ީ0O bgW 5ݠ4CD讒U*R'i]Y@{y^'&u @ >@aܝ;IP^F4\%JRKF-&+Ita=sFPO&w$Kԡ\lW)dPZ ^["h}9x@i-x_ntgg $Z2 @%FY[2 @rmzPwdPZ ^hM @cЋTl-ύv/ǚK=S`*40z@i-x_ wp3ŚK=j~dAw'+k'ւzv:s@i-x_>dP2(/ԟ Jk8'Z2@P2J+mB SC!&B[b- X B@!kP(Zb- X 5\7&Rw`(iYM^5%=CLP @~'n2(-g@{ɠ @C!%2 } J4^2(-g@{ɠ @C!%2 } J4^2(-g@{ɠ @CͳSh.gu߿(;:+=..Sud Iagu D-n\u;Y<]ys6fIzWM(];ɦx:u-igue9Ng .Rb4:rV:]N,@5r(::8J\X! ƆlJ^@@yNݿRJ 9>>/@%ed.t"+zʅ֞ij`zIt%)[=3u PZv َi=hq}|.R1ե]+=<:zR@隤)_*OV݉P*CPޓ8-(!hrM1Mhr JJ5Mfr  t1Jz5+a&z%j]T bÊP@I̹M8MQۈN >(1q<t@MP['ĜZkոO @{\tf7O"snӫ}'~'m.es߱\G 3R׹E PDbm:*u$;-f#Ųr$$N+wLrL5s))M/;&#"M,=Kgi*&nPkצX]i1h rR5qhbYs$  ]nDA  k$Jm:-c@14~Zcm ,BL9@@LGݰO'G4hZ.f.e#r)JQ~54b PH̹Mz:4#ϖ:f uO%t@jhL@QZ4Ft.צh=ɶc󛮜DL9oA)@J2(JS_Y+P5Ctrk FS y j%s`˾֬E j:EP(Jhq@@|P>C*FBOtÙ7;>CFBO2Ct8o s;Sɓ zFf1 q<W @%F J]zpf4"^1@ciTdDhP1276ܾDOd Ƙn=bǓ ztn&"k4t h5?2ƠǓ zTf/uXtZY;);Iw ,Bj-iD{(Pr B@!֚"yXr^MЂ0:8fZ(䩩ƠфuBu=LhYѹ  2ꮯPLy1[?;YFjYL鄒+} M.YetH֛Wp==huC; (l7!pӡ Ơ$"܎"خ%7|hc4Rϸ3nP t1,}x@i- PogcP4@Ok#^v_ĘPEg5Q_@#-2_b;juUgzٶPtxiqv2嬿:E3S;en#BF;# M(eD _~}{0嬿|d۝/Jj15}ZN o PX_((Š5b aȐ|#2Kq'8C}LUCwۛRT6%d zhA?wjU1IM( 1cUv` y*)I/4?]*v"誘]O)zȏ }ܔdM;Z^ٳ5G}+QLQYH-jnJqBw/fE{o>n[Oyrb-PknJRU$&I^UT[&1haElJ7yOWWS{ObO?֤Q::{Qutj~篿{AI"-hEߢ?,Q!5vMIjnoo8I~e9my4!_s7֢vԌQ6 "I WdnJR͝lA?h&?/@sWk @cM4͑gK7O}3bR@E~J_oxPtz>)mvN6@n?ѿ'il@ V(.xZZ?) *];MDѹi_\ @+dSU)BƆ4+J}r@e ZЂJI,й)\տJWlzwϛNvN=N>w{#-wT$ɑR*=|'9Ơ7+D1YS4sԝ8W{v@߼j{ikO>S(݂-}=e(DCAqnC[:V:Q_5xx6(eI! [!^[#A(Eˊ.Ӑ@@c]&;k3,^8/Pc@ lx@%Fo(,? :QLH}@hq_~V4z{ԝ.:Qr{CL>]3q; ] ם{暣/x o=Q.;֮c؀^H}@I$: {f|@OWH@ejQLN,@4.}6#nZ6ͷԭx@E_\a@= $"XܓtRgcQ @݊TQ!rP9 ɷ|P[3H`FC ?}7uLEAr? 
bxѿ`YhB?I*(IL0'޻!.QLUA^R+ICL4zH!(E I<(@!k1&< MXMVM{rvq~q{\asv$%`M9B|]4|$X@O.KȈ_ 8r1U,SM9ޙ~їX@D;bh"ߪ[w_&f Džb$X@&ݢ#b4q*i8*o,b֞sNPD @4P76۝/tS>HT4Nmr3mN/&S>HT4NӤ \NpU,:ƥ_nAٜؕJS'9K h)b>r %*S> |3s,16NGn>3:w`Om`9cЀ4* YuFˇۍ h r&PKӈS>V&hA]SU%,Wiiֵvkt)@ԩ5Op1@t7)Ze{j-v*/WȃT/lޯR,B^b ho'UMG$YϠ./?Vutf8qV~CW:{UV`]oWїM(ub|C+J hhpf?տ$t2=sU&5JUJh:#NKUHhum1T1R':v33]bz΀BTmTV2_LрS>ݝoԩ@z{ P1"8x: X?LICƠbs_.[}_3TT#e@?LI: hmlY[ ^hG=1*Bg1PF^,K ch i!&h1km}e+g"h1QB;SJI/glԊ$3s-RDrS cj()tPoc?x1T.pM7U~>-'I/byLn3R ( B\L*ik;U, :w&X/P-*i^3Q8D @B@œ[/L(uk.ގfԭ-SYQ=_7:/=ӓ-(UCg&&iDv}Kd66''G+x25,.!]y >ӓ)ACmP*@%F :4&IXiIiVZP9 @n IDAT4*\-ϡw=±h"`*_@XGun"GtwNdj1hN}gqT0:܆WCU,ӊE:w&X'Zߙbh{ QV h X4Um*6wL)Poc}Maz @]@SX.)PocLT4N3Q8D @@;U,LT4N3Q8D @@;U,LT4N3Q8D @@;U,LT4N3Q8NjV@Ӄ1pPr B@!kP(Zb- X B@!k14AHMRP1pP@bm0&Bm~KВG@66NG}jc|G@66NG}jc|G@66NG}t@sq6)>>f|'>r: YX䱐\Cebwq33m^s|nC(K'~,46NebwiU7uo$dP&vRvz56s|I64IehWߦn9-tdӮT:.Q0@5ZzTf(9:ŶO;9W.Z` gѹT>p:xBj)M@ɫŊW5"tIzO%Um_WET=ҳxf$Aŋ,3bS2(&q3d^}[48+n+THm<-YV&"?h Fmw"qs3Q/X] xcc O*_GPiYR[Ocwax9DznA bhI2v"Ss *ZЅjn{;S @/ 8˄yV4N6㥪}$ʄ X].^eq6ttnԣf1 n&Յzf?ꆵoAk{$> Q3/;UZP ol/Tਗ਼M $ ʎ  xRA9X]A;wY8 \l5gK>B)(bxRͩya+I; b-u(- >opDkl @۝]no;3jb:MlHn&i$S4@\ld<c%)I{Obdp%IoYwTӂZ4b!J5UZQ)AϿz.8@ߺ~wa7cV~BbFI%ϷR`@aBp==Պ"m^r]q'|O(F8*4L֎dsR`@a w]V*2cEyĝn6 n&r (0cAq R`@a ¨I@)0029<@)00@4RYOchAj-(\b- X B@!kP(Zb- X B@!kP(Zb- X B@!kP(Zb- X B@!kP(Zb- X B@!kP(Zb- X B@!kP(Zb- X B@!kPm {=IENDB`vcdExtra/vignettes/fig/demo-housing-mosaic-glm0b-1.png0000644000176200001440000003264714470742312022341 0ustar liggesusersPNG  IHDR@e$uPLTE (:Xf  !(:*:1Q:::f::IXfXffffff Xf !!! ((:(2::::I:f:(:::::::f:::fX:ff:f:f:::::IIIIJoXXf:Xfff:fff:f::f:ff:fXffff:fffffffffffffffff12::::fff:ffffې۶ېf:Xff:fff:ff۶ې۶۶۶:fې*ې2ې:ېfې۶f۶۶۶ې۶䕥(::ff|ې۶/h pHYsod IDATx흋ǹ;0,Wr z1>2%'#ÉMX!qDA c)˥ ٬4vgY]]]UMt>骯Mu+[APv@@@@@@@@˳LT;lr%QǷYvp̳NdyN?f\!&?mhύ ?}{øt5~q V_aFݾ_4 {p܈ْVOЎ[ުty|? 
Ϧ:đq=`>-ML>T_\.G\*7,nnpӜk5~'g79Uá)i#N->DЀqp~\wz9ˢ.VbSdo͎TY^%̑ǫEh;d[G*r&iV:><fxemLp>@$]UvxA.?uh[±Ț7#^4ƟTmǤ٩jTXW5 /.dAYt⭚[FEh{ f[i7i!-ɥj(wkM(o}ɯxZnd 'Y-7].kw.@ ͛QȺ:ֹċY=JyecvSݝ.,@W;C*,+$r"WfcReŴ\_$W7'+w$ |͎U^@YL23ͼ^i S}N? ]]uUVCAg71OEFٷϦ2E\OG P^c/+Z̫.\1ُHSDW|IsͺW[Y(ʔ=uZqF5]_uA_V6'>4^H*Cy&f=JɖʈsWH-v?D(vYߟW.ԅ LԖPw(K[?c +UDKTIOʨF>~ Y u@=iAAk ݼ̅XxmT H^jѼ%j/W.6!r->rQ_~Y*js:k8M Q_?/ӥٵBWxvF"rnҵ4QjLϿsn:e}*S%ݜΓhe~I |= 겵##؟BbP;.3l@MԀxo͠;FYQu EUu+{)+s:OFHՃi&o}޳u @[ßCQ0Jj$j7#J- 2quc$:M iA\ w{wei`:uJP #Q[>@\kJ)oTU?5T}Ze~AjfglZ1ܶ]u$j;l%$~4I7e\e**YgfNgQNAZdbCeINw1VZ; @V|^o;6ԙWY7#p)g~h+ܭ9m]×iFL"4W-c#^VH-7V?X6dԡ]_}3HEʧfdLrS꣮tFAeNms3>4D+^u}S9 |XMWG}zH-7͑$C9JԖP^[-;&O=5oFeu}>&#Lz is\4Z/}6TuPjO *on4~,˱r4m^OZ-Zcʫ-xnNf j²o>,;fU&ûֵZWi sAauՇtՅXP-%õE}Z-LJgLD#eYVn&Vq^#x6ʜ}OxqbjefkMsfjSFL@Uigk lu4:5OhF6}P[#xeAsF}Zfԧ+jk@;t>IJW֢Z:LkqXfE3P+6 1?Q@mh2ŷh1q"f@@@@@@@@@@@@@@@@@@݀i1oպ۶Fh)@omv PS;vL@~@f;|$YTt*b-C3uMٺ)@2C>>fW6abs'v~f&jEn"<ޢ @vCe(EyMHu *w-6%#мnL=pW|(XBxwJX>̐23YEyU__<3GvWW;hU\ڀr1;1{."פ%u~Y?P\fW"W>\K j͛P & PBU3Gv@y@{)e$ܓ^wUUPvPIfϳZz줼-꠭5h=×N%$luܾ&T]6^WV˼vT (D͇xecgc?0AqPޓyZ+ZZ,nfkﻸn@Sxw &U@W$ P/PPPP Ɣ= !XL4G qo PU9E2@hЯ^ηBtn;\!eũG\wų?&?xAg1,P6>Gk>5:qʏ81i ŰG_p_'\ 3b]żM.%SGhl@%]_[G۷أ_zqO<|-:')ГJBKn⒀MPyؿ^"Ed ){_0ey+,l@OUFVh[8?Y%+;?y].iu(AY%n&W{]V(A7-렌okUAţ_Ϫf'$0׀ʊ^:1jWYž ݊oZd1fgTNJVm1AV!M/) :D@wQ.*-S]JZJZJZJZJZJZJZJZft ;ɞ|)٬;TH;_Q ;ɞ|Y%_NE}P 9C%s@+K:2گ/{Xq5qYwK گ/tK^ʓW7~ UJ.c*%hIбX_*_]K@xj+7 R|Iͽ{~}s@e+DWCy7;ۇחTow @e] ;,|'~JP״Gx%s@+K:4<` Vt>hKh}t>hKh}t>vj2y€W5$;bk'd-KLZ?n.ODtth1r@( H?F4y@W% t֨'DW7zJ6PFOfjJ4ϒX¿*@)8-Z׀5m!{v-C|i U,*o׮*%/kzg/´,NZo*Gi T -loǴ =;uzM{;T6ն5c44|ЀZAtm"&"xK> N2^P=Vva|i@y]_T@ K Z{'(/P=Vڗ(I+ $6E[m==9e|Dc^P=VZ;F`%i@;_Q; GpҊ6cΡNf42I+ d PCkng33WtL |pPP{yΦ==9e\coh (ޖ|hqJPyʇ8l3GoΩq\.ߚ21^7S~tpTR` iUBWw\!7(.㤹|0@k7Z*렡cJf3 (Ԗh3\B5fD})PtwF})PY1P@.:,r>_D#8iE} о@ NZQP=Vj+vU%#8iEhpX=Vj #8iEhpX=Vj #8iEhpX=Vjk 1,zb"p؇'B;]訇 (((((((((((((((((tU0kc Փ1HukDT~TĪ4ʓukO@(%kJ @[N5-~E'Gc#x @1h_ů^PzLWtr(=&m+:9Q}O,~#PyaH@ w87ty,ϦG" z5;8g'2 @tɽ$6Pї3AhDtq3^}dP 6/ 2m^P:@Ó[M'oݺu=^ͫK(AhxC>2 @U>}Fh€2m^ЫFB 
==\5ЊI 0`i ~hY1i @zt1mMhB6 /|dP 6/X"lǫFI4@WbT @[t2 @zW5ǶVivƇ^2 @z,A3ghi? ڕ~;B `g >2` Pڕ^Z-/Ŕ[ϥOp^@g@%.]`u ΢ԬZ#oqM6dYUe:h/Q7s{1Obn#x=b Zwk͘[B^V%vπ6L=Zm$J[t2Z3RQVKy|f̡I'ovUo7Skƚ2kn&ӓSb-&m+:9כTr?zp:󡯛5c -f~MGg^ͯ^(Qz_lXCjɽٱ̕th_r>_^9^ڍ'޹X-ZUnL+6FБ$O`x={=6#x=&j$Ѐo9m~E'GzC*xwJuAW_._Hyf,BIJjF^50-~E'G0-~E'GzW|f"a@[N5a@[NRnXxfů^ (8֌E z‰qf,B:T<) (8֌Ev@ яnXzm5+1q"Ӭy$4`@ LJ>{8@C (q@wPJt8@$*ATW@7bv;4ψh{aZLQHv*M@P:A-ϙT: :)Ҋ RkúO8@#X2V/\vJ2^@ ƠfAc:j܀bA=cA3[3!g @k-Z3!G|xx[7gMy[/{6t0@} $Pvܚ5; gi^+C`Du\ݐEgV:nhh@QZЭ 6P [}^߳P9^ԭ_.qP_v|CgG͘:5p7|cB;t4.NAGf,BL-q PWZ[o |ݢ֚_ݬ9ɞ@ PoP=yG16ݡ[^' (֌EHzSmCN3NPoPK :~WJVwi׉J=5cr^Ц;t4$֚[?xj?Z wiiJ>5cjv_W~І;t4DM9QV6z~܌ @P91J>5c@ PoP=~5J~[k"dk>GNňC#Z3fq@ PoI{b4ejݡ[^X=dmfUO^Rw6 @ MVrzI_t1Vog_t_SiϏJc8-_^& Ո\"_Pk!hНx˺,jMR&Wx97}(5Œz;t4e-]L>$kYPwi) u@h[>@wPQ^;D#)O; lSTT'w=L8nz$i@:zG6:t3ZtxFRTG=Qt4̲\vR:'u@h @{_zjv^`^hX_, 0N-ccN'7DpnfP{& f:x2Lwiףʾ۳w @[hX78D:͠zt@7\zߋO|df,Bn7X]bMuA耺T.qVu'@omtyBD wnfP{e͏ito[Wޘ;=A{hƜ@}ZK>×]a%GFk"l$')4tgБb%(;cw3뾖 N3NZ~ƷٻAЭ j]'c _NSBJњ9?#= }ttVNPԆZxhzYԆ*47?FقXuAu& @ ! !!OeZ04DxMa8r{h[ߠxݎ?шˎhH=4e8]\_!vac -lړXv:ˎ!ȥ9vd)eV"ˎ!hMPh-L R f3AP>bx(AGq(4b(4ûiDA[nBsKot@Hҍ&|mPhs2z?SX(!Bq_Maxi6ؓV 50Qb|&^Rp+Bͷ9W<Dj]4tRϰzXIRMYV]0_odЀxU,1yjz!@]?4 /j$@z߀Z~~zԟR#]Sx׈\5s~ $b+6r3.$(1uJ)BC'o͑~WbL5~H}(,Bh_t @S+ȥE>خZWNUjh-܈uz9J+>&MBsV@hwn%tP PB!/ R+LlQ݄21@ń0@ #аJP{š@@9hZ&rus5$@LtU6{Ġ}PJnpѠ.9 \5<;CþanH*ht$&mTm"`Dw}iZ9<<b;4P-Z˿އ: qZ1sh?ӎf<@CH͉^cr$Ytös7֎Z͔9ÿPiJ@MAf'p\}T T+^*KҢ"@@s^dP-۝$4I+Y&+P[P-TS6V@`l4nj4ɍ:{"REY&4ϬwZzg1fNTQ~&:"BZLN5W~@WsB^͎L uбiѧ/Ե9@G6QjFIPB#O4GrcG]B#Oe|g@ՙ,G [H+Iv]JExG:V:@ՙ*j>m 9Snw3ՙFuDw$]^+O)J|h]f2@<+v %hdJdcdC>?U&|cq,~e~B,n@%rz_Q*W|2ToIPhd8UouMxP5e~B,K@_fv>ݓU7~Q/A U8h{LڕjuP*0 E9ik׬ v&EX#*sfjvolk~şXzhdP<SV /)!d3*Y&[bWOe[ubԬHVsf0r]J @X'd@{|ZP#?!n=U<@&D;iM&:hd|b}[wU Z'IJ?ٽty6okRg.Grhhg (\KS.?Ծ5UȄd߁q Ov]GWA6%NDo-5Lv_%n0 1у'Z OvܛE. C!>)o @X'݀ /"j&GsJT:(u*AO(I@Oe{ VM>ꏲTT6JOe hՊ'S~4 6xPW8K뭌շ)\Lv3),6T}QO9.;Sz½-!f @! 
:DF&?hy Y`!tE:>gD9媎JpX:EOe0 Pih|mGS~p@]Q SY.b|;}@#\Q@;U9_.=tgbGU$p ?ȿ/ĕ Tª5툻06 |#Nut)Sy-$nO"~m>|?/RG:sJ߿7"x$Ncrn|*)ԗg|If9E(짮b8 ME9Vրo o~? sdJ*4.d旬x :Y,_=fqr<|M@Gp?`77R iYНn/woUKj%((WJR7 'S(AըƱU~- *}V3Ֆy:U+A9PeT~6ݣ~/K\X߅4J+~%ZNG'NMcsJ.Lɛk 4(W.[EώG_ξosb~Fc[~jd@zɶk;ss:1Gr+Xgw*6wafٛo.a,{P hؙX3׹юЀK@do_ ;5wԾzʗY9yBL>/_er䞺ZՂwT(g%Ƨ/;;T]׵Yy.7t Ѵ <^=eѲj]W|yW2 ?b@^;z|j?]-NUh2PF]fAAPS@m%nqIENDB`vcdExtra/vignettes/fig/demo-housing-mosaic-glm1-1.png0000644000176200001440000002711014470742312022165 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:ffff::::f:::::::f:::ff:f:f:::ff:fff:f::f:ff:ffff:fffffffffff::::fff:ffffې۶ېff:ff:ff۶ې۶۶۶ې:ېfې۶f۶۶۶۶fې۶X pHYsod IDATx흍ܶ9]M'גOkܤNۺMݚn뺻qkNVX ||ZŜ! I|! (  h (  h 柿}<{lv^msi+Inj~=ۺ>O=Ddf7LK>J&Ϫ7I_b;!o(|n{{Z}@SׇǤH7]7}[P##[}/y#G?S~}Gx]UT#Vg&eoJVnФ}سvrO ޮO;=9)5Sp-))=_aQO=!iv"rG9")}hJlꪖLO/7iF zz^oi-Gg%9o4tiՅTڏDfUJiYk=I`I\U~Rd:Q>#aUէ<֦G zV5K]T4(iU :ሼY{}G \D9/޺[BJZ$**u\~]>C6=_\IE 'b?k8zYww WK?擡:RՕ-Z]i[ W72IU|XUʂʅjozĿB*ѐNx1AtUT~T IG䚱ΞȄ0NPB:L\_"MJDI5mފ wIW;I"IZVg4zRޣ(_hKŔq-mnK)O#9~:,s{[PeHOzje,&y6h]Aynteǡ6Kv[&=i~eJ]O'ף9/KM hԢV!V "ɪZU#zmDyD#2wTbϦ\WN[z\ƫ[6Y Qv}(/eߩIoѶ=\[7/ZM (?m*X*ؗnH2AP$f?b069Mu;h*od"t(h˕-;ۭM (#X 8)כlGCAYP6WՁOgIt:˓ͧRҦOBWT*vN8Bjj:Z)(q&"z$K8U(?ݡľmP *,tҗ Aʨpl5ZС>AU_M!(\EV3-K|%_.nufeҭNS7jT.ŷ˔)W 1bGL>U޸l]L<;X.iIG5Qg6k[AuRy[ BPp;I42RI"t)zl$Ra|ȇ=AޣE}۹5YKYq>,KMΝ$9KNSka`2 )WﲆX/H,Bc GWe(A79zzOR r{twhVEt)/TCP1nKx#fb1H=E}]) 3M9 K|R J>l߯^v&$Ig7HL>L)V4鋵=47e#oސ|s^  xrD=##NaOE>7+4"BbYI 2f82WP m=s+ h (  h (  h (  h ( [ZtQ=|{lYdUtI 3=k:J^&)ؤColNs1U-[ګ3#i񉒚{=K^w$alZ;͝K,o8ߓV!(U)hZNqT%uY{LI7|S*Typ,tUz%S\kAKRWJiMN_v.is S!9Le%~UZWmAz19Ĕtfl֫<2IFDoK+AGYYWjͬ)h. ?CR^NMT>XQ:'hoנҒ#5(ro~G*˗JX>XQ:* <{Β=4#o%A/3V%UCeV/^앵cz?\v.Kkj'EQ](\FNsFSI|}O^?dF_+% Iϋ=/? 
AT?h>TKJȎIqgC< APtjP|.B?Ԍ'קEUEԐڠKI _|E970vJCtz~*΄3#e5x7ax;}^nAA(O=7DzY7lZLL҈E~%OSAP*@@ÀU<*@ȫIT h ( 5k@M | z#AԃU.P2eXT kP'ݘ~X̨A-4wڳ1ᰱsG;.}9"tPBYPF'k慠)թAsק{[Pe%tS~ЋLsłhֵI@ŢIS:=+:S9fO ?S>%rЂԹ#/ڠE55z^OGgAt :ܙOD34/+ש5g49AU3տ/Oɟ"ܖ9 輔XS}CPqlmayH s"<`AMc8 U-6 L>׻L78 6{M(*qt:c@rhz(rG%Tw|L(He5wj(glU*1ɍNuTw|"h5N cQx<Tw|bhWEȞ4 qPc*mA"6 :kOMMJUU* :[_P Tj Um7R4\i!:o˄$hJAAymqj`[ڄ!6h s7@Pu%܋7^L6,12T5_L6i>2"L_%7Tw j X2LHa&qP AˑݲT Ay[&$AGSq:xFYkJVig/aa]PF=HNM$m!蔼KfR[jסI%_D AyQЩq\hyd$zi{AxQfkм2_KCP "+Z %hVGAvZS@P "|pA 3u?AmR}w[8P\j)㰓%eOڐTOAɽ Z ]cIdyg< !6a:yRjFI2 >A&oIW;I@PT$1 5 ;I n] S*r+h|mЈC+jC* c-N5Z&>A&8( :WWz[ m  j5 A g\\sZd)O;GAPb (X/ AAA@P4 AAA@P4 AA" "qf,M3 DPw0,= iߘ#'K:34"(aɂ%1r̔P!)/h ?3קtvO&0uzoN0",! U!۳/oXHn]0 DAmAG霍.Xe۞tR0ujeUh_jI7j :騾35 zl:syE=4G/j5hVU{>܋6k vS[E9Yι'W/^XҸ>%jܤ/@//>|4`SVqP7Arw2KfW~4sެ Ҹ+g܋  $^j'D~;vp=ycX= Aߕ Jg:SA7wxAӄ :=e)h9^ymrвc!(NAY#hO QH.Oڄ8(V瓑Ĝ'' VhtM^AK dmt6^ȮB~Wh>ȾMjV :io:Fϵϫ=^-] /6zq#5f%k/Aë s8hn;vCDs hl޵Vh&yvg"T dm]fXL:--/vV[?5I; RJqPO)-²6_.3Ʊߜ7&z.v496y@FyzE;w>Tm:W z N_7"k>mbA/N]f8AA/N\fB^ v_Vk!/ 'B^ $&X珷cm=-Ao ,btjniC}2 yU!(\]=B^ ͽdw[6,oEH_-bVRsaɍݷf}L/hiNs6m/#k)4hfQZeSP&Ƙ.{oD7F2=misغlf8?ݯzWf  h&e* (AC.߿[ <`.܉LUaT󹱏 ?W\%)1ǰCP8uhA)uuA|LRbt ji&6/ lA)Tt]vJI%ڠtjV؋G  ďDuFh{Uq (%0Sq{%KDPZ51^rt5\ R $AIjԏ@II6Qzrq)10SqSRbtp/ͺu沙4U>A@Pt!ybZ0.TBg*JcLatv1 DNͺu沙@`&TA҅i^Ra&ws:VA}4 (%0u΁  h (5ƱVX=OA§OBP5OL j5OL fT~'`%Yo,_CPKN  f5AAAc=@P@@P??j:?5 :1wN@ԬRYom h@ ' A{8XAj:#C/'5Rb} 3&(vQ$8^vsEP9l=*$C!tjT_ֵGi2Q ZD24 RPK9wI'A'A-%⠓TPV7ݺX hM曧o'L/FA(Nk<9aϵ#c,^U~[)u?'ArEPQfנ"PO~KLRPz$3f_i A RP%=ALZJl05QsX⠓Zo$Ak"4^9 ʀ]ԉ亩3S]z;I&#,rx8Yqpz 5(LO2 AI,xD(hAL)^~&VA v!jj`2 hAMө^vTc'# tjPPj z% Am&A9u h j O`2 hj=1:?5 8Y<1-f t2RbZ-](rQeza@P@@P??j:?j:?hA`}Fq>>l.FQBGKsͪf:y[bA84ONj-3ŚF Z NAm4;bקtśT6ϴZM ԈŇ2;]ݰG2Ow4L7ۂʱhTOjH|xݹKavK)ULF/I:AaH'un5, HمrV `*r.APG%4M=@Pк ](vlqA&[.@qxc@2jڱU DڱU ڱU D؋ڱU D(h1ccDT':Վs Zh %.9&[%^AtLcE{cD+(8 ( 5=v< j\?v<qв:aw^+>{xRqt \pe=[L]N2$e$AP_,#h${M6Rp.(:I,#],4 $ N AA/iju\ $ \~aDU2;ԠcBp0` e-rZHd5 V AAAAA@P4 AAA@P4 AAA@P43Mo]8)H#l S0/Jl.OBTz= ACTzAk 
h?ٲdI\$|yz}6IMR)<^%(=קEF^Ȓg2$Bi[̼T~Jl.O, dyJWo]>9A~&w[sgķˢv p6YxuU *gzpSڃw<-4P\?IoLͬpynPѼ0XvC+4m־6E 48}G%&iR%M3qA]sipr3S݅Ap,A?}]V\&j*M[E 4u-)Ww\MV?O K+C6/vv e,*oe7$doƴz^dUᇛj^l#NZ c Iߤ*=ژAB#)"[<-hzrcpYDs iɮdosOR.s;ڳpe {5ŷK;.<TUT(S~Qnzϋs;^qE {j\b>}? PTBAP A5* (AA4 AP hD  (AA4 AP hD  (A NQ۫$\8MPA,̑;'h4Z"系[2*Z%zHՓ 8}>Yݗ2 zFaZ0ʲ45_tyv6xJ^mG2( / d .$hS9a*}afU]j퇢F,7 h2{r]S'oZ.׷KBjV hĒϷaCwI{Ơ{?SO5F"8=E]n#xּdpg.o?wlmɟ x3 d A:\էʢ9ѳ[SuZZЧς f:%MdO 'tA탠A@P hD mj]aS +7epfLuE7THC₠Fݸ^zgn"{6TH}_8g܂~uyOAHt߂i(?OӫɴyyqC "{6TH}>үI, dzvb}x1@ML9"{6TH}|rbr,=FnVQ *E*y&4٘[P"AT`5U^_?WT U6ڋhTEgw W$٘[P"Ab#~ M$&*6D3ER]Wdt݂ O^"ho?eAzxAӊ٘[P"A6"hOwuƤO2]c4*9ruiElL-\ yJC|Fj69!ZVdt܂ RAG/҇]MuDWZz!+gc:nA)NC5&ژ<ͨ6!PP AP hD  (Ʃft fu#] mj|'xAP A m 6zj= fٻRC hE0-1Lr=;6C%ZJSɺAm3`W#hE0-1h1$VC'߾Z'd݁2j̗Z AK \b<㍠P@袜uf균#hE0-@Pp- 6zjAg[C'KW"4yRr)2lBЊ`Zb䋝W@#hE0-1XB"*yT@P |S'd݁Z"1VCtچKAЃ@P <=Η{B",2aQA+!hGs Zz 4VCu?UV|C0-1t m%;M/u.J<"Zn_=VCAAm3x:!haGsk %N͋!hE0-1trA!hJRC0-jI8yH7SS0-AGA myj= AAmAЃ@P A m&?~z݌bpW~29@ AP hDy+Z<`Cc i:y!V7;wzE=FV&Fk}A߄ȀÝ!(61hE }eVSIڅ!ƼǎA,|#7"hC%#(61LEAJ"6h"6hz/h[bEfN#(61h&R'vbK A/kA,y M zOvj̗*]MSr}x/ϋ6m^ ͮ$.(-/#D<+IPl$Wg,b%j}&MncAG-hV5'/MK/T񻗯iӢŤhaEP\Фc"Ǻ\Ҋnj5tF.^o/L[mEeێBmGX"DF.h7_1LyUɕ$=cV^u6)?Q϶d&>~ơh [wD]*cM+IAk[p%I:4iv6f^˄AГpr>!mNE w4V_P5sАn&x hmrO FG=>{6﩯HncA.htp>MncA-~hicr; :jA774Yk$c{ c4ipQ z(6GՔAJ򟦧n=9qm^ |@2KEAe2~A'c{ #L/Ox_QNncA=Kz8V#,{9(Fx=>jA%2~A'c{ 4Gsr; :jAgIh=;mh#!H<4 RAi$!i$eẅ́~%gvVԂ#Lz8N&x&To*^ l_ܾmG{nXPiێwF C/ʃhJ ;=LfjQD* omQD* Om?-Av#0eG= #AG͒#,t$9ğ^7&GPY"j&M"0|4 TgW OXAP#9A=9ķxAeB8)9Ar%F8~xGK@P#9A* 5#,HFwzh38Aɏjzn92<4 ~ӔKG'DEou>fA09<9:|t$\1xዠq|nE?LA,| * Om| 69Aۻ[j AضHFЖx'oyu{8~ AU;)KGT>9#AoѢ A፠oɏYEPo82<4O:;yvdž'gIyr#G75r[FEiT4qMs{#0|46,7wT/_4ć;WTZAዠыOR~ӊ/?+Eаf՘/MBPax"z%9 }}~7ЇUc!0<4^E* /}7?}~[x+IGnoA* UcʼnS'n_]t^_zT>r:C|4,"0|Tk)ȸsI+^0^Y zTjc AݻD8=<t} x"h,ێv* O]ς&̫/o= ՛*^䅠Fbϻ:T2aZ}E[j0/UTWw( AӦQm兠FЖoJBPa AeFrTW!~cG T>)9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9A* 5#,H@P#9Aࡿ+GxpsKzWer&D  (F|+Ղ lK[ 顋iM ZàuZ 6:AAM几NAЦCP 
hS!S)T~m*?u 6:AAM几NAЦCP hS!S)T~m*?u 6:AAM几NAЦCP hS!S)T~m*?u 6:AAM几NCLDurSH޲)^v,?ugW[: AEE几NBxn几ND)*[: Ai$A롑Fc*;A1Zx h=%_O>7[: Ai$A롑ApGcgwx5-6At/EE&]}t'oY~/UW֏A,"iMᒇ%_Az1Z +oeT 'hM cu փAz1Z:AH` Azw փAz1Z:AAP h=A cu փAz1Z:AAP h=A cu փAz1Z:AAP h=A cu փAz1Z:AAP h=nՂuZtOE29@ AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4TJL֕IENDB`vcdExtra/vignettes/fig/tut05-cdplot1-1.png0000644000176200001440000001042014470742306020004 0ustar liggesusersPNG  IHDRJ NlPLTE:f:::f::::f::::MMMff:ff:f::fffېffېې:fېTd pHYsodJIDATx흋zF4δO^2Rx,.6$@_3Ic-m ]@Sľ" CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP08 CP0_oEˏEUpDt+CP08 CP08 CP08\J Λ#S`d~)P~).2`Ln~)`@&~)bꗂ`,|9+)~x?L)X%rD.v`a V X=rXHe,G _,( )X]Q+XP0E/ϊ^A(X\jR?JucBD^fWPۉتWrC7Al_*`-* |E~߳% O1s md۬ E{d\aKa]j4kFoÆR̫/GGZtpFNnU2W0RlMYH[Vaœ:wu6AaDHFu6)8V)' AM=O ͦxlUWkmpWPpJG)__{fx4j86%8tA*SIpZM5 QARKd*r^ː)XqxrE-׶(xTڷ_oa sk56xt2!k~_+)_ |ؠ%sk/8)VIW6H/#; fl$1̢=__6_,!ر,8AVi N&YH/#IߝƱ(8vKW{tHp~]7ɰ" ]>M_Ӥ 8}4iL~]::$;a? 
ZqW/c[`/h-'-xEXVM8'xQAo_W'*σX Ɯ̢u|; i_8ݑ'&Y#.-*"x b^*Y^$kw'Ԇ6|x_`&Y#fnxxfܯ^I,/KU4gUޙ+lI_o`o~ł)X )W2γ <W$YE;ݯ@Sc~Wlsv!w}[Ok[^gHO> /g|K'_Þ_o”'<~x bu[Gajb`5 .LCV,uw(Dg@{vauq.M嗢kxd&xw$d~~_=e%Y$iN`RNL]\tg[!xZ\O;j8ޥeΧXyE 6,V]iH\+ @ob~m҂>jH‚a83 nx\&+Xj[Am%Mr)XtŬ$y?ؗYe~Sh(cKCZ 8+i A$8.v~ @[[)3֍dG*{G6dG!8V; g^u 8_=l}G?E~ !.[p ~ -8ٶ5WpJ~'7.ިcv7wLA&8}kB րBPR Ǭ ذ6Oުmw3:!іsFW{s9o]qo Dz:be#k:bm`zm8.oD'-^g讒zE0.[ Nn0iF#) U2iw mD;zpJ C7 K,iw/ W~D;zÐO?zE0ג<S5Y^{2u!}D$.z]I[0:`>WAi _?$*/LHR0#A}""x,0vY0zE,>û8oa6)üzO\w1L*m0J MjSiM.f,C*`" _1DǏYSskG.zx/gႵs$I}+S`,J@ #++p|[f(ճ<,%zC3 X߯ho(emƏ茎k3|6OȢp&YL,u)8Ѧ~G!Z?pO·GM앳ˌ?]a3)tv}3kkiq%B1K~RE6wv/z]-KDn`oyEEK+PB1a` DK{_p=ifd`u|0/]`V*us DK ;RoQffTLcI^L^3Yq%B1K~ fc#Y?޵w)Kngo*gm-(&]FyRyr7i_ᔻIL݆*e&{.g[2` 6?KZfaC/v|NYy$->|F1BJ7·B$.=E=M`p( `p jyt*{srϧ~$ȥzp V3lmnnԚ]*&vC5uV7lww-fXXLwo]uࡘmܭr/WTgX̭"p~W}W:}LV 0p^Ŕ6yύk1}H]^LLuQt-u;*1%حY5ݴcTXΰ's1M*#wQ\#n޽3U`unP{*+ձt/g]E7GGTCG:r8T3ڜqzCQ:w|MûwPȹtޗ/:ϰtӻ.wMޫ UnꮻZέVvp `p( `p( `p( `p( `p( `p( 91rT?~*_)XTIm(_jf5,⠂ժn_Q]߶SaqPem߸a>.Rm^)a> P}0Pm7&L @ 0@` &L @ 0@` &L @ 0@` &L @ 0@` &L @ 0@` &L @ 0@` &L @ 0@` &L @ 0@` &L @ 0@` 1G/!D8 ,*ŴK@Ρ-_N򶦂^:>To*&Pj~}xo.o@S{@KURTF/UZ^5Pj**0hz,s9G/B/UY[[@KKUTy|; B4zJ aah)zJ &@L$.F/=KA Tƛ,kk**> D4@/@KU KUvOH\^EtʠHsyemMTek6 fp=F*= k#@4 TⵞbzJ? C zJ?G zM.G4z@FRQDB/B/UQ0naKKUTẙĥ* oe&z)zJ#{#PTgO"rR *;H,2@iR1 T T:YPfG/Ut@KUv?栗p TO@iR/*^K("z)zJ cL$.F/UWyЀ^Ul_^r5sX(^*f]JYX^JIDR@/B/B/B/B/B/B/B/B/B/B/B/B/B/B/B/B/B/B/Us F/UV^ǷTwi l6z(^D">!PTUX}}iTGE$z$J?q@MGJjDhVzb[%"PT# \*ݼM@iR}z&E\+ݜ0jDAwMāsD/URߟG~~6>B> "@s(^TIax@RMs?] }-G/Ur"1c ԵCmC/ըM.s@7y| EA4zJ?LE zJ/@ z.(-lzhJEK@KUV~"@KU~BK0aMRRm1[KE T@K(zr^*r^x R_0V"R_dFsE*&P԰*(зETerLRR=h z >`4Tڪy@iR1 T T T T T T T T T T T T T T T TJ7͝9J\@S w 4\^WkN  j TsC$4%A( M5@Ih9J@S!Pj TsC$4%A( M5@Ih9J@S!Pj TsC$4%q":T=ՠ0jDž@c8VrC3zBDž@cg޿FF-!P`U@ F/Cs))rH%GP{>uG5. 
P!s=x4^J#Pq\t7{ 'ǷR'[4U(c|aWvR|z:|K!TƷ}o>Ȁ^]C@KYa!T{ޮVA/UZTٗCA/~Do}eWA/B/Ƿz|^D?6o(^aq|>YKDbuKRD:Zj%dF/:K&hzr^*r^죰<(zr@ 栗j[XXQZ:H\T@K}&hMR1 s^*\*@aSWe^uLUxOVz/LԬ{Lښ h]v/P 5/(,kk*{즈@ؽ@QZ4~G^>@ih:kk*P5/аCY[Sa0^`s{N ]Oa]{ Ưuؽ@{(@ih:kk*_=Q*^q$4dh]v/+e}lk/ 3E^롻{^ D."z]v/oe@ih:kk* Tsy/uTط@d4} d Tsy/uT@^ꬭ@SY[S@mua=S*'f T Dۺnt@"6<@"~HA>~Cs- T/^B[Xln.ml3G/Ռa<=zL$" zF%[K^^SI:KUT<!PT% yVj׻"PT% ="H#PT[@@m?=6]!V(ša(K\&Sn`~J&խ(a>*0t?Qy˟MջHl$dR]k'/7fdx5 T0L @ 0@` &L @ 0@` &L @ 0@` &L @I%~ih~mhm!A ǧa'~\v  l3lmviˏ!MQ{˿\^7 =MtҰ; sdE|?@E 5}=es(Fa@1>*`O]U_nd0b͇47E= &L @ 0@` &L @ 0@` &L ?yGKIENDB`vcdExtra/vignettes/fig/mobility-qsymm-1.png0000644000176200001440000001705414422306403020454 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:ffff::::f:::::::f:::ff:f:f:::ff:fff:f::f:ffff:fffffffffff:::ff:fffې۶ېff:f:ff۶ې۶۶۶ې:ېf۶f۶۶۶ې۶䕥fې۶׎. pHYsodIDATx흍yAY^l^&mSgxک9i#-?2G "y~?I/wc"+ (x (x (x (x͈}AE}s3`4:׾B q;2\lhk{3A?2&뿴/jz72zz2"BОDлMUbӕf$Ϋ+8,mBG}(Sm|7Tti'y3C >~&#ɗȺ7V)k9g[RУ״FPfuH?G٨ӆ73 őaAk-{9Z\Z&_/T-[i1Uv!ڲ7Za2!hqs)N$mb/iUۘT|%*o޿~!h IjXJ/~Q+3EQy62ZS5rՇ -h4A|AW}AJ4Ǫ濖Lv(r\6&=<;J7Y>ĩwZ2r%X3IMAJ&qUEqz/G}'۞/fBT.hq|le䜮X\|z&4 gBPyq?f;OEz̿izRc_hMrIzGA{gIKtbW3U3!/ݟEUGũ< t'5s"fY-Hр݈!2;<AkAkAkAkAkAkAkAkAkTAܹɛfup<}Ҍ(l@,BН@8߼ߘ#LQZCN+']=g 4OFg7ɻo/W>$ >),l<&_d3(}vΫt״Lμ({֓bֱbbʇcDy"4uRM=?HbEC{JA?Dⵣ7럒cmi ACKb:ϞeCE&y{,St[@M?L,cRŜULً*?Td6gEЭi@5N.ZۤM}[P1R>t'Fb!aypX1'}苛=-~޼CO1?":/=<EMNód3oWz囓*x5 d W^:rjN4V > )蓧r!ir${t% H<ʣ8j5 (x (xXg2yXL)γ37f}WycoAÊtN.xԩsS/6Eٲ1iEڃl,q zu?})~8ߊ4oL-hX jw:d>m ȖM+"h/(, &n{?FAţɟ䵻܋BQX׀^-{ V=H?YnBG)an,iXTHwٲ1iEڃ̫ < Pe1 vH2Aˣy֘$6݀7wEmL-h\ ˋS$.<?>^4kA{Ȗ+xA5!D'5)pR״˙u|;(s50Q/ߊlۘ[м"mAj-hz?"YHw5Ze^yv_ٲ1=yEڂԎA's1jz M2ͨ6f7ژ!PP ^5 ^5 ^5 ^TѺ=a5+?Up!Wd pAP ^ it/4j 5 JB)թSA\>r3ZAMc+y1uj-ZAMc1A|L/څêer3ŵfrљA\̾㍠څH>袞Lq-|tE.8jAAMc%xyE.&OZRNu"h/l&_M]XLx4"h֒7]څLnAPXJq))E4o^)FP톁@Pps/4j{|}$A kɗ ]J0h1]XR6Kz-A`4=չDPͱzE. 
5 xv3&C^XNϪ!(va;9 r& A{aj:I{|0@Pps/4j 5 AAM{Aн@PӸM~h݌ְܕ*L (x (xMxk1`CFYGzȵpsqDshFY {T" NYmf"9YD3 $DյfBt->mA5-@*ϋGP=h&XAՔNE43_A,0]FDnմfBt5<A5-]4>.Au-LE4[#E4yrճf)^kնfd^;#L&-):M,P]Mi*BA- m A5-U?erճftS&GP=h&/,AEm A}dTN&3WP lXKt.N-wF.Vۯ_U^[}Eg>2~AJkY\#ZmXGF.hWɧ?ێDW%p&Jd͌]Zǿ}T7Y3c4Ѷ9o,{-x%U EP @i:wش8 ha $MD>|;keBP AУb5ݼ ]Ț A":mNȚbNA 3yOϓ}|"h>]F#k&AS~VoHn΂X0;çYˑ5'Yˑ5MS*Yˑ53~A>ѯ~Y2qȚ APunmYˑ53~AYzjJ j%f/GvM4.@}4pQ ͛8>A #I@=$AM& DН3jPuA=#AwGPHAKFd|љ|` }I:Nn`g@ePAP%"!Tܓ]y!ItLzH?]A d9th#zi [%DPC |` $XB5*A J%DPC#\AMG1O=Ϣϯ_rDPq<:b#A'&Lpp"z98Bt03y$!ItKAлhɎO@PDpA kZ$A lwH>j`q___w©AKDݓ,!" APzF04~EPO7}#A˟Dл<䖏#AtY|sUZ@O{Pyܖ?O j%DPC"z}KL|zF86PfGPEG)o#g"bљwg񟓃З~ g#i;@]QEAz{}nvrg-.AKAPܷ_JX;8EPDqAEm A=#AN>}W]|<׊,"g NRL6Kz|BФ|yǠ@ V??M14F(nn3"!,Y}&A.Is BP Fb˳:gTܿi1(L(㎣gB4痯^EP J ۮEPAдkjgy!g#OJBP@P%"!TI>jU,!"A__(ă# A`AP%"!BW%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |` $XB5*A J%DPC |CGzܾӼ+29@ ^5 ^5w<> 8C~UNA K8,A;.A K8,A;.A K8,A;.A K8,A;.A K8,A;.A K8,A;.A K8,$ ADTGA]f]. BлA]!AP!zf]. 
D)&{A]tIr C'1ڎ]|D' msAv8u C/1ڎg| KN:IAv1W;A۱|~fq^MyE.uN7EE.l>Lw-$Ař//TA۱|U{''_;㠎Av1ڎ䍗2TPE.1:AAP h;mAX4*z:Av1:AAP h;mA cu Av1:AAP h;mA cu Av1:AAP h;mA cu Av1:AAP h;mm;GQ;> 8C~U.t5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^5 ^=wwLIENDB`vcdExtra/vignettes/fig/tut04-housetasks-mos1-1.png0000644000176200001440000001731514470742315021515 0ustar liggesusersPNG  IHDR@e$uPLTE:f&:f:ffff$$_::::f::::::::ff:f:f:::>DJoff:fff:f::ffffffffffpDp↨::::ff:fې۶ېff::ff۶ې۶۶?jې:۶f۶ې۶䕥fې۶{߿ pHYsodeIDATx ]Fuo٥/-[.-PZ.n6]A"YN9ql";KϿS HY|6&@ߤ,j\`h+jCs'YA7IuGO^ Us_ ݖE@6s U#YAK+S9Z<*Gn}5c}V!CRj|l  "AAIP||nڅJ%b96~ڵcI ?>WCO5tWQwO7Gps28MRn?||!~TkU}?z8{KBn~cÏu~~fTk]`c^8]}`G gsv5toT{75n҅$%Aapnt樶2l^Wcx$r §Gfkj_uzZ?cKImΛoXOex#A땠7O vP;urLӑ_#^$kJӗ&fz\:׸7 WafVGĉڈOSug3+p 꿎t>ʈ:z>io>"Gs.h_Wu|jG_[0՟͆UmA;*N'UG/~z-##".hOWuX1G/Edqt7J Zß>>2Z?-2iA_?}̻GOWPP )UjTN~\'ڨڅ۳t E'ʂ~^c6)IQݶԝfteI-c&S}Vu3n2T4~-+j_ AޛᏜf?'$y)hs5CpC }lWi(lq ~tt4]|g ߟgiͶ(&=1q5?⬐ҝEv6[k!' }5O/_+yU/,lO\"&o=* vy{[_QTX<-A]6 ˝=̦Ơ'C_ 53ZELPM@}?D0i&hch:?\`hEgIPOVͶJ%}2X4P^P߬ w䢘  D?S}R(v{0}D5󻀄Z^]\+*Ep\k^e04Yd'̥7=chj{{8A= Zv=i?Dxk'ѩ\髋O.R} ?[M44 (J* "4 (J3 i֑K+eWFqyfЂK. qsmsg3>Gsds|VQY؞c_"ƹt"hHjqgf,sۃGONU44]ւMx_r=揆7+[ceԉ:iAlvD"Z'z=~i;C |{Cj:ʥw\OGiǪVeAsETEPiAAPATEPiAAPAT,U{P (H 43rūx*Xi]+DW-"=u赚APASTuOAPQ=AEAPASTEA} @4É4$FЌttqK‚n.q=tmC_ ,h)V]ҤM eh AAYPhVw|fDva՗&A3BYBP‚&I]=")fBZKt咺mbtW1z( zsKꞂ { )* (Y zy0`MAAiQ$]7j?iK.ƫ] z~)ߝ(k)CH u{ A#1^4zCH u{ A#1/aDP kd/" kd/" k^ĊT}i;^R=PfZ+K"{EWKFBVPmWWًeWto)CH :;@׼8WQ?}X{ A#*h3Ivb"1u+{EWKFBT&{m_ta!h$}Qtֽi;{EWKFBN-Ec n!h$=@FAc,uq潩#G&ZKW 4 d3I B2y 6Z2iT64$/^* "4 (J* "4 (J* &+hcK+܌wMWZ8 &.̐3[8 . 
{xtfj+e?qN y}3;qM]PXݮFVsǗVPxx-ox4yAmOneh뒖]z ~qw4>07mNW>n7;qM^:a~YY $, F3W҃MV{w0t'.4!rIX-vOPf鑉:[ڊ]{>M;&+EtWJ}GcPWvuKCA5x#hkiw4 (J* "4zׂ&@2y 6ZiAPF{tz흉M*].'uqޚҽ,3!n A@vK*[BP9ʁTmr$,θ|%#eA}STmr eKXVwws9j\AG:@y;$7䆲P#A]Ki!"{;Ƶr,Ŀ!/DPkACTAH㖠Ų^nA!7AmAv" jGe;}#7A]YLZ469mWz AMP3}pgܽuP׿ *H‚ʁTmr h%A-!n)f@3Q (H 4 ĝ77L%abkC%NXL iAPAAiAPAAiAPFHPbh-۸:ئ;A`Kw4ҸhnOP4iMJ;V<Ѕ h˨G(oee.AZ&q3˨G(Уs7sAoc팠.R{?z:Q9 %hӃ:!myǠe}_>>˨G<;A6'N7.&c=hZ]쭟ȨG\fV&zP掖44}56,9^A ]A-!h*j|_ו jblwg&>~e;+} hJ$ h-mϟ/~Y_^^ u]}A3YUϷFIGPâݣugVC=聖4!{BI~Yk A"A2TzP[|MU-| ;\ܠ$#hSW]ig:4%%MM67(mnP4ܠ h$AAHls A = Z$gğOh-\4 (H MY9S?à&^K_[[h+^,皤  (J* "4 (J* "4 (J* "4 (J#$T8J (J#)hS]ΥoU{͓-Bm j(_2u@l̈́hVEA_V/|]tE|Uu=go;'˵j5e S6jU,NޗEAsBIPu2i՜uP'hABHЋY}fDʂ. A3"]A* "4 (J* "4 (JVyugxDk APAAifǦ'ﻱxkwrMkBiAAPATEPiAAPATEPiAAPfA7sYx%xAЌHP'8vA4{l/parr1YJsntЮE%͇-K߮+ԔYAc~O^Ԣ{@Tݫ+Whju_\9Sd4٬zQ_أ++ij&/αwWuK jWP#h AEZt[B|JOFнrۗ^0wah%h)A_=%͇.ѶAm]ӂs:ZB|LPcP[W A:=h AETEPi$5lZtW=惤* "4 (J* "4Y zcnl.!6ZiAPAA$fwK&^KC{^nI*7&Mwm4L}ke]i/I/bR{WxhA얖 ~aw.8GRN't"h0mɄ>~崎Wy'',-S hߜRsH/wi8CIz>ԃJXZ>@о9{ə^fz9={c C7W|Թ'h{D2 'Mwm4L}k,l聯MAAiAPfYO,6Zz.ؽ z (H 4 (H 4 (H 4‚n#C/ZBdԆ2iOɢ/vSK,/6sbn&&co~߾+jj A兠E.F4svzZ9lC.sˢZ,M/inv`<&n?[ u?BL~==h(!Ƃ.vKMVhƠ!?I*͔wuQǽZ~~K,Lv#7{;A"h jfnT]˽oeKzmN:hif;f'&]Եg.l AAAiʖ4YAPAAiAPAA#&ZKW 4 (H3,ePlK( D-46 hrTUsf=Ll]Gs%}:u[św(}8tp-kٝsA}vJwƳV0ᵾǮG])p ZЭPO/μvFU9ߎBZQ57 K} Zz{У҃Fg}1]cE=m{DоoAy)[nTh;AGuuuкGscOPr{0M>=qA흮*QTugV R; Ac( ^46Y _KPz ` ,.b:/B/"h( t\LB"h( %o?{kǠfF'h[MeA}U#rW')\-&:3t-&>y[MD]L ` 4@-&@^E;J* "4Y zQލ䏊MAAiAPAAiAPAAiAPAA\x]}5 &.p-zbd.)_"hh&*)핂ۛ44zqup_ߩ:R#g(av]}(~AЛ0Vno>>7&F[:z渾?v^d7?):CV|6ŨĨ-aNm2&Jo&D[tPpzqaT4OK4+M6v}KPƠ@Y(^کNzOz#z#:Ou (H 4 (H 4 (H 4 (H 4 (H 4 (H 4 (H 4 (H 4_TpyIENDB`vcdExtra/vignettes/fig/tut04-housetasks-mos2-1.png0000644000176200001440000001733014470742316021514 0ustar liggesusersPNG  IHDR@e$uPLTE:f :f:Xffffff 2::::f::::::::ff:f:f:::Joff:fff:f::fffffffffffff::::ff:fېې۶ېfff::ff۶ې۶۶?jې:۶f۶ې۶䕥fې۶Z pHYsodgIDATx Y KoȡKHI9K9 " 6jd[9@s"[3~_{+;dn') 
!R|6K!t9N04]?K04]'ٖ0/EV340'A<4r=[DEMGs0$]}9ٶPCoQjRIԴ)]M'v<{xieFNzkr1fnGN12]^ʮPhf^t/@xe ۷q1U͍p#Htj^VS֝uTTBHtXfڦwm>AHTUC?nr*!iÍcVǴ4WGcA( ݹ\ͷj_Q:rXM{!hQM.m՟z UTchuS,jF1F[,a=׆n1tPèf O~G\Pk)m_4QБZC'U'ZdFC Z[XpeyM5{sQnzC -膅uj:jiѭ6 U#=*<0n<pM}ߠxM1zxo -hmaSw(H]Y%* [cUǨI‘n ?l]%M 嫗wou4ܪITZ,yxy/ɤ}hKL`.O7QYM~\`}*Iez: zb,w]w=WJ7 ?텁]WFtV߹ =Yޙp2+Ʋ_f/(>N6Kz2ϛ#S֟e(Ud/2 1[jTH?Fq`^8.sNWhyoY҅AT&:E]kb<". ;+/¦wyٮc]ZJ*C-]lޫܗBHp-ä`x^2Z;[yM/ BWR?.GKFn|OPglIײidףmC =jT8:,g]m /?}gfzg.Ff)s$WTmw:d5r4W}f::XCծ4>5qo&Px$YC[!z~w[Vspx5f =m EЍi2) ;(-z~XhMk@5 L\I6A.-9 .0qAs5|_l /z>1W.e?O0a);vM DN(0/iw|dZUIW"KP"Ac"2AFEd4AE*EP (AT4AP "hAE*A'FЈ@P "hD gv[TP-VOo8xL/!Ua*Sm𗔂}>*Qƃ@Au*Dl&&1UNLޙA#B\5鍠[%WjSGՆU%!h45u&'"hDTK[5'A#B\7Y jY%z{b A#B{t]*AHތ}_62F$AMDv.R=ϔMkAyP#54" ڙ+WO! A#"\Awc4@IAP "hAE*EP (AT4RSwٹş6J8 (AA4IzG]Xt6JlO9*EP (AT4AP "hAE*EP (fAs<'/ s4"Ԉ&m.a`"(fAGzJƇͦX,Vw,q$͵oB{zÊXH Ig9lm[@.{V[nl|XmlԛzFfݒ4T]CjW67K- Aa(A/JЍOAwKBxJZ q0EԌӻs%!h< &>A<=q"*EPшT}sog|$^Z"hAE*EP (AT4Q zac]𧍷AA4 AP hOxƟ6J:cBCO& P<A)*5OAP y E_PKGИ)μ[G疄 W"?o1F`AUBN;I૧7:g$\:J_M{0 hDH4noIӉ$ 5w: |A#Bu%Ao6$W Z$EЈ,h^o)h%rջ#)W;VIzH4<A)*5OAP y %JA!Wx*hi$3@P hD }>i7"+_}sL}W.?@P "hAE*EP (AT4AP@뀰L'.n;#hD -hZ L۳0A#Br4~:mxigV,XP"l4"D \'S\_C j+UeH̪Jm2=Ђ4OLhQ'9M~=h E!hOV}u zIwJp$g A=1wzCPO yg A=N<:ޔ?mp AP hD(?}W0 C!+zq|hrS{_I;Qt7D!oӉBP hAN' A}NfhAWĬ;!Ԅ;ԤE=tXTnĈPNfXA&T-xWQv7o0S4I n@a @s6R3K}jܝ܀=rdX:no᥾a5>tlc^60ڔ;ml&&TxT)AmA3m jl܈kjЃRzFySnAӪV}ж^8<4(mA{5_u*͚jtl'FyϜ) h͍ܺ]{ h > MvKމBP߄*wA>A+tAN' A}N&JAa7Oo%hD ф>.SWO"|ʞNE AA յW2]6JZkWG0sg$?n6md8A2idsռA7N-,r!(Ʃ%6N,{L=,iLzezmN,n[Q"L8M̒Q;"hO,hV-h%0kA{BAy (ȅM;~N }}շiWNf2&:&G>9k~kt5"vKZk振:7mσf*/% -5@k"W_ICt +ih^]%Ewcy)x+ D  ((;"<4 ^񭍿*nl`= '  (AA4 AP hD  (AA4]MMR[Mgͪnѥ$6@Į_B'GS͓$xsF jcu]6jS@b?@_( ՂR*[Eۭ3; E(%e=W)&U9aBG0) Ac@fo4uhͫ6^5A@ jTfUr6j>hȜf[ֱ';O{਎.~5'Ac@;x=pOL cvKB;Z~Wyǹ4  ڑ嫗#h+hv`n]@.+PP=$ (AA4 AP h"4!>|k$3@P hD Fs}2^Wo;S㞘>EP h$KBPq h$KBPq h$Gvkα9$GȂ[Eܽ$KBPq jWSiYy`dh*xU1aDe.ǓPF=#A'&I z|(AvoձyuPclDPmzGȂn֠V*DդԡzGl։~;aAu"}u郶+=*6,UTxwE[ʨGPq&hHߌ~ݛQX+@vI*m@vI*m@vI>sq'Eo%hD 7 +G޺?m܆3^IGvK^A @`/W4{ ^n hr{A +u/l$  
^n hr{EuLӘިIˎ4 'WM\9* A"AMrgwzӧ+i詽ħ}ӛ3EA@P "hAE*EP (AT4A`M\uR+PFICApUrzӧx+ D  (fQo̷6Jڜhz:КEPhD  (AA4 AP hD gM){%!hToɳR˶YA폋JB`4DAMmfc*#T'{4\6m5קK Ur)4=jBlKu?"_,!tZ^d/Jrgך4XD7f?6嘆{yI^C4\.ǹ]@5[>*5eqKP#4}?͔!QQ-J*l"^P\m郖z̓mA.CtAW*TeZ k=;!hmN3x+ D  (AA4 AP hD  (AA4z J*5)j_N/l6@ЋH㨲;R6ѵ AA/b9.4iY*9ߒG/~<zZU3>+ ծ/ϓY^_&Zd'pzY2*}3ἮO> e2~:4u @P[?*Χgg2ԾPjӴ J'BRn( BBr#aO6-fD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 APq3IENDB`vcdExtra/vignettes/fig/tut05-spine1-2.png0000644000176200001440000001246414470742306017650 0ustar liggesusersPNG  IHDR@@RD)iPLTE:f:f::::f::::MMMff:ff:f::fffېffېې:fېG pHYsodqIDATx wE'!I#VI>3<5#U@bp @ @ A!B @ @ A!B @ @ A!B @ @ A!B @ @ A!B @ @ A!B @ @ A!B @ @ A!B @ @ A!B sE {TE@0xġ%W2~$ªV[&!*j!.ª*j!.ª*j!.ª*j /Nv4U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\U-UXB ]\Uyڮ>i3Ǫ{xn~{;Ū}>4 Z ]'@XZ`f4ZK]Ph2VƷS`Uu ]\U-UXZ~FXZ`}V>oW!8V7%@SBb# XZj T/o@XZj!1vvZv@|hZ *uE"bUk+achZ U%U4Zª"eaUŪ@ Z]T@m=4Z %U@ Xb@ Z t8qUZ?SDŪ"sT]khZm@zrYZǜ3jVyCXJ <a,Sgz; ¦cU+%Pwj@3z4cU+%P7K|YAqju}rGs:! 
XJ큺qT%oEyFB ƹy{s)K`Go4 -$/Dho 7{ 4x|Cn^c>8y|Cn^zCQn_]w~B[k˽U¦-J>^Bqn^ׁ1i 48 K@ܼ@ =M SBF桮Xgu tj LML#8VޙzUh'?ZߙzUh'?ZjcUk1/ bUkYemlHuXmty[Z1PcUK9 gh@9X (!PV4{hZܕh2V+1PZ QvS_iB"5FH aUkUz<@YX՚]vYXZ\P@ Z y\XZK OǪn!zXZ`z"q2V*j1UXb:.ª*j+DXZjN)U:o7@\UeDk:hZT%_U%*EDê}&oB,j-qE"ư5@Ī%R@`UD]\U-Ne*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ª*j!.ªVZ(ZIڞ* Z)>^\|-U@)DEGsÆ?K tBI\U)hC^t %qV-5؋z$ªVJŦ{Y/UXJ B^t %qVeNe\EgPWaU+-9op9hZ&|7B @ @ A!B @ @ ąx-vEn7^;UsԸSʠV %\2}ƿm(S[RvS*o]|Fsr2xscNo 8p7ĸg4+2"NqQZ웓~ ?qK\Rzo9~cf }{/= &ET]YZ7 e.(PU:#TW'8zu_mʾA>GLYGGZ"^rU1/*k sdK)ZAŏȋ?º7yzq9Vtt Z:!Vd'8H՟hH6# lv3k9]OƟߙʪV)LaкщūXgw{@ڽʢb A!B @ @ A!B @ @ A!BKu폈pIENDB`vcdExtra/vignettes/fig/mobility-mosaic1-1.png0000644000176200001440000001501414470742313020643 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:ffff::::f:::::::f:::ff:f:f:::Joff:fff:f::f:ffff:fffffffffff:::ff:fffې۶ېff::f۶ې۶۶۶?jې:ېf۶f۶۶۶۶䕥fې۶y pHYsodIDATx {ƕ@AUڊ4myݨlnvNUIEŃ" r`39lS^`x03l ,| (AA4 AP hD  (AA4 Aގѥ4Ꭾ,Km3*:ı'h1T mQ˛ٙŹ-jAAmQSg,;~Zi=zeݔ,eGwpWe/_+qųLSv=~AYvUjVG95۫VߺjlP |%]y%hGXmjM~~qzS|uWz:L\:?~ MVwz|k=J+tyyL,ۘV}^48)_}|s񷯲mA ҒƔjLcN5fVMۂVz3 4 hƏ=,zVAuA%9志VoʮltzNWC#AA6:׏ms x$m mV/,x[4Awx{zfrۜIZ}{֋A=FU7!i9=[}xH F=0A݃@P D  (&nAIsqZ~r^T̲YJ{(X y) \R٨| `:vtTu8o%EAr¿=^.? jU3'FiBl?s{JӼ:NLYfUgO:kKм9/:e?AeE?{w*̪|w|2S=}RmXrkW˷V,qz@BV}nꟕ -&u54ڪ3'FhQ /]hBVմRjFbRv<'7mh:vtTu8V=5>^t +A|ՙG0# emUr|4}Oi z[ձ{ܨmDP}z5(ji/k{br%b :vpu0fjUm̌C|/hYeճ4$Vַ 07ت3'FAo#hUU8Y[P4qA[_m:3ު3'F.wwwd4AP hD  (AA4 'h". OHbbvuLVQ^6 Խ62YmF {meڌ2@Pdej3Ak/fvգkZ=C^1MqG".O+2YmF s\dZj3A(SLTTw$%h}7WLSuܑս$4Ui ZMGyTw$hu[ h#AyhN&(uL |j3AÙ! s8?uLVQ^m4U!oG^1MqGb=):4Ui :[lB^1MqGRN h#!A"h#AM&(uL&f:Źxŕ"KTw$"( vbDdej3Ak$j}Y E^1MqGB[7L7p@^1MqG:f4o+:HGŃ17i;҃4Uէ:+:HGPZ=i;#f_&(u퀛; CbD]SLy؀#5AKC[!h#9AYTw$((gcu! 
s$ A0S2Ӏ#AYTw$"hw&(uLVQ^6 Խ62YmF {meڌ2@Pdej3Ak/f_&(uLVQ^RŞSYi- AP hDS{$T*mgsi/Xpڦ6;AО _'I:HZE'\]' ZwgL>lA{%3A piBjL1/t 2Uy7CО$&,=#("Se"(`?tda.ړtL:xOP|ؠ $A^"(0S>#jLeOʩέTjL&EPe>A'"("]') ?l2AIBNUW,A`S*5SR!62AIb~84A&EP\d*&2u6 A7 M9cS*51TjxO A=BPCmea.ړ$e A@PO  A@PO  A@PO A`S*5_&6肠=AP hO/ړIU[&D D?IfI L^hKOzpKS{N^a *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *t` *Rl0~ ê AUD-h+Ke&Ug/Z|,;3E&Ug/Yyv_/J^lu&Ug/[=kYa *bt|$ A2{ˮŦM&Ug/Z<;ztr*j3IJ;;UEP\ZL5мՇ"(08Sup/}:gt(3UgSzPLoiw[j[Xūg?[k tEA Ia *Rts|gA3Vܫa *tt9κnra *bcfBP7X?+񥠊*39:{TEԂо^a *b;BPq |iL^(Uw41EPX\qN&Ug/[{339:{TÊa1=(:`xǯ=<_GP5T}L^(U^(UVPAVP"fA_~~^|g&Ug/ZPnC?GOYEPW> 2qA09 Ac$vAoM^s?1]_NwE}5o13ţ4F"9_\+1q]4H/~qqq՝IJmkz]\~}(FH삖/9# # vrVW4FxR_muڨ4׊L[RݽܮZrj_yד-A!AIs['&C .d Z]&JϪs4I@К^Y|"F/׉ h h4~AE*'H~Wţ$zAȄ* A#AT4AP "hAE*EP (AT4AP "hAE*EP (AT4AP "hAE*EP (AT4AP "hAE*EP (AT4AP "hAE*EP (AT4AP "hAE*EP5d/ Q  (AA4 7,+ŵ'8%Ǻs{&EV*@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb *@Pb]7=ԧ!h((2i,b]?VeXz4q ZLT4^1 E܂.]*2i,b3Ivԧ!h(IRO&I@n$ AqsϘ$A\8' x h7N2ZϻL}K"nA$Aa$62i,b~ h7N2'7|ܚ#h'ڍIrvr h7A_2i,b$-\".ڍLrvqAq)?fI v:ԧ!h("T})A?vFP v@nT AAP h7*ƁAa^meXz4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U ACZ4U AChwqψ8%  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD h-TIENDB`vcdExtra/vignettes/fig/mobility-qsymm-mosaic-1.png0000644000176200001440000002071314470742314021731 0ustar liggesusersPNG  IHDR@e$uPLTE%+3:B[f<K!]&,:::f:P$$3$_**,*_0000$[33T5,::::f:::::::f:::ff:f:f:::>>x?>3?oJP[J_JoJoJoUU-jX&X[[jff:fff:f::f:ffff:fffffffffffpDppwww6j!_3:::ff:fffې۶ې?jx[<$?jff:f:ff۶ې۶۶۶T%ĕ-+65OK>yoJչ8῏D*Yz*G9?_#,u+Sq=4 AQh&bQ FS(-_h͑wb?i@6{9 ,e\<9fAiKz Df3!,&3A}=AvNob}/ &2o&m(X~N(|m k*P)+1 bB5M7&kg"L* .rAФG[Y1^cB3Իk"dU3/)$:D*dBPQg %9hDא9As ёx%݌m.Z7[%3S+`eBPQ4tzOyq t߈(|7wJL%\6Ӳ+3\cş Lý+fBPQ4:GZ"_ ?RBsoFp|8t~mp29'h.r 'ý+fBPQ4$E}כ'g-.snͨUÝ (V*n6*a\G!>TyZ 
}3`4ײӮeBPY$Y'|yAV6f?Bb^"h6rL]sВcH臠ɥIpo+/6F^˂s›'nF/[;+49]mU䲥3_I .τH{CwUvK喝'4l ͽ2G&>G.[9.^ GT wß|з A?AxR|ލ:[f _}${]EA5Jt37S6fzeUVu)"2*94jBكSq1{xḋԹ 5D͆Sڣ J*i %E::̾LPtn  AjrY5ʯק y8}QTP?`ktQ@+CΥ>|1:J:<ɋi@so2%8'-s>#S%oOAd4gFЅi@9ЮT4m~+ƛڶ{ ZF hߒGS0cN2 У~ӻݿ;Tcocw %9(΃ypoe7'y;yvܧOFǞoWo}'Ӌ/-̫}o8ܭt2ky|Wg/i^xg?  ҀASx'A+A@P hD :FbMjLq] xl֨mAw&HtqAP߂FPox[ `gZAYAma~ z;ȋ}2xy"hL=()HME;=x;3uY(|ot-U;z R$#h'm?@4q> .o1<:5\7@PL>(tE:&DPpa*.o1<uK̾R7"h<|r]Auc&)up%i%T7\_  HmrWA0oIԉ0|RlBP`#wzOgmXO. hB^,ͷKPy2.oQ'ho ?;{M #|&&>, h$aׂ-j; ╍f0h1Ў ga&ӝ3dA3=ΉcX_|537"ß[б-hSYsq'"hϏ#(8J nt%T7;4P`6ta ɧ}wuN8(¸^IB@Е@Pp_  @PN>b A[@J nԹ]  AWAu+AЕ@P J n&?t-ilp[~E (AA4 qK)VeZ<.Av<&wQ(t$!MTHIr] RS$DAU)̥=mA+6APU ^yxU0`@'&Ja0WЙAo2MQe?8[p\|;wz&d`,6x/'ǩ ߿D;TɗO_\uPqɝ"e?4fxGZcdnAz?m}eRnAZK%_FODM-8- /9GxG=Uy<,cR͸gLaqn A+6YJ3q)Nj>A U6XR z 5d9A;EA;nAA;sPEЮj"9.AU)&Gś *|n!hMTp>m R·M7APU &Ja86AU)L$a%6APU t&Ja59.AU)&R,PKzRLPhD ILc j䶇51c *ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;Az+ZÑ;AW yh&ފpq@QkTV #wL=6}"D\錓XkA G :x8Ir3,XkA G=޸Fhl\oZ b8r_dJLk7[V #w΂}A%A2rGP!az`@'I:.zp?oEAg>wGP8 <'^2q@БU٥8ފp鿠ѕqeyr%Ik鿠?ď,b"r_а?trO4\oZ b8r8 fЍ_Rn]x})ߦ"D\t6}Z~$t熞T"}"s1i:TQ EP\PETM^t4nAǴ鹠ؾJf$ӏs={HM3erhaEP\3O >v̙ٱsAcd$r]лߌZ3J k1}9oCnXOM8[3f EP8 B8wؠ8xb1WA>Ƿ׾<,eBP hm`0+$"w 팽I$i14A9a&8 e O@〠!/Ib1ctWs>Lb1c\Thlmrk-〠 MSXkA G 쐉 E-ʹ(ފp鿠a-AgnZ׽yz+ZÑ;@4ފpA4ފp鿠͓XkA GMXkA G ztAo0r8 hJXkA G :}=xXk5c/hDO>V #w3rPq@p85c/hIo$rǸ#u7#D/hz+ZÑ;AOb1ctzmo%[ւ1:BA$14f 3d$rǸ h$uK'1uLw䎙+G7I?U'@Zw4]<@zJ /i Naw~37<>.E`wNkW`c3NG^Aձ}ʃ񈹓RAod7GKcwAqgǹA8yqsqA3O >v̙P+2J/49%9hK\TkxI\Ͻ؍{GV8!oF-_Oֵ7p6][4rR]%]D*|UjFG, hc40Ф3,ԷOj38g):mch^%l<-4=?x,3W]p|9Akf8"Ѷ/lЋGPk8#(x\ɃT23^N#|uP~qE㎽T2k%kT2N a=*  F_i,/36|R A TZH@Br#^ۻţnώZHpAVTZH@Br#,Ae* -$GPY h!9A TZH@Br#,Ae* -$GPY h!9A TZH@Br#,Ae* -$GPY h!9A TZH@Br#,Ae* -$GPY h!9A TZH@Br#,AeKvK^1twђ06ԁ  (AA4{>8`IUYM2jQ3xSVAk+Am6AAP hm Mr&Z[9j VAk+Am6AAP hm Mr&Z[9j VAk+Am6AAP hm Mr&Z[9j VAk+Am6qCУm5n&yAP8!txb>yAP8!M+Am℠VI򦕃6qB;ɛVAt@j7K7IZVC'2Z $ h52[A2Z x h5ƒO'q򦕃6qBP:I @j$YAAP 
h5QoXщ`#(a5؟@P8Gv?i򦕃6qBJѓ;ZA1|F[ h5撏6|z"hZAAP h5F4CƜ%w"(ցAj2Z ZAAP h5jƐ^Aj VAj2Z ZAAP h5jA- e VAj2Z ZAAP h5jA- e VAj2Z ZAAP h5jA- e VAj2Z ZAAP h5v7GkQ3H+O P_u (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD ?4N HIENDB`vcdExtra/vignettes/fig/tut01-Arthritis-1.png0000644000176200001440000001651514470742307020417 0ustar liggesusersPNG  IHDR@@RD)PLTE:f:::f:ff::::f:::::::f:ff:f:f::Joff:f:f::ffff:fffffffff::fff:fېې۶ېff:f:ff۶ې۶۶۶?jې:ېf۶f۶۶۶۶䕥fې۶F pHYsodIDATxkDv@f:mȰ^3,e7nH%3db C2?&^%-ՕU*9mY%cS@@:7"D @ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ b$dv۲|yrݖ*_~ZN$xJj,g6I,i!F>NHOE dwI@`w|uEu[vj[8c=vu8m t?qgYH@ Z'we9 ݏ_ϳ$yP1e[}Z>޾V;$P<{_O~$s,&C^jiuzco[-ݕ{rhj,(mvt\$V626?ѦyfӛSH.'惫ͼv-mk>l;[ٲX%|ʬbv}b}|vNQwUoaɣw_'wU|^(SeաqrGOrً>;ocznaw0^_%:/\wo[ی]iuFWGhUO>2&";:VR sWѻ@kY{r@&V>,6OuZ/]T2h}WL8+Qʚv shaDu s:!P˶եn;{Qɯݨ,pߒϪy dо<YvKaOT5JU}#еb;?(qҫWpӺftJ7SoîqSznAtՇEm@{]<{}[-.+zS@:ijnXjW;(֨HRO |ϼ"hsŸ쫷@F4*O=h3?+tq#rWx m??̋ZT6ײF *P-m+^ T9+H(١:oIv.ТrL@:^ ~3bSm)[)o)}e=*vpuUI=;y#wk8++P~t/*i I;@Y"HQYu򳰋IZ[(rZcHZ&eQtԬڮ~zb+t]3$;i]3y \oYmT^Vc̹DׯjhjxDZE&EͣmK3 /<|TOaz nխ%y{nZ'/Ϊ-v~Q=@Z~>/+z dk~zTFu~~33: P=kMD|*bj8α5~<%@^:q"hx(^`>N Qǒ-N@@=@ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @g$>s~{itChwف WwktQry2>^~wٗOG/7Av<\+3<[3*H{K?[4Zԕ3獺e^mjIoNy$yn-\ܭ+g*еz!{([EIEJyy s7tt@5 9g.4}&Uʢ#] <9_`ѫݿ@J5GD@E92ْٗ50+^$q@?̓Gm$WzDy @ @"Dv Ru0lSkX{~6md@t+vyy@}hx@"P(Dca\ 'sO>oŸ|c<_;l?d4{1~Zeάu/v7|> *VyNR@@Wo?5w+^/WSBXnz4D.Тw_]WW ^ [1Cj&~0d̂y1(躒S\XD?@ uzu͂9EQ{-Pt@'&C_O{WZ~ +r:5a6Q"_f>1ꐪ^LLT*rZidLD U:Ի+sW£P\OVȱ2H __P ;hvʇ@#ԉ-+u)*ҷ/@ @"D @ @r ` C0&!Qv#l%e9@;P#tӵm.?[&' V2qdjTL dXݡzރXY `(pqh(@Twl;34"G͔cx+rA EPVe5@ EPJ @i QJ0;j AUKTlnE=9S}v:U @y8*Ps;T}f0وyC8@[}}cd#"_~( dD7@QR#&@$H!B @"D @ @$H!B @"D @ @$H!B @"D @ @$z TҺ$ICaOrvNU>'PeVA>o'T71O+E :z]ԉ( &#PSȽ P$LDui?l?M|jRCz @"(h3Oέ-엨\ 3ZCG@9 q W$UP*@\@@\ Ja]֔MH+PU[ W=hD@JTӁ@#@iF^uRD)C ֳh m'GuiE!}31)C ,t"ehU sS6:q 76J&x@h.ヤMN-Pkb)@1[zA;)C XݳN И$Pƛo: m'4@wS6:@?̓?єMF/_DOxCs,2 +"qRD%\DDwH1 d++#Ј$~>2 ЉKa*M&x@'2 ɻs$Sna""_?W6x鱔MFtq t@!"=A4" / m'tbUǔMNDx)$*6O m'ehF$nl 
Nf#"(L),IfD=B 4@S("nv7-C[ Ut$@Aꁎd#"hhms!H @cو"@"@&cƺ :4.ww3ݯz<D%@iy7sԻJES0وJQ# + tt@Ew둨#1(ub^5}C1ODFe#@Χr 4@Nc6e@@ecvI),0R kSڧH>)Б m'4@֗r I&4 M[Z2 TգnY*!hD"HO+e:R!P8bv%&: #*93hDb(n$ m'4v=P$?3HJOD @ @$H!B @"h*Ŋ}h%q !B @"(@caoLC+яlg TNid@6B (uVuD8&;fcW2z m'$k~ w~&x@3.Z~3@;8B ḏ3]rv`PA @O]-hDNl6`]c =(93uڥ. C j@ApLAʭE#P$:.Vو_ַsBpRxU(q *@T$#n+)PfU{@VB  TPq{T$3Շ2@ҡ̔MA0L=SjLaoAWmPJ`}UynSz$fa? POR'֓4*h4'6@c@eJk{hD&rR-3g LB =(SF=PHw(SS2 Р m'4(ꁚ]ahT C@R6aB@˻ Fd ڝ#U2Mhf4R5n(yy0jZ{3CON9S# W naH ,WaKՕ*Zw[䩒昊BN{4"C ؗ1e! oսE_*Ṋ .EPw-žN="Y2T޹A*+jq\|W>VС,< dB{UVV5&4"C@%AG:i~ [hzJt4"C ] *Wj[׻˻lRs9[5F И %'j`cgULuQ@] a݃R!Ј Y 9 C@0FEb0b(m@?*qU0)CɃ(o-SxqRE%axZZfp/DG0ⅮF¢` N]-E޹AF,śҴc?Ċ޹94U2ާq-T5$g]na}aDFđbکG*UW@iO@m`b}@ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @"D @ @ @bރrIENDB`vcdExtra/vignettes/fig/demo-housing-mosaic-glm0a-1.png0000644000176200001440000003150114470742312022324 0ustar liggesusersPNG  IHDR@e$u#PLTE:f:::f:ffff::::f:::::::f:::ff:f:f:::Joff:fff:f::f:ff:ffff:fffffffffff::::fff:ffffې۶ېff:fff:ff۶ې۶۶۶ې:ېfې۶f۶۶۶۶䕥fې۶;:< pHYsod IDATx흏{ܶ!Y}g5nZl;]NdlA H'Aμь/^d A3mDr! hH$mDr! hH$mDrpԺͥs9PIdj"͚(:m~Js@nҊ#n~h_H &ۀZ@}|Yn7o?FVc!T'wJArFIOYA_)i1=ӕBErpW(Lg:^tjIDVKdVЫU{XЩ%S)ɇ7w.C#{PcU}y !G ]nȘ1@ _ROU?:q# 5-6khh H ɄtP߈q@M~pp \@7ٍo~j/$v#8R;~4J h7Dsm NOA֡nz;L=B.U?B__TFP'T[ЏΝZtjɽB&򋇯kꓦ~MhK$#}jIʥɅ@ (oEAl|H$KՠPCZ޺Z@/ Z)&EX6!S]O}5P%#G) :RFhSTQ`fՏ2 oTۣ΢?IVtzOd"K3nPH!38؎v3cHe~Naza%8kqP^Ⱥ@KcWQ2}D=4OLvwAt:Fӕ @'<o|ǚѷ^CG =]3{O8zr9z^.Ŗ\~%$bn3w0 {!@j@N-O%N6_ǗGn{ ߬_IոrĒ:.@j@1-\|U Ht"sEKWI^5\ 9S5 n" .u@z+̊6Hrn7ڈa%S5 m H](z͎ڌ r:h꧜CUYV'e&6x" zxC 5RV8߼- Wm VQ(]U4n+ʫ~rFRڠUo|[P+=S5 dm H=e/*15Vנ[|ڋׄ" PO٦O;6|u; (<3%S-L/Wwn=B" vQEr5&E$mDr! hH$;Z<*#N%A\g\RSK$-P}mt: 4 ?ܦ^_pzԖkYyѭǿN 1v_p-3HV>#lJIM[Zu쒉6УÒ89IU3TSQRSz{_Y^':o 6$:m4P! 
Hynw|y˶"ۏ>kj1'vwJ'.Hn=Z7s~U)__2\Wwxm:TA% Hj$QQ2.=="3N Z5BTˋ-%ӣmSn[Ԡ" ./Nō7Afm1Orf*C]ʼJ!Iip7ES)$}OF߶"1H-2r6TPީ7DS&RJVcנu#4 ?e+zmd56\gzo̚Lp\ &V.5(,k?e+zmd56t!E+ Ȁ:kՀ͚?OgKoŧo\YY Frftݘjv͟6)3t []ѕxά@#g9;@ty"/ڠ'f͟k}#6ol\4rsԯ*5y%-&h4f?j:Fw,@3fnɢ&އKjPީA;=syi.divs]y^߸3:/fZǾɛYx ")@tVzJ:+P=>1x:DUXZʽt[DFZHԨVk)jbn/+ӉX( @Q&DZV?d%jSsw`Ңj35WE:  ZpV/ފxG(- PdmFmh=--}ƈ1 ҢLt/e Pڠ4m'Plehj ;(P]E]K8jWuFiaǦq.x*Q+@:II8 Ț'IAnY@ЈMjova\Ǐ Cd]k%.+3@ЈMjovaًͫN&W@Yc:(@E Z$Ad:(@ d";?T[B:(@Whwk{Ң(-oI2UZ$~?(ZvzVIlm@QZ3ix*GZnqpR/5nm@QZtj:@Ys95 jFhjT{ NzAmPӒVPg錕z["=M^XVʑI=*-@ A Ru^ uFiVSZ@ou7KPZ&rTZojH( Jq//_y' m8(% 7s^|hG mPW.e[uE Rn$ 5j:$Rs[dTQЁz!+Sh Gh J#ñ4%X6v Shj=)4:]EDu`BМ5@6*DA.UK/T% Ф -Q&UhB4BKtW1Mͥq ݎhR)%o*J$cAKՄn4H쓬1TV0}Y- PФ"O>M)$Z*bMOل8}tݖhL -m Qht21Byɴzm@'SFmHj|@}7PWi6QPW{Pm5@}LZ`X k&Y,6PhRNF^4BK4}7)Zfk#/R%K#jF^4cyK*TJ?$| &R"C4IM*D/@@p&vcRV_Cc pu `;duИtf*DPtf*DPtf*DPtf*DPФRJ?=u 1!d葝ű@8,'t | %C, jAd9Y4c@+Dv %DPԠsR"4 =s0T']8yZ'IO`&t+ԵBk XYD ;՚gqPZu/WZ#"=9R#E4M#L&x#I^֊ '@f)->{a$zd ޓp#MԍP펝-Qȝ$sr] `~Z닇dGf*H;y y"ʆbC^qzR6| r|iK)r͈ Q'U5@{P*0Dvzk*TnXVFtMsj~y hMT( 6Um$||u1v3ů%|IIb]ڮ! 3;I]xA"GB,kԏEv a; qۿ[ܫ[ZZ< Nv.n'+r_ ZV$~BFm$||u10.}Cȏmz$||0tsqrx0=kcdӿy}(nл (_븐VXf.L дT}R. `#woEy}j2g :Ɣk`Ǣq꫹$||iFjؽy;Y/N d{>4 Z |7lk$||u0`*ybw~]t}v'IwP~O/>{`N<8>;Gdv>:Nh$jЃ3נkNv.n'KǠNZA-Cksdv>:ͷ;dMm'o:ٹzNGǘN5yLBI`Dm>Y#۫^շ;>L1e@V/yΩgR/5jnҟ\_L?oZkAzeilAҵV''ۓՏL,nn? >9ٞ.:?>{tu Wy_2KI24\HC9ٞ.rG8V_Tu8;gz]+c$N"rs1'Ec(,qQUK}yIGsZHN }1 iuGJ?Yt$T]?QǾK!7*Ks?ۏtox]O IņidOrl8ֽس_նQ6躽Q$$qZ>+ugnVn.z] ^ox%'eЦgį ۰$;>i36'as.S׻lI6$ɷ7y$X7E/$*6n5B9+Ln'AOUy!R,+WFWw|']n (}Uy]X )l&uQOܓ߀56F9[M?\v25~9'Ln'AOPbwaZ8kʇ&X' ܏Ywv@vT _n3l h.d)`4wxX3@'ISե.bF=oqNC̐ꃼN=II_Cy ٰ+' ާ*ɴbwa 2:Il?JsKGg,~@/zm3GVIrʣijP> w"+f &$/Ri [84E7P5nB#*x|bQy#/hNkremE6DZ~a#/2k~ۿLVև,V ZHyN,9+rLꦷ^ԮXS_;+5ϩYvP8y_;&vqlpl8዗1u޸>訍b̭9:nXhN6R 'P>v_| @I810ZW8`}tsQoeX-8#,Fܟ!IDAT1Op6]\ƯWG߿}s_GIO|-tS`F> څ֤| AJ=^n,0&vWNҿYB1'IО>Pun @<ڼkfn$A%02<%Bf|BY:JWP(@=o<o7=CY㓇ZPhrPhЬe=i @ ЬU ? 
4ĎS ʸGX- Iػf$ {ة^S1HPQ u e @{/P4hx(sl*ɸˇ;nSz\$@ߛ̱^d";Oqr5-`A![0 .PP4hx(sl*ŏDdC![0 .Pek.a?E G "eJ+QԠ9(96ub)S0 .P{"OqreUW?2Fl?E xZY|u e @%]y0u/![0 .PPTh|8۰Hc"P, ˋ@k86,/] #fA̱j   M(_Y¤ΏIok"Ѕ$@7WVJPg-xc<*bKjEkՠTg-xc<‚{c0FMThműay>$?5g#i @{c(R?{I"lx\$rH;hdZ6Luf+;@ @vt:IlR[5V^wj-xܙGUNW8jhg84?%RG$R-b3Ku z OPV̦CH8б)uF<͑lp EviЙ "F4t @@h]tq. P:fT=pd8P]ܤxq@YdoRhOX,7)'b,Pٛ1GŠ[H8Vk2-x$R_^PlpJ 5Yn6#_:oa")1At4x$נmPhΐ :mc<N$qn@ݲmG 6#PV7_a?%uʶ1 $LR/H)S0cn6#~_s:} -ɹ<" @ݲmG)y/6:G 3ǗE@ݲmGi *꫿% eS6(kK [*C o6#4@X}6L eA%H@ݲmGi*@7}'Ͷ1 i-,~Ͷ1 -IwSW2̑N6#4M'gԓEN[H8t]]cB "{@{"J()m Z.|p#ᔼ^FVu˶1 zrZF/I5 iAe #4٣?PKARG JARG JARG JARG ra>\H8Pt4x$ :mc<NͶ1 'bfu@GmG 6#@R[hI=ci&f EviЙ "F4t @@h]t9mnqԈ. h}43ںJWD.}ӸC60gg%l,2_D[8 WFWhJA&-՛E>2PWX kE>2ʭV6 @g@yisQ7;y' B6MO*a^5hZW+kA 2t%]WL-I}Nwq4ve3h\GNάER볓(BƧ3A:Xi\:fn|: *0ԫhLu0X=F")zά'$Oi}tf0p8JƧ3ۋnQESZ74>Y3GOi}tf v#g;?꘩v0G>5*hPԘ$ݎ>nMQ맙QOa|8utfMv&W6F C`SOE*39<դh"+@9r#'gUiu*kffg ru=ANwu*JcsIkxu|n .CKnW) Wv:ݓUA˺\vPh~j] HVOjxeVr6(imеxg]؆6Jq?e:84V}ժDTUZQI_0\ދgo$ -Y_^ K*[Vt\bԪE\M@{"?VrGXt(f`Bx'P'AjI14N⟿൙z,|^H?e?k3m.UkKǗ|Q\h.$0\^AVɍ7vy-N4q xͤ)%9ei\\n)4N⟿? @2g?)mTg4q1PTպj범Yk>>%*6@s'_-e^P(Hm^S'_ ՠ'J)q;e?1& Z|6SW۠ ж;e?AfFy|Áɿ->הI1hƘy;%6Kkas,' xMHn@Y BHn4$ 6AC" @АHn4$ 6AC" @АHn4$ 6AC"aɛt%ܫ"(3bLSw5t#MYYD’GT~*P{* %oˁgV~+νhMthui]qob")@ {牯b6x^t5В˟R*~SkhyI/;@bDR곥OwqD)5HXrQ8׷}ڟ:?{A]qob"\-ZZӫhmXTVE:@]\qob"_5l_^Ť5<#E z^}mqcdn\@]qob"ܲbIFGuwqD)5HX52J+"Q14`w/*~Sk۟Ap=" b%,BNj09(ۀ[qwQwE8^! hH$mDr!"]Bn>>BZAgydYHnż{:n.؞#htYHn*]<V4Hn.7.=!G6*rM=|MJ5 Pjur qzU>;vu~w{и/jۣ^w$ErUŕo9qOO(*~ub?XUl~^vm~/ ErU[E^n:7?߬iyfѪʘnͯ*@O._rvB_\tE+QO}ֿ5g*ň6*Ps97k7𣥆`+9&TCʺl=D>|xieNVGʿ5C" JsvhTa6W*P;8)NEаHnkЪ۲dPW P6׮倶wUU|߳#wI k" hciuыo_^CNT@M6(j߾+6{9jZv m \@})-|iS]HkFG)~j J؋8eUV1|_N>et/z'A=UreND.ezGi߿ٓQk,I>"Q-cEL;㛭(onReY:y4uV ow.mWos~%*UP{ dĿ$|n W^AWY"-U&]˟rA &,מr._{ >WL'yӞ^:mעr;\MQeON)S#hw'rw/oۻ,% /U|4C [ Kk_ЦW)_RA]Zv-1^V*MR?ݷSk?_* /TФ"⬷k-RiֶlKEkQ;?Z{*I:u<~wз6o;4k = |ܯ+ "h'?vUn]imT'/:{:,h=K{(UphەcZ- /Tr$o %wu[mM÷}'hyYa+ /Sмs*{&3SdAۮE|)-ZYk留ڔ3LAK8֮4A fωuUVA-]j% . 
5W}yeO<-ZWJmd@?d֊o|*ƇoC|- /R‘=MޜX:/G{WDT}Ȋ]x;(h~^"]o{;gH=ËF/Y*~A+k%H~WLCä}+uy7M]I(كQ)l{OzQ(hDeG\m+EZ75f~8/ZlUު&-{ Z]g4 ͯn?;"R+hvAkkռ<4)i1G3#G3gN_FSMЬǿPoKB 9F\Ãb[6iLAkфQ#ə9Sj,h- \"hq(:v}MUƣL4k<ΦE-h9N/n_ (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hDsƂ.\|<a aA< *6G/w?ϣٗX}rE{>itrJ>V,g_kLE2?ߧ\cvv~X AAWM!,LiHgOy(~,*].`( (gM AUK,r+AЁAP[M^W{ z@A A/8}&NReAZf*hAsAk+ū^u+U='3%%o*[vT8 unAGcF~ TԬr_2Y[PhD  (AA4z_>Ȇk빴 ȼÃ⽐c3 jo5o{4G&P'h"e:t3&w_4M36Ò ks_qa=74CY L\Qrfqms7y|r5vܖLЅ y7pR{A3 jK2t]z67nrq揕g\RpSߜ2"A$ǴI2CH4YnYƶ/ eOye'\޸և,.H^ST( > /]!hZKEtRJ*J٘:h&bV56AoJ~M9~JFP*hZ UKSp 72[NeZ`KJw|pj&`~м"tznM~2wA (AA4 AP hD GA0DB@P8L1f Vlo OfvhNv" ~QwOJPTCטm7s4+#Gё^M۟w7σFۥ}Nc@6ZP7Q?/yI ukzA8kA]?>J\k#hhꗭUQ)hOTX*Sg:(Fk}Pϡi]_4,Z{SR MԞ]?(Feө h*mA@QA6tT Q4| R0 ָ{ЇlgvgEg>Ltqhn:x2چʸTLu-= mCe/A}~ed\!⃏{Y Z~C>ޱL4_ԍ~ Z&  Ziggtoh_OzB6T I٠y|0Ѿ q@OFӊޚD7anh hZC~PceS-e_9mCeө h*mA@QA6tT mCAD@6=2{ofmCecHM-GN4APY+}:.aIv&+5+>mA;ji F8~҂J(R^&6ָeqG'AWE/( :>d9:Q_:C]{3Wj.ǝ dS| hQ)h h*qFeA2wJ vAAЀ hwT+*qFkQ7u:(vAAЀ hw4  Fkw hTM;*3zAЀ hw4  vAw h*qSʌ)"hw4  vAAP h*qFeA2wJ vAAЀ hwT+*qFkQ7u:(vAAЀ hw4  Fkw hTM;*3zAЀ hw4 f]/+4 H<[]oCT+t{{Kݭ*--939A7+h`r%2W5UMn UfNiAIZQ4,CЀ hw4  6,j+W'bfnAϙ Z5,.29AWZ3Zk19Awq)Bfָcrnϙ GkFk?T4i)#0AA]W=6G#tREMՄƃ*A;4^Y% h#AA?2eY@P NKPw!~ZEG&te#h+IIZs,:IA>/IY;tJ'AY;tznoisʲ؁tiQO,v ]E9؁z hweAT4AP "hAE*EP (A CG %eA5 z躦eY@Płnʲ؁5, seY@P݂nܞ5,];ꠒ@ЂUH=,v rAϏS[;T o^i_IY;t_KAY;T?ϣG?}ejʲ؁5G?-h$IA_^ʲ؁,+:E–1%$v@G( Ӥ,\݋/~k]KY;T#."YeAu 4n*b4xĊʲ؁˷ǭ,[Pû7Lڲ؁]?M*wm)b*t=Efm*b*t]ݛsrA \n}>Fk[PsrONˉO8g: #m.;q: Z3ZkE82Υ$xm߸ LM[^'zxs:fָ9H%fqd5iW-~g!73 :r)qfJ?͛ʲ؁uOv?T)A1IG,D;,bkr;fkbvԹJnnoDo; =\AGAbM+:zS\rRԗ@gw);D_EyO4  vAAЀ hw4 zn /=%ǁDAAE"۷髶w=zKAA} q /=%4vsa6z%@7SAP NKP C3"HzxJ_DP_"a9o̼@& KQ?HlNA4 ڌms@ h@;m&9_/8 /aI}zۉDf2+AA0FPn>D^Q?c gW.N uP&;# Y2v'͍x(˴|!A@.ѵkz-Ou9H#_hŏDuAômh4c͞%OP;>N1IzDeOYI4 #J jW-JݢaV~A"5HfbڱDY\>~6#VP;V/uĐӎWnN>CY+ aJPI#,h$m "h~?m`AXj pj]lfurűPnC!'WnC]I|z,;6,kxQnt3-LKz= A,mAGA@Q]-GY _nSz>RWnt3OӎWnASiAY+ I6:h@FX( 5y:GCY wn0"m`Aae6п5-r\Y e9,B?UD+r\Y 0X ꠣm Uۻ=e AP hD  (AA4 AP hD  
(AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 l+IENDB`vcdExtra/vignettes/fig/mobility-mosaicplot-1.png0000644000176200001440000001651514470742313021470 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f:ffff::::f:::::::f:::f:f::ff:fff:f::f:fffffff::::fff:fېېff:ffې۶۶۶ې:ېfې۶f۶ې۶fې۶- pHYsod IDATx {u@UV${/n:S7i5i$A<|BG`P S ]S (A4 AP0 iL` (A4 AP0 iL` (A4 AP0 iL` (A4 AP0 iL` bTܼ[Iȏ^*?}g@ d=A@ ߷EQ|jZ<_qQ\}C嗟]տ--lRvk򻯋#_*D[YQqtZ{ /w׬m*4R̆[{];ܾ~,a7?ojv)g~,DF`Ah o>t|}˟,}+mܖr+ tn厠?7{Z˷KA)]?z h>3VFZl:'mrO! %h@R_\ ʿon=,Aw@ ZΪ^e3rN͊ZPRVkWsLR&u?> b zS{:5θ/^}rOܗV- AlCumbsu_v꺨oS y4vލ?i,GFN+zTnPc~)O@P0 i_6тT/iL` (A4 AP0 iL` (A4 AP0 iL` (A4 AP0 iL` (fA矾Vyy~ZMZ]^CC zw[ 'g,J3]БjPb@A矼uݳ}]AC>]Бjk4 )˧7ϳ/<d]c߄Vo~[}5uHAg?>;?A,|2t |BGҮoª߸F-zq_U ?sWOЙ{c٧k:v6W"h0d5]57п|(٧k:vWkgnEfoD{OAfgj:vV; 3D*raf 8Dz 9o45sQ'A4 AP0 iL`#hK-k!E"'3ΑcDK;:"|-h{g[~ -hZx胠j/h#'ضH+B!@PmQ":I⡏~/ԢATG-6(A4m# EN}TmrAA(AM}#x*-hjG?ߡ?z~S’قffy}2L39<ѼOCPw(]-Ae"0AA09AymG09Aƣ/ߥG,jxA,-hjG?ߡ9o_VDEP4[5Kc]>zQ7| lP (vꃠj/h#'< '3^AuPC^ EA-# EN} ܆ t|x# ("6hiG (FF-hm!>&Z(ZH3LD瘉-hC  t|x lA4m" h6h9⣏-Qq|jQt| ^བAI#h'V 9 *4"h7FZ(A- zv&6j(~ODP5vT"hWhLz @\U}[_FLAM dOOPP^&ۗZ~Ay,TEm}ABYa9DW/?R[ WPj-IL-W# <^-2tyA.AZ[' $A?BU~G]>u|y&82XTu}A듷fFA@PyNXk @hIaw hzhca{T^MmFPτpXQAx5 "ie7\A;ʶ`&MzZ"DYՀ×E!UШb-mH[ $&K柾h2G!4Q }Шb/-&4W'b^Z~*f=S j΂aRP>yj($ZjP Id"h.QTxUA%/*z@Z~"e)>.M( & 63HD˶^mE~APXU2AA !,Ƀx L--dHՋ!|A2$$(*Ҋ͠V lnc\5 v4fBP(CLb$dC-.ZPki&=ŽL3E'e EI"|Gv>,X^Wm@P(ZZ ֤M&Z.hT-w(~d%/ZЌLAGf%ԡ&ZbG%†5xuK;%U]3ߐIM`5]6PD-.ZЌLnt$>4Mԥfo^c\A7 fZ@&OP#D7.D}I$h;QRy.AL\(Z #SAg造dEnL=ӴHOU4T&ҡ3̃ mTv* ɶ "hmp%q|U<ãhxOgY>% bA*'#e3+"C1 x(bF ÊmnE-M u jYuۂUlz?I|LԇYzNOo+CPAďA+ZMC[I3 }PHFP/FAjNsOGan&Z(A6 y:#cD:4g~w_[| 7:n>S`D@sUlvɠCi*Jaea-dY$~IR˃A{^au:#cD:Anbk՛T 4yP!d<kT 42/:x\AC?N> M؉zn"Z~ u ={daABg(h+do6 3:ΜoZ@PVIg$-BnƄ}1w%UXnSc fş;&]Aj(A6g"*Yۘ$$r݁0Q|! 
>ETAʝQ zYů탎XP#~xGZ E*6 RXɺpv|O-$ͱ &g@P(&&̈́#AقĖ$`m/y'"hLsUlUxnjQ *><݂Ė$`-t4EP4SzxnmCsոDAD<݂Ė$mѹyn۰CA Q!5?Ae>h/(}PmB4j>%A []\bv(<[5Qoo pūTT5l&{ ʃꉿ?mA"x1FAϵ\[Ve=1 hxCq椵~]>)P + A]~R'pu י 4)T[!h`tw)yz , ACGI=P - A94RP* Ӛngz_e=y Ee=g@[d5J胠گBL4bw(uCWQ<ƾ> jLjg@~F쏠 \(F 4! ꞆyN[\c^G-bX[">FUmKAEAP!&AeX4>AAP (<>*D/QGAPAH3냠pAF삠 (AUc_QT@Ջ((i&JTBU#OF-}T A@P }T/J]OD8 bn[T5e ,m ƶ,m!ͤ۲AUc摞9T}dkl$LjYqLzMYf%>v]>Փզ9A|⦖;P-l,xڐ6)NFsEP}ZA%t:>(3%1$h=׿~"(i&ؖGقf%>%RЈZm[^A=[l$ǐAPۣM-T3vT%AwA"\AG'*A}[YCPLc=|-hZxcIް-hJ-CЁ@P4[-I<#⡞zM-T3v/(k!|$GP?t<>-͏?&D~&i~رO'1#g $X:y ۂF2=s=$-EP4[-I<1/褔FZ"fCUv67onsXA_W͒#r :zmO1nGIB#:ƶJcG/.+JIB;潺ݴ8xTA%#hM~"hK;vn״2)hvtz=QuCEzmTd[lD=>ǎMt7nR/Q*4VPF6=whV^(i& }, z4+S*AOdySDЈZ-fyA1$aV_k^`ǒObe} 댂رғyJI5vDqD@P4[-I<#hcZO"4b!.k=-hZxcGЮǎ;%Q;P>v-l (ڨ~ @"MA-!AI3ƎQ6Ȳ g $y1g4b!.+h-hZxcGЮyd*H?AYY 1VPF&*Dǎ*D4W!(4:_8AĐJ w`[ЈZh-Hz@8, ڱMyyHd{e:#h7$ j[ДQ#h7YuoWլ&!AhY<47qU=IvlH uON?OCCvu[tkP4t۞g~SGki] k-:ݿŏʘ^A[ȎWR`|ao/kIS*ݜ[΍gw}+->jGT]sDdjMq:* UHՃ˧_ғ4?tX"ZhXh+mt.6+,KDcPO֦"AODP?-"cdƞ}oTкLY *x;E<! yԖ$`rJ2tq[CKydSjpTP**,*_000+00$[33%3B3!%3T5,5o::::f:::::::f:::ff:f:f:::?>3?oJP[J_JoJoJoUU-j[[jff:fff:f::f:ffff:fffffffffffwww6j!:::ff:fffې۶ې?j<$?jff:f:f۶ې۶۶۶T%ĕ-+6pu+3A׶`Ւr{_P卜io]}4`,kg&T cg?ؽ=,8[WNЕF*T#2ĝ'NWv{] L?Co^ܵi4uuŽ`O*az+ƾNۮ9ݫϰ.,wYolaݸ;(F5rmX$W?M_o9h=zWG. 
znDc"Y` #M~geJmt JjyrKp%& Y (h4N~FB:JTU񵢠TAu4V _KJ%LtU^irTj/'{0\M)yYQPk E~dSG7]$Ps8H& Y*J ))m ɤۣ2"j4e#j4c-rRWԫA IC旲y2/iHJRAC{4e\@Ш񞕾`/.FB:%Q|eA&caLCG~9E4ctLW)σ̓f |H|JMy{Ww/3.y&A $?*|LGPhەh{YG u5<.=)םHRjHnƒ`CEeB˿Eǵshc%Y!|jҠ^s_ś6s`@.T/皝uB9o^pHvǭ2Eop|;~ٙ3q<t|_3i~ L 8Z0)֛ъ\RݠыP# *fT_~e{yx #c(h  Þ[~}Ь4 *̯$2Sei^ipb_ xayn#@$zpSXT1EJE:)˂ f*޶nӴz`qt ؚ0-,y>ƝuqͥhBTz^ӻ=`kn?y]OHY9t5I $Zf⧩LG|NqaL XMQ"S~ R* HxE:~ ܋ۤyQ"Sz *$鷠p/p_~Ԋ*1 " @PS_7fMV?kRЩS"Sz *Dt{5/x;> L=y^Lg?^mRLs@VJW$ccJoAZEO*L[Sa>UFb~^̾sk6t>[FيdlL-HH~= ׫A_Mh/eannPP^ؘ[V cq?g@Pwts4+ޮa HƔ݂ԊBPmWξh^Р-U) =A5{Ļm G|jZ"Sv R+$VLzC`~3vWCZŗHƔ܂ZP b!Ά 6Q{z3J5VHƔ܂1[kj$ژ*@PjcAiӫB}@P@ HAi ( 4 tQ2P ޖ~ZM]ѹ%9&?C18.444444$-#.AwL&765MSɣSf>i0yVN0$  Y5Onmj&uAPWk!h`T'cBNnmjfOA$ f>i$vAP#N0%šNm nd jf4 j%PZ'Չ| AMPD'765MSg^0 LԛA'765 5Ms< AZK7L h`T'n A:{7Su: [ ZP] VΉt Au:WA擛pc g>aڂAPGywN0j̃j AM0$] `k A A`cL3i Az f>i7yѺ% mW,@P@ HAi,śE mQjВ&Ah̛O_ oь2AQ& faʾ5GPwoMZY9JA0e_X$h!h+^uٽջA=LW ʖUA0e_'Ά$Ô}k`l0/_v A0e_X&q;rC=LWV :vRnx As0e_X$$v4OS=HR=LWfaʾ54GS=˅zx 4f,9zx kIq+^M=uuN)tFMNyo&gs|K(V#g"~{'0yQ A`;JAN&_vF#~Q'*zT1K#-OU'*tDPPPξ m]& ~䕬:w^ѴB'/iA`Y>A')g=ux & tG6gG,4 _giCDhxOԡJ V#jɫ ze:"ܑUr@0y A55.f+T&hY ViNAgC#/$(o.T耠_p߻IգVj09MtONɦx6BG~;4HԚ]:tG|^q5yMp>p\蔠/y>B7Q -I/^#LL--A mllvM!(E/h&G]<%P$ 쬷_VEl0/!(E,t6}(;xJ ;+۬\Kar=fni A,hp$I84I?,A)b8Ab"9۹{s"{BM+TT~{BF=%/sh9,TT~~{'#8)jbm6Dz?$~%PAP|){˂:,8)b{u?4.+"hoNJ]O-p0i>؆h^܂&߿]GR6=1dWx9-9F;ł quDGER>v :ZItG䎑 *| 鹠]oz_xn'~%łOvCPo,I _h^h-sDņ5SM>g)*LAӂ1> \s ;wku/;tdJ |?Ws'm A}0Q_IT;ٓG3 ~;^?Zs?<Lo{XFJL@o .Vm[Z@^ zN}Jr=fni A.|kqx?3e_4z̔Z䒵Jr=fni A,7&ڧn%LY)%lH;[w.V:Qr=fni A,hr)%.I-\[Z@PBP_нW!A1 Abdv_ 1 Akb0y%rr=fni A-:!h}$WX{}PZ$!h]L z|I"xr=fni A/1SvKKkA1%_P hm 31A5IzDLGL)k^ "u.Ei?N70տغe(2v'+5;o)EDGjE)9Oo?A OCPZX"hzJ k-:QAia'!(1,ZtA NCPbX$nʠArJ kv CPZ"l菑 İEб&p$cX"(& UCPZX"(۹{7򂠴DPwꍐ/JAiadq$kX#]|xJ-*!(- *!(- AI%!(-lt,SE:%GsPgǰDPvY"A 6pCDPw?/:; ֱxXEP-"=LAia{koAa o%APwwk{ -h7Bh!(->A_]Z~%%quv $D!(-,t (1㡳WHװEPvv1,t>zmXݮkX"ޚ|.%AtQ|kgQZJ Kw{Z\yJ [_Kq~N59-k#đ%J {Mg Aa<CPb"8d6İDн5gW^'D4yJ kMЏֺİDPwzNO MKgt{RBF Aw/n |>ގ GTCs} 
6N_[_M/oݯȯaIAA䪠 zb7b'- 2v}}ə{Ds^D]GRAqNciI×gih-GqTEAs ;wk.~>LDi-(2舾OȿcTEAP~Gqo7Ň:?aS>=Y 0VA z7gg0t߽Rv򙂞(K hak-_M%R`/ %F슅Bxf)hIKA!h}lt~?ۮmhG79PaΩ< A+b%']N1%9_tgQ<m k]9fwK56&ꄠDPv}PJ[_a;vEPO]OyR*AİAPh4"İFЂwJĀJrJ $JrJ Ku[!v;q1m$AR!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- *!(- U'GfnobKzʴE@P@ HAi ( Q|Fp(N>)ZM^x~.%f(5SA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ijA!ija{k|!Br ATv29 h*zJr A\ B4|F29 hN!Ii@|ů$t$ `24vIi@|p,e h>胶 ͇(/]wCh,T|c?_tA}N~8w~;7CvĂ~!9 N=o ٓg78N;~R,-ǁ!2H:'hNtkFGwC zy5 AO>1AcQ&襰mE&!qF#@MtG%"_z#%ܞ8=)<-Nv4T&2 h> rF(-A-A-A-A-AiHP'@|0Q242424242aB{}IDAT4242424242424242424242424242424242424242424242424v G+fVZ҃Um&`@P@ HAi ( 4@P@ HAi ( 4@P@ HAi ( 4@P@ HAi ( 4@P@ HAi ( 4@P@ HAi ( 4@P@ HAi ( 4ԟoIENDB`vcdExtra/vignettes/fig/tut04-glass-quasi-1.png0000644000176200001440000002257114470742315020700 0ustar liggesusersPNG  IHDR@e$uYPLTE:[f :::f:X[ff(Q25P::::f:::::::f:::ff:f:f:::?_JoJoQff:fff:f::f:ffff:fffffffffffffx{1|{:1:::ff:fff:fې۶ېff::ff۶ې۶۶۶ې:ېf۶f۶۶۶ې۶ۼm䕥fې۶o pHYsod IDATxYd YP..-IK.mCӝ8nu9v33gF\sunAiAiAiAif^]𻋱7ޟ{^6X@x60 rD=٥́1HAWS<2t!hw RP;HC4߄ d0v`h˻ѽTjCTxZ;#v,J^vTQFѭ.UzyGUC4B/q(|wfw77 z~KY=S}-aN$'̓nuⓣ\`ṃc"3AV4VTDZv/m5uH7k}otOz8ruJ nu=o.=wp SI~ʤ$hR*|iIUڸs]^BIѭ.Ux`f;8(2E [jj J6 :ųRs'nR'ǯUsA ^CUEe7tiK"GΤBU|tK^zNs5xIwʧ M8ыO.>ե6٬s0$i#y\*27'XAckV^$h=f=|'SeZ.΂GTM {~hmMǹFư +KmЊRRڠi! upKɧʬ.S5_{+'9)=ե6dbOd_MXbaFν|7.AO̔l::qRA4n$zE̿Sy[BI*g;TjA+D,$EK5nu's0]+=O Ϳ6Tziji^xtKmxr@ko(_u)?Kq(Tf LLڽ8{}tKU?{  >w|%@n@P; hC !jAPpAPpAPpAPpAPpAPpAPpAPpAPp|yo?~Ln=GΑ.}߂CްUng,hἍ-~AM@,6]ڴm5-vI2 wfQ 8w<\y{=b;u&ޤVn ZTPǢ[F\띔޽E/d Z)>sUyKS{{^INQĕ}|#'h| _!(4Dl%)n= jt ZfoFx$?zFmڠAb3?'ER YPz3}y33d^O2n=qkFgO_7T_Y) >P Z9fr2!b]L}o.[<^_?4ӛ5}j_G y` |„YH;5"tiADsY3Wg~uYY-6yʖutɹ= Z ~$dn**OF A%WR5,Ot Q9&}>W >8G ^Ӎ~Zh6;e3@"⫡]AEΠp1~:O-qAI.S6fZ؁Vֺ^oWl-ƧrxJL^M߿a'i1nv]$u|QJ^Y̴o|z)iqqeE.V*Õu—\[=_g`nI5{! 
"ՏE+GLOԅdt:zEf Z𪎗W2e,Tk[,V6Ua`*)Y(~C}d$pg3F\ڒN{:jK=UeYnNS1,y _>zMӜ˸ncU*%x-F6V*@R)ŋY 7R=z׃6 9/I-*J:>lOG}e 䫺4iƔ >YY~4D:n-E/DUK1H{:h巗؀CM-<% *&(|rOv n03jErZ_DtDO?5}b-շ]viUШ;# :O{μQ(2Q!h\EAR[[ܩVC#;kjwGuۊ4mw]3},IOgNhSP^ɡdEb\#z:EA1I—W_%D͞I YŠ3xZcƹXzW䘼qy-'Z$  (A }ZcmS1!9 3sLj6/N@Ps9>3 Q5ACc>m9+HjBPsA$kWQCԼ^IbFaюbSw4wo8J;|,Y=M+ʉ"j! m*'a<:CH+i&4я!`ߨ^K{EiۿH+{U8Lltp~\֡Th_V(=6e`[}EĶb "h-mMz#+Pd-"h-_Td1syR'"h-CHc>Ah!x_hu&"h/ͩS<"h񩗝'-OAh%xfk\yZGa~tj]Du4\=-A"hMWegxMu4<>0M17hBPgkMu4|d3Mq&E:.(e6hBPb׆e*"hp"$"h-/)!(b3 "h_^jE: VNjn}EPDة#(b QAJAi/j*Z"KjLRԍO;FPɦ$5ɦ}Ah)xuzZE: ٴ6AJd\pEZ nZ ZCE2:8"h LdRAkh'Ah%8(I tR"趴&Iu 5S#Akh#xaLpE9ףAg6GPAh!|u4|C^GPSupE^=UYGP*54_VNUAkh5?%h3 g2eV{A{S/#h# ī4m݇w>685ޏ{I;bW#h# 39 AA +I}5t7Yɉ3tWrf+xl;SXn_ng)!h# ^i7AAwÏW"h n,.UxK ~i+mݍ6DšI[V@])ʍ{;Qq*76tvU^xK =7jJ't{ġg=d=sgRnOpt'A۳|OAAw@JSGF@EBF@Эsˇ"h# Չfjݖ@~nmݖ%T9%h+ 薈-âҜ6h; *A;2Md%c,t;~~V o)ZAFTG#h# Dm@f@]*f h# hے.g hC hF٩7)S o)ZAkڛ$+TY0 ^xK ZCrTb\q1"_=*Au,pRQmA:wĩA@K(݁/ptC~ 8bLӂʁ.< ~;v[\M;Д˫ϋvAw ,* `@3lQzcC<^w!&?rn% pĔG goW;f,q' (ւ{TDryjAMAPk,_WuEwzg⭁@F@P,U~U]p5A-$ɃZAm>>p%GPS"oqp5A`YAMAPac:I@P$mYMODPs""QAo EVyMXabL&jaT{7K#)jsLxe VQGPS&&ڠ@P/K6nZo샠 Ƌjw#)joO.fi<IQLBPc"j$8GPS"o ErG!AMAP/aGȃAÒ }D|y6="I'*jXwIp5A>86 Gݣo6=)(${ E7k'd k샠 Ă:큠6a]u*pe2WO #)j6APڄDu"$탠!Qoz MH[AB6jvAP|ۧ #)j=>/=4XU[3]ֈxur$)K>"93hvE& MdRMY ATU9 M‡2O/6nXOм:AMAP,#)jd:=$ -&KU_}UAMAP^|i}98~8szz!U=AZg=ᳺj Zdu-F,M MrݘN5&LZd5ޟ>9ӄ EQ⤹vaj ZDlTY M|cڠ6APVGPS:/< h'/tj B jAAP[ h# -jͧWڂeǍ@F@N#)Ip5A; h'$8GPSj vAMAN#)Ip5A; h'$8GPSj vAMAN#)Ip5A; h'$8GPSj vAMAN#)Ip5A; h'$8GPSj vAMAN [l% :C}j]AiAi^gCѴij_6tz: 2UIPn5 *APuEPAPuEPAPuEPAPuEPAPuEPAPuEPAPuEPAPuEP9,Aޭ$8*PCT=^|O80A48TٕpH'Q t|tt!#h8(A'y$IB $U*A{! 
i"#h/8$A }P]U(A|:8CTDP]'Dд_9" _#h?8Ac=B峸fOjx!hċG A{¡HLX"8j7=P'ş h/8AÇšx"hTϢ|v8A{90[A{*9o}7vDRA{!g@Ш>[A*őx 4<瑝lFО0|AWjאG>0|AE"; }{ɁNO#h8A#hWB#hѹh9/hFLpt^3 }`ʁ.hy=|A]gi&}`Ȃ,LPA _Ҧ&( *# ]еA{U0XA×|Bpt1N(c+#hk_w3:Ie db }`Ђ' zCpN4N4N4N4N4N4N4N4,ʬ7>׶ ޾.ԁ4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 N4 Na2+IENDB`vcdExtra/vignettes/fig/tut05-donner3a-2.png0000644000176200001440000002422714470742307020163 0ustar liggesusersPNG  IHDR@@RD)PLTE:f:f:f2333::::f:f:f:MMMMMnMMMnMff:fff:f::ff:ffffffnMMnMnnMnnnnnMMMnMnMn::::fff۶nMnnnMȫff:fې۶ƅ~ȎMې:ېfnvmfȎېEp pHYsod IDATx Gzp NMv$l M!4{d{>=y~F:Z#HDd $*DBt^O;nrV`ͪ,}UY @ӫAWe M^24*KdhzU>,}UY @ӫAWe M^24*KdhzU>,}UY @ӫAWe M^2uA]\\htQ2րXzV).. (),}1o݀nm~!ȿe,ڷnÿEX˯:jf7Te郌!by߫~guPQT =#@_N@:րGktOw(C KJ[5P(VG/='W}J (9kw\z=9i&%G'_&ԊPGv5ŏ'WΗh zſU3ϋhVtR^”?:$89䭒r67|Iˑ9|j3܇7j@;W˧<{xB\n)gg?κuifga&48jf lufm|3PN+ڀʋ?$wqxDQwc~2Yc!ؘnx_eLN@1exU =,"@ 9L ] :K1<#Ҷc]+'ʘ\1e/}n O|x Fm)OQ^|ea=L'(|otx FDmR>L8<#(Џy/P&FIC|$D1"h{I ; ԜU>d |Q~e5g2Ra> s&Bιe5g2=a?'uTYAYeLr@KT߼Ji^YvPsVY kpE,欲A'@z?ׅ6@iPxd QrPsVY jXs2u/$-yiCQEN@ 5g2I4כ+5g2=kI #OJpʗlPsVY 3@#ԒjPsVY P`0]RMAD%7 ShSQI(0 %ԜU>d$z;Ȇ5g212l* 5g2@JAD%+4A& JQ }=TY kgj֓t4fr]wPY)3$ܣ7 P<,}x{  *)rUY 99?pN,$< @Trr ]y)= p `  *YPhe:9,}1 H:rPsVY c =ap @T -ddD*Kd3hV}SUY P`LE1ĝ 7]2~lc* QU>d @TL?@5g2c* 'Z j*Kd,yapSQI>@Ꮥj*Kd2"ǔ+@,ADeS 5g2(M2y1Ɣ3@AEɠ欲AfSQ dPsVY P`7 ? *Kdb)7&SQ FUotU>-$>1 wLyT1hc*KdU]2D  *iGW@)dSVz$@8ZbVBAWeLR@Bu(AD%= b5VAAWe qA&% j@i,}U=PUY c %h8SQ vUtU>j {QcB@|AĈ˦ r)FBmU1*xy\۟:Z$SjBU   *~@׏6d}vcIǎV,ĘU*N~xf;\}x}ڏX61z,8䒍P|.}U)SLECZq MgU^WE^k?⠵T{u}r,9H MgԪ 9297VӛAVD%&1>كݭ@KĞhV} *ULC׿'NȚcP@!H˺ U:e1(T(5gUz5X_ 1. r欲A&e@|A뱼$WS.M9)=6y@ Y2 rs$ ӶS+\8<PdzdS#ieѱl8l h1_@ۣc1D/jހLY H"]TU-f( x Cad}2E@: 2?۝ Ŭ(,(H>(Z_a?x']U_e?)Ȩa - #Ȱ-~A܊vW ~nGHxQȪZ/Ȯ'!(ayꮡٙ j% ~DH ~F(4)D-a ꂬ9Qr U?PP1hcZ.*e?3b@.?ThU9 쀜~9X($q hT ڀLg<~(xq 6g ꔔ:H@NAYy b8SPN@/양 R5l@R@/B#1gFh@ @A-IX[^cH?,zu &mI: @T$%jt1`uDOP;f Hn'1fKFg |@R@LB9bJ7QRDwPǨ.$D! j[>Ȩ9rvvVB}zL ˇ\ޮHH2zs3{ b";w2ъ7@ #} H WHU]PWP:@Ifdi?fh . 
q atfHcx‚H0i  9o誁R4 UACj)ht:t Y| }{ lج`~0  ڪԀ| ; + ڪ7 ((C02\ (adFA ؑ4:װ r^ٰ]R J(as?kH-5  i9 7T 6o@s -@.BY4 =@:iI{LϡtEj :tQHc~ nuȬa$1]?t d Ȱa$1]?#hӆzÏGG=]|;e]6t1]?B*Bfg D?xs_On3T{9@ hP Cffj^xwsTNЖΛhĀj4Q1\f_pH,$ 3T.S>zZ<9?9a-xN׆R5P9G,˼x`gao:#:PY*,7SFQqlyP!Uʆ@P>i'GY\Dn0If~ ӊb.@U|{ӚPwPt=f^ۍsYVWVD%aHcU|@6ut.\ Pw  *@h̫ʲm6l4 *@sh"Z"7vG)ižTyP]Qq(4@)HhYߖeF )@eq_/)I|HcUy-5l GR _>1* *_ux@WNZ7%}?)@/S煜7o.$LӰ|W?淛M9%se\~j}}Ags:>>&]ˋH @\'w?,lӛŭaOg@͇2yq1 n(3څ'Cea*@7oneKiUк~)0yvҼA%@EU3PyuaPymtqPFsɳD*ESbga[@ ST Cy>.@2=؞tǮ??`9Y3 BuΛ*~5ЛkeT}9@* hv6ǩ3P0= >ҕV7 $fH:?OZwqR@h `//1!,փ D = Qla":@2 @ !Y?6@TrۇZPhT*F@\ (:xU%= CXLz2P@]D[9%Ð&рvbZFZP "Pp^fs${y?eo>fy+gPԞ o]xBW2ntcD Lܰ_ Bjl {_;ŇI>H/@:BDߗ+xBMu:h< }?~{d~O$Z4 ʄ6G'2lzU%CgJD_1vP^@A ZW"@ IRфZeJT"{LS&@i2!@aB&ZN$@ LB&S%. LPv@bA4@NBb@I"@=]!$ v NYT PkI& 9 PLPuB$@%D: F1* bd&$|~j@B @g#"@s F H,?<<@ApmgPEa Ub _/ H6 b O/IHc>=! J;IvFZU<>VxG+$k;#X1b #@/' R jqEB r> FtwUF@ bъ> @8Z!-eUF@ bъV)h G+=EAY4 R>V4:(* 㽂G+MA]e x b DsE-(W/ ѓ ( =  H5veBA:@g Z) jDhԀ€d+bA5Ze@)z(#$FKP|F'( Ph AP[P @I g@-BIIŘr5!@? ?8@A?t~0 E0 @у:@[B N;:'"h@A'w'!⎎ɪ ~UF[GdUtYr@Im{*F*5 ɪ8eAڄH0:'bZu ^@=YP[Ж&!ɪZ  ڞ I( =GdU@uAi&!bɪXe]Au@|A$!bɪDv9&!fv  i hPI3h@ZHa* 5q5 4CQH.jHZN}U|B@IH.KdU W)H.Kd-(BvY @8e 5J(HH.KdUR@dAZIVJ@I12*@[@>AvY @]Tt E:@0!Y,}Qj^rQ,U^s N 3P+h3kj zeMAvY @P845&!Y,}v(@ AdA!тT'1P*+@ YBD %$k2 PXsHdUրLdA: : i<ɪ$e>@)/H 'H.Kd6 bD;:=Ubr;Er <ԱIDATrD)QN?UZoO$k2k r FԣOLJ@.AvY @uߞ\ !H.Kd/ r=9^er@' r& -h堠$k2P` =91:}T"Ej rGz@Kbd(A6%2s5 %' Z yN R@K"2R@׬WГ΄^BOZP0tD -(r.HdU2/h@!Y,} @~Ax@K=Y- BP*mOV*3&@'2ԭ ((Uh%j2! 
P_R"yP*{@A*= !S)(NMT%7Dr QDr"jP6PԒ|Vn0;s$s^0;eb@eWuG ưĴW`ͪ,}UY @ӫAWe M^24*KdhzU>,}UY @ӫAWe M^24*KdhzU>,}UY @ӫAWe M5F4mMafl  m !Q $*DPSW'W|C]?:qƎLq:o,?fFȲo8eHj~Fui{/,U)5gGeT3wrmo|3Hƒʿ\{y\A5?rj~FT~mlI5ejiFtN&xh/:2B@HT !Q $*DлOO~<:i{5{?zޝ=~,w=|S:H忥*DZla7λϟ|TN{5{73^Л̛!l͗}3c@鍇EFI3_@'пlio?NG @Pq*!к8m !Q $*DB i2bxIENDB`vcdExtra/vignettes/fig/mobility-topo-mosaic-1.png0000644000176200001440000002057014470742314021545 0ustar liggesusersPNG  IHDR@e$uPLTE%+3:B[f<K!]&,:::f:3P$$_**_0000$[33%3B3T5,5o5o::::f:::::::f:::ff:f:f:::>>x?>3?oJP[J_JoJoJoUU-jX&X[[%[jff:fff:f::f:ffff:fffffffffffpDpwww6j!<_3:::ff:fffې۶ې?jx[<v$?jff::f۶ې۶۶۶T%ĕ-+6:[ |L2:FJϐ۸};!k?~t#O0?v0ev S{C;"FJF1|:?f>ڌ5-._^5IAӟ"4꯰v>}jYA v ꎹ]שA&G%>hC9g)h~4у@%#wcQ|?o%S ]2rAGjⱙyа$eHl$yK6W*[ $1.+˭rWܷ޺L/#kīwNeIOGݫF=)s }ڙ]_^tzՐ?-徿^Ƨ._ f&ڽ>|M_jKkɱF'pJɑ 4."`ze^zAm_Wʭ3 ^AE։x夎A.$[yzC u;hAA4 AP hD  (AA4 AP hD  (AA4 A޾s'ˮrB4`f :9Z Mޭywэ62J &jσ㎳zֿ9~7vɉ6~I E/6CW ǝ{J]V~נa &[# ՝'m"pOKAJztzzL9a AxyZ<qӽe_?ujg4F0,L>=v6h/EBЭ\@rk2듦9🠩FQїNs23&0@>;9Uۡ[ 5鼰;TOH-&p-y<FØ9pB4ܪw#|m͸w>sg[JЍ>(΂Im6$iN/wtL/ӇnMӯ;ߍa ASx{N4A7g|ƉV{}S'8/ѷy]R'cyQvPA5@P hD etSbԜb?X:k/CTHT?$,.-S}/}3VȜ|YOd7܂$⷟^Z-#'S 22R"7=x]5=6X^m6: edT>'Z3qM~㿌zF[S/^7Z#Cs2 +#DnmCmޥ0m_*zxq:nQqϯ:U32d*AVF Az{*t)0(/=*ZkQPU32d*AVFCuV@6YFAI drTn0 y'S 23Htxv_H [˃_4A+edT=̌&mDІPwǤm n>z9=LA+edT=̌&  i얫? 
'}'p0(ZFL3H^"ԿA¿Y+߃j.›(Q@gA5m,9'S 3H{{M hô^:YYR" lda eAA4 AP hD 94zZMgh;8MFeOA.AkAZ nꦕ|[ h9*g)APݴ<:5\O೏BG:!h RK"hTêUp="6{D+uCfv_I㍠uC&;Az ET7d?A$JTp="ꦕMCP +Z'1m&ei&6`Z'1m BNb$ p[AuR p[AuZ"6:CPn n|r&A$2n$6p@:!h{j4h6Uu76ɴP66_$<Nb$sZ'1\OAkiy-C<T^;yCP XIj*16&T K A5g1בp[Au@P h-T7Z  Au@P h-T7f}=i-x`28@ AP hDc(^.8Qeʁ!=T.3WF1G87+kHͰ.^Q=I>apjA4 I?9E"Z >p=/A4AAV`^m!CI?A#(NAA a% AtnpjA47N5ʀ "h1-'VAh5[ԉh30=d;s˱?&} uz}.R}A2Hm_k;w~s?fӈ>qv:&Ⱥ RAꁛ7o'zߌ?W]k%TuQ}И;fxsn5{BIX(LSשF 6>/EKU :ȸRdb@X> ֚?b}2QR?\lkt:(y`6M3%O7^pjuAwm~#O7!鸟 zu 4GAO'լdOM 荍45pJA=5M/iǦU΅6#iYWFJ [*hSUCFEpHjPEPAשFAsT )#h%A4"h%A4"h%A+tQEP\e@P nXD͗M*P0 (AA4 YQ|#iȘf&fP cjo25("h hM4  Fu5A(&aA: @К h\Z0AkQp hM4  Fu5A(&aA: @К h\Z0AkQp hM4  Fu5@G}>y%{* w L̉WtE򿈠AТs\ZtO>p2E$vXaA6W7=0L)ad Ֆn%1\Z;xctA%e$"Cè I:6zpmW"o0hT"zɑI5* ;{^}?7  $* e%5]mZE^}ӎs ȱz˯xV~sqAq8CP=h 5so_\ܙ ?Z5h"; EЅ7|-q__J֩yc Lpg󔽷n9uMdG|)?o(_x)y`O: SO]xYW7>SAj+Ԡ V~pW[LT{ߏסjPxY[i۠QU(\4J:10_} _^ehYA9W<{ A-A5J zΉ7>o0`oN5OfGJdǫ= Z}ر?$FKՙ\Aq{R_th8{+ NjxŚtT(2`ocNf5U4Ȏh A 'v>}ĉv|:6P.˯Oԫs&>RyCEi> *$%{u/WUXIzśf1HJd>N~A z⫗CQ^~۟  or&[NO] -}gy3qx_АÙ#^P{u3IA2Akb9 T:/;&f^,AW;ٻh͉WlM$,/O}>L@n :.l @КX ϻk30Akb݂ux?&6n;Y\Z ^4em*&/;&{Ú7,jA{3  :wtAggN,wv_}: @К,7eaA: @К,0AkQp hM_ѳy3'^Y,4Isu5Y~AzNsyЙ,/h@-: @КX#(KY3'^Y,a3'^Y_pD?=sA]TY~AaAǿ.ϸãj AEb}ARF̉WfM34=sAޓ,#cSnjp4zIn8N"({_<@z3 w5 Vav~|cxt>E]D(ANGahqOq-=QD* k-;QD* K--AN#0D= "Aw #,įnGPY"A#u EPa"hY&XI:bX"(GMGPYX"׸M|y!,,t2tGHmGPY"h0YV:\+~Z<"AGPY h*8AST @Tp5^+2[#,ltۧE:9]u>N#%zW .9rX#h679,twk7G[=8ty< Ax!%6﹝Ћo'ꇳ@PaX# }?k;%NYA38VL8m/Kzw>8q5AS;j^뭞ݍQv%)אOA/VtoG?jƃ/pEqGS^U w~"V~īE ꇻq*qsq4q P՘j _/d6y}P}}M5m}+~賳W냦{Tk6ͤԌYɣ0(e{L Kk,&OƩ(*-uzNoT?K{V]t^> EQCylYKbX"hf^Ex55\{OxJA;ۃ9DPX"wO}P-wdmEPw/^8JEPa ?4Vgy!0䓒T @Tp#,,5< auo;FPc h*8A+GPY h*8AST @Tp#,4Ae* MGPY h*8AST @Tp#,4Ae* MGPY h*8AST @Tp#,4Ae* MGPY h*8AST @Tp#,4Ae* MGPY h*8AST @Tp/La5cK' 2c28@ AP hD#~_ꋥS8 TFklRd#,? 
ha Ip$ZX8j-,5 A AM&AAP ha Ip$ZX8j-,5 A AM&AAP ha Ip$ZX8j-,5 A AM&AAP ha Ip$ZX8j;mVĂ-5{k{ AG[ Aǽe AMbg-5%v$eįg/[8j+e4͇Aa4IA|kIY h>AjHor  hKX!d|N-'m/qAvB&I#h+X!hv𲅓wQm-,,AS6}E~lZC`}rЍk#!^t1M;?4^"f,tkr(x Q; ?T/V$ٮ+Tӧo"hkX!d4Oԟ;׽|b=iAAw|-aYA AMf^4APͧ=A3EP-A a45 惠A|0OK:!Aa0AAP h>jA a45 惠A|0AAP h>jA a45 惠A|0AAP h>jA a45 惠A|0AAP h>jA ^R_,Zd`H2 (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD \Y'6IENDB`vcdExtra/vignettes/fig/tut04-glass-qsymm-1.png0000644000176200001440000002442714470742315020726 0ustar liggesusersPNG  IHDR@e$uPPLTE:[f :::f:NXff(Q2::::f:::::::f:::ff:f:f:::Qff:fff:f::f:ffff:fffffffffffff{1|{:1:::ff:fff:fې۶ېff:f:ff۶ې۶۶۶ې:ېf۶f۶۶۶ې۶ۼfې۶  pHYsod IDATx{}1NEcj;ZmӚpqR ^g٭F&\qpy'(s>VA@P5x ^A@P5x ^A [|v> &ߤ/G|ƩG$+GӋz_>=i;OheppJxx/g?]j(wb rI4'w!{K:;PJ-\- 6xp񷹨;$Fd$j4p%gyꮴ0 >18blpO"K_r*\j^.>2v3M14 'tBQ88I8L~tJF84AW- /d[BIW]z0%# (?uQjY9ѼoA˷y./x'׏Ww>$hB ?M?͏Ut;e8<|dI+.e=~) DT=TKSAeB Ɋz9V~S·Q,{TTAO,(Jz~I.$*`Y+TבW\R">^s4_ZPHai_r ${;,PEcѺC^CT5>oY$0?M_kGSa K7Z# tHm[4AOnGԕTev>edZnjJҚk+^ +A9J } Dsb_J%w˒BQV= 1B\zy BMy֐>/2 ,! ~h'׿zZV߈OURK=dA$SR=[6VIt^-9+-;Rp֑ ZG#]TjU>^w4OZP,>/o>5"(2ZQ=*^UL% Z:@+%oJ(KS[۰iR) čN<\|WP*)gV-YPFOUSЈZ%IԴqԏSDzN_>iSYRr!^L4,{-'=UA#+Qv&'j⢔p*AK/)~ *u&ʂ h\\_cA+IOUPȒrͦN`0)]TNjAPuG] ƉX'Tړ-6G5T[|l h6T=<F<Wf&E귪;Hoo>^s4O\P1Xwt{E~A)ؓ4,]~M8~5*G.td,&6PNePf5 }z4_\b]TTsBڅCߑ_Ws^$I5".FwBYDP1T"G. gv ==HIY~_c}r4_]P::CBԣ4Zuwu~2O5 [9`y {@ME_hJ#G?^qhD+?)h">W(Jڍ!l[%Ρ;ߊ̞e 5S> mHSf((ow \ +6>;&͔OG3eu^J-H_89rPO15%i|D}U#]9rRRzNp&UvgT [k̝Y̋/ ks ( k ( k ( k ( k (ZLn:]R` .OeN@lqm[/Nc끠*6n7-VtشNY0ؚl8x4>e+?*ukUHػnNҤ4DП\Or^r@.YeE?$8D~PmnE!M_0N.j*D&oN?x]d濒eЦ@Aˤ/*|ߤIy3 $DN~90Efp? ^|X?HAI8Uf|j @9Ac [)Yi`y Y Jڟ&6p f:s#aQ4T=/Dâ~4O|G);D^$Get$ˠߍ[Nkq"rlTɥeoxӼ! 
X]ŏQٛ+x=i++F>A,(\I=toI՛iUN8mP~ K&蓧loE$8PhGQ dt9a}3vݏc[EP* ]fM u;챠ԁt6sRᗻk|HkO#fzkS-I^ NwhEu=!''mLtzY# jY蝜E,(q̐ ƱIo~y6SXfhy}EZY'tCVP:1fs2^LZbȬ#rlʬMo XVD?8r="O\[mR._"oRo=:'TNIMg^5)ѥ^o?ɣ$InFF5CAwic73c +MBV1= Zmc <ƻ\sB;]]6 _ o.s5mL hW@֩1y@PmGtAad1y@PmGt}1)y@PmGťN8}:T]ɢ&G߿vDg茈 v@v.@;.`X=a_gЁ#t"(*ɞgdӦV= ECNe3{AR_bYV,3ή`e Y(0;'QSlA]] U]H6յ@P[&uu-@ۛtuyZ h/uBPWAK(KcSZ (oCJ)uu-BW[Zյ@P }/oOhz A]] %M4" kv*۳ 4APWAb¢I_ %Mյ@Мb"}&Z (nQ%Z h @[Ze>ʮյ@*lv<˖QZ hb4;vѵ@*O>OHPZ h嘔>I+=u~-ګW!k䢵*IԜEku!kBPWA5Q-j֖CPGA5EkK!kUѵ@u  A] -Ӱuuu-(tt"͜$Vy;((!k%}2A] UWCPGAU}=ѵ@P ep58ut- C A] UH uw-J*ѵ@PIPe:*h̃CPGA%CypZ ~2A] #ePe:J!f1vա<8ut-p.Q_ A] ͨM1.h`NwF/m3hבմDtZ㯆54/M>%,R3TKRhEB] N{!"f+w1z.Ukǻ[>1l|#^_65^6ЮݎHWR6ʼ=+Rj ')h;@ )dRf-V;@!h+@]!h+@P h h+@]PAwVBބA[5>$ t{,d 6<Ag9бME DtߑY_4: t;l/n#7m5b 趐b 趄l\ m1m= .4% t'A[nG(fBv1vK! vD h+@* @V[Q)7mtc\ t[M!h+@CV[Q+7mtQL Rj9ϳt9F% Mŋ 愣,$s[ AA7': t"vK! ,d)\AmJCV[mA[n \ݔxLA7%b{o A[nJ*GZ+(m !br 膐%R2h7@ч*-V[WZ h+@HL$@_|@մn)]V~ A[nAhet" :җ(/g @Ж -b'_PD/ǂkZ!/oUk֨D[nN\ t rb9vK! :B왰kZc>>~6Yƚ@Q t IQI"zDIY ^nXyϸGi[P_[(vЖ|xd0A o(tZ ⇴ʫF50[%~'6ys?c> pĔ{͌InHN+iWk@P="15$-w沧 CPC UH5~cj7cj&[7j:ieAmA[ducjlG;5 -җg#FT 2@ j j_jkv<@6X CPC Mҧt!*Iր6 Poj8t"L8N_`V= M0/:*b;n@P^ j4 ց6ACu EPoj4Z 6ACu UPoj4=3CPC =Y VR7APc 5ȜΏ-4JU'j ڞtWx A;(d (̇#0K]!1EQmORxޢ+5b锕@)5ڂsΐ.@Pc 5g+>T3 ?A#x5ZAVY\}#)tޤEA0o @P L b&l|ucjH:|=>,NIѓVzAPc E<< T gT^@P)tϙ;z*K15ڄ6ߥ: [VB!1&sNOnOyx A Xfg^aIDAT15BG%& M,o^<@PZ|e}5x A H_=>>;;{,QQE j j_jI mź15$}JG QI YZd9>~2]; A-BB$;U ӂjxTWg5x A 6 _Q  YaMA՛cju<@P[ЪQn݄N<@P[PA uBPc - h+@P[@VAmfvMyVA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA!!IA'?1py`ߚk ( u-Ps4aZsGz4xsiӥM>&@ƛA}6@P7m) hM>Ao xS @ƛA}6@P7m) hM>Ao xS Ipr+C>pX΢ F<8%hN: yA{¡ : hG)rB^p`SVMN4 Y5>93 )h?84Ai->NQ &h2)C>pPf`2!h8$Ag))i^,^],C4:$I k ]0!h/8$A6PCt>dU#%8"H;ypEPBX hO8Ay/g'E?ysrAA:ꑚC~p./D.sxBМw/< "h0`gH1"h8A{>׻!h?8A}x @Ph @PyxR5 (!AОp VZA!hO8AEW A{W'YdqCОIv&jAО.ǓXA J.Ep@Pr`^|/9AEpDк쿠au!콠@ٜ28{/hzDw4Ŕ!hwAiG|}p]|x}t>+#AypYP/!hcAҼT`$H"8.hFrjAОph]6A{!Z ʂC>7Gt>C65!h[A!ST J5#XpZЌ:Z~BОAm) hM>Ao xS @ƛA}6@P7m) 
hM>Ao xS @ƛA}6@P7 *Y; |>Mp}. k ( k ( k ( k ( k ( k ( k ( k ( k ( k ( k ( k ( k ( k (ZeHIENDB`vcdExtra/vignettes/fig/tut02-ca-haireye-1.png0000644000176200001440000001575414470742317020463 0ustar liggesusersPNG  IHDR@e$uPLTE:f:::f::fff::::f:::::f:::::f:f:f:::ff:ffff:f::f:ffffff::::ff:fېېff:f:f۶ې:ې۶f۶۶:f::::f:ff:f:ffې۶-. pHYsodIDATx {u@v*JtVVt5}dv#I#6k'fC:H"op&I.D  (AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP]&g|$I~uM'W$ԺGO@еe?ܞF#I>z5fk8\n.olQ}O|]jimfo6I>UņV+}.LeAW?spi>rBdT*/?]ZxY ϕ- ɓQ "hٻr}q_Y>T0DAs?HqWzu?nOI`;6T>WJ~_golYo"M>t0lA-d\4V8ƫ2[P\)lI]T6(߳*F4{ ϕͶ20ofn$d>q.h QUſjժ5g 9oyCsD˂o$9 o|9K6͇-/-uN~%W)ѵ_#͏7G+9n [YD"3mJ- jwZq>t0lA]؞m%3J 'Zzvkz89YU1Z0:ũ*6No},K6Zj 7ʇ-pIQ]&/QlD."gzn,J)MT,*r JởVLAPv峻 cgB" AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AP h E,A&ZAP DgTQAА** AP DgTQAА** AP DgTQAА** :]SS {щ2юe%tW\ 3tT/ m :> OPNwIrЖ{χ&Ψ06APh 0 m8r zI{ΨBQEAA4 j:#(I7N|zΨ҉Bˉ&23tt]1@?c @ A{5S30 AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА** AP DgTQAА**}zyj1?܍f{{w~zp7z*l:A#OP}W٫/ 'ok)N^}%)k'%"*>82efmwt48]1ѯnV'OGN$q/>.e E7NNF<=LPSt%]p:ty{=7AӉ+W㳇==U`(0>GPhEGmLU2(AY/W[ڠI㠽#IP tX:ʑKФ HrUTA4 H孯kZZUT:OKIEzΨNjp*a/:Y004$:JWmЬ S {8(AC3 hHtF (AC3 hHtF (AC3 hHtF (AC3 hHtF (AC3 hHtFΧOaHA7OazIAg7;nN1]LOw WU|^r|tqW}/AߚCcL|h~~0>|O{4!oNV^o;4 9:o{[>->{Lj#! jjxx\<)>[|S}x+hSMɔ|/\PU>=cf/vj{۠l[zlJY.5X,]ddie>^Tq}LNjTvx^nsֽrc䨼/Z̴ֿEZ O/GMSvXEQYŗ|/ދy>=Og[w/ڣzm1"hO8%Ӌ}zpNuBͩtZۧa8mNk[&Un6$Ԝ]bb^傺wN|2Jg-Gj\PeU|}}Ӵ]_fhFNA{k.Ac /ucK˟ZgDשIAK㠾 Ӌ_-APLEg^Ip/@ 3ca͍o/::n4AA[ =+;H?۵!ԭYZ0 :q'hf2$ Q mP+ Z!ыri]{{Ψr0q-;RgTQaD No3PDgTQAP hzvP** :.** uc8ncN%:;o (E|8$Z{ET:ʡmЉŜm.Lc=O:OΨrp/dinqn'0)mPhǡ=76^UT-AGX AR],R7ZSPQEw3%+٩%c UrɗC)C{7Q{:7C1&޼J/߼FDzT=AHy9?)L^E*H,$`q7KPG__ 9TIqA,R^%+AW)C(/\B CڠS ?" 㳇7,fЮzL~"A,9 ii T;։T! 
z^jtT{!8•uj& Q]]AEnҼUOAPMA.°i hˉ^3.h^344į8 m5s æAtv~W**M^| : 4t~gY> QEQ'NnQE0fa|~:I %hHtFڠ!UTŃh hHtF Ψra,GD!UTj41b:J_u=/q_~p*a/]Z<fڟa9 QEI:{K}tBgTQiIJNFm|̩` D!UT.$: ہhD3揫;!Qib!t>Jt:3tHPg+]|,~x"8-74,ӇNV*^Ct}oTM5ø`SM< ޗAeФt2ػZ*MPOTEP4ˤ~Ua:÷Z *fVQ? Ѽ}v;SVа`v; f1qP?BQ#A'uM~Sىf?{Z4> :bh/hҋw;JtFNnXw>B>C|XBpAv)4WDЋoX·񗷃TgTQo3 W N:I%$A+$^3 WUT|HR$9POG>;^t#$:RGSPQE@A[ џYT**Һq*a/:t%(0{B_v,8)GyZS3*,5?;>p[_LvU{8|)LTgTQ9XgV (AA4 R +tF]0**< h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAН<"@Н6G{U1AНө/ښ:r%k='=%hO)J N^+W۪nW/>tbo?Ά):#Z~<ߦ?|.Sk4n: zqQwc:{ zpzжsocd!''/1Jri$h6"=!h$Qڠη:1Jr]p ~M?U\ت=B@;FINFYMT IlWC7AP! vT)nAC<*AC8C  hHtF Ψ  hHtF Ψҩ 8vt"*)~. {M :K.JPQE*~quОڠtI$ m?|h**]3-ov0PZ JА**QEȂ.IӑJ$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$: DgTQAP h4$:J4A"qID#'&5-C#'&5-C#'&5-C#'&5-C#'&5-C#'&5-C#'&5-C#'&5-$( AP hD  (AA4 AP hD#BYJgAZ&Ir&[y"I51A4O+G3լtgI#^Nkyk^LJ/[AZ&UM tykKqq\W͏x%=>'x z&c[!@69G|oZUj)Ul=\kA~Ef--U2ˏyۼQeЪJR[ҍ__hW3eɪv٢_Mmiz+Im+[AW5IbE_Utbv@AgH <[Aך'jl 1h1V!@ͺ4\߼괊oo>d댵k++2f; meNRlזļ[JմU9+C8͋jZ6Άe+&j\MlKk nlTilu5P߶ep fRu@9⥴VlMuJf+&6kw ָ]Z9"AA4 AP hD  (AA4 AP hD  (AA4 AP hD  (AA4 AZƫ os_-ooJ+Of[_ܞ'f+#Zk~~!j6yiyQ7-,tpclҸzfIRj b!ŕ[0/,c'5ۖ?'*VAZ2AG֧?^9}< .Hy&]c]/+0F[Y"h- 3fY'VLWvЦvΜWy*>479[XUy$ZK. 
z7Vٿ {v<S,Wj&w-qmAAZ6I_9Soyس<Œv[1K$eV1ieR e/ Q80mP/FUZ~U?$U1i  U6:I%h;8%Z($,, K9*a A *ff Z+J-^z^ۇ6(T/K-+]A;{y-ѾmaׅϫxzPeRgUPW g~eq:A|ںܔ$Axq'tqzkpLLAP hD  (AA4 AP hD  (AA4 AP hD @8BIENDB`vcdExtra/vignettes/continuous.Rmd0000644000176200001440000002011414470675442016727 0ustar liggesusers--- title: "Continuous predictors" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Continuous predictors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut05-", fig.align = "center", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` When continuous predictors are available---and potentially important---in explaining a categorical outcome, models for that outcome include: logistic regression (binary response), the proportional odds model (ordered polytomous response), multinomial (generalized) logistic regression. Many of these are special cases of the generalized linear model using the `"poisson"` or `"binomial"` family and their relatives. ## Spine and conditional density plots {#sec:spine} I don't go into fitting such models here, but I would be remiss not to illustrate some visualizations in `vcd` that are helpful here. 
The first of these is the spine plot or spinogram [@vcd:Hummel:1996], produced with `spine()`. These are special cases of mosaic plots with specific spacing and shading to show how a categorical response varies with a continuous or categorical predictor. They are also a generalization of stacked bar plots where not the heights but the *widths* of the bars correspond to the relative frequencies of `x`. The heights of the bars then correspond to the conditional relative frequencies of `y` in every `x` group. ***Example***: For the `Arthritis` data, we can see how `Improved` varies with `Age` as follows. `spine()` takes a formula of the form `y ~ x` with a single dependent factor and a single explanatory variable `x` (a numeric variable or a factor). The range of a numeric variable `x` is divided into intervals based on the `breaks` argument, and stacked bars are drawn to show the distribution of `y` as `x` varies. As shown below, the discrete table that is visualized is returned by the function. ```{r, spine1} #| spine1, #| fig.height = 6, #| fig.width = 6, #| fig.show = "hold", #| out.width = "46%", #| fig.align = "center", #| cap = "Spine plots for the `Arthritis` data" (spine(Improved ~ Age, data = Arthritis, breaks = 3)) (spine(Improved ~ Age, data = Arthritis, breaks = "Scott")) ``` The conditional density plot [@vcd:Hofmann+Theus] is a further generalization. This visualization technique is similar to spinograms, but uses a smoothing approach rather than discretizing the explanatory variable. As well, it uses the original `x` axis and not a distorted one. ```{r} #| cdplot, #| fig.height = 5, #| fig.width = 5, #| cap = "Conditional density plot for the `Arthritis` data showing the variation of Improved with Age." cdplot(Improved ~ Age, data = Arthritis) ``` In such plots, it is useful to also see the distribution of the observations across the horizontal axis, e.g., with a `rug()` plot.
The figure below uses `cdplot()` from the `graphics` package rather than `cd_plot()` from `vcd`, and is produced with ```{r} #| cdplot1, #| fig.height = 5, #| fig.width = 5, cdplot(Improved ~ Age, data = Arthritis) with(Arthritis, rug(jitter(Age), col="white", quiet=TRUE)) ``` From this figure it can be easily seen that the proportion of patients reporting Some or Marked improvement increases with Age, but there are some peculiar bumps in the distribution. These may be real or artifactual, but they would be hard to see with most other visualization methods. When we switch from non-parametric data exploration to parametric statistical models, such effects are easily missed. ## Model-based plots: effect plots and `ggplot2` plots {#sec:modelplots} The nonparametric conditional density plot uses smoothing methods to convey the distributions of the response variable, but displays that are simpler to interpret can often be obtained by plotting the predicted response from a parametric model. For complex `glm()` models with interaction effects, the `effects` package provides the most useful displays, plotting the predicted values for a given term, averaging over other predictors not included in that term. I don't illustrate this here, but see @effects:1, @effects:2 and `help(package="effects")`. Here I just briefly illustrate the capabilities of the `ggplot2` package for model-smoothed plots of categorical responses in `glm()` models. ***Example***: The `Donner` data frame in `vcdExtra` gives details on the survival of 90 members of the Donner party, a group of people who attempted to migrate to California in 1846. They were trapped by an early blizzard on the eastern side of the Sierra Nevada mountains, and before they could be rescued, nearly half of the party had died. What factors affected who lived and who died? 
```{r, donner1} data(Donner, package="vcdExtra") str(Donner) ``` A potential model of interest is the logistic regression model for $Pr(survived)$, allowing separate fits for males and females as a function of `age`. The key to this is the `stat_smooth()` function, using `method = "glm", method.args = list(family = binomial)`. The `formula = y ~ x` specifies a linear fit on the logit scale (\@ref{fig:donner3}, left) ```{r, donner2a, fig=FALSE, eval=FALSE} # separate linear fits on age for M/F ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ x, alpha = 0.2, size=2, aes(fill = sex)) ``` Alternatively, we can allow a quadratic relation with `age` by specifying `formula = y ~ poly(x,2)` (@ref(fig:donner3), right). ```{r, donner2b, fig=FALSE, eval=FALSE} # separate quadratics ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ poly(x,2), alpha = 0.2, size=2, aes(fill = sex)) ``` ```{r} #| donner3a, #| echo = FALSE, #| fig.height = 6, #| fig.width = 6, #| fig.show = "hold", #| out.width = "46%", #| cap = "Logistic regression plots for the `Donner` data showing survival vs. age, by sex. 
Left: linear logistic model; right: quadratic model {#fig:donner3}" # separate linear fits on age for M/F ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ x, alpha = 0.2, size=2, aes(fill = sex)) # separate quadratics ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ poly(x,2), alpha = 0.2, size=2, aes(fill = sex)) ``` These plots very nicely show (a) the fitted $Pr(survived)$ for males and females; (b) confidence bands around the smoothed model fits and (c) the individual observations by jittered points at 0 and 1 for those who died and survived, respectively. # References vcdExtra/vignettes/mosaics.Rmd0000644000176200001440000004126214422307100016142 0ustar liggesusers--- title: "Mosaic plots" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Mosaic plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut04-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Load packages set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) library(seriation) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` Mosaic plots provide an ideal method both for visualizing contingency tables and for visualizing the fit--- or more importantly--- **lack of fit** of a loglinear model. 
For a two-way table, `mosaic()`, by default, fits a model of independence, $[A][B]$ or `~A + B` as an R formula. The `vcdExtra` package extends this to models fit using `glm(..., family=poisson)`, which can include specialized models for ordered factors, or square tables that are intermediate between the saturated model, $[A B]$ = `A * B`, and the independence model $[A][B]$. For $n$-way tables, `vcd::mosaic()` can fit any loglinear model, and can also be used to plot a model fit with `MASS::loglm()`. The `vcdExtra` package extends this to models fit using `stats::glm()` and, by extension, to non-linear models fit using the [gnm package](https://cran.r-project.org/package=gnm). See @vcd:Friendly:1994, @vcd:Friendly:1999 for the statistical ideas behind these uses of mosaic displays in connection with loglinear models. Our book @FriendlyMeyer:2016:DDAR gives a detailed discussion of mosaic plots and many more examples. The essential ideas are to: * recursively sub-divide a unit square into rectangular "tiles" for the cells of the table, such that the area of each tile is proportional to the cell frequency. Tiles are split in a sequential order: + First according to the **marginal** proportions of a first variable, V1 + Next according to the **conditional** proportions of a 2nd variable, V2 | V1 + Next according to the **conditional** proportions of a 3rd variable, V3 | {V1, V2} + ... * For a given loglinear model, the tiles can then be shaded in various ways to reflect the residuals (lack of fit) for a given model. * The pattern of residuals can then be used to suggest a better model or understand *where* a given model fits or does not fit. `mosaic()` provides a wide range of options for the directions of splitting, the specification of shading, labeling, spacing, legend and many other details. 
It is actually implemented as a special case of a more general class of displays for $n$-way tables called `strucplot`, including sieve diagrams, association plots, double-decker plots as well as mosaic plots. For details, see `help(strucplot)` and the "See also" links therein, and also @vcd:Meyer+Zeileis+Hornik:2006b, which is available as an R vignette via `vignette("strucplot", package="vcd")`. ***Example***: A mosaic plot for the Arthritis treatment data fits the model of independence, `~ Treatment + Improved` and displays the association in the pattern of residual shading. The goal is to visualize the difference in the proportions of `Improved` for the two levels of `Treatment` : "Placebo" and "Treated". The plot below is produced with the following call to `mosaic()`. With the first split by `Treatment` and the shading used, it is easy to see that more people given the placebo experienced no improvement, while more people given the active treatment reported marked improvement. ```{r} #| Arthritis1, #| fig.height = 6, #| fig.width = 7, #| fig.cap = "Mosaic plot for the `Arthritis` data, using `shading_max`" data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ``` `gp = shading_max` specifies that color in the plot signals a significant residual at a 90% or 99% significance level, with the more intense shade for 99%. Note that the residuals for the independence model were not large (as shown in the legend), yet the association between `Treatment` and `Improved` is highly significant. ```{r, art1} summary(art) ``` In contrast, one of the other shading schemes, from @vcd:Friendly:1994 (use: `gp = shading_Friendly`), uses fixed cutoffs of $\pm 2, \pm 4$, to shade cells which are *individually* significant at approximately $\alpha = 0.05$ and $\alpha = 0.001$ levels, respectively. The plot below uses `gp = shading_Friendly`. 
```{r} #| Arthritis2, #| fig.height = 6, #| fig.width = 7, #| fig.cap = "Mosaic plot for the `Arthritis` data, using `shading_Friendly`" mosaic(art, gp = shading_Friendly, split_vertical = TRUE, main="Arthritis: gp = shading_Friendly") ``` ## Permuting variable levels Mosaic plots using tables or frequency data frames as input typically take the levels of the table variables in the order presented in the dataset. For character variables, this is often alphabetical order. That might be helpful for looking up a value, but is unhelpful for seeing and understanding the pattern of association. It is usually much better to order the levels of the row and column variables to help reveal the nature of their association. This is an example of **effect ordering for data display** [@FriendlyKwan:02:effect]. ***Example***: Data from @Glass:54 gave this 5 x 5 table on the occupations of 3500 British fathers and their sons, where the occupational categories are listed in alphabetic order. ```{r glass} data(Glass, package="vcdExtra") (glass.tab <- xtabs(Freq ~ father + son, data=Glass)) ``` The mosaic display shows very strong association, but aside from the diagonal cells, the pattern is unclear. Note the use of `set_varnames` to give more descriptive labels for the variables and `abbreviate` to shorten the occupational category labels, and of `interpolate` to set the shading levels for the mosaic. ```{r glass-mosaic1} largs <- list(set_varnames=list(father="Father's Occupation", son="Son's Occupation"), abbreviate=10) gargs <- list(interpolate=c(1,2,4,8)) mosaic(glass.tab, shade=TRUE, labeling_args=largs, gp_args=gargs, main="Alphabetic order", legend=FALSE, rot_labels=c(20,90,0,70)) ``` The occupational categories differ in **status**, and can be reordered correctly as follows, from `Professional` down to `Unskilled`. 
```{r glass-order} # reorder by status ord <- c(2, 1, 4, 3, 5) row.names(glass.tab)[ord] ``` The revised mosaic plot can be produced by indexing the rows and columns of the table using `ord`. ```{r glass-mosaic2} mosaic(glass.tab[ord, ord], shade=TRUE, labeling_args=largs, gp_args=gargs, main="Effect order", legend=FALSE, rot_labels=c(20,90,0,70)) ``` From this, and for the examples in the next section, it is useful to re-define `father` and `son` as **ordered** factors in the original `Glass` frequency data.frame. ```{r glass-ord} Glass.ord <- Glass Glass.ord$father <- ordered(Glass.ord$father, levels=levels(Glass$father)[ord]) Glass.ord$son <- ordered(Glass.ord$son, levels=levels(Glass$son)[ord]) str(Glass.ord) ``` ## Square tables For mobility tables such as this, where the rows and columns refer to the same occupational categories it comes as no surprise that there is a strong association in the diagonal cells: most often, sons remain in the same occupational categories as their fathers. However, the re-ordered mosaic display also reveals something subtler: when a son differs in occupation from the father, it is more likely that he will appear in a category one-step removed than more steps removed. The residuals seem to decrease with the number of steps from the diagonal. For such tables, specialized loglinear models provide interesting cases intermediate between the independence model, [A] [B], and the saturated model, [A B]. These can be fit using `glm()`, with the data in frequency form, ``` glm(Freq ~ A + B + assoc, data = ..., family = poisson) ``` where `assoc` is a special term to handle a restricted form of association, different from `A:B` which specifies the saturated model in this notation. * **Quasi-independence**: Asserts independence, but ignores the diagonal cells by fitting them exactly. The loglinear model is: $\log m_{ij} = \mu + \lambda^A_i + \lambda^B_j + \delta_i I(i = j)$, where $I()$ is the indicator function. 
* **Symmetry**: This model asserts that the joint distribution of the row and column variables is symmetric, that is $\pi_{ij} = \pi_{ji}$: A son is equally likely to move from their father's occupational category $i$ to another category, $j$, as the reverse, moving from $j$ to $i$. Symmetry is quite strong, because it also implies **marginal homogeneity**, that the marginal probabilities of the row and column variables are equal, $\pi_{i+} = \sum_j \pi_{ij} = \sum_j \pi_{ji} = \pi_{+i}$ for all $i$. * **Quasi-symmetry**: This model uses the standard main-effect terms in the loglinear model, but asserts that the association parameters are symmetric, $\log m_{ij} = \mu + \lambda^A_i + \lambda^B_j + \lambda^{AB}_{ij}$, where $\lambda^{AB}_{ij} = \lambda^{AB}_{ji}$. The [gnm package](https://cran.r-project.org/package=gnm) provides a variety of these functions: `gnm::Diag()`, `gnm::Symm()` and `gnm::Topo()` for an interaction factor as specified by an array of levels, which may be arbitrarily structured. For example, the following generates a term for a diagonal factor in a $4 \times 4$ table. The diagonal values reflect parameters fitted for each diagonal cell. Off-diagonal values, "." are ignored. ```{r diag} rowfac <- gl(4, 4, 16) colfac <- gl(4, 1, 16) diag4by4 <- Diag(rowfac, colfac) matrix(Diag(rowfac, colfac, binary = FALSE), 4, 4) ``` `Symm()` constructs parameters for symmetric cells. The particular values don't matter. All that does matter is that the same value, e.g., `1:2` appears in both the (1,2) and (2,1) cells. ```{r symm} symm4by4 <- Symm(rowfac, colfac) matrix(symm4by4, 4, 4) ``` ***Example***: To illustrate, we fit the four models below, starting with the independence model `Freq ~ father + son` and then adding terms to reflect the restricted forms of association, e.g., `Diag(father, son)` for diagonal terms and `Symm(father, son)` for symmetry. 
```{r glass-models} library(gnm) glass.indep <- glm(Freq ~ father + son, data = Glass.ord, family=poisson) glass.quasi <- glm(Freq ~ father + son + Diag(father, son), data = Glass.ord, family=poisson) glass.symm <- glm(Freq ~ Symm(father, son), data = Glass.ord, family=poisson) glass.qsymm <- glm(Freq ~ father + son + Symm(father, son), data = Glass.ord, family=poisson) ``` We can visualize these using the `vcdExtra::mosaic.glm()` method, which extends mosaic displays to handle fitted `glm` objects. *Technical note*: for models fitted using `glm()`, standardized residuals, `residuals_type="rstandard"`, have better statistical properties than the default Pearson residuals in mosaic plots and analysis. ```{r glass-quasi} mosaic(glass.quasi, residuals_type="rstandard", shade=TRUE, labeling_args=largs, gp_args=gargs, main="Quasi-Independence", legend=FALSE, rot_labels=c(20,90,0,70) ) ``` Mosaic plots for the other models would give further visual assessment of these models; however, we can also test differences among them. For nested models, `anova()` gives tests of how much better a more complex model is compared to the previous one. ```{r glass-anova} # model comparisons: for *nested* models anova(glass.indep, glass.quasi, glass.qsymm, test="Chisq") ``` Alternatively, `vcdExtra::LRstats()` gives model summaries for a collection of models, not necessarily nested, with AIC and BIC statistics reflecting model parsimony. ```{r glass-lrstats} models <- glmlist(glass.indep, glass.quasi, glass.symm, glass.qsymm) LRstats(models) ``` By all criteria, the model of quasi symmetry fits best. The residual deviance $G^2$ is not significant. The mosaic is largely unshaded, indicating a good fit, but there are a few shaded cells that indicate the remaining positive and negative residuals. For comparative mosaic displays, it is sometimes useful to show the $G^2$ statistic in the main title, using `vcdExtra::modFit()` for this purpose. 
```{r glass-qsymm} mosaic(glass.qsymm, residuals_type="rstandard", shade=TRUE, labeling_args=largs, gp_args=gargs, main = paste("Quasi-Symmetry", modFit(glass.qsymm)), legend=FALSE, rot_labels=c(20,90,0,70) ) ``` ## Correspondence analysis ordering When natural orders for row and column levels are not given a priori, we can find orderings that make more sense using correspondence analysis. The general ideas are that: * Correspondence analysis assigns scores to the row and column variables to best account for the association in 1, 2, ... dimensions * The first CA dimension accounts for largest proportion of the Pearson $\chi^2$ * Therefore, permuting the levels of the row and column variables by the CA Dim1 scores gives a more coherent mosaic plot, more clearly showing the nature of the association. * The [seriation package](https://cran.r-project.org/package=seriation) now has a method to order variables in frequency tables using CA. ***Example***: As an example, consider the `HouseTasks` dataset, a 13 x 4 table of frequencies of household tasks performed by couples, either by the `Husband`, `Wife`, `Alternating` or `Jointly`. You can see from the table that some tasks (Repairs) are done largely by the husband; some (laundry, main meal) are largely done by the wife, while others are done jointly or alternating between husband and wife. But the `Task` and `Who` levels are both in alphabetical order. ```{r housetasks} data("HouseTasks", package = "vcdExtra") HouseTasks ``` The naive mosaic plot for this dataset is shown below, splitting first by `Task` and then by `Who`. Due to the length of the factor labels, some features of `labeling` were used to make the display more readable. 
```{r housetasks-mos1} require(vcd) mosaic(HouseTasks, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) ``` Correspondence analysis, using the [ca package](https://cran.r-project.org/package=ca), shows that nearly 89% of the $\chi^2$ can be accounted for in two dimensions. ```{r housetasks-ca} require(ca) HT.ca <- ca(HouseTasks) summary(HT.ca, rows=FALSE, columns=FALSE) ``` The CA plot has a fairly simple interpretation: Dim1 is largely the distinction between tasks primarily done by the wife vs. the husband. Dim2 distinguishes tasks that are done singly vs. those that are done jointly. ```{r housetasks-ca-plot} plot(HT.ca, lines = TRUE) ``` So, we can use the `CA` method of `seriation::seriate()` to find the order of permutations of `Task` and `Who` along the CA dimensions. ```{r housetasks-seriation} require(seriation) order <- seriate(HouseTasks, method = "CA") # the permuted row and column labels rownames(HouseTasks)[order[[1]]] colnames(HouseTasks)[order[[2]]] ``` Now, use `seriation::permute()` to use `order` for the permutations of `Task` and `Who`, and plot the resulting mosaic: ```{r housetasks-mos2} # do the permutation HT_perm <- permute(HouseTasks, order, margin=1) mosaic(HT_perm, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) ``` It is now easy to see the cluster of tasks (laundry and cooking) done largely by the wife at the top, and those (repairs, driving) done largely by the husband at the bottom. 
## References vcdExtra/vignettes/demo-housing.Rmd0000644000176200001440000002773514422306403017120 0ustar liggesusers--- title: "Demo - Housing Data" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Demo - Housing Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/demo-housing-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # colorize text colorize <- function(x, color) { if (knitr::is_latex_output()) { sprintf("\\textcolor{%s}{%s}", color, x) } else if (knitr::is_html_output()) { sprintf("<span style='color: %s;'>%s</span>", color, x) } else x } ``` This vignette was one of a series of `demo()` files in the package. It is still there as `demo("housing")`, but is now presented here with additional commentary and analysis, designed to highlight some aspects of analysis of categorical data and graphical display. ## Load packages I'll use the following packages in this vignette. 
```{r} library(vcdExtra) library(MASS) library(effects) ``` ## Housing data The content here is the dataset `MASS::housing`, giving a 4-way, $3 \times 3 \times 4 \times 2$ frequency table of 1681 individuals from the *Copenhagen Housing Conditions Survey*, classified by their: * Satisfaction (`Sat`) with their housing circumstances (low, medium or high), * `Type` of rental dwelling (Tower, Apartment, Atrium or Terrace) * perceived influence (`Infl`) on management of the property (low, medium, high), and * degree of contact (`Cont`) with other residents (low or high) Load the data: ```{r housing} data(housing, package="MASS") str(housing) ``` ### Variables, levels and models Satisfaction (`Sat`) of these householders with their present housing circumstances is the **outcome variable** here. For purposes of analysis, note that `Sat` is an ordered factor with levels `"Low" < "Medium" < "High"`. Note also that Influence, with the same levels, is just a "Factor", not an ordered one. I consider here just models using `glm(..., family=poisson)` or the equivalent in `MASS::loglm()`. The ordering of factor levels is important in graphical displays. We don't want to see them ordered alphabetically, "High", "Low", "Medium". The `housing` data.frame was constructed so that the levels of `Sat` and `Infl` appear in the dataset in their appropriate order. ```{r} levels(housing$Sat) levels(housing$Infl) ``` Other models, e.g., the **proportional odds** model, fit using `MASS::polr()`, can take the ordinal nature of satisfaction into account. In `glm()` one could re-assign `Infl` as an ordered factor and examine linear vs. non-linear associations for this factor. But I don't do this here. ## Null model The most ignorant model asserts that all the table factors are mutually independent. In symbolic notation, this is `[S] [I] [T] [C]` where all terms in separate `[ ]` are supposed to be independent. This is `Freq ~ Sat + Infl + Type + Cont` as a formula for `glm()`. 
```{r house.null} house.null <- glm(Freq ~ Sat + Infl + Type + Cont, family = poisson, data = housing) ``` ## Baseline model When `Sat` is the outcome variable, a minimal **baseline model** should allow for all associations among the predictors, symbolized as `[S] [I T C]`. That is, Influence, Type and Contact may be associated in arbitrary ways, just as multiple predictors can be correlated in regression models. In this framework, what remains to be explained is whether/how `Sat` depends on the combinations of the other variables. The baseline model therefore includes the full three-way term for the predictors. ```{r house.glm0} house.glm0 <- glm(Freq ~ Sat + Infl*Type*Cont, family = poisson, data = housing) ``` Both of these models fit terribly, but we can always use `anova(mod1, mod2,...)` to compare the *relative* fits of **nested** models. ```{r anova} anova(house.null, house.glm0, test = "Chisq") ``` ## Visualising model fit The baseline model is shown in the mosaic plot below. Note that this is applied not to the `housing` data, but rather to the `house.glm0` object (of class `glm`) resulting to a call to `vcdExtra::mosaic.glm()`. With four variables in the mosaic, labeling of the variable names and factor levels is a bit tricky, because labels must appear on all four sides of the plot. The `labeling_args` argument can be used to set more informative variable names and abbreviate factor levels where necessary. 
```{r} #| label= mosaic-glm0a, #| warning = TRUE # labeling_args for mosaic() largs <- list(set_varnames = c( Infl="Influence on management", Cont="Contact among residents", Type="Type of dwelling", Sat="Satisfaction"), abbreviate=c(Type=3)) mosaic(house.glm0, labeling_args=largs, main='Baseline model: [ITC][Sat]') ``` In this plot we can see largish `r colorize("positive residuals", "blue")` in the blocks corresponding to (low satisfaction, low influence) and (high satisfaction, high influence) and clusters of largish `r colorize("negative residuals", "red")` in the opposite corners. By default, variables are used in the mosaic display in their order in the data table or frequency data.frame. The `r colorize("warning", "red")` reminds us that the order of conditioning used is `~Sat + Infl + Type + Cont`. ### Ordering the variables in the mosaic For `mosaic.glm()`, the conditioning order of variables in the mosaic can be set using the `formula` argument. Here, I rearrange the variables to put `Sat` as the last variable in the splitting / conditioning sequence. I also use `vcdExtra::modFit()` to add the LR $G^2$ fit statistic to the plot title. ```{r mosaic-glm0b} mosaic(house.glm0, formula = ~ Type + Infl + Cont + Sat, labeling_args=largs, main=paste('Baseline model: [ITC][Sat],', modFit(house.glm0)) ) ``` ## Adding association terms Clearly, satisfaction depends on one or more of the predictors, `Infl`, `Type` and `Cont` and possibly their interactions. As a first step it is useful to consider sequentially adding the association terms `Infl:Sat`, `Type:Sat`, `Cont:Sat` one at a time. This analysis is carried out using `MASS::addterm()`. ```{r addterm} MASS::addterm(house.glm0, ~ . + Sat:(Infl + Type + Cont), test = "Chisq") ``` Based on this, it is useful to consider a "main-effects" model for satisfaction, adding all three two-way terms involving satisfaction. The `update()` method provides an easy way to add (or subtract) terms from a fitted model object. 
In the model formula, `.` stands for whatever was on the left side (`Freq`) or on the right side (`Sat + Infl*Type*Cont`) of the model (`house.glm0`) that is being updated. ```{r house-glm1} house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl + Type + Cont)) ``` For comparison, we note that the same model can be fit using the iterative proportional scaling algorithm of `MASS::loglm()`. ```{r house-loglm1} (house.loglm1 <- MASS::loglm(Freq ~ Infl * Type * Cont + Sat*(Infl + Type + Cont), data = housing)) ``` ## Did the model get better? As before, `anova()` tests the added contribution of each more complex model over the one before. The residual deviance $G^2$ has been reduced from $G^2 (46) = 217.46$ for the baseline model `house.glm0` to $G^2 (34) = 38.66$ for the revised model `house.glm1`. The difference, $G^2(M1 | M0) = G^2 (12) = 178.79$, tests the collective additional fit provided by the two-way association of satisfaction with the predictors. ```{r} anova(house.glm0, house.glm1, test="Chisq") ``` ## Visualize model `glm1` The model `house.glm1` fits reasonably well, `r modFit(house.glm1)`, so most residuals are small. In the mosaic below, I use `gp=shading_Friendly` to shade the tiles so that positive and negative residuals are distinguished by color, and they are filled when the absolute value of the residual is outside $\pm 2, 4$. ```{r mosaic-glm1} mosaic(house.glm1, labeling_args=largs, main=paste('Model [IS][TS][CS],', modFit(house.glm1) ), gp=shading_Friendly) ``` One cell is highlighted here: The combination of medium influence, low contact and tower type, is more likely to give low satisfaction than the model predicts. Is this just an outlier, or is there something that can be interpreted and perhaps improve the model fit? It is hard to tell, but the virtues of mosaic displays are that they help to: * diagnose overall patterns of associations, * spot unusual cells in relation to lack of fit of a given model. ## Can we drop any terms? 
When we add terms using `MASS::addterm()`, they are added sequentially. It might be the case that once some term is added, a previously added term is no longer important. Running `MASS::dropterm()` on the `house.glm1` model checks for this. ```{r dropterm} MASS::dropterm(house.glm1, test = "Chisq") ``` Note that the three-way term `Infl:Type:Cont` is not significant. However, with `Sat` as the response, the associations of all predictors must be included in the model. ## What about two-way interactions? The model so far says that each of influence, type and contact has separate, additive effects on the level of satisfaction, what I called a "main-effects" model. It might be the case that some of the predictors have *interaction* effects, e.g., that the effect of influence on satisfaction might vary with the type of dwelling or the level of contact. An easy way to test for these is to update the main-effects model, adding all possible two-way interactions for `Sat`, one at a time, with `addterm()`. ```{r addterm1} MASS::addterm(house.glm1, ~. + Sat:(Infl + Type + Cont)^2, test = "Chisq") ``` The result shows that adding the term `Infl:Type:Sat` reduces the deviance $G^2$ from 38.66 to 16.11. The difference, $G^2(M1 + ITS | M1) = G^2 (12) = 22.55$ reflects a substantial improvement. The remaining two-way interaction terms reduce the deviance by smaller and non-significant amounts, relative to `house.glm1`. Model fitting should be guided by substance, not just statistical machinery. Nonetheless, it seems arguably sensible to add one two-way term to the model, giving `house.glm2`. ```{r} house.glm2 <- update(house.glm1, . ~ . + Sat:Infl:Type) ``` ## Model parsimony: AIC & BIC Adding more association terms to a model will always improve its fit. The question is, whether that is "worth it"? "Worth it" concerns the trade-off between model fit and parsimony. 
Sometimes we might prefer a model with fewer parameters to one that has a slightly better fit, but requires more model terms and parameters. The AIC and BIC statistics are designed to adjust our assessment of model fit by penalizing it for using more parameters. Equivalently, they deduct from the likelihood ratio $G^2$ a term proportional to the residual $\text{df}$ of the model. In any case -- **smaller is better** for both AIC and BIC. $$AIC = G^2 - 2 \: \text{df}$$ $$BIC = G^2 - \log(n) \: \text{df}$$ These measures are provided by `AIC()`, `BIC()`, and can be used to compare models using `vcdExtra::LRstats()`. ```{r lrstats} LRstats(house.glm0, house.glm1, house.glm2) ``` By these metrics, model `house.glm1` is best on both AIC and BIC. The increased goodness-of-fit (smaller $G^2$) of model `house.glm2` is not worth the extra cost of parameters in the `house.glm2` model. vcdExtra/vignettes/apa.csl0000644000176200001440000005306014422306403015310 0ustar liggesusers vcdExtra/vignettes/vignettes.bib0000644000176200001440000000253414422306403016532 0ustar liggesusers@book{FriendlyMeyer:2016:DDAR, Author = {Michael Friendly and Meyer, David}, Title = {Discrete Data Analysis with {R}: Visualization and Modeling Techniques for Categorical and Count Data}, Address = {Boca Raton, FL}, Isbn = {978-1-4987-2583-5}, Publisher = {Chapman \& Hall/CRC}, Year = {2016} } @book{Glass:54, Address = {Glencoe, IL}, Author = {Glass, D. V.}, Publisher = {The Free Press}, Title = {Social Mobility in Britain}, Year = {1954} } @article{Goodman:79, Author = {L. A. Goodman}, Journal = {Journal of the American Statistical Association}, Pages = {537--552}, Title = {Simple models for the analysis of association in cross-classifications having ordered categories}, Volume = {74}, Year = {1979} } @InCollection{Hauser:79, author = {R. M. 
Hauser}, booktitle = {Sociological Methodology 1980}, publisher = {Jossey-Bass}, title = {Some exploratory methods for modeling mobility tables and other cross-classified data}, year = {1980}, address = {San Francisco}, editor = {K. F. Schuessler}, pages = {413--458}, } @book{PowersXie:2008, Address = {Bingley, UK}, Author = {Powers, Daniel A. and Xie, Yu}, Edition = {Second}, Isbn = {9781781906590}, Publisher = {Emerald}, Title = {Statistical Methods for Categorical Data Analysis}, Year = {2008}} vcdExtra/vignettes/loglinear.Rmd0000644000176200001440000002362214470701657016502 0ustar liggesusers--- title: "Loglinear Models" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Loglinear Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut03-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` You can use the `loglm()` function in the `MASS` package to fit log-linear models. Equivalent models can also be fit (from a different perspective) as generalized linear models with the `glm()` function using the `family='poisson'` argument, and the `gnm` package provides a wider range of generalized *nonlinear* models, particularly for testing structured associations. The visualization methods for these models were originally developed for models fit using `loglm()`, so this approach is emphasized here. 
Some extensions of these methods for models fit using `glm()` and `gnm()` are contained in the `vcdExtra` package and illustrated in \@ref(sec:glm). Assume we have a 3-way contingency table based on variables A, B, and C. The possible different forms of loglinear models for a 3-way table are shown in the table below (\@ref(tab:loglin-3way)). The **Model formula** column shows how to express each model for `loglm()` in R. ^[For `glm()`, or `gnm()`, with the data in the form of a frequency data.frame, the same model is specified in the form `glm(Freq` $\sim$ `..., family="poisson")`, where `Freq` is the name of the cell frequency variable and `...` specifies the *Model formula*.] In the **Interpretation** column, the symbol "$\perp$" is to be read as "is independent of," and "$\;|\;$" means "conditional on," or "adjusting for," or just "given". | **Model** | **Model formula** | **Symbol** | **Interpretation** | |:-------------------------|:-------------------|:---------------|:-----------------------| | Mutual independence | `~A + B + C` | $[A][B][C]$ | $A \perp B \perp C$ | | Joint independence | `~A*B + C` | $[AB][C]$ | $(A \: B) \perp C$ | | Conditional independence | `~(A+B)*C` | $[AC][BC]$ | $(A \perp B) \;|\; C$ | | All two-way associations | `~A*B + A*C + B*C` | $[AB][AC][BC]$ | homogeneous association| | Saturated model | `~A*B*C` | $[ABC]$ | 3-way association | For example, the formula `~A + B + C` specifies the model of *mutual independence* with no associations among the three factors. In standard notation for the expected frequencies $m_{ijk}$, this corresponds to $$ \log ( m_{ijk} ) = \mu + \lambda_i^A + \lambda_j^B + \lambda_k^C \equiv A + B + C $$ The parameters $\lambda_i^A , \lambda_j^B$ and $\lambda_k^C$ pertain to the differences among the one-way marginal frequencies for the factors A, B and C.
Similarly, the model of *joint independence*, $(A \: B) \perp C$, allows an association between A and B, but specifies that C is independent of both of these and their combinations, $$ \log ( m_{ijk} ) = \mu + \lambda_i^A + \lambda_j^B + \lambda_k^C + \lambda_{ij}^{AB} \equiv A * B + C $$ where the parameters $\lambda_{ij}^{AB}$ pertain to the overall association between A and B (collapsing over C). In the literature or textbooks, you will often find these models expressed in shorthand symbolic notation, using brackets, `[ ]`, to enclose the *high-order terms* in the model. Thus, the joint independence model can be denoted `[AB][C]`, as shown in the **Symbol** column in the table (\@ref(tab:loglin-3way)). Models of *conditional independence* allow (and fit) two of the three possible two-way associations. There are three such models, depending on which variable is conditioned upon. For a given conditional independence model, e.g., `[AB][AC]`, the given variable is the one common to all terms, so this example has the interpretation $(B \perp C) \;|\; A$. ## Fitting with `loglm()` {#sec:loglm} For example, we can fit the model of mutual independence among hair color, eye color and sex in `HairEyeColor` as ```{r, loglm-hec1} library(MASS) ## Independence model of hair and eye color and sex. hec.1 <- loglm(~Hair+Eye+Sex, data=HairEyeColor) hec.1 ``` Similarly, the models of conditional independence and joint independence are specified as ```{r, loglm-hec2} ## Conditional independence hec.2 <- loglm(~(Hair + Eye) * Sex, data=HairEyeColor) hec.2 ``` ```{r, loglm-hec3} ## Joint independence model. hec.3 <- loglm(~Hair*Eye + Sex, data=HairEyeColor) hec.3 ``` Note that printing the model gives a brief summary of the goodness of fit. A set of models can be compared using the `anova()` function.
```{r, loglm-anova} anova(hec.1, hec.2, hec.3) ``` ## Fitting with `glm()` and `gnm()` {#sec:glm} The `glm()` approach and its extensions in the `gnm` package allow a much wider class of models for frequency data to be fit than can be handled by `loglm()`. Of particular importance are models for ordinal factors and for square tables, where we can test more structured hypotheses about the patterns of association than are provided in the tests of general association under `loglm()`. These are similar in spirit to the non-parametric CMH tests described in \@ref(sec:CMH). ***Example***: The data `Mental` in the `vcdExtra` package gives a two-way table in frequency form classifying young people by their mental health status and parents' socioeconomic status (SES), where both of these variables are ordered factors. ```{r, mental1} data(Mental, package = "vcdExtra") str(Mental) xtabs(Freq ~ mental + ses, data=Mental) # display the frequency table ``` Simple ways of handling ordinal variables involve assigning scores to the table categories, and the simplest cases are to use integer scores, either for the row variable ("column effects" model), the column variable ("row effects" model), or both ("uniform association" model). ```{r, mental2} indep <- glm(Freq ~ mental + ses, family = poisson, data = Mental) # independence model ``` To fit more parsimonious models than general association, we can define numeric scores for the row and column categories: ```{r, mental3} # Use integer scores for rows/cols Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) ``` Then, the row effects model, the column effects model, and the uniform association model can be fit as follows. The essential idea is to replace a factor variable with its numeric equivalent in the model formula for the association term.
```{r, mental4} # column effects model (ses) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) # row effects model (mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) # linear x linear association linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) ``` The `LRstats()` function in `vcdExtra` provides a nice, compact summary of the fit statistics for a set of models, collected into a *glmlist* object. Smaller is better for AIC and BIC. ```{r, mental4a} # compare models using AIC, BIC, etc vcdExtra::LRstats(glmlist(indep, roweff, coleff, linlin)) ``` For specific model comparisons, we can also carry out tests of *nested* models with `anova()` when those models are listed from smallest to largest. Here, there are two separate paths from the most restrictive (independence) model through the model of uniform association, to those that allow only one of row effects or column effects. ```{r, mental5} anova(indep, linlin, coleff, test="Chisq") anova(indep, linlin, roweff, test="Chisq") ``` The model of linear by linear association seems best on all accounts. For comparison, one might try the CMH tests on these data: ```{r, mental6} CMHtest(xtabs(Freq~ses+mental, data=Mental)) ``` ## Non-linear terms The strength of the `gnm` package is that it handles a wide variety of models that handle non-linear terms, where the parameters enter the model beyond a simple linear function. The simplest example is the Goodman RC(1) model [@Goodman:79], which allows a multiplicative term to account for the association of the table variables. 
In the notation of generalized linear models with a log link, this can be expressed as $$ \log \mu_{ij} = \alpha_i + \beta_j + \gamma_{i} \delta_{j} ,$$ where the row-multiplicative effect parameters $\gamma_i$ and corresponding column parameters $\delta_j$ are estimated from the data.% ^[This is similar in spirit to a correspondence analysis with a single dimension, but as a statistical model.] Similarly, the RC(2) model adds two multiplicative terms to the independence model, $$ \log \mu_{ij} = \alpha_i + \beta_j + \gamma_{i1} \delta_{j1} + \gamma_{i2} \delta_{j2} . $$ In the `gnm` package, these models may be fit using the `Mult()` to specify the multiplicative term, and `instances()` to specify several such terms. ***Example***: For the `Mental` data, we fit the RC(1) and RC(2) models, and compare these with the independence model. ```{r, mental7} RC1 <- gnm(Freq ~ mental + ses + Mult(mental,ses), data=Mental, family=poisson, verbose=FALSE) RC2 <- gnm(Freq ~ mental+ses + instances(Mult(mental,ses),2), data=Mental, family=poisson, verbose=FALSE) anova(indep, RC1, RC2, test="Chisq") ``` ## References vcdExtra/vignettes/tests.Rmd0000644000176200001440000003560214470701662015665 0ustar liggesusers--- title: "Tests of Independence" author: "Michael Friendly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] vignette: > %\VignetteIndexEntry{Tests of Independence} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut02-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # 
\SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` OK, now we're ready to do some analyses. This vignette focuses on relatively simple non-parametric tests and measures of association. ## CrossTable For tabular displays, the `CrossTable()` function in the `gmodels` package produces cross-tabulations modeled after `PROC FREQ` in SAS or `CROSSTABS` in SPSS. It has a wealth of options for the quantities that can be shown in each cell. Recall the GSS data used earlier. ```{r, GSStab} # Agresti (2002), table 3.11, p. 106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) (GSStab <- xtabs(count ~ sex + party, data=GSS)) ``` Generate a cross-table showing cell frequency and the cell contribution to $\chi^2$. ```{r, xtabs-ex2} # 2-Way Cross Tabulation library(gmodels) CrossTable(GSStab, prop.t=FALSE, prop.r=FALSE, prop.c=FALSE) ``` There are options to report percentages (row, column, cell), specify decimal places, produce Chi-square, Fisher, and McNemar tests of independence, report expected and residual values (pearson, standardized, adjusted standardized), include missing values as valid, annotate with row and column titles, and format as SAS or SPSS style output! See `help(CrossTable)` for details. ## Chi-square test For 2-way tables you can use `chisq.test()` to test independence of the row and column variable. By default, the $p$-value is calculated from the asymptotic chi-squared distribution of the test statistic. Optionally, the $p$-value can be derived via Monte Carlo simulation. 
```{r, chisq} (HairEye <- margin.table(HairEyeColor, c(1, 2))) chisq.test(HairEye) chisq.test(HairEye, simulate.p.value = TRUE) ``` ## Fisher Exact Test {#sec:Fisher} `fisher.test(X)` provides an **exact test** of independence. `X` must be a two-way contingency table in table form. Another form, `fisher.test(X, Y)`, takes two categorical vectors of the same length. For tables larger than $2 \times 2$ the method can be computationally intensive (or can fail) if the frequencies are not small. ```{r fisher} fisher.test(GSStab) ``` Fisher's test is meant for tables with small total sample size. It generates an error for the `HairEye` data with $n = 592$ total frequency. ```{r fisher-error, error=TRUE} fisher.test(HairEye) ``` ## Mantel-Haenszel test and conditional association {#sec:mantel} Use the `mantelhaen.test(X)` function to perform a Cochran-Mantel-Haenszel $\chi^2$ test of the null hypothesis that two nominal variables are *conditionally independent*, $A \perp B \; | \; C$, in each stratum, assuming that there is no three-way interaction. `X` is a 3-dimensional contingency table, where the last dimension refers to the strata. The `UCBAdmissions` data serves as an example of a $2 \times 2 \times 6$ table, with `Dept` as the stratifying variable. ```{r, mantel1} # UC Berkeley Student Admissions mantelhaen.test(UCBAdmissions) ``` The results show no evidence for association between admission and gender when adjusted for department. However, we can easily see that the assumption of equal association across the strata (no 3-way association) is probably violated. For $2 \times 2 \times k$ tables, this can be examined from the odds ratios for each $2 \times 2$ table (`oddsratio()`), and tested by using `woolf_test()` in `vcd`.
```{r, mantel2} oddsratio(UCBAdmissions, log=FALSE) lor <- oddsratio(UCBAdmissions) # capture log odds ratios summary(lor) woolf_test(UCBAdmissions) ``` ## Some plot methods ### Fourfold displays We can visualize the odds ratios of Admission for each department with fourfold displays using `fourfold()`. The cell frequencies $n_{ij}$ of each $2 \times 2$ table are shown as a quarter circle whose radius is proportional to $\sqrt{n_{ij}}$, so that its area is proportional to the cell frequency. ```{r, reorder3} UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") ``` Confidence rings for the odds ratio allow a visual test of the null of no association; the rings for adjacent quadrants overlap *iff* the observed counts are consistent with the null hypothesis. In the extended version (the default), brighter colors are used where the odds ratio is significantly different from 1. The following lines produce @ref(fig:fourfold1). ```{r} #| fourfold1, #| h=5, w=7.5, #| cap = "Fourfold display for the `UCBAdmissions` data. Where the odds ratio differs #| significantly from 1.0, the confidence bands do not overlap, and the circle quadrants are #| shaded more intensely." col <- c("#99CCFF", "#6699CC", "#F9AFAF", "#6666A0", "#FF0000", "#000080") fourfold(UCB, mfrow=c(2,3), color=col) ``` Another `vcd` function, `cotabplot()`, provides a more general approach to visualizing conditional associations in contingency tables, similar to trellis-like plots produced by `coplot()` and lattice graphics. The `panel` argument supplies a function used to render each conditional subtable. The following gives a display (not shown) similar to @ref(fig:fourfold1). 
```{r fourfold2, eval=FALSE} cotabplot(UCB, panel = cotab_fourfold) ``` ### Doubledecker plots When we want to view the conditional probabilities of a response variable (e.g., `Admit`) in relation to several factors, an alternative visualization is a `doubledecker()` plot. This plot is a specialized version of a mosaic plot, which highlights the levels of a response variable (plotted vertically) in relation to the factors (shown horizontally). The following call produces \@ref(fig:doubledecker), where we use indexing on the first factor (`Admit`) to make `Admitted` the highlighted level. In this plot, the association between `Admit` and `Gender` is shown where the heights of the highlighted conditional probabilities do not align. The excess of females admitted in Dept A stands out here. ```{r} #| doubledecker, #| h=5, w=8, #| out.width = "75%", #| cap = "Doubledecker display for the `UCBAdmissions` data. The heights #| of the highlighted bars show the conditional probabilities of `Admit`, #| given `Dept` and `Gender`." doubledecker(Admit ~ Dept + Gender, data=UCBAdmissions[2:1,,]) ``` ### Odds ratio plots Finally, there is a `plot()` method for `oddsratio` objects. By default, it shows the 95% confidence interval for the log odds ratio. Figure \@ref(fig:oddsratio) is produced by: ```{r} #| oddsratio, #| h=6, w=6, #| out.width = "60%", #| cap = "Log odds ratio plot for the `UCBAdmissions` data." plot(lor, xlab="Department", ylab="Log Odds Ratio (Admit | Gender)") ``` ## Cochran-Mantel-Haenszel tests for ordinal factors {#sec:CMH} The standard $\chi^2$ tests for association in a two-way table treat both table factors as nominal (unordered) categories. When one or both factors of a two-way table are quantitative or ordinal, more powerful tests of association may be obtained by taking ordinality into account, using row and/or column scores to test for linear trends or differences in row or column means.
More general versions of the CMH tests [@Landis-etal:1978] are provided by assigning numeric scores to the row and/or column variables. For example, with two ordinal factors (assumed to be equally spaced), assigning integer scores, `1:R` and `1:C`, tests the linear $\times$ linear component of association. This is statistically equivalent to the Pearson correlation between the integer-scored table variables, with $\chi^2 = (n-1) r^2$, with only 1 $df$ rather than $(R-1)\times(C-1)$ for the test of general association. When only one table variable is ordinal, these general CMH tests are analogous to an ANOVA, testing whether the row mean scores or column mean scores are equal, again consuming fewer $df$ than the test of general association. The `CMHtest()` function in `vcdExtra` calculates these various CMH tests for two possibly ordered factors, optionally stratified by other factor(s). ***Example***: ```{r, table-form2, include=FALSE} ## A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction JobSat <- matrix(c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4) dimnames(JobSat) = list(income=c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction=c("VeryD", "LittleD", "ModerateS", "VeryS")) JobSat <- as.table(JobSat) ``` Recall the $4 \times 4$ table, `JobSat`, introduced in \@ref(sec:creating), ```{r, jobsat} JobSat ``` Treating the `satisfaction` levels as equally spaced, but using midpoints of the `income` categories as row scores gives the following results: ```{r, cmh1} CMHtest(JobSat, rscores=c(7.5,20,32.5,60)) ``` Note that with the relatively small cell frequencies, the test for general association gives no evidence of association. However, the `cor` test for linear x linear association on 1 df is nearly significant. The `coin` package contains the functions `cmh_test()` and `lbl_test()` for CMH tests of general association and linear x linear association respectively.
## Measures of Association There are a variety of statistical measures of *strength* of association for contingency tables---similar in spirit to $r$ or $r^2$ for continuous variables. With a large sample size, even a small degree of association can show a significant $\chi^2$, as in the example below for the `GSS` data. The `assocstats()` function in `vcd` calculates the $\phi$ coefficient, the contingency coefficient, and Cramer's V for an $r \times c$ table. The input must be in table form, a two-way $r \times c$ table. It won't work with `GSS` in frequency form, but by now you should know how to convert. ```{r, assoc1} assocstats(GSStab) ``` For tables with ordinal variables, like `JobSat`, some people prefer the Goodman-Kruskal $\gamma$ statistic [@vcd:Agresti:2002, Section 2.4.3] based on a comparison of concordant and discordant pairs of observations in the case-form equivalent of a two-way table. ```{r, gamma} GKgamma(JobSat) ``` A web article by Richard Darlington, <http://node101.psych.cornell.edu/Darlington/crosstab/TABLE0.HTM>, gives further description of these and other measures of association. ## Measures of Agreement The `Kappa()` function in the `vcd` package calculates Cohen's $\kappa$ and weighted $\kappa$ for a square two-way table with the same row and column categories [@Cohen:60].^[Don't confuse this with `kappa()` in base R, which computes something entirely different (the condition number of a matrix).] Normal-theory $z$-tests are obtained by dividing $\kappa$ by its asymptotic standard error (ASE). A `confint()` method for `Kappa` objects provides confidence intervals. ```{r, kappa} data(SexualFun, package = "vcd") (K <- Kappa(SexualFun)) confint(K) ``` A visualization of agreement [@Bangdiwala:87], both unweighted and weighted for degree of departure from exact agreement, is provided by the `agreementplot()` function. Figure \@ref(fig:agreesex) shows the agreement plot for the `SexualFun` data, produced as shown below.
The Bangdiwala measures (returned by the function) represent the proportion of the shaded areas of the diagonal rectangles, using weights $w_1$ for exact agreement, and $w_2$ for partial agreement one step from the main diagonal. ```{r} #| agreesex, #| h=6, w=7, #| out.width = "70%", #| cap = "Agreement plot for the `SexualFun` data." agree <- agreementplot(SexualFun, main="Is sex fun?") unlist(agree) ``` In other examples, the agreement plot can help to show *sources* of disagreement. For example, when the shaded boxes are above or below the diagonal (red) line, a lack of exact agreement can be attributed in part to different frequency of use of categories by the two raters---a lack of *marginal homogeneity*. ## Correspondence analysis Correspondence analysis is a technique for visually exploring relationships between rows and columns in contingency tables. The `ca` package gives one implementation. For an $r \times c$ table, the method provides a breakdown of the Pearson $\chi^2$ for association in up to $M = \min(r-1, c-1)$ dimensions, and finds scores for the row ($x_{im}$) and column ($y_{jm}$) categories such that the observations have the maximum possible correlations.^[Related methods are the non-parametric CMH tests using assumed row/column scores (\@ref(sec:CMH)), the analogous `glm()` model-based methods (\@ref(sec:CMH)), and the more general RC models which can be fit using `gnm()`. Correspondence analysis differs in that it is a primarily descriptive/exploratory method (no significance tests), but is directly tied to informative graphic displays of the row/column categories.] Here, we carry out a simple correspondence analysis of the `HairEye` data. The printed results show that nearly 99% of the association between hair color and eye color can be accounted for in 2 dimensions, of which the first dimension accounts for 90%.
```{r, ca1} library(ca) ca(HairEye) ``` The resulting `ca` object can be plotted just by running the `plot()` method on the `ca` object, giving the result in \@ref(fig:ca-haireye). `plot.ca()` does not allow labels for dimensions; these can be added with `title()`. It can be seen that most of the association is accounted for by the ordering of both hair color and eye color along Dimension 1, a dark to light dimension. ```{r ca-haireye, cap = "Correspondence analysis plot for the `HairEye` data"} plot(ca(HairEye), main="Hair Color and Eye Color") ``` ## References vcdExtra/vignettes/vcd.bib0000644000176200001440000006353314422306403015304 0ustar liggesusers%% general graphics & original methods @Article{vcd:Cohen:1980, author = {A. Cohen}, title = {On the Graphical Display of the Significant Components in a Two-Way Contingency Table}, journal = {Communications in Statistics---Theory and Methods}, year = {1980}, volume = {A9}, pages = {1025--1041} } @InProceedings{vcd:Hartigan+Kleiner:1981, author = {J. A. Hartigan and B. Kleiner}, title = {Mosaics for Contingency Tables}, booktitle = {Computer Science and Statistics: Proceedings of the 13th Symposium on the Interface}, pages = {268--273}, year = {1981}, editor = {W. F. Eddy}, address = {New York}, publisher = {Springer-Verlag} } @Article{vcd:Hartigan+Kleiner:1984, author = {J. A. Hartigan and B. Kleiner}, title = {A Mosaic of Television Ratings}, journal = {The American Statistician}, year = {1984}, volume = {38}, pages = {32--35} } @TechReport{vcd:Young:1996, author = {Forrest W. Young}, title = {{\pkg{ViSta}}: The Visual Statistics System}, institution = {UNC L.~L.~Thurstone Psychometric Laboratory Research Memorandum}, year = 1996, number = {94--1(c)} } @Book{vcd:Cleveland:1993, author = {William S. Cleveland}, title = {Visualizing Data}, publisher = {Hobart Press}, year = 1993, address = {Summit, New Jersey} } @Article{vcd:Becker+Cleveland+Shyu:1996, author = {Richard A. Becker and William S. 
Cleveland and Ming-Jen Shyu}, title = {The Visual Design and Control of Trellis Display}, journal = {Journal of Computational and Graphical Statistics}, year = {1996}, volume = {5}, pages = {123--155} } @InProceedings{vcd:Riedwyl+Schuepbach:1994, author = {H. Riedwyl and M. Sch{\"u}pbach}, title = {Parquet Diagram to Plot Contingency Tables}, booktitle = {Softstat '93: Advances in Statistical Software}, pages = {293--299}, year = 1994, editor = {F. Faulbaum}, address = {New York}, publisher = {Gustav Fischer} } %% color @InProceedings{vcd:Ihaka:2003, author = {Ross Ihaka}, title = {Colour for Presentation Graphics}, booktitle = {Proceedings of the 3rd International Workshop on Distributed Statistical Computing, Vienna, Austria}, editor = {Kurt Hornik and Friedrich Leisch and Achim Zeileis}, year = {2003}, url = {http://www.ci.tuwien.ac.at/Conferences/DSC-2003/Proceedings/}, note = {{ISSN 1609-395X}}, } @Article{vcd:Lumley:2006, author = {Thomas Lumley}, title = {Color Coding and Color Blindness in Statistical Graphics}, journal = {ASA Statistical Computing \& Graphics Newsletter}, year = {2006}, volume = {17}, number = {2}, pages = {4--7} } @Book{vcd:Munsell:1905, author = {Albert H. Munsell}, title = {A Color Notation}, publisher = {Munsell Color Company}, year = {1905}, address = {Boston, Massachusetts} } @Article{vcd:Harrower+Brewer:2003, author = {Mark A. Harrower and Cynthia A. Brewer}, title = {\pkg{ColorBrewer.org}: An Online Tool for Selecting Color Schemes for Maps}, journal = {The Cartographic Journal}, year = {2003}, volume = {40}, pages = {27--37} } @InProceedings{vcd:Brewer:1999, author = {Cynthia A. Brewer}, title = {Color Use Guidelines for Data Representation}, booktitle = {Proceedings of the Section on Statistical Graphics, American Statistical Association}, address = {Alexandria, VA}, year = {1999}, pages = {55--60} } @Article{vcd:Cleveland+McGill:1983, author = {William S. 
Cleveland and Robert McGill}, title = {A Color-caused Optical Illusion on a Statistical Graph}, journal = {The American Statistician}, year = {1983}, volume = {37}, pages = {101--105} } @Book{vcd:CIE:2004, author = {{Commission Internationale de l'\'Eclairage}}, title = {Colorimetry}, edition = {3rd}, publisher = {Publication CIE 15:2004}, address = {Vienna, Austria}, year = {2004}, note = {{ISBN} 3-901-90633-9} } @InProceedings{vcd:Moretti+Lyons:2002, author = {Giovanni Moretti and Paul Lyons}, title = {Tools for the Selection of Colour Palettes}, booktitle = {Proceedings of the New Zealand Symposium On Computer-Human Interaction (SIGCHI 2002)}, address = {University of Waikato, New Zealand}, month = {July}, year = {2002} } @Article{vcd:MacAdam:1942, author = {D. L. MacAdam}, title = {Visual Sensitivities to Color Differences in Daylight}, journal = {Journal of the Optical Society of America}, year = {1942}, volume = {32}, number = {5}, pages = {247--274}, } @Book{vcd:Wyszecki+Stiles:2000, author = {G\"unter Wyszecki and W. S. 
Stiles}, title = {Color Science}, edition = {2nd}, publisher = {Wiley}, year = {2000}, note = {{ISBN} 0-471-39918-3} } @Misc{vcd:Poynton:2000, author = {Charles Poynton}, title = {Frequently-Asked Questions About Color}, year = {2000}, howpublished = {URL \url{http://www.poynton.com/ColorFAQ.html}}, note = {Accessed 2006-09-14}, } @Misc{vcd:Wiki+HSV:2006, author = {Wikipedia}, title = {{HSV} Color Space --- {W}ikipedia{,} The Free Encyclopedia}, year = {2006}, howpublished = {URL \url{http://en.wikipedia.org/w/index.php?title=HSV_color_space&oldid=74735552}}, note = {Accessed 2006-09-14}, } @Misc{vcd:Wiki+LUV:2006, author = {Wikipedia}, title = {{Lab} Color Space --- {W}ikipedia{,} The Free Encyclopedia}, year = {2006}, howpublished = {URL \url{http://en.wikipedia.org/w/index.php?title=Lab_color_space&oldid=72611029}}, note = {Accessed 2006-09-14}, } @Article{vcd:Smith:1978, author = {Alvy Ray Smith}, title = {Color Gamut Transform Pairs}, journal = {Computer Graphics}, pages = {12--19}, year = {1978}, volume = {12}, number = {3}, note = {ACM SIGGRAPH 78 Conference Proceedings}, } %% url = {http://www.alvyray.com/}, @Article{vcd:Meier+Spalter+Karelitz:2004, author = {Barbara J. Meier and Anne Morgan Spalter and David B. Karelitz}, title = {Interactive Color Palette Tools}, journal = {{IEEE} Computer Graphics and Applications}, volume = {24}, number = {3}, year = {2004}, pages = {64--72}, } %% url = {http://graphics.cs.brown.edu/research/color/} @InCollection{vcd:Mollon:1995, author = {J. Mollon}, editor = {T. Lamb and J. 
Bourriau}, booktitle = {Colour: Art and Science}, title = {Seeing Color}, publisher = {Cambridge Univesity Press}, year = 1995 } %% Friendly publications @Article{vcd:Friendly:1994, author = {Michael Friendly}, title = {Mosaic Displays for Multi-Way Contingency Tables}, journal = {Journal of the American Statistical Association}, year = {1994}, volume = {89}, pages = {190--200} } @Article{vcd:Friendly:1999, author = {Michael Friendly}, title = {Extending Mosaic Displays: Marginal, Conditional, and Partial Views of Categorical Data}, journal = {Journal of Computational and Graphical Statistics}, year = {1999}, volume = {8}, number = {3}, pages = {373--395} } @Book{vcd:Friendly:2000, author = {Michael Friendly}, title = {Visualizing Categorical Data}, publisher = {\textsf{SAS} Insitute}, year = {2000}, address = {Carey, NC}, URL = {http://www.math.yorku.ca/SCS/vcd/} } @Book{FriendlyMeyer:2016:DDAR, title = {Discrete Data Analysis with R: Visualization and Modeling Techniques for Categorical and Count Data}, year = {2016}, author = {Friendly, Michael and Meyer, David}, publisher = {Chapman \& Hall/CRC}, address = {Boca Raton, FL}, isbn = {978-1-4987-2583-5}, } %% Augsburg publications @Article{vcd:Theus+Lauer:1999, author = {Martin Theus and Stephan R. W. 
Lauer}, title = {Visualizing Loglinear Models}, journal = {Journal of Computational and Graphical Statistics}, year = 1999, volume = 8, number = 3, pages = {396--412} } @Article{vcd:Hofmann:2003, author = {Heike Hofmann}, title = {Constructing and Reading Mosaicplots}, journal = {Computational Statistics \& Data Analysis}, year = {2003}, volume = {43}, pages = {565--580} } @Article{vcd:Hofmann:2001, author = {Heike Hofmann}, title = {Generalized Odds Ratios for Visual Modelling}, journal = {Journal of Computational and Graphical Statistics}, year = {2001}, volume = {10}, pages = {1--13} } @Article{vcd:Theus:2003, author = {Martin Theus}, title = {Interactive Data Visualization Using \pkg{Mondrian}}, journal = {Journal of Statistical Software}, volume = 7, number = 11, pages = {1--9}, year = 2003, url = {https://www.jstatsoft.org/v07/i11/}, } @Unpublished{vcd:Hofmann+Theus, author = {Heike Hofmann and Martin Theus}, title = {Interactive Graphics for Visualizing Conditional Distributions}, note = {Unpublished Manuscript}, year = {2005} } @Article{vcd:Hummel:1996, author = {J. Hummel}, title = {Linked Bar Charts: Analysing Categorical Data Graphically}, journal = {Computational Statistics}, year = 1996, volume = 11, pages = {23--33} } @Article{vcd:Unwin+Hawkins+Hofmann:1996, author = {Antony R. Unwin and G. Hawkins and Heike Hofmann and B. 
Siegl}, title = {Interactive Graphics for Data Sets with Missing Values -- \pkg{MANET}}, journal = {Journal of Computational and Graphical Statistics}, year = 1996, pages = {113--122}, volume = 4, number = 6 } @Manual{vcd:Urbanek+Wichtrey:2006, title = {\pkg{iplots}: Interactive Graphics for \textsf{R}}, author = {Simon Urbanek and Tobias Wichtrey}, year = {2006}, note = {\textsf{R} package version 1.0-3}, url = {http://www.rosuda.org/iPlots/} } %% Software @Manual{vcd:R:2006, title = {\textsf{R}: {A} Language and Environment for Statistical Computing}, author = {{\textsf{R} Development Core Team}}, organization = {\textsf{R} Foundation for Statistical Computing}, address = {Vienna, Austria}, year = {2006}, note = {{ISBN} 3-900051-00-3}, url = {http://www.R-project.org/} } @Article{vcd:Murrell:2002, author = {Paul Murrell}, title = {The \pkg{grid} Graphics Package}, journal = {\proglang{R} News}, year = 2002, volume = 2, number = 2, pages = {14--19}, month = {June}, url = {http://CRAN.R-project.org/doc/Rnews/} } @Book{vcd:Murrell:2006, author = {Paul Murrell}, title = {\textsf{R} Graphics}, publisher = {Chapman \& Hall/CRC}, address = {Boca Raton, Florida}, year = {2006}, } @Book{vcd:Venables+Ripley:2002, author = {William N. Venables and Brian D. 
Ripley}, title = {Modern Applied Statistics with \textsf{S}}, edition = {4th}, publisher = {Springer-Verlag}, address = {New York}, year = {2002}, note = {{ISBN} 0-387-95457-0}, url = {http://www.stats.ox.ac.uk/pub/MASS4/} } @Manual{vcd:Ihaka:2006, title = {\pkg{colorspace}: Colorspace Manipulation}, author = {Ross Ihaka}, year = {2006}, note = {\textsf{R} package version 0.95} } @Manual{vcd:Meyer+Zeileis+Hornik:2006, title = {\pkg{vcd}: Visualizing Categorical Data}, author = {David Meyer and Achim Zeileis and Kurt Hornik}, year = {2006}, note = {\textsf{R} package version 1.0-6} } @article{vcd:Ligges+Maechler:2003, title = {\pkg{scatterplot3d} -- An {R} Package for Visualizing Multivariate Data}, author = {Uwe Ligges and Martin M{\"a}chler}, journal = {Journal of Statistical Software}, year = 2003, pages = {1--20}, number = 11, volume = 8, url = {https://www.jstatsoft.org/v08/i11/} } @Manual{vcd:SAS:2005, title = {\proglang{SAS/STAT} Version 9}, author = {\proglang{SAS} Institute Inc.}, year = {2005}, address = {Cary, NC} } @Manual{vcd:SPLUS:2005, title = {\proglang{S-PLUS} 7}, author = {{Insightful Inc.}}, year = {2005}, address = {Seattle, WA} } %% data @Article{vcd:Azzalini+Bowman:1990, author = {A. Azzalini and A. W. Bowman}, title = {A Look at Some Data on the {O}ld {F}aithful Geyser}, journal = {Applied Statistics}, year = {1990}, volume = {39}, pages = {357--365}, } @Article{vcd:Obel:1975, author = {E.B. Obel}, title = {A Comparative Study of Patients with Cancer of the Ovary Who Have Survived More or Less Than 10 Years}, journal = {Acta Obstetricia et Gynecologica Scandinavica}, year = 1975, volume = 55, pages = {429--439} } @InCollection{vcd:Koch+Edwards:1988, author = {G. Koch and S. Edwards}, title = {Clinical Efficiency Trials with Categorical Data}, booktitle = {Biopharmaceutical Statistics for Drug Development}, editor = {K. E. 
Peace}, publisher = {Marcel Dekker}, address = {New York}, year = {1988}, pages = {403--451} } @TechReport{vcd:Knorr-Held:1999, author = {Leonhard Knorr-Held}, title = {Dynamic Rating of Sports Teams}, institution = {SFB 386 ``Statistical Analysis of Discrete Structures''}, year = {1999}, type = {Discussion Paper}, number = {98}, url = {http://www.stat.uni-muenchen.de/sfb386/} } @Article{vcd:Snee:1974, author = {R. D. Snee}, title = {Graphical Display of Two-Way Contingency Tables}, journal = {The American Statistician}, year = 1974, volume = 28, pages = {9--12} } @Article{vcd:Bickel+Hammel+O'Connell:1975, author = {P. J. Bickel and E. A. Hammel and J. W. O'Connell}, title = {Sex Bias in Graduate Admissions: Data from {B}erkeley}, journal = {Science}, year = 1975, volume = 187, pages = {398--403} } @Book{vcd:Gilbert:1981, author = {G. N. Gilbert}, title = {Modelling Society: An Introduction to Loglinear Analysis for Social Researchers}, publisher = {Allen and Unwin}, year = 1981, address = {London} } @Book{vcd:Thornes+Collard:1979, author = {B. Thornes and J. Collard}, title = {Who Divorces?}, publisher = {Routledge \& Kegan}, year = 1979, address = {London} } @Article{vcd:Dawson:1995, author = {Robert J. MacG Dawson}, title = {The ``Unusual Episode'' Data Revisited}, journal = {Journal of Statistics Education}, year = 1995, volume = 3, url = {http://www.amstat.org/publications/jse/v3n3/datasets.dawson.html} } @Article{vcd:Haberman:1974, author = {S. J. Haberman}, title = {Log-linear Models for Frequency Tables with Ordered Classifications}, journal = {Biometrics}, year = 1974, volume = 30, pages = {689--700} } @Article{vcd:Wing:1962, author = {J. K. Wing}, title = {Institutionalism in Mental Hospitals}, journal = {British Journal of Social Clinical Psychology}, year = 1962, volume = 1, pages = {38--51} } @Book{vcd:Andersen:1991, author = {E. B. 
Andersen}, title = {The Statistical Analysis of Categorical Data}, publisher = {Springer-Verlag}, year = {1991}, address = {Berlin}, edition = {2nd} } @Article{vcd:Haberman:1973, author = {S. J. Haberman}, title = {The Analysis of Residuals in Cross-classified Tables}, journal = {Biometrics}, year = {1973}, volume = {29}, pages = {205--220} } @Book{vcd:Everitt+Hothorn:2006, author = {Brian S. Everitt and Torsten Hothorn}, title = {A Handbook of Statistical Analyses Using \textsf{R}}, publisher = {Chapman \& Hall/CRC}, address = {Boca Raton, Florida}, year = {2006} } @Article{vcd:Salib+Hillier:1997, author = {Emad Salib and Valerie Hillier}, title = {A Case-Control Study of Smoking and {A}lzheimer's Disease}, journal = {International Journal of Geriatric Psychiatry}, year = {1997}, volume = {12}, pages = {295--300} } %% inference @Book{vcd:Agresti:2002, author = {Alan Agresti}, title = {Categorical Data Analysis}, publisher = {John Wiley \& Sons}, year = {2002}, address = {Hoboken, New Jersey}, edition = {2nd} } @Book{vcd:Mazanec+Strasser:2000, author = {Josef A. Mazanec and Helmut Strasser}, title = {A Nonparametric Approach to Perceptions-based Market Segmentation: Foundations}, publisher = {Springer-Verlag}, year = {2000}, address = {Berlin} } @Article{vcd:Strasser+Weber:1999, author = {Helmut Strasser and Christian Weber}, title = {On the Asymptotic Theory of Permutation Statistics}, journal = {Mathematical Methods of Statistics}, volume = {8}, pages = {220--250}, year = {1999} } @Book{vcd:Pesarin:2001, author = {Fortunato Pesarin}, title = {Multivariate Permutation Tests}, year = {2001}, publisher = {John Wiley \& Sons}, address = {Chichester} } @Article{vcd:Ernst:2004, author = {Michael D. Ernst}, title = {Permutation Methods: A Basis for Exact Inference}, journal = {Statistical Science}, volume = {19}, year = {2004}, pages = {676--685} } @Article{vcd:Patefield:1981, author = {W. M. 
Patefield}, title = {An Efficient Method of Generating $R \times C$ Tables with Given Row and Column Totals}, note = {{A}lgorithm AS 159}, journal = {Applied Statistics}, volume = {30}, year = {1981}, pages = {91--97} } %% own @InProceedings{vcd:Meyer+Zeileis+Hornik:2003, author = {David Meyer and Achim Zeileis and Kurt Hornik}, title = {Visualizing Independence Using Extended Association Plots}, booktitle = {Proceedings of the 3rd International Workshop on Distributed Statistical Computing, Vienna, Austria}, editor = {Kurt Hornik and Friedrich Leisch and Achim Zeileis}, year = {2003}, url = {http://www.ci.tuwien.ac.at/Conferences/DSC-2003/Proceedings/}, note = {{ISSN 1609-395X}}, } @TechReport{vcd:Zeileis+Meyer+Hornik:2005, author = {Achim Zeileis and David Meyer and Kurt Hornik}, title = {Residual-based Shadings for Visualizing (Conditional) Independence}, institution = {Department of Statistics and Mathematics, Wirtschaftsuniversit\"at Wien, Research Report Series}, year = {2005}, type = {Report}, number = {20}, month = {August}, url = {http://epub.wu-wien.ac.at/dyn/openURL?id=oai:epub.wu-wien.ac.at:epub-wu-01_871} } @Article{vcd:Zeileis+Meyer+Hornik:2007, author = {Achim Zeileis and David Meyer and Kurt Hornik}, title = {Residual-based Shadings for Visualizing (Conditional) Independence}, journal = {Journal of Computational and Graphical Statistics}, year = {2007}, volume = {16}, number = {3}, pages = {507--525}, doi = {10.1198/106186007X237856}, url = {http://statmath.wu-wien.ac.at/~zeileis/papers/Zeileis+Meyer+Hornik-2007.pdf} } @TechReport{vcd:Meyer+Zeileis+Hornik:2005a, author = {David Meyer and Achim Zeileis and Kurt Hornik}, title = {The Strucplot Framework: Visualizing Multi-Way Contingency Tables with \pkg{vcd}}, institution = {Department of Statistics and Mathematics, Wirtschaftsuniversit\"at Wien, Research Report Series}, year = {2005}, type = {Report}, number = {22}, month = {November}, url = 
{http://epub.wu-wien.ac.at/dyn/openURL?id=oai:epub.wu-wien.ac.at:epub-wu-01_8a1} } @Article{vcd:Meyer+Zeileis+Hornik:2006b, author = {David Meyer and Achim Zeileis and Kurt Hornik}, title = {The Strucplot Framework: Visualizing Multi-way Contingency Tables with \pkg{vcd}}, year = {2006}, journal = {Journal of Statistical Software}, volume = {17}, number = {3}, pages = {1--48}, url = {https://www.jstatsoft.org/v17/i03/} } @InCollection{vcd:Meyer+Zeileis+Hornik:2006a, author = {David Meyer and Achim Zeileis and Kurt Hornik}, title = {Visualizing Contingency Tables}, editor = {Chun-Houh Chen and Wolfgang H\"ardle and Antony Unwin}, booktitle = {Handbook of Data Visualization}, series = {Springer Handbooks of Computational Statistics}, year = {2006}, publisher = {Springer-Verlag}, address = {New York}, note = {{ISBN} 3-540-33036-4, to appear} } @Article{vcd:Hothorn+Hornik+VanDeWiel:2006, author = {Torsten Hothorn and Kurt Hornik and Mark A. van de Wiel and Achim Zeileis}, title = {A {L}ego System for Conditional Inference}, journal = {The American Statistician}, year = {2006}, volume = {60}, number = {3}, pages = {257--263}, doi = {10.1198/000313006X118430} } @TechReport{vcd:Zeileis+Hornik:2006, author = {Achim Zeileis and Kurt Hornik}, title = {Choosing Color Palettes for Statistical Graphics}, institution = {Department of Statistics and Mathematics, Wirtschaftsuniversit\"at Wien, Research Report Series}, year = {2006}, type = {Report}, number = {41}, month = {October}, url = {http://epub.wu-wien.ac.at/} } %% bad color examples @Article{vcd:Gneiting+Sevcikova+Percival:2006, author = {Tilmann Gneiting and Hana \v{S}ev\v{c}\'ikov\'a and Donald B. 
Percival and Martin Schlather and Yindeng Jiang}, title = {Fast and Exact Simulation of Large Gaussian Lattice Systems in {$\mathbb{R}^2$}: Exploring the Limits}, year = {2006}, journal = {Journal of Computational and Graphical Statistics}, volume = {15}, number = {3}, pages = {483--501}, note = {Figures~1--4} } @Article{vcd:Yang+Buckley+Dudoit:2002, author = {Yee Hwa Yang and Michael J. Buckley and Sandrine Dudoit and Terence P. Speed}, title = {Comparison of Methods for Image Analysis on {cDNA} Microarray Data}, year = {2002}, journal = {Journal of Computational and Graphical Statistics}, volume = {11}, number = {1}, pages = {108--136}, note = {Figure~4a} } @Article{vcd:Kneib:2006, author = {Thomas Kneib}, title = {Mixed Model-based Inference in Geoadditive Hazard Regression for Interval-censored Survival Times}, year = {2006}, journal = {Computational Statistics \& Data Analysis}, volume = {51}, pages = {777--792}, note = {Figure~5 (left)} } @Article{vcd:Friendly:2002, author = {Michael Friendly}, title = {A Brief History of the Mosaic Display}, year = {2002}, journal = {Journal of Computational and Graphical Statistics}, volume = {11}, number = {1}, pages = {89--107}, note = {Figure~11 (left, middle)} } @Article{vcd:Celeux+Hurn+Robert:2000, author = {Gilles Celeux and Merrilee Hurn and Christian P. Robert}, title = {Computational and Inferential Difficulties with Mixture Posterior Distributions}, year = {2000}, journal = {Journal of the American Statistical Association}, volume = {95}, number = {451}, pages = {957--970}, note = {Figure~3} } %% pointers from Hadley @article{cleveland:1987, Author = {Cleveland, William and McGill, Robert}, Journal = {Journal of the Royal Statistical Society A}, Number = {3}, Pages = {192-229}, Title = {Graphical Perception: The Visual Decoding of Quantitative Information on Graphical Displays of Data}, Volume = {150}, Year = {1987}} @article{cleveland:1984, Author = {Cleveland, William S. and McGill, M. 
E.}, Journal = {Journal of the American Statistical Association}, Number = 387, Pages = {531-554}, Title = {Graphical Perception: Theory, Experimentation and Application to the Development of Graphical Methods}, Volume = 79, Year = 1984} @article{huang:1997, Author = {Huang, Chisheng and McDonald, John Alan and Stuetzle, Werner}, Journal = {Journal of Computational and Graphical Statistics}, Pages = {383--396}, Title = {Variable resolution bivariate plots}, Volume = {6}, Year = {1997}} @article{carr:1987, Author = {Carr, D. B. and Littlefield, R. J. and Nicholson, W. L. and Littlefield, J. S.}, Journal = {Journal of the American Statistical Association}, Number = {398}, Pages = {424-436}, Title = {Scatterplot Matrix Techniques for Large N}, Volume = {82}, Year = {1987}} @book{cleveland:1994, Author = {Cleveland, William}, Publisher = {Hobart Press}, Title = {The Elements of Graphing Data}, Year = {1994}} @book{chambers:1983, Author = {Chambers, John and Cleveland, William and Kleiner, Beat and Tukey, Paul}, Publisher = {Wadsworth}, Title = {Graphical methods for data analysis}, Year = {1983}} @book{bertin:1983, Address = {Madison, WI}, Author = {Bertin, Jacques}, Publisher = {University of Wisconsin Press}, Title = {Semiology of Graphics}, Year = {1983}} @book{wilkinson:2006, Author = {Wilkinson, Leland}, Publisher = {Springer-Verlag}, Series = {Statistics and Computing}, Title = {The Grammar of Graphics}, Year = {2005}} vcdExtra/R/0000755000176200001440000000000014430460317012235 5ustar liggesusersvcdExtra/R/Crossings.R0000644000176200001440000000230214430460317014327 0ustar liggesusers# crossings model (Goodman, 1972) # Ref: #Goodman, L. (1972). Some multiplicative models for the analysis of cross-classified data. #In: Proceedings of the Sixth Berkeley Symposium on Mathematical Statistics and Probability, #Berkeley, CA: University of California Press, pp. 649-696. 
# Build the indicator columns of the crossings model.
#
# i, j : numeric level codes of the two factors, of equal length
#        (Crossings() passes as.numeric() of each factor).
# n    : number of levels; one column is produced per cut point 1..(n - 1).
#
# Returns an integer matrix with length(i) rows and columns C1..C<n-1>.
# Entry [r, k] is 1 when exactly one of i[r], j[r] is <= k, and 0 otherwise.
crossings <- function(i, j, n) {
  npar <- n - 1
  # seq_len() is empty when n == 1; the former 1:npar iterated over c(1, 0)
  # and failed on result[[0]].
  cuts <- seq_len(npar)
  cols <- lapply(cuts, function(k) {
    overi <- k >= i
    overj <- k >= j
    (overi & !overj) + (overj & !overi)
  })
  # unlist(list()) is NULL, which matrix() rejects; prepending integer(0)
  # gives a typed empty vector and leaves non-empty input unchanged.
  result <- matrix(c(integer(0), unlist(cols)), length(i), npar)
  colnames(result) <- paste0("C", cuts)
  result
}

# User-level wrapper around crossings() for exactly two factors.
#
# ... : two vectors of equal length; each is coerced with as.factor() and
#       both must then have the same number of levels.
#
# Returns the crossings() matrix for the integer codes of the two factors,
# with row names formed by pasting the two level labels together.
Crossings <- function(...) {
  dots <- list(...)
  if (length(dots) != 2) stop("Crossings() is defined for only two factors")
  if (length(dots[[1]]) != length(dots[[2]]))
    stop("arguments to Crossings() must all have same length")
  dots <- lapply(dots, as.factor)
  n <- nlevels(dots[[1]])
  if (nlevels(dots[[2]]) != n)
    stop("arguments to Crossings() must all have same number of levels")
  result <- crossings(as.numeric(dots[[1]]), as.numeric(dots[[2]]), n)
  rownames(result) <- do.call("paste", c(dots, sep = ""))
  result
}
vcdExtra/R/logLik.loglm.R0000644000176200001440000000127014430460317014712 0ustar liggesusers
# logLik method for loglm objects, to allow use of AIC() and BIC()
# with MASS::loglm, giving comparable results to the use of these
# functions with glm(..., family=poisson) models.

# allow for non-integer frequencies
# allow for zero frequencies, with a zero= argument

# object : a fitted model with components $df and $deviance, and optionally
#          $frequencies; when $frequencies is NULL the model is refitted via
#          update(object, keep.frequencies = TRUE) to obtain them.
# zero   : value substituted for cells whose frequency is exactly 0, so that
#          log(fr) stays finite.
#
# Returns an object of class "logLik" whose "df" attribute is the number of
# cells minus object$df.
logLik.loglm <- function(object, ..., zero=1E-10) {
  fr <- if(!is.null(object$frequencies)) unclass(object$frequencies) else {
    unclass(update(object, keep.frequencies = TRUE)$frequencies)
  }
  # computed before fr is flattened below, while dim(fr) is still available
  df <- prod(dim(fr)) - object$df
  if (any(fr==0)) {
    fr <- as.vector(fr)
    fr[fr==0] <- zero
  }
  structure(sum((log(fr) - 1) * fr - lgamma(fr + 1)) - object$deviance/2,
            df = df, class = "logLik")
}
vcdExtra/R/mosaic.glmlist.R0000644000176200001440000001137614430460317015315 0ustar liggesusers
#' Mosaic Displays for a glmlist Object
#' @param x a glmlist object
#' @param selection the index or name of one glm in \code{x}
#' @param panel panel function
#' @param type a character string indicating whether the \code{"observed"} or the \code{"expected"} values of the table should be visualized
#' @param legend show a legend in the mosaic displays?
#' @param main either a logical, or a vector of character strings used for plotting the main title. If main is a logical and TRUE, the name of the selected glm object is used. A character vector shorter than \code{x} is recycled, so a single string titles every panel
#' @param ask should the function display a menu of models, when one is not specified in \code{selection}?
#' @param graphics use a graphic menu when \code{ask=TRUE}?
#' @param rows,cols when \code{ask=FALSE}, the number of rows and columns in which to plot the mosaics
#' @param newpage start a new page? (only applies to \code{ask=FALSE})
#' @param ... other arguments passed to \code{\link{mosaic.glm}}
#' @export
mosaic.glmlist <- function(x, selection, panel=mosaic, type=c("observed", "expected"),
                           legend=ask | !missing(selection),
                           main=NULL, ask=TRUE, graphics=TRUE, rows, cols, newpage=TRUE, ...) {

  # calls <- sapply(x, mod.call)   # get model calls as strings
  models <- names(x)

  # One title per model. Previously the recycling step sat in the `else` of
  # the is.null() test, so it only ever ran for main = NULL; a single title
  # string then produced NA for every model after the first.
  if (!is.null(main)) {
    main <- if (is.logical(main) && isTRUE(main[1L])) models
            else rep_len(main, length(x))
  }
  # Name the titles so that a character `selection` finds its title as well.
  if (!is.null(main) && !is.null(models)) names(main) <- models

  type <- match.arg(type)
  if (!missing(selection)) {
    # a single model was requested: plot it and return the panel's value
    if (is.character(selection)) selection <- gsub(" ", "", selection)
    return(panel(x[[selection]], type=type, main=unname(main[selection]), legend=legend, ...))
  }
  # perhaps make these model labels more explicit for the menu
  if (ask && interactive()) {
    # keep offering the menu until the user picks 0
    repeat {
      selection <- menu(models, graphics=graphics, title="Select Model to Plot")
      if (selection == 0) break
      else panel(x[[selection]], type=type, main=unname(main[selection]), legend=legend, ...)
    }
  }
  else {
    # non-interactive: draw every model in a rows x cols grid layout
    nmodels <- length(x)
    dims <- mfrow(nmodels)
    if (missing(rows) || missing(cols)) {
      rows <- dims[1]
      cols <- dims[2]
    }

    if (newpage) grid.newpage()
    lay <- grid.layout(nrow=rows, ncol = cols)
    # NOTE(review): this layout viewport is never popped before returning;
    # confirm whether a closing popViewport() is intended.
    pushViewport(viewport(layout = lay, y = 0, just = "bottom"))
    for (i in seq_len(rows)) {
      for (j in seq_len(cols)) {
        if ((sel <- (i-1)*cols + j) > nmodels) break
        pushViewport(viewport(layout.pos.row=i, layout.pos.col=j))
        panel(x[[sel]], type=type, main=unname(main[sel]), newpage=FALSE, legend=legend, ...)
        popViewport()
      }
    }
  }
}

# Mosaic displays for a loglmlist object. Same arguments and control flow as
# mosaic.glmlist(); the only difference is that main = TRUE uses each model's
# $model.string as its title, falling back to the list name where that is NULL.
mosaic.loglmlist <- function(x, selection, panel=mosaic, type=c("observed", "expected"),
                             legend=ask | !missing(selection),
                             main=NULL, ask=TRUE, graphics=TRUE, rows, cols, newpage=TRUE, ...) {

  models <- names(x)
  strings <- as.vector(sapply(x, function(x) x$model.string))

  # One title per model; see mosaic.glmlist() for why recycling happens here.
  if (!is.null(main)) {
    main <- if (is.logical(main) && isTRUE(main[1L]))
              ifelse(as.vector(sapply(strings, is.null)), models, strings)
            else rep_len(main, length(x))
  }
  if (!is.null(main) && !is.null(models)) names(main) <- models

  type <- match.arg(type)
  if (!missing(selection)) {
    if (is.character(selection)) selection <- gsub(" ", "", selection)
    return(panel(x[[selection]], type=type, main=unname(main[selection]), legend=legend, ...))
  }
  # perhaps make these model labels more explicit for the menu
  if (ask && interactive()) {
    repeat {
      selection <- menu(models, graphics=graphics, title="Select Model to Plot")
      if (selection == 0) break
      else panel(x[[selection]], type=type, main=unname(main[selection]), legend=legend, ...)
    }
  }
  else {
    nmodels <- length(x)
    dims <- mfrow(nmodels)
    if (missing(rows) || missing(cols)) {
      rows <- dims[1]
      cols <- dims[2]
    }

    if (newpage) grid.newpage()
    lay <- grid.layout(nrow=rows, ncol = cols)
    # NOTE(review): this layout viewport is never popped before returning;
    # confirm whether a closing popViewport() is intended.
    pushViewport(viewport(layout = lay, y = 0, just = "bottom"))
    for (i in seq_len(rows)) {
      for (j in seq_len(cols)) {
        if ((sel <- (i-1)*cols + j) > nmodels) break
        pushViewport(viewport(layout.pos.row=i, layout.pos.col=j))
        panel(x[[sel]], type=type, main=unname(main[sel]), newpage=FALSE, legend=legend, ...)
        popViewport()
      }
    }
  }
}

# from effects::utilities.R
# Number of rows and columns for an array of n plots.
# Returns c(rows, cols) with rows = round(sqrt(n)) and cols = ceiling(n/rows);
# stops when max.plots is non-zero and n exceeds it.
mfrow <- function(n, max.plots=0){
  # scalar condition, so use the short-circuit operator
  if (max.plots != 0 && n > max.plots)
    stop(paste("number of plots =",n," exceeds maximum =", max.plots))
  rows <- round(sqrt(n))
  cols <- ceiling(n/rows)
  c(rows, cols)
}

# from plot.lm: get model call as a string
# TODO: should use abbreviate()
# x must have a $call component. When the call has a `formula` argument, only
# the function name and that formula are kept. The result is the first
# deparsed line, cut to 75 characters and suffixed with "..." when the
# deparse needed several lines or that first line is longer than 75.
mod.call <- function(x) {
  cal <- x$call
  if (!is.na(m.f <- match("formula", names(cal)))) {
    cal <- cal[c(1, m.f)]
    names(cal)[2L] <- ""   # drop the `formula =` label from the output
  }
  cc <- deparse(cal, 80)
  nc <- nchar(cc[1L], "c")
  abbr <- length(cc) > 1 || nc > 75
  cap <- if (abbr)
    paste(substr(cc[1L], 1L, min(75L, nc)), "...")
  else cc[1L]
  cap
}
vcdExtra/R/Summarise.R0000644000176200001440000000732314430460317014332 0ustar liggesusers
# fixed buglet when deviance() returns a null
# fixed bug: residual df calculated incorrectly
#  but this now depends on objects having a df.residual component
#  TRUE for lm, glm, polr, negbin objects

# made generic, adding a glmlist method

Summarise <- function(object, ...)
{
  UseMethod("Summarise")
}

# Summarise method for a list of models.
# Checks that all models have the same number of residuals, runs
# Summarise.default() on each one, and row-binds the results. When `sortby`
# names (or indexes) a column, rows are ordered by it, largest first.
Summarise.glmlist <- function(object, ..., saturated = NULL, sortby=NULL) {
  ns <- sapply(object, function(x) length(x$residuals))
  if (any(ns != ns[1L]))
    stop("models were not all fitted to the same size of dataset")
  nmodels <- length(object)
  if (nmodels == 1)
    return(Summarise.default(object[[1L]], saturated=saturated))

  rval <- lapply(object, Summarise.default, saturated=saturated)
  rval <- do.call(rbind, rval)
  if (!is.null(sortby)) {
    rval <- rval[order(rval[,sortby], decreasing=TRUE),]
  }
  rval
}

# The loglmlist method was a line-for-line copy of the glmlist method, so it
# is now the same function under a second name.
Summarise.loglmlist <- Summarise.glmlist

# Likelihood summary table for one or more fitted models.
#
# object, ... : fitted models; each must work with logLik() and expose its
#               residual df as $df (class "loglm") or $df.residual (others).
# saturated   : log-likelihood of the saturated model; when NULL it is taken
#               as logLik + deviance/2, or 0 if deviance() is unavailable.
# sortby      : optional column name or index; rows are ordered by it,
#               largest first.
#
# Returns a data frame of class "anova" with one row per model and columns
# AIC, BIC, LR Chisq, Df and Pr(>Chisq).
Summarise.default <- function(object, ..., saturated = NULL, sortby=NULL) {
  ## interface methods for logLik() and nobs()
  ## - use S4 methods if loaded
  ## - use residuals() if nobs() is not available
  logLik0 <- if("stats4" %in% loadedNamespaces()) stats4::logLik else logLik
  nobs0   <- function(x, ...) {
    nobs1 <- if("stats4" %in% loadedNamespaces()) stats4::nobs else nobs
    nobs2 <- function(x, ...) NROW(residuals(x, ...))
    rval <- try(nobs1(x, ...), silent = TRUE)
    # scalar test, so short-circuit with ||
    if(inherits(rval, "try-error") || is.null(rval)) rval <- nobs2(x, ...)
    return(rval)
  }

  ## residual degrees of freedom of one model
  dof <- function(x) {
    if (inherits(x, "loglm")) {
      rval <- x$df
    } else {
      rval <- try(x$df.residual, silent=TRUE)
    }
    if (inherits(rval, "try-error") || is.null(rval))
      # class(x) may hold several classes; report the first so the message
      # stays a single sentence
      stop(paste("Can't determine residual df for a", class(x)[1L], "object"))
    rval
  }

  ## collect all objects
  objects <- list(object, ...)
  nmodels <- length(objects)

  ## check sample sizes
  ns <- sapply(objects, nobs0)
  if(any(ns != ns[1L])) stop("models were not all fitted to the same size of dataset")

  ## extract log-likelihood and df (number of parameters)
  ll <- lapply(objects, logLik0)
  par <- as.numeric(sapply(ll, function(x) attr(x, "df")))
  df <- as.numeric(sapply(objects, function(x) dof(x)))
  ll <- sapply(ll, as.numeric)

  ## compute saturated reference value (use 0 if deviance is not available)
  if(is.null(saturated)) {
    dev <- try(sapply(objects, deviance), silent = TRUE)
    if(inherits(dev, "try-error") || any(sapply(dev, is.null))) {
      saturated <- 0
    } else {
      saturated <- ll + dev/2
    }
  }

  ## setup ANOVA-style matrix
  rval <- matrix(rep(NA, 5 * nmodels), ncol = 5)
  colnames(rval) <- c("AIC", "BIC", "LR Chisq", "Df", "Pr(>Chisq)")
  ## row labels are the deparsed model arguments of this call
  rownames(rval) <- as.character(sapply(match.call(), deparse)[-1L])[1:nmodels]
  rval[,1] <- -2 * ll + 2 * par
  rval[,2] <- -2 * ll + log(ns) * par
  rval[,3] <- -2 * (ll - saturated)
  rval[,4] <- df
  rval[,5] <- pchisq(rval[,3], df, lower.tail = FALSE)

  if (!is.null(sortby)) {
    ## drop = FALSE keeps a one-model table as a 1 x 5 matrix; without it the
    ## row collapsed to a vector and the result became a 5 x 1 data frame
    rval <- rval[order(rval[,sortby], decreasing=TRUE), , drop = FALSE]
  }

  ## return
  structure(as.data.frame(rval), heading = "Likelihood summary table:", class = c("anova", "data.frame"))
}
vcdExtra/R/summarise-old.R0000644000176200001440000000571214422306400015137 0ustar liggesusers

# summarise a glm object or glmlist
# Deprecated entry point: signals .Deprecated("LRstats") and forwards every
# argument to LRstats().
summarise <- function(...) {
  .Deprecated("LRstats")
  LRstats(...)
}

# summarise <- function(object, ...)
{ # UseMethod("summarise") # } # # stat.summarise <- function(deviance, df, onames, n) { # p <- pchisq(deviance, df, lower.tail=FALSE) # aic <- deviance - 2*df # if (missing(n)) { # result <- data.frame(aic, deviance, df, p) # names(result) <- c("AIC", "LR Chisq", "Df", "Pr(>Chisq)") # } # else { # bic <- deviance - log(n)*df # result <- data.frame(aic, bic, deviance, df, p) # names(result) <- c("AIC", "BIC", "LR Chisq", "Df", "Pr(>Chisq)") # } # # rownames(result) <- onames # attr(result, "heading") <- "Model Summary:" # class(result) <- c("anova", "data.frame") # result # } # # # summarise.glm <-function(object, ..., test=NULL){ # dotargs <- list(...) # is.glm <- unlist(lapply(dotargs, function(x) inherits(x, "glm"))) # dotargs <- dotargs[is.glm] # if (length(dotargs)) # return(summarise.glmlist(c(list(object), dotargs), test = test)) # # oname <- as.character(sys.call())[2] # result <- stat.summarise(object$deviance, object$df.residual, oname, sum(fitted(object))) # result # } # # summarise.glmlist <-function(object, ..., test=NULL, sortby=NULL){ # nmodels <- length(object) # if (nmodels == 1) # return(summarise.glm(object[[1]], test = test)) # if (is.null(names(object))) { # oname <- as.character(sys.call())[-1] # oname <- oname[1:length(object)] # } # else oname <- names(object) # # resdf <- as.numeric(lapply(object, function(x) x$df.residual)) # resdev <- as.numeric(lapply(object, function(x) x$deviance)) # n <- as.numeric(lapply(object, function(x) sum(fitted(x)))) # result <- stat.summarise(resdev, resdf, oname, n) # if (!is.null(sortby)) { # result <- result[order(result[,sortby], decreasing=TRUE),] # } # result # } # # # summarise.loglm <-function(object, ...){ # dotargs <- list(...) 
# is.loglm <- unlist(lapply(dotargs, function(x) inherits(x, "loglm"))) # dotargs <- dotargs[is.loglm] # if (length(dotargs)) # return(summarise.loglmlist(c(list(object), dotargs))) # # oname <- as.character(sys.call())[2] # result <- stat.summarise(object$deviance, object$df, oname, sum(fitted(object))) # result # } # # summarise.loglmlist <-function(object, ..., sortby=NULL){ # nmodels <- length(object) # if (nmodels == 1) # return(summarise.loglm(object[[1]])) # if (is.null(names(object))) { # oname <- as.character(sys.call())[-1] # oname <- oname[1:length(object)] # } # else oname <- names(object) # # resdf <- as.numeric(lapply(object, function(x) x$df)) # resdev <- as.numeric(lapply(object, function(x) x$deviance)) # n <- as.numeric(lapply(object, function(x) sum(fitted(x)))) # result <- stat.summarise(resdev, resdf, oname, n) # if (!is.null(sortby)) { # result <- result[order(result[,sortby], decreasing=TRUE),] # } # result # } # vcdExtra/R/zero.test.R0000644000176200001440000000261014430460317014314 0ustar liggesusers# Score test for zero inflation in Poisson data #https://stats.stackexchange.com/questions/118322/how-to-test-for-zero-inflation-in-a-dataset # References: # Broek, Jan van den. 1995. "A Score Test for Zero Inflation in a Poisson Distribution." Biometrics 51 (2): 738-43. doi:10.2307/2532959. # Yang, Zhao, James W. Hardin, and Cheryl L. Addy. 2010. "Score Tests for Zero-Inflation in Overdispersed Count Data." Communications in Statistics - Theory and Methods 39 (11): 2008-30. doi:10.1080/03610920902948228 # Van den Broek, J. (1995). A Score Test for Zero Inflation in a Poisson Distribution. Biometrics, 51(2), 738-743. 
doi:10.2307/2532959

# Score test for zero inflation in count data.
#
# x : a numeric vector of counts, or a 1-way table whose names are the count
#     values and whose entries are their frequencies (it is expanded to a
#     vector first).
#
# Prints the statistic, df and p-value, and invisibly returns
# list(statistic, df = 1, prob).
zero.test <- function(x) {
  if(is.table(x)) { # expand to vector of values
    if(length(dim(x)) > 1) stop ("x must be a 1-way table")
    x <- rep(as.numeric(names(x)), unname(c(x)))
  }
  lambda <- mean(x)
  p0_tilde <- exp(-lambda)     # exp(-mean): expected proportion of zeros
  n0 <- sum(1*(!(x >0)))       # number of values that are not positive
  n <- length(x)
  numerator <- (n0 - n*p0_tilde)^2
  denominator <- n*p0_tilde*(1-p0_tilde) - n*lambda*(p0_tilde^2)
  stat <- numerator/denominator
  pvalue <- pchisq(stat,df=1, ncp=0, lower.tail=FALSE)
  result <- list(statistic=stat, df=1, prob=pvalue)
  cat(paste("Score test for zero inflation\n\n",
            "\tChi-square =", round(stat,5), "\n",
            "\tdf = 1\n",
            "\tpvalue:", format.pval(pvalue), "\n"))
  invisible(result)
}
vcdExtra/R/cutfac.R0000644000176200001440000000056414430460317013632 0ustar liggesusers
# Cut a variable to a factor
#
# x      : numeric vector.
# breaks : cut points; when NULL, the unique quantiles of x at 0, 1/q, ..., 1.
# q      : number of quantile groups used when breaks is NULL.
#
# Returns a factor cut with left-closed intervals. Each level is relabelled
# with its lower break, followed by "-<next break - 1>" (or "+" for the last
# level) when the interval is wider than 1.
cutfac <- function(x, breaks = NULL, q=10) {
  if(is.null(breaks)) breaks <- unique(quantile(x, 0:q/q))
  x <- cut(x, breaks, include.lowest = TRUE, right = FALSE)
  levels(x) <- paste(breaks[-length(breaks)], ifelse(diff(breaks) > 1,
    c(paste("-", breaks[-c(1, length(breaks))] - 1, sep = ""), "+"), ""), sep = "")
  return(x)
}
vcdExtra/R/expand.dft.R0000644000176200001440000000252214430460317014414 0ustar liggesusers
# Originally from Marc Schwarz
# Ref: http://tolstoy.newcastle.edu.au/R/e6/help/09/01/1873.html

# 23 Feb 22: Fix warning from type.convert

# Expand a frequency table to case form: one row per observation.
#
# x         : a table object, or a data frame in frequency form.
# var.names : optional replacement names for the resulting columns; must
#             match their number exactly.
# freq      : name of the frequency column.
# ...       : passed on to type.convert().
#
# Returns a data frame without the frequency column, in which every row of x
# is repeated as many times as its frequency and each column has been
# re-typed by type.convert(as.is = TRUE).
expand.dft <- function(x, var.names = NULL, freq = "Freq", ...) {
  # allow: a table object, or a data frame in frequency form
  if (inherits(x, "table"))
    x <- as.data.frame.table(x, responseName = freq)

  freq.col <- which(colnames(x) == freq)
  if (length(freq.col) == 0)
    stop(paste(sQuote("freq"), "not found in column names"))

  # Repeat each row index by its frequency in a single step. This replaces
  # building one data frame per row and rbind()-ing them, and also handles a
  # zero-row input, where 1:nrow(x) iterated over c(1, 0).
  DF <- x[rep(seq_len(nrow(x)), times = x[[freq.col]]), -freq.col, drop = FALSE]

  for (i in seq_len(ncol(DF))) {
    DF[[i]] <- type.convert(as.character(DF[[i]]), as.is=TRUE, ...)
    ## DONE ##: Generates warning:
    ## 1: In type.convert.default(as.character(DF[[i]]), ...) :
    ##   'as.is' should be specified by the caller; using TRUE
  }

  rownames(DF) <- NULL

  if (!is.null(var.names)) {
    if (length(var.names) < dim(DF)[2]) {
      stop(paste("Too few", sQuote("var.names"), "given."))
    } else if (length(var.names) > dim(DF)[2]) {
      stop(paste("Too many", sQuote("var.names"), "given."))
    } else {
      names(DF) <- var.names
    }
  }

  DF
}

# make this a synonym
expand.table <- expand.dft
vcdExtra/R/HLtest.R0000644000176200001440000000401214430460317013560 0ustar liggesusers
# Functions for Hosmer Lemeshow test
# original function downloaded from
# http://sas-and-r.blogspot.com/2010/09/example-87-hosmer-and-lemeshow-goodness.html
#
# see also: MKmisc::gof.test for more general versions

# Hosmer-Lemeshow goodness-of-fit test for a binomial glm.
#
# model : a "glm" object with family binomial.
# g     : number of groups, formed by cutting the fitted values at their
#         0, 1/g, ..., 1 quantiles.
#
# Returns an object of class "HLtest": a list with the per-group table, the
# chi-square statistic, df = g - 2, its p-value, g and the model call.
HLtest <- HosmerLemeshow <- function(model, g=10) {
  if (!inherits(model, "glm")) stop("requires a binomial family glm")
  if (!family(model)$family == 'binomial') stop("requires a binomial family glm")
  y <- model$y
  yhat <- model$fitted.values
  cutyhat <- cut(yhat,
     breaks = quantile(yhat, probs=seq(0, 1, 1/g)),
     include.lowest=TRUE)
  # two-column cross-tabs per group: column 1 sums (1 - y), column 2 sums y
  obs <- xtabs(cbind(1 - y, y) ~ cutyhat)
  exp <- xtabs(cbind(1 - yhat, yhat) ~ cutyhat)
  chi <- (obs - exp)/sqrt(exp)
  # browser()
  # the obs/exp/chi columns below come from column 1, i.e. the (1 - y) side
  table <- data.frame(cut=dimnames(obs)$cutyhat,
                      total= as.numeric(apply(obs, 1, sum)),
                      obs=as.numeric(as.character(obs[,1])),
                      exp=as.numeric(as.character(exp[,1])),
                      chi=as.numeric(as.character(chi[,1]))
                      )
  rownames(table) <- 1:g
  chisq <- sum(chi^2)
  # upper tail computed directly; 1 - pchisq() loses precision for small p
  p <- pchisq(chisq, g - 2, lower.tail = FALSE)
  result <- list(table=table, chisq=chisq, df=g-2, p.value=p, groups=g, call=model$call)
  class(result) <- "HLtest"
  return(result)
}

# Print method: a heading, the model call, and a one-row table holding the
# statistic, df and p-value. Returns x invisibly.
print.HLtest <- function(x, ...) {
  heading <- "Hosmer and Lemeshow Goodness-of-Fit Test"
  df <- data.frame("ChiSquare"=x$chisq, df=x$df, "P_value"= x$p.value)
  cat(heading,"\n\n")
  cat("Call:\n")
  print(x$call)
  print(df, row.names=FALSE)
  invisible(x)
}

# Q: how to print **s next to large chisq components?
summary.HLtest <- function(object, ...)
{
  # show the per-group partition first, then the overall test via print()
  title <- "Partition for Hosmer and Lemeshow Goodness-of-Fit Test"
  cat(title,"\n\n")
  print(object$table)
  print(object)
}

## Q: how to display any large chi residuals on the bars??
# Rootogram of the obs and exp columns of the HLtest table, one bar per group.
rootogram.HLtest <- function(x, ...) {
  tab <- x$table
  rootogram(as.numeric(tab$obs), as.numeric(tab$exp),
            xlab="Fitted value group", names=1:x$groups, ...)
}

# Plotting an HLtest object draws its rootogram.
plot.HLtest <- function(x, ...) {
  rootogram.HLtest(x, ...)
}
vcdExtra/R/loglin-utilities.R0000644000176200001440000001511314430460317015656 0ustar liggesusers
#' Loglinear Model Utilities

#' These functions generate lists of terms to specify a loglinear model
#' in a form compatible with loglin and provide for conversion to an
#' equivalent loglm specification.  They allow for a more conceptual
#' way to specify such models.

#' models of joint independence, of some factors wrt one or more other factors

#' @param nf number of factors for which to generate model
#' @param table a contingency table used for factor names, typically the output from \code{\link[base]{table}}
#' @param factors names of factors used in the model when \code{table} is not specified
#' @param with indices of the factors against which others are considered jointly independent
#' @rdname loglin-utilities
#' @export

joint <- function(nf, table=NULL, factors=1:nf, with=nf) {
  # factor labels come from the table's dimnames when a table is supplied
  labels <- if (is.null(table)) factors else names(dimnames(table))
  if (nf == 1) {
    list(term1 = labels[1])
  } else if (nf == 2) {
    list(term1 = labels[1], term2 = labels[2])
  } else {
    # every factor not listed in `with` forms the first term
    rest <- setdiff(1:nf, with)
    list(term1 = labels[rest], term2 = labels[with])
  }
}

#' models of conditional independence of some factors wrt one or more other factors

#' @param nf number of factors for which to generate model
#' @param table a contingency table used for factor names, typically the output from \code{\link[base]{table}}
#' @param factors names of factors used in the model when \code{table} is not specified
#' @param with indices of the factors against which others are considered conditionally independent
#' @rdname loglin-utilities
#' @export

conditional <- function(nf, table=NULL, factors=1:nf, with=nf) {
  labels <- if (is.null(table)) factors else names(dimnames(table))
  if (nf == 1) return(list(term1 = labels[1]))
  if (nf == 2) return(list(term1 = labels[1], term2 = labels[2]))
  free <- setdiff(1:nf, with)
  # one column per free factor: that factor stacked on top of the `with` set
  given <- matrix(labels[with], length(with), length(free))
  terms <- as.list(as.data.frame(rbind(labels[free], given), stringsAsFactors=FALSE))
  names(terms) <- paste0('term', 1:length(terms))
  terms
}

#' models of mutual independence of all factors

#' @param nf number of factors for which to generate model
#' @param table a contingency table used for factor names, typically the output from \code{\link[base]{table}}
#' @param factors names of factors used in the model when \code{table} is not specified
#' @rdname loglin-utilities
#' @export

mutual <- function(nf, table=NULL, factors=1:nf) {
  labels <- if (is.null(table)) factors else names(dimnames(table))
  # one single-factor term for each of the first nf labels
  terms <- as.list(labels[1:nf])
  names(terms) <- paste0('term', 1:length(terms))
  terms
}

#' saturated model: highest-order interaction

#' @param nf number of factors for which to generate model
#' @param table a contingency table used for factor names, typically the output from \code{\link[base]{table}}
#' @param factors names of factors used in the model when \code{table} is not specified
#' @rdname loglin-utilities
#' @export

saturated <- function(nf, table=NULL, factors=1:nf) {
  labels <- if (is.null(table)) factors else names(dimnames(table))
  # a single term holding all nf factors
  list(term1 = labels[1:nf])
}

# models of conditional independence, given one pair of variables
## Not needed:  handled by condit, with length(with)>1

#condit2 <- function(nf, factors=1:nf, with=1:2) {
#  if (nf == 1) return (list(term1=factors[1]))
#  if (nf == 2) return (list(term1=factors[1], term2=factors[2]))
#  others <- setdiff(1:nf, with)
#  result <- rbind(factors[with], cbind(factors[others], factors[others]))
#  result <- as.list(as.data.frame(result,
#   stringsAsFactors=FALSE))
#	names(result) <- paste('term', 1:length(result), sep='')
#	result
#}

#' markov models of a given order
#' @param nf number of factors for which to generate model
#' @param factors names of factors used in the model when \code{table} is not specified.
#'        A contingency table (any object whose \code{dimnames} are named) may also be
#'        supplied here, in which case the names of its dimensions are used.
#' @param order order of the markov chain
#' @param table a contingency table used for factor names, typically the output from \code{\link[base]{table}}
#' @rdname loglin-utilities
#' @export
markov <- function(nf, factors=1:nf, order=1, table=NULL) {
  # seq_loglm() calls markov(i, mtab, ...), so a table can arrive positionally in
  # `factors`; without this, cell counts would be used as if they were factor names.
  if (is.null(table) && !is.null(names(dimnames(factors)))) table <- factors
  if (!is.null(table)) factors <- names(dimnames(table))

  if (nf == 1) return (list(term1=factors[1]))
  if (nf == 2) return (list(term1=factors[1], term2=factors[2]))
  if (length(factors) < order+2) {
    warning(paste('Not enough factors for order', order, 'Markov chain; using order=1'))
    order <- 1
    # first-order chain: each term pairs a factor with its successor
    result <- rbind(factors[1:(nf-1)], factors[2:nf])
  } else {
    if (nf <= order+1) {
      result <- factors[1:nf]
    } else {
      # row i holds the i-th member of every term; columns are the terms
      result <- NULL
      for (i in seq_len(order+1))
        result <- rbind(result, factors[i:(nf-order+i-1)])
    }
  }

  # one list element per column (= per model term)
  result <- as.list(as.data.frame(result, stringsAsFactors=FALSE))
  names(result) <- paste('term', seq_along(result), sep='')
  result
}

#' convert a loglin model to a model formula for loglm
#' @param x a list of terms in a loglinear model, such as returned by \code{joint}, \code{conditional}, \dots
#' @param env environment in which to evaluate the formula
#' @source Code from Henrique Dallazuanna, , R-help 7-4-2013
#' @rdname loglin-utilities
#' @export
loglin2formula <- function(x, env = parent.frame()) {
  # factors within a term are joined by ":", terms are joined by "+"
  terms <- lapply(x, paste, collapse = ":")
  formula(sprintf(" ~ %s", do.call(paste, c(terms, sep = "+"))), env=env)
}

#' convert a loglin model to a string, using bracket notation for the high-order terms
#' @param x a list of terms in a loglinear model, such as returned by \code{joint}, \code{conditional}, \dots
#' @param brackets characters to use to surround model terms.  Either a single character string containing two characters
#'        or a character vector of length two.
#' @param sep characters used to separate factor names within a term
#' @param collapse characters used to separate terms
#' @param abbrev currently unused
#' @rdname loglin-utilities
#' @export
loglin2string <- function(x, brackets = c('[', ']'), sep=',', collapse=' ', abbrev) {
  # a single two-character string such as "()" is split into opening/closing parts
  if (length(brackets)==1 && (nchar(brackets)>1)) brackets <- unlist(strsplit(brackets, ""))
  terms <- lapply(x, paste, collapse=sep)
  terms <- paste(brackets[1], terms, brackets[2], sep='')
  # use the caller's separator between terms (it was hard-coded to ' ' before)
  paste(terms, collapse=collapse)
}

# vcdExtra/R/update.xtabs.R0000644000176200001440000000126414430460317014765 0ustar liggesusers
# update() method for xtabs objects: re-builds the stored call with a new
# formula and/or changed arguments and (optionally) re-evaluates it.
#   object    an object carrying a "call" attribute
#   formula.  changes to the formula, in update.formula() notation
#   ...       arguments to replace in, or add to, the stored call
#   evaluate  if TRUE evaluate the new call in the caller's frame, else return the call
update.xtabs <- function (object, formula., ..., evaluate = TRUE)
{
    if (is.null(call<-attr(object, "call")))
        stop("need an object with call component")
    extras <- match.call(expand.dots = FALSE)$...
    if (!missing(formula.))
        call$formula <- update.formula(call$formula, formula.)
    if (length(extras)) {
        # arguments already present in the call are overwritten, the rest appended
        existing <- !is.na(match(names(extras), names(call)))
        for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
        if (any(!existing)) {
            call <- c(as.list(call), extras[!existing])
            call <- as.call(call)
        }
    }
    if (evaluate)
        eval(call, parent.frame())
    else call
}

# vcdExtra/R/logseries.R0000644000176200001440000001147214430460317014361 0ustar liggesusers
## Original from gamlss.dist
## I think this is working correctly 01/03/10
#LG <- function (mu.link = "logit")
#{
#    mstats <- checklink("mu.link", "LG", substitute(mu.link),c("logit", "probit", "cloglog", "cauchit", "log", "own"))
#    structure(
#          list(family = c("LG", "Logarithmic"),
#           parameters = list(mu = TRUE), # the mean
#                nopar = 1,
#                 type = "Discrete",
#              mu.link = as.character(substitute(mu.link)),
#           mu.linkfun = mstats$linkfun,
#           mu.linkinv = mstats$linkinv,
#                mu.dr = mstats$mu.eta,
#                 dldm = function(y,mu) (y/mu)+1/((1-mu)*log(1-mu)),
#               d2ldm2 = function(y,mu)
#                       {
#                        dldm <- (y/mu)+1/((1-mu)*log(1-mu))
#                        d2ldm2 <- -dldm^2
#                        d2ldm2
#                       },
#          G.dev.incr  = function(y,mu,...)
-2*dLG(x = y, mu = mu, log = TRUE), # rqres = expression(rqres(pfun="pLG", type="Discrete", ymin=1, y=y, mu=mu)), # mu.initial =expression({mu <- 0.9 } ), # mu.valid = function(mu) all(mu > 0 & mu < 1), # y.valid = function(y) all(y > 0) # ), # class = c("gamlss.family","family")) #} #----------------------------------------------------------------------------------------- dlogseries<-function(x, prob = 0.5, log = FALSE) { if (any(prob <= 0) | any(prob >= 1) ) stop(paste("prob must be greater than 0 and less than 1", "\n", "")) if (any(x <= 0) ) stop(paste("x must be >0", "\n", "")) logfy <- x*log(prob)-log(x)-log(-log(1-prob)) if(log == FALSE) fy <- exp(logfy) else fy <- logfy fy } #---------------------------------------------------------------------------------------- plogseries <- function(q, prob = 0.5, lower.tail = TRUE, log.p = FALSE) { if (any(prob <= 0) | any(prob >= 1) ) stop(paste("prob must be greater than 0 and less than 1", "\n", "")) if (any(q <= 0) ) stop(paste("q must be >0", "\n", "")) ly <- length(q) FFF <- rep(0,ly) nmu <- rep(prob, length = ly) j <- seq(along=q) for (i in j) { y.y <- q[i] mm <- nmu[i] allval <- seq(1,y.y) pdfall <- dlogseries(allval, prob = mm, log = FALSE) FFF[i] <- sum(pdfall) } cdf <- FFF cdf <- if(lower.tail==TRUE) cdf else 1-cdf cdf <- if(log.p==FALSE) cdf else log(cdf) cdf } #---------------------------------------------------------------------------------------- qlogseries <- function(p, prob=0.5, lower.tail = TRUE, log.p = FALSE, max.value = 10000) { if (any(prob <= 0) | any(prob >= 1) ) stop(paste("prob must be greater than 0 and less than 1", "\n", "")) if (any(p < 0) | any(p > 1.0001)) stop(paste("p must be between 0 and 1", "\n", "")) if (log.p==TRUE) p <- exp(p) else p <- p if (lower.tail==TRUE) p <- p else p <- 1-p ly <- length(p) QQQ <- rep(0,ly) nmu <- rep(prob, length = ly) for (i in seq(along=p)) { cumpro <- 0 if (p[i]+0.000000001 >= 1) QQQ[i] <- Inf else { for (j in seq(from = 1, to = max.value)) { cumpro <- 
plogseries(j, prob = nmu[i], log.p = FALSE) QQQ[i] <- j if (p[i] <= cumpro ) break } } } QQQ } #---------------------------------------------------------------------------------------- rlogseries <- function(n, prob = 0.5) { if (any(prob <= 0) | any(prob >= 1) ) stop(paste("prob must be greater than 0 and less than 1", "\n", "")) if (any(n <= 0)) stop(paste("n must be a positive integer", "\n", "")) n <- ceiling(n) p <- runif(n) r <- qlogseries(p, prob=prob) r } #---------------------------------------------------------------------------------------- vcdExtra/R/print.Kappa.R0000644000176200001440000000103214430460317014543 0ustar liggesusers# Print method for Kappa: Add a column showing z values ## DONE: now set digits ## DONE: now include CI print.Kappa <- function (x, digits=max(getOption("digits") - 3, 3), CI=FALSE, level=0.95, ...) { tab <- rbind(x$Unweighted, x$Weighted) z <- tab[,1] / tab[,2] tab <- cbind(tab, z) if (CI) { q <- qnorm((1 + level)/2) lower <- tab[,1] - q * tab[,2] upper <- tab[,1] + q * tab[,2] tab <- cbind(tab, lower, upper) } rownames(tab) <- names(x)[1:2] print(tab, digits=digits, ...) invisible(x) } vcdExtra/R/Kway.R0000644000176200001440000000171114430460317013273 0ustar liggesusers# Generate and fit all 1-way, 2-way, ... k-way terms in a glm Kway <- function(formula, family=poisson, data, ..., order=nt, prefix="kway") { if (is.character(family)) family <- get(family, mode = "function", envir = parent.frame()) if (is.function(family)) family <- family() if (is.null(family$family)) { print(family) stop("'family' not recognized") } if (missing(data)) data <- environment(formula) models <- list() mod <- glm(formula, family=family, data, ...) 
mod$call$formula <- formula terms <- terms(formula) tl <- attr(terms, "term.labels") nt <- length(tl) models[[1]] <- mod for(i in 2:order) { models[[i]] <- update(mod, substitute(.~.^p, list(p = i))) } # null model mod0 <- update(mod, .~1) models <- c(list(mod0), models) names(models) <- paste(prefix, 0:order, sep = ".") class(models) <- "glmlist" models } vcdExtra/R/modFit.R0000644000176200001440000000164714430460317013612 0ustar liggesusers## ## One-line summary of model fit for a glm/loglm object ## `modFit` <- function(x, ...) UseMethod("modFit") modFit.glm <- function(x, stats="chisq", digits=2, ...) { if (!inherits(x,"glm")) stop("modFit requires a glm object") result <- NULL if ("chisq" %in% stats) result <- paste("G^2(",x$df.residual,")=", formatC(x$deviance,digits=digits,format="f"),sep="") if ("aic" %in% stats) result <- paste(result, " AIC=", formatC(x$aic,digits=digits,format="f"),sep="") result } modFit.loglm <- function(x, stats="chisq", digits=2, ...) { if (!inherits(x,"loglm")) stop("modFit requires a loglm object") result <- NULL if ("chisq" %in% stats) result <- paste("G^2(",x$df,")=", formatC(x$deviance,digits=digits,format="f"),sep="") if ("aic" %in% stats) { aic<-x$deviance-x$df*2 result <- paste(result, " AIC=", formatC(aic,digits=digits,format="f"),sep="") } result } vcdExtra/R/glmlist.R0000644000176200001440000000462514430460317014042 0ustar liggesusers# glmlist - make a glmlist object containing a list of fitted glm objects with their names # borrowing code from Hmisc::llist glmlist <- function(...) 
{
    # body of glmlist(): collect the models, name each element by its argument
    # name when given, otherwise by the expression that was passed in the call
    args <- list(...); lname <- names(args)
    name <- vname <- as.character(sys.call())[-1]
    # seq_along() rather than 1:length(): a call with no arguments must not loop
    for (i in seq_along(args)) {
        vname[i] <- if (length(lname) && lname[i] != "") lname[i] else name[i]
    }
    names(args) <- vname[seq_along(args)]
    is.glm <- vapply(args, function(x) inherits(x, "glm"), logical(1))
    if (!all(is.glm)) {
        warning("Objects ", paste(vname[!is.glm], collapse=', '),
                " removed because they are not glm objects")
        args <- args[is.glm]
    }
    class(args) <- "glmlist"
    return(args);
}

# loglmlist - do the same for loglm objects
# Returns a list of class "loglmlist"; elements that do not inherit from
# "loglm" are dropped with a warning.
loglmlist <- function(...)
{
    args <- list(...); lname <- names(args)
    name <- vname <- as.character(sys.call())[-1]
    # seq_along() rather than 1:length(): a call with no arguments must not loop
    for (i in seq_along(args)) {
        vname[i] <- if (length(lname) && lname[i] != "") lname[i] else name[i]
    }
    names(args) <- vname[seq_along(args)]
    is.loglm <- vapply(args, function(x) inherits(x, "loglm"), logical(1))
    if (!all(is.loglm)) {
        warning("Objects ", paste(vname[!is.loglm], collapse=', '),
                " removed because they are not loglm objects")
        args <- args[is.loglm]
    }
    class(args) <- "loglmlist"
    return(args);
}

# generic version: named list
nlist <- function(...)
{ args <- list(...); lname <- names(args) name <- vname <- as.character(sys.call())[-1] for (i in 1:length(args)) { vname[i] <- if (length(lname) && lname[i] != "") lname[i] else name[i] } names(args) <- vname[1:length(args)] return(args); } # coeficient method for a glmlist (from John Fox, r-help, 10-28-2014) coef.glmlist <- function(object, result=c("list", "matrix", "data.frame"), ...){ result <- match.arg(result) coefs <- lapply(object, coef) if (result == "list") return(coefs) coef.names <- unique(unlist(lapply(coefs, names))) n.mods <- length(object) coef.matrix <- matrix(NA, length(coef.names), n.mods) rownames(coef.matrix) <- coef.names colnames(coef.matrix) <- names(object) for (i in 1:n.mods){ coef <- coef(object[[i]]) coef.matrix[names(coef), i] <- coef } if (result == "matrix") return(coef.matrix) as.data.frame(coef.matrix) } vcdExtra/R/mcaplot.R0000644000176200001440000001017314430460317014021 0ustar liggesusers#' --- #' title: "Custom plot function for mjca" #' date: "28 Jan 2016" #' --- #' #' #' @param obj An \code{"mjca"} object #' @param map Character string specifying the map type. Allowed options include #' \code{"symmetric"} (default), #' \code{"rowprincipal"}, \code{"colprincipal"}, \code{"symbiplot"}, #' \code{"rowgab"}, \code{"colgab"}, \code{"rowgreen"}, \code{"colgreen"} #' @param dim Dimensions to plot, a vector of length 2. #' @param col Vector of colors, one for each factor in the MCA #' @param pch Vector of point symbols for the category levels, one for each factor #' @param cex Character size for points and level labels #' @param pos Position of level labels relative to the category points; either a single number #' or a vector of length equal to the number of category points. #' @param lines A logical or an integer vector indicating which factors are to be #' joined with lines using \code{\link{multilines}} #' @param lwd Line width(s) for the lines #' @param legend Logical; draw a legend for the factor names? 
#' @param legend.pos Position of the legend in the plot #' @param xlab,ylab Labels for horizontal and vertical axes. The default, \code{"_auto_" } #' means that the function auto-generates a label of the form \code{Dimension X (xx.x %)} #' @param rev.axes A logical vector of length 2, where TRUE reverses the direction of the #' corresponding axis. #' @param ... Arguments passed down to \code{plot} #' @return Returns the coordinates of the category points invisibly #' #' @author Michael Friendly #' @seealso \code{\link{plot.mjca}} #' @examples #' data(Titanic) #' titanic.mca <- ca::mjca(Titanic) #' mcaplot(titanic.mca, legend=TRUE, legend.pos="topleft") #' #' data(HairEyeColor) #' haireye.mca <- ca::mjca(HairEyeColor) #' mcaplot(haireye.mca, legend=TRUE, cex.lab=1.3) mcaplot <- function(obj, map="symmetric", dim=1:2, col=c("blue", "red", "brown", "black", "green3", "purple"), pch=15:20, cex=1.2, pos=3, lines=TRUE, lwd=2, legend=FALSE, legend.pos="topright", xlab = "_auto_", ylab = "_auto_", rev.axes = c(FALSE, FALSE), ...) { if(!requireNamespace("ca", quietly=TRUE)) stop("The ca package is required") if(!inherits(obj, "mjca")) stop("Only defined for mjca objects") coords <- cacoord(obj, type=map, rows=FALSE) coords <- data.frame(coords, obj$factors) # extract factor names & levels nlev <- obj$levels.n nfac <- length(nlev) pch <- rep_len(pch, nfac) col <- rep_len(col, nfac) lwd <- rep_len(lwd, nfac) rev.axes <- rep(rev.axes, length.out=2) if(any(dim > ncol(coords))) stop("dim must be valid dimensions of the coordinates") labs <- pctlab(obj) if (xlab == "_auto_") xlab <- labs[dim[1]] if (ylab == "_auto_") ylab <- labs[dim[2]] if(isTRUE(rev.axes[1])) coords[, dim[1]] <- -coords[, dim[1]] if(isTRUE(rev.axes[2])) coords[, dim[2]] <- -coords[, dim[2]] plot(coords[, dim], type='n', asp=1, xlab=xlab, ylab=ylab, ...) 
points(coords[,dim], pch=rep(pch, nlev), col=rep(col, nlev), cex=cex)
  text(coords[,dim], labels=coords$level, col=rep(col, nlev), pos=pos, cex=cex, xpd=TRUE)

  if (is.logical(lines)) lines <- if(lines) 1:nfac else NULL
  if(length(lines)) multilines(coords[, dim], group=coords$factor, which=lines, col=col, lwd=lwd)

  abline(h = 0, v = 0, lty = "longdash", col="gray")

  if (legend) {
    factors <- coords$factor
    factors <- factors[!duplicated(factors)]
    legend(legend.pos, legend=factors,
           title="Factor", title.col="black",
           col=col, text.col=col, pch=pch,
           bg=rgb(.95, .95, .95, .3), cex=cex)
  }
  invisible(coords)
}

# Axis labels of the form "<prefix><k> (xx.x%)", one per dimension.
#   obj       object providing $sv, $lambda and (for lambda == "adjusted") $inertia.e
#   prefix    text placed before the dimension number
#   decimals  number of decimals used when rounding the percentages
# For lambda == "JCA" no percentage is computed and the label is just
# "<prefix><k>".
pctlab <- function(obj, prefix="Dimension ", decimals=1) {
  values <- obj$sv^2
  if (obj$lambda == "JCA"){
    pct <- rep_len(NA, length(values))
  }
  else {
    if (obj$lambda == "adjusted") {
      # NOTE(review): inertia.e is presumably already a proportion -- confirm against ca::mjca
      values <- obj$inertia.e
      pct <- round(100 * values, decimals)
    }
    else {
      pct <- round(100 * values / sum(values), decimals)
    }
  }
  # "" (not NULL) for missing percentages: ifelse() cannot assign a NULL `yes`
  # value and stopped with "replacement has length zero" for JCA solutions
  pctval <- ifelse(is.na(pct), "", paste0(" (", pct, "%)"))
  paste0(prefix, seq_along(values), pctval)
}

# vcdExtra/R/LRstats.R0000644000176200001440000000727514430460317013767 0ustar liggesusers
# fixed buglet when deviance() returns a null
# fixed bug: residual df calculated incorrectly
#  but this now depends on objects having a df.residual component
#  TRUE for lm, glm, polr, negbin objects

# made generic, adding a glmlist method

LRstats <- function(object, ...)
{ UseMethod("LRstats") } LRstats.glmlist <- function(object, ..., saturated = NULL, sortby=NULL) { ns <- sapply(object, function(x) length(x$residuals)) if (any(ns != ns[1L])) stop("models were not all fitted to the same size of dataset") nmodels <- length(object) if (nmodels == 1) return(LRstats.default(object[[1L]], saturated=saturated)) rval <- lapply(object, LRstats.default, saturated=saturated) rval <- do.call(rbind, rval) if (!is.null(sortby)) { rval <- rval[order(rval[,sortby], decreasing=TRUE),] } rval } # could just do LRstats.loglmlist <- LRstats.glmlist LRstats.loglmlist <- function(object, ..., saturated = NULL, sortby=NULL) { ns <- sapply(object, function(x) length(x$residuals)) if (any(ns != ns[1L])) stop("models were not all fitted to the same size of dataset") nmodels <- length(object) if (nmodels == 1) return(LRstats.default(object[[1L]], saturated=saturated)) rval <- lapply(object, LRstats.default, saturated=saturated) rval <- do.call(rbind, rval) if (!is.null(sortby)) { rval <- rval[order(rval[,sortby], decreasing=TRUE),] } rval } LRstats.default <- function(object, ..., saturated = NULL, sortby=NULL) { ## interface methods for logLik() and nobs() ## - use S4 methods if loaded ## - use residuals() if nobs() is not available logLik0 <- if("stats4" %in% loadedNamespaces()) stats4::logLik else logLik nobs0 <- function(x, ...) { nobs1 <- if("stats4" %in% loadedNamespaces()) stats4::nobs else nobs nobs2 <- function(x, ...) NROW(residuals(x, ...)) rval <- try(nobs1(x, ...), silent = TRUE) if(inherits(rval, "try-error") | is.null(rval)) rval <- nobs2(x, ...) return(rval) } dof <- function(x) { if (inherits(x, "loglm")) { rval <- x$df } else { rval <- try(x$df.residual, silent=TRUE) } if (inherits(rval, "try-error") || is.null(rval)) stop(paste("Can't determine residual df for a", class(x), "object")) rval } ## collect all objects objects <- list(object, ...) 
nmodels <- length(objects) ## check sample sizes ns <- sapply(objects, nobs0) if(any(ns != ns[1L])) stop("models were not all fitted to the same size of dataset") ## extract log-likelihood and df (number of parameters) ll <- lapply(objects, logLik0) par <- as.numeric(sapply(ll, function(x) attr(x, "df"))) df <- as.numeric(sapply(objects, function(x) dof(x))) ll <- sapply(ll, as.numeric) ## compute saturated reference value (use 0 if deviance is not available) if(is.null(saturated)) { dev <- try(sapply(objects, deviance), silent = TRUE) if(inherits(dev, "try-error") || any(sapply(dev, is.null))) { saturated <- 0 } else { saturated <- ll + dev/2 } } ## setup ANOVA-style matrix rval <- matrix(rep(NA, 5 * nmodels), ncol = 5) colnames(rval) <- c("AIC", "BIC", "LR Chisq", "Df", "Pr(>Chisq)") rownames(rval) <- as.character(sapply(match.call(), deparse)[-1L])[1:nmodels] rval[,1] <- -2 * ll + 2 * par rval[,2] <- -2 * ll + log(ns) * par rval[,3] <- -2 * (ll - saturated) rval[,4] <- df rval[,5] <- pchisq(rval[,3], df, lower.tail = FALSE) if (!is.null(sortby)) { rval <- rval[order(rval[,sortby], decreasing=TRUE),] } ## return structure(as.data.frame(rval), heading = "Likelihood summary table:", class = c("anova", "data.frame")) } vcdExtra/R/mosaic3d.R0000644000176200001440000001634214430460317014070 0ustar liggesusers##################################### ## Produce a 3D mosaic plot using rgl ##################################### # TODO: provide formula interface # TODO: handle zero margins (causes display to be erased in shapelist3d) # DONE: handle zero cells # DONE: generalize the calculation of residuals # DONE: allow display of type=c("observed", "expected") # DONE: if ndim>3, provide for labels at max or min # DONE: make object oriented and provide a loglm method # mosaic3d: provide observed array of counts and either residuals, expected frequencies, # or a loglin set of margins to fit mosaic3d <- function(x, ...) 
{ UseMethod("mosaic3d") } mosaic3d.loglm <- function (x, type = c("observed", "expected"), residuals_type = c("pearson", "deviance"), # gp = shading_hcl, gp_args = list(), ...) { residuals_type <- match.arg(tolower(residuals_type), c("pearson", "deviance")) if (is.null(x$fitted)) x <- update(x, fitted = TRUE) expected <- fitted(x) residuals <- residuals(x, type = "pearson") observed <- residuals * sqrt(expected) + expected if (residuals_type == "deviance") residuals <- residuals(x, type = "deviance") # gp <- if (inherits(gp, "grapcon_generator")) # do.call("gp", c(list(observed, residuals, expected, x$df), # as.list(gp_args))) # else gp mosaic3d.default(observed, residuals = residuals, expected = expected, type = type, residuals_type = residuals_type, # gp = gp, ...) } mosaic3d.default <- function(x, expected=NULL, residuals=NULL, type = c("observed", "expected"), residuals_type = NULL, shape=rgl::cube3d(alpha=alpha), alpha=0.5, spacing=0.1, split_dir=1:3, shading=shading_basic, interpolate=c(2,4), zero_size=.05, label_edge, labeling_args=list(), newpage=TRUE, box=FALSE, ...) 
{ if (!requireNamespace("rgl")) stop("rgl is required") type <- match.arg(type) if (is.null(residuals)) { residuals_type <- if (is.null(residuals_type)) "pearson" else match.arg(tolower(residuals_type), c("pearson", "deviance", "ft")) } ## convert structable object if (is.structable(x)) { x <- as.table(x) } ## table characteristics levels <- dim(x) ndim <- length(levels) dn <- dimnames(x) if (is.null(dn)) dn <- dimnames(x) <- lapply(levels, seq) vnames <- names(dimnames(x)) if (is.null(vnames)) vnames <- names(dn) <- names(dimnames(x)) <- LETTERS[1:ndim] ## replace NAs by 0 if (any(nas <- is.na(x))) x[nas] <- 0 ## model fitting: ## calculate expected if needed if ((is.null(expected) && is.null(residuals)) || !is.numeric(expected)) { if (inherits(expected, "formula")) { fm <- loglm(expected, x, fitted = TRUE) expected <- fitted(fm) df <- fm$df } else { if (is.null(expected)) expected <- as.list(1:ndim) fm <- loglin(x, expected, fit = TRUE, print = FALSE) expected <- fm$fit df <- fm$df } } ## compute residuals if (is.null(residuals)) residuals <- switch(residuals_type, pearson = (x - expected) / sqrt(ifelse(expected > 0, expected, 1)), deviance = { tmp <- 2 * (x * log(ifelse(x == 0, 1, x / ifelse(expected > 0, expected, 1))) - (x - expected)) tmp <- sqrt(pmax(tmp, 0)) ifelse(x > expected, tmp, -tmp) }, ft = sqrt(x) + sqrt(x + 1) - sqrt(4 * expected + 1) ) ## replace NAs by 0 if (any(nas <- is.na(residuals))) residuals[nas] <- 0 # switch observed and expected if required observed <- if (type == "observed") x else expected expected <- if (type == "observed") expected else x # replicate arguments to number of dimensions spacing <- rep(spacing, length=ndim) split_dir <- rep(split_dir, length=ndim) if(missing(label_edge)) label_edge <- rep( c('-', '+'), each=3, length=ndim) zeros <- observed <= .Machine$double.eps shapelist <- shape # sanity check if (!inherits(shapelist, "shape3d")) stop("shape must be a shape3d object") if (newpage) rgl::open3d() for (k in 1:ndim) { 
marg <- margin.table(observed, k:1) if (k==1) { shapelist <- split3d(shapelist, marg, split_dir[k], space=spacing[k]) label3d(shapelist, split_dir[k], dn[[k]], vnames[k], edge=label_edge[k], ...) } else { marg <- matrix(marg, nrow=levels[k]) shapelist <- split3d(shapelist, marg, split_dir[k], space=spacing[k]) names(shapelist) <- apply(as.matrix(expand.grid(dn[1:k])), 1, paste, collapse=":") L <- length(shapelist) label_cells <- if (label_edge[k]=='-') 1:levels[k] else (L-levels[k]+1):L label3d(shapelist[label_cells], split_dir[k], dn[[k]], vnames[k], edge=label_edge[k], ...) } } # assign colors # TODO: allow alpha to control transparency of side walls col <- shading(residuals, interpolate=interpolate) # display, but exclude the zero cells rgl::shapelist3d(shapelist[!as.vector(zeros)], col=col[!as.vector(zeros)], ...) # plot markers for zero cells if (any(zeros)) { ctrs <- t(sapply(shapelist, center3d)) rgl::spheres3d(ctrs[as.vector(zeros),], radius=zero_size) } # invisible(structable(observed)) invisible(shapelist) } # basic shading_Friendly, adapting the simple code used in mosaicplot() shading_basic <- function(residuals, interpolate=TRUE) { if (is.logical(interpolate)) interpolate <- c(2, 4) else if (any(interpolate <= 0) || length(interpolate) > 5) stop("invalid 'interpolate' specification") shade <- sort(interpolate) breaks <- c(-Inf, -rev(shade), 0, shade, Inf) colors <- c(hsv(0, s = seq.int(1, to = 0, length.out = length(shade) + 1)), hsv(4/6, s = seq.int(0, to = 1, length.out = length(shade) + 1))) colors[as.numeric(cut(residuals, breaks))] } # provide labels for 3D objects below/above their extent along a given dimension # FIXME: kludge for interline gap between level labels and variable name # TODO: how to pass & extract labeling_args, e.g., labeling_args=list(at='min', fontsize=10) label3d <- function(objlist, dim, text, varname, offset=.05, adj, edge="-", gap=.1, labeling_args, ...) 
{
  # body of label3d(): place the level labels (and optionally the variable
  # name) just outside the extent of the shapes in objlist along dimension dim
  if(missing(adj)) {
    # plain if/else, not ifelse(): with a scalar test ifelse() returns only the
    # first element, so the two-element justification vector was being lost
    if (dim < 3) {
      adj <- if (edge == '-') c(0.5, 1) else c(0.5, 0)
    } else {
      adj <- if (edge == '-') c(1, 0.5) else c(0, 0.5)
    }
  }
  ranges <- lapply(objlist, range3d)
  loc <- t(sapply(ranges, colMeans))   # positions of labels on dimension dim
  min <- t(sapply(ranges, function(x) x[1,]))  # other dimensions at min values
  max <- t(sapply(ranges, function(x) x[2,]))  # other dimensions at max values
  xyz <- if (edge == '-') (min - offset) else (max + offset)
  xyz[,dim] <- loc[,dim]
  if(!missing(varname)) {
    loclab <- colMeans(loc)            # NB: doesn't take space into acct
    xyzlab <- if (edge == '-')
      min[1,] - offset - gap
    else max[1,] + offset + gap
    xyzlab[dim] <- loclab[dim]
    # the variable name is drawn as one extra label after the level labels
    xyz <- rbind(xyz, xyzlab)
    text <- c(text, varname)
  }
  result <- c(labels = rgl::texts3d(xyz, texts=text, adj=adj, ...))
  invisible(result)
}

# vcdExtra/R/vcdExtra-deprecated.R0000644000176200001440000000014714422306400016231 0ustar liggesusers
#summarise <- function (...) {
#  .Deprecated("summarise", package="vcdExtra")
#  LRstats(...)
#}
#
# vcdExtra/R/seq_loglm.R0000644000176200001440000000571314430460317014350 0ustar liggesusers
#' Sequential loglinear models for an n-way table

#' This function takes an n-way contingency table and fits a series of sequential
#' models to the 1-, 2-, ... n-way marginal tables, corresponding to a variety of
#' types of loglinear models.

#' @param x a contingency table in array form, with optional category labels specified in the dimnames(x) attribute,
#'        or else a data.frame in frequency form, with the frequency variable names "Freq".
#' @param type type of sequential model to fit
#' @param marginals which marginals to fit?
#' @param vorder order of variables
#' @param k indices of conditioning variable(s) for "joint", "conditional" or order for "markov"
#' @param prefix
#' @param fitted keep fitted values?

seq_loglm <- function(
  x,
  type = c("joint", "conditional", "mutual", "markov", "saturated"),
  marginals = 1:nf,  # which marginals to fit?
vorder = 1:nf, # order of variables in the sequential models k = NULL, # conditioning variable(s) for "joint", "conditional" or order for "markov" prefix = 'model', fitted = TRUE, # keep fitted values? ... ) { if (inherits(x, "data.frame") && "Freq" %in% colnames(x)) { x <- xtabs(Freq ~ ., data=x) } if (!inherits(x, c("table", "array"))) stop("not an xtabs, table, array or data.frame with a 'Freq' variable") nf <- length(dim(x)) x <- aperm(x, vorder) factors <- names(dimnames(x)) indices <- 1:nf type = match.arg(type) # models <- as.list(rep(NULL, length(marginals))) models <- list() for (i in marginals) { mtab <- margin.table(x, 1:i) if (i==1) { # KLUDGE: use loglin, but try to make it look like a loglm object mod <- loglin(mtab, margin=NULL, print=FALSE) mod$model.string = paste("=", factors[1]) mod$margin <- list(factors[1]) # mod$margin <- names(dimnames(mtab)) # names(mod$margin) <- factors[1] if (fitted) { fit <- mtab fit[] <- (sum(mtab) / length(mtab)) mod$fitted <- fit } mod$nobs <- length(mtab) mod$frequencies <- mtab mod$deviance <- mod$lrt class(mod) <- c("loglin", "loglm") } else { expected <- switch(type, 'conditional' = conditional(i, mtab, with=if(is.null(k)) i else k), 'joint' = joint(i, mtab, with=if(is.null(k)) i else k), 'mutual' = mutual(i, mtab), 'markov' = markov(i, mtab, order=if(is.null(k)) 1 else k), 'saturated' = saturated(i, mtab) ) form <- loglin2formula(expected) # mod <- loglm(formula=form, data=mtab, fitted=TRUE) mod <- eval(bquote(MASS::loglm(.(form), data=mtab, fitted=fitted))) mod$model.string <- loglin2string(expected, brackets=if (i= S-PLUS 6.0) if(is.null(version$language) && inherits(x, "crosstabs")) { oldClass(x)<-NULL; attr(x, "marginals")<-NULL} ## TODO: add tests for matrix or table n <- nrow(x) m <- ncol(x) pi.c<-pi.d<-matrix(0, nrow=n, ncol=m) row.x<-row(x) col.x<-col(x) for(i in 1:(n)){ for(j in 1:(m)){ pi.c[i, j]<-sum(x[row.xi & col.x>j]) pi.d[i, j]<-sum(x[row.xj]) + sum(x[row.x>i & col.x 1, maxTitle=NULL) { # make sure 
requested packages are available and loaded for (i in seq_along(package)) { if (!isNamespaceLoaded(package[i])) if (requireNamespace(package[i], quietly=TRUE)) cat(paste("Loading package:", package[i], "\n")) else stop(paste("Package", package[i], "is not available")) } dsitems <- data(package=package)$results wanted <- c('Package', 'Item','Title') ds <- as.data.frame(dsitems[,wanted], stringsAsFactors=FALSE) getData <- function(x, pkg) { # fix items with " (...)" in names, e.g., "BJsales.lead (BJsales)" in datasets objname <- gsub(" .*", "", x) e <- loadNamespace(pkg) if (!exists(x, envir = e)) { dataname <- sub("^.*\\(", "", x) dataname <- sub("\\)$", "", dataname) e <- new.env() data(list = dataname, package = pkg, envir = e) } get(objname, envir = e) } getDim <- function(i) { data <- getData(ds$Item[i], ds$Package[i]) if (is.null(dim(data))) length(data) else paste(dim(data), collapse='x') } getClass <- function(i) { data <- getData(ds$Item[i], ds$Package[i]) cl <- class(data) if (length(cl)>1 && !allClass) cl[length(cl)] else cl } ds$dim <- unlist(lapply(seq_len(nrow(ds)), getDim )) ds$class <- unlist(lapply(seq_len(nrow(ds)), getClass )) if (!is.null(maxTitle)) ds$Title <- substr(ds$Title, 1, maxTitle) if (incPackage) ds[c('Package', 'Item','class','dim','Title')] else ds[c('Item','class','dim','Title')] } vcdExtra/R/CMHtest.R0000644000176200001440000002277514430460317013704 0ustar liggesusers# Cochran-Mantel-Haenszel tests for ordinal factors in contingency tables # The code below follows Stokes, Davis & Koch, (2000). # "Categorical Data Analysis using the SAS System", 2nd Ed., # pp 74--75, 92--101, 124--129. # Ref: Landis, R. J., Heyman, E. R., and Koch, G. G. (1978), # Average Partial Association in Three-way Contingency Tables: # A Review and Discussion of Alternative Tests, # International Statistical Review, 46, 237-254. 
# See: https://onlinecourses.science.psu.edu/stat504/book/export/html/90
# http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_freq_a0000000648.htm

# DONE: this should be the main function, handling 2-way & higher-way tables
#  With strata, use apply() or recursion over strata
# DONE: With strata, calculate overall CMH tests controlling for strata
# FIXED: rmeans and cmeans tests were labeled incorrectly

# Generic for Cochran-Mantel-Haenszel tests; dispatches on the class of `x`.
CMHtest <- function(x, ...) UseMethod("CMHtest")

# Formula interface: `[Freq] ~ rowvar + colvar [| stratum1 + stratum2 ...]`.
#
# formula    model formula; the part after `|` names the stratifying variables
# data       a table/ftable/array, or a data frame (optionally with a `Freq`
#            column)
# subset, na.action   forwarded to xtabs() when `data` is a data frame
# ...        passed on to CMHtest.default()
#
# Returns whatever CMHtest.default() returns for the table built from `data`.
CMHtest.formula <- function(formula, data = NULL, subset = NULL,
                            na.action = NULL, ...) {
  m <- match.call(expand.dots = FALSE)
  edata <- eval(m$data, parent.frame())

  # Split the formula text into response, primary variables and strata
  fstr <- strsplit(paste(deparse(formula), collapse = ""), "~")
  vars <- strsplit(strsplit(gsub(" ", "", fstr[[1]][2]), "\\|")[[1]], "\\+")
  varnames <- vars[[1]]
  condnames <- if (length(vars) > 1) vars[[2]] else NULL

  dep <- gsub(" ", "", fstr[[1]][1])
  if (!dep %in% c("", "Freq")) {
    if (all(varnames == ".")) {
      varnames <- if (is.data.frame(data)) {
        colnames(data)
      } else {
        names(dimnames(as.table(data)))
      }
      # Logical mask rather than -which(): the latter drops *everything*
      # when `dep` is not among the names.
      varnames <- varnames[!varnames %in% dep]
    }
    varnames <- c(varnames, dep)
  }

  if (inherits(edata, "ftable") || inherits(edata, "table") ||
      length(dim(edata)) > 2) {
    condind <- NULL
    dat <- as.table(data)
    if (all(varnames != ".")) {
      ind <- match(varnames, names(dimnames(dat)))
      if (any(is.na(ind)))
        stop(paste("Can't find", paste(varnames[is.na(ind)], collapse=" / "),
                   "in", deparse(substitute(data))))

      if (!is.null(condnames)) {
        condind <- match(condnames, names(dimnames(dat)))
        if (any(is.na(condind)))
          stop(paste("Can't find", paste(condnames[is.na(condind)], collapse=" / "),
                     "in", deparse(substitute(data))))
        ind <- c(condind, ind)
      }
      # The marginal table has the strata first, then the primary variables
      dat <- margin.table(dat, ind)
    }
    CMHtest.default(
      dat,
      strata = if (is.null(condind)) NULL else match(condnames, names(dimnames(dat))),
      ...
    )
  } else {
    # Data frame: let xtabs() build the table; strata end up as trailing dims
    m <- m[c(1, match(c("formula", "data", "subset", "na.action"), names(m), 0))]
    m[[1]] <- as.name("xtabs")
    m$formula <- formula(paste(if ("Freq" %in% colnames(data)) "Freq", "~",
                               paste(c(varnames, condnames), collapse = "+")))
    tab <- eval(m, parent.frame())
    CMHtest.default(tab, ...)
  }
}

# Default method: CMH tests for a two-way table, optionally within strata.
#
# x         a table/array with at least two dimensions, each of extent >= 2
# strata    indices or names of the stratifying dimensions; for a table with
#           more than two dimensions the default is 3:length(dim(x))
# rscores, cscores   row/column scores, "midrank", or NULL for an unordered
#           factor; the defaults 1:R and 1:C are evaluated lazily, after R and
#           C have been set below
# types     which tests to compute ("ALL" is accepted as a shorthand)
# overall   also compute the tests across all strata?
# details   keep the A, V, n, m components (required for overall = TRUE)
#
# Returns a "CMHtest" object for a two-way table; with strata, a named list of
# "CMHtest" objects (one per stratum), plus `ALL` when overall tests are made.
CMHtest.default <- function(x, strata = NULL, rscores = 1:R, cscores = 1:C,
                            types = c("cor", "rmeans", "cmeans", "general"),
                            overall = FALSE, details = overall, ...) {
  # label each stratum as "var1:level1|var2:level2"
  snames <- function(x, strata) {
    sn <- dimnames(x)[strata]
    dn <- names(sn)
    apply(expand.grid(sn), 1,
          function(lev) paste(dn, lev, sep = ":", collapse = "|"))
  }

  ## check dimensions
  L <- length(d <- dim(x))
  if (any(d < 2L)) stop("All table dimensions must be 2 or greater")
  if (L > 2L && is.null(strata)) strata <- 3L:L
  if (is.character(strata)) {
    # match() resolves each name separately; `names == strata` recycles and
    # silently picks the wrong dimensions when several names are given.
    pos <- match(strata, names(dimnames(x)))
    if (anyNA(pos))
      stop(paste("Can't find strata",
                 paste(strata[is.na(pos)], collapse = " / "),
                 "in the dimnames of x"))
    strata <- pos
  }
  if (L - length(strata) != 2L)
    stop("All but 2 dimensions must be specified as strata.")

  ## rearrange table to put primary dimensions first
  x <- aperm(x, c(setdiff(seq_len(L), strata), strata))
  d <- dim(x)
  R <- d[1]
  C <- d[2]
  # After the permutation the strata occupy the trailing positions, so the
  # indices supplied by the caller no longer apply.
  strata <- if (length(strata) > 0L) 3L:L else NULL

  if (is.null(strata)) {
    return(CMHtest2(x, rscores = rscores, cscores = cscores,
                    types = types, details = details, ...))
  }

  # handle strata
  sn <- snames(x, strata)
  res <- c(apply(x, strata, CMHtest2, rscores = rscores, cscores = cscores,
                 types = types, details = details, ...))
  # DONE: fix names if there are 2+ strata
  names(res) <- sn
  for (i in seq_along(res)) res[[i]]$stratum <- sn[i]
  # DONE: Calculate generalized CMH, controlling for strata
  if (overall) {
    if (!details) {
      warning("Overall CMH tests not calculated because details=FALSE")
    } else {
      res$ALL <- CMHtest3(res, types = types)
    }
  }
  res
}

# handle two-way case, for a given stratum
# DONE: now allow rscores/cscores == 'midrank' for midrank scores
# DONE: allow rscores/cscores=NULL for unordered factors, where ordinal
#       scores don't make sense
# DONE: modified to return all A matrices as a list
# DONE: cmh() moved outside
#
# x        an R x C table of frequencies
# stratum  label stored in the result (used by print.CMHtest)
# rscores, cscores, types, details   as for CMHtest.default()
#
# Returns a "CMHtest" object with components table, names, rscores, cscores,
# stratum and, when details = TRUE, A, V, n and m.
CMHtest2 <- function(x, stratum = NULL, rscores = 1:R, cscores = 1:C,
                     types = c("cor", "rmeans", "cmeans", "general"),
                     details = FALSE, ...) {
  # left kronecker product
  lkronecker <- function(x, y, make.dimnames = TRUE, ...)
    kronecker(y, x, make.dimnames = make.dimnames, ...)

  # midrank scores (modified ridits) based on row/column totals
  midrank <- function(n) {
    cs <- cumsum(n)
    (2 * cs - n + 1) / (2 * (cs[length(cs)] + 1))
  }

  d <- dim(x)
  R <- d[1]
  C <- d[2]

  # identical() keeps the test scalar even if a character vector is supplied
  if (identical(rscores, "midrank")) rscores <- midrank(rowSums(x))
  if (identical(cscores, "midrank")) cscores <- midrank(colSums(x))

  nt <- sum(x)
  pr <- rowSums(x) / nt
  pc <- colSums(x) / nt

  m <- as.vector(nt * outer(pr, pc))  # expected values under independence
  n <- as.vector(x)                   # cell frequencies

  V1 <- (diag(pr) - pr %*% t(pr))
  V2 <- (diag(pc) - pc %*% t(pc))
  V <- (nt^2 / (nt - 1)) * lkronecker(V1, V2, make.dimnames = TRUE)

  if (length(types) == 1 && types == "ALL")
    types <- c("general", "rmeans", "cmeans", "cor")
  types <- match.arg(types, several.ok = TRUE)
  # tests that need scores are dropped when the scores are NULL
  if (is.null(rscores)) types <- setdiff(types, c("cmeans", "cor"))
  if (is.null(cscores)) types <- setdiff(types, c("rmeans", "cor"))
  if (length(types) == 0L)
    stop("None of the requested test types can be computed with NULL scores")

  # contrasts dropping the last row / column category
  row_contr <- cbind(diag(R - 1), rep(0, R - 1))
  col_contr <- cbind(diag(C - 1), rep(0, C - 1))

  table <- matrix(NA_real_, nrow = length(types), ncol = 3,
                  dimnames = list(types, c("Chisq", "Df", "Prob")))
  Amats <- list()
  for (type in types) {
    A <- switch(type,
      cor     = lkronecker(t(rscores), t(cscores)),
      rmeans  = lkronecker(row_contr, t(cscores)),
      cmeans  = lkronecker(t(rscores), col_contr),
      general = lkronecker(row_contr, col_contr)
    )
    df <- switch(type,
      cor     = 1,
      rmeans  = R - 1,
      cmeans  = C - 1,
      general = (R - 1) * (C - 1)
    )
    table[type, ] <- cmh(n, m, A, V, df)
    Amats[[type]] <- A
  }

  xnames <- names(dimnames(x))
  result <- list(table = table, names = xnames, rscores = rscores,
                 cscores = cscores, stratum = stratum)
  if (details) result <- c(result, list(A = Amats, V = V, n = n, m = m))
  class(result) <- "CMHtest"
  result
}

# do overall test, from a computed CMHtest list
#
# object   list of per-stratum "CMHtest" objects computed with details = TRUE
# types    which tests to combine; types that were not computed per stratum
#          (e.g. dropped because scores were NULL) are skipped
#
# Returns a "CMHtest" object with stratum = "ALL" and a numeric table.
CMHtest3 <- function(object, types = c("cor", "rmeans", "cmeans", "general")) {
  if (length(types) == 1 && types == "ALL")
    types <- c("general", "rmeans", "cmeans", "cor")
  types <- match.arg(types, several.ok = TRUE)

  first <- object[[1]]
  if (is.null(first$A) || is.null(first$V))
    stop("Overall CMH tests need per-stratum results computed with details=TRUE")
  types <- intersect(types, rownames(first$table))

  # numeric matrix, the same layout as the per-stratum tables from CMHtest2()
  table <- matrix(NA_real_, nrow = length(types), ncol = 3,
                  dimnames = list(types, c("Chisq", "Df", "Prob")))
  for (type in types) {
    AVA <- 0
    Anm <- 0
    for (s in object) {  # accumulate over strata
      A <- s$A[[type]]
      AVA <- AVA + A %*% s$V %*% t(A)
      Anm <- Anm + A %*% (s$n - s$m)
    }
    Q <- drop(crossprod(Anm, solve(AVA, Anm)))
    # index by row and column: table[, "Df"] loses its names for a 1-row table
    df <- first$table[type, "Df"]
    table[type, ] <- c(Q, df, pchisq(Q, df, lower.tail = FALSE))
  }

  result <- list(table = table, names = first$names, stratum = "ALL")
  class(result) <- "CMHtest"
  result
}

# basic CMH calculation
#
# n, m  observed and expected cell frequencies (as vectors)
# A     contrast matrix defining the hypothesis
# V     covariance matrix of n under the null hypothesis
# df    degrees of freedom of the test
#
# Returns c(Chisq, Df, Prob).
cmh <- function(n, m, A, V, df) {
  AVA <- A %*% V %*% t(A)
  Anm <- A %*% (n - m)
  Q <- drop(crossprod(Anm, solve(AVA, Anm)))
  pvalue <- pchisq(Q, df, lower.tail = FALSE)
  c(Q, df, pvalue)
}

# DONE: incorporate stratum name in the heading
# TODO: handle the printing of pvalues better
#
# Print method: heading naming the variables (and stratum), then the table of
# tests labelled by their alternative hypotheses.  Returns `x` invisibly.
print.CMHtest <- function(x, digits = max(getOption("digits") - 2, 3), ...) {
  heading <- "Cochran-Mantel-Haenszel Statistics"
  if (!is.null(x$names))
    heading <- paste(heading, "for", paste(x$names, collapse = " by "))
  if (!is.null(x$stratum)) {
    where <- if (x$stratum == "ALL") {
      "\n\tOverall tests, controlling for all strata"
    } else {
      paste("\n\tin stratum", x$stratum)
    }
    heading <- paste(heading, where)
  }
  # TODO: determine score types (integer, midrank) for heading

  df <- x$table
  types <- rownames(df)
  labels <- list(cor = "Nonzero correlation",
                 rmeans = "Row mean scores differ",
                 cmeans = "Col mean scores differ",
                 general = "General association")
  labels <- unlist(labels[types])  # select the labels for the types
  df <- data.frame("AltHypothesis" = as.character(labels), df,
                   stringsAsFactors = FALSE)
  cat(heading, "\n\n")
  print(df, digits = digits, ...)
  cat("\n")
  invisible(x)
}
vcdExtra/NEWS.md0000644000176200001440000003156614470740445013154 0ustar liggesusers## Version 0.8-5 (2023-08-19) - Fix CRAN nit re vcdExtra-package.Rd - Fix moved URL - http: -> https: - Fix xrefs in vignettes - More spellcheck - Fixed one more NOTE re: AirCrash.Rd for a URL ## Version 0.8-4 (2023-04-12) - Imports: tidyr must depend on (>= 1.3.0) for one use in a vignette. - Enhanced the vignette, "Creating and manipulating frequency tables", `vignettes/creating.Rmd` - Created a new vignette, "Mobility tables", with extensive examples of models and graphs for square mobility tables. ## Version 0.8-3 (2023-02-16) - All datasets now classified with \concept{} tags by method of analysis. These can be found using `help.search(pattern, field="concept")` - Added a vignette, `datasets.Rmd` showing all datasets classified by method tags. Links only work in the `pkgdown` site. - Added `Asbestos` data ## Version 0.8-2 (2023-01-19) This is a major enhancement release of the `vcdExtra` package, focusing on documentation and examples.
- added `HouseTasks` data set, illustrating permutation of row / col variables - package now depends on R (>= 3.5.0) per CRAN nit - add `Suggests: seriation` to illustrate CA re-ordering of rows/cols based on correspondence analysis - all .Rd files reformatted and many examples extended. - the vignette `mosaic.Rmd` on mosaic displays has been extensively revised with examples for square tables and permutation of row / column variables. - begin to classify datasets with `\concept{}` tags - Added a new `demo-housing.Rmd` vignette, using content from `demo/housing.R`. ## Version 0.8-1 (2022-04-22) - rename vignettes to be in order ## Version 0.8-0 (2022-04-20) - Fixed warning from `expand.dft()` re type.convert - Old `.Rnw` vignettes converted to `.Rmd` - Fixed two problems detected in the initial submission. ## Version 0.7-6 (2022-02-12) - Fix some issues with `CMHtest()` `types` argument #PR11 [Thx: ShuguangSun, Matt Kumar] - Fix some Winbuilder URL nits; extensive spell checking ## Version 0.7-5 (2020-12-25) - Fix problem re use of rgl in `mosaic3d()` examples ## Version 0.7-4 (2019-09-25) - Fix `datasets()` to work with packages not using LazyData #PR7 [Thx: Duncan Murdoch] - Bump package Version ## Version 0.7-3 (2018-06-04) - fix Version number for gnm in `DESCRIPTION` - fix `expand.dft` and `expand.table` to work with tibbles [Thx: Duncan Murdoch] - vcdExtra gets a hex sticker ## Version 0.7-1 (2017-09-28) - Fixed buglet in `expand.dft()` when table is 1-dim [Thx: Long Qu] - Added `zero.test()`, a simple score test for zero inflation - Development has moved to https://github.com/friendly/vcdExtra ## Version 0.7-0 (2016-01-27) - Added Glass data - introduce links and references to DDAR - added `mcaplot()` to plot MCA solutions in DDAR style - added `update.xtabs()` method - updated vignette to refer to DDAR ## Version 0.6-12 (2015-10-06) - Added Burt data - Fixed examples/vignette for ggplot_2.0.0 ## Version 0.6-11 (2015-09-14) - bump pkg Version for CRAN ## 
Version 0.6-10 (2015-07-27) - Added HospVisits data ## Version 0.6-9 (2015-06-11) - Added Mice data - Removed uses of `summarise()` and Summarise() from demos and examples, in preparation for deprecating them. - `summarise()` is now deprecated - Now use `importFrom()` for all functions from recommended packages ## Version 0.6-8 (2015-04-15) - Fixed Title: and Description: for CRAN ## Version 0.6-7 (2015-04-02) - Removed loddsratio (now in vcd) (rev 252) - Removed print.Kappa (now in vcd) (rev 253) - Fixed bug in `CMHtest()`: rmeans and cmeans labels were reversed (rev 254) - Fixed error in Fungicide.Rd, now that we require vcd_1.3-3 (rev 254) - Added WorkerSat data - 2 x 2 x 2 - Added AirCrash data ## Version 0.6-6 (2015-02-04) - Minor doc changes ## Version 0.6-5 (2014-11-07) - Added Cormorants data (fixed to latest) - Added `LRstats()`, to replace Summarise. Older summarise() and Summarise() will eventually be deprecated. - Now Suggests: AER for NMES1988 data - `collapse.table()` now works with array objects ## Version 0.6-3 (2014-10-27) - Fixed bug in logLik.loglm when the data contain zero frequencies (rev 228) - Made Summarise generic, adding a method for "glmlist" objects (rev 230) - Added a coef() method for "glmlist" objects (thx: John Fox) - Added a Summarise.loglmlist method (rev 232) - Replaced all documentation uses of summarise() with Summarise() (rev 233) - Added `cutfac()`, a convenience wrapper for cut() (rev 234) - Now use `rgl::` in all mosaic3d functions ## Version 0.6-2 (2014-06-30) - added Summarise, to replace summarise - Added HairEyePlace data - 4 x 5 x 2, Caithness and Aberdeen hair/eye color (rev 223) - Added PhdPubs data from Long (1997) - publications by PhD candidates - Allow Summarise to work with models w/o a deviance() function - Fixed bug in Summarise wrt degrees of freedom ## Version 0.6-1 (2014-04-14) - Added ICU data - Added Toxaemia data - multivariate response contingency table (rev 209) - Added Vietnam data - 2 x 5 x 4
frequency table (rev 210) - Added logLik.loglm to allow use of AIC() and BIC() for loglm models (rev 212) - Fixed loddsratio.Rd to work with revised vcd::CoalMiners data (rev 212) - Added blogits for bivariate binary response data - Added Vote1980 data (rev 214) ## Version 0.6-0 (2014-03-07) - Removed Authors: in DESCRIPTION, bumped Version ## Version 0.5-12 (2013-12-16) - Added ShakeWords data set -- word frequency counts from Shakespeare (rev 188) - Added Geissler data -- all family sizes for Saxony sex composition data (rev 190) - Added logseries functions for the logarithmic series distribution (rev 191) - Added Depends data -- dependencies of R packages (rev 192) - Fixed buglet in seq_loglm() not respecting arrays - Added seq_mosaic() (rev 194) - Added CyclingDeaths data (rev 196) - mosaic3d() gets an interpolate= option to control shading levels (rev 197) - Fixed bug in seq_mosaic, thx to David Meyer (rev 200) - Fixed bug in seq_loglm() when marginals != 1:nf ## Version 0.5-11 (2013-07-01) - Added mosaic.glmlist to plot mosaics (or other strucplots) for some or all models in a glmlist (rev 169) - Added loglin-utilities.R, containing a suite of functions to provide a more conceptual way to specify loglinear models by type ('joint', 'conditional', 'mutual', 'markov', 'saturated') (rev 171) - Added mosaic.loglmlist, similar to mosaic.glmlist for models fit using MASS::loglm (rev 173) - Both mosaic.glmlist and mosaic.loglmlist get an explicit panel= argument; both get some more sensible default arguments (rev 175) - Added seq_loglm to fit sequential loglm models to marginal subtables, giving a loglmlist result (rev 176) - Added Accident data (rev 178); fleshed out Accident examples (rev 180) - Fixed use of ::: for R 3.0.1 (rev 179) - Fixed various problems related to use of MASS::loglm (rev 181-183) - Added Titanicp to datasets (rev 185) ## Version 0.5-8 (2013-03-06) - Revised vcd-tutorial showing some examples of plyr; added a section on RC models - Added
Donner data with example of ggplot2 plot for a binomial glm() - Added vcd-tutorial section using ggplot2 for Donner data - Enhanced datasets() to provide a maxTitle argument (rev 153) - Added doubledecker plots to Dyke.Rd (rev 156) - Added Draft1970table and Draft1970 data sets (rev 158) - Added example of doubledecker plots to vcd-tutorial vignette (rev 164) ## Version 0.5-7 (2013-03-01) - Completed CMHtest methods, adding overall tests across strata in a general way - CMHtest now gets an S3 generic with a formula interface - print Kappa gets digits= and CI= arguments ## Version 0.5-6 (2012-11-30) - Added Hosmer Lemeshow and HLtest methods, including plotting via vcd::rootogram() - Added CMHtest for general Cochran-Mantel-Haenszel tests - Revised vcd-tutorial vignette, adding a section on CMH tests; removed dependence on Z.cls ## Version 0.5-3 (2012-03-07) - Added Mammograms data (4x4, ordered factors, agreement) - Extended mosaic.glm examples - Added Alligator data (4x2x2x5, in frequency form) - Added DaytonSurvey data (5-way, 2x2x2x2x2 in frequency form) - Extended vcd-tutorial vignette with a section on collapsing over factors - Removed aperm.* now that aperm.table is in base R ## Version 0.5-2 (2010-11-28) - Added loddsratio and related methods for log odds ratios, generalizing vcd::oddsratio from 2 x 2 (x strata) tables to R x C (x strata) tables - Added as.matrix.loddsratio, as.array.loddsratio methods - Added some simple plot examples to example(loddsratio), anticipating a plot method - Added data(Fungicide), a 2 x 2 x 2 x 2 table - Renamed summarize() and related methods to summarise() to avoid conflict with plyr. 
- Addition to vcd-tutorial vignette on use of aperm() with table objects - Updated demo(yamaguchi-xie) to correct row/col nomenclature and add plot of BIC - Added aperm() S3 generic to handle table objects - Moved tv.dat to inst/doc/extdata to avoid warnings in R 2.12+ ## Version 0.5-1 (2010-09-17) - Added Yamaguchi87 data (5x5x3 three-way mobility table in frequency form) - Added demo(yamaguchi-xie) illustrating fitting and visualization of the models of homogeneous and log multiplicative layer effects fit in Xie (1992, Table 1) - Added BIC to summarize() and friends - Added Hauser79 data (two-way mobility table), plus some examples from Powers and Xie (2008) - Added Crossings() to construct interactions for Goodman 1972 crossings model. - Added datasets() to list datasets in packages - Extended description and examples of Kway() - Added meanResiduals() and extended mosaic.glm() (Heather Turner) - summarize() gets a sortby argument for glmlist and loglmlist objects ## Version 0.5-0 (2010-04-28) - Fleshed out mosaic3d, allowing display of observed or expected, internally calculated or externally supplied residuals, specifying the initial 3D shape, etc. This completes the 'top-level' work on mosaic3d(), borrowing code from vcd::strucplot. - Added initial handling for zero cells in the table to mosaic3d(). - Added center3d() for finding the mean coordinates of shape3d objects. - Added demo(mosaic-hec) comparing 2D and 3D mosaics for HairEyeColor data - Gave mosaic3d a label_edge argument, allowing labels for dimensions at minima or maxima - Made mosaic3d object oriented, giving it a loglm method - Added Kway(), fitting all 0-way, 1-way, 2-way, ... k-way models in a glm ## Version 0.4-3 (2010-03-25) - Added demo(mosaic3d-demo), a proof-of-concept for doing 3D mosaic displays - Added mosaic3d(), an initial basic Version. 
- Factored out split3d() and gave it S3 methods ## Version 0.4-2 (2010-03-09) - Revised vignette("vcd-tutorial"): added some hints for mosaic(), corrected stuff regarding prior limitations of mosaic.glm() - Added demo(Wong3-1): three-way table, with models of conditional association - Added Suggest: effects for effects plots of glm(), multinom() and polr() models - Added demo(housing): visualize models fit in example(housing, package="MASS") using mosaic() and effect plots. - Updated demo(Wong2-3): added model comparison plots, glmlist processing - Added Suggest: VGAM - Extended package description in vcdExtra-package.Rd - Added glmlist() to facilitate processing, extraction, plotting, etc. of a collection of glm() models - Added loglmlist(), for collections of loglm() objects - Added summarize methods for glm, glmlist, loglm and loglmlist objects ## Version 0.4-1 (2010-02-21) - Added example(Caesar), illustrating structural zeros - Re-named Heckman variables to e1971, ..., e1968 (errors from loglm); began example(Heckman) - Added example(Detergent), example(Dyke) - Fixed bug with mosaic.glm when data in global environment (Heather Turner) - Added sieve.glm and assoc.glm methods (MF) - Added modFit.glm and modFit.loglm - Added demo(Wong2-3) ## Version 0.4-0 (2010-02-23) - Added new datasets: data/{Abortion, Bartlett, Caesar, Cancer, Detergent, Dyke, Gilby, Heart, Heckman, Hoyt, Mobility} from mosdata.sas via md2r.sas converter. 
- Fixed small documentation warnings - Switched inst/CHANGES to NEWS ## Version 0.3-6 (2009-04-21) - Added Depends: gnm - Added demo/{mental-glm, ucb-glm, vision-quasi} - Added demo/{occStatus,yaish-unidiff} - Initial release to CRAN ## Version 0.3-5 (2009-3-6) - mosaic.glm now uses object$data if available ## Version 0.3-4 (2009-2-11) - Fixed bugs in mosaic.glm, mosaic.gnm in models with terms like Diag(dest, origin) that get included in x$xlevels ## Version 0.3-3 (2009-2-10) - Fixed bugs in mosaic.gnm - Fixed print.GKgamma - Added example of GKgamma to vcd-tutorial ## Version 0.3-2 (2009-2-8) - Added more examples to mosaic.glm.Rd ## Version 0.3-0 (2009-2-6) - Fixed bugs in mosaic.glm and mosaic.gnm ## Version 0.2 (2009-2-1) - Added vcd-tutorial vignette ## Version 0.1 (2009-1-26) - Initial Version on R-Forge. vcdExtra/MD50000644000176200001440000003160614471023412012346 0ustar liggesusers407a6f14ca269870523f697fdaeda03b *DESCRIPTION a7114a995994640c28ccf1d7dfefac66 *NAMESPACE c04cba0ca90548650d17ad544a5192d0 *NEWS.md 0b9242b2515cf6dad28203c5835b02ce *R/CMHtest.R 7bf2895634e1b0faca9385bd81ccee37 *R/Crossings.R 1963eb3fb347cf99287b4794b4fd57dc *R/GKgamma.R a7a0bf2f6825917c8e31f99c7b25d265 *R/HLtest.R 10217e3a0582ec4cddba2bfc130a8fad *R/Kway.R c8079ae2c1058550fa5b5fda9624dc8b *R/LRstats.R 8e444f0aaaea055b108a478f8ddf2def *R/Summarise.R 650530a623dec352a740a6626baddac7 *R/blogits.R d3838c45cc40201160f85f0eb732d3c5 *R/collapse.table.R b0483049482397dff2bb9ca2ef123725 *R/cutfac.R f35d0b821039af9613f556357d4f2c82 *R/datasets.R e8e69896994a75825789233f2fa81050 *R/expand.dft.R c1cf46896f5087dccb67fe9e27c7d200 *R/glmlist.R ea1051fba28687e9b147a8912baf9802 *R/logLik.loglm.R 479eb4388f3e1bdf34826374604911a9 *R/loglin-utilities.R 67c413ec19c991bf083e0459eb3ff71f *R/logseries.R c155ada0d20a6e24dd190bcdd56b27de *R/mcaplot.R 1384d1ce36ba17ce25b00603306a258c *R/modFit.R 014cf5c8db1d36327db64bc35c5a7ed3 *R/mosaic.glm.R 5ce7705b0b6911d86ed0ee6547fc7218 *R/mosaic.glmlist.R 
de9b6b1b1b831283a118c2e5e9e72932 *R/mosaic3d.R 1d5324d4c8b0e2cb572aa9e61ddb7148 *R/print.Kappa.R 1925c7203e978e7b778c25a5b2afa0c1 *R/seq_loglm.R 1987e35844f89995f5f071e3c219e848 *R/seq_mosaic.R ebdbee3b568920673218c4610b858562 *R/split3d.R 559c21fac93caf6fe54e3e48a6265c17 *R/summarise-old.R e782855ba6eaecbc19b51e1c4b8d69ba *R/update.xtabs.R c2c481edc3058a26cd2eb178510decaa *R/vcdExtra-deprecated.R 5fe4e1529aa78269afa34eac8595e84c *R/zero.test.R cdbdb705c017565f8d24cdc91a0e6ec8 *build/partial.rdb 537938d26daf8f4461bed7b6f02c0656 *build/vignette.rds 6d2a9f9d45fe43a9627455addb4561bc *data/Abortion.rda ba2b62141b5cbaf77c2b06698553e983 *data/Accident.RData d295c350b177b842ccee2e5b164a58f4 *data/AirCrash.RData d775f487470c49d64119693325aa910c *data/Alligator.RData 88b64cd3bed82c2702918e9a17384013 *data/Asbestos.RData 3bbe75f0bcf3a92f29625be117d1c296 *data/Bartlett.rda 5fc3e4c48e1397773e90780afecbb6ca *data/Burt.RData 0d56582a4b5af7562b3116810d53f961 *data/Caesar.rda b302d59a1a8d1f53d639375b7caf2919 *data/Cancer.rda 3813cf7ec7f60ce7fa8e521d6ba63ba2 *data/Cormorants.RData 3b9f36583cfb6342dcc4d4dcd3998004 *data/CyclingDeaths.RData 1e23f322f74ea63465f67781fb6b2a4d *data/DaytonSurvey.RData eabd8206f271bdb144bb53c0df6a2f92 *data/Depends.RData fd6545bf5e14066aab8b349b62b11fbe *data/Detergent.rda 008242aa4f2235a9c015b7423429ded1 *data/Donner.RData c66f21ad5716aee11c585e033e95fd7f *data/Draft1970.RData 1daeff3b42be51c00f8ad0b77fd83e21 *data/Draft1970table.RData 2d37442d326c44677d93a931a277e212 *data/Dyke.rda 7f3834a840e7060e63a12dfaf43307a6 *data/Fungicide.rda f5aa7f75d2342c90fd71ef90961bdbde *data/GSS.rda f1d55ca4db128ee7bf40f27e7e33daed *data/Geissler.RData 0562850df808df96f4d889575e340c18 *data/Gilby.rda 8bee58957caa50dfed1e572ff2f06ce6 *data/Glass.RData 59e31226e1a72831b243a8b1d9ac2309 *data/HairEyePlace.RData 462a843912c7fa9d65e80216c1b9bf21 *data/Hauser79.RData 7286137b66891022c7040ee165106cd6 *data/Heart.rda 962147756d43e545ce4195c0a008b3c2 *data/Heckman.rda 
395c246ca156be6ff2f3c4ddd85e6e02 *data/HospVisits.RData dbf4db64fff385dbe501e3a1f584f313 *data/HouseTasks.RData 0be197a140533b84d2aa4e9cbdf53847 *data/Hoyt.rda 3ecbf5b367c5a5dc92de5ca67399a42d *data/ICU.RData 6760c1bde6b7c2f02ee7092bb1afde50 *data/JobSat.rda a9598a5882e39e718e8a011872a99bf0 *data/Mammograms.RData c6c47e0e0aaa247eb5deb17bc8dabc1a *data/Mental.rda 8e4b0f4f2fafb81451d25679de372f12 *data/Mice.RData 5792d4777f84482b21e8924c56f293bc *data/Mobility.rda 245b886dcb1ae47cb60a9a77da485b01 *data/PhdPubs.RData dbe73c96e2d34286a5f527db6e197ddc *data/ShakeWords.RData 6d0ea682819c64ba8bf93637a3252d31 *data/TV.rda 2f9862582e0cea8b0920f1f587829d80 *data/Titanicp.rda 4cd539822e2217dedcc6878c9d571831 *data/Toxaemia.RData a80d9998ac4582bc3bd52e4c426e3c2a *data/Vietnam.RData 8c762b9361d60b4b2fa6ea53e7a47a6a *data/Vietnam.rda b7392e8ea7f5a6d921090b4eb70aaa3a *data/Vote1980.RData 2d049ddb7fcd57fcdcbd381d07bd4e01 *data/WorkerSat.RData 67a9eda069524ea4e9156ba5f79eb05b *data/Yamaguchi87.RData 45bc99442529ca940cb36f44333030f8 *demo/00Index b4a763fc0987f60fd6102f1026d483c1 *demo/Wong2-3.R 55e972f403ae9de662d7e9b6891bd2c7 *demo/Wong3-1.R e9c622501b47f8dd4d942feea0110213 *demo/housing.R 02eba55e051f4afcbdd923d80dd5f7b0 *demo/mental-glm.R f5c6bad4251ba4fc2879ac10f7755bd9 *demo/mosaic3d-demo.R ba4d09ac1c0d1f3708df0885d8b66dfb *demo/mosaic3d-hec.R c838bce2fa0536380a5991b22b3bfeb2 *demo/occStatus.R b8e990080da869639effe9c5be96990f *demo/ucb-glm.R be799fa72e29b45c9c5117ad1396ab88 *demo/vision-quasi.R ee93f55378d740b1ab23b8a298739aaa *demo/yaish-unidiff.R ffd20b4a85ca58f1dc3d8d1c3f4efd92 *demo/yamaguchi-xie.R 90f088eaa9934a0dbf5eeae10c41f23a *inst/WORDLIST 4a185db2df6c93b0dfd1410c1c83fa13 *inst/doc/continuous.R 3472b051f433a91e3cd5c7d3fddf65aa *inst/doc/continuous.Rmd 3e4ab0a3177a898dc8952873fffb6aee *inst/doc/continuous.html bdce1eff250a00667a3502654391712b *inst/doc/creating.R a697a1200efabdc7bb42bb5364ddb8b4 *inst/doc/creating.Rmd 173cf509df4255bff54569cda754c577 
*inst/doc/creating.html 57aa35ecfb9a96032e03caf97e54d4d6 *inst/doc/datasets.R 70e08c54cc359dd9b8c99f0e61791343 *inst/doc/datasets.Rmd 9a22eeaa9f9cadf929909e34208a928b *inst/doc/datasets.html ef01767bc459290252033e906e077714 *inst/doc/demo-housing.R e6d634581d658349081fbedfa6ea6e6f *inst/doc/demo-housing.Rmd 865281cbe2e3d964b5ca9f2c513443ef *inst/doc/demo-housing.html a34943958dfd59fc717b2d9438080421 *inst/doc/loglinear.R de0fbc326295f69e661f0515dc6c3c07 *inst/doc/loglinear.Rmd d1e22e9ac1c9ba339d40b0774e69e3f1 *inst/doc/loglinear.html f3c4fa247f054927611de0129953e8f2 *inst/doc/mobility.R a2ffb4acb54840407b9fa6793ac8f61d *inst/doc/mobility.Rmd d1ef55d3684672927102c137fd4b1b32 *inst/doc/mobility.html bbd8787e3d394fe6de6137b38cfc346b *inst/doc/mosaics.R 3f98504fd0dcdbb8d42561cc51e6da24 *inst/doc/mosaics.Rmd f8cf03f80e4f3a159ea2e713fc43753d *inst/doc/mosaics.html 378650b6daf80b878ecddc99fd434061 *inst/doc/tests.R e5d48bcddb50b42a0536810fbf3efb04 *inst/doc/tests.Rmd 540674e387f2915d255d611451b0d665 *inst/doc/tests.html 3321d04d1eea258c261c59f337c21d1a *inst/extdata/tv.dat ec7a5d9a2a97072b47418e35cdf59534 *inst/extdata/vcdExtra-datasets.xlsx adac397fc8e8b5ce2ab0c8f2d6536ee2 *man/Abortion.Rd 2225aa1ed764fd18c8752a5c1ff8a009 *man/Accident.Rd 5d6fcddf97911c8d89ff56a09b07794d *man/AirCrash.Rd fa31ad01d354dd15532df15c9b981aaa *man/Alligator.Rd 41918fc97f0ccf66168d7eefc7c0dfed *man/Asbestos.Rd 8d4dd491b8776d3f87058c8efa56dc72 *man/Bartlett.Rd 1415b7c9266e5869fe8997e2935a518a *man/Burt.Rd 573cf3ccfe53769e3890a15f3a67b2ed *man/CMHtest.Rd d01bc7b62fad9120e942fc7784d00b24 *man/Caesar.Rd e2328c1a0e0e8bfe01f368402650c767 *man/Cancer.Rd b80d0261320f8a9c05ca5f931f3104e4 *man/Cormorants.Rd 550265c33bdd8209ec0aa49e963ee02a *man/Crossings.Rd b53f0a4316b9b3345356e86295e46302 *man/CyclingDeaths.Rd 2c36c943e50e38a79a2b716bb359a3bc *man/DaytonSurvey.Rd fd6893cb476d7cfbcb7b9bd3cd10fe0c *man/Depends.Rd 0e917e2dade904a19b5ee9ba076eb200 *man/Detergent.Rd 94afdc50f9da14c9da355c6b39214449 
*man/Donner.Rd ab6bf23ebaed0ec854d9e72a1c4ebefa *man/Draft1970.Rd 280dab5e23bbfd88498f9df9059a0488 *man/Draft1970table.Rd 5d8c7a37653b3eba29e376d4391b4188 *man/Dyke.Rd ae0f73e6f15e4313b3f922243e7baefc *man/Fungicide.Rd bc1760e7dba509ec09b76865fabfff43 *man/GKgamma.Rd d558fb267929ab412b3b047fbbcce7ff *man/GSS.Rd e05a981ec1a2f762c49f4e8af17c4441 *man/Geissler.Rd b1f492d174b3a16da74c12149bf7e71c *man/Gilby.Rd 8a71ad6f272e408182645c706cff77f1 *man/Glass.Rd 9ca25cfa3632cfabe4f6a55cbc548636 *man/HLtest.Rd 2b67efd4c7ad31ca4bef9f89ccf9380e *man/HairEyePlace.Rd 024581c9c4d38ae21478d04a5e237d49 *man/Hauser79.Rd b5496d6812856284a8a7106b1948ee80 *man/Heart.Rd 4287e95a48ec5076844e0b2135af6985 *man/Heckman.Rd 03987671be920cb82b8d015fb9c719fe *man/HospVisits.Rd 4edcef134014fb672de7a0ee76ca9900 *man/HouseTasks.Rd b3da9c0b217e36bce6e96fc46169feef *man/Hoyt.Rd b5fa3b5ffa9365933e6b34b9c400c829 *man/ICU.Rd 641165246b367e17d642a311b57fc64c *man/JobSat.Rd c100e3b5e9816c5402a40e1924302e1b *man/Kway.Rd 9c79c35060171b0ea5d2d7960b35edd0 *man/LRstats.Rd 563409527d1fc81dcd6981f784dd8d37 *man/Mammograms.Rd 4d2257e126753ea18b26d87fccfc128d *man/Mental.Rd b31f1947f52689fd922fab997d96a619 *man/Mice.Rd 88a49573343d3b4e41164ed0a8e2b480 *man/Mobility.Rd ae8ca98db1eb35dcd7de772a5439a36b *man/PhdPubs.Rd 83e8c3a042ba4b98f9bd3766d1fe41d3 *man/ShakeWords.Rd 19ff4c40b4f445d48021fa0dcf0c59a1 *man/Summarise.Rd 6d137df093afba6bfed1e0b0943e8a0f *man/TV.Rd 3a53b4ec21077d21fe4f22185707128b *man/Titanicp.Rd cbd9e57586630891669b2669b9f136bf *man/Toxaemia.Rd 410c77bf7fd4515164c7ef249803f860 *man/Vietnam.Rd a9a35d094292ba10fc2ff1093a682b26 *man/Vote1980.Rd a01cc8708ade3725711071cff3ffd7ae *man/WorkerSat.Rd 0986f1f13a900481d8978b5e81fa1e07 *man/Yamaguchi87.Rd 990cc169d1768db3063337be21bf7b9f *man/blogits.Rd d93c47d4017fbef35853cedab1bfbc2f *man/collapse.table.Rd 1fc332c3e4840e563f49a592f3bb094a *man/cutfac.Rd 2bf96c50365c17b38613b42e285e0175 *man/datasets.Rd 3ab4a9fa13b62f55c6270dda77f1e2a4 *man/expand.dft.Rd 
44742032834b4d907dd1bd4b91d84e39 *man/figures/logo.png cad88d6f034566cd4d933517c9c99162 *man/glmlist.Rd a6aaef25b0ce16f0b7e2cf998741510f *man/logLik.loglm.Rd 72447d319ac7398e41dd9848f63602ed *man/loglin-utilities.Rd 49427b5d2c9a599c262ff1f7334f3893 *man/logseries.Rd c2deadcebb861766e23ff6736daa8bdd *man/mcaplot.Rd b63a8cc7c4636fe0c2f8c929f622044d *man/modFit.Rd 872b7f5c4b651ccf0c03e681b3a63069 *man/mosaic.glm.Rd 20a48ce95e1ef1dbeb2e3bb4a104f581 *man/mosaic.glmlist.Rd bdea8c07c8cc655eaa8f137bf008f086 *man/mosaic3d.Rd 6af328bb0bace1bb2164ab101dd077d3 *man/print.Kappa.Rd ea3f8682e915cf0ce39af0338adbb9ff *man/seq_loglm.Rd b4aa5c3c3b217c34370173982ef8705d *man/seq_mosaic.Rd cec84e260d0b884b8c27073e224a998f *man/split3d.Rd ad144db982aea894107e281d6eaf6774 *man/update.xtabs.Rd 86a2288b811a6468dfa0b3ea39bca4ef *man/vcdExtra-deprecated.Rd c5054831cd9cb838ba1072c72400a35a *man/vcdExtra-package.Rd f676ea7d04c59430d4f0cd8b59861362 *man/zero.test.Rd ec4a39a5e4df18cd2068c89389c210b3 *vignettes/apa.csl 3472b051f433a91e3cd5c7d3fddf65aa *vignettes/continuous.Rmd a697a1200efabdc7bb42bb5364ddb8b4 *vignettes/creating.Rmd 70e08c54cc359dd9b8c99f0e61791343 *vignettes/datasets.Rmd e6d634581d658349081fbedfa6ea6e6f *vignettes/demo-housing.Rmd 340b1347b79e58e4d0e4bb19bd39fea4 *vignettes/fig/demo-housing-mosaic-glm0a-1.png 735da94bc2ec0b8c88315aeddee44c3f *vignettes/fig/demo-housing-mosaic-glm0b-1.png adbec2aa123a5b599334c193a57da5da *vignettes/fig/demo-housing-mosaic-glm1-1.png 02781fd04fdfdcfbee798f760552daba *vignettes/fig/hauser-model-plot.png c65cfa3f0ec5070af8ef141c6f6d1f93 *vignettes/fig/mobility-UAdiag-mosaic-1.png ec958e95c07ce9204d8d39541cf17ab2 *vignettes/fig/mobility-indep-1.png 514146c21a9ffd197cfefcdbf09cff53 *vignettes/fig/mobility-mosaic1-1.png ecd32600d6338a21058bab5769757f66 *vignettes/fig/mobility-mosaicplot-1.png 41f42b7d098fdf0b0cf91b6848c35327 *vignettes/fig/mobility-qsymm-1.png ccb2ef4b5ddb856d34172b05b8d9a002 *vignettes/fig/mobility-qsymm-mosaic-1.png 
0f1581748d78b2d350b2f8325ade53db *vignettes/fig/mobility-quasi-1.png c20cdc93c74092076cca3bda8c3376df *vignettes/fig/mobility-topo-mosaic-1.png 3855e9889524e3809d76171d50e9e669 *vignettes/fig/tut01-Arthritis-1.png 030cf022b2e00b51cc103536fbc219db *vignettes/fig/tut01-tv-mosaic-1.png 2ee1f053a33f7b09ef55cc7b65f4b4d3 *vignettes/fig/tut01-tv-mosaic1-1.png 29e446617abc02b4ba96402239151ce7 *vignettes/fig/tut02-agreesex-1.png 7c3fc679529278df3e0d36427a1394ce *vignettes/fig/tut02-ca-haireye-1.png 4d4c9c94a8aa73f40dceda4986a05a88 *vignettes/fig/tut02-doubledecker-1.png 740b9ce05c22c77e4b498d0607730f8a *vignettes/fig/tut02-fourfold1-1.png 3e8e1a77eabbb8933d590702dbd2497a *vignettes/fig/tut02-oddsratio-1.png d633f7c10db8bbb271ce70099083a8b7 *vignettes/fig/tut04-Arthritis1-1.png 5e6315c38768edc64856aa1a0a48b289 *vignettes/fig/tut04-Arthritis2-1.png 404528431d825e77f8ffc163384608e8 *vignettes/fig/tut04-glass-mosaic1-1.png 3274d07e94edbdf2a86e4e74bf8ae19e *vignettes/fig/tut04-glass-mosaic2-1.png ffad6ec179c61c4161c8f37f4b246bdd *vignettes/fig/tut04-glass-qsymm-1.png 261828aba9a7f67303c62b0a6d643f43 *vignettes/fig/tut04-glass-quasi-1.png ca7b3661bcb57d52b9a7a56e808769c7 *vignettes/fig/tut04-housetasks-ca-plot-1.png bd4b491c318184d7becb6c937bdc96f8 *vignettes/fig/tut04-housetasks-mos1-1.png afc8d2ee995a63d8c11c068dd8ebf952 *vignettes/fig/tut04-housetasks-mos2-1.png 8d10199a2bda5959eef26f4572dc0c32 *vignettes/fig/tut05-cdplot-1.png 285f4c1eebacb7968cb7a04ff0c8e21f *vignettes/fig/tut05-cdplot1-1.png 143329a3da3276224e548b8b2140bab5 *vignettes/fig/tut05-donner3a-1.png 289c60cc0687f45cc0ae55f564c88ebd *vignettes/fig/tut05-donner3a-2.png f5606661faf956dfebb0cd4dfe1b21d0 *vignettes/fig/tut05-spine1-1.png d8d25c55bbc4d5bc0128541f655ba003 *vignettes/fig/tut05-spine1-2.png de0fbc326295f69e661f0515dc6c3c07 *vignettes/loglinear.Rmd a2ffb4acb54840407b9fa6793ac8f61d *vignettes/mobility.Rmd 3f98504fd0dcdbb8d42561cc51e6da24 *vignettes/mosaics.Rmd e5d48bcddb50b42a0536810fbf3efb04 
*vignettes/tests.Rmd 0beba0c403ffbbd1a0844d4d33ac7066 *vignettes/vcd.bib 32617c0b580e5c7316e86fa1a34d4b89 *vignettes/vcdExtra.bib 3983de84393d568939fb2b73f5d99425 *vignettes/vignettes.bib vcdExtra/inst/0000755000176200001440000000000014470742320013012 5ustar liggesusersvcdExtra/inst/doc/0000755000176200001440000000000014470742320013557 5ustar liggesusersvcdExtra/inst/doc/creating.R0000644000176200001440000002511414470742310015500 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut01-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ## ---- case-form--------------------------------------------------------------- names(Arthritis) # show the variables str(Arthritis) # show the structure head(Arthritis,5) # first 5 observations, same as Arthritis[1:5,] ## ---- frequency-form---------------------------------------------------------- # Agresti (2002), table 3.11, p. 
106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) GSS names(GSS) str(GSS) sum(GSS$count) ## ---- table-form1------------------------------------------------------------- str(HairEyeColor) # show the structure sum(HairEyeColor) # number of cases sapply(dimnames(HairEyeColor), length) # table dimension sizes ## ---- table-form2------------------------------------------------------------- # A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction JobSat <- matrix(c( 1, 2, 1, 0, 3, 3, 6, 1, 10,10,14, 9, 6, 7,12,11), 4, 4) dimnames(JobSat) = list( income = c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS") ) JobSat ## ---- table-form3------------------------------------------------------------- JobSat <- as.table(JobSat) str(JobSat) ## ---- relevel, eval=FALSE----------------------------------------------------- # dimnames(JobSat)$income <- c(7.5,20,32.5,60) # dimnames(JobSat)$satisfaction <- 1:4 ## ---- reorder1---------------------------------------------------------------- HairEyeColor <- HairEyeColor[, c(1,3,4,2), ] str(HairEyeColor) ## ---- reorder2, echo=TRUE, eval=FALSE----------------------------------------- # Arthritis <- read.csv("arthritis.txt",header=TRUE) # Arthritis$Improved <- ordered(Arthritis$Improved, # levels=c("None", "Some", "Marked") # ) ## ----------------------------------------------------------------------------- data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ## ---- reorder3---------------------------------------------------------------- UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") # display as a flattened table stats::ftable(UCB) ## ---- 
structable-------------------------------------------------------------- structable(HairEyeColor) # show the table: default structable(Hair+Sex ~ Eye, HairEyeColor) # specify col ~ row variables ## ---- structable1,eval=FALSE-------------------------------------------------- # HSE < - structable(Hair+Sex ~ Eye, HairEyeColor) # save structable object # mosaic(HSE) # plot it ## ---- table-setup------------------------------------------------------------- n=500 A <- factor(sample(c("a1","a2"), n, rep=TRUE)) B <- factor(sample(c("b1","b2"), n, rep=TRUE)) C <- factor(sample(c("c1","c2"), n, rep=TRUE)) mydata <- data.frame(A,B,C) ## ---- table-ex1--------------------------------------------------------------- # 2-Way Frequency Table attach(mydata) mytable <- table(A,B) # A will be rows, B will be columns mytable # print table margin.table(mytable, 1) # A frequencies (summed over B) margin.table(mytable, 2) # B frequencies (summed over A) prop.table(mytable) # cell percentages prop.table(mytable, 1) # row percentages prop.table(mytable, 2) # column percentages ## ---- table-ex2--------------------------------------------------------------- # 3-Way Frequency Table mytable <- table(A, B, C) ftable(mytable) ## ---- xtabs-ex1--------------------------------------------------------------- # 3-Way Frequency Table mytable <- xtabs(~A+B+C, data=mydata) ftable(mytable) # print table summary(mytable) # chi-square test of indepedence ## ---- xtabs-ex2--------------------------------------------------------------- (GSStab <- xtabs(count ~ sex + party, data=GSS)) summary(GSStab) ## ---- dayton1----------------------------------------------------------------- data("DaytonSurvey", package="vcdExtra") str(DaytonSurvey) head(DaytonSurvey) ## ---- dayton2----------------------------------------------------------------- # data in frequency form # collapse over sex and race Dayton.ACM.df <- aggregate(Freq ~ cigarette+alcohol+marijuana, data=DaytonSurvey, FUN=sum) Dayton.ACM.df ## ---- 
dayton3----------------------------------------------------------------- # in table form Dayton.tab <- xtabs(Freq ~ cigarette+alcohol+marijuana+sex+race, data=DaytonSurvey) structable(cigarette+alcohol+marijuana ~ sex+race, data=Dayton.tab) ## ---- dayton4----------------------------------------------------------------- # collapse over sex and race Dayton.ACM.tab <- apply(Dayton.tab, MARGIN=1:3, FUN=sum) Dayton.ACM.tab <- margin.table(Dayton.tab, 1:3) # same result structable(cigarette+alcohol ~ marijuana, data=Dayton.ACM.tab) ## ---- dayton5----------------------------------------------------------------- library(plyr) Dayton.ACM.df <- plyr::ddply(DaytonSurvey, .(cigarette, alcohol, marijuana), plyr::summarise, Freq=sum(Freq)) Dayton.ACM.df ## ---- collapse1--------------------------------------------------------------- # create some sample data in frequency form sex <- c("Male", "Female") age <- c("10-19", "20-29", "30-39", "40-49", "50-59", "60-69") education <- c("low", 'med', 'high') data <- expand.grid(sex=sex, age=age, education=education) counts <- rpois(36, 100) # random Possion cell frequencies data <- cbind(data, counts) # make it into a 3-way table t1 <- xtabs(counts ~ sex + age + education, data=data) structable(t1) ## ---- collapse2--------------------------------------------------------------- # collapse age to 3 levels, education to 2 levels t2 <- collapse.table(t1, age=c("10-29", "10-29", "30-49", "30-49", "50-69", "50-69"), education=c(" mutate(sibspF = case_match(sibsp, 0 ~ "0", 1 ~ "1", 2:max(sibsp) ~ "2+")) |> mutate(sibspF = ordered(sibspF)) |> mutate(parchF = case_match(parch, 0 ~ "0", 1 ~ "1", 2:max(parch) ~ "2+")) |> mutate(parchF = ordered(parchF)) table(Titanicp$sibspF, Titanicp$parchF) ## ---- convert-ex1------------------------------------------------------------- as.data.frame(GSStab) ## ---- convert-ex2------------------------------------------------------------- Art.tab <- with(Arthritis, table(Treatment, Sex, Improved)) str(Art.tab) 
ftable(Art.tab) ## ---- convert-ex3------------------------------------------------------------- Art.df <- expand.dft(Art.tab) str(Art.df) ## ---- tv1--------------------------------------------------------------------- tv.data<-read.table(system.file("extdata","tv.dat", package="vcdExtra")) head(tv.data,5) ## ---- tv2,eval=FALSE---------------------------------------------------------- # tv.data<-read.table("C:/R/data/tv.dat") ## ---- tv3--------------------------------------------------------------------- TV <- array(tv.data[,5], dim=c(5,11,5,3)) dimnames(TV) <- list(c("Monday","Tuesday","Wednesday","Thursday","Friday"), c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", "9:45","10:00","10:15","10:30"), c("ABC","CBS","NBC","Fox","Other"), c("Off","Switch","Persist")) names(dimnames(TV))<-c("Day", "Time", "Network", "State") ## ---- tv3a,eval=FALSE--------------------------------------------------------- # TV <- xtabs(V5 ~ ., data=tv.data) # dimnames(TV) <- list(Day = c("Monday","Tuesday","Wednesday","Thursday","Friday"), # Time = c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", # "9:45","10:00","10:15","10:30"), # Network = c("ABC","CBS","NBC","Fox","Other"), # State = c("Off","Switch","Persist")) # # # table dimensions # dim(TV) ## ---- tv4--------------------------------------------------------------------- TV2 <- TV[,,1:3,] # keep only ABC, CBS, NBC TV2 <- TV2[,,,3] # keep only Persist -- now a 3 way table structable(TV2) ## ---- tv5--------------------------------------------------------------------- TV.df <- as.data.frame.table(TV2) levels(TV.df$Time) <- c(rep("8:00", 2), rep("8:30", 2), rep("9:00", 2), rep("9:30", 2), rep("10:00",2), "10:30" ) TV3 <- xtabs(Freq ~ Day + Time + Network, TV.df) structable(Day ~ Time+Network, TV3) ## ----tv-mosaic1, fig.height=6, fig.width=7------------------------------------ mosaic(TV3, shade = TRUE, labeling = labeling_border(rot_labels = c(0, 0, 0, 90))) 
vcdExtra/inst/doc/loglinear.html0000644000176200001440000011554014470742313016431 0ustar liggesusers Loglinear Models

Loglinear Models

Michael Friendly

2023-08-21

You can use the loglm() function in the MASS package to fit log-linear models. Equivalent models can also be fit (from a different perspective) as generalized linear models with the glm() function using the family='poisson' argument, and the gnm package provides a wider range of generalized nonlinear models, particularly for testing structured associations.

The visualization methods for these models were originally developed for models fit using loglm(), so this approach is emphasized here. Some extensions of these methods for models fit using glm() and gnm() are contained in the vcdExtra package (for example, mosaic.glm()); fitting such models is illustrated in the section “Fitting with glm() and gnm()” below.

Assume we have a 3-way contingency table based on variables A, B, and C. The possible different forms of loglinear models for a 3-way table are shown in the table below. The Model formula column shows how to express each model for loglm() in R (see note 1 at the end). In the Interpretation column, the symbol “\(\perp\)” is to be read as “is independent of,” and “\(\;|\;\)” means “conditional on,” or “adjusting for,” or just “given”.

Model Model formula Symbol Interpretation
Mutual independence ~A + B + C \([A][B][C]\) \(A \perp B \perp C\)
Joint independence ~A*B + C \([AB][C]\) \((A \: B) \perp C\)
Conditional independence ~(A+B)*C \([AC][BC]\) \((A \perp B) \;|\; C\)
All two-way associations ~A*B + A*C + B*C \([AB][AC][BC]\) homogeneous association
Saturated model ~A*B*C \([ABC]\) 3-way association

For example, the formula ~A + B + C specifies the model of mutual independence with no associations among the three factors. In standard notation for the expected frequencies \(m_{ijk}\), this corresponds to

\[ \log ( m_{ijk} ) = \mu + \lambda_i^A + \lambda_j^B + \lambda_k^C \equiv A + B + C \]

The parameters \(\lambda_i^A , \lambda_j^B\) and \(\lambda_k^C\) pertain to the differences among the one-way marginal frequencies for the factors A, B and C.

Similarly, the model of joint independence, \((A \: B) \perp C\), allows an association between A and B, but specifies that C is independent of both of these and their combinations,

\[ \log ( m_{ijk} ) = \mu + \lambda_i^A + \lambda_j^B + \lambda_k^C + \lambda_{ij}^{AB} \equiv A * B + C \]

where the parameters \(\lambda_{ij}^{AB}\) pertain to the overall association between A and B (collapsing over C).

In the literature or textbooks, you will often find these models expressed in shorthand symbolic notation, using brackets, [ ], to enclose the high-order terms in the model. Thus, the joint independence model can be denoted [AB][C], as shown in the Symbol column of the table above.

Models of conditional independence allow (and fit) two of the three possible two-way associations. There are three such models, depending on which variable is conditioned upon. For a given conditional independence model, e.g., [AB][AC], the given variable is the one common to all terms, so this example has the interpretation \((B \perp C) \;|\; A\).

Fitting with loglm()

For example, we can fit the model of mutual independence among hair color, eye color and sex in HairEyeColor as

library(MASS)
## Independence model of hair and eye color and sex.  
hec.1 <- loglm(~Hair+Eye+Sex, data=HairEyeColor)
hec.1
## Call:
## loglm(formula = ~Hair + Eye + Sex, data = HairEyeColor)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 166.3001 24        0
## Pearson          164.9247 24        0

Similarly, the models of conditional independence and joint independence are specified as

## Conditional independence
hec.2 <- loglm(~(Hair + Eye) * Sex, data=HairEyeColor)
hec.2
## Call:
## loglm(formula = ~(Hair + Eye) * Sex, data = HairEyeColor)
## 
## Statistics:
##                       X^2 df P(> X^2)
## Likelihood Ratio 156.6779 18        0
## Pearson          147.9440 18        0
## Joint independence model.  
hec.3 <- loglm(~Hair*Eye + Sex, data=HairEyeColor)
hec.3
## Call:
## loglm(formula = ~Hair * Eye + Sex, data = HairEyeColor)
## 
## Statistics:
##                       X^2 df  P(> X^2)
## Likelihood Ratio 19.85656 15 0.1775045
## Pearson          19.56712 15 0.1891745

Note that printing the model gives a brief summary of the goodness of fit. A set of models can be compared using the anova() function.

anova(hec.1, hec.2, hec.3)
## LR tests for hierarchical log-linear models
## 
## Model 1:
##  ~Hair + Eye + Sex 
## Model 2:
##  ~(Hair + Eye) * Sex 
## Model 3:
##  ~Hair * Eye + Sex 
## 
##            Deviance df Delta(Dev) Delta(df) P(> Delta(Dev)
## Model 1   166.30014 24                                    
## Model 2   156.67789 18    9.62225         6        0.14149
## Model 3    19.85656 15  136.82133         3        0.00000
## Saturated   0.00000  0   19.85656        15        0.17750

Fitting with glm() and gnm()

The glm() approach, and extensions of this in the gnm package, allow a much wider class of models for frequency data to be fit than can be handled by loglm(). Of particular importance are models for ordinal factors and for square tables, where we can test more structured hypotheses about the patterns of association than are provided in the tests of general association under loglm(). These are similar in spirit to the non-parametric CMH tests, illustrated with CMHtest() at the end of this section.

Example: The data Mental in the vcdExtra package gives a two-way table in frequency form classifying young people by their mental health status and parents’ socioeconomic status (SES), where both of these variables are ordered factors.

data(Mental, package = "vcdExtra")
str(Mental)
## 'data.frame':    24 obs. of  3 variables:
##  $ ses   : Ord.factor w/ 6 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 2 2 2 2 3 3 ...
##  $ mental: Ord.factor w/ 4 levels "Well"<"Mild"<..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Freq  : int  64 94 58 46 57 94 54 40 57 105 ...
xtabs(Freq ~ mental + ses, data=Mental)   # display the frequency table
##           ses
## mental       1   2   3   4   5   6
##   Well      64  57  57  72  36  21
##   Mild      94  94 105 141  97  71
##   Moderate  58  54  65  77  54  54
##   Impaired  46  40  60  94  78  71

Simple ways of handling ordinal variables involve assigning scores to the table categories, and the simplest cases are to use integer scores, either for the row variable (“column effects” model), the column variable (“row effects” model), or both (“uniform association” model).

indep <- glm(Freq ~ mental + ses, family = poisson, data = Mental)  # independence model

To fit more parsimonious models than general association, we can define numeric scores for the row and column categories

# Use integer scores for rows/cols 
Cscore <- as.numeric(Mental$ses)
Rscore <- as.numeric(Mental$mental) 

Then, the row effects model, the column effects model, and the uniform association model can be fit as follows. The essential idea is to replace a factor variable with its numeric equivalent in the model formula for the association term.

# column effects model (ses)
coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental)

# row effects model (mental)
roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental)

# linear x linear association
linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental)

The LRstats() function in vcdExtra provides a nice, compact summary of the fit statistics for a set of models, collected into a glmlist object. Smaller is better for AIC and BIC.

# compare models using AIC, BIC, etc
vcdExtra::LRstats(glmlist(indep, roweff, coleff, linlin))
## Likelihood summary table:
##           AIC    BIC LR Chisq Df Pr(>Chisq)    
## indep  209.59 220.19   47.418 15  3.155e-05 ***
## roweff 174.45 188.59    6.281 12     0.9013    
## coleff 179.00 195.50    6.829 10     0.7415    
## linlin 174.07 185.85    9.895 14     0.7698    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

For specific model comparisons, we can also carry out tests of nested models with anova() when those models are listed from smallest to largest. Here, there are two separate paths from the most restrictive (independence) model through the model of uniform association, to those that allow only one of row effects or column effects.

anova(indep, linlin, coleff, test="Chisq")  
## Analysis of Deviance Table
## 
## Model 1: Freq ~ mental + ses
## Model 2: Freq ~ mental + ses + Rscore:Cscore
## Model 3: Freq ~ mental + ses + Rscore:ses
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        15     47.418                          
## 2        14      9.895  1   37.523 9.035e-10 ***
## 3        10      6.829  4    3.066    0.5469    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(indep, linlin, roweff, test="Chisq")  
## Analysis of Deviance Table
## 
## Model 1: Freq ~ mental + ses
## Model 2: Freq ~ mental + ses + Rscore:Cscore
## Model 3: Freq ~ mental + ses + mental:Cscore
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        15     47.418                          
## 2        14      9.895  1   37.523 9.035e-10 ***
## 3        12      6.281  2    3.614    0.1641    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The model of linear by linear association seems best on all accounts. For comparison, one might try the CMH tests on these data:

CMHtest(xtabs(Freq~ses+mental, data=Mental))
## Cochran-Mantel-Haenszel Statistics for ses by mental 
## 
##                  AltHypothesis  Chisq Df       Prob
## cor        Nonzero correlation 37.156  1 1.0907e-09
## rmeans  Row mean scores differ 40.297  5 1.3012e-07
## cmeans  Col mean scores differ 40.666  3 7.6971e-09
## general    General association 45.958 15 5.4003e-05

Non-linear terms

The strength of the gnm package is that it handles a wide variety of models with non-linear terms, where the parameters enter the model in ways other than as a simple linear function. The simplest example is the Goodman RC(1) model (Goodman, 1979), which allows a multiplicative term to account for the association of the table variables. In the notation of generalized linear models with a log link, this can be expressed as

\[ \log \mu_{ij} = \alpha_i + \beta_j + \gamma_{i} \delta_{j} ,\]

where the row-multiplicative effect parameters \(\gamma_i\) and corresponding column parameters \(\delta_j\) are estimated from the data (see note 2 at the end).

Similarly, the RC(2) model adds two multiplicative terms to the independence model,

\[ \log \mu_{ij} = \alpha_i + \beta_j + \gamma_{i1} \delta_{j1} + \gamma_{i2} \delta_{j2} . \]

In the gnm package, these models may be fit using Mult() to specify the multiplicative term, and instances() to specify several such terms.

Example: For the Mental data, we fit the RC(1) and RC(2) models, and compare these with the independence model.

RC1 <- gnm(Freq ~ mental + ses + Mult(mental,ses), data=Mental, 
             family=poisson, verbose=FALSE)
RC2 <- gnm(Freq ~ mental+ses + instances(Mult(mental,ses),2), data=Mental, 
             family=poisson, verbose=FALSE)
anova(indep, RC1, RC2, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: Freq ~ mental + ses
## Model 2: Freq ~ mental + ses + Mult(mental, ses)
## Model 3: Freq ~ mental + ses + Mult(mental, ses, inst = 1) + Mult(mental, 
##     ses, inst = 2)
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1        15     47.418                         
## 2         9     40.230  6    7.188   0.3038    
## 3         3      0.523  6   39.707  5.2e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

References

Goodman, L. A. (1979). Simple models for the analysis of association in cross-classifications having ordered categories. Journal of the American Statistical Association, 74, 537–552.

  1. For glm(), or gnm(), with the data in the form of a frequency data.frame, the same model is specified in the form glm(Freq \(\sim\) ..., family="poisson"), where Freq is the name of the cell frequency variable and ... specifies the Model formula.↩︎

  2. This is similar in spirit to a correspondence analysis with a single dimension, but as a statistical model.↩︎

vcdExtra/inst/doc/creating.Rmd0000644000176200001440000007410714470701331016025 0ustar liggesusers--- title: "Creating and manipulating frequency tables" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Creating and manipulating frequency tables} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut01-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` R provides many methods for creating frequency and contingency tables. Several are described below. In the examples below, we use some real examples and some anonymous ones, where the variables `A`, `B`, and `C` represent categorical variables, and `X` represents an arbitrary R data object. ## Forms of frequency data The first thing you need to know is that categorical data can be represented in three different forms in R, and it is sometimes necessary to convert from one form to another, for carrying out statistical tests, fitting models or visualizing the results. Once a data object exists in R, you can examine its complete structure with the `str()` function, or view the names of its components with the `names()` function. 
### Case form Categorical data in case form are simply data frames containing individual observations, with one or more factors, used as the classifying variables. In case form, there may also be numeric covariates. The total number of observations is `nrow(X)`, and the number of variables is `ncol(X)`. ***Example***: The `Arthritis` data is available in case form in the `vcd` package. There are two explanatory factors: `Treatment` and `Sex`. `Age` is a numeric covariate, and `Improved` is the response--- an ordered factor, with levels `r paste(levels(Arthritis$Improved),collapse=' < ')`. Excluding `Age`, this represents a $2 \times 2 \times 3$ contingency table for `Treatment`, `Sex` and `Improved`, but in case form. ```{r, case-form} names(Arthritis) # show the variables str(Arthritis) # show the structure head(Arthritis,5) # first 5 observations, same as Arthritis[1:5,] ``` ### Frequency form Data in frequency form is also a data frame containing one or more factors, and a frequency variable, often called `Freq` or `count`. The total number of observations is: `sum(X$Freq)`, `sum(X[,"Freq"])` or some equivalent form. The number of cells in the table is given by `nrow(X)`. ***Example***: For small frequency tables, it is often convenient to enter them in frequency form using `expand.grid()` for the factors and `c()` to list the counts in a vector. The example below, from [@vcd:Agresti:2002] gives results for the 1991 General Social Survey, with respondents classified by sex and party identification. ```{r, frequency-form} # Agresti (2002), table 3.11, p. 106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) GSS names(GSS) str(GSS) sum(GSS$count) ``` ### Table form Table form data is represented by a `matrix`, `array` or `table` object, whose elements are the frequencies in an $n$-way table. The variable names (factors) and their levels are given by `dimnames(X)`. 
The total number of observations is `sum(X)`. The number of dimensions of the table is `length(dimnames(X))`, and the table sizes are given by `sapply(dimnames(X), length)`. ***Example***: The `HairEyeColor` is stored in table form in `vcd`. ```{r, table-form1} str(HairEyeColor) # show the structure sum(HairEyeColor) # number of cases sapply(dimnames(HairEyeColor), length) # table dimension sizes ``` ***Example***: Enter frequencies in a matrix, and assign `dimnames`, giving the variable names and category labels. Note that, by default, `matrix()` uses the elements supplied by *columns* in the result, unless you specify `byrow=TRUE`. ```{r, table-form2} # A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction JobSat <- matrix(c( 1, 2, 1, 0, 3, 3, 6, 1, 10,10,14, 9, 6, 7,12,11), 4, 4) dimnames(JobSat) = list( income = c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS") ) JobSat ``` `JobSat` is a **matrix**, not an object of `class("table")`, and some functions are happier with tables than matrices. You can coerce it to a table with `as.table()`, ```{r, table-form3} JobSat <- as.table(JobSat) str(JobSat) ``` ## Ordered factors and reordered tables {#sec:ordered-factors} In table form, the values of the table factors are ordered by their position in the table. Thus in the `JobSat` data, both `income` and `satisfaction` represent ordered factors, and the *positions* of the values in the rows and columns reflects their ordered nature. Yet, for analysis, there are times when you need *numeric* values for the levels of ordered factors in a table, e.g., to treat a factor as a quantitative variable. In such cases, you can simply re-assign the `dimnames` attribute of the table variables. For example, here, we assign numeric values to `income` as the middle of their ranges, and treat `satisfaction` as equally spaced with integer scores. 
```{r, relevel, eval=FALSE} dimnames(JobSat)$income <- c(7.5,20,32.5,60) dimnames(JobSat)$satisfaction <- 1:4 ``` For the `HairEyeColor` data, hair color and eye color are ordered arbitrarily. For visualizing the data using mosaic plots and other methods described below, it turns out to be more useful to assure that both hair color and eye color are ordered from dark to light. Hair colors are actually ordered this way already, and it is easiest to re-order eye colors by indexing. Again `str()` is your friend. ```{r, reorder1} HairEyeColor <- HairEyeColor[, c(1,3,4,2), ] str(HairEyeColor) ``` This is also the order for both hair color and eye color shown in the result of a correspondence analysis (@ref(fig:ca-haireye)) below. With data in case form or frequency form, when you have ordered factors represented with character values, you must ensure that they are treated as ordered in R. Imagine that the `Arthritis` data was read from a text file. By default the `Improved` will be ordered alphabetically: `Marked`, `None`, `Some` --- not what we want. In this case, the function `ordered()` (and others) can be useful. ```{r, reorder2, echo=TRUE, eval=FALSE} Arthritis <- read.csv("arthritis.txt",header=TRUE) Arthritis$Improved <- ordered(Arthritis$Improved, levels=c("None", "Some", "Marked") ) ``` The dataset `Arthritis` in the `vcd` package is a data.frame in this form. With this order of `Improved`, the response in this data, a mosaic display of `Treatment` and `Improved` (@ref(fig:arthritis)) shows a clearly interpretable pattern. The original version of `mosaic` in the `vcd` package required the input to be a contingency table in array form, so we convert using `xtabs()`. ```{r} #| Arthritis, #| fig.height = 6, #| fig.width = 6, #| fig.cap = "Mosaic plot for the `Arthritis` data, showing the marginal model of independence for Treatment and Improved. Age, a covariate, and Sex are ignored here."
data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ``` Several data sets in the package illustrate the salutary effects of reordering factor levels in mosaic displays and other analyses. See: * `help(AirCrash)` * `help(Glass)` * `help(HouseTasks)` The [seriation](https://CRAN.R-project.org/package=seriation) package now contains a general method to permute the row and column variables in a table according to the result of a correspondence analysis, using scores on the first CA dimension. ### Re-ordering dimensions Finally, there are situations where, particularly for display purposes, you want to re-order the *dimensions* of an $n$-way table, or change the labels for the variables or levels. This is easy when the data are in table form: `aperm()` permutes the dimensions, and assigning to `names` and `dimnames` changes variable names and level labels respectively. We will use the following version of `UCBAdmissions` in \@ref(sec:mantel) below. ^[Changing `Admit` to `Admit?` might be useful for display purposes, but is dangerous--- because it is then difficult to use that variable name in a model formula. See \@ref(sec:tips) for options `labeling_args` and `set_labels` to change variable and level names for displays in the `strucplot` framework.] ```{r, reorder3} UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") # display as a flattened table stats::ftable(UCB) ``` ## `structable()` {#sec:structable} For 3-way and larger tables the `structable()` function in `vcd` provides a convenient and flexible tabular display. The variables assigned to the rows and columns of a two-way display can be specified by a model formula.
```{r, structable}
structable(HairEyeColor)                   # show the table: default
structable(Hair+Sex ~ Eye, HairEyeColor)   # specify col ~ row variables
```

It also returns an object of class `"structable"` which may be plotted with
`mosaic()` (not shown here).

```{r, structable1,eval=FALSE}
# `<-` must be written without an embedded space: `HSE < - x` is parsed as the
# comparison HSE < (-x) and fails because HSE does not exist yet.
HSE <- structable(Hair+Sex ~ Eye, HairEyeColor)   # save structable object
mosaic(HSE)                                       # plot it
```

## `table()` and friends {#sec:table}

You can generate frequency tables from factor variables using the `table()` function,
tables of proportions using the `prop.table()` function, and marginal frequencies using
`margin.table()`.

For these examples, create some categorical vectors:

```{r, table-setup}
n <- 500
# sample with replacement; the argument name is spelled out rather than
# relying on partial matching of `rep` to `replace`
A <- factor(sample(c("a1","a2"), n, replace=TRUE))
B <- factor(sample(c("b1","b2"), n, replace=TRUE))
C <- factor(sample(c("c1","c2"), n, replace=TRUE))
mydata <- data.frame(A,B,C)
```

These lines illustrate `table`-related functions:

```{r, table-ex1}
# 2-Way Frequency Table
attach(mydata)
mytable <- table(A,B)     # A will be rows, B will be columns
mytable                   # print table

margin.table(mytable, 1)  # A frequencies (summed over B)
margin.table(mytable, 2)  # B frequencies (summed over A)

prop.table(mytable)       # cell proportions
prop.table(mytable, 1)    # row proportions
prop.table(mytable, 2)    # column proportions
```

`table()` can also generate multidimensional tables based on 3 or more
categorical variables. In this case, you can use the `ftable()` or `structable()`
function to print the results more attractively.

```{r, table-ex2}
# 3-Way Frequency Table
mytable <- table(A, B, C)
ftable(mytable)
```

`table()` ignores missing values by default.
To include `NA` as a category in counts, include the
table option `exclude=NULL` if the variable is a vector. If the variable is a
factor you have to create a new factor using
`newfactor <- factor(oldfactor, exclude=NULL)`.

## `xtabs()` {#sec:xtabs}

The `xtabs()` function allows you to create cross-tabulations of data using formula style input.
This typically works with case-form data supplied in a data frame or a matrix. The result is a contingency table in array format, whose dimensions are determined by the terms on the right side of the formula. ```{r, xtabs-ex1} # 3-Way Frequency Table mytable <- xtabs(~A+B+C, data=mydata) ftable(mytable) # print table summary(mytable) # chi-square test of indepedence ``` If a variable is included on the left side of the formula, it is assumed to be a vector of frequencies (useful if the data have already been tabulated in frequency form). ```{r, xtabs-ex2} (GSStab <- xtabs(count ~ sex + party, data=GSS)) summary(GSStab) ``` ## Collapsing over table factors: `aggregate()`, `margin.table()` and `apply()` It sometimes happens that we have a data set with more variables or factors than we want to analyse, or else, having done some initial analyses, we decide that certain factors are not important, and so should be excluded from graphic displays by collapsing (summing) over them. For example, mosaic plots and fourfold displays are often simpler to construct from versions of the data collapsed over the factors which are not shown in the plots. The appropriate tools to use again depend on the form in which the data are represented--- a case-form data frame, a frequency-form data frame (`aggregate()`), or a table-form array or table object (`margin.table()` or `apply()`). When the data are in frequency form, and we want to produce another frequency data frame, `aggregate()` is a handy tool, using the argument `FUN=sum` to sum the frequency variable over the factors *not* mentioned in the formula. ***Example***: The data frame `DaytonSurvey` in the `vcdExtra` package represents a $2^5$ table giving the frequencies of reported use (``ever used?'') of alcohol, cigarettes and marijuana in a sample of high school seniors, also classified by sex and race. 
```{r, dayton1} data("DaytonSurvey", package="vcdExtra") str(DaytonSurvey) head(DaytonSurvey) ``` To focus on the associations among the substances, we want to collapse over sex and race. The right-hand side of the formula used in the call to `aggregate()` gives the factors to be retained in the new frequency data frame, `Dayton.ACM.df`. ```{r, dayton2} # data in frequency form # collapse over sex and race Dayton.ACM.df <- aggregate(Freq ~ cigarette+alcohol+marijuana, data=DaytonSurvey, FUN=sum) Dayton.ACM.df ``` When the data are in table form, and we want to produce another table, `apply()` with `FUN=sum` can be used in a similar way to sum the table over dimensions not mentioned in the `MARGIN` argument. `margin.table()` is just a wrapper for `apply()` using the `sum()` function. ***Example***: To illustrate, we first convert the `DaytonSurvey` to a 5-way table using `xtabs()`, giving `Dayton.tab`. ```{r, dayton3} # in table form Dayton.tab <- xtabs(Freq ~ cigarette+alcohol+marijuana+sex+race, data=DaytonSurvey) structable(cigarette+alcohol+marijuana ~ sex+race, data=Dayton.tab) ``` Then, use `apply()` on `Dayton.tab` to give the 3-way table `Dayton.ACM.tab` summed over sex and race. The elements in this new table are the column sums for `Dayton.tab` shown by `structable()` just above. ```{r, dayton4} # collapse over sex and race Dayton.ACM.tab <- apply(Dayton.tab, MARGIN=1:3, FUN=sum) Dayton.ACM.tab <- margin.table(Dayton.tab, 1:3) # same result structable(cigarette+alcohol ~ marijuana, data=Dayton.ACM.tab) ``` Many of these operations can be performed using the `**ply()` functions in the [`plyr`]( https://CRAN.R-project.org/package=plyr) package. For example, with the data in a frequency form data frame, use `ddply()` to collapse over unmentioned factors, and `plyr::summarise()` as the function to be applied to each piece. 
```{r, dayton5} library(plyr) Dayton.ACM.df <- plyr::ddply(DaytonSurvey, .(cigarette, alcohol, marijuana), plyr::summarise, Freq=sum(Freq)) Dayton.ACM.df ``` ## Collapsing table levels: `collapse.table()` A related problem arises when we have a table or array and for some purpose we want to reduce the number of levels of some factors by summing subsets of the frequencies. For example, we may have initially coded Age in 10-year intervals, and decide that, either for analysis or display purposes, we want to reduce Age to 20-year intervals. The `collapse.table()` function in `vcdExtra` was designed for this purpose. ***Example***: Create a 3-way table, and collapse Age from 10-year to 20-year intervals. First, we generate a $2 \times 6 \times 3$ table of random counts from a Poisson distribution with mean of 100. ```{r, collapse1} # create some sample data in frequency form sex <- c("Male", "Female") age <- c("10-19", "20-29", "30-39", "40-49", "50-59", "60-69") education <- c("low", 'med', 'high') data <- expand.grid(sex=sex, age=age, education=education) counts <- rpois(36, 100) # random Possion cell frequencies data <- cbind(data, counts) # make it into a 3-way table t1 <- xtabs(counts ~ sex + age + education, data=data) structable(t1) ``` Now collapse `age` to 20-year intervals, and `education` to 2 levels. In the arguments, levels of `age` and `education` given the same label are summed in the resulting smaller table. ```{r, collapse2} # collapse age to 3 levels, education to 2 levels t2 <- collapse.table(t1, age=c("10-29", "10-29", "30-49", "30-49", "50-69", "50-69"), education=c(" mutate(sibspF = case_match(sibsp, 0 ~ "0", 1 ~ "1", 2:max(sibsp) ~ "2+")) |> mutate(sibspF = ordered(sibspF)) |> mutate(parchF = case_match(parch, 0 ~ "0", 1 ~ "1", 2:max(parch) ~ "2+")) |> mutate(parchF = ordered(parchF)) table(Titanicp$sibspF, Titanicp$parchF) ``` `car::recode()` is a similar function, but with a less convenient interface. 
The [`forcats`]( https://CRAN.R-project.org/package=forcats) package provides a collection of functions for reordering the levels of a factor or grouping categories according to their frequency: * `forcats::fct_reorder()`: Reorder a factor by another variable. * `forcats::fct_infreq()`: Reorder a factor by the frequency of values. * `forcats::fct_relevel()`: Change the order of a factor by hand. * `forcats::fct_lump()`: Collapse the least/most frequent values of a factor into “other”. * `forcats::fct_collapse()`: Collapse factor levels into manually defined groups. * `forcats::fct_recode()`: Change factor levels by hand. ## Converting among frequency tables and data frames As we've seen, a given contingency table can be represented equivalently in different forms, but some R functions were designed for one particular representation. The table below shows some handy tools for converting from one form to another. | **From this** | | **To this** | | |:-----------------|:--------------------|:---------------------|-------------------| | | _Case form_ | _Frequency form_ | _Table form_ | | _Case form_ | noop | `xtabs(~A+B)` | `table(A,B)` | | _Frequency form_ | `expand.dft(X)` | noop | `xtabs(count~A+B)`| | _Table form_ | `expand.dft(X)` | `as.data.frame(X)` | noop | For example, a contingency table in table form (an object of `class(table)`) can be converted to a data.frame with `as.data.frame()`. ^[Because R is object-oriented, this is actually a short-hand for the function `as.data.frame.table()`.] The resulting `data.frame` contains columns representing the classifying factors and the table entries (as a column named by the `responseName` argument, defaulting to `Freq`. This is the inverse of `xtabs()`. ***Example***: Convert the `GSStab` in table form to a data.frame in frequency form. ```{r, convert-ex1} as.data.frame(GSStab) ``` ***Example***: Convert the `Arthritis` data in case form to a 3-way table of `Treatment` $\times$ `Sex` $\times$ `Improved`. 
Note the use of `with()` to avoid having to use `Arthritis$Treatment` etc. within the call to `table()`. ^[`table()` does not allow a `data` argument to provide an environment in which the table variables are to be found. In the examples in \@ref(sec:table) I used `attach(mydata)` for this purpose, but `attach()` leaves the variables on the search path, while `with()` just evaluates the `table()` expression in a temporary environment of the data.] ```{r, convert-ex2} Art.tab <- with(Arthritis, table(Treatment, Sex, Improved)) str(Art.tab) ftable(Art.tab) ``` There may also be times that you will need an equivalent case form `data.frame` with factors representing the table variables rather than the frequency table. For example, the `mca()` function in package `MASS` only operates on data in this format. Marc Schwartz initially provided code for `expand.dft()` on the R-help mailing list for converting a table back into a case form `data.frame`. This function is included in `vcdExtra`. ***Example***: Convert the `Arthritis` data in table form (`Art.tab`) back to a `data.frame` in case form, with factors `Treatment`, `Sex` and `Improved`. ```{r, convert-ex3} Art.df <- expand.dft(Art.tab) str(Art.df) ``` ## A complex example {#sec:complex} If you've followed so far, you're ready for a more complicated example. The data file, `tv.dat`, represents a 4-way table of size $5 \times 11 \times 5 \times 3$ where the table variables (unnamed in the file) are read as `V1` -- `V4`, and the cell frequency is read as `V5`. The file, stored in the `extdata` directory of the installed `vcdExtra` package, can be read as follows: ```{r, tv1} tv.data<-read.table(system.file("extdata","tv.dat", package="vcdExtra")) head(tv.data,5) ``` For a local file, just use `read.table()` in this form: ```{r, tv2,eval=FALSE} tv.data<-read.table("C:/R/data/tv.dat") ``` The data `tv.dat` came from the initial implementation of mosaic displays in R by Jay Emerson. 
In turn, they came from the initial development of mosaic displays [@vcd:Hartigan+Kleiner:1984] that illustrated the method with data on a large sample of TV viewers whose behavior had been recorded for the Nielsen ratings. This data set contains sample television audience data from Nielsen Media Research for the week starting November 6, 1995. The table variables are: * `V1`-- values 1:5 correspond to the days Monday--Friday; * `V2`-- values 1:11 correspond to the quarter hour times 8:00PM through 10:30PM; * `V3`-- values 1:5 correspond to ABC, CBS, NBC, Fox, and non-network choices; * `V4`-- values 1:3 correspond to transition states: turn the television Off, Switch channels, or Persist in viewing the current channel. We are interested in just the cell frequencies, and rely on the facts that (a) the table is complete--- there are no missing cells, so `nrow(tv.data)` = `r nrow(tv.data)`; (b) the observations are ordered so that `V1` varies most rapidly and `V4` most slowly. From this, we can just extract the frequency column and reshape it into an array. [That would be dangerous if any observations were out of order.] ```{r, tv3} TV <- array(tv.data[,5], dim=c(5,11,5,3)) dimnames(TV) <- list(c("Monday","Tuesday","Wednesday","Thursday","Friday"), c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", "9:45","10:00","10:15","10:30"), c("ABC","CBS","NBC","Fox","Other"), c("Off","Switch","Persist")) names(dimnames(TV))<-c("Day", "Time", "Network", "State") ``` More generally (even if there are missing cells), we can use `xtabs()` (or `plyr::daply()`) to do the cross-tabulation, using `V5` as the frequency variable. 
Here's how to do this same operation with `xtabs()`: ```{r, tv3a,eval=FALSE} TV <- xtabs(V5 ~ ., data=tv.data) dimnames(TV) <- list(Day = c("Monday","Tuesday","Wednesday","Thursday","Friday"), Time = c("8:00","8:15","8:30","8:45","9:00","9:15","9:30", "9:45","10:00","10:15","10:30"), Network = c("ABC","CBS","NBC","Fox","Other"), State = c("Off","Switch","Persist")) # table dimensions dim(TV) ``` But this 4-way table is too large and awkward to work with. Among the networks, Fox and Other occur infrequently, so we keep only ABC, CBS and NBC. We can also cut it down to a 3-way table by considering only viewers who persist with the current station. ^[This relies on the fact that indexing an array drops dimensions of length 1 by default, using the argument `drop=TRUE`; the result is coerced to the lowest possible dimension.] ```{r, tv4} TV2 <- TV[,,1:3,] # keep only ABC, CBS, NBC TV2 <- TV2[,,,3] # keep only Persist -- now a 3 way table structable(TV2) ``` Finally, for some purposes, we might want to collapse the 11 times into a smaller number. Half-hour time slots make more sense. Here, we use `as.data.frame.table()` to convert the table back to a data frame, `levels()` to re-assign the values of `Time`, and finally, `xtabs()` to give a new, collapsed frequency table. ```{r, tv5} TV.df <- as.data.frame.table(TV2) levels(TV.df$Time) <- c(rep("8:00", 2), rep("8:30", 2), rep("9:00", 2), rep("9:30", 2), rep("10:00",2), "10:30" ) TV3 <- xtabs(Freq ~ Day + Time + Network, TV.df) structable(Day ~ Time+Network, TV3) ``` We've come this far, so we might as well show a mosaic display. This is analogous to that used by @vcd:Hartigan+Kleiner:1984. ```{r tv-mosaic1, fig.height=6, fig.width=7} mosaic(TV3, shade = TRUE, labeling = labeling_border(rot_labels = c(0, 0, 0, 90))) ``` This mosaic display can be read at several levels, corresponding to the successive splits of the tiles and the residual shading. 
Several trends are clear for viewers who persist: * Overall, there are about the same number of viewers on each weekday, with slightly more on Thursday. * Looking at time slots, viewership is slightly greater from 9:00 - 10:00 overall and also 8:00 - 9:00 on Thursday and Friday From the residual shading of the tiles: * Monday: CBS dominates in all time slots. * Tuesday" ABC and CBS dominate after 9:00 * Thursday: is a largely NBC day * Friday: ABC dominates in the early evening # References vcdExtra/inst/doc/mobility.html0000644000176200001440000053112614470742314016310 0ustar liggesusers Mobility tables

Mobility tables

Michael Friendly

2023-08-21

Social mobility

Social mobility is an important concept in sociology, and its study has led to a wide range of developments in categorical data analysis in what are often called mobility tables.

The idea is to study the movement of individuals, families, households or other categories of people within or between social strata in a society, across time or space. This refers to a change in social status relative to one’s current social location within a given society.

Using survey data, the most frequent examples relate to changes in income or wealth, but most often this is studied via classification in occupational categories (“professional”, “managerial”, “skilled manual”, …). Most often this is studied intergenerationally using the occupational categories of fathers and sons.

Mobility tables are nearly always square tables, with the same categories for the row and column variables. As such, they nearly always exhibit positive associations along the diagonal cells. What is of interest are specialized models, intermediate between the null model of independence and the saturated model.

Models

These models include important special cases:

  • quasi-independence: Ignoring diagonal cells, are the row and column variables independent?
  • symmetry: Are associations above the diagonal equal to the corresponding ones below the diagonal?
  • row effects, col effects, linear x linear: Typically, the factors in such tables are ordinal. To what extent can the models be simplified by assigning integer scores to the row, column categories or both?
  • multiplicative RC: RC models attempt to estimate the scores for the row and column categories.
  • topological models: It is possible that the associations among occupational categories exhibit consistent patterns according to their nature. These models allow specifying a theoretically interesting pattern.
  • crossings models: assert that there are different difficulty parameters for crossing from one category to the next, and that associations between categories decrease with their separation.

While standard loglinear models can be fit using MASS::loglm(), these models require use of stats::glm() or gnm::gnm(), as I illustrate below.

Hauser data

This vignette uses the vcdExtra::Hauser79 dataset, a cross-classification of 19,912 individuals by father’s occupation and son’s first occupation for U.S. men aged 20-64 in 1973. The data comes from Hauser (1980) and has been also analysed by Powers & Xie (2008). The discussion draws on Friendly & Meyer (2016), Ch. 10.

data("Hauser79", package="vcdExtra")
str(Hauser79)
## 'data.frame':    25 obs. of  3 variables:
##  $ Son   : Factor w/ 5 levels "UpNM","LoNM",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ Father: Factor w/ 5 levels "UpNM","LoNM",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Freq  : num  1414 521 302 643 40 ...
(Hauser_tab <- xtabs(Freq ~ Father + Son, data=Hauser79))
##       Son
## Father UpNM LoNM  UpM  LoM Farm
##   UpNM 1414  521  302  643   40
##   LoNM  724  524  254  703   48
##   UpM   798  648  856 1676  108
##   LoM   756  914  771 3325  237
##   Farm  409  357  441 1611 1832

As can be seen, Hauser79 is a data.frame in frequency form. The factor levels in this table are a coarse grouping of occupational categories, so:

  • UpNM = professional and kindred workers, managers and officials, and non-retail sales workers;

  • LoNM = proprietors, clerical and kindred workers, and retail sales workers;

  • UpM = craftsmen, foremen, and kindred workers;

  • LoM = service workers, operatives and kindred workers, and laborers (except farm);

  • Farm = farmers and farm managers, farm laborers, and foremen.

Load packages

library(vcdExtra)
library(gnm)
library(dplyr)

Mosaic plots

Hauser_tab is a table object, and the simplest plot for the frequencies is the default plot() method, giving a graphics::mosaicplot().

plot(Hauser_tab, shade=TRUE)

The frequencies are first split according to father’s occupational category (the first table dimension) and then by sons’ occupation. The most common category for fathers is lower manual, followed by farm.

mosaicplot(), using shade=TRUE, colors the tiles according to the sign and magnitude of the residuals from an independence model: shades of blue for positive residuals and shades of red for negative residuals.

vcd::mosaic() gives a similar display, but is much more flexible in the labeling of the row and column variable, labels for the categories, and the scheme used for shading the tiles. Here, I simply assign longer labels for the row and column variables, using the labeling_args argument to mosaic().

labels <- list(set_varnames = c(Father="Father's occupation", 
                                Son="Son's occupation"))

mosaic(Freq ~ Father + Son, data=Hauser79,
       labeling_args = labels,
       shade=TRUE,
       legend = FALSE)

Fitting and graphing models

The call to vcd::mosaic() above takes the Hauser79 dataset as input. Internally, it fits the model of independence and displays the result, but for more complex tables, control of the fitted model is limited.

Unlike mosaicplot() and even the ggmosaic package, vcdExtra::mosaic.glm() is a mosaic method for glm objects. This means you can fit any model, and supply the model object to mosaic(). (Note that in mosaic(), the formula argument determines the order of splitting in the mosaic, not a loglinear formula.)

hauser.indep <- glm(Freq ~ Father + Son, 
  data=Hauser79, 
  family=poisson)

# the same mosaic, using the fitted model
mosaic(hauser.indep, formula = ~ Father + Son, 
       labeling_args = labels,
       legend = FALSE,
       main="Independence model")

Quasi-independence

Among the most important advances from the social mobility literature is the idea that associations between row and column variables in square tables can be explored in greater depth if we ignore the obvious association in the diagonal cells. The result is a model of quasi-independence, asserting that fathers’ and sons’ occupations are independent, ignoring the diagonal cells.

For a two-way table, quasi-independence can be expressed as \[ \pi_{ij} = \pi_{i+} \pi_{+j} \quad\quad \mbox{for } i\ne j \] or in loglinear form as: \[ \log m_{ij} = \mu + \lambda_i^A + \lambda_j^B + \delta_i I(i=j) \quad . \] This model effectively adds one parameter, \(\delta_i\), for each main diagonal cell and fits those frequencies perfectly.

In the gnm package, gnm::Diag() creates the appropriate term in the model formula, using a symbol in the diagonal cells and “.” otherwise.

# with symbols
with(Hauser79, Diag(Father, Son)) |> matrix(nrow=5)
##      [,1]   [,2]   [,3]  [,4]  [,5]  
## [1,] "UpNM" "."    "."   "."   "."   
## [2,] "."    "LoNM" "."   "."   "."   
## [3,] "."    "."    "UpM" "."   "."   
## [4,] "."    "."    "."   "LoM" "."   
## [5,] "."    "."    "."   "."   "Farm"

We proceed to fit and plot the quasi-independence model by updating the independence model, adding the term Diag(Father, Son).

# Quasi-independence: Diag(Father, Son) adds one parameter for each diagonal
# cell, so those cells are fit exactly and only off-diagonal cells are tested.
hauser.quasi <-  update(hauser.indep, 
                        ~ . + Diag(Father, Son))

mosaic(hauser.quasi, ~ Father+Son, 
       labeling_args = labels,
       legend = FALSE,
       main="Quasi-independence model")

Note that the pattern of residuals shows a systematic pattern of positive and negative residuals above and below the diagonal tiles. We turn to this next.

Symmetry and quasi-symmetry

Another advance from the social mobility literature was the idea of how to test for differences in occupational categories between fathers and sons. The null hypothesis of no systematic differences can be formulated as a test of symmetry in the table, \[ \pi_{ij} = \pi_{ji} \quad\quad \mbox{for } i\ne j \quad , \] which asserts that sons are as likely to move from their father’s occupation \(i\) to another category \(j\) as they were to move in the reverse direction, \(j\) to \(i\). An alternative, “upward mobility”, i.e., that sons who did not stay in their father’s occupational category moved to a higher category on average, would mean (with the categories ordered from highest to lowest, as here) that \[ \pi_{ij} < \pi_{ji} \quad\quad \mbox{for } i < j \quad . \] Yet the symmetry model is overly strong, because it also asserts marginal homogeneity, that the marginal probabilities of row and column values are equal, \(\pi_{i+} = \pi_{+i}\) for all \(i\). Consequently, this hypothesis is most often tested as a model for quasi-symmetry, that also ignores the diagonal cells.

Symmetry is modeled by the function gnm::Symm(). It returns a factor with the same labels for positions above and below the diagonal.

with(Hauser79, Symm(Father, Son)) |> matrix(nrow=5)
##      [,1]        [,2]        [,3]       [,4]       [,5]       
## [1,] "UpNM:UpNM" "UpNM:LoNM" "UpNM:UpM" "UpNM:LoM" "UpNM:Farm"
## [2,] "UpNM:LoNM" "LoNM:LoNM" "LoNM:UpM" "LoNM:LoM" "LoNM:Farm"
## [3,] "UpNM:UpM"  "LoNM:UpM"  "UpM:UpM"  "UpM:LoM"  "UpM:Farm" 
## [4,] "UpNM:LoM"  "LoNM:LoM"  "UpM:LoM"  "LoM:LoM"  "LoM:Farm" 
## [5,] "UpNM:Farm" "LoNM:Farm" "UpM:Farm" "LoM:Farm" "Farm:Farm"

To fit the model of quasi-symmetry, add both Diag() and Symm() to the model of independence.

# Quasi-symmetry: add both the diagonal term, Diag(), and the symmetric
# association term, Symm(), to the independence model.
hauser.qsymm <-  update(hauser.indep, 
                        ~ . + Diag(Father,Son) + Symm(Father,Son))

To compare the models so far, we can use anova() or vcdExtra::LRstats():

anova(hauser.indep, hauser.quasi, hauser.qsymm, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: Freq ~ Father + Son
## Model 2: Freq ~ Father + Son + Diag(Father, Son)
## Model 3: Freq ~ Father + Son + Diag(Father, Son) + Symm(Father, Son)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        16     6170.1                          
## 2        11      683.3  5   5486.8 < 2.2e-16 ***
## 3         6       27.4  5    655.9 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

LRstats(hauser.indep, hauser.quasi, hauser.qsymm)
## Likelihood summary table:
##                 AIC    BIC LR Chisq Df Pr(>Chisq)    
## hauser.indep 6390.8 6401.8   6170.1 16  < 2.2e-16 ***
## hauser.quasi  914.1  931.1    683.3 11  < 2.2e-16 ***
## hauser.qsymm  268.2  291.3     27.4  6  0.0001193 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

This hauser.qsymm model represents a huge improvement in goodness of fit. With such a large sample size, it might be considered an acceptable fit.

But, this model of quasi-symmetry still shows some residual lack of fit. To visualize this in the mosaic, we can label the cells with their standardized residuals.

mosaic(hauser.qsymm, ~ Father+Son, 
       labeling_args = labels,
       labeling = labeling_residuals,
       residuals_type ="rstandard",
       legend = FALSE,
       main="Quasi-symmetry model")

The cells with the largest lack of symmetry (using standardized residuals) are those for the upper and lower non-manual occupations, where the son of an upper non-manual worker is less likely to move to lower non-manual work than the reverse.

Topological models

It is also possible that there are more subtle patterns of association one might want to model, with specific parameters for particular combinations of the occupational categories (beyond the idea of symmetry). Hauser (1980) developed this idea in what are now called topological models or levels models, where an arbitrary pattern of associations can be specified, implemented in gnm::Topo().

# Levels for Hauser 5-level model
levels <- matrix(c(
      2,  4,  5,  5,  5,
      3,  4,  5,  5,  5,
      5,  5,  5,  5,  5,
      5,  5,  5,  4,  4,
      5,  5,  5,  4,  1), 
      nrow = 5, ncol = 5, 
      byrow=TRUE)
hauser.topo <- update(hauser.indep, 
                      ~ . + Topo(Father, Son, spec=levels))

mosaic(hauser.topo, ~Father+Son, 
       labeling_args = labels,
       labeling = labeling_residuals,
       residuals_type ="rstandard",
       legend = FALSE,
       main="Topological model")

Comparing models, we can see that the model of quasi-symmetry is the best so far, using AIC as the measure:

LRstats(hauser.indep, hauser.quasi, hauser.qsymm, hauser.topo, sortby = "AIC")
## Likelihood summary table:
##                 AIC    BIC LR Chisq Df Pr(>Chisq)    
## hauser.indep 6390.8 6401.8   6170.1 16  < 2.2e-16 ***
## hauser.quasi  914.1  931.1    683.3 11  < 2.2e-16 ***
## hauser.topo   295.3  311.1     66.6 12  1.397e-09 ***
## hauser.qsymm  268.2  291.3     27.4  6  0.0001193 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ordinal tables

Because the factors in mobility tables are ordered, another path to simplifying the saturated model is to consider assigning numerical scores (typically consecutive integers) to the categories.

When both variables are assigned scores, this gives the linear-by-linear model, \[ \log ( m_{ij} ) = \mu + \lambda_i^A + \lambda_j^B + \gamma \: a_i b_j \quad , \] where \(a_i\) and \(b_j\) are the row and column numeric scores. This model is also called the model of uniform association (Goodman, 1979) because, for integer scores, \(a_i=i\), \(b_j=j\), this model has only one extra parameter, \(\gamma\), which is the common local log odds ratio. The independence model is the special case, \(\gamma=0\). In contrast, the saturated model, allowing general association \(\lambda_{ij}^{AB}\), uses \((I-1)(J-1)\) additional parameters.

For square tables, like mobility tables, this model can be amended to include a diagonal term, Diag()

# Numeric scores for the ordered categories: as.numeric() on a factor returns
# its integer level codes, here 1 = UpNM, ..., 5 = Farm.
Sscore <- as.numeric(Hauser79$Son)
Fscore <- as.numeric(Hauser79$Father)

# Show each score next to the factor it was derived from
# (Fscore for Father, Sscore for Son).
Hauser79 |> cbind(Fscore, Sscore) |> head()
##    Son Father Freq Fscore Sscore
## 1 UpNM   UpNM 1414      1      1
## 2 LoNM   UpNM  521      1      2
## 3  UpM   UpNM  302      1      3
## 4  LoM   UpNM  643      1      4
## 5 Farm   UpNM   40      1      5
## 6 UpNM   LoNM  724      2      1

To fit this model, I use Fscore:Sscore for the linear x linear association and add Diag(Father, Son) to fit the diagonal cells exactly.

# Uniform association with diagonal: Fscore:Sscore is the single
# linear-by-linear term; Diag(Father, Son) fits the diagonal cells exactly.
hauser.UAdiag <- update(hauser.indep,
                        . ~ . + Fscore : Sscore + Diag(Father, Son))

LRstats(hauser.UAdiag)
## Likelihood summary table:
##                  AIC BIC LR Chisq Df Pr(>Chisq)    
## hauser.UAdiag 305.72 324   73.007 10  1.161e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In this model, the estimated common local log odds ratio—the coefficient \(\gamma\) for the linear-by-linear term Fscore:Sscore, is given by:

coef(hauser.UAdiag)[["Fscore:Sscore"]]
## [1] 0.1584003

For comparisons not involving the diagonal cells, each step down the scale of occupational categories for the father multiplies the odds that the son will also be in one lower category by \(\exp (0.158) = 1.172\), an increase of 17%.

But this model does not seem to be any improvement over quasi-symmetry. From the pattern of residuals in the mosaic, we see a number of large residuals of various signs in the lower triangle, where the son’s occupation is of a higher level than that of the father.

mosaic(hauser.UAdiag, ~ Father+Son, 
       labeling_args = labels,
       labeling = labeling_residuals,
       residuals_type ="rstandard",
       legend = FALSE,
       main="Uniform association + Diag()")

Model comparison plots

Finally, for comparing a largish collection of models, a model comparison plot can show the trade-off between goodness-of-fit and parsimony by plotting measures like \(G^2/df\), AIC, or BIC against degrees of freedom. The plot below, including quite a few more models, uses a log scale for BIC to emphasize differences among better fitting models. (The code for this plot is shown on p. 399 of Friendly & Meyer (2016)).

References

Friendly, M., & Meyer, D. (2016). Discrete data analysis with R: Visualization and modeling techniques for categorical and count data. Boca Raton, FL: Chapman & Hall/CRC.
Goodman, L. A. (1979). Simple models for the analysis of association in cross-classifications having ordered categories. Journal of the American Statistical Association, 74, 537–552.
Hauser, R. M. (1980). Some exploratory methods for modeling mobility tables and other cross-classified data. In K. F. Schuessler (Ed.), Sociological methodology 1980 (pp. 413–458). San Francisco: Jossey-Bass.
Powers, D. A., & Xie, Y. (2008). Statistical methods for categorical data analysis (Second). Bingley, UK: Emerald.
vcdExtra/inst/doc/mobility.Rmd0000644000176200001440000003507114422306403016054 0ustar liggesusers--- title: "Mobility tables" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib", "vignettes.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Mobility tables} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/mobility-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # colorize text colorize <- function(x, color) { if (knitr::is_latex_output()) { sprintf("\\textcolor{%s}{%s}", color, x) } else if (knitr::is_html_output()) { sprintf("%s", color, x) } else x } ``` ## Social mobility Social mobility is an important concept in sociology, and its study has led to a wide range of developments in categorical data analysis in what are often called _mobility tables_. The idea is to study the movement of individuals, families, households or other categories of people within or between social strata in a society, across time or space. This refers to a change in social status relative to one's current social location within a given society. Using survey data, the most frequent examples relate to changes in income or wealth, but most often this is studied via classification in occupational categories ("professional", "managerial", "skilled manual", ...). Most often this is studied _intergenerationally_ using the occupational categories of fathers and sons. Mobility tables are nearly always _square_ tables, with the same categories for the row and column variables. As such, they nearly always exhibit positive associations along the diagonal cells. 
What is of interest are specialized models, intermediate between the null model of independence and the saturated model. ### Models These models include important special cases: - **quasi-independence**: Ignoring diagonal cells, are the row and column variables independent? - **symmetry**: Are associations above the diagonal equal to the corresponding ones below the diagonal? - **row effects, col effects, linear x linear**: Typically, the factors in such tables are ordinal. To what extent can the models be simplified by assigning integer scores to the row categories, the column categories, or both? - **multiplicative RC**: RC models attempt to estimate the scores for the row and column categories. - **topological models**: It is possible that the associations among occupational categories exhibit consistent patterns according to their nature. These models allow specifying a theoretically interesting pattern. - **crossings models**: assert that there are different difficulty parameters for crossing from one category to the next and associations between categories decrease with their separation. While standard loglinear models can be fit using `MASS::loglm`, these models require use of `stats::glm()` or `gnm::gnm()`, as I illustrate below. ## Hauser data This vignette uses the `vcdExtra::Hauser79` dataset, a cross-classification of 19,912 individuals by father's occupation and son's first occupation for U.S. men aged 20-64 in 1973. The data comes from @Hauser:79 and has also been analysed by @PowersXie:2008. The discussion draws on @FriendlyMeyer:2016:DDAR, Ch. 10. ```{r hauser-data} data("Hauser79", package="vcdExtra") str(Hauser79) (Hauser_tab <- xtabs(Freq ~ Father + Son, data=Hauser79)) ``` As can be seen, `Hauser79` is a data.frame in frequency form. 
The factor levels in this table are a coarse grouping of occupational categories, so: - `UpNM` = professional and kindred workers, managers and officials, and non-retail sales workers; - `LoNM` = proprietors, clerical and kindred workers, and retail sales workers; - `UpM` = craftsmen, foremen, and kindred workers; - `LoM` = service workers, operatives and kindred workers, and laborers (except farm); - `Farm` = farmers and farm managers, farm laborers, and foremen. ### Load packages ```{r load} library(vcdExtra) library(gnm) library(dplyr) ``` ### Mosaic plots `Hauser_tab` is a `table` object, and the simplest plot for the frequencies is the default `plot()` method, giving a `graphics::mosaicplot()`. ```{r mosaicplot} plot(Hauser_tab, shade=TRUE) ``` The frequencies are first split according to father's occupational category (the first table dimension) and then by sons' occupation. The most common category for fathers is lower manual, followed by farm. `mosaicplot()`, using `shade=TRUE` colors the tiles according to the sign and magnitude of the residuals from an independence model: shades of `r colorize("positive", "blue")` for positive residuals and `r colorize("negative", "red")` red for negative residuals. `vcd::mosaic()` gives a similar display, but is much more flexible in the labeling of the row and column variable, labels for the categories, and the scheme used for shading the tiles. Here, I simply assign longer labels for the row and column variables, using the `labeling_args` argument to `mosaic()`. ```{r mosaic1} labels <- list(set_varnames = c(Father="Father's occupation", Son="Son's occupation")) mosaic(Freq ~ Father + Son, data=Hauser79, labeling_args = labels, shade=TRUE, legend = FALSE) ``` ### Fitting and graphing models The call to `vcd::mosaic()` above takes the `Hauser79` dataset as input. Internally, it fits the model of independence and displays the result, but for more complex tables, control of the fitted model is limited. 
Unlike `mosaicplot()` and even the [`ggmosaic`]( https://CRAN.R-project.org/package=ggmosaic) package, `vcdExtra::mosaic.glm()` is a `mosaic` **method** for `glm` objects. This means you can fit any model, and supply the model object to `mosaic()`. (Note that in `mosaic()`, the `formula` argument determines the order of splitting in the mosaic, not a loglinear formula.) ```{r indep} hauser.indep <- glm(Freq ~ Father + Son, data=Hauser79, family=poisson) # the same mosaic, using the fitted model mosaic(hauser.indep, formula = ~ Father + Son, labeling_args = labels, legend = FALSE, main="Independence model") ``` ## Quasi-independence Among the most important advances from the social mobility literature is the idea that associations between row and column variables in square tables can be explored in greater depth if we ignore the obvious association in the diagonal cells. The result is a model of _quasi-independence_, asserting that fathers' and sons' occupations are independent, ignoring the diagonal cells. For a two-way table, quasi-independence can be expressed as $$ \pi_{ij} = \pi_{i+} \pi_{+j} \quad\quad \mbox{for } i\ne j $$ or in loglinear form as: $$ \log m_{ij} = \mu + \lambda_i^A + \lambda_j^B + \delta_i I(i=j) \quad . $$ This model effectively adds one parameter, $\delta_i$, for each main diagonal cell and fits those frequencies perfectly. In the [`gnm`]( https://CRAN.R-project.org/package=gnm) package, `gnm::Diag()` creates the appropriate term in the model formula, using a symbol in the diagonal cells and "." otherwise. ```{r Diag} # with symbols with(Hauser79, Diag(Father, Son)) |> matrix(nrow=5) ``` We proceed to fit and plot the quasi-independence model by updating the independence model, adding the term `Diag(Father, Son)`. ```{r quasi} hauser.quasi <- update(hauser.indep, ~ . 
+ Diag(Father, Son)) mosaic(hauser.quasi, ~ Father+Son, labeling_args = labels, legend = FALSE, main="Quasi-independence model") ``` Note that the pattern of residuals shows a systematic pattern of `r colorize("positive", "blue")` and `r colorize("negative", "red")` residuals above and below the diagonal tiles. We turn to this next. ### Symmetry and quasi-symmetry Another advance from the social mobility literature was the idea of how to test for _differences_ in occupational categories between fathers and sons. The null hypothesis of no systematic differences can be formulated as a test of **symmetry** in the table, $$ \pi_{ij} = \pi_{ji} \quad\quad \mbox{for } i\ne j \quad , $$ which asserts that sons are as likely to move from their father's occupation $i$ to another category $j$ as they were to move in the reverse direction, $j$ to $i$. An alternative, "Upward mobility", i.e., that sons who did not stay in their father's occupational category moved to a higher category on average would mean that $$ \pi_{ij} < \pi_{ji} \quad\quad \mbox{for } i\ne j $$ Yet this model is overly strong, because it also asserts **marginal homogeneity**, that the marginal probabilities of row and column values are equal, $\pi_{i+} = \pi_{+i}$ for all $i$. Consequently, this hypothesis is most often tested as a model for **quasi-symmetry**, that also ignores the diagonal cells. Symmetry is modeled by the function `gnm::Symm()`. It returns a factor with the same labels for positions above and below the diagonal. ```{r symm} with(Hauser79, Symm(Father, Son)) |> matrix(nrow=5) ``` To fit the model of quasi-symmetry, add both `Diag()` and `Symm()` to the model of independence. ```{r qsymm} hauser.qsymm <- update(hauser.indep, ~ . 
+ Diag(Father,Son) + Symm(Father,Son)) ``` To compare the models so far, we can use `anova()` or `vcdExtra::LRstats()`: ```{r anova1} anova(hauser.indep, hauser.quasi, hauser.qsymm, test="Chisq") LRstats(hauser.indep, hauser.quasi, hauser.qsymm) ``` This `hauser.qsymm` model represents a huge improvement in goodness of fit. With such a large sample size, it might be considered an acceptable fit. But, this model of quasi-symmetry still shows some residual lack of fit. To visualize this in the mosaic, we can label the cells with their standardized residuals. ```{r qsymm-mosaic} mosaic(hauser.qsymm, ~ Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Quasi-symmetry model") ``` The cells with the largest lack of symmetry (using standardized residuals) are those for the upper and lower non-manual occupations, where the son of an upper non-manual worker is less likely to move to lower non-manual work than the reverse. ### Topological models It is also possible that there are more subtle patterns of association one might want to model, with specific parameters for particular combinations of the occupational categories (beyond the idea of symmetry). @Hauser:79 developed this idea in what are now called **topological** models or **levels** models, where an arbitrary pattern of associations can be specified, implemented in `gnm::Topo()`. ```{r topo-levels} # Levels for Hauser 5-level model levels <- matrix(c( 2, 4, 5, 5, 5, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 5, 5, 4, 1), nrow = 5, ncol = 5, byrow=TRUE) ``` ```{r topo-mosaic} hauser.topo <- update(hauser.indep, ~ . 
+ Topo(Father, Son, spec=levels)) mosaic(hauser.topo, ~Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Topological model") ``` Comparing models, we can see that the model of quasi-symmetry is the best so far, using AIC as the measure: ```{r} LRstats(hauser.indep, hauser.quasi, hauser.qsymm, hauser.topo, sortby = "AIC") ``` ## Ordinal tables Because the factors in mobility tables are ordered, another path to simplifying the saturated model is to consider assigning numerical scores (typically consecutive integers) to the categories. When both variables are assigned scores, this gives the **linear-by-linear model**, $$ \log ( m_{ij} ) = \mu + \lambda_i^A + \lambda_j^B + \gamma \: a_i b_j \quad , $$ where $a_i$ and $b_j$ are the row and column numeric scores. This model is also called the model of **uniform association** [@Goodman:79] because, for integer scores, $a_i=i$, $b_j=j$, this model has only one extra parameter, $\gamma$, which is the common local log odds ratio. The independence model is the special case, $\gamma=0$. In contrast, the saturated model, allowing general association $\lambda_{ij}^{AB}$, uses $(I-1)(J-1)$ additional parameters. For square tables, like mobility tables, this model can be amended to include a diagonal term, `Diag()`. ```{r scores} Sscore <- as.numeric(Hauser79$Son) Fscore <- as.numeric(Hauser79$Father) Hauser79 |> cbind(Fscore, Fscore) |> head() ``` To fit this model, I use `Fscore:Sscore` for the linear x linear association and add `Diag(Father, Son)` to fit the diagonal cells exactly. ```{r hauser-UAdiag} hauser.UAdiag <- update(hauser.indep, . ~ . 
+ Fscore : Sscore + Diag(Father, Son)) LRstats(hauser.UAdiag) ``` In this model, the estimated common local log odds ratio---the coefficient $\gamma$ for the linear-by-linear term `Fscore:Sscore`---is given by: ```{r} coef(hauser.UAdiag)[["Fscore:Sscore"]] ``` For comparisons not involving the diagonal cells, each step down the scale of occupational categories for the father multiplies the odds that the son will also be in one lower category by $\exp (0.158) = 1.172$, an increase of 17%. But this model does not seem to be any improvement over quasi-symmetry. From the pattern of residuals in the mosaic, we see a number of large residuals of various signs in the lower triangle, where the son's occupation is of a higher level than that of the father. ```{r UAdiag-mosaic} mosaic(hauser.UAdiag, ~ Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Uniform association + Diag()") ``` ## Model comparison plots Finally, for comparing a largish collection of models, a model comparison plot can show the trade-off between goodness-of-fit and parsimony by plotting measures like $G^2/df$, AIC, or BIC against degrees of freedom. The plot below, including quite a few more models, uses a log scale for BIC to emphasize differences among better fitting models. (The code for this plot is shown on p. 399 of @FriendlyMeyer:2016:DDAR). 
![](fig/hauser-model-plot.png){width=80%} ## References vcdExtra/inst/doc/demo-housing.R0000644000176200001440000000730514470742312016306 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/demo-housing-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # colorize text colorize <- function(x, color) { if (knitr::is_latex_output()) { sprintf("\\textcolor{%s}{%s}", color, x) } else if (knitr::is_html_output()) { sprintf("%s", color, x) } else x } ## ----------------------------------------------------------------------------- library(vcdExtra) library(MASS) library(effects) ## ----housing------------------------------------------------------------------ data(housing, package="MASS") str(housing) ## ----------------------------------------------------------------------------- levels(housing$Sat) levels(housing$Infl) ## ----house.null--------------------------------------------------------------- house.null <- glm(Freq ~ Sat + Infl + Type + Cont, family = poisson, data = housing) ## ----house.glm0--------------------------------------------------------------- house.glm0 <- glm(Freq ~ Sat + Infl*Type*Cont, family = poisson, data = housing) ## ----anova-------------------------------------------------------------------- anova(house.null, house.glm0, test = "Chisq") ## ----------------------------------------------------------------------------- # labeling_args for mosaic() largs <- list(set_varnames = c( Infl="Influence on management", Cont="Contact among residents", Type="Type of dwelling", Sat="Satisfaction"), abbreviate=c(Type=3)) mosaic(house.glm0, labeling_args=largs, main='Baseline model: [ITC][Sat]') ## ----mosaic-glm0b------------------------------------------------------------- mosaic(house.glm0, formula = ~ Type + Infl + Cont + 
Sat, labeling_args=largs, main=paste('Baseline model: [ITC][Sat],', modFit(house.glm0)) ) ## ----addterm------------------------------------------------------------------ MASS::addterm(house.glm0, ~ . + Sat:(Infl + Type + Cont), test = "Chisq") ## ----house-glm1--------------------------------------------------------------- house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl + Type + Cont)) ## ----house-loglm1------------------------------------------------------------- (house.loglm1 <- MASS::loglm(Freq ~ Infl * Type * Cont + Sat*(Infl + Type + Cont), data = housing)) ## ----------------------------------------------------------------------------- anova(house.glm0, house.glm1, test="Chisq") ## ----mosaic-glm1-------------------------------------------------------------- mosaic(house.glm1, labeling_args=largs, main=paste('Model [IS][TS][CS],', modFit(house.glm1) ), gp=shading_Friendly) ## ----dropterm----------------------------------------------------------------- MASS::dropterm(house.glm1, test = "Chisq") ## ----addterm1----------------------------------------------------------------- MASS::addterm(house.glm1, ~. + Sat:(Infl + Type + Cont)^2, test = "Chisq") ## ----------------------------------------------------------------------------- house.glm2 <- update(house.glm1, . ~ . 
+ Sat:Infl:Type) ## ----lrstats------------------------------------------------------------------ LRstats(house.glm0, house.glm1, house.glm2) vcdExtra/inst/doc/datasets.Rmd0000644000176200001440000001024414422306403016027 0ustar liggesusers--- title: "Datasets for categorical data analysis" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Datasets for categorical data analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/datasets-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") ``` The `vcdExtra` package contains `r nrow(vcdExtra::datasets("vcdExtra"))` datasets, taken from the literature on categorical data analysis, and selected to illustrate various methods of analysis and data display. These are in addition to the `r nrow(vcdExtra::datasets("vcd"))` datasets in the [vcd package](https://cran.r-project.org/package=vcd). To make it easier to find those which illustrate a particular method, the datasets in `vcdExtra` have been classified using method tags. This vignette creates an "inverse table", listing the datasets that apply to each method. It also illustrates a general method for classifying datasets in R packages. ```{r load} library(dplyr) library(tidyr) library(readxl) ``` ## Processing tags Using the result of `vcdExtra::datasets(package="vcdExtra")` I created a spreadsheet, `vcdExtra-datasets.xlsx`, and then added method tags. 
```{r read-datasets} dsets_tagged <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"), sheet="vcdExtra-datasets") dsets_tagged <- dsets_tagged |> dplyr::select(-Title, -dim) |> dplyr::rename(dataset = Item) head(dsets_tagged) ``` To invert the table, we need to split tags into separate observations, then collapse the rows for the same tag. ```{r split-tags} dset_split <- dsets_tagged |> tidyr::separate_longer_delim(tags, delim = ";") |> dplyr::mutate(tag = stringr::str_trim(tags)) |> dplyr::select(-tags) #' ## collapse the rows for the same tag tag_dset <- dset_split |> arrange(tag) |> dplyr::group_by(tag) |> dplyr::summarise(datasets = paste(dataset, collapse = "; ")) |> ungroup() # get a list of the unique tags unique(tag_dset$tag) ``` ## Make this into a nice table Another sheet in the spreadsheet gives a more descriptive `topic` corresponding to each tag. ```{r read-tags} tags <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"), sheet="tags") head(tags) ``` Now, join this with the `tag_dset` created above. ```{r join-tags} tag_dset <- tag_dset |> dplyr::left_join(tags, by = "tag") |> dplyr::relocate(topic, .after = tag) tag_dset |> dplyr::select(-tag) |> head() ``` ### Add links to `help()` We're almost there. It would be nice if the dataset names could be linked to their documentation. This function is designed to work with the `pkgdown` site. There are different ways this can be done, but what seems to work is a link to `../reference/{dataset}.html`. Unfortunately, this won't work in the actual vignette. 
```{r add-links} add_links <- function(dsets, style = c("reference", "help", "rdrr.io"), sep = "; ") { style <- match.arg(style) names <- stringr::str_split_1(dsets, sep) names <- dplyr::case_when( style == "help" ~ glue::glue("[{names}](help({names}))"), style == "reference" ~ glue::glue("[{names}](../reference/{names}.html)"), style == "rdrr.io" ~ glue::glue("[{names}](https://rdrr.io/cran/vcdExtra/man/{names}.html)") ) glue::glue_collapse(names, sep = sep) } ``` ## Make the table {#table} Use `purrr::map()` to apply `add_links()` to all the datasets for each tag. (`mutate(datasets = add_links(datasets))` by itself doesn't work.) ```{r kable} tag_dset |> dplyr::select(-tag) |> dplyr::mutate(datasets = purrr::map(datasets, add_links)) |> knitr::kable() ``` Voila! vcdExtra/inst/doc/mobility.R0000644000176200001440000001125714470742314015543 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/mobility-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # colorize text colorize <- function(x, color) { if (knitr::is_latex_output()) { sprintf("\\textcolor{%s}{%s}", color, x) } else if (knitr::is_html_output()) { sprintf("%s", color, x) } else x } ## ----hauser-data-------------------------------------------------------------- data("Hauser79", package="vcdExtra") str(Hauser79) (Hauser_tab <- xtabs(Freq ~ Father + Son, data=Hauser79)) ## ----load--------------------------------------------------------------------- library(vcdExtra) library(gnm) library(dplyr) ## ----mosaicplot--------------------------------------------------------------- plot(Hauser_tab, shade=TRUE) ## ----mosaic1------------------------------------------------------------------ labels <- list(set_varnames = c(Father="Father's occupation", Son="Son's occupation")) 
mosaic(Freq ~ Father + Son, data=Hauser79, labeling_args = labels, shade=TRUE, legend = FALSE) ## ----indep-------------------------------------------------------------------- hauser.indep <- glm(Freq ~ Father + Son, data=Hauser79, family=poisson) # the same mosaic, using the fitted model mosaic(hauser.indep, formula = ~ Father + Son, labeling_args = labels, legend = FALSE, main="Independence model") ## ----Diag--------------------------------------------------------------------- # with symbols with(Hauser79, Diag(Father, Son)) |> matrix(nrow=5) ## ----quasi-------------------------------------------------------------------- hauser.quasi <- update(hauser.indep, ~ . + Diag(Father, Son)) mosaic(hauser.quasi, ~ Father+Son, labeling_args = labels, legend = FALSE, main="Quasi-independence model") ## ----symm--------------------------------------------------------------------- with(Hauser79, Symm(Father, Son)) |> matrix(nrow=5) ## ----qsymm-------------------------------------------------------------------- hauser.qsymm <- update(hauser.indep, ~ . + Diag(Father,Son) + Symm(Father,Son)) ## ----anova1------------------------------------------------------------------- anova(hauser.indep, hauser.quasi, hauser.qsymm, test="Chisq") LRstats(hauser.indep, hauser.quasi, hauser.qsymm) ## ----qsymm-mosaic------------------------------------------------------------- mosaic(hauser.qsymm, ~ Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Quasi-symmetry model") ## ----topo-levels-------------------------------------------------------------- # Levels for Hauser 5-level model levels <- matrix(c( 2, 4, 5, 5, 5, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 5, 5, 4, 1), nrow = 5, ncol = 5, byrow=TRUE) ## ----topo-mosaic-------------------------------------------------------------- hauser.topo <- update(hauser.indep, ~ . 
+ Topo(Father, Son, spec=levels)) mosaic(hauser.topo, ~Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Topological model") ## ----------------------------------------------------------------------------- LRstats(hauser.indep, hauser.quasi, hauser.qsymm, hauser.topo, sortby = "AIC") ## ----scores------------------------------------------------------------------- Sscore <- as.numeric(Hauser79$Son) Fscore <- as.numeric(Hauser79$Father) Hauser79 |> cbind(Fscore, Fscore) |> head() ## ----hauser-UAdiag------------------------------------------------------------ hauser.UAdiag <- update(hauser.indep, . ~ . + Fscore : Sscore + Diag(Father, Son)) LRstats(hauser.UAdiag) ## ----------------------------------------------------------------------------- coef(hauser.UAdiag)[["Fscore:Sscore"]] ## ----UAdiag-mosaic------------------------------------------------------------ mosaic(hauser.UAdiag, ~ Father+Son, labeling_args = labels, labeling = labeling_residuals, residuals_type ="rstandard", legend = FALSE, main="Uniform association + Diag()") vcdExtra/inst/doc/continuous.R0000644000176200001440000000611114470742307016114 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut05-", fig.align = "center", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? 
set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ## ---- spine1------------------------------------------------------------------ (spine(Improved ~ Age, data = Arthritis, breaks = 3)) (spine(Improved ~ Age, data = Arthritis, breaks = "Scott")) ## ----------------------------------------------------------------------------- cdplot(Improved ~ Age, data = Arthritis) ## ----------------------------------------------------------------------------- cdplot(Improved ~ Age, data = Arthritis) with(Arthritis, rug(jitter(Age), col="white", quiet=TRUE)) ## ---- donner1----------------------------------------------------------------- data(Donner, package="vcdExtra") str(Donner) ## ---- donner2a, fig=FALSE, eval=FALSE----------------------------------------- # # separate linear fits on age for M/F # ggplot(Donner, aes(age, survived, color = sex)) + # geom_point(position = position_jitter(height = 0.02, width = 0)) + # stat_smooth(method = "glm", # method.args = list(family = binomial), # formula = y ~ x, # alpha = 0.2, size=2, aes(fill = sex)) ## ---- donner2b, fig=FALSE, eval=FALSE----------------------------------------- # # separate quadratics # ggplot(Donner, aes(age, survived, color = sex)) + # geom_point(position = position_jitter(height = 0.02, width = 0)) + # stat_smooth(method = "glm", # method.args = list(family = binomial), # formula = y ~ poly(x,2), # alpha = 0.2, size=2, aes(fill = sex)) ## ----------------------------------------------------------------------------- # separate linear fits on age for M/F ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ x, alpha = 0.2, size=2, aes(fill = sex)) # separate quadratics ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position 
= position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ poly(x,2), alpha = 0.2, size=2, aes(fill = sex)) vcdExtra/inst/doc/tests.html0000644000176200001440000033160314470742317015623 0ustar liggesusers Tests of Independence

Tests of Independence

Michael Friendly

2023-08-21

OK, now we’re ready to do some analyses. This vignette focuses on relatively simple non-parametric tests and measures of association.

CrossTable

For tabular displays, the CrossTable() function in the gmodels package produces cross-tabulations modeled after PROC FREQ in SAS or CROSSTABS in SPSS. It has a wealth of options for the quantities that can be shown in each cell.

Recall the GSS data used earlier.

# Agresti (2002), table 3.11, p. 106
GSS <- data.frame(
  expand.grid(sex = c("female", "male"), 
              party = c("dem", "indep", "rep")),
  count = c(279,165,73,47,225,191))

(GSStab <- xtabs(count ~ sex + party, data=GSS))
##         party
## sex      dem indep rep
##   female 279    73 225
##   male   165    47 191

Generate a cross-table showing cell frequency and the cell contribution to \(\chi^2\).

# 2-Way Cross Tabulation
library(gmodels)
CrossTable(GSStab, prop.t=FALSE, prop.r=FALSE, prop.c=FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |-------------------------|
## 
##  
## Total Observations in Table:  980 
## 
##  
##              | party 
##          sex |       dem |     indep |       rep | Row Total | 
## -------------|-----------|-----------|-----------|-----------|
##       female |       279 |        73 |       225 |       577 | 
##              |     1.183 |     0.078 |     1.622 |           | 
## -------------|-----------|-----------|-----------|-----------|
##         male |       165 |        47 |       191 |       403 | 
##              |     1.693 |     0.112 |     2.322 |           | 
## -------------|-----------|-----------|-----------|-----------|
## Column Total |       444 |       120 |       416 |       980 | 
## -------------|-----------|-----------|-----------|-----------|
## 
## 

There are options to report percentages (row, column, cell), specify decimal places, produce Chi-square, Fisher, and McNemar tests of independence, report expected and residual values (pearson, standardized, adjusted standardized), include missing values as valid, annotate with row and column titles, and format as SAS or SPSS style output! See help(CrossTable) for details.

Chi-square test

For 2-way tables you can use chisq.test() to test independence of the row and column variable. By default, the \(p\)-value is calculated from the asymptotic chi-squared distribution of the test statistic. Optionally, the \(p\)-value can be derived via Monte Carlo simulation.

(HairEye <- margin.table(HairEyeColor, c(1, 2)))
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    68   20    15     5
##   Brown   119   84    54    29
##   Red      26   17    14    14
##   Blond     7   94    10    16

chisq.test(HairEye)
## 
##  Pearson's Chi-squared test
## 
## data:  HairEye
## X-squared = 138.29, df = 9, p-value < 2.2e-16

chisq.test(HairEye, simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  HairEye
## X-squared = 138.29, df = NA, p-value = 0.0004998

Fisher Exact Test

fisher.test(X) provides an exact test of independence. X must be a two-way contingency table in table form. Another form, fisher.test(X, Y) takes two categorical vectors of the same length.
For tables larger than \(2 \times 2\) the method can be computationally intensive (or can fail) if the frequencies are not small.

fisher.test(GSStab)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  GSStab
## p-value = 0.03115
## alternative hypothesis: two.sided

Fisher’s test is meant for tables with small total sample size. It generates an error for the HairEye data with \(n\)=592 total frequency.

fisher.test(HairEye)
## Error in fisher.test(HairEye): FEXACT error 6 (f5xact).  LDKEY=618 is too small for this problem: kval=238045028.
## Try increasing the size of the workspace.

Mantel-Haenszel test and conditional association

Use the mantelhaen.test(X) function to perform a Cochran-Mantel-Haenszel \(\chi^2\) test of the null hypothesis that two nominal variables are conditionally independent, \(A \perp B \; | \; C\), in each stratum, assuming that there is no three-way interaction. X is a 3-dimensional contingency table, where the last dimension refers to the strata.

The UCBAdmissions data serves as an example of a \(2 \times 2 \times 6\) table, with Dept as the stratifying variable.

# UC Berkeley Student Admissions
mantelhaen.test(UCBAdmissions)
## 
##  Mantel-Haenszel chi-squared test with continuity correction
## 
## data:  UCBAdmissions
## Mantel-Haenszel X-squared = 1.4269, df = 1, p-value = 0.2323
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.7719074 1.0603298
## sample estimates:
## common odds ratio 
##         0.9046968

The results show no evidence for association between admission and gender when adjusted for department. However, we can easily see that the assumption of equal association across the strata (no 3-way association) is probably violated. For \(2 \times 2 \times k\) tables, this can be examined from the odds ratios for each \(2 \times 2\) table (oddsratio()), and tested by using woolf_test() in vcd.

oddsratio(UCBAdmissions, log=FALSE)
##  odds ratios for Admit and Gender by Dept 
## 
##         A         B         C         D         E         F 
## 0.3492120 0.8025007 1.1330596 0.9212838 1.2216312 0.8278727

lor <- oddsratio(UCBAdmissions)  # capture log odds ratios
summary(lor)
## 
## z test of coefficients:
## 
##    Estimate Std. Error z value  Pr(>|z|)    
## A -1.052076   0.262708 -4.0047 6.209e-05 ***
## B -0.220023   0.437593 -0.5028    0.6151    
## C  0.124922   0.143942  0.8679    0.3855    
## D -0.081987   0.150208 -0.5458    0.5852    
## E  0.200187   0.200243  0.9997    0.3174    
## F -0.188896   0.305164 -0.6190    0.5359    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

woolf_test(UCBAdmissions) 
## 
##  Woolf-test on Homogeneity of Odds Ratios (no 3-Way assoc.)
## 
## data:  UCBAdmissions
## X-squared = 17.902, df = 5, p-value = 0.003072

Some plot methods

Fourfold displays

We can visualize the odds ratios of Admission for each department with fourfold displays using fourfold(). The cell frequencies \(n_{ij}\) of each \(2 \times 2\) table are shown as a quarter circle whose radius is proportional to \(\sqrt{n_{ij}}\), so that its area is proportional to the cell frequency.

UCB <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(UCB)[[2]] <- c("Yes", "No")
names(dimnames(UCB)) <- c("Sex", "Admit?", "Department")

Confidence rings for the odds ratio allow a visual test of the null of no association; the rings for adjacent quadrants overlap if and only if the observed counts are consistent with the null hypothesis. In the extended version (the default), brighter colors are used where the odds ratio is significantly different from 1. The following lines produce the fourfold display shown below.

col <- c("#99CCFF", "#6699CC", "#F9AFAF", "#6666A0", "#FF0000", "#000080")
fourfold(UCB, mfrow=c(2,3), color=col)
Fourfold display for the UCBAdmissions data. Where the odds ratio differs significantly from 1.0, the confidence bands do not overlap, and the circle quadrants are shaded more intensely.
Fourfold display for the UCBAdmissions data. Where the odds ratio differs significantly from 1.0, the confidence bands do not overlap, and the circle quadrants are shaded more intensely.

Another vcd function, cotabplot(), provides a more general approach to visualizing conditional associations in contingency tables, similar to trellis-like plots produced by coplot() and lattice graphics. The panel argument supplies a function used to render each conditional subtable. The following gives a display (not shown) similar to the fourfold display above.

cotabplot(UCB, panel = cotab_fourfold)

Doubledecker plots

When we want to view the conditional probabilities of a response variable (e.g., Admit) in relation to several factors, an alternative visualization is a doubledecker() plot. This plot is a specialized version of a mosaic plot, which highlights the levels of a response variable (plotted vertically) in relation to the factors (shown horizontally). The following call produces the doubledecker plot shown below, where we use indexing on the first factor (Admit) to make Admitted the highlighted level.

In this plot, the association between Admit and Gender is shown where the heights of the highlighted conditional probabilities do not align. The excess of females admitted in Dept A stands out here.

doubledecker(Admit ~ Dept + Gender, data=UCBAdmissions[2:1,,])
Doubledecker display for the `UCBAdmissions` data. The heights    of the highlighted bars show the conditional probabilities of `Admit`,    given `Dept` and `Gender`.

Doubledecker display for the UCBAdmissions data. The heights of the highlighted bars show the conditional probabilities of Admit, given Dept and Gender.

Odds ratio plots

Finally, there is a plot() method for oddsratio objects. By default, it shows the 95% confidence interval for the log odds ratio. The plot shown below is produced by:

plot(lor, 
     xlab="Department", 
     ylab="Log Odds Ratio (Admit | Gender)")
Log odds ratio plot for the `UCBAdmissions` data.

Log odds ratio plot for the UCBAdmissions data.

{#fig:oddsratio}

Cochran-Mantel-Haenszel tests for ordinal factors

The standard \(\chi^2\) tests for association in a two-way table treat both table factors as nominal (unordered) categories. When one or both factors of a two-way table are quantitative or ordinal, more powerful tests of association may be obtained by taking ordinality into account, using row and/or column scores to test for linear trends or differences in row or column means.

More general versions of the CMH tests (Landis, Heyman, and Koch 1978) are provided by assigning numeric scores to the row and/or column variables. For example, with two ordinal factors (assumed to be equally spaced), assigning integer scores, 1:R and 1:C, tests the linear \(\times\) linear component of association. This is statistically equivalent to the Pearson correlation between the integer-scored table variables, with \(\chi^2 = (n-1) r^2\), and uses only 1 \(df\) rather than \((R-1)\times(C-1)\) for the test of general association.

When only one table variable is ordinal, these general CMH tests are analogous to an ANOVA, testing whether the row mean scores or column mean scores are equal, again consuming fewer \(df\) than the test of general association.

The CMHtest() function in vcdExtra calculates these various CMH tests for two possibly ordered factors, optionally stratified by other factor(s).

Example:

Recall the \(4 \times 4\) table, JobSat, introduced previously (see the section on creating tables):

JobSat
##         satisfaction
## income   VeryD LittleD ModerateS VeryS
##   < 15k      1       3        10     6
##   15-25k     2       3        10     7
##   25-40k     1       6        14    12
##   > 40k      0       1         9    11

Treating the satisfaction levels as equally spaced, but using midpoints of the income categories as row scores gives the following results:

CMHtest(JobSat, rscores=c(7.5,20,32.5,60))
## Cochran-Mantel-Haenszel Statistics for income by satisfaction 
## 
##                  AltHypothesis  Chisq Df     Prob
## cor        Nonzero correlation 3.8075  1 0.051025
## rmeans  Row mean scores differ 4.4774  3 0.214318
## cmeans  Col mean scores differ 3.8404  3 0.279218
## general    General association 5.9034  9 0.749549

Note that with the relatively small cell frequencies, the test for general association gives no evidence for association. However, the cor test for linear x linear association on 1 df is nearly significant. The coin package contains the functions cmh_test() and lbl_test() for CMH tests of general association and linear x linear association respectively.

Measures of Association

There are a variety of statistical measures of strength of association for contingency tables— similar in spirit to \(r\) or \(r^2\) for continuous variables. With a large sample size, even a small degree of association can show a significant \(\chi^2\), as in the example below for the GSS data.

The assocstats() function in vcd calculates the \(\phi\) coefficient, the contingency coefficient, and Cramer’s V for an \(r \times c\) table. The input must be in table form, a two-way \(r \times c\) table. It won’t work with GSS in frequency form, but by now you should know how to convert.

assocstats(GSStab)
##                     X^2 df P(> X^2)
## Likelihood Ratio 7.0026  2 0.030158
## Pearson          7.0095  2 0.030054
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.084 
## Cramer's V        : 0.085

For tables with ordinal variables, like JobSat, some people prefer the Goodman-Kruskal \(\gamma\) statistic (Agresti 2002, 2.4.3) based on a comparison of concordant and discordant pairs of observations in the case-form equivalent of a two-way table.

GKgamma(JobSat)
## gamma        : 0.221 
## std. error   : 0.117 
## CI           : -0.009 0.451

A web article by Richard Darlington, [http://node101.psych.cornell.edu/Darlington/crosstab/TABLE0.HTM] gives further description of these and other measures of association.

Measures of Agreement

The Kappa() function in the vcd package calculates Cohen’s \(\kappa\) and weighted \(\kappa\) for a square two-way table with the same row and column categories (Cohen 1960). Normal-theory \(z\)-tests are obtained by dividing \(\kappa\) by its asymptotic standard error (ASE). A confint() method for Kappa objects provides confidence intervals.

data(SexualFun, package = "vcd")
(K <- Kappa(SexualFun))
##             value     ASE     z Pr(>|z|)
## Unweighted 0.1293 0.06860 1.885 0.059387
## Weighted   0.2374 0.07832 3.031 0.002437
confint(K)
##             
## Kappa                 lwr       upr
##   Unweighted -0.005120399 0.2637809
##   Weighted    0.083883432 0.3908778

A visualization of agreement (Bangdiwala 1987), both unweighted and weighted for degree of departure from exact agreement, is provided by the agreementplot() function. The agreement plot for the SexualFun data is produced as shown below.

The Bangdiwala measures (returned by the function) represent the proportion of the shaded areas of the diagonal rectangles, using weights \(w_1\) for exact agreement, and \(w_2\) for partial agreement one step from the main diagonal.

agree <- agreementplot(SexualFun, main="Is sex fun?")
Agreement plot for the `SexualFun` data.

Agreement plot for the SexualFun data.

unlist(agree)
##          Bangdiwala Bangdiwala_Weighted            weights1            weights2 
##           0.1464624           0.4981723           1.0000000           0.8888889

In other examples, the agreement plot can help to show sources of disagreement. For example, when the shaded boxes are above or below the diagonal (red) line, a lack of exact agreement can be attributed in part to different frequency of use of categories by the two raters– lack of marginal homogeneity.

Correspondence analysis

Correspondence analysis is a technique for visually exploring relationships between rows and columns in contingency tables. The ca package gives one implementation. For an \(r \times c\) table, the method provides a breakdown of the Pearson \(\chi^2\) for association in up to \(M = \min(r-1, c-1)\) dimensions, and finds scores for the row (\(x_{im}\)) and column (\(y_{jm}\)) categories such that the observations have the maximum possible correlations. [1]

Here, we carry out a simple correspondence analysis of the HairEye data. The printed results show that nearly 99% of the association between hair color and eye color can be accounted for in 2 dimensions, of which the first dimension accounts for about 89%.

library(ca)
ca(HairEye)
## 
##  Principal inertias (eigenvalues):
##            1        2        3       
## Value      0.208773 0.022227 0.002598
## Percentage 89.37%   9.52%    1.11%   
## 
## 
##  Rows:
##             Black     Brown       Red    Blond
## Mass     0.182432  0.483108  0.119932 0.214527
## ChiDist  0.551192  0.159461  0.354770 0.838397
## Inertia  0.055425  0.012284  0.015095 0.150793
## Dim. 1  -1.104277 -0.324463 -0.283473 1.828229
## Dim. 2   1.440917 -0.219111 -2.144015 0.466706
## 
## 
##  Columns:
##             Brown     Blue     Hazel     Green
## Mass     0.371622 0.363176  0.157095  0.108108
## ChiDist  0.500487 0.553684  0.288654  0.385727
## Inertia  0.093086 0.111337  0.013089  0.016085
## Dim. 1  -1.077128 1.198061 -0.465286  0.354011
## Dim. 2   0.592420 0.556419 -1.122783 -2.274122

The resulting ca object can be plotted just by running the plot() method on the ca object, giving the result in the figure below. plot.ca() does not allow labels for dimensions; these can be added with title(). It can be seen that most of the association is accounted for by the ordering of both hair color and eye color along Dimension 1, a dark to light dimension.

plot(ca(HairEye), main="Hair Color and Eye Color")
Correspondence analysis plot for the HairEye data
Correspondence analysis plot for the HairEye data

References

Agresti, Alan. 2002. Categorical Data Analysis. 2nd ed. Hoboken, New Jersey: John Wiley & Sons.
Bangdiwala, Shrikant I. 1987. “Using SAS Software Graphical Procedures for the Observer Agreement Chart.” Proceedings of the SAS User’s Group International Conference 12: 1083–88.
Cohen, J. 1960. “A Coefficient of Agreement for Nominal Scales.” Educational and Psychological Measurement 20: 37–46.
Landis, R. J., E. R. Heyman, and G. G. Koch. 1978. “Average Partial Association in Three-Way Contingency Tables: A Review and Discussion of Alternative Tests.” International Statistical Review 46: 237–54.

  1. Related methods are the non-parametric CMH tests using assumed row/column scores (see the section on Cochran-Mantel-Haenszel tests for ordinal factors above), the analogous glm() model-based methods, and the more general RC models which can be fit using gnm(). Correspondence analysis differs in that it is a primarily descriptive/exploratory method (no significance tests), but is directly tied to informative graphic displays of the row/column categories.↩︎

vcdExtra/inst/doc/tests.R0000644000176200001440000001055114470742317015054 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut02-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ## ---- GSStab------------------------------------------------------------------ # Agresti (2002), table 3.11, p. 
106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) (GSStab <- xtabs(count ~ sex + party, data=GSS)) ## ---- xtabs-ex2--------------------------------------------------------------- # 2-Way Cross Tabulation library(gmodels) CrossTable(GSStab, prop.t=FALSE, prop.r=FALSE, prop.c=FALSE) ## ---- chisq------------------------------------------------------------------- (HairEye <- margin.table(HairEyeColor, c(1, 2))) chisq.test(HairEye) chisq.test(HairEye, simulate.p.value = TRUE) ## ----fisher------------------------------------------------------------------- fisher.test(GSStab) ## ----fisher-error, error=TRUE------------------------------------------------- fisher.test(HairEye) ## ---- mantel1----------------------------------------------------------------- # UC Berkeley Student Admissions mantelhaen.test(UCBAdmissions) ## ---- mantel2----------------------------------------------------------------- oddsratio(UCBAdmissions, log=FALSE) lor <- oddsratio(UCBAdmissions) # capture log odds ratios summary(lor) woolf_test(UCBAdmissions) ## ---- reorder3---------------------------------------------------------------- UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") ## ----------------------------------------------------------------------------- col <- c("#99CCFF", "#6699CC", "#F9AFAF", "#6666A0", "#FF0000", "#000080") fourfold(UCB, mfrow=c(2,3), color=col) ## ----fourfold2, eval=FALSE---------------------------------------------------- # cotabplot(UCB, panel = cotab_fourfold) ## ----------------------------------------------------------------------------- doubledecker(Admit ~ Dept + Gender, data=UCBAdmissions[2:1,,]) ## ----------------------------------------------------------------------------- plot(lor, xlab="Department", ylab="Log Odds Ratio (Admit | Gender)") ## ---- table-form2, 
include=FALSE---------------------------------------------- ## A 4 x 4 table Agresti (2002, Table 2.8, p. 57) Job Satisfaction JobSat <- matrix(c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4) dimnames(JobSat) = list(income=c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction=c("VeryD", "LittleD", "ModerateS", "VeryS")) JobSat <- as.table(JobSat) ## ---- jobsat------------------------------------------------------------------ JobSat ## ---- cmh1-------------------------------------------------------------------- CMHtest(JobSat, rscores=c(7.5,20,32.5,60)) ## ---- assoc1------------------------------------------------------------------ assocstats(GSStab) ## ---- gamma------------------------------------------------------------------- GKgamma(JobSat) ## ---- kappa------------------------------------------------------------------- data(SexualFun, package = "vcd") (K <- Kappa(SexualFun)) confint(K) ## ----------------------------------------------------------------------------- agree <- agreementplot(SexualFun, main="Is sex fun?") unlist(agree) ## ---- ca1--------------------------------------------------------------------- library(ca) ca(HairEye) ## ----ca-haireye, cap = "Correspondence analysis plot for the `HairEye` data"---- plot(ca(HairEye), main="Hair Color and Eye Color") vcdExtra/inst/doc/loglinear.R0000644000176200001440000000612414470742313015663 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut03-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # preload datasets ??? 
set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ## ---- loglm-hec1-------------------------------------------------------------- library(MASS) ## Independence model of hair and eye color and sex. hec.1 <- loglm(~Hair+Eye+Sex, data=HairEyeColor) hec.1 ## ---- loglm-hec2-------------------------------------------------------------- ## Conditional independence hec.2 <- loglm(~(Hair + Eye) * Sex, data=HairEyeColor) hec.2 ## ---- loglm-hec3-------------------------------------------------------------- ## Joint independence model. hec.3 <- loglm(~Hair*Eye + Sex, data=HairEyeColor) hec.3 ## ---- loglm-anova------------------------------------------------------------- anova(hec.1, hec.2, hec.3) ## ---- mental1----------------------------------------------------------------- data(Mental, package = "vcdExtra") str(Mental) xtabs(Freq ~ mental + ses, data=Mental) # display the frequency table ## ---- mental2----------------------------------------------------------------- indep <- glm(Freq ~ mental + ses, family = poisson, data = Mental) # independence model ## ---- mental3----------------------------------------------------------------- # Use integer scores for rows/cols Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) ## ---- mental4----------------------------------------------------------------- # column effects model (ses) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) # row effects model (mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) # linear x linear association linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) ## ---- mental4a---------------------------------------------------------------- # compare models using AIC, BIC, etc vcdExtra::LRstats(glmlist(indep, roweff, 
coleff, linlin)) ## ---- mental5----------------------------------------------------------------- anova(indep, linlin, coleff, test="Chisq") anova(indep, linlin, roweff, test="Chisq") ## ---- mental6----------------------------------------------------------------- CMHtest(xtabs(Freq~ses+mental, data=Mental)) ## ---- mental7----------------------------------------------------------------- RC1 <- gnm(Freq ~ mental + ses + Mult(mental,ses), data=Mental, family=poisson, verbose=FALSE) RC2 <- gnm(Freq ~ mental+ses + instances(Mult(mental,ses),2), data=Mental, family=poisson, verbose=FALSE) anova(indep, RC1, RC2, test="Chisq") vcdExtra/inst/doc/continuous.Rmd0000644000176200001440000002011414470675442016441 0ustar liggesusers--- title: "Continuous predictors" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Continuous predictors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut05-", fig.align = "center", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? 
set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` When continuous predictors are available---and potentially important---in explaining a categorical outcome, models for that outcome include: logistic regression (binary response), the proportional odds model (ordered polytomous response), multinomial (generalized) logistic regression. Many of these are special cases of the generalized linear model using the `"poisson"` or `"binomial"` family and their relatives. ## Spine and conditional density plots {#sec:spine} I don't go into fitting such models here, but I would be remiss not to illustrate some visualizations in `vcd` that are helpful here. The first of these is the spine plot or spinogram [@vcd:Hummel:1996], produced with `spine()`. These are special cases of mosaic plots with specific spacing and shading to show how a categorical response varies with a continuous or categorical predictor. They are also a generalization of stacked bar plots where not the heights but the *widths* of the bars corresponds to the relative frequencies of `x`. The heights of the bars then correspond to the conditional relative frequencies of `y` in every `x` group. ***Example***: For the `Arthritis` data, we can see how `Improved` varies with `Age` as follows. `spine()` takes a formula of the form `y ~ x` with a single dependent factor and a single explanatory variable `x` (a numeric variable or a factor). The range of a numeric variable`x` is divided into intervals based on the `breaks` argument, and stacked bars are drawn to show the distribution of `y` as `x` varies. As shown below, the discrete table that is visualized is returned by the function. 
```{r, spine1} #| spine1, #| fig.height = 6, #| fig.width = 6, #| fig.show = "hold", #| out.width = "46%", #| fig.align = "center", #| cap = "Spine plots for the `Arthritis` data" (spine(Improved ~ Age, data = Arthritis, breaks = 3)) (spine(Improved ~ Age, data = Arthritis, breaks = "Scott")) ``` The conditional density plot [@vcd:Hofmann+Theus] is a further generalization. This visualization technique is similar to spinograms, but uses a smoothing approach rather than discretizing the explanatory variable. As well, it uses the original `x` axis and not a distorted one. ```{r} #| cdplot, #| fig.height = 5, #| fig.width = 5, #| cap = "Conditional density plot for the `Arthritis` data showing the variation of Improved with Age." cdplot(Improved ~ Age, data = Arthritis) ``` In such plots, it is useful to also see the distribution of the observations across the horizontal axis, e.g., with a `rug()` plot. \@ref{fig:cd-plot} uses `cdplot()` from the `graphics` package rather than `cd_plot()` from `vcd`, and is produced with ```{r} #| cdplot1, #| fig.height = 5, #| fig.width = 5, cdplot(Improved ~ Age, data = Arthritis) with(Arthritis, rug(jitter(Age), col="white", quiet=TRUE)) ``` From this figure it can be easily seen that the proportion of patients reporting Some or Marked improvement increases with Age, but there are some peculiar bumps in the distribution. These may be real or artifactual, but they would be hard to see with most other visualization methods. When we switch from non-parametric data exploration to parametric statistical models, such effects are easily missed. ## Model-based plots: effect plots and `ggplot2 plots` {#sec:modelplots} The nonparametric conditional density plot uses smoothing methods to convey the distributions of the response variable, but displays that are simpler to interpret can often be obtained by plotting the predicted response from a parametric model. 
For complex `glm()` models with interaction effects, the `effects` package provides the most useful displays, plotting the predicted values for a given term, averaging over other predictors not included in that term. I don't illustrate this here, but see @effects:1,@effects:2 and `help(package="effects")`. Here I just briefly illustrate the capabilities of the `ggplot2` package for model-smoothed plots of categorical responses in `glm()` models. ***Example***: The `Donner` data frame in `vcdExtra` gives details on the survival of 90 members of the Donner party, a group of people who attempted to migrate to California in 1846. They were trapped by an early blizzard on the eastern side of the Sierra Nevada mountains, and before they could be rescued, nearly half of the party had died. What factors affected who lived and who died? ```{r, donner1} data(Donner, package="vcdExtra") str(Donner) ``` A potential model of interest is the logistic regression model for $Pr(survived)$, allowing separate fits for males and females as a function of `age`. The key to this is the `stat_smooth()` function, using `method = "glm", method.args = list(family = binomial)`. The `formula = y ~ x` specifies a linear fit on the logit scale (\@ref{fig:donner3}, left) ```{r, donner2a, fig=FALSE, eval=FALSE} # separate linear fits on age for M/F ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ x, alpha = 0.2, size=2, aes(fill = sex)) ``` Alternatively, we can allow a quadratic relation with `age` by specifying `formula = y ~ poly(x,2)` (@ref(fig:donner3), right). 
```{r, donner2b, fig=FALSE, eval=FALSE} # separate quadratics ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ poly(x,2), alpha = 0.2, size=2, aes(fill = sex)) ``` ```{r} #| donner3a, #| echo = FALSE, #| fig.height = 6, #| fig.width = 6, #| fig.show = "hold", #| out.width = "46%", #| cap = "Logistic regression plots for the `Donner` data showing survival vs. age, by sex. Left: linear logistic model; right: quadratic model {#fig:donner3}" # separate linear fits on age for M/F ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ x, alpha = 0.2, size=2, aes(fill = sex)) # separate quadratics ggplot(Donner, aes(age, survived, color = sex)) + geom_point(position = position_jitter(height = 0.02, width = 0)) + stat_smooth(method = "glm", method.args = list(family = binomial), formula = y ~ poly(x,2), alpha = 0.2, size=2, aes(fill = sex)) ``` These plots very nicely show (a) the fitted $Pr(survived)$ for males and females; (b) confidence bands around the smoothed model fits and (c) the individual observations by jittered points at 0 and 1 for those who died and survived, respectively. 
# References vcdExtra/inst/doc/mosaics.R0000644000176200001440000001363414470742316015354 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut04-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Load packages set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) library(seriation) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ## ----------------------------------------------------------------------------- data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ## ---- art1-------------------------------------------------------------------- summary(art) ## ----------------------------------------------------------------------------- mosaic(art, gp = shading_Friendly, split_vertical = TRUE, main="Arthritis: gp = shading_Friendly") ## ----glass-------------------------------------------------------------------- data(Glass, package="vcdExtra") (glass.tab <- xtabs(Freq ~ father + son, data=Glass)) ## ----glass-mosaic1------------------------------------------------------------ largs <- list(set_varnames=list(father="Father's Occupation", son="Son's Occupation"), abbreviate=10) gargs <- list(interpolate=c(1,2,4,8)) mosaic(glass.tab, shade=TRUE, labeling_args=largs, gp_args=gargs, main="Alphabetic order", legend=FALSE, rot_labels=c(20,90,0,70)) ## ----glass-order-------------------------------------------------------------- # reorder by status ord <- c(2, 1, 4, 3, 5) row.names(glass.tab)[ord] ## ----glass-mosaic2------------------------------------------------------------ mosaic(glass.tab[ord, ord], 
shade=TRUE, labeling_args=largs, gp_args=gargs, main="Effect order", legend=FALSE, rot_labels=c(20,90,0,70)) ## ----glass-ord---------------------------------------------------------------- Glass.ord <- Glass Glass.ord$father <- ordered(Glass.ord$father, levels=levels(Glass$father)[ord]) Glass.ord$son <- ordered(Glass.ord$son, levels=levels(Glass$son)[ord]) str(Glass.ord) ## ----diag--------------------------------------------------------------------- rowfac <- gl(4, 4, 16) colfac <- gl(4, 1, 16) diag4by4 <- Diag(rowfac, colfac) matrix(Diag(rowfac, colfac, binary = FALSE), 4, 4) ## ----symm--------------------------------------------------------------------- symm4by4 <- Symm(rowfac, colfac) matrix(symm4by4, 4, 4) ## ----glass-models------------------------------------------------------------- library(gnm) glass.indep <- glm(Freq ~ father + son, data = Glass.ord, family=poisson) glass.quasi <- glm(Freq ~ father + son + Diag(father, son), data = Glass.ord, family=poisson) glass.symm <- glm(Freq ~ Symm(father, son), data = Glass.ord, family=poisson) glass.qsymm <- glm(Freq ~ father + son + Symm(father, son), data = Glass.ord, family=poisson) ## ----glass-quasi-------------------------------------------------------------- mosaic(glass.quasi, residuals_type="rstandard", shade=TRUE, labeling_args=largs, gp_args=gargs, main="Quasi-Independence", legend=FALSE, rot_labels=c(20,90,0,70) ) ## ----glass-anova-------------------------------------------------------------- # model comparisons: for *nested* models anova(glass.indep, glass.quasi, glass.qsymm, test="Chisq") ## ----glass-lrstats------------------------------------------------------------ models <- glmlist(glass.indep, glass.quasi, glass.symm, glass.qsymm) LRstats(models) ## ----glass-qsymm-------------------------------------------------------------- mosaic(glass.qsymm, residuals_type="rstandard", shade=TRUE, labeling_args=largs, gp_args=gargs, main = paste("Quasi-Symmetry", modFit(glass.qsymm)), legend=FALSE, 
rot_labels=c(20,90,0,70) ) ## ----housetasks--------------------------------------------------------------- data("HouseTasks", package = "vcdExtra") HouseTasks ## ----housetasks-mos1---------------------------------------------------------- require(vcd) mosaic(HouseTasks, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) ## ----housetasks-ca------------------------------------------------------------ require(ca) HT.ca <- ca(HouseTasks) summary(HT.ca, rows=FALSE, columns=FALSE) ## ----housetasks-ca-plot------------------------------------------------------- plot(HT.ca, lines = TRUE) ## ----housetasks-seriation----------------------------------------------------- require(seriation) order <- seriate(HouseTasks, method = "CA") # the permuted row and column labels rownames(HouseTasks)[order[[1]]] colnames(HouseTasks)[order[[2]]] ## ----housetasks-mos2---------------------------------------------------------- # do the permutation HT_perm <- permute(HouseTasks, order, margin=1) mosaic(HT_perm, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) vcdExtra/inst/doc/mosaics.Rmd0000644000176200001440000004126214422307100015654 0ustar liggesusers--- title: "Mosaic plots" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Mosaic plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut04-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap 
= "fig.cap") # Load packages set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) library(seriation) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` Mosaic plots provide an ideal method both for visualizing contingency tables and for visualizing the fit--- or more importantly--- **lack of fit** of a loglinear model. For a two-way table, `mosaic()`, by default, fits a model of independence, $[A][B]$ or `~A + B` as an R formula. The `vcdExtra` package extends this to models fit using `glm(..., family=poisson)`, which can include specialized models for ordered factors, or square tables that are intermediate between the saturated model, $[A B]$ = `A * B`, and the independence model $[A][B]$. For $n$-way tables, `vcd::mosaic()` can fit any loglinear model, and can also be used to plot a model fit with `MASS::loglm()`. The `vcdExtra` package extends this to models fit using `stats::glm()` and, by extension, to non-linear models fit using the [gnm package](https://cran.r-project.org/package=gnm). See @vcd:Friendly:1994, @vcd:Friendly:1999 for the statistical ideas behind these uses of mosaic displays in connection with loglinear models. Our book @FriendlyMeyer:2016:DDAR gives a detailed discussion of mosaic plots and many more examples. The essential ideas are to: * recursively sub-divide a unit square into rectangular "tiles" for the cells of the table, such that the area of each tile is proportional to the cell frequency. Tiles are split in a sequential order: + First according to the **marginal** proportions of a first variable, V1 + Next according to the **conditional** proportions of a 2nd variable, V2 | V1 + Next according to the **conditional** proportions of a 3rd variable, V3 | {V1, V2} + ... * For a given loglinear model, the tiles can then be shaded in various ways to reflect the residuals (lack of fit) for a given model.
* The pattern of residuals can then be used to suggest a better model or understand *where* a given model fits or does not fit. `mosaic()` provides a wide range of options for the directions of splitting, the specification of shading, labeling, spacing, legend and many other details. It is actually implemented as a special case of a more general class of displays for $n$-way tables called `strucplot`, including sieve diagrams, association plots, double-decker plots as well as mosaic plots. For details, see `help(strucplot)` and the "See also" links therein, and also @vcd:Meyer+Zeileis+Hornik:2006b, which is available as an R vignette via `vignette("strucplot", package="vcd")`. ***Example***: A mosaic plot for the Arthritis treatment data fits the model of independence, `~ Treatment + Improved` and displays the association in the pattern of residual shading. The goal is to visualize the difference in the proportions of `Improved` for the two levels of `Treatment` : "Placebo" and "Treated". The plot below is produced with the following call to `mosaic()`. With the first split by `Treatment` and the shading used, it is easy to see that more people given the placebo experienced no improvement, while more people given the active treatment reported marked improvement. ```{r} #| Arthritis1, #| fig.height = 6, #| fig.width = 7, #| fig.cap = "Mosaic plot for the `Arthritis` data, using `shading_max`" data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]") ``` `gp = shading_max` specifies that color in the plot signals a significant residual at a 90% or 99% significance level, with the more intense shade for 99%. Note that the residuals for the independence model were not large (as shown in the legend), yet the association between `Treatment` and `Improved` is highly significant. 
```{r, art1} summary(art) ``` In contrast, one of the other shading schemes, from @vcd:Friendly:1994 (use: `gp = shading_Friendly`), uses fixed cutoffs of $\pm 2, \pm 4$, to shade cells which are *individually* significant at approximately $\alpha = 0.05$ and $\alpha = 0.001$ levels, respectively. The plot below uses `gp = shading_Friendly`. ```{r} #| Arthritis2, #| fig.height = 6, #| fig.width = 7, #| fig.cap = "Mosaic plot for the `Arthritis` data, using `shading_Friendly`" mosaic(art, gp = shading_Friendly, split_vertical = TRUE, main="Arthritis: gp = shading_Friendly") ``` ## Permuting variable levels Mosaic plots using tables or frequency data frames as input typically take the levels of the table variables in the order presented in the dataset. For character variables, this is often alphabetical order. That might be helpful for looking up a value, but is unhelpful for seeing and understanding the pattern of association. It is usually much better to order the levels of the row and column variables to help reveal the nature of their association. This is an example of **effect ordering for data display** [@FriendlyKwan:02:effect]. ***Example***: Data from @Glass:54 gave this 5 x 5 table on the occupations of 3500 British fathers and their sons, where the occupational categories are listed in alphabetic order. ```{r glass} data(Glass, package="vcdExtra") (glass.tab <- xtabs(Freq ~ father + son, data=Glass)) ``` The mosaic display shows very strong association, but aside from the diagonal cells, the pattern is unclear. Note the use of `set_varnames` to give more descriptive labels for the variables, `abbreviate` to shorten the occupational category labels, and `interpolate` to set the shading levels for the mosaic.
```{r glass-mosaic1} largs <- list(set_varnames=list(father="Father's Occupation", son="Son's Occupation"), abbreviate=10) gargs <- list(interpolate=c(1,2,4,8)) mosaic(glass.tab, shade=TRUE, labeling_args=largs, gp_args=gargs, main="Alphabetic order", legend=FALSE, rot_labels=c(20,90,0,70)) ``` The occupational categories differ in **status**, and can be reordered correctly as follows, from `Professional` down to `Unskilled`. ```{r glass-order} # reorder by status ord <- c(2, 1, 4, 3, 5) row.names(glass.tab)[ord] ``` The revised mosaic plot can be produced by indexing the rows and columns of the table using `ord`. ```{r glass-mosaic2} mosaic(glass.tab[ord, ord], shade=TRUE, labeling_args=largs, gp_args=gargs, main="Effect order", legend=FALSE, rot_labels=c(20,90,0,70)) ``` From this, and for the examples in the next section, it is useful to re-define `father` and `son` as **ordered** factors in the original `Glass` frequency data.frame. ```{r glass-ord} Glass.ord <- Glass Glass.ord$father <- ordered(Glass.ord$father, levels=levels(Glass$father)[ord]) Glass.ord$son <- ordered(Glass.ord$son, levels=levels(Glass$son)[ord]) str(Glass.ord) ``` ## Square tables For mobility tables such as this, where the rows and columns refer to the same occupational categories it comes as no surprise that there is a strong association in the diagonal cells: most often, sons remain in the same occupational categories as their fathers. However, the re-ordered mosaic display also reveals something subtler: when a son differs in occupation from the father, it is more likely that he will appear in a category one-step removed than more steps removed. The residuals seem to decrease with the number of steps from the diagonal. For such tables, specialized loglinear models provide interesting cases intermediate between the independence model, [A] [B], and the saturated model, [A B]. 
These can be fit using `glm()`, with the data in frequency form, ``` glm(Freq ~ A + B + assoc, data = ..., family = poisson) ``` where `assoc` is a special term to handle a restricted form of association, different from `A:B` which specifies the saturated model in this notation. * **Quasi-independence**: Asserts independence, but ignores the diagonal cells by fitting them exactly. The loglinear model is: $\log m_{ij} = \mu + \lambda^A_i + \lambda^B_j + \delta_i I(i = j)$, where $I()$ is the indicator function. * **Symmetry**: This model asserts that the joint distribution of the row and column variables is symmetric, that is $\pi_{ij} = \pi_{ji}$: A son is equally likely to move from their father's occupational category $i$ to another category, $j$, as the reverse, moving from $j$ to $i$. Symmetry is quite strong, because it also implies **marginal homogeneity**, that the marginal probabilities of the row and column variables are equal, $\pi_{i+} = \sum_j \pi_{ij} = \sum_j \pi_{ji} = \pi_{+i}$ for all $i$. * **Quasi-symmetry**: This model uses the standard main-effect terms in the loglinear model, but asserts that the association parameters are symmetric, $\log m_{ij} = \mu + \lambda^A_i + \lambda^B_j + \lambda^{AB}_{ij}$, where $\lambda^{AB}_{ij} = \lambda^{AB}_{ji}$. The [gnm package](https://cran.r-project.org/package=gnm) provides a variety of these functions: `gnm::Diag()`, `gnm::Symm()` and `gnm::Topo()` for an interaction factor as specified by an array of levels, which may be arbitrarily structured. For example, the following generates a term for a diagonal factor in a $4 \times 4$ table. The diagonal values reflect parameters fitted for each diagonal cell. Off-diagonal values, "." are ignored. ```{r diag} rowfac <- gl(4, 4, 16) colfac <- gl(4, 1, 16) diag4by4 <- Diag(rowfac, colfac) matrix(Diag(rowfac, colfac, binary = FALSE), 4, 4) ``` `Symm()` constructs parameters for symmetric cells. The particular values don't matter.
All that does matter is that the same value, e.g., `1:2` appears in both the (1,2) and (2,1) cells. ```{r symm} symm4by4 <- Symm(rowfac, colfac) matrix(symm4by4, 4, 4) ``` ***Example***: To illustrate, we fit the four models below, starting with the independence model `Freq ~ father + son` and then adding terms to reflect the restricted forms of association, e.g., `Diag(father, son)` for diagonal terms and `Symm(father, son)` for symmetry. ```{r glass-models} library(gnm) glass.indep <- glm(Freq ~ father + son, data = Glass.ord, family=poisson) glass.quasi <- glm(Freq ~ father + son + Diag(father, son), data = Glass.ord, family=poisson) glass.symm <- glm(Freq ~ Symm(father, son), data = Glass.ord, family=poisson) glass.qsymm <- glm(Freq ~ father + son + Symm(father, son), data = Glass.ord, family=poisson) ``` We can visualize these using the `vcdExtra::mosaic.glm()` method, which extends mosaic displays to handle fitted `glm` objects. *Technical note*: for models fitted using `glm()`, standardized residuals, `residuals_type="rstandard"` have better statistical properties than the default Pearson residuals in mosaic plots and analysis. ```{r glass-quasi} mosaic(glass.quasi, residuals_type="rstandard", shade=TRUE, labeling_args=largs, gp_args=gargs, main="Quasi-Independence", legend=FALSE, rot_labels=c(20,90,0,70) ) ``` Mosaic plots for the other models would give further visual assessment of these models, however we can also test differences among them. For nested models, `anova()` gives tests of how much better a more complex model is compared to the previous one. ```{r glass-anova} # model comparisons: for *nested* models anova(glass.indep, glass.quasi, glass.qsymm, test="Chisq") ``` Alternatively, `vcdExtra::LRstats()` gives model summaries for a collection of models, not necessarily nested, with AIC and BIC statistics reflecting model parsimony. 
```{r glass-lrstats} models <- glmlist(glass.indep, glass.quasi, glass.symm, glass.qsymm) LRstats(models) ``` By all criteria, the model of quasi symmetry fits best. The residual deviance $G^2$ is not significant. The mosaic is largely unshaded, indicating a good fit, but there are a few shaded cells that indicate the remaining positive and negative residuals. For comparative mosaic displays, it is sometimes useful to show the $G^2$ statistic in the main title, using `vcdExtra::modFit()` for this purpose. ```{r glass-qsymm} mosaic(glass.qsymm, residuals_type="rstandard", shade=TRUE, labeling_args=largs, gp_args=gargs, main = paste("Quasi-Symmetry", modFit(glass.qsymm)), legend=FALSE, rot_labels=c(20,90,0,70) ) ``` ## Correspondence analysis ordering When natural orders for row and column levels are not given a priori, we can find orderings that make more sense using correspondence analysis. The general ideas are that: * Correspondence analysis assigns scores to the row and column variables to best account for the association in 1, 2, ... dimensions * The first CA dimension accounts for the largest proportion of the Pearson $\chi^2$ * Therefore, permuting the levels of the row and column variables by the CA Dim1 scores gives a more coherent mosaic plot, more clearly showing the nature of the association. * The [seriation package](https://cran.r-project.org/package=seriation) now has a method to order variables in frequency tables using CA. ***Example***: As an example, consider the `HouseTasks` dataset, a 13 x 4 table of frequencies of household tasks performed by couples, either by the `Husband`, `Wife`, `Alternating` or `Jointly`. You can see from the table that some tasks (Repairs) are done largely by the husband; some (laundry, main meal) are largely done by the wife, while others are done jointly or alternating between husband and wife. But the `Task` and `Who` levels are both in alphabetical order.
```{r housetasks} data("HouseTasks", package = "vcdExtra") HouseTasks ``` The naive mosaic plot for this dataset is shown below, splitting first by `Task` and then by `Who`. Due to the length of the factor labels, some features of `labeling` were used to make the display more readable. ```{r housetasks-mos1} require(vcd) mosaic(HouseTasks, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) ``` Correspondence analysis, using the [ca package](https://cran.r-project.org/package=ca), shows that nearly 89% of the $\chi^2$ can be accounted for in two dimensions. ```{r housetasks-ca} require(ca) HT.ca <- ca(HouseTasks) summary(HT.ca, rows=FALSE, columns=FALSE) ``` The CA plot has a fairly simple interpretation: Dim1 is largely the distinction between tasks primarily done by the wife vs. the husband. Dim2 distinguishes tasks that are done singly vs. those that are done jointly. ```{r housetasks-ca-plot} plot(HT.ca, lines = TRUE) ``` So, we can use the `CA` method of `seriation::seriate()` to find the order of permutations of `Task` and `Who` along the CA dimensions. 
```{r housetasks-seriation} require(seriation) order <- seriate(HouseTasks, method = "CA") # the permuted row and column labels rownames(HouseTasks)[order[[1]]] colnames(HouseTasks)[order[[2]]] ``` Now, use `seriation::permute()` to use `order` for the permutations of `Task` and `Who`, and plot the resulting mosaic: ```{r housetasks-mos2} # do the permutation HT_perm <- permute(HouseTasks, order, margin=1) mosaic(HT_perm, shade = TRUE, labeling = labeling_border(rot_labels = c(45,0, 0, 0), offset_label =c(.5,5,0, 0), varnames = c(FALSE, TRUE), just_labels=c("center","right"), tl_varnames = FALSE), legend = FALSE) ``` It is now easy to see the cluster of tasks (laundry and cooking) done largely by the wife at the top, and those (repairs, driving) done largely by the husband at the bottom. ## References vcdExtra/inst/doc/demo-housing.Rmd0000644000176200001440000002773514422306403016632 0ustar liggesusers--- title: "Demo - Housing Data" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Demo - Housing Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/demo-housing-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap")
# colorize text
# Wrap `x` in markup for the current knitr output format so that it is
# rendered in `color`:
#   - LaTeX output: \textcolor{<color>}{<x>}
#   - HTML output:  <span style='color: <color>;'><x></span>
#   - any other output format: `x` is returned unchanged
# x:     the text to display
# color: the colour, inserted verbatim into the markup
# NOTE(review): the HTML format string had lost its <span> markup and was just
# "%s", so that branch returned `color` instead of the styled text. The markup
# below is restored from the standard R Markdown recipe -- confirm against the
# package sources.
colorize <- function(x, color) {
  if (knitr::is_latex_output()) {
    sprintf("\\textcolor{%s}{%s}", color, x)
  } else if (knitr::is_html_output()) {
    sprintf("<span style='color: %s;'>%s</span>", color, x)
  } else {
    x
  }
}
``` This vignette was one of a series of `demo()` files in the package. It is still there as `demo("housing")`, but is now presented here with additional commentary and analysis, designed to highlight some aspects of analysis of categorical data and graphical display.
## Load packages I'll use the following packages in this vignette. ```{r} library(vcdExtra) library(MASS) library(effects) ``` ## Housing data The content here is the dataset `MASS::housing`, giving a 4-way, $3 \times 3 \times 4 \times 2$ frequency table of 1681 individuals from the *Copenhagen Housing Conditions Survey*, classified by their: * Satisfaction (`Sat`) with their housing circumstances (low, medium or high), * `Type` of rental dwelling (Tower, Apartment, Atrium or Terrace) * perceived influence (`Infl`) on management of the property (low, medium, high), and * degree of contact (`Cont`) with other residents (low or high) Load the data: ```{r housing} data(housing, package="MASS") str(housing) ``` ### Variables, levels and models Satisfaction (`Sat`) of these householders with their present housing circumstances is the **outcome variable** here. For purposes of analysis, note that `Sat` is an ordered factor with levels `"Low" < "Medium" < "High"`. Note also that Influence, with the same levels, is just a "Factor", not an ordered one. I consider here just models using `glm(..., family=poisson)` or the equivalent in `MASS::loglm()`. The ordering of factor levels is important in graphical displays. We don't want to see them ordered alphabetically, "High", "Low", "Medium". The `housing` data.frame was constructed so that the levels of `Sat` and `Infl` appear in the dataset in their appropriate order. ```{r} levels(housing$Sat) levels(housing$Infl) ``` Other models, e.g., the **proportional odds** model, fit using `MASS::polr()` can take the ordinal nature of satisfaction into account. In `glm()` one could re-assign `Infl` as an ordered factor and examine linear vs. non-linear associations for this factor. But I don't do this here. ## Null model The most ignorant model asserts that all the table factors are mutually independent. In symbolic notation, this is `[S] [I] [T] [C]` where all terms in separate `[ ]` are supposed to be independent.
This is `Freq ~ Sat + Infl + Type + Cont` as a formula for `glm()`. ```{r house.null} house.null <- glm(Freq ~ Sat + Infl + Type + Cont, family = poisson, data = housing) ``` ## Baseline model When `Sat` is the outcome variable, a minimal **baseline model** should allow for all associations among the predictors, symbolized as `[S] [I T C]`. That is, Influence, Type and Contact may be associated in arbitrary ways, just as multiple predictors can be correlated in regression models. In this framework, what remains to be explained is whether/how `Sat` depends on the combinations of the other variables. The baseline model therefore includes the full three-way term for the predictors. ```{r house.glm0} house.glm0 <- glm(Freq ~ Sat + Infl*Type*Cont, family = poisson, data = housing) ``` Both of these models fit terribly, but we can always use `anova(mod1, mod2,...)` to compare the *relative* fits of **nested** models. ```{r anova} anova(house.null, house.glm0, test = "Chisq") ``` ## Visualising model fit The baseline model is shown in the mosaic plot below. Note that this is applied not to the `housing` data, but rather to the `house.glm0` object (of class `glm`) resulting in a call to `vcdExtra::mosaic.glm()`. With four variables in the mosaic, labeling of the variable names and factor levels is a bit tricky, because labels must appear on all four sides of the plot. The `labeling_args` argument can be used to set more informative variable names and abbreviate factor levels where necessary.
```{r} #| label= mosaic-glm0a, #| warning = TRUE # labeling_args for mosaic() largs <- list(set_varnames = c( Infl="Influence on management", Cont="Contact among residents", Type="Type of dwelling", Sat="Satisfaction"), abbreviate=c(Type=3)) mosaic(house.glm0, labeling_args=largs, main='Baseline model: [ITC][Sat]') ``` In this plot we can see largish `r colorize("positive residuals", "blue")` in the blocks corresponding to (low satisfaction, low influence) and (high satisfaction, high influence) and clusters of largish `r colorize("negative residuals", "red")` in the opposite corners. By default, variables are used in the mosaic display in their order in the data table or frequency data.frame. The `r colorize("warning", "red")` reminds us that the order of conditioning used is `~Sat + Infl + Type + Cont`. ### Ordering the variables in the mosaic For `mosaic.glm()`, the conditioning order of variables in the mosaic can be set using the `formula` argument. Here, I rearrange the variables to put `Sat` as the last variable in the splitting / conditioning sequence. I also use `vcdExtra::modFit()` to add the LR $G^2$ fit statistic to the plot title. ```{r mosaic-glm0b} mosaic(house.glm0, formula = ~ Type + Infl + Cont + Sat, labeling_args=largs, main=paste('Baseline model: [ITC][Sat],', modFit(house.glm0)) ) ``` ## Adding association terms Clearly, satisfaction depends on one or more of the predictors, `Infl`, `Type` and `Cont` and possibly their interactions. As a first step it is useful to consider sequentially adding the association terms `Infl:Sat`, `Type:Sat`, `Cont:Sat` one at a time. This analysis is carried out using `MASS::addterm()`. ```{r addterm} MASS::addterm(house.glm0, ~ . + Sat:(Infl + Type + Cont), test = "Chisq") ``` Based on this, it is useful to consider a "main-effects" model for satisfaction, adding all three two-way terms involving satisfaction. The `update()` method provides an easy way to add (or subtract) terms from a fitted model object. 
In the model formula, `.` stands for whatever was on the left side (`Freq`) or on the right side (`Sat + Infl*Type*Cont`) of the model (`house.glm0`) that is being updated. ```{r house-glm1} house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl + Type + Cont)) ``` For comparison, we note that the same model can be fit using the iterative proportional scaling algorithm of `MASS::loglm()`. ```{r house-loglm1} (house.loglm1 <- MASS::loglm(Freq ~ Infl * Type * Cont + Sat*(Infl + Type + Cont), data = housing)) ``` ## Did the model get better? As before, `anova()` tests the added contribution of each more complex model over the one before. The residual deviance $G^2$ has been reduced from $G^2 (46) = 217.46$ for the baseline model `house.glm0` to $G^2 (34) = 38.66$ for the revised model `house.glm1`. The difference, $G^2(M1 | M0) = G^2 (12) = 178.79$ tests the collective additional fit provided by the two-way association of satisfaction with the predictors. ```{r} anova(house.glm0, house.glm1, test="Chisq") ``` ## Visualize model `glm1` The model `house.glm1` fits reasonably well, `r modFit(house.glm1)`, so most residuals are small. In the mosaic below, I use `gp=shading_Friendly` to shade the tiles so that positive and negative residuals are distinguished by color, and they are filled when the absolute value of the residual is outside $\pm 2, 4$. ```{r mosaic-glm1} mosaic(house.glm1, labeling_args=largs, main=paste('Model [IS][TS][CS],', modFit(house.glm1) ), gp=shading_Friendly) ``` One cell is highlighted here: The combination of medium influence, low contact and tower type, is more likely to give low satisfaction than the model predicts. Is this just an outlier, or is there something that can be interpreted and perhaps improve the model fit? It is hard to tell, but the virtues of mosaic displays are that they help to: * diagnose overall patterns of associations, * spot unusual cells in relation to lack of fit of a given model. ## Can we drop any terms?
When we add terms using `MASS::addterm()`, they are added sequentially. It might be the case that once some term is added, a previously added term is no longer important. Running `MASS::dropterm()` on the `house.glm1` model checks for this. ```{r dropterm} MASS::dropterm(house.glm1, test = "Chisq") ``` Note that the three-way term `Infl:Type:Cont` is not significant. However, with `Sat` as the response, the associations of all predictors must be included in the model. ## What about two-way interactions? The model so far says that each of influence, type and contact has separate, additive effects on the level of satisfaction, what I called a "main-effects" model. It might be the case that some of the predictors have *interaction* effects, e.g., that the effect of influence on satisfaction might vary with the type of dwelling or the level of contact. An easy way to test for these is to update the main-effects model, adding all possible two-way interactions for `Sat`, one at a time, with `addterm()`. ```{r addterm1} MASS::addterm(house.glm1, ~. + Sat:(Infl + Type + Cont)^2, test = "Chisq") ``` The result shows that adding the term `Infl:Type:Sat` reduces the deviance $G^2$ from 38.66 to 16.11. The difference, $G^2(M1 + ITS | M1) = G^2 (12) = 22.55$ reflects a substantial improvement. The remaining two-way interaction terms reduce the deviance by smaller and non-significant amounts, relative to `house.glm1`. Model fitting should be guided by substance, not just statistical machinery. Nonetheless, it seems arguably sensible to add one two-way term to the model, giving `house.glm2`. ```{r} house.glm2 <- update(house.glm1, . ~ . + Sat:Infl:Type) ``` ## Model parsimony: AIC & BIC Adding more association terms to a model will always improve it. The question is, whether that is "worth it"? "Worth it" concerns the trade-off between model fit and parsimony.
Sometimes we might prefer a model with fewer parameters to one that has a slightly better fit, but requires more model terms and parameters. The AIC and BIC statistics are designed to adjust our assessment of model fit by penalizing it for using more parameters. Equivalently, they deduct from the likelihood ratio $G^2$ a term proportional to the residual $\text{df}$ of the model. In any case -- **smaller is better** for both AIC and BIC. $$AIC = G^2 - 2 \: \text{df}$$ $$BIC = G^2 - \log(n) \: \text{df}$$ These measures are provided by `AIC()`, `BIC()`, and can be used to compare models using `vcdExtra::LRstats()`. ```{r lrstats} LRstats(house.glm0, house.glm1, house.glm2) ``` By these metrics, model `house.glm1` is best on both AIC and BIC. The increased goodness-of-fit (smaller $G^2$) of model `house.glm2` is not worth the extra cost of parameters in the `house.glm2` model. vcdExtra/inst/doc/continuous.html0000644000176200001440000022655214470742307016674 0ustar liggesusers Continuous predictors

Continuous predictors

Michael Friendly

2023-08-21

When continuous predictors are available—and potentially important—in explaining a categorical outcome, models for that outcome include: logistic regression (binary response), the proportional odds model (ordered polytomous response), multinomial (generalized) logistic regression. Many of these are special cases of the generalized linear model using the "poisson" or "binomial" family and their relatives.

Spine and conditional density plots

I don’t go into fitting such models here, but I would be remiss not to illustrate some visualizations in vcd that are helpful here. The first of these is the spine plot or spinogram (Hummel, 1996), produced with spine(). These are special cases of mosaic plots with specific spacing and shading to show how a categorical response varies with a continuous or categorical predictor.

They are also a generalization of stacked bar plots where not the heights but the widths of the bars correspond to the relative frequencies of x. The heights of the bars then correspond to the conditional relative frequencies of y in every x group.

Example: For the Arthritis data, we can see how Improved varies with Age as follows. spine() takes a formula of the form y ~ x with a single dependent factor and a single explanatory variable x (a numeric variable or a factor). The range of a numeric variable x is divided into intervals based on the breaks argument, and stacked bars are drawn to show the distribution of y as x varies. As shown below, the discrete table that is visualized is returned by the function.

# Spine plot of Improved against Age with breaks = 3; the outer parentheses
# print the table returned by spine(), which here has three Age intervals.
(spine(Improved ~ Age, data = Arthritis, breaks = 3))
##          Improved
## Age       None Some Marked
##   [20,40]   10    3      2
##   (40,60]   21    3     17
##   (60,80]   11    8      9
# Same spine plot with breaks = "Scott"; the printed table shows the six
# 10-year Age intervals that result for these data.
(spine(Improved ~ Age, data = Arthritis, breaks = "Scott"))
##          Improved
## Age       None Some Marked
##   [20,30]    6    1      0
##   (30,40]    4    2      2
##   (40,50]    9    0      3
##   (50,60]   12    3     14
##   (60,70]   11    8      8
##   (70,80]    0    0      1
Spine plots for the `Arthritis` dataSpine plots for the `Arthritis` data

Spine plots for the Arthritis data

The conditional density plot (Hofmann & Theus, 2005) is a further generalization. This visualization technique is similar to spinograms, but uses a smoothing approach rather than discretizing the explanatory variable. As well, it uses the original x axis and not a distorted one.

# Conditional density plot: how the distribution of Improved varies with Age.
cdplot(Improved ~ Age, data = Arthritis)
Conditional density plot for the `Arthritis` data showing the variation of Improved with Age.

Conditional density plot for the Arthritis data showing the variation of Improved with Age.

In such plots, it is useful to also see the distribution of the observations across the horizontal axis, e.g., with a rug() plot. The conditional density plot shown here uses cdplot() from the graphics package rather than cd_plot() from vcd, and is produced with

# Conditional density plot of Improved against Age.
cdplot(Improved ~ Age, data = Arthritis)
# Add a rug of jittered Age values, drawn in white, to show where the
# observations fall along the horizontal axis.
with(Arthritis, rug(jitter(Age), col="white", quiet=TRUE))

From this figure it can be easily seen that the proportion of patients reporting Some or Marked improvement increases with Age, but there are some peculiar bumps in the distribution. These may be real or artifactual, but they would be hard to see with most other visualization methods. When we switch from non-parametric data exploration to parametric statistical models, such effects are easily missed.

Model-based plots: effect plots and ggplot2 plots

The nonparametric conditional density plot uses smoothing methods to convey the distributions of the response variable, but displays that are simpler to interpret can often be obtained by plotting the predicted response from a parametric model.

For complex glm() models with interaction effects, the effects package provides the most useful displays, plotting the predicted values for a given term, averaging over other predictors not included in that term. I don’t illustrate this here, but see Fox & Weisberg (2018), Fox (2003), and help(package="effects").

Here I just briefly illustrate the capabilities of the ggplot2 package for model-smoothed plots of categorical responses in glm() models.

Example: The Donner data frame in vcdExtra gives details on the survival of 90 members of the Donner party, a group of people who attempted to migrate to California in 1846. They were trapped by an early blizzard on the eastern side of the Sierra Nevada mountains, and before they could be rescued, nearly half of the party had died. What factors affected who lived and who died?

data(Donner, package="vcdExtra")
str(Donner)
## 'data.frame':    90 obs. of  5 variables:
##  $ family  : Factor w/ 10 levels "Breen","Donner",..: 9 1 1 1 1 1 1 1 1 1 ...
##  $ age     : int  23 13 1 5 14 40 51 9 3 8 ...
##  $ sex     : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 1 2 2 2 2 ...
##  $ survived: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ death   : POSIXct, format: "1846-12-29" NA ...

A potential model of interest is the logistic regression model for \(Pr(survived)\), allowing separate fits for males and females as a function of age. The key to this is the stat_smooth() function, using method = "glm", method.args = list(family = binomial). The formula = y ~ x specifies a linear fit on the logit scale (left panel of the figure below).

# separate linear fits on age for M/F
ggplot(Donner, aes(age, survived, color = sex)) +
  geom_point(position = position_jitter(height = 0.02, width = 0)) +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial), 
              formula = y ~ x,
              alpha = 0.2, size=2, aes(fill = sex))

Alternatively, we can allow a quadratic relation with age by specifying formula = y ~ poly(x,2) (right panel of the figure below).

# separate quadratics
ggplot(Donner, aes(age, survived, color = sex)) +
  geom_point(position = position_jitter(height = 0.02, width = 0)) +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial), 
              formula = y ~ poly(x,2), 
              alpha = 0.2, size=2, aes(fill = sex))
Logistic regression plots for the `Donner` data showing survival vs. age, by sex. Left: linear logistic model; right: quadratic model

Logistic regression plots for the Donner data showing survival vs. age, by sex. Left: linear logistic model; right: quadratic model.

These plots very nicely show (a) the fitted \(Pr(survived)\) for males and females; (b) confidence bands around the smoothed model fits and (c) the individual observations by jittered points at 0 and 1 for those who died and survived, respectively.

References

Fox, J. (2003). Effect displays in R for generalised linear models. Journal of Statistical Software, 8(15), 1–27. http://doi.org/10.18637/jss.v008.i15
Fox, J., & Weisberg, S. (2018). Visualizing fit and lack of fit in complex regression models with predictor effect plots and partial residuals. Journal of Statistical Software, 87(9), 1–27. http://doi.org/10.18637/jss.v087.i09
Hofmann, H., & Theus, M. (2005). Interactive graphics for visualizing conditional distributions.
Hummel, J. (1996). Linked bar charts: Analysing categorical data graphically. Computational Statistics, 11, 23–33.
vcdExtra/inst/doc/loglinear.Rmd0000644000176200001440000002362214470701657016214 0ustar liggesusers--- title: "Loglinear Models" author: "Michael Friendly" date: "`r Sys.Date()`" package: vcdExtra output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] csl: apa.csl vignette: > %\VignetteIndexEntry{Loglinear Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut03-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` You can use the `loglm()` function in the `MASS` package to fit log-linear models. Equivalent models can also be fit (from a different perspective) as generalized linear models with the `glm()` function using the `family='poisson'` argument, and the `gnm` package provides a wider range of generalized *nonlinear* models, particularly for testing structured associations. The visualization methods for these models were originally developed for models fit using `loglm()`, so this approach is emphasized here. Some extensions of these methods for models fit using `glm()` and `gnm()` are contained in the `vcdExtra` package and illustrated in @ref(sec:glm). Assume we have a 3-way contingency table based on variables A, B, and C. The possible different forms of loglinear models for a 3-way table are shown in the table below. \@(tab:loglin-3way) The **Model formula** column shows how to express each model for `loglm()` in R. 
^[For `glm()`, or `gnm()`, with the data in the form of a frequency data.frame, the same model is specified in the form `glm(Freq` $\sim$ `..., family="poisson")`, where `Freq` is the name of the cell frequency variable and `...` specifies the *Model formula*.] In the **Interpretation** column, the symbol "$\perp$" is to be read as "is independent of," and "$\;|\;$" means "conditional on," or "adjusting for," or just "given". | **Model** | **Model formula** | **Symbol** | **Interpretation** | |:-------------------------|:-------------------|:---------------|:-----------------------| | Mutual independence | `~A + B + C` | $[A][B][C]$ | $A \perp B \perp C$ | | Joint independence | `~A*B + C` | $[AB][C]$ | $(A \: B) \perp C$ | | Conditional independence | `~(A+B)*C` | $[AC][BC]$ | $(A \perp B) \;|\; C$ | | All two-way associations | `~A*B + A*C + B*C` | $[AB][AC][BC]$ | homogeneous association| | Saturated model | `~A*B*C` | $[ABC]$ | 3-way association | For example, the formula `~A + B + C` specifies the model of *mutual independence* with no associations among the three factors. In standard notation for the expected frequencies $m_{ijk}$, this corresponds to $$ \log ( m_{ijk} ) = \mu + \lambda_i^A + \lambda_j^B + \lambda_k^C \equiv A + B + C $$ The parameters $\lambda_i^A , \lambda_j^B$ and $\lambda_k^C$ pertain to the differences among the one-way marginal frequencies for the factors A, B and C. Similarly, the model of *joint independence*, $(A \: B) \perp C$, allows an association between A and B, but specifies that C is independent of both of these and their combinations, $$ \log ( m_{ijk} ) = \mu + \lambda_i^A + \lambda_j^B + \lambda_k^C + \lambda_{ij}^{AB} \equiv A * B + C $$ where the parameters $\lambda_{ij}^{AB}$ pertain to the overall association between A and B (collapsing over C). In the literature or text books, you will often find these models expressed in shorthand symbolic notation, using brackets, `[ ]` to enclose the *high-order terms* in the model. 
Thus, the joint independence model can be denoted `[AB][C]`, as shown in the **Symbol** column of the table above. Models of *conditional independence* allow (and fit) two of the three possible two-way associations. There are three such models, depending on which variable is conditioned upon. For a given conditional independence model, e.g., `[AB][AC]`, the given variable is the one common to all terms, so this example has the interpretation $(B \perp C) \;|\; A$. ## Fitting with `loglm()` {#sec:loglm} For example, we can fit the model of mutual independence among hair color, eye color and sex in `HairEyeColor` as ```{r, loglm-hec1} library(MASS) ## Independence model of hair and eye color and sex. hec.1 <- loglm(~Hair+Eye+Sex, data=HairEyeColor) hec.1 ``` Similarly, the models of conditional independence and joint independence are specified as ```{r, loglm-hec2} ## Conditional independence hec.2 <- loglm(~(Hair + Eye) * Sex, data=HairEyeColor) hec.2 ``` ```{r, loglm-hec3} ## Joint independence model. hec.3 <- loglm(~Hair*Eye + Sex, data=HairEyeColor) hec.3 ``` Note that printing the model gives a brief summary of the goodness of fit. A set of models can be compared using the `anova()` function. ```{r, loglm-anova} anova(hec.1, hec.2, hec.3) ``` ## Fitting with `glm()` and `gnm()` {#sec:glm} The `glm()` approach, and extensions of this in the `gnm` package, allow a much wider class of models for frequency data to be fit than can be handled by `loglm()`. Of particular importance are models for ordinal factors and for square tables, where we can test more structured hypotheses about the patterns of association than are provided in the tests of general association under `loglm()`. These are similar in spirit to the non-parametric CMH tests described in \@ref(sec:CMH). 
***Example***: The data `Mental` in the `vcdExtra` package gives a two-way table in frequency form classifying young people by their mental health status and parents' socioeconomic status (SES), where both of these variables are ordered factors. ```{r, mental1} data(Mental, package = "vcdExtra") str(Mental) xtabs(Freq ~ mental + ses, data=Mental) # display the frequency table ``` Simple ways of handling ordinal variables involve assigning scores to the table categories, and the simplest cases are to use integer scores, either for the row variable ("column effects" model), the column variable ("row effects" model), or both ("uniform association" model). ```{r, mental2} indep <- glm(Freq ~ mental + ses, family = poisson, data = Mental) # independence model ``` To fit more parsimonious models than general association, we can define numeric scores for the row and column categories ```{r, mental3} # Use integer scores for rows/cols Cscore <- as.numeric(Mental$ses) Rscore <- as.numeric(Mental$mental) ``` Then, the row effects model, the column effects model, and the uniform association model can be fit as follows. The essential idea is to replace a factor variable with its numeric equivalent in the model formula for the association term. ```{r, mental4} # column effects model (ses) coleff <- glm(Freq ~ mental + ses + Rscore:ses, family = poisson, data = Mental) # row effects model (mental) roweff <- glm(Freq ~ mental + ses + mental:Cscore, family = poisson, data = Mental) # linear x linear association linlin <- glm(Freq ~ mental + ses + Rscore:Cscore, family = poisson, data = Mental) ``` The `LRstats()` function in `vcdExtra` provides a nice, compact summary of the fit statistics for a set of models, collected into a *glmlist* object. Smaller is better for AIC and BIC. 
```{r, mental4a} # compare models using AIC, BIC, etc vcdExtra::LRstats(glmlist(indep, roweff, coleff, linlin)) ``` For specific model comparisons, we can also carry out tests of *nested* models with `anova()` when those models are listed from smallest to largest. Here, there are two separate paths from the most restrictive (independence) model through the model of uniform association, to those that allow only one of row effects or column effects. ```{r, mental5} anova(indep, linlin, coleff, test="Chisq") anova(indep, linlin, roweff, test="Chisq") ``` The model of linear by linear association seems best on all accounts. For comparison, one might try the CMH tests on these data: ```{r, mental6} CMHtest(xtabs(Freq~ses+mental, data=Mental)) ``` ## Non-linear terms The strength of the `gnm` package is that it fits a wide variety of models with non-linear terms, where the parameters enter the model in ways beyond a simple linear function. The simplest example is the Goodman RC(1) model [@Goodman:79], which allows a multiplicative term to account for the association of the table variables. In the notation of generalized linear models with a log link, this can be expressed as $$ \log \mu_{ij} = \alpha_i + \beta_j + \gamma_{i} \delta_{j} ,$$ where the row-multiplicative effect parameters $\gamma_i$ and corresponding column parameters $\delta_j$ are estimated from the data.^[This is similar in spirit to a correspondence analysis with a single dimension, but as a statistical model.] Similarly, the RC(2) model adds two multiplicative terms to the independence model, $$ \log \mu_{ij} = \alpha_i + \beta_j + \gamma_{i1} \delta_{j1} + \gamma_{i2} \delta_{j2} . $$ In the `gnm` package, these models may be fit using `Mult()` to specify the multiplicative term, and `instances()` to specify several such terms. ***Example***: For the `Mental` data, we fit the RC(1) and RC(2) models, and compare these with the independence model. 
```{r, mental7} RC1 <- gnm(Freq ~ mental + ses + Mult(mental,ses), data=Mental, family=poisson, verbose=FALSE) RC2 <- gnm(Freq ~ mental+ses + instances(Mult(mental,ses),2), data=Mental, family=poisson, verbose=FALSE) anova(indep, RC1, RC2, test="Chisq") ``` ## References vcdExtra/inst/doc/demo-housing.html0000644000176200001440000026440514470742312017057 0ustar liggesusers Demo - Housing Data

Demo - Housing Data

Michael Friendly

2023-08-21

This vignette was one of a series of demo() files in the package. It is still there as demo("housing"), but is now presented here with additional commentary and analysis, designed to highlight some aspects of analysis of categorical data and graphical display.

Load packages

I’ll use the following packages in this vignette.

library(vcdExtra)
library(MASS)
library(effects)

Housing data

The content here is the dataset MASS::housing, giving a 4-way, \(3 \times 3 \times 4 \times 2\) frequency table of 1681 individuals from the Copenhagen Housing Conditions Survey, classified by their:

  • Satisfaction (Sat) with their housing circumstances (low, medium or high),
  • Type of rental dwelling (Tower, Apartment, Atrium or Terrace)
  • perceived influence (Infl) on management of the property (low, medium, high), and
  • degree of contact (Cont) with other residents (low or high)

Load the data:

data(housing, package="MASS")
str(housing)
## 'data.frame':    72 obs. of  5 variables:
##  $ Sat : Ord.factor w/ 3 levels "Low"<"Medium"<..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ Infl: Factor w/ 3 levels "Low","Medium",..: 1 1 1 2 2 2 3 3 3 1 ...
##  $ Type: Factor w/ 4 levels "Tower","Apartment",..: 1 1 1 1 1 1 1 1 1 2 ...
##  $ Cont: Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq: int  21 21 28 34 22 36 10 11 36 61 ...

Variables, levels and models

Satisfaction (Sat) of these householders with their present housing circumstances is the outcome variable here.

For purposes of analysis, note that Sat is an ordered factor with levels "Low" < "Medium" < "High".

Note also that Influence (Infl), with the same levels, is just a “Factor”, not an ordered one.

I consider here just models using glm(..., family=poisson) or the equivalent in MASS::loglm().

The ordering of factor levels is important in graphical displays. We don’t want to see them ordered alphabetically, “High”, “Low”, “Medium”. The housing data.frame was constructed so that the levels of Sat and Infl appear in the dataset in their appropriate order.

levels(housing$Sat)
## [1] "Low"    "Medium" "High"
levels(housing$Infl)
## [1] "Low"    "Medium" "High"

Other models, e.g., the proportional odds model fit using MASS::polr(), can take the ordinal nature of satisfaction into account. In glm() one could re-assign Infl as an ordered factor and examine linear vs. non-linear associations for this factor. But I don’t do this here.

Null model

The most ignorant model asserts that all the table factors are mutually independent. In symbolic notation, this is [S] [I] [T] [C] where all terms in separate [ ] are supposed to be independent. This is Freq ~ Sat + Infl + Type + Cont as a formula for glm().

house.null <- glm(Freq ~ Sat + Infl + Type + Cont, family = poisson,
                  data = housing)

Baseline model

When Sat is the outcome variable, a minimal baseline model should allow for all associations among the predictors, symbolized as [S] [I T C]. That is, Influence, Type and Contact may be associated in arbitrary ways, just as multiple predictors can be correlated in regression models.

In this framework, what remains to be explained is whether/how Sat depends on the combinations of the other variables. The baseline model therefore includes the full three-way term for the predictors.

house.glm0 <- glm(Freq ~ Sat + Infl*Type*Cont, family = poisson,
                  data = housing)

Both of these models fit terribly, but we can always use anova(mod1, mod2,...) to compare the relative fits of nested models.

anova(house.null, house.glm0, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Freq ~ Sat + Infl + Type + Cont
## Model 2: Freq ~ Sat + Infl * Type * Cont
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        63     295.35                          
## 2        46     217.46 17   77.896 9.045e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Visualising model fit

The baseline model is shown in the mosaic plot below. Note that this is applied not to the housing data, but rather to the house.glm0 object (of class glm), resulting in a call to vcdExtra::mosaic.glm().

With four variables in the mosaic, labeling of the variable names and factor levels is a bit tricky, because labels must appear on all four sides of the plot. The labeling_args argument can be used to set more informative variable names and abbreviate factor levels where necessary.

# labeling_args for mosaic()
largs <- list(set_varnames = c(
      Infl="Influence on management", 
            Cont="Contact among residents", 
            Type="Type of dwelling", 
            Sat="Satisfaction"),
    abbreviate=c(Type=3))

mosaic(house.glm0, 
       labeling_args=largs, 
       main='Baseline model: [ITC][Sat]')
## Warning: no formula provided, assuming ~Sat + Infl + Type + Cont

In this plot we can see largish positive residuals in the blocks corresponding to (low satisfaction, low influence) and (high satisfaction, high influence) and clusters of largish negative residuals in the opposite corners.

By default, variables are used in the mosaic display in their order in the data table or frequency data.frame. The warning reminds us that the order of conditioning used is ~Sat + Infl + Type + Cont.

Ordering the variables in the mosaic

For mosaic.glm(), the conditioning order of variables in the mosaic can be set using the formula argument. Here, I rearrange the variables to put Sat as the last variable in the splitting / conditioning sequence. I also use vcdExtra::modFit() to add the LR \(G^2\) fit statistic to the plot title.

mosaic(house.glm0, 
       formula = ~ Type + Infl + Cont + Sat, 
       labeling_args=largs, 
       main=paste('Baseline model: [ITC][Sat],', modFit(house.glm0))
  )

Adding association terms

Clearly, satisfaction depends on one or more of the predictors, Infl, Type and Cont and possibly their interactions.

As a first step it is useful to consider sequentially adding the association terms Infl:Sat, Type:Sat, Cont:Sat one at a time. This analysis is carried out using MASS::addterm().

MASS::addterm(house.glm0, 
              ~ . + Sat:(Infl + Type + Cont), 
              test = "Chisq")
## Single term additions
## 
## Model:
## Freq ~ Sat + Infl * Type * Cont
##          Df Deviance    AIC     LRT   Pr(Chi)    
## <none>        217.46 610.43                      
## Sat:Infl  4   111.08 512.05 106.371 < 2.2e-16 ***
## Sat:Type  6   156.79 561.76  60.669 3.292e-11 ***
## Sat:Cont  2   212.33 609.30   5.126   0.07708 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Based on this, it is useful to consider a “main-effects” model for satisfaction, adding all three two-way terms involving satisfaction.

The update() method provides an easy way to add (or subtract) terms from a fitted model object. In the model formula, . stands for whatever was on the left side (Freq) or on the right side (Sat + Infl*Type*Cont) of the model (house.glm0) that is being updated.

house.glm1 <- update(house.glm0, 
                     . ~ . + Sat*(Infl + Type + Cont))

For comparison, we note that the same model can be fit using the iterative proportional scaling algorithm of MASS::loglm().

(house.loglm1 <- MASS::loglm(Freq ~ Infl * Type * Cont + 
                              Sat*(Infl + Type + Cont), data = housing))
## Call:
## MASS::loglm(formula = Freq ~ Infl * Type * Cont + Sat * (Infl + 
##     Type + Cont), data = housing)
## 
## Statistics:
##                       X^2 df  P(> X^2)
## Likelihood Ratio 38.66222 34 0.2671359
## Pearson          38.90831 34 0.2582333

Did the model get better?

As before, anova() tests the added contribution of each more complex model over the one before. The residual deviance \(G^2\) has been reduced from \(G^2 (46) = 217.46\) for the baseline model house.glm0 to \(G^2 (34) = 38.66\) for the revised model house.glm1. The difference, \(G^2(M1 | M0) = G^2 (12) = 178.79\) tests the collective additional fit provided by the two-way association of satisfaction with the predictors.

anova(house.glm0, house.glm1, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: Freq ~ Sat + Infl * Type * Cont
## Model 2: Freq ~ Sat + Infl + Type + Cont + Infl:Type + Infl:Cont + Type:Cont + 
##     Sat:Infl + Sat:Type + Sat:Cont + Infl:Type:Cont
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        46    217.456                          
## 2        34     38.662 12   178.79 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Visualize model glm1

The model house.glm1 fits reasonably well, \(G^2 (34) = 38.66\), so most residuals are small. In the mosaic below, I use gp=shading_Friendly to shade the tiles so that positive and negative residuals are distinguished by color, and they are filled when the absolute value of the residual is outside \(\pm 2, 4\).

mosaic(house.glm1, 
       labeling_args=largs, 
       main=paste('Model [IS][TS][CS],', modFit(house.glm1) ), 
       gp=shading_Friendly)

One cell is highlighted here: The combination of medium influence, low contact and tower type is more likely to give low satisfaction than the model predicts. Is this just an outlier, or is there something that can be interpreted and perhaps improve the model fit? It is hard to tell, but the virtues of mosaic displays are that they help to:

  • diagnose overall patterns of associations,
  • spot unusual cells in relation to lack of fit of a given model.

Can we drop any terms?

When we add terms using MASS::addterm(), they are added sequentially. It might be the case that once some term is added, a previously added term is no longer important. Running MASS::dropterm() on the house.glm1 model checks for this.

MASS::dropterm(house.glm1, test = "Chisq")
## Single term deletions
## 
## Model:
## Freq ~ Sat + Infl + Type + Cont + Infl:Type + Infl:Cont + Type:Cont + 
##     Sat:Infl + Sat:Type + Sat:Cont + Infl:Type:Cont
##                Df Deviance    AIC     LRT   Pr(Chi)    
## <none>              38.662 455.63                      
## Sat:Infl        4  147.780 556.75 109.117 < 2.2e-16 ***
## Sat:Type        6  100.889 505.86  62.227 1.586e-11 ***
## Sat:Cont        2   54.722 467.69  16.060 0.0003256 ***
## Infl:Type:Cont  6   43.952 448.92   5.290 0.5072454    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Note that the three-way term Infl:Type:Cont is not significant. However, with Sat as the response, the associations of all predictors must be included in the model.

What about two-way interactions?

The model so far says that each of influence, type and contact has a separate, additive effect on the level of satisfaction, what I called a “main-effects” model. It might be the case that some of the predictors have interaction effects, e.g., that the effect of influence on satisfaction might vary with the type of dwelling or the level of contact.

An easy way to test for these is to update the main-effects model, adding all possible two-way interactions for Sat, one at a time, with addterm().

MASS::addterm(house.glm1,
               ~. + Sat:(Infl + Type + Cont)^2, 
              test  =  "Chisq")
## Single term additions
## 
## Model:
## Freq ~ Sat + Infl + Type + Cont + Infl:Type + Infl:Cont + Type:Cont + 
##     Sat:Infl + Sat:Type + Sat:Cont + Infl:Type:Cont
##               Df Deviance    AIC     LRT Pr(Chi)  
## <none>             38.662 455.63                  
## Sat:Infl:Type 12   16.107 457.08 22.5550 0.03175 *
## Sat:Infl:Cont  4   37.472 462.44  1.1901 0.87973  
## Sat:Type:Cont  6   28.256 457.23 10.4064 0.10855  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The result shows that adding the term Infl:Type:Sat reduces the deviance \(G^2\) from 38.66 to 16.11. The difference, \(G^2(M1 + ITS | M1) = G^2 (12) = 22.55\) reflects a substantial improvement. The remaining two-way interaction terms reduce the deviance by smaller and non-significant amounts, relative to house.glm1.

Model fitting should be guided by substance, not just statistical machinery. Nonetheless, it seems arguably sensible to add one two-way term to the model, giving house.glm2.

house.glm2 <- update(house.glm1,
                     . ~ . + Sat:Infl:Type)

Model parsimony: AIC & BIC

Adding more association terms to a model will always improve its fit. The question is whether that is “worth it”.

“Worth it” concerns the trade-off between model fit and parsimony. Sometimes we might prefer a model with fewer parameters to one that has a slightly better fit, but requires more model terms and parameters.

The AIC and BIC statistics are designed to adjust our assessment of model fit by penalizing it for using more parameters. Equivalently, they deduct from the likelihood ratio \(G^2\) a term proportional to the residual \(\text{df}\) of the model. In any case – smaller is better for both AIC and BIC.

\[AIC = G^2 - 2 \: \text{df}\] \[BIC = G^2 - \log(n) \: \text{df}\]

These measures are provided by AIC(), BIC(), and can be used to compare models using vcdExtra::LRstats().

LRstats(house.glm0, house.glm1, house.glm2)
## Likelihood summary table:
##               AIC    BIC LR Chisq Df Pr(>Chisq)    
## house.glm0 610.43 669.62  217.456 46     <2e-16 ***
## house.glm1 455.63 542.15   38.662 34     0.2671    
## house.glm2 457.08 570.91   16.107 22     0.8105    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

By these metrics, model house.glm1 is best on both AIC and BIC. The increased goodness-of-fit (smaller \(G^2\)) of model house.glm2 is not worth the extra cost of parameters in the house.glm2 model.

vcdExtra/inst/doc/tests.Rmd0000644000176200001440000003560214470701662015377 0ustar liggesusers--- title: "Tests of Independence" author: "Michael Friendly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: fig_caption: yes bibliography: ["vcd.bib", "vcdExtra.bib"] vignette: > %\VignetteIndexEntry{Tests of Independence} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/tut02-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # Old Sweave options # \SweaveOpts{engine=R,eps=TRUE,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{engine=R,height=6,width=7,results=hide,fig=FALSE,echo=TRUE} # \SweaveOpts{prefix.string=fig/vcd-tut,eps=FALSE} # \SweaveOpts{keep.source=TRUE} # preload datasets ??? set.seed(1071) library(vcd) library(vcdExtra) library(ggplot2) data(HairEyeColor) data(PreSex) data(Arthritis, package="vcd") art <- xtabs(~Treatment + Improved, data = Arthritis) if(!file.exists("fig")) dir.create("fig") ``` OK, now we're ready to do some analyses. This vignette focuses on relatively simple non-parametric tests and measures of association. ## CrossTable For tabular displays, the `CrossTable()` function in the `gmodels` package produces cross-tabulations modeled after `PROC FREQ` in SAS or `CROSSTABS` in SPSS. It has a wealth of options for the quantities that can be shown in each cell. Recall the GSS data used earlier. ```{r, GSStab} # Agresti (2002), table 3.11, p. 106 GSS <- data.frame( expand.grid(sex = c("female", "male"), party = c("dem", "indep", "rep")), count = c(279,165,73,47,225,191)) (GSStab <- xtabs(count ~ sex + party, data=GSS)) ``` Generate a cross-table showing cell frequency and the cell contribution to $\chi^2$. 
```{r, xtabs-ex2} # 2-Way Cross Tabulation library(gmodels) CrossTable(GSStab, prop.t=FALSE, prop.r=FALSE, prop.c=FALSE) ``` There are options to report percentages (row, column, cell), specify decimal places, produce Chi-square, Fisher, and McNemar tests of independence, report expected and residual values (pearson, standardized, adjusted standardized), include missing values as valid, annotate with row and column titles, and format as SAS or SPSS style output! See `help(CrossTable)` for details. ## Chi-square test For 2-way tables you can use `chisq.test()` to test independence of the row and column variables. By default, the $p$-value is calculated from the asymptotic chi-squared distribution of the test statistic. Optionally, the $p$-value can be derived via Monte Carlo simulation. ```{r, chisq} (HairEye <- margin.table(HairEyeColor, c(1, 2))) chisq.test(HairEye) chisq.test(HairEye, simulate.p.value = TRUE) ``` ## Fisher Exact Test {#sec:Fisher} `fisher.test(X)` provides an **exact test** of independence. `X` must be a two-way contingency table in table form. Another form, `fisher.test(X, Y)`, takes two categorical vectors of the same length. For tables larger than $2 \times 2$ the method can be computationally intensive (or can fail) if the frequencies are not small. ```{r fisher} fisher.test(GSStab) ``` Fisher's test is meant for tables with small total sample size. It generates an error for the `HairEye` data with $n$=592 total frequency. ```{r fisher-error, error=TRUE} fisher.test(HairEye) ``` ## Mantel-Haenszel test and conditional association {#sec:mantel} Use the `mantelhaen.test(X)` function to perform a Cochran-Mantel-Haenszel $\chi^2$ test of the null hypothesis that two nominal variables are *conditionally independent*, $A \perp B \; | \; C$, in each stratum, assuming that there is no three-way interaction. `X` is a 3-dimensional contingency table, where the last dimension refers to the strata. 
The `UCBAdmissions` serves as an example of a $2 \times 2 \times 6$ table, with `Dept` as the stratifying variable. ```{r, mantel1} # UC Berkeley Student Admissions mantelhaen.test(UCBAdmissions) ``` The results show no evidence for association between admission and gender when adjusted for department. However, we can easily see that the assumption of equal association across the strata (no 3-way association) is probably violated. For $2 \times 2 \times k$ tables, this can be examined from the odds ratios for each $2 \times 2$ table (`oddsratio()`), and tested by using `woolf_test()` in `vcd`. ```{r, mantel2} oddsratio(UCBAdmissions, log=FALSE) lor <- oddsratio(UCBAdmissions) # capture log odds ratios summary(lor) woolf_test(UCBAdmissions) ``` ## Some plot methods ### Fourfold displays We can visualize the odds ratios of Admission for each department with fourfold displays using `fourfold()`. The cell frequencies $n_{ij}$ of each $2 \times 2$ table are shown as a quarter circle whose radius is proportional to $\sqrt{n_{ij}}$, so that its area is proportional to the cell frequency. ```{r, reorder3} UCB <- aperm(UCBAdmissions, c(2, 1, 3)) dimnames(UCB)[[2]] <- c("Yes", "No") names(dimnames(UCB)) <- c("Sex", "Admit?", "Department") ``` Confidence rings for the odds ratio allow a visual test of the null of no association; the rings for adjacent quadrants overlap *iff* the observed counts are consistent with the null hypothesis. In the extended version (the default), brighter colors are used where the odds ratio is significantly different from 1. The following lines produce @ref(fig:fourfold1). ```{r} #| fourfold1, #| h=5, w=7.5, #| cap = "Fourfold display for the `UCBAdmissions` data. Where the odds ratio differs #| significantly from 1.0, the confidence bands do not overlap, and the circle quadrants are #| shaded more intensely." 
col <- c("#99CCFF", "#6699CC", "#F9AFAF", "#6666A0", "#FF0000", "#000080") fourfold(UCB, mfrow=c(2,3), color=col) ``` Another `vcd` function, `cotabplot()`, provides a more general approach to visualizing conditional associations in contingency tables, similar to trellis-like plots produced by `coplot()` and lattice graphics. The `panel` argument supplies a function used to render each conditional subtable. The following gives a display (not shown) similar to @ref(fig:fourfold1). ```{r fourfold2, eval=FALSE} cotabplot(UCB, panel = cotab_fourfold) ``` ### Doubledecker plots When we want to view the conditional probabilities of a response variable (e.g., `Admit`) in relation to several factors, an alternative visualization is a `doubledecker()` plot. This plot is a specialized version of a mosaic plot, which highlights the levels of a response variable (plotted vertically) in relation to the factors (shown horizontally). The following call produces @ref(fig:doubledecker), where we use indexing on the first factor (`Admit`) to make `Admitted` the highlighted level. In this plot, the association between `Admit` and `Gender` is shown where the heights of the highlighted conditional probabilities do not align. The excess of females admitted in Dept A stands out here. ```{r} #| doubledecker, #| h=5, w=8, #| out.width = "75%", #| cap = "Doubledecker display for the `UCBAdmissions` data. The heights #| of the highlighted bars show the conditional probabilities of `Admit`, #| given `Dept` and `Gender`." doubledecker(Admit ~ Dept + Gender, data=UCBAdmissions[2:1,,]) ``` ### Odds ratio plots Finally, there is a `plot()` method for `oddsratio` objects. By default, it shows the 95% confidence interval for the log odds ratio. @ref(fig:oddsratio) is produced by: ```{r} #| oddsratio, #| h=6, w=6, #| out.width = "60%", #| cap = "Log odds ratio plot for the `UCBAdmissions` data."
plot(lor, xlab="Department", ylab="Log Odds Ratio (Admit | Gender)") ``` {#fig:oddsratio} ## Cochran-Mantel-Haenszel tests for ordinal factors {#sec:CMH} The standard $\chi^2$ tests for association in a two-way table treat both table factors as nominal (unordered) categories. When one or both factors of a two-way table are quantitative or ordinal, more powerful tests of association may be obtained by taking ordinality into account, using row and/or column scores to test for linear trends or differences in row or column means. More general versions of the CMH tests (Landis et al., 1978) [@Landis-etal:1978] are provided by assigning numeric scores to the row and/or column variables. For example, with two ordinal factors (assumed to be equally spaced), assigning integer scores, `1:R` and `1:C` tests the linear $\times$ linear component of association. This is statistically equivalent to the Pearson correlation between the integer-scored table variables, with $\chi^2 = (n-1) r^2$, with only 1 $df$ rather than $(R-1)\times(C-1)$ for the test of general association. When only one table variable is ordinal, these general CMH tests are analogous to an ANOVA, testing whether the row mean scores or column mean scores are equal, again consuming fewer $df$ than the test of general association. The `CMHtest()` function in `vcdExtra` calculates these various CMH tests for two possibly ordered factors, optionally stratified by other factor(s). ***Example***: ```{r, table-form2, include=FALSE} ## A 4 x 4 table Agresti (2002, Table 2.8, p. 
57) Job Satisfaction JobSat <- matrix(c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4) dimnames(JobSat) = list(income=c("< 15k", "15-25k", "25-40k", "> 40k"), satisfaction=c("VeryD", "LittleD", "ModerateS", "VeryS")) JobSat <- as.table(JobSat) ``` Recall the $4 \times 4$ table, `JobSat` introduced in \@ref(sec:creating), ```{r, jobsat} JobSat ``` Treating the `satisfaction` levels as equally spaced, but using midpoints of the `income` categories as row scores gives the following results: ```{r, cmh1} CMHtest(JobSat, rscores=c(7.5,20,32.5,60)) ``` Note that with the relatively small cell frequencies, the test for general association gives no evidence of association. However, the `cor` test for linear x linear association on 1 df is nearly significant. The `coin` package contains the functions `cmh_test()` and `lbl_test()` for CMH tests of general association and linear x linear association respectively. ## Measures of Association There are a variety of statistical measures of *strength* of association for contingency tables--- similar in spirit to $r$ or $r^2$ for continuous variables. With a large sample size, even a small degree of association can show a significant $\chi^2$, as in the example below for the `GSS` data. The `assocstats()` function in `vcd` calculates the $\phi$ coefficient, the contingency coefficient, and Cramer's V for an $r \times c$ table. The input must be in table form, a two-way $r \times c$ table. It won't work with `GSS` in frequency form, but by now you should know how to convert. ```{r, assoc1} assocstats(GSStab) ``` For tables with ordinal variables, like `JobSat`, some people prefer the Goodman-Kruskal $\gamma$ statistic [@vcd:Agresti:2002, \S 2.4.3] based on a comparison of concordant and discordant pairs of observations in the case-form equivalent of a two-way table. 
```{r, gamma} GKgamma(JobSat) ``` A web article by Richard Darlington, [http://node101.psych.cornell.edu/Darlington/crosstab/TABLE0.HTM] gives further description of these and other measures of association. ## Measures of Agreement The `Kappa()` function in the `vcd` package calculates Cohen's $\kappa$ and weighted $\kappa$ for a square two-way table with the same row and column categories [@Cohen:60]. ^[Don't confuse this with `kappa()` in base R that computes something entirely different (the condition number of a matrix).] Normal-theory $z$-tests are obtained by dividing $\kappa$ by its asymptotic standard error (ASE). A `confint()` method for `Kappa` objects provides confidence intervals. ```{r, kappa} data(SexualFun, package = "vcd") (K <- Kappa(SexualFun)) confint(K) ``` A visualization of agreement [@Bangdiwala:87], both unweighted and weighted for degree of departure from exact agreement, is provided by the `agreementplot()` function. Figure \@ref(fig:agreesex) shows the agreementplot for the `SexualFun` data, produced as shown below. The Bangdiwala measures (returned by the function) represent the proportion of the shaded areas of the diagonal rectangles, using weights $w_1$ for exact agreement, and $w_2$ for partial agreement one step from the main diagonal. ```{r} #| agreesex, #| h=6, w=7, #| out.width = "70%", #| cap = "Agreement plot for the `SexualFun` data." agree <- agreementplot(SexualFun, main="Is sex fun?") unlist(agree) ``` In other examples, the agreement plot can help to show *sources* of disagreement. For example, when the shaded boxes are above or below the diagonal (red) line, a lack of exact agreement can be attributed in part to different frequency of use of categories by the two raters-- lack of *marginal homogeneity*. ## Correspondence analysis Correspondence analysis is a technique for visually exploring relationships between rows and columns in contingency tables. The `ca` package gives one implementation. 
For an $r \times c$ table, the method provides a breakdown of the Pearson $\chi^2$ for association in up to $M = \min(r-1, c-1)$ dimensions, and finds scores for the row ($x_{im}$) and column ($y_{jm}$) categories such that the observations have the maximum possible correlations.% ^[Related methods are the non-parametric CMH tests using assumed row/column scores (\@ref(sec:CMH), the analogous `glm()` model-based methods (\@ref(sec:CMH), and the more general RC models which can be fit using `gnm()`. Correspondence analysis differs in that it is a primarily descriptive/exploratory method (no significance tests), but is directly tied to informative graphic displays of the row/column categories.] Here, we carry out a simple correspondence analysis of the `HairEye` data. The printed results show that nearly 99% of the association between hair color and eye color can be accounted for in 2 dimensions, of which the first dimension accounts for 90%. ```{r, ca1} library(ca) ca(HairEye) ``` The resulting `ca` object can be plotted just by running the `plot()` method on the `ca` object, giving the result in \@ref(fig:ca-haireye). `plot.ca()` does not allow labels for dimensions; these can be added with `title()`. It can be seen that most of the association is accounted for by the ordering of both hair color and eye color along Dimension 1, a dark to light dimension. ```{r ca-haireye, cap = "Correspondence analysis plot for the `HairEye` data"} plot(ca(HairEye), main="Hair Color and Eye Color") ``` ## References vcdExtra/inst/doc/creating.html0000644000176200001440000040370114470742310016245 0ustar liggesusers Creating and manipulating frequency tables

Creating and manipulating frequency tables

Michael Friendly

2023-08-21

R provides many methods for creating frequency and contingency tables. Several are described below. In the examples below, we use some real examples and some anonymous ones, where the variables A, B, and C represent categorical variables, and X represents an arbitrary R data object.

Forms of frequency data

The first thing you need to know is that categorical data can be represented in three different forms in R, and it is sometimes necessary to convert from one form to another, for carrying out statistical tests, fitting models or visualizing the results. Once a data object exists in R, you can examine its complete structure with the str() function, or view the names of its components with the names() function.

Case form

Categorical data in case form are simply data frames containing individual observations, with one or more factors, used as the classifying variables. In case form, there may also be numeric covariates. The total number of observations is nrow(X), and the number of variables is ncol(X).

Example:

The Arthritis data is available in case form in the vcd package. There are two explanatory factors: Treatment and Sex. Age is a numeric covariate, and Improved is the response— an ordered factor, with levels None < Some < Marked. Excluding Age, this represents a \(2 \times 2 \times 3\) contingency table for Treatment, Sex and Improved, but in case form.

names(Arthritis)      # show the variables
## [1] "ID"        "Treatment" "Sex"       "Age"       "Improved"

str(Arthritis)        # show the structure
## 'data.frame':    84 obs. of  5 variables:
##  $ ID       : int  57 46 77 17 36 23 75 39 33 55 ...
##  $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Sex      : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Age      : int  27 29 30 32 46 58 59 59 63 63 ...
##  $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...

head(Arthritis,5)     # first 5 observations, same as Arthritis[1:5,] 
##   ID Treatment  Sex Age Improved
## 1 57   Treated Male  27     Some
## 2 46   Treated Male  29     None
## 3 77   Treated Male  30     None
## 4 17   Treated Male  32   Marked
## 5 36   Treated Male  46   Marked

Frequency form

Data in frequency form is also a data frame containing one or more factors, and a frequency variable, often called Freq or count. The total number of observations is: sum(X$Freq), sum(X[,"Freq"]) or some equivalent form.

The number of cells in the table is given by nrow(X).

Example: For small frequency tables, it is often convenient to enter them in frequency form using expand.grid() for the factors and c() to list the counts in a vector. The example below, from (Agresti, 2002) gives results for the 1991 General Social Survey, with respondents classified by sex and party identification.

# Agresti (2002), table 3.11, p. 106
GSS <- data.frame(
  expand.grid(sex = c("female", "male"), 
              party = c("dem", "indep", "rep")),
  count = c(279,165,73,47,225,191))

GSS
##      sex party count
## 1 female   dem   279
## 2   male   dem   165
## 3 female indep    73
## 4   male indep    47
## 5 female   rep   225
## 6   male   rep   191
names(GSS)
## [1] "sex"   "party" "count"
str(GSS)
## 'data.frame':    6 obs. of  3 variables:
##  $ sex  : Factor w/ 2 levels "female","male": 1 2 1 2 1 2
##  $ party: Factor w/ 3 levels "dem","indep",..: 1 1 2 2 3 3
##  $ count: num  279 165 73 47 225 191

sum(GSS$count)
## [1] 980

Table form

Table form data is represented by a matrix, array or table object, whose elements are the frequencies in an \(n\)-way table. The variable names (factors) and their levels are given by dimnames(X). The total number of observations is sum(X). The number of dimensions of the table is length(dimnames(X)), and the table sizes are given by sapply(dimnames(X), length).

Example: The HairEyeColor data set is stored in table form in the base R datasets package.

str(HairEyeColor)                      # show the structure
##  'table' num [1:4, 1:4, 1:2] 32 53 10 3 11 50 10 30 10 25 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
##   ..$ Eye : chr [1:4] "Brown" "Blue" "Hazel" "Green"
##   ..$ Sex : chr [1:2] "Male" "Female"

sum(HairEyeColor)                      # number of cases
## [1] 592

sapply(dimnames(HairEyeColor), length) # table dimension sizes
## Hair  Eye  Sex 
##    4    4    2

Example: Enter frequencies in a matrix, and assign dimnames, giving the variable names and category labels. Note that, by default, matrix() uses the elements supplied by columns in the result, unless you specify byrow=TRUE.

# A 4 x 4 table  Agresti (2002, Table 2.8, p. 57) Job Satisfaction
JobSat <- matrix(c( 1, 2, 1, 0, 
                    3, 3, 6, 1, 
                   10,10,14, 9, 
                    6, 7,12,11), 4, 4)

dimnames(JobSat) = list(
  income = c("< 15k", "15-25k", "25-40k", "> 40k"),
  satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS")
  )

JobSat
##         satisfaction
## income   VeryD LittleD ModerateS VeryS
##   < 15k      1       3        10     6
##   15-25k     2       3        10     7
##   25-40k     1       6        14    12
##   > 40k      0       1         9    11

JobSat is a matrix, not an object of class("table"), and some functions are happier with tables than matrices. You can coerce it to a table with as.table(),

JobSat <- as.table(JobSat)
str(JobSat)
##  'table' num [1:4, 1:4] 1 2 1 0 3 3 6 1 10 10 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ income      : chr [1:4] "< 15k" "15-25k" "25-40k" "> 40k"
##   ..$ satisfaction: chr [1:4] "VeryD" "LittleD" "ModerateS" "VeryS"

Ordered factors and reordered tables

In table form, the values of the table factors are ordered by their position in the table. Thus in the JobSat data, both income and satisfaction represent ordered factors, and the positions of the values in the rows and columns reflect their ordered nature.

Yet, for analysis, there are times when you need numeric values for the levels of ordered factors in a table, e.g., to treat a factor as a quantitative variable. In such cases, you can simply re-assign the dimnames attribute of the table variables. For example, here, we assign numeric values to income as the middle of their ranges, and treat satisfaction as equally spaced with integer scores.

dimnames(JobSat)$income <- c(7.5,20,32.5,60)
dimnames(JobSat)$satisfaction <- 1:4

For the HairEyeColor data, hair color and eye color are ordered arbitrarily. For visualizing the data using mosaic plots and other methods described below, it turns out to be more useful to assure that both hair color and eye color are ordered from dark to light. Hair colors are actually ordered this way already, and it is easiest to re-order eye colors by indexing. Again str() is your friend.

HairEyeColor <- HairEyeColor[, c(1,3,4,2), ]
str(HairEyeColor)
##  'table' num [1:4, 1:4, 1:2] 32 53 10 3 10 25 7 5 3 15 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Hair: chr [1:4] "Black" "Brown" "Red" "Blond"
##   ..$ Eye : chr [1:4] "Brown" "Hazel" "Green" "Blue"
##   ..$ Sex : chr [1:2] "Male" "Female"

This is also the order for both hair color and eye color shown in the result of a correspondence analysis (see the correspondence analysis plot of the HairEye data, fig:ca-haireye).

With data in case form or frequency form, when you have ordered factors represented with character values, you must ensure that they are treated as ordered in R.

Imagine that the Arthritis data was read from a text file.
By default, the levels of Improved will be ordered alphabetically: Marked, None, Some — not what we want. In this case, the function ordered() (and others) can be useful.

Arthritis <- read.csv("arthritis.txt",header=TRUE)
Arthritis$Improved <- ordered(Arthritis$Improved, 
                              levels=c("None", "Some", "Marked")
                              )

The dataset Arthritis in the vcd package is a data.frame in this form. With this order of Improved, the response in this data, a mosaic display of Treatment and Improved (shown in the figure below) shows a clearly interpretable pattern.

The original version of mosaic in the vcd package required the input to be a contingency table in array form, so we convert using xtabs().

data(Arthritis, package="vcd")
art <- xtabs(~Treatment + Improved, data = Arthritis)
mosaic(art, gp = shading_max, split_vertical = TRUE, main="Arthritis: [Treatment] [Improved]")
Mosaic plot for the Arthritis data, showing the marginal model of independence for Treatment and Improved. Age, a covariate, and Sex are ignored here.
Mosaic plot for the Arthritis data, showing the marginal model of independence for Treatment and Improved. Age, a covariate, and Sex are ignored here.

Several data sets in the package illustrate the salutary effects of reordering factor levels in mosaic displays and other analyses. See:

  • help(AirCrash)
  • help(Glass)
  • help(HouseTasks)

The seriation package now contains a general method to permute the row and column variables in a table according to the result of a correspondence analysis, using scores on the first CA dimension.

Re-ordering dimensions

Finally, there are situations where, particularly for display purposes, you want to re-order the dimensions of an \(n\)-way table, or change the labels for the variables or levels. This is easy when the data are in table form: aperm() permutes the dimensions, and assigning to names and dimnames changes variable names and level labels respectively. We will use the following version of UCBAdmissions in @ref(sec:mantel) below. 1

UCB <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(UCB)[[2]] <- c("Yes", "No")
names(dimnames(UCB)) <- c("Sex", "Admit?", "Department")

# display as a flattened table
stats::ftable(UCB)
##               Department   A   B   C   D   E   F
## Sex    Admit?                                   
## Male   Yes               512 353 120 138  53  22
##        No                313 207 205 279 138 351
## Female Yes                89  17 202 131  94  24
##        No                 19   8 391 244 299 317

structable()

For 3-way and larger tables the structable() function in vcd provides a convenient and flexible tabular display. The variables assigned to the rows and columns of a two-way display can be specified by a model formula.

structable(HairEyeColor)                   # show the table: default
##              Eye Brown Hazel Green Blue
## Hair  Sex                              
## Black Male          32    10     3   11
##       Female        36     5     2    9
## Brown Male          53    25    15   50
##       Female        66    29    14   34
## Red   Male          10     7     7   10
##       Female        16     7     7    7
## Blond Male           3     5     8   30
##       Female         4     5     8   64

structable(Hair+Sex ~ Eye, HairEyeColor)   # specify col ~ row variables
##       Hair Black        Brown         Red        Blond       
##       Sex   Male Female  Male Female Male Female  Male Female
## Eye                                                          
## Brown         32     36    53     66   10     16     3      4
## Hazel         10      5    25     29    7      7     5      5
## Green          3      2    15     14    7      7     8      8
## Blue          11      9    50     34   10      7    30     64

It also returns an object of class "structable" which may be plotted with mosaic() (not shown here).

HSE <- structable(Hair+Sex ~ Eye, HairEyeColor)    # save structable object
mosaic(HSE)                                        # plot it

table() and friends

You can generate frequency tables from factor variables using the table() function, tables of proportions using the prop.table() function, and marginal frequencies using margin.table().

For these examples, create some categorical vectors:

 n=500
 A <- factor(sample(c("a1","a2"), n, rep=TRUE))
 B <- factor(sample(c("b1","b2"), n, rep=TRUE))
 C <- factor(sample(c("c1","c2"), n, rep=TRUE))
 mydata <- data.frame(A,B,C)

These lines illustrate table-related functions:

# 2-Way Frequency Table
attach(mydata)
mytable <- table(A,B)   # A will be rows, B will be columns
mytable                 # print table
##     B
## A     b1  b2
##   a1 116 114
##   a2 138 132

margin.table(mytable, 1) # A frequencies (summed over B)
## A
##  a1  a2 
## 230 270
margin.table(mytable, 2) # B frequencies (summed over A)
## B
##  b1  b2 
## 254 246

prop.table(mytable)    # cell percentages
##     B
## A       b1    b2
##   a1 0.232 0.228
##   a2 0.276 0.264
prop.table(mytable, 1) # row percentages
##     B
## A           b1        b2
##   a1 0.5043478 0.4956522
##   a2 0.5111111 0.4888889
prop.table(mytable, 2) # column percentages
##     B
## A           b1        b2
##   a1 0.4566929 0.4634146
##   a2 0.5433071 0.5365854

table() can also generate multidimensional tables based on 3 or more categorical variables. In this case, you can use the ftable() or structable() function to print the results more attractively.

# 3-Way Frequency Table
mytable <- table(A, B, C)
ftable(mytable)
##       C c1 c2
## A  B         
## a1 b1   45 71
##    b2   59 55
## a2 b1   62 76
##    b2   76 56

table() ignores missing values by default. To include NA as a category in counts, include the table option exclude=NULL if the variable is a vector. If the variable is a factor you have to create a new factor using newfactor <- factor(oldfactor, exclude=NULL).

xtabs()

The xtabs() function allows you to create cross-tabulations of data using formula style input. This typically works with case-form data supplied in a data frame or a matrix. The result is a contingency table in array format, whose dimensions are determined by the terms on the right side of the formula.

# 3-Way Frequency Table
mytable <- xtabs(~A+B+C, data=mydata)

ftable(mytable)    # print table
##       C c1 c2
## A  B         
## a1 b1   45 71
##    b2   59 55
## a2 b1   62 76
##    b2   76 56

summary(mytable)   # chi-square test of independence
## Call: xtabs(formula = ~A + B + C, data = mydata)
## Number of cases in table: 500 
## Number of factors: 3 
## Test for independence of all factors:
##  Chisq = 9.888, df = 4, p-value = 0.04235

If a variable is included on the left side of the formula, it is assumed to be a vector of frequencies (useful if the data have already been tabulated in frequency form).

(GSStab <- xtabs(count ~ sex + party, data=GSS))
##         party
## sex      dem indep rep
##   female 279    73 225
##   male   165    47 191

summary(GSStab)
## Call: xtabs(formula = count ~ sex + party, data = GSS)
## Number of cases in table: 980 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 7.01, df = 2, p-value = 0.03005

Collapsing over table factors: aggregate(), margin.table() and apply()

It sometimes happens that we have a data set with more variables or factors than we want to analyse, or else, having done some initial analyses, we decide that certain factors are not important, and so should be excluded from graphic displays by collapsing (summing) over them. For example, mosaic plots and fourfold displays are often simpler to construct from versions of the data collapsed over the factors which are not shown in the plots.

The appropriate tools to use again depend on the form in which the data are represented— a case-form data frame, a frequency-form data frame (aggregate()), or a table-form array or table object (margin.table() or apply()).

When the data are in frequency form, and we want to produce another frequency data frame, aggregate() is a handy tool, using the argument FUN=sum to sum the frequency variable over the factors not mentioned in the formula.

Example: The data frame DaytonSurvey in the vcdExtra package represents a \(2^5\) table giving the frequencies of reported use (“ever used?”) of alcohol, cigarettes and marijuana in a sample of high school seniors, also classified by sex and race.

data("DaytonSurvey", package="vcdExtra")
str(DaytonSurvey)
## 'data.frame':    32 obs. of  6 variables:
##  $ cigarette: Factor w/ 2 levels "Yes","No": 1 2 1 2 1 2 1 2 1 2 ...
##  $ alcohol  : Factor w/ 2 levels "Yes","No": 1 1 2 2 1 1 2 2 1 1 ...
##  $ marijuana: Factor w/ 2 levels "Yes","No": 1 1 1 1 2 2 2 2 1 1 ...
##  $ sex      : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 2 2 ...
##  $ race     : Factor w/ 2 levels "white","other": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq     : num  405 13 1 1 268 218 17 117 453 28 ...
head(DaytonSurvey)
##   cigarette alcohol marijuana    sex  race Freq
## 1       Yes     Yes       Yes female white  405
## 2        No     Yes       Yes female white   13
## 3       Yes      No       Yes female white    1
## 4        No      No       Yes female white    1
## 5       Yes     Yes        No female white  268
## 6        No     Yes        No female white  218

To focus on the associations among the substances, we want to collapse over sex and race. The right-hand side of the formula used in the call to aggregate() gives the factors to be retained in the new frequency data frame, Dayton.ACM.df.

# data in frequency form
# collapse over sex and race
Dayton.ACM.df <- aggregate(Freq ~ cigarette+alcohol+marijuana, 
                           data=DaytonSurvey, 
                           FUN=sum)
Dayton.ACM.df
##   cigarette alcohol marijuana Freq
## 1       Yes     Yes       Yes  911
## 2        No     Yes       Yes   44
## 3       Yes      No       Yes    3
## 4        No      No       Yes    2
## 5       Yes     Yes        No  538
## 6        No     Yes        No  456
## 7       Yes      No        No   43
## 8        No      No        No  279

When the data are in table form, and we want to produce another table, apply() with FUN=sum can be used in a similar way to sum the table over dimensions not mentioned in the MARGIN argument. margin.table() is just a wrapper for apply() using the sum() function.

Example: To illustrate, we first convert the DaytonSurvey to a 5-way table using xtabs(), giving Dayton.tab.

# in table form
Dayton.tab <- xtabs(Freq ~ cigarette+alcohol+marijuana+sex+race, 
                    data=DaytonSurvey)
structable(cigarette+alcohol+marijuana ~ sex+race, 
           data=Dayton.tab)
##              cigarette Yes              No            
##              alcohol   Yes      No     Yes      No    
##              marijuana Yes  No Yes  No Yes  No Yes  No
## sex    race                                           
## female white           405 268   1  17  13 218   1 117
##        other            23  23   0   1   2  19   0  12
## male   white           453 228   1  17  28 201   1 133
##        other            30  19   1   8   1  18   0  17

Then, use apply() on Dayton.tab to give the 3-way table Dayton.ACM.tab summed over sex and race. The elements in this new table are the column sums for Dayton.tab shown by structable() just above.

# collapse over sex and race
Dayton.ACM.tab <- apply(Dayton.tab, MARGIN=1:3, FUN=sum)
Dayton.ACM.tab <- margin.table(Dayton.tab, 1:3)   # same result

structable(cigarette+alcohol ~ marijuana, data=Dayton.ACM.tab)
##           cigarette Yes      No    
##           alcohol   Yes  No Yes  No
## marijuana                          
## Yes                 911   3  44   2
## No                  538  43 456 279

Many of these operations can be performed using the **ply() functions in the plyr package. For example, with the data in a frequency form data frame, use ddply() to collapse over unmentioned factors, and plyr::summarise() as the function to be applied to each piece.

library(plyr)
Dayton.ACM.df <- plyr::ddply(DaytonSurvey, 
                             .(cigarette, alcohol, marijuana), 
                             plyr::summarise, Freq=sum(Freq))

Dayton.ACM.df
##   cigarette alcohol marijuana Freq
## 1       Yes     Yes       Yes  911
## 2       Yes     Yes        No  538
## 3       Yes      No       Yes    3
## 4       Yes      No        No   43
## 5        No     Yes       Yes   44
## 6        No     Yes        No  456
## 7        No      No       Yes    2
## 8        No      No        No  279

Collapsing table levels: collapse.table()

A related problem arises when we have a table or array and for some purpose we want to reduce the number of levels of some factors by summing subsets of the frequencies. For example, we may have initially coded Age in 10-year intervals, and decide that, either for analysis or display purposes, we want to reduce Age to 20-year intervals. The collapse.table() function in vcdExtra was designed for this purpose.

Example: Create a 3-way table, and collapse Age from 10-year to 20-year intervals. First, we generate a \(2 \times 6 \times 3\) table of random counts from a Poisson distribution with mean of 100.

# create some sample data in frequency form
sex <- c("Male", "Female")
age <- c("10-19", "20-29",  "30-39", "40-49", "50-59", "60-69")
education <- c("low", 'med', 'high')
data <- expand.grid(sex=sex, age=age, education=education)
counts <- rpois(36, 100)   # random Poisson cell frequencies
data <- cbind(data, counts)

# make it into a 3-way table
t1 <- xtabs(counts ~ sex + age + education, data=data)
structable(t1)
##                  age 10-19 20-29 30-39 40-49 50-59 60-69
## sex    education                                        
## Male   low              98   105   104    90    90   101
##        med              97   105   101    88    97   107
##        high             99   101   109    88    99    96
## Female low             102   117   101   105    85    88
##        med             106    84    92   116   110    96
##        high            106    96   121    91   107   102

Now collapse age to 20-year intervals, and education to 2 levels. In the arguments, levels of age and education given the same label are summed in the resulting smaller table.

# collapse age to 3 levels, education to 2 levels
t2 <- collapse.table(t1, 
         age=c("10-29", "10-29",  "30-49", "30-49", "50-69", "50-69"),
         education=c("<high", "<high", "high"))
structable(t2)
##                  age 10-29 30-49 50-69
## sex    education                      
## Male   <high           405   383   395
##        high            200   197   195
## Female <high           409   414   379
##        high            202   212   209

Collapsing table levels: dplyr

For data sets in frequency form or case form, factor levels can be collapsed by recoding the levels to some grouping. One handy function for this is dplyr::case_match().

Example:

The vcdExtra::Titanicp data set contains information on 1309 passengers on the RMS Titanic, including sibsp (0:8), the number of siblings or spouses aboard, and parch (0:9), the number of parents or children aboard, but the table is quite sparse.

table(Titanicp$sibsp, Titanicp$parch)
##    
##       0   1   2   3   4   5   6   9
##   0 790  52  43   2   2   2   0   0
##   1 183  90  29   5   4   4   2   2
##   2  26   9   6   1   0   0   0   0
##   3   3   9   8   0   0   0   0   0
##   4   0  10  12   0   0   0   0   0
##   5   0   0   6   0   0   0   0   0
##   8   0   0   9   0   0   0   0   0

For purposes of analysis, we might want to collapse both of these to the levels 0, 1, 2+. Here’s how:

library(dplyr)
Titanicp <- Titanicp |>
  mutate(sibspF = case_match(sibsp,
                            0 ~ "0",
                            1 ~ "1",
                            2:max(sibsp) ~ "2+")) |>
  mutate(sibspF = ordered(sibspF)) |>
  mutate(parchF = case_match(parch,
                             0 ~ "0",
                             1 ~ "1",
                             2:max(parch) ~ "2+")) |>
  mutate(parchF = ordered(parchF)) 

table(Titanicp$sibspF, Titanicp$parchF)
##     
##        0   1  2+
##   0  790  52  49
##   1  183  90  46
##   2+  29  28  42

car::recode() is a similar function, but with a less convenient interface.

The forcats package provides a collection of functions for reordering the levels of a factor or grouping categories according to their frequency:

  • forcats::fct_reorder(): Reorder a factor by another variable.
  • forcats::fct_infreq(): Reorder a factor by the frequency of values.
  • forcats::fct_relevel(): Change the order of a factor by hand.
  • forcats::fct_lump(): Collapse the least/most frequent values of a factor into “other”.
  • forcats::fct_collapse(): Collapse factor levels into manually defined groups.
  • forcats::fct_recode(): Change factor levels by hand.

Converting among frequency tables and data frames

As we’ve seen, a given contingency table can be represented equivalently in different forms, but some R functions were designed for one particular representation.

The table below shows some handy tools for converting from one form to another.

From this To this
Case form Frequency form Table form
Case form noop xtabs(~A+B) table(A,B)
Frequency form expand.dft(X) noop xtabs(count~A+B)
Table form expand.dft(X) as.data.frame(X) noop

For example, a contingency table in table form (an object of class "table") can be converted to a data.frame with as.data.frame(). 2 The resulting data.frame contains columns representing the classifying factors and the table entries (as a column named by the responseName argument, defaulting to Freq). This is the inverse of xtabs().

Example: Convert the GSStab in table form to a data.frame in frequency form.

as.data.frame(GSStab)
##      sex party Freq
## 1 female   dem  279
## 2   male   dem  165
## 3 female indep   73
## 4   male indep   47
## 5 female   rep  225
## 6   male   rep  191

Example: Convert the Arthritis data in case form to a 3-way table of Treatment \(\times\) Sex \(\times\) Improved. Note the use of with() to avoid having to use Arthritis$Treatment etc. within the call to table(). 3

Art.tab <- with(Arthritis, table(Treatment, Sex, Improved))
str(Art.tab)
##  'table' int [1:2, 1:2, 1:3] 19 6 10 7 7 5 0 2 6 16 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ Treatment: chr [1:2] "Placebo" "Treated"
##   ..$ Sex      : chr [1:2] "Female" "Male"
##   ..$ Improved : chr [1:3] "None" "Some" "Marked"

ftable(Art.tab)
##                  Improved None Some Marked
## Treatment Sex                             
## Placebo   Female            19    7      6
##           Male              10    0      1
## Treated   Female             6    5     16
##           Male               7    2      5

There may also be times that you will need an equivalent case form data.frame with factors representing the table variables rather than the frequency table. For example, the mca() function in package MASS only operates on data in this format. Marc Schwartz initially provided code for expand.dft() on the R-help mailing list for converting a table back into a case form data.frame. This function is included in vcdExtra.

Example: Convert the Arthritis data in table form (Art.tab) back to a data.frame in case form, with factors Treatment, Sex and Improved.

Art.df <- expand.dft(Art.tab)
str(Art.df)
## 'data.frame':    84 obs. of  3 variables:
##  $ Treatment: chr  "Placebo" "Placebo" "Placebo" "Placebo" ...
##  $ Sex      : chr  "Female" "Female" "Female" "Female" ...
##  $ Improved : chr  "None" "None" "None" "None" ...

A complex example

If you’ve followed so far, you’re ready for a more complicated example. The data file, tv.dat, represents a 4-way table of size \(5 \times 11 \times 5 \times 3\) where the table variables (unnamed in the file) are read as V1–V4, and the cell frequency is read as V5. The file, stored in the extdata directory of vcdExtra, can be read as follows:

tv.data<-read.table(system.file("extdata","tv.dat", package="vcdExtra"))
head(tv.data,5)
##   V1 V2 V3 V4 V5
## 1  1  1  1  1  6
## 2  2  1  1  1 18
## 3  3  1  1  1  6
## 4  4  1  1  1  2
## 5  5  1  1  1 11

For a local file, just use read.table() in this form:

tv.data<-read.table("C:/R/data/tv.dat")

The data tv.dat came from the initial implementation of mosaic displays in R by Jay Emerson. In turn, they came from the initial development of mosaic displays (Hartigan & Kleiner, 1984) that illustrated the method with data on a large sample of TV viewers whose behavior had been recorded for the Nielsen ratings. This data set contains sample television audience data from Nielsen Media Research for the week starting November 6, 1995.

The table variables are:

  • V1– values 1:5 correspond to the days Monday–Friday;
  • V2– values 1:11 correspond to the quarter hour times 8:00PM through 10:30PM;
  • V3– values 1:5 correspond to ABC, CBS, NBC, Fox, and non-network choices;
  • V4– values 1:3 correspond to transition states: turn the television Off, Switch channels, or Persist in viewing the current channel.

We are interested in just the cell frequencies, and rely on the facts that:

  1. the table is complete— there are no missing cells, so nrow(tv.data) = 825;
  2. the observations are ordered so that V1 varies most rapidly and V4 most slowly. From this, we can just extract the frequency column and reshape it into an array. [That would be dangerous if any observations were out of order.]
TV <- array(tv.data[,5], dim=c(5,11,5,3))                                        
dimnames(TV) <- list(c("Monday","Tuesday","Wednesday","Thursday","Friday"), 
                     c("8:00","8:15","8:30","8:45","9:00","9:15","9:30",         
                       "9:45","10:00","10:15","10:30"),                            
                     c("ABC","CBS","NBC","Fox","Other"), 
                     c("Off","Switch","Persist"))

names(dimnames(TV))<-c("Day", "Time", "Network", "State")

More generally (even if there are missing cells), we can use xtabs() (or plyr::daply()) to do the cross-tabulation, using V5 as the frequency variable. Here’s how to do this same operation with xtabs():

TV <- xtabs(V5 ~ ., data=tv.data)
dimnames(TV) <- list(Day = c("Monday","Tuesday","Wednesday","Thursday","Friday"), 
                     Time = c("8:00","8:15","8:30","8:45","9:00","9:15","9:30",         
                              "9:45","10:00","10:15","10:30"),                            
                     Network = c("ABC","CBS","NBC","Fox","Other"), 
                     State = c("Off","Switch","Persist"))

# table dimensions
dim(TV)

But this 4-way table is too large and awkward to work with. Among the networks, Fox and Other occur infrequently. We can also cut it down to a 3-way table by considering only viewers who persist with the current station. 4

TV2 <- TV[,,1:3,]      # keep only ABC, CBS, NBC
TV2 <- TV2[,,,3]       # keep only Persist -- now a 3 way table
structable(TV2)
##                   Time 8:00 8:15 8:30 8:45 9:00 9:15 9:30 9:45 10:00 10:15 10:30
## Day       Network                                                               
## Monday    ABC           146  151  156   83  325  350  386  340   352   280   278
##           CBS           337  293  304  233  311  251  241  164   252   265   272
##           NBC           263  219  236  140  226  235  239  246   279   263   283
## Tuesday   ABC           244  181  231  205  385  283  345  192   329   351   364
##           CBS           173  180  184  109  218  235  256  250   274   263   261
##           NBC           315  254  280  241  370  214  195  111   188   190   210
## Wednesday ABC           233  161  194  156  339  264  279  140   237   228   203
##           CBS           158  126  207   59   98  103  122   86   109   105   110
##           NBC           134  146  166   66  194  230  264  143   274   289   306
## Thursday  ABC           174  183  197  181  187  198  211   86   110   122   117
##           CBS           196  185  195  104  106  116  116   47   102    84    84
##           NBC           515  463  472  477  590  473  446  349   649   705   747
## Friday    ABC           294  281  305  239  278  246  245  138   246   232   233
##           CBS           130  144  154   81  129  153  136  126   138   136   152
##           NBC           195  220  248  160  172  164  169   85   183   198   204

Finally, for some purposes, we might want to collapse the 11 times into a smaller number. Half-hour time slots make more sense. Here, we use as.data.frame.table() to convert the table back to a data frame, levels() to re-assign the values of Time, and finally, xtabs() to give a new, collapsed frequency table.

TV.df <- as.data.frame.table(TV2)
levels(TV.df$Time) <- c(rep("8:00", 2),
                        rep("8:30", 2),
                        rep("9:00", 2), 
                        rep("9:30", 2), 
                        rep("10:00",2),
                            "10:30"
                        )

TV3 <- xtabs(Freq ~ Day + Time + Network, TV.df)

structable(Day ~ Time+Network, TV3)
##               Day Monday Tuesday Wednesday Thursday Friday
## Time  Network                                             
## 8:00  ABC            297     425       394      357    575
##       CBS            630     353       284      381    274
##       NBC            482     569       280      978    415
## 8:30  ABC            239     436       350      378    544
##       CBS            537     293       266      299    235
##       NBC            376     521       232      949    408
## 9:00  ABC            675     668       603      385    524
##       CBS            562     453       201      222    282
##       NBC            461     584       424     1063    336
## 9:30  ABC            726     537       419      297    383
##       CBS            405     506       208      163    262
##       NBC            485     306       407      795    254
## 10:00 ABC            632     680       465      232    478
##       CBS            517     537       214      186    274
##       NBC            542     378       563     1354    381
## 10:30 ABC            278     364       203      117    233
##       CBS            272     261       110       84    152
##       NBC            283     210       306      747    204

We’ve come this far, so we might as well show a mosaic display. This is analogous to that used by Hartigan & Kleiner (1984).

mosaic(TV3, shade = TRUE,
       labeling = labeling_border(rot_labels = c(0, 0, 0, 90)))

This mosaic display can be read at several levels, corresponding to the successive splits of the tiles and the residual shading. Several trends are clear for viewers who persist:

  • Overall, there are about the same number of viewers on each weekday, with slightly more on Thursday.
  • Looking at time slots, viewership is slightly greater from 9:00 - 10:00 overall and also 8:00 - 9:00 on Thursday and Friday

From the residual shading of the tiles:

  • Monday: CBS dominates in all time slots.
  • Tuesday: ABC and CBS dominate after 9:00
  • Thursday: is a largely NBC day
  • Friday: ABC dominates in the early evening

References

Agresti, A. (2002). Categorical data analysis (2nd ed.). Hoboken, New Jersey: John Wiley & Sons.
Hartigan, J. A., & Kleiner, B. (1984). A mosaic of television ratings. The American Statistician, 38, 32–35.

  1. Changing Admit to Admit? might be useful for display purposes, but is dangerous— because it is then difficult to use that variable name in a model formula. See @ref(sec:tips) for the options labeling_args and set_labels to change variable and level names for displays in the strucplot framework.↩︎

  2. Because R is object-oriented, this is actually a short-hand for the function as.data.frame.table().↩︎

  3. table() does not allow a data argument to provide an environment in which the table variables are to be found. In the examples in @ref(sec:table) I used attach(mydata) for this purpose, but attach() leaves the variables in the global environment, while with() just evaluates the table() expression in a temporary environment of the data.↩︎

  4. This relies on the fact that indexing an array drops dimensions of length 1 by default, using the argument drop=TRUE; the result is coerced to the lowest possible dimension.↩︎

vcdExtra/inst/doc/datasets.html0000644000176200001440000007364614470742311016275 0ustar liggesusers Datasets for categorical data analysis

Datasets for categorical data analysis

Michael Friendly

2023-08-21

The vcdExtra package contains 45 datasets, taken from the literature on categorical data analysis, and selected to illustrate various methods of analysis and data display. These are in addition to the 33 datasets in the vcd package.

To make it easier to find those which illustrate a particular method, the datasets in vcdExtra have been classified using method tags. This vignette creates an “inverse table”, listing the datasets that apply to each method. It also illustrates a general method for classifying datasets in R packages.

library(dplyr)
library(tidyr)
library(readxl)

Processing tags

Using the result of vcdExtra::datasets(package="vcdExtra") I created a spreadsheet, vcdExtra-datasets.xlsx, and then added method tags.

dsets_tagged <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"), 
                           sheet="vcdExtra-datasets")

dsets_tagged <- dsets_tagged |>
  dplyr::select(-Title, -dim) |>
  dplyr::rename(dataset = Item)

head(dsets_tagged)
## # A tibble: 6 × 3
##   dataset   class      tags                                
##   <chr>     <chr>      <chr>                               
## 1 Abortion  table      loglinear;logit;2x2                 
## 2 Accident  data.frame loglinear; glm; logistic            
## 3 AirCrash  data.frame reorder; ca                         
## 4 Alligator data.frame loglinear;multinomial;zeros         
## 5 Bartlett  table      2x2;loglinear; homogeneity;oddsratio
## 6 Burt      data.frame ca

To invert the table, we need to split tags into separate observations, then collapse the rows for the same tag.

dset_split <- dsets_tagged |>
  tidyr::separate_longer_delim(tags, delim = ";") |>
  dplyr::mutate(tag = stringr::str_trim(tags)) |>
  dplyr::select(-tags)

#' ## collapse the rows for the same tag
tag_dset <- dset_split |>
  arrange(tag) |>
  dplyr::group_by(tag) |>
  dplyr::summarise(datasets = paste(dataset, collapse = "; ")) |> ungroup()

# get a list of the unique tags
unique(tag_dset$tag)
##  [1] "2x2"         "agree"       "binomial"    "ca"          "glm"        
##  [6] "homogeneity" "lm"          "logistic"    "logit"       "loglinear"  
## [11] "mobility"    "multinomial" "oddsratio"   "one-way"     "ordinal"    
## [16] "poisson"     "reorder"     "square"      "zeros"

Make this into a nice table

Another sheet in the spreadsheet gives a more descriptive topic corresponding to each tag.

tags <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"), 
                   sheet="tags")
head(tags)
## # A tibble: 6 × 2
##   tag         topic                     
##   <chr>       <chr>                     
## 1 2x2         2 by 2 tables             
## 2 agree       observer agreement        
## 3 binomial    binomial distributions    
## 4 ca          correspondence analysis   
## 5 glm         generalized linear models 
## 6 homogeneity homogeneity of association

Now, join this with the tag_dset created above.

tag_dset <- tag_dset |>
  dplyr::left_join(tags, by = "tag") |>
  dplyr::relocate(topic, .after = tag)

tag_dset |>
  dplyr::select(-tag) |>
  head()
## # A tibble: 6 × 2
##   topic                      datasets                                           
##   <chr>                      <chr>                                              
## 1 2 by 2 tables              Abortion; Bartlett; Heart                          
## 2 observer agreement         Mammograms                                         
## 3 binomial distributions     Geissler                                           
## 4 correspondence analysis    AirCrash; Burt; Draft1970table; Gilby; HospVisits;…
## 5 generalized linear models  Accident; Cormorants; DaytonSurvey; Donner; Draft1…
## 6 homogeneity of association Bartlett

Make the table

Use purrr::map() to apply add_links() to all the datasets for each tag. (mutate(datasets = add_links(datasets)) by itself doesn’t work.)

tag_dset |>
  dplyr::select(-tag) |>
  dplyr::mutate(datasets = purrr::map(datasets, add_links)) |>
  knitr::kable()
topic datasets
2 by 2 tables Abortion; Bartlett; Heart
observer agreement Mammograms
binomial distributions Geissler
correspondence analysis AirCrash; Burt; Draft1970table; Gilby; HospVisits; HouseTasks; Mental
generalized linear models Accident; Cormorants; DaytonSurvey; Donner; Draft1970table; GSS; ICU; PhdPubs
homogeneity of association Bartlett
linear models Draft1970
logistic regression Accident; Donner; ICU; Titanicp
logit models Abortion; Cancer
loglinear models Abortion; Accident; Alligator; Bartlett; Caesar; Cancer; Detergent; Dyke; Heckman; Hoyt; JobSat; Mice; TV; Titanicp; Toxaemia; Vietnam; Vote1980; WorkerSat
mobility tables Glass; Hauser79; Mobility; Yamaguchi87
multinomial models Alligator
odds ratios Bartlett; Fungicide
one-way tables CyclingDeaths; Depends; ShakeWords
ordinal variables Draft1970table; Gilby; HairEyePlace; Hauser79; HospVisits; JobSat; Mammograms; Mental; Mice; Mobility; Yamaguchi87
Poisson distributions Cormorants; PhdPubs
reordering values AirCrash; Glass; HouseTasks
square tables Glass; Hauser79; Mobility; Yamaguchi87
zero counts Alligator; Caesar; PhdPubs; Vote1980

Voila!

vcdExtra/inst/doc/mosaics.html0000644000176200001440000047543514470742316016132 0ustar liggesusers Mosaic plots

Mosaic plots

Michael Friendly

2023-08-21

Mosaic plots provide an ideal method both for visualizing contingency tables and for visualizing the fit— or more importantly— lack of fit of a loglinear model.

For a two-way table, mosaic(), by default, fits a model of independence, \([A][B]\) or ~A + B as an R formula. The vcdExtra package extends this to models fit using glm(..., family=poisson), which can include specialized models for ordered factors, or square tables that are intermediate between the saturated model, \([A B]\) = A * B, and the independence model \([A][B]\).

For \(n\)-way tables, vcd::mosaic() can fit any loglinear model, and can also be used to plot a model fit with MASS::loglm(). The vcdExtra package extends this to models fit using stats::glm() and, by extension, to non-linear models fit using the gnm package.

See Friendly (1994), Friendly (1999) for the statistical ideas behind these uses of mosaic displays in connection with loglinear models. Our book Friendly & Meyer (2016) gives a detailed discussion of mosaic plots and many more examples.

The essential ideas are to:

  • recursively sub-divide a unit square into rectangular “tiles” for the cells of the table, such that the area of each tile is proportional to the cell frequency. Tiles are split in a sequential order:

    • First according to the marginal proportions of a first variable, V1
    • Next according to the conditional proportions of a 2nd variable, V2 | V1
    • Next according to the conditional proportions of a 3rd variable, V3 | {V1, V2}
  • For a given loglinear model, the tiles can then be shaded in various ways to reflect the residuals (lack of fit) for a given model.

  • The pattern of residuals can then be used to suggest a better model or understand where a given model fits or does not fit.

mosaic() provides a wide range of options for the directions of splitting, the specification of shading, labeling, spacing, legend and many other details. It is actually implemented as a special case of a more general class of displays for \(n\)-way tables called strucplot, including sieve diagrams, association plots, double-decker plots as well as mosaic plots.

For details, see help(strucplot) and the “See also” links therein, and also Meyer, Zeileis, & Hornik (2006), which is available as an R vignette via vignette("strucplot", package="vcd").

Example: A mosaic plot for the Arthritis treatment data fits the model of independence, ~ Treatment + Improved and displays the association in the pattern of residual shading. The goal is to visualize the difference in the proportions of Improved for the two levels of Treatment : “Placebo” and “Treated”.

The plot below is produced with the following call to mosaic(). With the first split by Treatment and the shading used, it is easy to see that more people given the placebo experienced no improvement, while more people given the active treatment reported marked improvement.

data(Arthritis, package="vcd")
art <- xtabs(~Treatment + Improved, data = Arthritis)
mosaic(art, gp = shading_max, 
            split_vertical = TRUE, 
            main="Arthritis: [Treatment] [Improved]")
Mosaic plot for the Arthritis data, using shading_max
Mosaic plot for the Arthritis data, using shading_max

gp = shading_max specifies that color in the plot signals a significant residual at a 90% or 99% significance level, with the more intense shade for 99%. Note that the residuals for the independence model were not large (as shown in the legend), yet the association between Treatment and Improved is highly significant.

summary(art)
## Call: xtabs(formula = ~Treatment + Improved, data = Arthritis)
## Number of cases in table: 84 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 13.055, df = 2, p-value = 0.001463

In contrast, one of the other shading schemes, from Friendly (1994) (use: gp = shading_Friendly), uses fixed cutoffs of \(\pm 2, \pm 4\), to shade cells which are individually significant at approximately \(\alpha = 0.05\) and \(\alpha = 0.001\) levels, respectively. The plot below uses gp = shading_Friendly.

mosaic(art, gp = shading_Friendly, 
            split_vertical = TRUE, 
            main="Arthritis: gp = shading_Friendly")
Mosaic plot for the Arthritis data, using shading_Friendly
Mosaic plot for the Arthritis data, using shading_Friendly

Permuting variable levels

Mosaic plots using tables or frequency data frames as input typically take the levels of the table variables in the order presented in the dataset. For character variables, this is often alphabetical order. That might be helpful for looking up a value, but is unhelpful for seeing and understanding the pattern of association.

It is usually much better to order the levels of the row and column variables to help reveal the nature of their association. This is an example of effect ordering for data display (Friendly & Kwan, 2003).

Example:

Data from Glass (1954) gave this 5 x 5 table on the occupations of 3500 British fathers and their sons, where the occupational categories are listed in alphabetic order.

data(Glass, package="vcdExtra")
(glass.tab <- xtabs(Freq ~ father + son, data=Glass))
##               son
## father         Managerial Professional Skilled Supervisory Unskilled
##   Managerial          174           28     154          84        55
##   Professional         45           50      18           8         8
##   Skilled             150           14     714         185       447
##   Supervisory          78           11     223         110        96
##   Unskilled            42            3     320          72       411

The mosaic display shows very strong association, but aside from the diagonal cells, the pattern is unclear. Note the use of set_varnames to give more descriptive labels for the variables, abbreviate to shorten the occupational category labels, and interpolate to set the shading levels for the mosaic.

largs <- list(set_varnames=list(father="Father's Occupation", 
                                son="Son's Occupation"),
              abbreviate=10)
gargs <- list(interpolate=c(1,2,4,8))

mosaic(glass.tab, 
  shade=TRUE, 
  labeling_args=largs, 
  gp_args=gargs,
  main="Alphabetic order", 
  legend=FALSE, 
  rot_labels=c(20,90,0,70))

The occupational categories differ in status, and can be reordered correctly as follows, from Professional down to Unskilled.

# reorder by status
ord <- c(2, 1, 4, 3, 5) 
row.names(glass.tab)[ord]
## [1] "Professional" "Managerial"   "Supervisory"  "Skilled"      "Unskilled"

The revised mosaic plot can be produced by indexing the rows and columns of the table using ord.

mosaic(glass.tab[ord, ord], 
  shade=TRUE, 
  labeling_args=largs,  
  gp_args=gargs,
  main="Effect order", 
  legend=FALSE, 
  rot_labels=c(20,90,0,70))

From this, and for the examples in the next section, it is useful to re-define father and son as ordered factors in the original Glass frequency data.frame.

Glass.ord <- Glass
Glass.ord$father <- ordered(Glass.ord$father, levels=levels(Glass$father)[ord])
Glass.ord$son    <- ordered(Glass.ord$son,    levels=levels(Glass$son)[ord])
str(Glass.ord)
## 'data.frame':    25 obs. of  3 variables:
##  $ father: Ord.factor w/ 5 levels "Professional"<..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ son   : Ord.factor w/ 5 levels "Professional"<..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ Freq  : int  50 45 8 18 8 28 174 84 154 55 ...

Square tables

For mobility tables such as this, where the rows and columns refer to the same occupational categories it comes as no surprise that there is a strong association in the diagonal cells: most often, sons remain in the same occupational categories as their fathers.

However, the re-ordered mosaic display also reveals something subtler: when a son differs in occupation from the father, it is more likely that he will appear in a category one-step removed than more steps removed. The residuals seem to decrease with the number of steps from the diagonal.

For such tables, specialized loglinear models provide interesting cases intermediate between the independence model, [A] [B], and the saturated model, [A B]. These can be fit using glm(), with the data in frequency form,

glm(Freq ~ A + B + assoc, data = ..., family = poisson)

where assoc is a special term to handle a restricted form of association, different from A:B which specifies the saturated model in this notation.

  • Quasi-independence: Asserts independence, but ignores the diagonal cells by fitting them exactly. The loglinear model is: \(\log m_{ij} = \mu + \lambda^A_i + \lambda^B_j + \delta_i I(i = j)\), where \(I()\) is the indicator function.

  • Symmetry: This model asserts that the joint distribution of the row and column variables is symmetric, that is \(\pi_{ij} = \pi_{ji}\): A son is equally likely to move from their father’s occupational category \(i\) to another category, \(j\), as the reverse, moving from \(j\) to \(i\). Symmetry is quite strong, because it also implies marginal homogeneity, that the marginal probabilities of the row and column variables are equal, \(\pi_{i+} = \sum_j \pi_{ij} = \sum_j \pi_{ji} = \pi_{+i}\) for all \(i\).

  • Quasi-symmetry: This model uses the standard main-effect terms in the loglinear model, but asserts that the association parameters are symmetric, \(\log m_{ij} = \mu + \lambda^A_i + \lambda^B_j + \lambda^{AB}_{ij}\), where \(\lambda^{AB}_{ij} = \lambda^{AB}_{ji}\).

The gnm package provides a variety of these functions: gnm::Diag(), gnm::Symm() and gnm::Topo() for an interaction factor as specified by an array of levels, which may be arbitrarily structured.

For example, the following generates a term for a diagonal factor in a \(4 \times 4\) table. The diagonal values reflect parameters fitted for each diagonal cell. Off-diagonal values, “.” are ignored.

rowfac <- gl(4, 4, 16)
colfac <- gl(4, 1, 16)
diag4by4 <- Diag(rowfac, colfac)
matrix(Diag(rowfac, colfac, binary = FALSE), 4, 4)
##      [,1] [,2] [,3] [,4]
## [1,] "1"  "."  "."  "." 
## [2,] "."  "2"  "."  "." 
## [3,] "."  "."  "3"  "." 
## [4,] "."  "."  "."  "4"

Symm() constructs parameters for symmetric cells. The particular values don’t matter. All that does matter is that the same value, e.g., 1:2 appears in both the (1,2) and (2,1) cells.

symm4by4 <- Symm(rowfac, colfac)
matrix(symm4by4, 4, 4)
##      [,1]  [,2]  [,3]  [,4] 
## [1,] "1:1" "1:2" "1:3" "1:4"
## [2,] "1:2" "2:2" "2:3" "2:4"
## [3,] "1:3" "2:3" "3:3" "3:4"
## [4,] "1:4" "2:4" "3:4" "4:4"

Example: To illustrate, we fit the four models below, starting with the independence model Freq ~ father + son and then adding terms to reflect the restricted forms of association, e.g., Diag(father, son) for diagonal terms and Symm(father, son) for symmetry.

library(gnm)
glass.indep <- glm(Freq ~ father + son, 
                   data = Glass.ord, family=poisson)
glass.quasi <- glm(Freq ~ father + son + Diag(father, son),  
                   data = Glass.ord, family=poisson)
glass.symm  <- glm(Freq ~ Symm(father, son),  
                   data = Glass.ord, family=poisson)
glass.qsymm <- glm(Freq ~ father + son + Symm(father, son),  
                   data = Glass.ord, family=poisson)

We can visualize these using the vcdExtra::mosaic.glm() method, which extends mosaic displays to handle fitted glm objects. Technical note: for models fitted using glm(), standardized residuals, residuals_type="rstandard" have better statistical properties than the default Pearson residuals in mosaic plots and analysis.

mosaic(glass.quasi, 
  residuals_type="rstandard", 
  shade=TRUE, 
  labeling_args=largs,  
  gp_args=gargs,
  main="Quasi-Independence",
  legend=FALSE, 
  rot_labels=c(20,90,0,70)
  )

Mosaic plots for the other models would give further visual assessment of these models, however we can also test differences among them. For nested models, anova() gives tests of how much better a more complex model is compared to the previous one.

# model comparisons: for *nested* models
anova(glass.indep, glass.quasi, glass.qsymm, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: Freq ~ father + son
## Model 2: Freq ~ father + son + Diag(father, son)
## Model 3: Freq ~ father + son + Symm(father, son)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        16     792.19                          
## 2        11     235.78  5   556.41 < 2.2e-16 ***
## 3         6       4.66  5   231.12 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Alternatively, vcdExtra::LRstats() gives model summaries for a collection of models, not necessarily nested, with AIC and BIC statistics reflecting model parsimony.

models <- glmlist(glass.indep, glass.quasi, glass.symm, glass.qsymm)
LRstats(models)
## Likelihood summary table:
##                AIC    BIC LR Chisq Df Pr(>Chisq)    
## glass.indep 960.91 971.88   792.19 16  < 2.2e-16 ***
## glass.quasi 414.50 431.57   235.78 11  < 2.2e-16 ***
## glass.symm  218.18 236.47    37.46 10  4.704e-05 ***
## glass.qsymm 193.38 216.54     4.66  6     0.5876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

By all criteria, the model of quasi-symmetry fits best. The residual deviance \(G^2\) is not significant. The mosaic is largely unshaded, indicating a good fit, but there are a few shaded cells that indicate the remaining positive and negative residuals. For comparative mosaic displays, it is sometimes useful to show the \(G^2\) statistic in the main title, using vcdExtra::modFit() for this purpose.

mosaic(glass.qsymm, 
  residuals_type="rstandard", 
  shade=TRUE, 
  labeling_args=largs,  
  gp_args=gargs,
  main = paste("Quasi-Symmetry", modFit(glass.qsymm)),
  legend=FALSE, 
  rot_labels=c(20,90,0,70)
  )

Correspondence analysis ordering

When natural orders for row and column levels are not given a priori, we can find orderings that make more sense using correspondence analysis.

The general ideas are that:

  • Correspondence analysis assigns scores to the row and column variables to best account for the association in 1, 2, … dimensions

  • The first CA dimension accounts for the largest proportion of the Pearson \(\chi^2\)

  • Therefore, permuting the levels of the row and column variables by the CA Dim1 scores gives a more coherent mosaic plot, more clearly showing the nature of the association.

  • The seriation package now has a method to order variables in frequency tables using CA.

Example: Consider the HouseTasks dataset, a 13 x 4 table of frequencies of household tasks performed by couples, either by the Husband, Wife, Alternating or Jointly. You can see from the table that some tasks (Repairs) are done largely by the husband; some (laundry, main meal) are largely done by the wife, while others are done jointly or alternating between husband and wife. But the Task and Who levels are both in alphabetical order.

data("HouseTasks", package = "vcdExtra")
HouseTasks
##            Who
## Task        Alternating Husband Jointly Wife
##   Breakfast          36      15       7   82
##   Dinner             11       7      13   77
##   Dishes             24       4      53   32
##   Driving            51      75       3   10
##   Finances           13      21      66   13
##   Holidays            1       6     153    0
##   Insurance           1      53      77    8
##   Laundry            14       2       4  156
##   Main_meal          20       5       4  124
##   Official           46      23      15   12
##   Repairs             3     160       2    0
##   Shopping           23       9      55   33
##   Tidying            11       1      57   53

The naive mosaic plot for this dataset is shown below, splitting first by Task and then by Who. Due to the length of the factor labels, some features of labeling were used to make the display more readable.

require(vcd)
mosaic(HouseTasks, shade = TRUE,
       labeling = labeling_border(rot_labels = c(45,0, 0, 0), 
                                  offset_label =c(.5,5,0, 0),
                                  varnames = c(FALSE, TRUE),
                                  just_labels=c("center","right"),
                                  tl_varnames = FALSE),
       legend = FALSE)

Correspondence analysis, using the ca package, shows that nearly 89% of the \(\chi^2\) can be accounted for in two dimensions.

require(ca)
## Loading required package: ca
HT.ca <- ca(HouseTasks)
summary(HT.ca, rows=FALSE, columns=FALSE)
## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.542889  48.7  48.7  ************             
##  2      0.445003  39.9  88.6  **********               
##  3      0.127048  11.4 100.0  ***                      
##         -------- -----                                 
##  Total: 1.114940 100.0

The CA plot has a fairly simple interpretation: Dim1 is largely the distinction between tasks primarily done by the wife vs. the husband. Dim2 distinguishes tasks that are done singly vs. those that are done jointly.

plot(HT.ca, lines = TRUE)

So, we can use the CA method of seriation::seriate() to find the order of permutations of Task and Who along the CA dimensions.

require(seriation)
order <- seriate(HouseTasks, method = "CA")
# the permuted row and column labels
rownames(HouseTasks)[order[[1]]]
##  [1] "Laundry"   "Main_meal" "Dinner"    "Breakfast" "Tidying"   "Dishes"   
##  [7] "Shopping"  "Official"  "Holidays"  "Finances"  "Insurance" "Driving"  
## [13] "Repairs"
colnames(HouseTasks)[order[[2]]]
## [1] "Wife"        "Alternating" "Jointly"     "Husband"

Now, use seriation::permute() to apply order to HouseTasks, and plot the resulting mosaic. Note that with margin=1 only the rows (Task) are permuted; omit margin to permute the Who columns as well:

# do the permutation
HT_perm <- permute(HouseTasks, order, margin=1)

mosaic(HT_perm, shade = TRUE,
       labeling = labeling_border(rot_labels = c(45,0, 0, 0), 
                                  offset_label =c(.5,5,0, 0),
                                  varnames = c(FALSE, TRUE),
                                  just_labels=c("center","right"),
                                  tl_varnames = FALSE),
       legend = FALSE)

It is now easy to see the cluster of tasks (laundry and cooking) done largely by the wife at the top, and those (repairs, driving) done largely by the husband at the bottom.

References

Friendly, M. (1994). Mosaic displays for multi-way contingency tables. Journal of the American Statistical Association, 89, 190–200.
Friendly, M. (1999). Extending mosaic displays: Marginal, conditional, and partial views of categorical data. Journal of Computational and Graphical Statistics, 8(3), 373–395.
Friendly, M., & Kwan, E. (2003). Effect ordering for data displays. Computational Statistics and Data Analysis, 43(4), 509–539. Retrieved from http://authors.elsevier.com/sd/article/S0167947302002906
Friendly, M., & Meyer, D. (2016). Discrete data analysis with R: Visualization and modeling techniques for categorical and count data. Boca Raton, FL: Chapman & Hall/CRC.
Glass, D. V. (1954). Social mobility in Britain. Glencoe, IL: The Free Press.
Meyer, D., Zeileis, A., & Hornik, K. (2006). The strucplot framework: Visualizing multi-way contingency tables with vcd. Journal of Statistical Software, 17(3), 1–48. Retrieved from https://www.jstatsoft.org/v17/i03/
vcdExtra/inst/doc/datasets.R0000644000176200001440000000514314470742311015515 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, message = FALSE, warning = FALSE, fig.height = 6, fig.width = 7, fig.path = "fig/datasets-", dev = "png", comment = "##" ) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") ## ----load--------------------------------------------------------------------- library(dplyr) library(tidyr) library(readxl) ## ----read-datasets------------------------------------------------------------ dsets_tagged <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"), sheet="vcdExtra-datasets") dsets_tagged <- dsets_tagged |> dplyr::select(-Title, -dim) |> dplyr::rename(dataset = Item) head(dsets_tagged) ## ----split-tags--------------------------------------------------------------- dset_split <- dsets_tagged |> tidyr::separate_longer_delim(tags, delim = ";") |> dplyr::mutate(tag = stringr::str_trim(tags)) |> dplyr::select(-tags) #' ## collapse the rows for the same tag tag_dset <- dset_split |> arrange(tag) |> dplyr::group_by(tag) |> dplyr::summarise(datasets = paste(dataset, collapse = "; ")) |> ungroup() # get a list of the unique tags unique(tag_dset$tag) ## ----read-tags---------------------------------------------------------------- tags <- read_excel(here::here("inst", "extdata", "vcdExtra-datasets.xlsx"), sheet="tags") head(tags) ## ----join-tags---------------------------------------------------------------- tag_dset <- tag_dset |> dplyr::left_join(tags, by = "tag") |> dplyr::relocate(topic, .after = tag) tag_dset |> dplyr::select(-tag) |> head() ## ----add-links---------------------------------------------------------------- add_links <- function(dsets, style = c("reference", "help", "rdrr.io"), sep = "; ") { style <- match.arg(style) names <- stringr::str_split_1(dsets, sep) names <- dplyr::case_when( style == 
"help" ~ glue::glue("[{names}](help({names}))"), style == "reference" ~ glue::glue("[{names}](../reference/{names}.html)"), style == "rdrr.io" ~ glue::glue("[{names}](https://rdrr.io/cran/vcdExtra/man/{names}.html)") ) glue::glue_collapse(names, sep = sep) } ## ----kable-------------------------------------------------------------------- tag_dset |> dplyr::select(-tag) |> dplyr::mutate(datasets = purrr::map(datasets, add_links)) |> knitr::kable() vcdExtra/inst/extdata/0000755000176200001440000000000014422306403014437 5ustar liggesusersvcdExtra/inst/extdata/vcdExtra-datasets.xlsx0000644000176200001440000003326214422306403020753 0ustar liggesusersPK!d[Content_Types].xml (Ĕn0 US0M7C#$ `@ڥQ~qcf+h+E \qRO;!)Jp g;,EMĪFa8ޙ(8AU 59neZ 1>L--eOk%`Qd֫*k*EL*WNrwgMӡ`ʥFC6Q^Trm姏aJ? WAiEZF>Qa|ID?q;y~)̑#m,=\"w~j;> wrӫo6;, |7kץv)pv١3tLsmPK!U0#L _rels/.rels (MO0 HݐBKwAH!T~I$ݿ'TG~>!G!؞mH]_p9ܳ;YE{F# Z+1h=/"緍t-˟p1ʩWPf#YA+/nXu1ұNOZZ:\ ~mXݍќA΁' PK!JaGxl/_rels/workbook.xml.rels (j0 ѽqP:{)0Mlc?y6У$41f9#u)(ڛε ^OW 3H./6kNOd@"8R`ÃT[4e>KAsc+EY5iQw~ om4]~ ɉ -i^Yy\YD>qW$KS3b2k T>:3[/%s* }+4?rV PK!י4(xl/worksheets/sheet1.xmlێ +1]+Ϊ+z&xq砪ށ$Jh-| Vd IU4KRJݺ߿=MSbb21s|`0" ,6 Ŏb9 o Nbga`$ɻs8jqHR @+@eP2 7TB f(s,j.caQ; )TՂӞ@Cj9H/þS3O<ˀ)¢o,& LQ‚)F*ոЦMaA%a1-'=!C:W; m@ݠJ04TmG 41KHSD? "Kݮ-c"'+l? W{m?Ku|'&ǟ$B_Gº3 Ta ytBTj "^U iz\t%.P%k+}h=Y-nuآF1}4Cnu3Pad>Y՝nWF_WaP2bSH-VQ0[%FߑL!o[7j\Yke0bou: dڔWOV{R Q{>-,ҷ )ͺzY[VU'|޸ET]@}^l oVH'q ;o=z3NdH%ߣ@D~fXH'',@]D8?Ǔi@#t!uNzFXǛ׺҇/6[8rŭ[hSjOf5>I, ).z"d.K)`VAN5wXdk/4%)n6ZXZcDyhW5}c.=7JJ .dv13.„13ϕ&WV|^a2V97Q.|x0yxx1O(-餐]W@Yͣ {&~B tz>RHtH.:s{4Q]~R@ɷ@+ JV`N( QQOdwpT,\V'`b8[svO1}"g%If^n@EA`$5$}}gq.lySYv4-a3vpѡvT A$㲚/A^eqYxUye+"5"me?=A6[x\Vɉy:.ܗ㲚Onĝ ۊOLb/ F1j%}2dLh)&c L )'\C&ل+$>2&_!! I9~3b`!uB[28>2'bc!{B[!W qOiOh+d>)HLOM,J+VIM+U23WR(LπK J I%%0^FjbJjg_egPK!N xl/theme/theme1.xmlY͋7? 
sw5%l$dQV32%9R(Bo=@ $'#$lJZv G~ztҽzG ’_P=ؘ$Ӗk8(4|OHe n ,K۟~rmDlI9*f8&H#ޘ+R#^bP{}2!# J{O1B (W%òBR!a1;{(~h%/V&DYCn2L`|Xsj Z{_\Zҧh4:na PաWU_]נT E A)>\Çfgנ_[K^PkPDIr.jwd A)Q RSLX"7Z2>R$I O(9%o&`T) JU>#02]`XRxbL+7 /={=_*Kn%SSՏ__7'Ŀ˗:/}}O!c&a?0BĒ@v^[ uXsXa3W"`J+U`ek)r+emgoqx(ߤDJ]8TzM5)0IYgz|]p+~o`_=|j QkekZAj|&O3!ŻBw}ь0Q'j"5,ܔ#-q&?'2ڏ ZCeLTx3&cu+ЭNxNg x)\CJZ=ޭ~TwY(aLfQuQ_B^g^ٙXtXPꗡZFq 0mxEAAfc ΙFz3Pb/3 tSٺqyjuiE-#t00,;͖Yƺ2Obr3kE"'&&S;nj*#4kx#[SvInwaD:\N1{-_- 4m+W>Z@+qt;x2#iQNSp$½:7XX/+r1w`h׼9#:Pvd5O+Oٚ.<O7sig*t; CԲ*nN-rk.yJ}0-2MYNÊQ۴3, O6muF8='?ȝZu@,Jܼfw߫7$-1-![{ׅ}ftj|he! kc{W3 3'pA=%1]nppvoXj! ("^y25xr))%I;;dkYZi;e Q=wU,N_r aϺt]t z瑟λ,ur0"bfCu,ץA׌F"Ƌ,Lx 80g D]Ug@QV9e0)}Ykg@QV9ޝUr1mZI%ޚP?VSEuSȱF6?H dIqЗKʐjJ8v}UƴW0_/i?HƟw؇8I=ao0gJ&ԧ钑k<$v@Oʸ<wN="V pe41._e[Y|LbUI vi,pq*0v3Y4_Y@8XD@JNs HAH jMVàh(LE˅0%_ |& |hG :[2ت!G[>dP/*Ƞ^mUȓA@:z}t>X `ZO)9qyT. *D2+e[9y0m'ETB!EA B'ȄѦRf}ı²sX bBEԂÂ{)(/sXMj,1dCy.^`x,G2grXKXrXKXɈbF.by$"tͣKay#F.^\XE:\XE.ċbyu• Q WR'x,G1d_+x[ ˝uT+5$+/߮$+)QU\IUPfu%; ꊄ%CGM>c3ut!-!1{moS[ q r̺NMpI ㏰h+ZeǠq9F.eIS˱>JdBuWQ _R—_mSt?vohN=׉M{;iC8Orougk翱{cܧy}oy px}w^q{o<*oU'{q&WM$< ɄCPK!Fxl/sharedStrings.xml|Xr6}LaG/mgX7Ve;cKq6x"'>B$$& )_P].Ƀ#\{9{voENOXˋNuCLtEYe*r]ʋNΛ_99&,}7JB84c1R6qBJtUN4PU+syn幻u8?vw\.I.m.]Zj`pa>lT} 'X[cDtLw#V+~>ĩ’*ƈ2^Q0Ϻ2S#lYo8GDZ uؠSfZ%Lka-qXvɫ֎9`E2\ wGK e^[́=iᄫ So0OYQG a r%;6˅`gF[{cF"^/B7E{N^ en%ҿݡtimJ>ouI&$ya ]g[??\[52ZOx3yT c-߇$TlYs{'YdQ~ц4ann#1`HLYij.>|+Rzl>:z<8 -5P?ɔtPlH;bWlg{#%EVF &RЃ&DO.轿@_0@v7>cGM%a*~ &0S`+(M 3B(ĺJ2uvTAe!3S,NON0k y1!Vja$@/q*ax!C6w7o EUH$:ﵪ𺵹Y`$ۯ(cR۫ދyP"&i\uZ1{W46Q܎ڬC[֎po3D$Ç FExdjm[$xVϣxERjVf;yYQ?PK!BwDJdocProps/core.xml (]K0C}uc m*@x-ؤ!vfVx7OsR,>:$Eh %z E3-Xj(Q-뫂[ 5`v7cwKBCpZ|8-6-,MXgy،DtB >"ͻmP$!*!h*{f:^?c{X.A#^=RvU4uXF;MVM_߭Br ̃yNz,8%q6Ɍ q/1ɴ&sz3yvA<WPK!e("docProps/app.xml (MO0+"[B1Z+R gIcڑgZ~:(r@6^?3uD.JKQ`0Ѻ(!X1`%HJ}S[L쐊l sL;ynܩc4mekg&g =qWMm4==nmgzgޙ)\ z%Mh^.jmu65xB% _ \":^vh8k^BǩDAd}KSL 2)cqi 7Arq#W3aq:co`f\Ghd3=xo=-u muW|or@آ}|l8>eIMPK-!d[Content_Types].xmlPK-!U0#L _rels/.relsPK-!Ug!Wxl/workbook.xmlPK-!JaG 
xl/_rels/workbook.xml.relsPK-!י4( xl/worksheets/sheet1.xmlPK-!tF -xl/worksheets/sheet2.xmlPK-!N xl/theme/theme1.xmlPK-!x{r[VA xl/styles.xmlPK-!F"'xl/sharedStrings.xmlPK-!BwDJ.docProps/core.xmlPK-!e("1docProps/app.xmlPK 3vcdExtra/inst/extdata/tv.dat0000644000176200001440000002541214422306403015566 0ustar liggesusers1 1 1 1 6 2 1 1 1 18 3 1 1 1 6 4 1 1 1 2 5 1 1 1 11 1 2 1 1 6 2 2 1 1 29 3 2 1 1 25 4 2 1 1 17 5 2 1 1 29 1 3 1 1 10 2 3 1 1 10 3 3 1 1 12 4 3 1 1 8 5 3 1 1 7 1 4 1 1 20 2 4 1 1 24 3 4 1 1 26 4 4 1 1 14 5 4 1 1 40 1 5 1 1 20 2 5 1 1 15 3 5 1 1 28 4 5 1 1 11 5 5 1 1 13 1 6 1 1 26 2 6 1 1 91 3 6 1 1 64 4 6 1 1 2 5 6 1 1 30 1 7 1 1 27 2 7 1 1 32 3 7 1 1 24 4 7 1 1 10 5 7 1 1 21 1 8 1 1 36 2 8 1 1 63 3 8 1 1 59 4 8 1 1 54 5 8 1 1 77 1 9 1 1 50 2 9 1 1 36 3 9 1 1 26 4 9 1 1 13 5 9 1 1 38 1 10 1 1 68 2 10 1 1 18 3 10 1 1 35 4 10 1 1 3 5 10 1 1 34 1 11 1 1 34 2 11 1 1 9 3 11 1 1 53 4 11 1 1 14 5 11 1 1 23 1 1 2 1 8 2 1 2 1 11 3 1 2 1 14 4 1 2 1 6 5 1 2 1 10 1 2 2 1 38 2 2 2 1 9 3 2 2 1 15 4 2 2 1 11 5 2 2 1 8 1 3 2 1 22 2 3 2 1 15 3 3 2 1 20 4 3 2 1 9 5 3 2 1 8 1 4 2 1 38 2 4 2 1 28 3 4 2 1 32 4 4 2 1 24 5 4 2 1 20 1 5 2 1 27 2 5 2 1 7 3 5 2 1 15 4 5 2 1 9 5 5 2 1 15 1 6 2 1 54 2 6 2 1 5 3 6 2 1 5 4 6 2 1 5 5 6 2 1 8 1 7 2 1 26 2 7 2 1 7 3 7 2 1 1 4 7 2 1 7 5 7 2 1 16 1 8 2 1 39 2 8 2 1 10 3 8 2 1 19 4 8 2 1 21 5 8 2 1 11 1 9 2 1 19 2 9 2 1 8 3 9 2 1 9 4 9 2 1 13 5 9 2 1 12 1 10 2 1 23 2 10 2 1 18 3 10 2 1 15 4 10 2 1 11 5 10 2 1 15 1 11 2 1 10 2 11 2 1 7 3 11 2 1 5 4 11 2 1 12 5 11 2 1 10 1 1 3 1 0 2 1 3 1 15 3 1 3 1 6 4 1 3 1 17 5 1 3 1 9 1 2 3 1 31 2 2 3 1 43 3 2 3 1 7 4 2 3 1 67 5 2 3 1 5 1 3 3 1 12 2 3 3 1 26 3 3 3 1 6 4 3 3 1 32 5 3 3 1 8 1 4 3 1 30 2 4 3 1 10 3 4 3 1 47 4 4 3 1 27 5 4 3 1 34 1 5 3 1 13 2 5 3 1 9 3 5 3 1 19 4 5 3 1 42 5 5 3 1 15 1 6 3 1 14 2 6 3 1 67 3 6 3 1 12 4 6 3 1 97 5 6 3 1 14 1 7 3 1 19 2 7 3 1 23 3 7 3 1 25 4 7 3 1 44 5 7 3 1 13 1 8 3 1 10 2 8 3 1 36 3 8 3 1 42 4 8 3 1 80 5 8 3 1 22 1 9 3 1 14 2 9 3 1 29 3 9 3 1 17 4 9 3 1 26 5 9 3 
1 13 1 10 3 1 11 2 10 3 1 25 3 10 3 1 27 4 10 3 1 24 5 10 3 1 10 1 11 3 1 10 2 11 3 1 17 3 11 3 1 18 4 11 3 1 12 5 11 3 1 14 1 1 4 1 11 2 1 4 1 6 3 1 4 1 5 4 1 4 1 14 5 1 4 1 8 1 2 4 1 10 2 2 4 1 8 3 2 4 1 12 4 2 4 1 10 5 2 4 1 10 1 3 4 1 11 2 3 4 1 6 3 3 4 1 10 4 3 4 1 7 5 3 4 1 3 1 4 4 1 29 2 4 4 1 7 3 4 4 1 66 4 4 4 1 23 5 4 4 1 10 1 5 4 1 9 2 5 4 1 8 3 5 4 1 19 4 5 4 1 9 5 5 4 1 9 1 6 4 1 6 2 6 4 1 10 3 6 4 1 5 4 6 4 1 6 5 6 4 1 5 1 7 4 1 9 2 7 4 1 10 3 7 4 1 19 4 7 4 1 2 5 7 4 1 8 1 8 4 1 75 2 8 4 1 41 3 8 4 1 47 4 8 4 1 31 5 8 4 1 65 1 9 4 1 0 2 9 4 1 0 3 9 4 1 0 4 9 4 1 0 5 9 4 1 0 1 10 4 1 0 2 10 4 1 0 3 10 4 1 0 4 10 4 1 0 5 10 4 1 0 1 11 4 1 0 2 11 4 1 0 3 11 4 1 0 4 11 4 1 0 5 11 4 1 0 1 1 5 1 65 2 1 5 1 46 3 1 5 1 45 4 1 5 1 52 5 1 5 1 40 1 2 5 1 62 2 2 5 1 65 3 2 5 1 65 4 2 5 1 31 5 2 5 1 52 1 3 5 1 42 2 3 5 1 56 3 3 5 1 40 4 3 5 1 58 5 3 5 1 37 1 4 5 1 122 2 4 5 1 100 3 4 5 1 104 4 4 5 1 63 5 4 5 1 53 1 5 5 1 66 2 5 5 1 59 3 5 5 1 51 4 5 5 1 71 5 5 5 1 51 1 6 5 1 73 2 6 5 1 57 3 6 5 1 71 4 6 5 1 58 5 6 5 1 44 1 7 5 1 58 2 7 5 1 71 3 7 5 1 67 4 7 5 1 66 5 7 5 1 51 1 8 5 1 109 2 8 5 1 111 3 8 5 1 130 4 8 5 1 135 5 8 5 1 90 1 9 5 1 77 2 9 5 1 112 3 9 5 1 100 4 9 5 1 85 5 9 5 1 111 1 10 5 1 113 2 10 5 1 127 3 10 5 1 109 4 10 5 1 93 5 10 5 1 107 1 11 5 1 75 2 11 5 1 87 3 11 5 1 92 4 11 5 1 59 5 11 5 1 68 1 1 1 2 18 2 1 1 2 49 3 1 1 2 30 4 1 1 2 25 5 1 1 2 18 1 2 1 2 16 2 2 1 2 84 3 2 1 2 106 4 2 1 2 17 5 2 1 2 69 1 3 1 2 7 2 3 1 2 19 3 3 1 2 16 4 3 1 2 13 5 3 1 2 10 1 4 1 2 73 2 4 1 2 59 3 4 1 2 53 4 4 1 2 26 5 4 1 2 44 1 5 1 2 41 2 5 1 2 18 3 5 1 2 15 4 5 1 2 19 5 5 1 2 15 1 6 1 2 25 2 6 1 2 94 3 6 1 2 80 4 6 1 2 7 5 6 1 2 50 1 7 1 2 29 2 7 1 2 31 3 7 1 2 19 4 7 1 2 14 5 7 1 2 5 1 8 1 2 65 2 8 1 2 128 3 8 1 2 116 4 8 1 2 99 5 8 1 2 80 1 9 1 2 29 2 9 1 2 14 3 9 1 2 22 4 9 1 2 16 5 9 1 2 32 1 10 1 2 65 2 10 1 2 14 3 10 1 2 31 4 10 1 2 6 5 10 1 2 44 1 11 1 2 23 2 11 1 2 8 3 11 1 2 15 4 11 1 2 9 5 11 1 2 33 1 1 2 2 13 2 1 2 2 24 3 1 2 2 22 4 1 2 2 7 5 1 2 2 
20 1 2 2 2 61 2 2 2 2 20 3 2 2 2 49 4 2 2 2 21 5 2 2 2 23 1 3 2 2 16 2 3 2 2 13 3 3 2 2 7 4 3 2 2 8 5 3 2 2 26 1 4 2 2 76 2 4 2 2 83 3 4 2 2 152 4 4 2 2 86 5 4 2 2 74 1 5 2 2 22 2 5 2 2 26 3 5 2 2 28 4 5 2 2 11 5 5 2 2 11 1 6 2 2 50 2 6 2 2 11 3 6 2 2 6 4 6 2 2 7 5 6 2 2 5 1 7 2 2 27 2 7 2 2 6 3 7 2 2 6 4 7 2 2 8 5 7 2 2 45 1 8 2 2 63 2 8 2 2 23 3 8 2 2 41 4 8 2 2 56 5 8 2 2 22 1 9 2 2 18 2 9 2 2 18 3 9 2 2 10 4 9 2 2 39 5 9 2 2 24 1 10 2 2 10 2 10 2 2 13 3 10 2 2 5 4 10 2 2 19 5 10 2 2 14 1 11 2 2 14 2 11 2 2 10 3 11 2 2 2 4 11 2 2 8 5 11 2 2 9 1 1 3 2 26 2 1 3 2 26 3 1 3 2 21 4 1 3 2 25 5 1 3 2 27 1 2 3 2 71 2 2 3 2 77 3 2 3 2 4 4 2 3 2 73 5 2 3 2 9 1 3 3 2 22 2 3 3 2 22 3 3 3 2 4 4 3 3 2 14 5 3 3 2 10 1 4 3 2 110 2 4 3 2 76 3 4 3 2 65 4 4 3 2 41 5 4 3 2 110 1 5 3 2 35 2 5 3 2 16 3 5 3 2 17 4 5 3 2 13 5 5 3 2 34 1 6 3 2 20 2 6 3 2 149 3 6 3 2 24 4 6 3 2 102 5 6 3 2 26 1 7 3 2 14 2 7 3 2 11 3 7 3 2 25 4 7 3 2 21 5 7 3 2 20 1 8 3 2 12 2 8 3 2 77 3 8 3 2 112 4 8 3 2 94 5 8 3 2 102 1 9 3 2 23 2 9 3 2 32 3 9 3 2 11 4 9 3 2 10 5 9 3 2 17 1 10 3 2 44 2 10 3 2 12 3 10 3 2 12 4 10 3 2 12 5 10 3 2 7 1 11 3 2 5 2 11 3 2 12 3 11 3 2 5 4 11 3 2 6 5 11 3 2 5 1 1 4 2 22 2 1 4 2 31 3 1 4 2 13 4 1 4 2 23 5 1 4 2 14 1 2 4 2 15 2 2 4 2 13 3 2 4 2 14 4 2 4 2 41 5 2 4 2 15 1 3 4 2 13 2 3 4 2 17 3 3 4 2 9 4 3 4 2 12 5 3 4 2 8 1 4 4 2 70 2 4 4 2 37 3 4 4 2 145 4 4 4 2 51 5 4 4 2 30 1 5 4 2 26 2 5 4 2 23 3 5 4 2 20 4 5 4 2 12 5 5 4 2 11 1 6 4 2 7 2 6 4 2 5 3 6 4 2 18 4 6 4 2 3 5 6 4 2 9 1 7 4 2 7 2 7 4 2 16 3 7 4 2 19 4 7 4 2 12 5 7 4 2 13 1 8 4 2 180 2 8 4 2 142 3 8 4 2 127 4 8 4 2 156 5 8 4 2 269 1 9 4 2 0 2 9 4 2 0 3 9 4 2 0 4 9 4 2 0 5 9 4 2 0 1 10 4 2 0 2 10 4 2 0 3 10 4 2 0 4 10 4 2 0 5 10 4 2 0 1 11 4 2 0 2 11 4 2 0 3 11 4 2 0 4 11 4 2 0 5 11 4 2 0 1 1 5 2 61 2 1 5 2 70 3 1 5 2 44 4 1 5 2 45 5 1 5 2 51 1 2 5 2 34 2 2 5 2 57 3 2 5 2 55 4 2 5 2 35 5 2 5 2 28 1 3 5 2 37 2 3 5 2 58 3 3 5 2 28 4 3 5 2 49 5 3 5 2 38 1 4 5 2 255 2 4 5 2 177 3 4 5 2 141 4 4 5 2 85 5 4 5 2 104 1 5 5 2 51 
2 5 5 2 67 3 5 5 2 63 4 5 5 2 43 5 5 5 2 41 1 6 5 2 70 2 6 5 2 26 3 6 5 2 57 4 6 5 2 23 5 6 5 2 44 1 7 5 2 30 2 7 5 2 39 3 7 5 2 37 4 7 5 2 51 5 7 5 2 41 1 8 5 2 114 2 8 5 2 102 3 8 5 2 104 4 8 5 2 152 5 8 5 2 86 1 9 5 2 72 2 9 5 2 54 3 9 5 2 62 4 9 5 2 54 5 9 5 2 49 1 10 5 2 44 2 10 5 2 57 3 10 5 2 41 4 10 5 2 35 5 10 5 2 52 1 11 5 2 78 2 11 5 2 27 3 11 5 2 11 4 11 5 2 35 5 11 5 2 49 1 1 1 3 146 2 1 1 3 244 3 1 1 3 233 4 1 1 3 174 5 1 1 3 294 1 2 1 3 151 2 2 1 3 181 3 2 1 3 161 4 2 1 3 183 5 2 1 3 281 1 3 1 3 156 2 3 1 3 231 3 3 1 3 194 4 3 1 3 197 5 3 1 3 305 1 4 1 3 83 2 4 1 3 205 3 4 1 3 156 4 4 1 3 181 5 4 1 3 239 1 5 1 3 325 2 5 1 3 385 3 5 1 3 339 4 5 1 3 187 5 5 1 3 278 1 6 1 3 350 2 6 1 3 283 3 6 1 3 264 4 6 1 3 198 5 6 1 3 246 1 7 1 3 386 2 7 1 3 345 3 7 1 3 279 4 7 1 3 211 5 7 1 3 245 1 8 1 3 340 2 8 1 3 192 3 8 1 3 140 4 8 1 3 86 5 8 1 3 138 1 9 1 3 352 2 9 1 3 329 3 9 1 3 237 4 9 1 3 110 5 9 1 3 246 1 10 1 3 280 2 10 1 3 351 3 10 1 3 228 4 10 1 3 122 5 10 1 3 232 1 11 1 3 278 2 11 1 3 364 3 11 1 3 203 4 11 1 3 117 5 11 1 3 233 1 1 2 3 337 2 1 2 3 173 3 1 2 3 158 4 1 2 3 196 5 1 2 3 130 1 2 2 3 293 2 2 2 3 180 3 2 2 3 126 4 2 2 3 185 5 2 2 3 144 1 3 2 3 304 2 3 2 3 184 3 3 2 3 207 4 3 2 3 195 5 3 2 3 154 1 4 2 3 233 2 4 2 3 109 3 4 2 3 59 4 4 2 3 104 5 4 2 3 81 1 5 2 3 311 2 5 2 3 218 3 5 2 3 98 4 5 2 3 106 5 5 2 3 129 1 6 2 3 251 2 6 2 3 235 3 6 2 3 103 4 6 2 3 116 5 6 2 3 153 1 7 2 3 241 2 7 2 3 256 3 7 2 3 122 4 7 2 3 116 5 7 2 3 136 1 8 2 3 164 2 8 2 3 250 3 8 2 3 86 4 8 2 3 47 5 8 2 3 126 1 9 2 3 252 2 9 2 3 274 3 9 2 3 109 4 9 2 3 102 5 9 2 3 138 1 10 2 3 265 2 10 2 3 263 3 10 2 3 105 4 10 2 3 84 5 10 2 3 136 1 11 2 3 272 2 11 2 3 261 3 11 2 3 110 4 11 2 3 84 5 11 2 3 152 1 1 3 3 263 2 1 3 3 315 3 1 3 3 134 4 1 3 3 515 5 1 3 3 195 1 2 3 3 219 2 2 3 3 254 3 2 3 3 146 4 2 3 3 463 5 2 3 3 220 1 3 3 3 236 2 3 3 3 280 3 3 3 3 166 4 3 3 3 472 5 3 3 3 248 1 4 3 3 140 2 4 3 3 241 3 4 3 3 66 4 4 3 3 477 5 4 3 3 160 1 5 3 3 226 2 5 3 3 370 3 5 3 3 194 4 5 3 
3 590 5 5 3 3 172 1 6 3 3 235 2 6 3 3 214 3 6 3 3 230 4 6 3 3 473 5 6 3 3 164 1 7 3 3 239 2 7 3 3 195 3 7 3 3 264 4 7 3 3 446 5 7 3 3 169 1 8 3 3 246 2 8 3 3 111 3 8 3 3 143 4 8 3 3 349 5 8 3 3 85 1 9 3 3 279 2 9 3 3 188 3 9 3 3 274 4 9 3 3 649 5 9 3 3 183 1 10 3 3 263 2 10 3 3 190 3 10 3 3 289 4 10 3 3 705 5 10 3 3 198 1 11 3 3 283 2 11 3 3 210 3 11 3 3 306 4 11 3 3 747 5 11 3 3 204 1 1 4 3 222 2 1 4 3 130 3 1 4 3 316 4 1 4 3 146 5 1 4 3 131 1 2 4 3 233 2 2 4 3 141 3 2 4 3 327 4 2 4 3 135 5 2 4 3 132 1 3 4 3 245 2 3 4 3 165 3 3 4 3 364 4 3 4 3 146 5 3 4 3 146 1 4 4 3 179 2 4 4 3 154 3 4 4 3 178 4 4 4 3 101 5 4 4 3 126 1 5 4 3 210 2 5 4 3 145 3 5 4 3 163 4 5 4 3 135 5 5 4 3 242 1 6 4 3 229 2 6 4 3 146 3 6 4 3 165 4 6 4 3 148 5 6 4 3 269 1 7 4 3 241 2 7 4 3 158 3 7 4 3 157 4 7 4 3 160 5 7 4 3 291 1 8 4 3 0 2 8 4 3 0 3 8 4 3 0 4 8 4 3 0 5 8 4 3 0 1 9 4 3 0 2 9 4 3 0 3 9 4 3 0 4 9 4 3 0 5 9 4 3 0 1 10 4 3 0 2 10 4 3 0 3 10 4 3 0 4 10 4 3 0 5 10 4 3 0 1 11 4 3 0 2 11 4 3 0 3 11 4 3 0 4 11 4 3 0 5 11 4 3 0 1 1 5 3 766 2 1 5 3 704 3 1 5 3 738 4 1 5 3 573 5 1 5 3 549 1 2 5 3 783 2 2 5 3 744 3 2 5 3 726 4 2 5 3 581 5 2 5 3 546 1 3 5 3 901 2 3 5 3 817 3 3 5 3 797 4 3 5 3 642 5 3 5 3 587 1 4 5 3 616 2 4 5 3 632 3 4 5 3 641 4 4 5 3 580 5 4 5 3 511 1 5 5 3 662 2 5 5 3 637 3 5 5 3 756 4 5 5 3 637 5 5 5 3 592 1 6 5 3 615 2 6 5 3 645 3 6 5 3 723 4 6 5 3 634 5 6 5 3 579 1 7 5 3 636 2 7 5 3 726 3 7 5 3 737 4 7 5 3 651 5 7 5 3 574 1 8 5 3 509 2 8 5 3 609 3 8 5 3 599 4 8 5 3 443 5 8 5 3 490 1 9 5 3 636 2 9 5 3 675 3 9 5 3 743 4 9 5 3 575 5 9 5 3 674 1 10 5 3 557 2 10 5 3 587 3 10 5 3 673 4 10 5 3 541 5 10 5 3 604 1 11 5 3 510 2 11 5 3 540 3 11 5 3 642 4 11 5 3 498 5 11 5 3 591 vcdExtra/inst/WORDLIST0000644000176200001440000000454014470702360014207 0ustar liggesusersABCDE ABCD ABCE Aberdein ACM Addy AE agreementplot Agresti's Agresti AirCrash Aitken al Altham Anscombe APL asbestosis Asbestosis attr au Avrunin Bangdiwala BCD BCDE BD Beh Beitrage Bertin Bingley Biometrics Biometrika 
birthdate birthdates blogits Boca Breen Breslow Broek buglet Caesarian Caithness Cattell CDE centroids chisquare chisq Chisq chr Clogg CMH CMHtest CoalMiners coercible COMPSTAT Corbet CRC crosstable Creatinine CyclingDeaths Daly Darlington DaytonSurvey DDAR de deciles depletions De der des df DOI donner doubledecker Doubledecker Duxbury edu Efron et etal Fahrmeir Falguerolles Featherman Fienberg Fosdick Frage Freese Gart Geborenen Geissler Geissler's Geschlechts ggplot Gilby GKgamma GLIM glm glmlist gnm GOF Grayson GSS Haberman Haenszel haireye HairEye HairEyeColor HairEyePlace Hartigan Hauser HCO Heckman Heyman Hornik Hosmer HospVisits Hout Hoyt http ij ijk Inspectional Institutionalism interpretable Interscience Jossey JRSS kable Karger Kastenbaum Kleiner knitr Krishnaiah Kundel Kway Labour Lamphiear Landis Langner Lauer LazyData LazyLoad Lemeshow Lifecycle loddsratio logLik loglin loglm loglmlist logseries Lombardo LRstats Lunn markov Mathieu MCA McCandless McConway McNemar McRae Microjournal midrank Mikis Ministere NMES Modelling modFit mosdata multinom Murdock nd Neilsen Nesselroade newcastle noop nonfarm Nonmanual nonmanual nonmanuals NonWhites normed NoTumor num NYU occStatus OCG oddsratio Opler ORCID Ord ordinality Ostrowski Pastides PCO PhdPubs Plackett poisson Polansky polr POSIXct preceeding Publics Raton recoding Rennie rgl Rhelp Ries RMS rootogram Sachsischen Schafer Schuessler Schwarz Seeliger Selikoff Semiology seriation SES ShakeWords Shockey shoebox ShuguangSun Spiegelhalter Springer Srole Stackexchange Stasinopoulos Stata Statistischen statum strucplot Strucplot Strucplots Sturdivant subtable Teres Theus Thisted tibbles tidyr Titanicp tolstoy toxaemia Toxaemia Toxaemic Travaux Tutz UCBAdmissions Unidiff unidiff unshaded VGAM vcd verhaltnisses Verlag VisualAcuity Whittaker Wicklin Winbuilder Wolfinger WorkerSat www Xie xlevelsS Yaish Yamaguchi yorku Yu Zhao zur