conting/0000755000175100001440000000000012753125172011736 5ustar hornikusersconting/NAMESPACE0000644000175100001440000000151012753071315013151 0ustar hornikusersimport(mvtnorm, tseries, coda) importFrom(gtools, combinations) importFrom(BMS, bin2hex) importFrom(BMS, hex2bin) exportPattern("^[[:alpha:]]+") S3method(plot, pval) S3method(plot, totpop) S3method(print, acceptrate) S3method(print, bcct) S3method(print, bict) S3method(print, interprob) S3method(print, interstat) S3method(print, modprobs) S3method(print, pval) S3method(print, sbcct) S3method(print, sbict) S3method(print, submod) S3method(print, totpop) S3method(summary, bcct) S3method(summary, bict) importFrom("graphics", "abline", "hist", "legend", "plot", "points") importFrom("stats", "as.formula", "coef", "dpois", "glm", "lm","model.frame", "model.matrix", "na.pass", "nlminb","poisson", "rgamma", "rpois","runif","var") importFrom("utils", "read.table", "setTxtProgressBar","txtProgressBar", "write.table")conting/data/0000755000175100001440000000000012753071315012646 5ustar hornikusersconting/data/AOH.rda0000644000175100001440000000047512753071315013753 0ustar hornikusersconting/data/ScotPWID.rda0000644000175100001440000000110112753071315014723 0ustar hornikusersconting/data/spina.rda0000644000175100001440000000047712753071315014460 0ustar hornikusersconting/data/heart.rda0000644000175100001440000000060412753071315014441 0ustar hornikusersconting/R/0000755000175100001440000000000012753071315012136 5ustar hornikusersconting/R/bcctsubsetu.R0000644000175100001440000001023312753071315014606 0ustar hornikusersbcctsubsetu <- function (object, n.sample, save = NULL, name = NULL, progress = FALSE) { if (n.sample <= 0) { stop("n.sample must be positive") } ptm <- (proc.time())[3] if (is.null(save)) { save <- object$save } if (is.null(name)) { name <- object$name } if (save > 0 & is.null(name)) { name_RJACC <- "RJACC.txt" name_MHACC <- "MHACC.txt" name_BETA <- "BETA.txt" name_MODEL <- "MODEL.txt" name_SIG <- "SIG.txt" } else { name_RJACC <- paste(name, "RJACC.txt", sep = "") name_MHACC <- paste(name, "MHACC.txt", sep = "") name_BETA <- paste(name, "BETA.txt", sep = "") name_MODEL <- paste(name, "MODEL.txt", sep = "") name_SIG <- paste(name, "SIG.txt", sep = "") } if (object$save == 0 & save > 0) { if (file.exists(name_BETA)) { stop(paste("A file named ", name_BETA, " already exists in the working directory", sep = "")) } if (file.exists(name_MODEL)) { stop(paste("A file named ", name_MODEL, " already exists in the working directory", sep = "")) } if (file.exists(name_SIG)) { stop(paste("A file named ", name_SIG, " already exists in the working directory", sep = "")) } if (file.exists(name_RJACC)) { stop(paste("A file named ", name_RJACC, " already exists in the working directory", sep = "")) } if (file.exists(name_MHACC)) { stop(paste("A file named ", name_MHACC, " already exists in the working directory", sep = "")) } write.table(file = name_BETA, x = object$BETA, row.names = FALSE, col.names
= FALSE, append = TRUE) write.table(file = name_MODEL, x = object$MODEL, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_SIG, x = object$SIG, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_RJACC, x = object$rj_acc, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_MHACC, x = object$mh_acc, row.names = FALSE, col.names = FALSE, append = TRUE) } start.sig <- object$SIG[length(object$SIG)] start.index <- model2index(object$MODEL[length(object$SIG)], dig = dim(object$BETA)[2]) start.beta <- object$BETA[dim(object$BETA)[1], start.index == 1] runit <- bcctsubset.fit(priornum = object$priornum, subset.index=object$subset.index, maximal.mod = object$maximal.mod, IP = object$IP, eta.hat = object$eta.hat, ini.index = start.index, ini.beta = start.beta, ini.sig = start.sig, iters = n.sample, save = save, name = name, null.move.prob = object$null.move.prob, a = object$a, b = object$b, progress = progress) BETA <- runit$BETA MODEL <- runit$MODEL SIG <- runit$SIG rj_acc <- runit$rj_acc mh_acc <- runit$mh_acc if (save > 0) { rj_acc <- read.matrix(file = name_RJACC, header = FALSE) mh_acc <- read.matrix(file = name_MHACC, header = FALSE) BETA <- read.matrix(file = name_BETA, header = FALSE) SIG <- read.matrix(file = name_SIG, header = FALSE) MODEL <- as.character(read.table(file = name_MODEL, header = FALSE)[, 1]) } if (save == 0) { rj_acc <- c(object$rj_acc, rj_acc) mh_acc <- c(object$mh_acc, mh_acc) BETA <- rbind(object$BETA, BETA) SIG <- c(object$SIG, SIG) MODEL <- c(object$MODEL, MODEL) } ptm <- (proc.time())[3] - ptm time <- object$time + ptm est <- list(BETA = BETA, MODEL = MODEL, SIG = SIG, rj_acc = rj_acc, mh_acc = mh_acc, priornum = object$priornum, maximal.mod = object$maximal.mod, IP = object$IP, eta.hat = object$eta.hat, save = save, name = name, null.move.prob = object$null.move.prob, time = time, a = object$a, b = object$b, subset.index = object$subset.index) class(est) <- "bcct" est } conting/R/bcctsubset.R0000644000175100001440000001201312753071315014417 0ustar hornikusersbcctsubset <- function (subsetformula, data = NULL, n.sample, prior = "SBH", start.formula = NULL, start.beta = NULL, start.sig = NULL, save = 0, name = NULL, null.move.prob = 0.5, a = 0.001, b = 0.001, progress = FALSE) { if (n.sample <= 0) { stop("n.sample must be positive") } if (prior != "UIP" & prior != "SBH") { stop("prior not found") } if (save < 0) { stop("save must be non-negative") } if (null.move.prob < 0 | null.move.prob > 1) { stop("null.move.prob is a probability and should be between 0 and 1") } if (a < 0 & a != (-1)) { stop("a and b must be non-negative") } if (b < 0) { stop("a and b must be non-negative") } ptm <- (proc.time())[3] if (!is.null(data)) { if (attributes(data)$class == "table") { data <- data.frame(data) } } if (save > 0) { if (is.null(name)) { name_RJACC <- "RJACC.txt" name_MHACC <- "MHACC.txt" name_BETA <- "BETA.txt" name_MODEL <- "MODEL.txt" name_SIG <- "SIG.txt" } else { name_RJACC <- paste(name, "RJACC.txt", sep = "") name_MHACC <- paste(name, "MHACC.txt", sep = "") name_BETA <- paste(name, "BETA.txt", sep = "") name_MODEL <- paste(name, "MODEL.txt", sep = "") name_SIG <- paste(name, "SIG.txt", sep = "") } if (file.exists(name_BETA)) { stop(paste("A file named ", name_BETA, " already exists in the working directory", sep = "")) } if (file.exists(name_MODEL)) { stop(paste("A file named ", name_MODEL, " already exists in the working directory", sep = "")) } if (file.exists(name_SIG)) { stop(paste("A file 
named ", name_SIG, " already exists in the working directory", sep = "")) } if (file.exists(name_RJACC)) { stop(paste("A file named ", name_RJACC, " already exists in the working directory", sep = "")) } if (file.exists(name_MHACC)) { stop(paste("A file named ", name_MHACC, " already exists in the working directory", sep = "")) } } priortypes <- c("UIP", "SBH") priornum <- c(1, 2)[prior == priortypes] options(contrasts = c("contr.sum", "contr.poly"), warn = -1) if (!is.null(data)) { maximal.mod <- glm(formula = subsetformula[[1]], data = data, family = poisson, control = list(maxit = 1), x = TRUE, y = TRUE) } else{ maximal.mod <- glm(formula = subsetformula[[1]], family = poisson, control = list(maxit = 1), x = TRUE, y = TRUE) } options(contrasts = c("contr.treatment", "contr.poly"), warn = 0) subset.index<-matrix(formula2index(big.X= maximal.mod$x, formula=subsetformula[[1]], data=data),nrow=1) for(i in 2:length(subsetformula)){ subset.index<-rbind(subset.index,formula2index(big.X= maximal.mod$x, formula=subsetformula[[i]], data=data))} big.X <- maximal.mod$x y <- maximal.mod$y n <- dim(big.X)[1] IP <- t(big.X) %*% big.X/n IP[, 1] <- 0 IP[1, 0] <- 0 bmod <- beta_mode(X = big.X, y = y, prior = prior, IP = IP, a = a, b = b) eta.hat <- as.vector(big.X %*% matrix(bmod, ncol = 1)) if (is.null(start.formula)) { start.index <- rep(1, dim(big.X)[2]) } else { start.index <- formula2index(big.X = big.X, formula = start.formula, data = data) } if (is.null(start.beta)) { start.beta <- bmod[start.index == 1] } if (is.null(start.sig)) { start.sig <- 1 } start.mod <- index2model(start.index) runit <- bcctsubset.fit(priornum = priornum, subset.index = subset.index, maximal.mod = maximal.mod, IP = IP, eta.hat = eta.hat, ini.index = start.index, ini.beta = start.beta, ini.sig = start.sig, iters = n.sample, save = save, name = name, null.move.prob = null.move.prob, a = a, b = b, progress = progress) BETA <- runit$BETA MODEL <- runit$MODEL SIG <- runit$SIG rj_acc <- runit$rj_acc mh_acc <- runit$mh_acc if (save > 0) { rj_acc <- read.matrix(file = name_RJACC, header = FALSE) mh_acc <- read.matrix(file = name_MHACC, header = FALSE) BETA <- read.matrix(file = name_BETA, header = FALSE) SIG <- read.matrix(file = name_SIG, header = FALSE) MODEL <- as.character(read.table(file = name_MODEL, header = FALSE)[, 1]) } time <- (proc.time())[3] - ptm est <- list(BETA = BETA, MODEL = MODEL, SIG = SIG, rj_acc = rj_acc, mh_acc = mh_acc, priornum = priornum, maximal.mod = maximal.mod, IP = IP, eta.hat = eta.hat, save = save, name = name, null.move.prob = null.move.prob, time = time, a = a, b = b,subset.index=subset.index) class(est) <- "bcct" est } conting/R/summary.bcct.R0000644000175100001440000000246412753071315014676 0ustar hornikuserssummary.bcct <- function(object,n.burnin=0,thin=1,cutoff=0.75,statistic="X2",best=NULL,scale=0.1,prob.level=0.95,...){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(scale<0 | scale>1){ stop("scale should be between 0 and 1")} if(!is.null(best)){ if(best<=0){ stop("best should be positive")}} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} 
is1<-inter_stats(object,n.burnin=n.burnin,cutoff=cutoff,thin=thin,prob.level=prob.level) is2<-mod_probs(object,n.burnin=n.burnin,scale=scale,best=best,thin=thin) is3<-bayespval(object,n.burnin=n.burnin,thin=thin,statistic=statistic) est<-list(BETA=object$BETA,MODEL=object$MODEL,SIG=object$SIG,rj_acc=object$rj_acc,mh_acc=object$mh_acc,priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,save=object$save,name=object$name,int_stats=is1,mod_stats=is2,pval_stats=is3) class(est)<-"sbcct" est } conting/R/iwls_mh.R0000644000175100001440000000336412753071315013731 0ustar hornikusersiwls_mh <- function(curr.y,curr.X,curr.beta,iprior.var){ curr.LP<-as.vector(curr.X%*%matrix(curr.beta,ncol=1)) ## current linear predictor curr.w<-exp(curr.LP) ## current elements of weight matrix icurr.C<-iprior.var+crossprod(x=curr.X*curr.w,y=curr.X) ## inverse proposal variance #curr.C<-solve(icurr.C) curr.C<-chol2inv(chol(icurr.C)) ## proposal variance curr.z<-curr.LP+(curr.y-curr.w)/curr.w ## current value of working vector curr.m<-as.vector(tcrossprod(x=curr.C,y=curr.X*curr.w)%*%matrix(curr.z,ncol=1)) ## proposal mean prop.beta<-as.vector(rmvnorm(n=1,mean=curr.m,sigma=curr.C)) ## proposal prop.LP<-as.vector(curr.X%*%matrix(prop.beta,ncol=1)) ## proposed linear predictor prop.w<-exp(prop.LP) ## proposed elememts of weight matrix iprop.C<-iprior.var+crossprod(x=curr.X*prop.w,y=curr.X) ## inverse current variance #prop.C<-solve(iprop.C) ## current variance prop.C<-chol2inv(chol(iprop.C)) ## current variance prop.z<-prop.LP+(curr.y-prop.w)/prop.w ## proposed working vector prop.m<-as.vector(tcrossprod(x=prop.C,y=curr.X*prop.w)%*%matrix(prop.z,ncol=1))##current mean top<-sum(curr.y*prop.LP)-sum(prop.w)-0.5*as.vector(matrix(prop.beta[-1],nrow=1)%*%iprior.var[-1,-1]%*%matrix(prop.beta[-1],ncol=1))+dmvnorm(x=curr.beta,mean=prop.m,sigma=prop.C,log=TRUE) ## log numerator of acceptance probability bot<-sum(curr.y*curr.LP)-sum(curr.w)-0.5*as.vector(matrix(curr.beta[-1],nrow=1)%*%iprior.var[-1,-1]%*%matrix(curr.beta[-1],ncol=1))+dmvnorm(x=prop.beta,mean=curr.m,sigma=curr.C,log=TRUE) ## log denominatrot of acceptance probability prob<-exp(top-bot) ## acceptance probability if(prob>=runif(1)){ ## accept or reject new.beta<-prop.beta} else{ new.beta<-curr.beta} new.beta} conting/R/print.sbcct.R0000644000175100001440000000031712753071315014513 0ustar hornikusersprint.sbcct <- function(x,...,digits = max(3, getOption("digits") - 3)){ print(x$int_stats,digits=digits) cat("\n") print(x$mod_stats,digits=digits) cat("\n") print(x$pval_stats,digits=digits) } conting/R/total_pop.R0000644000175100001440000000175012753071315014265 0ustar hornikuserstotal_pop <- function(object,n.burnin=0,thin=1,prob.level=0.95){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} missing<-c(object$missing1,object$missing2) obs.z<-sum(object$maximal.mod$y[-missing]) if(is.matrix(object$Y0)){ if(n.burnin>0){ YY0<-matrix(object$Y0[-(1:n.burnin),],ncol=dim(object$Y0)[2])} else{ YY0<-object$Y0} TOT<-apply(YY0,1,sum)+obs.z} else{ if(n.burnin>0){ YY0<-object$Y0[-(1:n.burnin)]} else{ YY0<-object$Y0} TOT<-YY0+obs.z} n.sample<-length(TOT) every<-seq(from=thin,to=n.sample,by=thin) TOT<-TOT[every] int<-HPDinterval(mcmc(TOT),prob=prob.level) 
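## HPDinterval() from coda returns a two-column matrix holding the lower and
## upper limits of the 100*prob.level% highest posterior density interval.
## Small illustration (not run):
# HPDinterval(mcmc(rpois(1000, lambda = 100)), prob = 0.95)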
est<-list(TOT=TOT,int=int,meanTOT=mean(TOT),thin=thin,prob.level=prob.level) class(est)<-"totpop" est} conting/R/bict.R0000644000175100001440000001105312753071315013202 0ustar hornikusersbict <- function(formula,data=NULL,n.sample,prior="SBH",cens=NULL,start.formula=NULL,start.beta=NULL,start.sig=NULL,start.y0=NULL,save=0,name=NULL,null.move.prob=0.5,a=0.001,b=0.001,progress=FALSE){ if(n.sample<=0){ stop("n.sample must be positive")} if(prior!="UIP" & prior!="SBH"){ stop("prior not found")} if(save<0){ stop("save must be non-negative")} if(null.move.prob<0 | null.move.prob>1){ stop("null.move.prob is a probability and should be between 0 and 1")} if(a<0 & a!=(-1)){ stop("a and b must be non-negative")} if(b<0){ stop("a and b must be non-negative")} ptm<-(proc.time())[3] if(!is.null(data)){ if(attributes(data)$class=="table"){ data<-data.frame(data)}} if(save>0){ if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt" name_Y0<-"Y0.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="") name_Y0<-paste(name,"Y0.txt",sep="")} if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_Y0)){stop(paste("A file named ",name_Y0," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} } priortypes<-c("UIP","SBH") priornum<-c(1,2)[prior==priortypes] options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ maximal.mod<-glm(formula=formula,data=data,method="model.frame",na.action=na.pass,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} else{ maximal.mod<-glm(formula=formula,method="model.frame",na.action=na.pass,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) missing1<-(1:length(maximal.mod[,1]))[is.na(maximal.mod[,1])] data<-maximal.mod data[missing1,1]<-0 if(!is.null(cens)){ missing2<-cens} else{ missing2<-c()} missing<-c(missing1,missing2) missing_details<-data[missing1,-1] censored_details<-data[missing2,-1] options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ maximal.mod<-glm(formula=formula,data=data,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} else{ maximal.mod<-glm(formula=formula,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) big.X<-maximal.mod$x y<-maximal.mod$y n<-dim(big.X)[1] IP<-t(big.X)%*%big.X/n IP[,1]<-0 IP[1,0]<-0 bmod<-beta_mode(X=big.X[-missing,],y=y[-missing],prior=prior,IP=IP,a=a,b=b) eta.hat<-as.vector(big.X%*%matrix(bmod,ncol=1)) if(is.null(start.formula)){ start.index<-rep(1,dim(big.X)[2])} else{ start.index<-formula2index(big.X=big.X,formula=start.formula,data=data)} if(is.null(start.beta)){ start.beta<-bmod[start.index==1]} if(is.null(start.sig)){ start.sig<-1} if(is.null(start.y0)){ start.y0<-round(exp(eta.hat[missing]))} 
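## By default the unobserved (and censored) cell counts are initialised at
## their rounded fitted means under the posterior mode, e.g. if
## eta.hat[missing] = c(1.2, 2.3) then start.y0 = round(exp(c(1.2, 2.3))) = c(3, 10).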
start.mod<-index2model(start.index) runit<-bict.fit(priornum=priornum,missing1=missing1,missing2=missing2,maximal.mod=maximal.mod, IP=IP,eta.hat=eta.hat,ini.index=start.index,ini.beta=start.beta,ini.sig=start.sig,ini.y0=start.y0, iters=n.sample,save=save,name=name,null.move.prob=null.move.prob,a=a,b=b,progress=progress) BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG Y0<-runit$Y0 rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) Y0<-matrix(read.matrix(file=name_Y0,header=FALSE),nrow=length(SIG)) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} time<-(proc.time())[3]-ptm est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,Y0=Y0,missing1=missing1,missing2=missing2,missing_details=missing_details,censored_details=censored_details,rj_acc=rj_acc,mh_acc=mh_acc,priornum=priornum,maximal.mod=maximal.mod,IP=IP,eta.hat=eta.hat,save=save,name=name,null.move.prob=null.move.prob,time=time,a=a,b=b) class(est)<-"bict" est} conting/R/drop_term.R0000644000175100001440000000304712753071315014260 0ustar hornikusersdrop_term <- function(curr.index,data,maximal.mod){ big.X<-maximal.mod$x ## maximal design matrix full.terms<-attributes(big.X)$assign ## terms in big.X uni<-unique(full.terms[curr.index==1]) ## terms in current model uni<-uni[uni>0] ## non-interecpt terms in current model term.labels<-attr(summary(maximal.mod)$terms,"term.labels")[uni] ## term labels in current model term.order<-attr(summary(maximal.mod)$terms,"order")[uni] ## order of terms in current model term.factors<-attr(summary(maximal.mod)$terms,"factors")[,uni] ## constituent main effect terms of terms in ## current model K<-length(term.labels[term.order==1]) ## number of main effect terms can_drop<-c() ## vector containing terms we can drop if(max(term.order)>1){ ## if we are not in independence model can_drop<-term.labels[term.order==max(term.order)] ## we can drop all highest order terms candos<-(1:length(term.labels[term.order0 & is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="")} if(object$save==0 & save>0){ if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} write.table(file=name_BETA,x=object$BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=object$MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=object$SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=object$rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=object$mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) } 
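## The updating function restarts the chain from the last stored state of the
## existing object (extracted below) and appends the new iterations to it.
## Illustrative call (not run), assuming the function is exported as bcctu(),
## by analogy with bictu(), and that fit was returned by bcct():
# fit <- bcctu(object = fit, n.sample = 5000)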
start.sig<-object$SIG[length(object$SIG)] start.index<-model2index(object$MODEL[length(object$SIG)],dig=dim(object$BETA)[2]) start.beta<-object$BETA[dim(object$BETA)[1],start.index==1] runit<-bcct.fit(priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat, ini.index=start.index,ini.beta=start.beta,ini.sig=start.sig,iters=n.sample,save=save,name=name,null.move.prob=object$null.move.prob, a=object$a,b=object$b,progress=progress) BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} if(save==0){ rj_acc<-c(object$rj_acc,rj_acc) mh_acc<-c(object$mh_acc,mh_acc) BETA<-rbind(object$BETA,BETA) SIG<-c(object$SIG,SIG) MODEL<-c(object$MODEL,MODEL)} ptm<-(proc.time())[3]-ptm time<-object$time+ptm est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,rj_acc=rj_acc,mh_acc=mh_acc,priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,save=save,name=name,null.move.prob=object$null.move.prob,time=time,a=object$a,b=object$b) class(est)<-"bcct" est} conting/R/beta_mode.R0000644000175100001440000000516712753071315014211 0ustar hornikusersbeta_mode <- function(X,prior="SBH",y,IP,a=0.001,b=0.001){ Xt<-t(X) ## transpose of X sy<-log(ifelse(y>0,y,1/6)) ## starting values sb<-coef(lm(sy~X-1)) ## "" ###################### Log likelihoods ######################## loglik<-function(beta){ eta<-as.vector(X%*%matrix(beta,ncol=1)) sum(dpois(x=y,lambda=exp(eta),log=TRUE))} ## log-likelihood dloglik<-function(beta){ eta<-as.vector(X%*%matrix(beta,ncol=1)) as.vector(Xt%*%matrix(y-exp(eta),ncol=1))} ## gradient of log-likelihood d2loglik<-function(beta){ eta<-as.vector(X%*%matrix(beta,ncol=1)) w<-exp(eta) -crossprod(x=X*w,y=X)} ## hessian of log-likelihood ################################################################ priortypes<-c("UIP","SBH") priornum<-c(1,2)[prior==priortypes] ## which prior - defines prior part of posterior ################# UIP Priors ################################### if(priornum==1){ iSig<-IP[-1,-1] ## inverse prior variance #Sig<-solve(iSig) ## prior variance Sig<-chol2inv(chol(iSig)) ## prior variance pp<-dim(X)[2]-1 ## number of parameters - 1 prior<-function(beta){ dmvnorm(x=beta[-1],mean=rep(0,pp),sigma=Sig,log=TRUE)} ## prior log pdf dprior<-function(beta){ -c(0,as.vector(iSig%*%matrix(beta[-1],ncol=1)))} ## gradient of prior log pdf d2prior<-function(beta){ -cbind(0,rbind(0,iSig))}} ## hessian of prior log pdf ################# SBH Priors ################################### if(priornum==2){ ## parameters from inverse gamma #a<-0.001 #b<-0.001 iSig<-IP[-1,-1] ## inverse scale matrix for t-distribution pp<-dim(X)[2]-1 prior<-function(beta){ bsb<-as.vector(matrix(beta[-1],nrow=1)%*%iSig%*%matrix(beta[-1],ncol=1)) -0.5*(a+pp)*log(b+bsb)} ## prior log pdf dprior<-function(beta){ sb<-as.vector(iSig%*%matrix(beta[-1],ncol=1)) bsb<-sum(beta[-1]*sb) -c(0,(a+pp)*sb/(b+bsb))} ## gradient of prior log pdf d2prior<-function(beta){ sb<-as.vector(iSig%*%matrix(beta[-1],ncol=1)) bsb<-sum(beta[-1]*sb) ## hessian of prior log pdf -cbind(0,rbind(0,(a+pp)*((iSig/(b+bsb))-2*((matrix(sb,ncol=1)%*%matrix(sb,nrow=1))/((b+bsb)^2)))))}} ################################################################## mlogpost<-function(beta){ -loglik(beta)-prior(beta)} 
## minus log posterior mdlogpost<-function(beta){ -dloglik(beta)-dprior(beta)} ## gradient of minus log posterior md2logpost<-function(beta){ -d2loglik(beta)-d2prior(beta)} ## hessian of minus log posterior opt<-nlminb(start=sb,objective=mlogpost,gradient=mdlogpost,hessian=md2logpost) ## optimisation! mbeta<-opt$par mbeta} conting/R/inter_probs.R0000644000175100001440000000221612753071315014610 0ustar hornikusersinter_probs <- function(object,cutoff=0.75,n.burnin=0,thin=1){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} term.labels<-c("(Intercept)",attr(summary(object$maximal.mod)$terms,"term.labels")) term.numbers<-attributes(object$maximal.mod$x)$assign small<-function(i){ min((1:length(term.numbers))[term.numbers==i])} ids<-sapply(X=0:max(term.numbers),FUN=small) if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] INDO<-model2index(innerMODEL,dig=dim(innerBETA)[2]) post_prob<-apply(INDO[,ids],2,mean) est<-list(term=term.labels[post_prob>=cutoff],prob=post_prob[post_prob>=cutoff],thin=thin) class(est)<-"interprob" est } conting/R/prop_mod.R0000644000175100001440000000242312753071315014101 0ustar hornikusersprop_mod <- function(curr.index,data,maximal.mod,null.move.prob=0.5){ stay<-runif(1) if(stay>null.move.prob){ big.X<-maximal.mod$x full.labels<-attr(summary(maximal.mod)$terms,"term.labels") full.order<-attr(summary(maximal.mod)$terms,"order") full.factors<-attr(summary(maximal.mod)$terms,"factors") full.terms<-attributes(big.X)$assign dr<-drop_term(curr.index,data,maximal.mod) ad<-add_term(curr.index,data,maximal.mod) terms<-c(dr,ad) types<-rep(c(1,2),c(length(dr),length(ad))) total.choices<-length(terms) if(length(terms)>0){ choose<-sample(x=1:total.choices,size=1) termo<-terms[choose] typo<-types[choose] new.index<-curr.index if(typo==1){ #drop move new.index[full.terms==(1:length(full.labels))[full.labels==termo]]<-0 result<-list(new.index=new.index,type="drop",total.choices=total.choices,null.move.prob=null.move.prob)} if(typo==2){ #add move new.index[full.terms==(1:length(full.labels))[full.labels==termo]]<-1 result<-list(new.index=new.index,type="add",total.choices=total.choices,null.move.prob=null.move.prob)}} else{ result<-list(new.index=curr.index,type="null",total.choices=0,null.move.prob=null.move.prob)}} else{ result<-list(new.index=curr.index,type="null",total.choices=0,null.move.prob=null.move.prob)} result} conting/R/accept_rate.R0000644000175100001440000000024312753071315014532 0ustar hornikusersaccept_rate <- function(object){ rj<-object$rj_acc mh<-object$mh_acc est<-list(rj_ar=100*mean(rj),mh_ar=100*mean(mh)) class(est)<-"acceptrate" est} conting/R/print.interstat.R0000644000175100001440000000075312753071315015436 0ustar hornikusersprint.interstat <- function(x,digits=max(3, getOption("digits") - 3),...){ df<-data.frame(post_prob=x$prob,post_mean=x$post_mean, post_var=x$post_var,lower_lim=x$lower,upper_lim=x$upper) nam<-x$term row.names(df)<-nam lev<-100*x$prob.level cat("Posterior summary statistics of log-linear parameters:\n") print(df,digits=digits) cat("NB: lower_lim and 
upper_lim refer to the lower and upper values of the\n") cat(lev,"% highest posterior density intervals, respectively\n")} conting/R/summary.bict.R0000644000175100001440000000263212753071315014701 0ustar hornikuserssummary.bict <- function(object,n.burnin=0,thin=1,cutoff=0.75,statistic="X2",best=NULL,scale=0.1,prob.level=0.95,...){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(scale<0 | scale>1){ stop("scale should be between 0 and 1")} if(!is.null(best)){ if(best<=0){ stop("best should be positive")}} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} is1<-inter_stats(object,n.burnin=n.burnin,cutoff=cutoff,thin=thin,prob.level=prob.level) is2<-mod_probs(object,n.burnin=n.burnin,scale=scale,best=best,thin=thin) is3<-total_pop(object,n.burnin=n.burnin,thin=thin,prob.level=prob.level) is4<-bayespval(object,n.burnin=n.burnin,thin=thin,statistic=statistic) est<-list(BETA=object$BETA,MODEL=object$MODEL,SIG=object$SIG,Y0=object$Y0,rj_acc=object$rj_acc,mh_acc=object$mh_acc,priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,save=object$save,name=object$name,int_stats=is1,mod_stats=is2,tpop_stats=is3,pval_stats=is4) class(est)<-"sbict" est } conting/R/bcct.R0000644000175100001440000000753612753071315013207 0ustar hornikusersbcct <- function(formula,data=NULL,n.sample,prior="SBH",start.formula=NULL,start.beta=NULL,start.sig=NULL,save=0,name=NULL,null.move.prob=0.5,a=0.001,b=0.001,progress=FALSE){ if(n.sample<=0){ stop("n.sample must be positive")} if(prior!="UIP" & prior!="SBH"){ stop("prior not found")} if(save<0){ stop("save must be non-negative")} if(null.move.prob<0 | null.move.prob>1){ stop("null.move.prob is a probability and should be between 0 and 1")} if(a<0 & a!=(-1)){ stop("a and b must be non-negative")} if(b<0){ stop("a and b must be non-negative")} ptm<-(proc.time())[3] if(!is.null(data)){ if(attributes(data)$class=="table"){ data<-data.frame(data)}} if(save>0){ if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="")} ## names for files if we are saving them if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} } ## error if files already exist priortypes<-c("UIP","SBH") priornum<-c(1,2)[prior==priortypes] ## which prior? 
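## Illustrative call (not run). AOH.rda ships in conting/data; the column
## names y, alc, hyp and obe for the cell counts and the three classifying
## factors are assumed here:
# data(AOH)
# fit <- bcct(formula = y ~ (alc + hyp + obe)^3, data = AOH, n.sample = 100, prior = "UIP")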
options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ maximal.mod<-glm(formula=formula,data=data,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} else{ maximal.mod<-glm(formula=formula,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) ## get maximal design matrix big.X<-maximal.mod$x ## maximal design matrix y<-maximal.mod$y ## responses n<-dim(big.X)[1] ## sample size (number of cells) IP<-t(big.X)%*%big.X/n IP[,1]<-0 IP[1,0]<-0 ## inverse prior scale matrix bmod<-beta_mode(X=big.X,y=y,prior=prior,IP=IP,a=a,b=b) ## find posterior mode eta.hat<-as.vector(big.X%*%matrix(bmod,ncol=1)) ## calculate eta.hat for RJ if(is.null(start.formula)){ start.index<-rep(1,dim(big.X)[2])} else{ start.index<-formula2index(big.X=big.X,formula=start.formula,data=data)} ## set up starting values for various parameters if(is.null(start.beta)){ start.beta<-bmod[start.index==1]} if(is.null(start.sig)){ start.sig<-1} start.mod<-index2model(start.index) runit<-bcct.fit(priornum=priornum,maximal.mod=maximal.mod,IP=IP,eta.hat=eta.hat,ini.index=start.index,ini.beta=start.beta,ini.sig=start.sig,iters=n.sample,save=save,name=name,null.move.prob=null.move.prob,a=a,b=b,progress=progress) ## do some iterations of data augmentation algorithm BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc ## list results if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} time<-(proc.time())[3]-ptm est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,rj_acc=rj_acc,mh_acc=mh_acc,priornum=priornum,maximal.mod=maximal.mod,IP=IP,eta.hat=eta.hat,save=save,name=name,null.move.prob=null.move.prob,time=time,a=a,b=b) class(est)<-"bcct" ## setup class for results est} conting/R/print.bcct.R0000644000175100001440000000143612753071315014333 0ustar hornikusersprint.bcct <- function(x,...){ hrs<-round(x$time%/%3600,0) mins<-round((x$time%%3600)%/%60,0) secs<-round((x$time%%3600)%%60,0) hrs<-ifelse(hrs<10,paste("0",hrs,sep=""),hrs) mins<-ifelse(mins<10,paste("0",mins,sep=""),mins) secs<-ifelse(secs<10,paste("0",secs,sep=""),secs) priortypes<-c("UIP","SBH") cat("Number of cells in table =",length(x$maximal.mod$y),"\n") cat("\n") cat("Maximal model =\n") print(x$maximal.mod$formula) cat("\n") cat("Number of log-linear parameters in maximal model =",dim(x$maximal.mod$x)[2],"\n") cat("\n") cat("Number of MCMC iterations =",length(x$MODEL),"\n") cat("\n") cat("Computer time for MCMC =",paste(hrs,":",mins,":",secs,sep=""),"\n") cat("\n") cat("Prior distribution for log-linear parameters =",priortypes[x$priornum],"\n") } conting/R/print.interprob.R0000644000175100001440000000035412753071315015422 0ustar hornikusersprint.interprob <- function(x,digits = max(3, getOption("digits") - 3),...){ nam<-x$term df<-data.frame(post_prob=x$prob) row.names(df)<-nam cat("Posterior probabilities of log-linear parameters:\n") print(df,digits=digits)} conting/R/find_cens.R0000644000175100001440000000154212753071315014213 0ustar hornikusersfind_cens <- function(sources,cens_source,data=NULL,unobs.level="un",obs.level="obs"){ if(!is.null(data)){ if(attributes(data)$class=="table"){ data<-data.frame(data)}} options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ small.X<-model.frame(sources,data=data) 
smaller.X<-model.frame(cens_source,data=data)} else{ small.X<-model.frame(sources) smaller.X<-model.frame(cens_source)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) which<-c() for(i in 1:dim(small.X)[2]){ which[i]<-ifelse(all(small.X[,i]==smaller.X[,1]),1,0)} which_cens<-(1:dim(small.X)[2])[which==1] which_ok<-(1:dim(small.X)[2])[which==0] check.vec<-rep(0,dim(small.X)[2]) check.vec[which_cens]<-obs.level check.vec[which_ok]<-unobs.level res<-c() for(i in 1:dim(small.X)[1]){ if(all(small.X[i,]==check.vec)){res<-c(res,i)}} res} conting/R/bcctsubset.fit.R0000644000175100001440000001145712753071315015213 0ustar hornikusersbcctsubset.fit <- function(priornum, subset.index, maximal.mod, IP, eta.hat, ini.index, ini.beta, ini.sig, iters, save, name, null.move.prob, a, b, progress) { if (is.null(name)) { name_RJACC <- "RJACC.txt" name_MHACC <- "MHACC.txt" name_BETA <- "BETA.txt" name_MODEL <- "MODEL.txt" name_SIG <- "SIG.txt" } else { name_RJACC <- paste(name, "RJACC.txt", sep = "") name_MHACC <- paste(name, "MHACC.txt", sep = "") name_BETA <- paste(name, "BETA.txt", sep = "") name_MODEL <- paste(name, "MODEL.txt", sep = "") name_SIG <- paste(name, "SIG.txt", sep = "") } subset.mod<-c() for(i in 1:dim(subset.index)[1]){ subset.mod[i]<-index2model(subset.index[i,])} big.X <- maximal.mod$x y <- maximal.mod$y data <- maximal.mod$data curr.index <- ini.index curr.mod<-index2model(curr.index) curr.X <- big.X[, curr.index == 1] curr.ivar <- IP[curr.index == 1, curr.index == 1] MODEL <- c() BETA <- c() curr.beta <- ini.beta SIG <- c() curr.sig <- ini.sig rj_acc <- c() mh_acc <- c() counter <- 0 if (progress) { pb <- txtProgressBar(min = 0, max = iters, style = 3) } while (counter < iters) { uu<-runif(1) if(uu 0) { if (counter%%save == 0) { write.table(file = name_BETA, x = BETA, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_MODEL, x = MODEL, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_SIG, x = SIG, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_RJACC, x = rj_acc, row.names = FALSE, col.names = FALSE, append = TRUE) write.table(file = name_MHACC, x = mh_acc, row.names = FALSE, col.names = FALSE, append = TRUE) rj_acc <- c() mh_acc <- c() BETA <- c() MODEL <- c() SIG <- c() } } } if (progress) { close(pb) } list(BETA = BETA, SIG = SIG, MODEL = MODEL, rj_acc = rj_acc, mh_acc = mh_acc) } conting/R/print.pval.R0000644000175100001440000000071012753071315014354 0ustar hornikusersprint.pval <- function(x, digits = max(3, getOption("digits") - 3),...){ statistic2<-c("X2","deviance","Freeman-Tukey")[x$statnum==(1:3)] cat("Under the",statistic2,"statistic \n") cat("\n") cat("Summary statistics for T_pred \n") print(round(summary(x$Tpred),digits=digits)) cat("\n") cat("Summary statistics for T_obs \n") print(round(summary(x$Tobs),digits=digits)) cat("\n") cat("Bayesian p-value = ",round(x$pval,digits=digits),"\n")} conting/R/print.sbict.R0000644000175100001440000000037512753071315014525 0ustar hornikusersprint.sbict <- function(x,...,digits = max(3, getOption("digits") - 3)){ print(x$int_stats,digits=digits) cat("\n") print(x$mod_stats,digits=digits) cat("\n") print(x$tpop_stats,digits=digits) cat("\n") print(x$pval_stats,digits=digits) } conting/R/add_term.R0000644000175100001440000000246512753071315014047 0ustar hornikusersadd_term <- function(curr.index,data,maximal.mod){ big.X<-maximal.mod$x full.terms<-attributes(big.X)$assign full.labels<-attr(summary(maximal.mod)$terms,"term.labels") 
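## The terms object of the maximal model supplies what is needed to decide
## which terms can be added: "assign" maps design-matrix columns to terms,
## "term.labels" names the terms, "order" gives the interaction order
## (1 = main effect) and "factors" records which main effects enter each term.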
full.order<-attr(summary(maximal.mod)$terms,"order") full.factors<-attr(summary(maximal.mod)$terms,"factors") uni<-unique(full.terms[curr.index==1]) uni<-uni[uni>0] curr.labels<-full.labels[uni] curr.order<-full.order[uni] curr.factors<-full.factors[,uni] K<-length(full.labels[full.order==1]) can_add<-c() if(!all(curr.index==1)){ pot.labels<-c() pot.order<-c() pot<-c() for(ttt in 1:length(full.labels)){ if(!any(full.labels[ttt]==curr.labels)){ pot.labels<-c(pot.labels,full.labels[ttt]) pot.order<-c(pot.order,full.order[ttt]) pot<-c(pot,ttt)}} pot.factors<-matrix(full.factors[-1,pot],nrow=K) can_add<-pot.labels[pot.order==2] candos<-(1:length(pot.labels))[pot.order>2] candos<-candos[pot.order[candos]<=(max(curr.order)+1)] for(ttt in candos){ combos<-combinations(n=pot.order[ttt],r=pot.order[ttt]-1,v=(1:K)[pot.factors[,ttt]==1]) ok<-c() for(q in 1:dim(combos)[1]){ int<-rep(0,K) int[combos[q,]]<-1 run<-as.numeric(apply(matrix(rep(int,dim(curr.factors)[2]),nrow=K,byrow=FALSE)==curr.factors[-1,],2,all)) ok[q]<-ifelse(sum(run)==0,0,1)} if(all(ok==1)){ can_add<-c(can_add,pot.labels[ttt])}}} can_add} conting/R/plot.pval.R0000644000175100001440000000055412753071315014204 0ustar hornikusersplot.pval <- function(x,...){ plot(x$Tobs,x$Tpred,xlab="T_obs",ylab="T_pred",type="n",...) points(x$Tobs[x$Tpred>x$Tobs],x$Tpred[x$Tpred>x$Tobs],col=8,pch=16) points(x$Tobs[x$TpredT_obs","T_pred=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} if(order<1){ stop("order should be greater than or equal to 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(n.burnin>0){ if(class(object)=="bict"){ innerY0<-matrix(object$Y0[-(1:n.burnin),],ncol=dim(object$Y0)[2])} else{ innerY0<-NULL} innerSIG<-object$SIG[-(1:n.burnin)] innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ if(class(object)=="bict"){ innerY0<-matrix(object$Y0,ncol=dim(object$Y0)[2])} else{ innerY0<-NULL} innerSIG<-object$SIG innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] innerSIG<-innerSIG[every] if(!is.null(innerY0)){ innerY0<-matrix(innerY0[every,],ncol=dim(object$Y0)[2])} tab<-sort(table(innerMODEL)/length(innerMODEL),decreasing=TRUE) if(is.null(formula)){ border<-order if(order>length(tab)){ stop("Model not visited in (thinned) sample")} interest<-tab[order] bformula<-index2formula(index=model2index(model=names(interest),dig=dim(object$maximal.mod$x)[2]),maximal.mod=object$maximal.mod)} else{ bformula<-formula interest<-tab[names(tab)==index2model(formula2index(big.X=object$maximal.mod$x, formula=formula, data=object$maximal.mod$data))] if(length(interest)==0){ stop("Model not visited in (thinned) sample")} border<-(1:length(tab))[names(tab)==index2model(formula2index(big.X=object$maximal.mod$x, formula=formula, data=object$maximal.mod$data))] } bformula<-paste0("~",as.character(bformula)[3]) int_index<-model2index(model=names(interest),dig=dim(object$maximal.mod$x)[2]) redBETA<-innerBETA[innerMODEL==names(interest),int_index==1] redSIG<-innerSIG[innerMODEL==names(interest)] if(!is.null(innerY0)){ redY0<-matrix(innerY0[innerMODEL==names(interest),],ncol=dim(object$Y0)[2])} else{ redY0<-NULL} if(!is.null(redY0)){ 
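## For "bict" objects the total population size under this single model is
## the observed total obs.z plus the imputed counts for the unobserved cells,
## summarised by its posterior mean and an HPD interval.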
missing<-c(object$missing1,object$missing2) yyy<-object$maximal.mod$y[-missing] xxx<-object$maximal.mod$x[-missing,int_index==1] obs.z<-sum(yyy) redTOT<-apply(redY0,2,sum)+obs.z int<-HPDinterval(mcmc(redTOT),prob=prob.level) meanTOT<-mean(redTOT)} else{ yyy<-object$maximal.mod$y xxx<-object$maximal.mod$x[,int_index==1] redTOT<-NULL int<-NULL meanTOT<-NULL} MU<-exp(redBETA%*%t(xxx)) PRED<-matrix(rpois(n=prod(dim(MU)),lambda=as.vector(MU)),ncol=dim(MU)[2]) Y<-matrix(rep(yyy,dim(MU)[1]),ncol=dim(MU)[2],byrow=TRUE) statnum<-(1:3)[c("X2","deviance","FreemanTukey")==statistic] if(statnum==1){ Tpred<-apply(((PRED-MU)^2)/MU,1,sum) Tobs<-apply(((Y-MU)^2)/MU,1,sum)} if(statnum==2){ Tpred<-apply(-2*dpois(x=PRED,lambda=MU,log=TRUE),1,sum) Tobs<-apply(-2*dpois(x=Y,lambda=MU,log=TRUE),1,sum)} if(statnum==3){ Tpred<-apply((sqrt(PRED)-sqrt(MU))^2,1,sum) Tobs<-apply((sqrt(Y)-sqrt(MU))^2,1,sum)} pval<-mean(as.numeric(Tpred>Tobs)) post_mean<-apply(redBETA,2,mean,na.rm=TRUE) post_var<-apply(redBETA,2,var,na.rm=TRUE) post_int<-c() for(j in 1:length(post_mean)){ into<-HPDinterval(mcmc(redBETA[,j]),prob=prob.level) post_int<-rbind(post_int,into)} dimnames(post_int)<-NULL lower<-post_int[,1] upper<-post_int[,2] est<-list(term=(dimnames(object$maximal.mod$x)[[2]])[int_index==1],post_prob=as.numeric(interest), post_mean=post_mean,post_var=post_var, lower=lower,upper=upper,thin=thin,prob.level=prob.level,formula=bformula,order=border, BETA=redBETA,SIG=redSIG,Y0=redY0,TOT=redTOT,meanTOT=meanTOT,int=int, PRED=PRED,Tpred=Tpred,Tobs=Tobs,pval=pval,statnum=statnum,statistic=statistic) class(est)<-"submod" est} conting/R/model2index.R0000644000175100001440000000033012753071315014467 0ustar hornikusersmodel2index <- function (model,dig) { ## need to tell it number of columns in maximal design matrix res<-t(sapply(X=model,FUN=hex2bin)) ## uses hex2bin function from BMS s<-dim(res)[2] res[,(s-dig+1):s]} conting/R/print.acceptrate.R0000644000175100001440000000040112753071315015522 0ustar hornikusersprint.acceptrate <- function(x,digits = max(3, getOption("digits") - 3),...){ cat("Acceptance rate of reversible jump proposals = ",round(x$rj_ar,digits),"% \n") cat("Acceptance rate of Metropolis-Hastings proposals = ",round(x$mh_ar,digits),"% \n")} conting/R/bcct.fit.R0000644000175100001440000000776212753071315013771 0ustar hornikusersbcct.fit <- function(priornum,maximal.mod,IP,eta.hat,ini.index,ini.beta,ini.sig,iters,save,name,null.move.prob,a,b,progress){ if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="")} ## names for files if we are saving them big.X<-maximal.mod$x ## maximal design matrix y<-maximal.mod$y ## responses data<-maximal.mod$data ## data curr.index<-ini.index ## current index (binary vector) curr.X<-big.X[,curr.index==1] ## current design matrix curr.ivar<-IP[curr.index==1,curr.index==1] ## current prior scale matrix MODEL<-c() ## current model (hexidecimal) BETA<-c() ## matrix for betas curr.beta<-ini.beta ## current betas SIG<-c() ## current prior variance curr.sig<-ini.sig ## ditto rj_acc<-c() ## setting up vector for RJ accept/rejects mh_acc<-c() ## setting up vector for MH accept/rejects counter<-0 ## iteration counter if(progress){ pb<-txtProgressBar(min = 0, max = iters, style = 3)} ## set up progress bar 
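## Main MCMC loop: each iteration appends the current log-linear parameters,
## model (as a hexadecimal string) and sigma to BETA, MODEL and SIG, and
## records the reversible jump and Metropolis-Hastings acceptance indicators
## in rj_acc and mh_acc; when save > 0 the accumulated output is written to
## the text files every save iterations and the in-memory copies are reset.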
while(counter0){ if(counter%%save==0){ write.table(file=name_BETA,x=BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) ## save progress rj_acc<-c() mh_acc<-c() BETA<-c() MODEL<-c() SIG<-c()}}} if(progress){ close(pb)} ## close progress bar list(BETA=BETA,SIG=SIG,MODEL=MODEL,rj_acc=rj_acc,mh_acc=mh_acc)} conting/R/formula2index.R0000644000175100001440000000055212753071315015042 0ustar hornikusersformula2index <- function(big.X,formula,data){ dX<-model.matrix(formula,data=data,contrasts =attributes(big.X)$contrasts)## design matrix given by formula indo<-c() for(i in 1:dim(big.X)[2]){ indo[i]<-ifelse(any((dimnames(big.X)[[2]])[i]==(dimnames(dX)[[2]])),1,0)} ## which columns of maximal design matrix ## are in this new matrix indo} conting/R/RJ_update.R0000644000175100001440000000633412753071315014144 0ustar hornikusersRJ_update <- function(prop.index,curr.index,curr.beta,eta.hat,curr.y,big.X,proposal.probs,i.prop.prior.var,i.curr.prior.var){ icurrR<-i.curr.prior.var[-1,-1] ## current inverse prior variance ipropR<-i.prop.prior.var[-1,-1] ## proposed inverse prior variance currR<-chol2inv(chol(icurrR)) ## current prior variance propR<-chol2inv(chol(ipropR)) ## proposed prior variance RHO_TOP<-proposal.probs[1] ## proposal prob (proposed to current) RHO_BOT<-proposal.probs[2] ## proposal prob (current to proposed) curr.X<-big.X[,curr.index==1] ## current design matrix prop.X<-big.X[,prop.index==1] ## proposed design matrix S<-matrix(big.X[,prop.index!=curr.index],nrow=length(curr.y)) ## difference in current and proposed design matrices w<-exp(eta.hat) ## weight matrix under maximal model curr.LP<-as.vector(curr.X%*%matrix(curr.beta,ncol=1)) ## current linear predictor if(sum(prop.index)sum(curr.index)){ ### Birth move cXW<-t(curr.X)%*%diag(w) cXWX<-cXW%*%curr.X icXWX<-chol2inv(chol(cXWX)) icXWX.XW<-icXWX%*%cXW SWIP<-t(S)%*%diag(w)-t(S)%*%t(cXW)%*%icXWX.XW SIG1<-chol2inv(chol(SWIP%*%S)) ## proposal variance MU1<-as.vector(SIG1%*%SWIP%*%matrix(eta.hat,ncol=1)) ## proposal mean u1<-as.vector(rmvnorm(n=1,mean=MU1,sigma=SIG1)) ## innovation variables prop.beta<-rep(0,dim(prop.X)[2]) prop.beta[curr.index[prop.index==1]==1]<-curr.beta-as.vector(icXWX.XW%*%S%*%matrix(u1,ncol=1)) prop.beta[curr.index[prop.index==1]==0]<-u1 ## proposal for beta prop.LP<-as.vector(prop.X%*%matrix(prop.beta,ncol=1)) ## proposed linear predictor top<-sum(curr.y*prop.LP)-sum(exp(prop.LP))+dmvnorm(x=prop.beta[-1],mean=rep(0,length(prop.beta)-1),sigma=propR,log=TRUE) bot<-sum(curr.y*curr.LP)-sum(exp(curr.LP))+dmvnorm(x=curr.beta[-1],mean=rep(0,length(curr.beta)-1),sigma=currR,log=TRUE) jac<--dmvnorm(x=u1,mean=MU1,sigma=SIG1,log=TRUE) ## log numerator, denominator and jacobian prob<-(RHO_TOP/RHO_BOT)*exp(top-bot+jac)} ## acceptance probability if(prob>=runif(1)){ new.beta<-prop.beta new.index<-prop.index} else{ new.beta<-curr.beta new.index<-curr.index} ## accept or reject list(new.beta=new.beta,new.index=new.index)} conting/R/index2formula.R0000644000175100001440000000101512753071315015035 0ustar hornikusersindex2formula <- function(index,maximal.mod){ terms<-attributes(maximal.mod$x)$assign+1 ## terms in maximal model indo.terms<-unique(terms[index==1]) ## terms in formula model modA<-maximal.mod 
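## The binary index picks out which terms of the maximal model are present;
## their labels are pasted back together below into a formula of the form
## y ~ term1 + term2 + ...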
term.labels<-attr(summary(modA)$terms,"term.labels")[indo.terms[-1]-1] ## term labels in formula model form<-paste0("y~",term.labels[1]) ## cobble together the formula if(length(term.labels)>1){ for(i in 2:length(term.labels)){ form<-paste0(form,"+",term.labels[i])}} form<-as.formula(form) form} conting/R/bict.fit.R0000644000175100001440000000764612753071315014000 0ustar hornikusersbict.fit <- function(priornum,missing1,missing2,maximal.mod,IP,eta.hat,ini.index,ini.beta,ini.sig,ini.y0,iters,save,name,null.move.prob,a,b,progress){ missing<-c(missing1,missing2) if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt" name_Y0<-"Y0.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="") name_Y0<-paste(name,"Y0.txt",sep="")} big.X<-maximal.mod$x y<-maximal.mod$y data<-maximal.mod$data curr.index<-ini.index curr.X<-big.X[,curr.index==1] curr.ivar<-IP[curr.index==1,curr.index==1] #MODEL<-as.character(index2model(curr.index)) MODEL<-c() #BETA<-matrix(0,nrow=1,ncol=dim(big.X)[2]) #BETA[,curr.index==1]<-ini.beta BETA<-c() curr.beta<-ini.beta #SIG<-ini.sig SIG<-c() curr.sig<-ini.sig #Y0<-matrix(ini.y0,nrow=1) Y0<-c() curr.y0<-ini.y0 rj_acc<-c() mh_acc<-c() counter<-0 if(progress){ pb<-txtProgressBar(min = 0, max = iters, style = 3)} ## set up progress bar while(counter0){ for(i in 1:length(missing2)){ ppp<-dpois(x=1:y[missing2[i]],lambda=mutar[missing2[i]],log=TRUE) ppp<-ppp-max(ppp) curr.y0[length(missing1)+i]<-sample(x=1:y[missing2[i]],size=1,prob=exp(ppp))}} curry<-rep(0,dim(big.X)[2]) curry[new.index==1]<-new.beta curr.index<-new.index curr.beta<-new.beta curr.X<-big.X[,curr.index==1] curr.ivar<-IP[curr.index==1,curr.index==1] BETA<-rbind(BETA,curry) SIG<-c(SIG,curr.sig) MODEL<-c(MODEL,index2model(new.index)) Y0<-rbind(Y0,curr.y0) counter<-counter+1 if(progress){ setTxtProgressBar(pb, counter)} ## update progress bar if(save>0){ if(counter%%save==0){ write.table(file=name_BETA,x=BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_Y0,x=Y0,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) rj_acc<-c() mh_acc<-c() BETA<-c() MODEL<-c() SIG<-c() Y0<-c()}}} if(progress){ close(pb)} list(BETA=BETA,SIG=SIG,MODEL=MODEL,Y0=Y0,rj_acc=rj_acc,mh_acc=mh_acc)} conting/R/bayespval.R0000644000175100001440000000300512753071315014245 0ustar hornikusersbayespval <- function(object,n.burnin=0,thin=1,statistic="X2"){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(is.null(object$missing1)){ yyy<-object$maximal.mod$y xxx<-object$maximal.mod$x} else{ missing<-c(object$missing1,object$missing2) yyy<-object$maximal.mod$y[-missing] xxx<-object$maximal.mod$x[-missing,]} if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),]} else{ innerBETA<-object$BETA} n.sample<-dim(innerBETA)[1] 
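## Posterior predictive check: for each retained draw the cell means MU are
## formed from the sampled log-linear parameters, a replicate table PRED is
## simulated from Poisson(MU), and the chosen discrepancy statistic is
## evaluated for both the replicate and the observed counts; the Bayesian
## p-value is Pr(T_pred > T_obs).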
every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] MU<-exp(innerBETA%*%t(xxx)) PRED<-matrix(rpois(n=prod(dim(MU)),lambda=as.vector(MU)),ncol=dim(MU)[2]) Y<-matrix(rep(yyy,dim(MU)[1]),ncol=dim(MU)[2],byrow=TRUE) statnum<-(1:3)[c("X2","deviance","FreemanTukey")==statistic] if(statnum==1){ Tpred<-apply(((PRED-MU)^2)/MU,1,sum) Tobs<-apply(((Y-MU)^2)/MU,1,sum)} if(statnum==2){ Tpred<-apply(-2*dpois(x=PRED,lambda=MU,log=TRUE),1,sum) Tobs<-apply(-2*dpois(x=Y,lambda=MU,log=TRUE),1,sum)} if(statnum==3){ Tpred<-apply((sqrt(PRED)-sqrt(MU))^2,1,sum) Tobs<-apply((sqrt(Y)-sqrt(MU))^2,1,sum)} pval<-mean(as.numeric(Tpred>Tobs)) est<-list(PRED=PRED,Tpred=Tpred,Tobs=Tobs,pval=pval,statnum=statnum,statistic=statistic,thin=thin) class(est)<-"pval" est} conting/R/inter_stats.R0000644000175100001440000000313412753071315014621 0ustar hornikusersinter_stats <- function(object,cutoff=0.75,n.burnin=0,thin=1,prob.level=0.95){ if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] INDO<-model2index(innerMODEL,dig=dim(innerBETA)[2]) innerBETA2<-innerBETA innerBETA2[INDO==0]<-NA post_mean<-apply(innerBETA2,2,mean,na.rm=TRUE) post_var<-apply(innerBETA2,2,var,na.rm=TRUE) post_prob<-apply(INDO,2,mean) post_int<-c() for(j in 1:length(post_mean)){ if(length(innerBETA[INDO[,j]==1,j])>1){ into<-HPDinterval(mcmc(innerBETA[INDO[,j]==1,j]),prob=prob.level)} else{ into<-c(NA,NA)} post_int<-rbind(post_int,into)} dimnames(post_int)<-NULL lower<-post_int[,1] upper<-post_int[,2] est<-list(term=(dimnames(object$maximal.mod$x)[[2]])[post_prob>=cutoff],prob=post_prob[post_prob>=cutoff], post_mean=post_mean[post_prob>=cutoff],post_var=post_var[post_prob>=cutoff], lower=lower[post_prob>=cutoff],upper=upper[post_prob>=cutoff],thin=thin,prob.level=prob.level) class(est)<-"interstat" est} conting/R/mod_probs.R0000644000175100001440000000311012753071315014240 0ustar hornikusersmod_probs <- function(object,n.burnin=0,scale=0.1,best=NULL,thin=1){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(scale<0 | scale>1){ stop("scale should be between 0 and 1")} if(!is.null(best)){ if(best<=0){ stop("best should be positive")}} if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] tab<-sort(table(innerMODEL)/length(innerMODEL),decreasing=TRUE) tab.names<-dimnames(tab)[[1]] if(is.null(best)){ ref.tab<-tab[tab>tab[1]*scale]} else{ if(best<=length(tab)){ ref.tab<-tab[1:best]} else{ ref.tab<-tab}} if(length(ref.tab)>1){ ref.tab.names<-dimnames(ref.tab)[[1]] dimnames(ref.tab)<-NULL} else{ ref.tab.names<-names(ref.tab) 
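## Each retained model, stored as a hexadecimal string, is decoded back into
## a model formula below so that the table of posterior model probabilities
## is readable.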
names(ref.tab)<-NULL} if(length(ref.tab)>1){ forms<-c() for(j in 1:length(ref.tab)){ forms[j]<-paste0("~",as.character(index2formula(index=model2index(ref.tab.names,dig=dim(innerBETA)[2])[j,],maximal.mod=object$maximal.mod))[3])} } else{ forms<-paste0("~",as.character(index2formula(index=model2index(ref.tab.names,dig=dim(innerBETA)[2]),maximal.mod=object$maximal.mod))[3])} est<-list(table=data.frame(prob=ref.tab,model_formula=forms),totmodsvisit=length(tab),thin=thin) class(est)<-"modprobs" est} conting/R/RJ_update_swap.R0000644000175100001440000000766712753071315015210 0ustar hornikusersRJ_update_swap <- function (prop.index, curr.index, curr.beta, eta.hat, curr.y, big.X, proposal.probs, i.prop.prior.var, i.curr.prior.var){ icurrR <- i.curr.prior.var[-1, -1] ipropR <- i.prop.prior.var[-1, -1] currR <- chol2inv(chol(icurrR)) propR <- chol2inv(chol(ipropR)) RHO_TOP <- proposal.probs[1] RHO_BOT <- proposal.probs[2] curr.X <- big.X[, curr.index == 1] prop.X <- big.X[, prop.index == 1] Xcc <- big.X[, curr.index==1 & prop.index==1] S1 <- matrix(big.X[,curr.index==1 & prop.index==0],nrow = length(curr.y)) S2 <- matrix(big.X[,curr.index==0 & prop.index==1],nrow = length(curr.y)) w <- exp(eta.hat) curr.LP <- as.vector(curr.X %*% matrix(curr.beta, ncol = 1)) cXW <- t(Xcc) %*% diag(w) cXWX <- cXW %*% Xcc icXWX <- chol2inv(chol(cXWX)) icXWX.XW <- icXWX %*% cXW SWIP1 <- t(S1) %*% diag(w) - t(S1) %*% t(cXW) %*% icXWX.XW SWIP2 <- t(S2) %*% diag(w) - t(S2) %*% t(cXW) %*% icXWX.XW ## START OF SWAP MOVE if(dim(S1)[2]>0 & dim(S2)[2]>0){ SIG1 <- chol2inv(chol(SWIP1%*%S1)) SIG2 <- chol2inv(chol(SWIP2%*%S2)) MU1<-SIG1%*%SWIP1%*%matrix(eta.hat,ncol=1) MU2<-SIG2%*%SWIP2%*%matrix(eta.hat,ncol=1) beta_1 <- curr.beta[prop.index[curr.index == 1] == 1] beta_2 <- curr.beta[prop.index[curr.index == 1] == 0] u<-as.vector(rmvnorm(n=1,mean=MU2,sigma=SIG2)) prop.beta <- rep(0, dim(prop.X)[2]) prop.beta[curr.index[prop.index == 1] == 1] <- beta_1 + as.vector(icXWX.XW%*%(S1%*%matrix(beta_2,ncol=1)-S2%*%matrix(u,ncol=1))) prop.beta[curr.index[prop.index == 1] == 0] <- u prop.LP <- as.vector(prop.X %*% matrix(prop.beta, ncol = 1)) top <- sum(curr.y * prop.LP) - sum(exp(prop.LP)) + dmvnorm(x = prop.beta[-1],mean = rep(0, length(prop.beta) - 1), sigma = propR, log = TRUE) bot <- sum(curr.y * curr.LP) - sum(exp(curr.LP)) + dmvnorm(x = curr.beta[-1],mean = rep(0, length(curr.beta) - 1), sigma = currR, log = TRUE) jac<-dmvnorm(x=beta_2,mean=MU1,sigma=SIG1,log=TRUE)-dmvnorm(x=u,mean=MU2,sigma=SIG2,log=TRUE) prob <- (RHO_TOP/RHO_BOT) * exp(top - bot + jac)} ## END OF SWAP MOVE ## START OF DEATH MOVE if(dim(S1)[2]>0 & dim(S2)[2]==0){ SIG1 <- chol2inv(chol(SWIP1%*%S1)) MU1<-SIG1%*%SWIP1%*%matrix(eta.hat,ncol=1) beta_1 <- curr.beta[prop.index[curr.index == 1] == 1] beta_2 <- curr.beta[prop.index[curr.index == 1] == 0] prop.beta <- rep(0, dim(prop.X)[2]) prop.beta[curr.index[prop.index == 1] == 1] <- beta_1 + as.vector(icXWX.XW%*%(S1%*%matrix(beta_2,ncol=1))) prop.LP <- as.vector(prop.X %*% matrix(prop.beta, ncol = 1)) top <- sum(curr.y * prop.LP) - sum(exp(prop.LP)) + dmvnorm(x = prop.beta[-1],mean = rep(0, length(prop.beta) - 1), sigma = propR, log = TRUE) bot <- sum(curr.y * curr.LP) - sum(exp(curr.LP)) + dmvnorm(x = curr.beta[-1],mean = rep(0, length(curr.beta) - 1), sigma = currR, log = TRUE) jac<-dmvnorm(x=beta_2,mean=MU1,sigma=SIG1,log=TRUE) prob <- (RHO_TOP/RHO_BOT) * exp(top - bot + jac)} ## END OF DEATH MOVE ## START OF BIRTH MOVE if(dim(S1)[2]==0 & dim(S2)[2]>0){ SIG2 <- chol2inv(chol(SWIP2%*%S2)) 
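## Birth move: the coefficients u for the added columns S2 are proposed from
## a normal distribution with mean MU2 (computed next) and covariance SIG2,
## and the coefficients of the retained columns are shifted by
## -icXWX.XW %*% S2 %*% u to compensate for the added terms.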
MU2<-SIG2%*%SWIP2%*%matrix(eta.hat,ncol=1) u<-as.vector(rmvnorm(n=1,mean=MU2,sigma=SIG2)) prop.beta <- rep(0, dim(prop.X)[2]) prop.beta[curr.index[prop.index == 1] == 1] <- curr.beta + as.vector(icXWX.XW%*%(-S2%*%matrix(u,ncol=1))) prop.beta[curr.index[prop.index == 1] == 0] <- u prop.LP <- as.vector(prop.X %*% matrix(prop.beta, ncol = 1)) top <- sum(curr.y * prop.LP) - sum(exp(prop.LP)) + dmvnorm(x = prop.beta[-1],mean = rep(0, length(prop.beta) - 1), sigma = propR, log = TRUE) bot <- sum(curr.y * curr.LP) - sum(exp(curr.LP)) + dmvnorm(x = curr.beta[-1],mean = rep(0, length(curr.beta) - 1), sigma = currR, log = TRUE) jac<--dmvnorm(x=u,mean=MU2,sigma=SIG2,log=TRUE) prob <- (RHO_TOP/RHO_BOT) * exp(top - bot + jac)} if (prob >= runif(1)) { new.beta <- prop.beta new.index <- prop.index } else { new.beta <- curr.beta new.index <- curr.index } list(new.beta = new.beta, new.index = new.index)} conting/R/index2model.R0000644000175100001440000000006312753071315014472 0ustar hornikusersindex2model <- function(index){ bin2hex(index)} conting/R/print.submod.R0000644000175100001440000000225312753071315014707 0ustar hornikusersprint.submod <- function(x,...,digits = max(3, getOption("digits") - 3)){ blob<-round(x$post_prob,digits=digits) cat("Posterior model probability = ",blob,"\n") cat("\n") df<-data.frame(post_mean=x$post_mean, post_var=x$post_var,lower_lim=x$lower,upper_lim=x$upper) nam<-x$term row.names(df)<-nam lev<-100*x$prob.level cat("Posterior summary statistics of log-linear parameters:\n") print(df,digits=digits) cat("NB: lower_lim and upper_lim refer to the lower and upper values of the\n") cat(lev,"% highest posterior density intervals, respectively\n") cat("\n") if(!is.null(x$meanTOT)){ cat("Posterior mean of total population size =",round(x$meanTOT,digits),"\n") cat(lev,"% highest posterior density interval for total population size = (",round(x$int,digits),") \n") cat("\n")} statistic2<-c("X2","deviance","Freeman-Tukey")[x$statnum==(1:3)] cat("Under the",statistic2,"statistic \n") cat("\n") cat("Summary statistics for T_pred \n") print(round(summary(x$Tpred),digits=digits)) cat("\n") cat("Summary statistics for T_obs \n") print(round(summary(x$Tobs),digits=digits)) cat("\n") cat("Bayesian p-value = ",round(x$pval,digits=digits),"\n") } conting/R/print.totpop.R0000644000175100001440000000044112753071315014740 0ustar hornikusersprint.totpop <- function(x,digits = max(3, getOption("digits") - 3),...){ lev<-100*x$prob.level cat("Posterior mean of total population size =",round(x$meanTOT,digits),"\n") cat(lev,"% highest posterior density interval for total population size = (",round(x$int,digits),") \n")} conting/R/print.bict.R0000644000175100001440000000165112753071315014340 0ustar hornikusersprint.bict <- function(x,...){ hrs<-round(x$time%/%3600,0) mins<-round((x$time%%3600)%/%60,0) secs<-round((x$time%%3600)%%60,0) hrs<-ifelse(hrs<10,paste("0",hrs,sep=""),hrs) mins<-ifelse(mins<10,paste("0",mins,sep=""),mins) secs<-ifelse(secs<10,paste("0",secs,sep=""),secs) priortypes<-c("UIP","SBH") cat("Number of cells in table =",length(x$maximal.mod$y),"\n") cat("\n") cat("Maximal model =\n") print(x$maximal.mod$formula) cat("\n") cat("Number of log-linear parameters in maximal model =",dim(x$maximal.mod$x)[2],"\n") cat("\n") cat("Number of MCMC iterations =",length(x$MODEL),"\n") cat("\n") cat("Computer time for MCMC =",paste(hrs,":",mins,":",secs,sep=""),"\n") cat("\n") cat("Prior distribution for log-linear parameters =",priortypes[x$priornum],"\n") cat("\n") cat("Number of missing cells 
=",length(x$missing1),"\n") cat("\n") cat("Number of censored cells =",length(x$missing2),"\n") } conting/R/bictu.R0000644000175100001440000000706212753071315013374 0ustar hornikusersbictu <- function(object,n.sample,save=NULL,name=NULL,progress=FALSE){ if(n.sample<=0){ stop("n.sample must be positive")} ptm<-(proc.time())[3] if(is.null(save)){save<-object$save} if(is.null(name)){name<-object$name} if(save>0 & is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt" name_Y0<-"Y0.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="") name_Y0<-paste(name,"Y0.txt",sep="")} if(object$save==0 & save>0){ if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_Y0)){stop(paste("A file named ",name_Y0," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} write.table(file=name_BETA,x=object$BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=object$MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=object$SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_Y0,x=object$Y0,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=object$rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=object$mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) } start.sig<-object$SIG[length(object$SIG)] start.index<-model2index(object$MODEL[length(object$SIG)],dig=dim(object$BETA)[2]) start.beta<-object$BETA[dim(object$BETA)[1],start.index==1] start.y0<-object$Y0[dim(object$Y0)[1],] runit<-bict.fit(priornum=object$priornum,missing1=object$missing1,missing2=object$missing2, maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,ini.index=start.index,ini.beta=start.beta, ini.sig=start.sig,ini.y0=start.y0,iters=n.sample,save=save,name=name,null.move.prob=object$null.move.prob, a=object$a,b=object$b,progress=progress) BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG Y0<-runit$Y0 rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) Y0<-read.matrix(file=name_Y0,header=FALSE) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} if(save==0){ rj_acc<-c(object$rj_acc,rj_acc) mh_acc<-c(object$mh_acc,mh_acc) BETA<-rbind(object$BETA,BETA) SIG<-c(object$SIG,SIG) Y0<-rbind(object$Y0,Y0) MODEL<-c(object$MODEL,MODEL)} ptm<-(proc.time())[3]-ptm time<-ptm+object$time est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,Y0=Y0,rj_acc=rj_acc,mh_acc=mh_acc,priornum=object$priornum, missing1=object$missing1,missing2=object$missing2,maximal.mod=object$maximal.mod,IP=object$IP, 
eta.hat=object$eta.hat,save=save,name=name,missing_details=object$missing_details, censored_details=object$censored_details,null.move.prob=object$null.move.prob,time=time, a=object$a,b=object$b) class(est)<-"bict" est} conting/MD50000644000175100001440000001010212753125172012240 0ustar hornikusers60387b3dfcb83406f4a1f5f91c8ad0e7 *DESCRIPTION 67dbc652656de0237051424e9187e90c *NAMESPACE 9071b89c46380a7af6be8fae2db00948 *R/RJ_update.R c75f483ea3281c214b9d27958c616f33 *R/RJ_update_swap.R def339b59e64204e9db01335b77ac9e8 *R/accept_rate.R e3f610f6d31b1bc65164baf540514209 *R/add_term.R 35054448b4cbbd8ea26cedbf4ee4b38a *R/bayespval.R 991f15c2bebba5697cee1e292b76435e *R/bcct.R 003b9a0852fe28d4bcff06f954ccad6e *R/bcct.fit.R b36681e655668fc3be548cbc4b57fe13 *R/bcctsubset.R e5afa25206bd9a6437a410126c6a06a3 *R/bcctsubset.fit.R 0b09f78d4fe56e869ad7f5048b1f074f *R/bcctsubsetu.R 821da2bca4bacb7633100c9f307c7453 *R/bcctu.R da1a1615c25899a3326d2a6c6e57978b *R/beta_mode.R 6171aa1fdcb2bd7811f3f56782621048 *R/bict.R 0da21f4dc21842b2e7ee4c77a16fd177 *R/bict.fit.R 7afe0a984173c8683b3229fbdc96a77b *R/bictu.R 7c0a2962d2ece37792f4c91367b97fe0 *R/drop_term.R 76330349796348b50441c8a6757c1a3f *R/find_cens.R df5e5c6c5e43ea8bc5c122a2ca8c6d17 *R/formula2index.R 17deb7819acee872650113f9a1d84563 *R/index2formula.R 70f4801283044832a985c39184248a25 *R/index2model.R 8a89e3326f747b115da28df58cf27c2b *R/inter_probs.R 6179a33eaa7f6df906b190f02db9ef23 *R/inter_stats.R 9bad7f600ef384dee022e4ec8e04259b *R/iwls_mh.R 11a12c2fc7251aa85f5e8478b9ceb5d4 *R/mod_probs.R c8e6c05006cd4872c7af5388cb03a1d3 *R/model2index.R b22dba5ee5e25ecd7543978698a3b977 *R/plot.pval.R d69ac5be534d94f4a2b57518ac44ca90 *R/plot.totpop.R 4816bff07a4143b889fe0deb2d844da0 *R/print.acceptrate.R 1f2dcbc0a39d4699be9ea1b6a31f3402 *R/print.bcct.R 32734498e41893130546bd75305f9b43 *R/print.bict.R 7c773ccdb2c5a8350759aa3b13a103c0 *R/print.interprob.R 3f962d6409feb992f032ba747988c50b *R/print.interstat.R 71dc48b2148fe63a0dd3803393181218 *R/print.modprobs.R 2259d5ca419a97cd8fb49cd0be2ae4ae *R/print.pval.R 77d19a2780e62b727c1ac9d2ece42e56 *R/print.sbcct.R 15742acbbd80e2758f6c6daad9b949a7 *R/print.sbict.R ebd49330a88bba6d4cc5cbfe048379ea *R/print.submod.R 85c166c04eb6ad8775bcadf3f43a6f0b *R/print.totpop.R 5cbce87ccc24bc4e57ef26010f1fce6a *R/prop_mod.R 09878e4eddf17312e38a2af136082668 *R/sub_model.R a87c0874595f4b0e9c82c46433a2d942 *R/summary.bcct.R 25d8d4dc32f86840812605ab3a2068c5 *R/summary.bict.R 84ed36115620defbe585207a53a9c951 *R/total_pop.R 3f0f36638d13e0acd2a371e06390bbf7 *data/AOH.rda c93a12e018f8e9669005f4576e5f1e5e *data/ScotPWID.rda eda5a3ec0cb5abe48d1031f444739cee *data/heart.rda c562b1dc971f3ae687d766268a5e92de *data/spina.rda da54c697af3c2d465145cf4d78f89732 *man/AOH.Rd 357f50b79475f7349882ec0939f5f4ff *man/RJ_update.Rd f69405e0f5c86bc4982591e783e46a7c *man/ScotPWID.Rd 1044fd6d23c1183a784e00d558a1aaf4 *man/accept_rate.Rd 47b7907303f60bfaa9518b157ec5ad0a *man/add_term.Rd c8963bf5b265a82694f7cdcb58a89f73 *man/bayespval.Rd 3db442f4319a7d8fe68c4ff7f62648cd *man/bcct.Rd 2d590b58c0050e4b0d61c72ad72147d8 *man/bcct.fit.Rd 2066618b2c7e1a2de0768407d486f2fa *man/beta_mode.Rd b919367dc533a36ff7b83aee89ff91c6 *man/bict.Rd ba14f4d22009d53888b7b26a043c7a9a *man/bict.fit.Rd 99dffec0a7b603e2abc7e52d2c08bd4d *man/conting-package.Rd e7a8084279cee2c8cadc2a660d05c5ea *man/find_cens.Rd 6335a3dcae17b4f64942f74ec5214e77 *man/formula2index.Rd 8ac844d7f724081448928f8630adace5 *man/heart.Rd a23d5956100a6fb27f9858fe238fa115 *man/index2model.Rd f50b7c3621a429ba4c57951d900c2c89 
*man/inter_probs.Rd a6edb1960da54e36b76e061c8e32ac15 *man/inter_stats.Rd 29382133157eb0c74cf8e4d42667fbc3 *man/iwls_mh.Rd da98e83d91295fabb16142c7af497e32 *man/mod_probs.Rd 9cf2c35c9a01151bcf0c5455090f4b55 *man/plot.pval.Rd 957d31b3019560d6dfb9710152fcda30 *man/plot.totpop.Rd 305758a3df578087fa9fe9f547f6e9b5 *man/print.acceptrate.Rd e73060dd2ce4016d1472b2c89bd170f6 *man/print.bcct.Rd 09ff6774c99c00de7ae5ce369f13cd64 *man/print.interprob.Rd 9710c237d28e26ed3464726782d5f0d2 *man/print.interstat.Rd e71f894293e3c49bed041549f4178332 *man/print.modprobs.Rd bc85fbe299f31c976f746de6496afc58 *man/print.pval.Rd 3e931d97e008f06feba602feb3a820ba *man/print.submod.Rd a765000f5df8362c890475710e153a7e *man/print.totpop.Rd 1cdd84e1c6050c775559904e230162a9 *man/spina.Rd c2e53f8a245cb4fdd98a52af7e6c75aa *man/sub_model.Rd 378736f8ee3fe210906db640b320f7be *man/summary.bcct.Rd a0156c27e007fb88b9329901c690c476 *man/total_pop.Rd conting/DESCRIPTION0000644000175100001440000000073712753125172013453 0ustar hornikusersPackage: conting Type: Package Title: Bayesian Analysis of Contingency Tables Version: 1.6 Date: 2016-08-11 Author: Antony M. Overstall Maintainer: Antony M. Overstall Description: Bayesian analysis of complete and incomplete contingency tables. Depends: R (>= 2.15.0) Imports: mvtnorm, BMS, gtools, tseries, coda License: GPL-2 NeedsCompilation: no Packaged: 2016-08-11 12:42:53 UTC; ao35j Repository: CRAN Date/Publication: 2016-08-11 18:40:26 conting/man/0000755000175100001440000000000012753071315012510 5ustar hornikusersconting/man/RJ_update.Rd0000644000175100001440000001225712753071315014663 0ustar hornikusers\name{RJ_update} \alias{RJ_update} \alias{RJ_update_swap} \title{ Reversible Jump Algorithm } \description{ These functions implement one iteration of the orthogonal projection reversible jump algorithm for generalised linear models proposed by Forster et al (2012) applied to log-linear models with and without swap moves. } \usage{ RJ_update_swap(prop.index, curr.index, curr.beta, eta.hat, curr.y, big.X, proposal.probs, i.prop.prior.var, i.curr.prior.var) RJ_update(prop.index, curr.index, curr.beta, eta.hat, curr.y, big.X, proposal.probs, i.prop.prior.var, i.curr.prior.var) } \arguments{ \item{prop.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the proposed model. } \item{curr.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the current model. } \item{curr.beta}{ A vector of length \code{sum(curr.index)} giving the log-linear parameters under the current model. } \item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. } \item{curr.y}{ A vector of length n giving the cell counts. } \item{big.X}{ The design matrix under the maximal model. } \item{proposal.probs}{ A numeric vector of length 2. The first element gives the probability of proposing a move from the proposed model to the current model. The second element gives the probability of proposing a move from the current model to the proposed model. } \item{i.prop.prior.var}{ A matrix giving the inverse of the prior variance matrix for the log-linear parameters under the proposed model. } \item{i.curr.prior.var}{ A matrix giving the inverse of the prior variance matrix for the log-linear parameters under the current model. 
} } \details{ For the original algorithm see Forster et al (2012). For details on its application to log-linear models see Overstall & King (2014), and the references therein. \code{RJ_update_swap} performs birth/death and swap moves whereas \code{RJ_update} just performs birth/death moves. } \value{ The function will return a list with the following components: \item{new.beta}{A vector giving the new log-linear parameters.} \item{new.index}{A binary vector indicating which log-linear parameters are present in the new model.} } \references{ Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ set.seed(4) ## Set seed for reproducibility data(AOH) ## Load data maximal.mod<-glm(y~(alc+hyp+obe)^3,family=poisson,x=TRUE,contrasts=list(alc="contr.sum", hyp="contr.sum",obe="contr.sum"),data=AOH) ## Fit maximal model to get a design matrix IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(AOH$y) IP[,1]<-0 IP[1,]<-0 ## Calculate inverse prior scale matrix under maximal model. Under the UIP this ## is the inverse prior variance matrix. Under the SBH prior, we need to divide ## this matrix by the current value of SIG. bmod<-beta_mode(X=maximal.mod$x,y=AOH$y,IP=IP) ## Find posterior mode under maximal model with UIP eta.hat<-as.vector(maximal.mod$x\%*\%bmod) ## Find posterior mode of linear predictor. curr.index<-formula2index(big.X=maximal.mod$x,formula=y~alc+hyp+obe+alc:hyp,data=AOH) ## Calculate index for current model including alc:hyp interaction curr.index ## Print out current index #[1] 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 pm<-prop_mod(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## Propose a model p2<-(1-pm$null.move.prob)/pm$total.choices p2 ## Calculate probability of proposing proposed model from current model #[1] 0.1666667 prop.index<-pm$new.index prop.index ## Assign and print out proposal index # [1] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 dm<-prop_mod(curr.index=prop.index,data=AOH,maximal.mod=maximal.mod,null.move.prob=0) p1<-(1-pm$null.move.prob)/dm$total.choices p1 ## Calculate probability of proposing current model from proposed model #[1] 0.1666667 RJ_update(prop.index=prop.index,curr.index=curr.index, curr.beta=coef(maximal.mod)[curr.index==1],eta.hat=eta.hat,curr.y=AOH$y,big.X=maximal.mod$x, proposal.probs=c(p1,p2), i.prop.prior.var=IP[prop.index==1,prop.index==1], i.curr.prior.var=IP[curr.index==1,curr.index==1]) ## Do one iteration of reversible jump algorithm. Will get: #$new.beta #(Intercept) alc1 alc2 alc3 hyp1 obe1 # 2.87128918 -0.07098006 -0.07221330 0.08748803 -0.51899802 -0.07855115 # obe2 #-0.02474727 # #$new.index # [1] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } conting/man/print.bcct.Rd0000644000175100001440000000214512753071315015047 0ustar hornikusers\name{print.bcct} \alias{print.bcct} \alias{print.bict} \title{ Print \code{bcct} and \code{bict} Objects } \description{ This function prints objects of class \code{"bcct"} and \code{"bict"}. } \usage{ \method{print}{bcct}(x, ...) \method{print}{bict}(x, ...) 
} \arguments{ \item{x}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ These functions print out very simple details on the \code{bcct} or \code{bict} objects. They display the number of cells in the table, the maximal model considered, the number of log-linear parameters in the maximal model, the number of MCMC iterations, the computer time required for the MCMC (in hours, minutes and seconds) and the prior used. In the case of objects of class \code{"bict"}, it also prints out the number of missing and censored cells. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For examples see \code{\link{bcct}} and \code{\link{bict}}.} \seealso{ \code{\link{bcct}}, \code{\link{bict}}. } conting/man/ScotPWID.Rd0000644000175100001440000000621112753071315014373 0ustar hornikusers\name{ScotPWID} \alias{ScotPWID} \docType{data} \title{ People Who Inject Drugs in Scotland 2006: An Incomplete 2^7 Table } \description{ 5670 people who inject drugs (PWID) in Scotland in 2006 are osberved by four sources: social enquiry reports (\code{S1}), hospital records (\code{S2}), Scottish drug misuse database (\code{S3}) and Hepatitis C virus (HCV) diagnosis database (\code{S4}). The PWID are further cross-classified according to three additional factors: region (\code{Region}; 2 levels), gender (\code{Gender}; 2 levels) and age (\code{Age}; 2 levels). } \usage{data(ScotPWID)} \format{ A \code{"data.frame"} with 128 observations on the following 8 variables. \describe{ \item{\code{y}}{ Counts in each cell of the table with NAs for the cells corresponding to not being observed by any of the sources. } \item{\code{S1}}{ A factor with levels \code{un} \code{obs} indicating whether source S1 observed the PWID. } \item{\code{S2}}{ A factor with levels \code{un} \code{obs} indicating whether source S2 observed the PWID. } \item{\code{S3}}{ A factor with levels \code{un} \code{obs} indicating whether source S3 observed the PWID. } \item{\code{S4}}{ A factor with levels \code{un} \code{obs} indicating whether source S4 observed the PWID. } \item{\code{Region}}{ A factor with levels \code{GGC} \code{Rest} indicating the region (\code{GGC} = Greater Glasgow & Clyde, \code{Rest} = Rest of Scotland). } \item{\code{Gender}}{ A factor with levels \code{Male} \code{Female} indicating gender. } \item{\code{Age}}{ A factor with levels \code{Young} \code{Old} indicating age (\code{Young} = <35 years, \code{Old}=35+ years). } } } \details{ Note that the PWID observed by source \code{S4}, the HCV database, are not necessarily current PWID. They are people who have a history of drug use. Therefore the count in the cell corresponding to only being observed by the HCV database is an overcount. Overstall et al (2014) use a modelling approach whereby the count in the cell corresponding to only being observed by the HCV database is missing and the observed value acts as an upper bound. For more details on the dataset see King et al (2013). For details on the function \code{\link{bict}} applied to this data, see Overstall & King (2014). } \source{ King, R., Bird, S. M., Overstall, A. M., Hay, G. & Hutchinson, S. J. (2013) Injecting drug users in Scotland, 2006: Listing, number, demography, and opiate-related death-rates. \emph{Addiction Research and Theory}, \bold{21 (3)}, 235-246. } \references{ Overstall, A.M., King, R., Bird, S.M., Hutchinson, S.J. & Hay, G. 
(2014) Incomplete contingency tables with censored cells with application to estimating the number of people who inject drugs in Scotland. \emph{Statistics in Medicine}, \bold{33 (9)}, 1564--1579. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \examples{ data(ScotPWID) summary(ScotPWID) } \keyword{datasets} conting/man/bayespval.Rd0000644000175100001440000000761712753071315015000 0ustar hornikusers\name{bayespval} \alias{bayespval} \title{ Compute Bayesian p-value } \description{ This function computes the Bayesian (or posterior predictive) p-value. This can be used as a diagnostic tool to check model adequacy. Additionally, this function outputs predictions from the model which can also be used in other assessments of model adequacy. } \usage{ bayespval(object, n.burnin = 0, thin = 1, statistic = "X2") } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{statistic}{ An optional argument giving the discrepancy statistic to use for calculating the Bayesian p-value. It can be one of \code{c("X2","FreemanTukey","deviance")}, which correspond to the different statistics: \code{"X2"} = Chi-squared statistic, \code{"FreemanTukey"} = Freeman-Tukey statistic, \code{"deviance"} = deviance statistic. See Overstall & King (2014), and references therein, for descriptions of these statistics. } } \details{See Gelman et al (2004, Chapter 6) for more details on Bayesian p-values and see Overstall & King (2014), and references therein, for details of their application to contingency tables. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage. } \value{ The function will produce an object of class \code{"pval"} which is a list with the following components. \item{PRED}{An (\code{n.sample}-\code{n.burnin}) by n* matrix (where n* is the number of observed cell counts) containing the predictions of the observed cell counts.} \item{Tpred}{A vector of length (\code{n.sample}-\code{n.burnin}) containing the discrepancies between the predicted cell counts and their means.} \item{Tobs}{A vector of length (\code{n.sample}-\code{n.burnin}) containing the discrepancies between the observed cell counts and their means.} \item{pval}{A scalar giving the Bayesian p-value, i.e. the proportion of \code{Tpred}>\code{Tobs}.} \item{statnum}{A numeric scalar identifying which statistic is used.} \item{statistic}{A character string identifying which statistic is used.} \item{thin}{The value of the argument \code{thin}.} } \references{ Gelman, A., Carlin, J.B., Stern, H.S. & Rubin, D.B. (2004) \emph{Bayesian Data Analysis}, 2nd edition, Chapman & Hall. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}.
} \seealso{ \code{\link{bict}}, \code{\link{bcct}}, \code{\link{print.pval}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(spina) ## Load spina data test1<-bict(formula=y~(S1+S2+S3+eth)^2,data=spina,n.sample=50,prior="UIP") ## Do 50 iterations starting at maximal model containing all two-way interactions. test1p<-bayespval(object=test1,statistic="FreemanTukey",n.burnin=5) ## Use the Freeman-Tukey statistic and a burn-in phase of 5 iterations. test1p ## Will get following output #Under the Freeman-Tukey statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 2.812 4.695 5.190 5.777 6.405 14.490 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.566 4.861 5.197 5.430 6.108 6.460 # #Bayesian p-value = 0.4667 ## Can do a plot \dontrun{plot(test1p)} } conting/man/plot.pval.Rd0000644000175100001440000000135412753071315014721 0ustar hornikusers\name{plot.pval} \alias{plot.pval} \title{ Plot \code{pval} Objects } \description{ This function plots objects of class \code{"pval"}. } \usage{ \method{plot}{pval}(x, ...) } \arguments{ \item{x}{ An object of class \code{"pval"}. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will produce a plot of \code{T_obs} against \code{T_pred} (see \code{\link{bayespval}}) along with a line through the origin with slope one. The proportion of points above the line (grey points) gives the Bayesian p-value. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{bayespval}}.} \seealso{ \code{\link{bayespval}} }conting/man/bict.fit.Rd0000644000175100001440000001612012753071315014501 0ustar hornikusers\name{bict.fit} \alias{bict.fit} \title{ Bayesian Analysis of Incomplete Contingency Tables } \description{ This function is the workhorse behind \code{\link{bict}} and \code{\link{bictu}}. } \usage{ bict.fit(priornum, missing1, missing2, maximal.mod, IP, eta.hat, ini.index, ini.beta, ini.sig, ini.y0, iters, save, name, null.move.prob, a, b, progress) } \arguments{ \item{priornum}{ A numeric scalar indicating which prior is to be used: 1 = \code{"UIP"}, 2 = \code{"SBH"}. } \item{missing1}{ A vector of the same length as the number of missing cell counts giving the row numbers of the data.frame in \code{data} which correspond to the missing cell counts. } \item{missing2}{ A vector of the same length as the number of censored cell counts giving the row numbers of the data.frame in \code{data} which correspond to the censored cell counts. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{IP}{ A p by p matrix giving the inverse of the prior scale matrix for the maximal model. } \item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. } \item{ini.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the initial model. } \item{ini.beta}{ A numeric vector giving the starting values of the log-linear parameters for the MCMC algorithm. } \item{ini.sig}{ A numeric scalar giving the starting value of sigma^2 for the MCMC algorithm. } \item{ini.y0}{ A numeric vector giving the starting values of the missing and censored cell entries for the MCMC algorithm. } \item{iters}{ The number of iterations of the MCMC algorithm to peform. 
} \item{save}{ If positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero , the function will not save the MCMC output to external files. } \item{name}{ A prefix to the external files saved if the argument \code{save} is positive. If \code{NULL}, then the external files will have no prefix. } \item{null.move.prob}{ A scalar argument giving the probability of performing a null move, i.e. proposing a move to the current model. } \item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } \item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } \item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. } } \value{ The function will return a list with the following components. \item{BETA}{An \code{iters} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.} \item{MODEL}{A vector of length \code{iters} giving the sampled model indicators in hexadecimal format.} \item{SIG}{A vector of length \code{iters} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.} \item{Y0}{An \code{iters} by k matrix giving the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts.} \item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} \item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \seealso{ \code{\link{bict}}, \code{\link{bictu}}. } \examples{ data(spina) ## Load spina data. spina$z<-spina$y spina$z[is.na(spina$y)]<-0 ## Define a new variable in spina data.frame which is equal to y except where ## y is NA, in which case z=0. This is just so we can fit maximal model to the ## complete contingency table. maximal.mod<-glm(formula=z~(S1+S2+S3+eth)^2,data=spina,x=TRUE,y=TRUE, contrasts=list(S1="contr.sum",S2="contr.sum",S3="contr.sum", eth="contr.sum")) ## Fit maximal model to complete contingency table. curr.index<-formula2index(big.X=maximal.mod$x,formula=z~S1+S2+S3+eth,data=spina) ## Set up binary vector for independence model. IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(maximal.mod$y) IP[,1]<-0 IP[1,]<-0 ## Set up the inverse scale matrix for the prior distribution under ## the maximal model. bmod<-beta_mode(X=maximal.mod$x[!is.na(spina$y),],prior="UIP", y=maximal.mod$y[!is.na(spina$y)],IP=IP) ## Find the posterior mode under the maximal model fitted to observed cell counts. eta.hat<-as.vector(maximal.mod$x\%*\%bmod) ## Find the posterior mode of the linear predictor ## under the maximal model. 
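## The call to bict.fit below runs the data augmentation MCMC algorithm
## directly; bict is the usual user-level interface. The starting values
## ini.y0=c(500,200,20) for the missing cell counts are illustrative only.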
set.seed(1) ## Set seed for reproducibility test1<-bict.fit(priornum=1, missing1=(1:length(maximal.mod$y))[is.na(spina$y)], missing2=NULL,maximal.mod=maximal.mod, IP=IP, eta.hat=eta.hat, ini.index=curr.index, ini.beta=bmod[curr.index==1], ini.sig=1, ini.y0=c(500,200,20),iters=10, save=0, name=NULL, null.move.prob=0.5, a=0.001, b=0.001, progress = FALSE) ## Run for 10 iterations starting at model defined by curr.index. test1$MODEL ## Look at sampled model indicators. Should be: # [1] "7e00" "7e00" "7e00" "7e00" "7e00" "7e00" "7e00" "7e00" "7f00" "7f00" model2index(test1$MODEL,dig=15) ## Convert these to binary indicators of the log-linear parameters. ## Will get: # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7f00 1 1 1 1 1 1 1 0 0 0 0 #7f00 1 1 1 1 1 1 1 0 0 0 0 # [,12] [,13] [,14] [,15] #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7f00 0 0 0 0 #7f00 0 0 0 0 }conting/man/sub_model.Rd0000644000175100001440000001350312753071315014752 0ustar hornikusers\name{sub_model} \alias{sub_model} \title{ Compute Posterior Summary Statistics for (Sub-) Models } \description{ This function computes posterior summary statistics for (sub-) models using the MCMC output of \code{"bcct"} and \code{"bict"} objects. } \usage{ sub_model(object, formula = NULL, order = 1, n.burnin = 0, thin = 1, prob.level = 0.95, statistic = "X2") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{formula}{ An optional argument of class \code{"formula"}: a symbolic description of the model of interest. The default value is \code{NULL}. If not \code{NULL} then this argument takes precedent over \code{order}. } \item{order}{ A scalar argument identifying the model for which to compute summary statistics. The function will compute statistics for the model with the \code{order}-th largest posterior model probability. The default value is 1, meaning that, by default, the function will compute summary statistics for the posterior modal model. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{prob.level}{ An optional argument giving the probability content of the highest posterior density intervals (HPDIs). The default value is 0.95. } \item{statistic}{ An optional argument giving the discrepancy statistic to use for calculating the Bayesian p-value. It can be one of \code{c("X2","FreemanTukey","deviance")} which correspond to the different statistics: \code{"X2"} = Chi-squared statistic, \code{"FreemanTukey"} = Freeman-Tukey statistic, \code{"deviance"} = deviance statistic. See Overstall & King (2014), and references therein, for descriptions of these statistics. } } \details{ If the MCMC algorithm does not visit the model of interest in the thinned MCMC sample, after burn-in, then an error message will be returned. 
The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ This function will return an object of class \code{"submod"} which is a list with the following components. Note that, unless otherwise stated, all components are conditional on the model of interest. \item{term}{A vector of term labels for each log-linear parameter.} \item{post_prob}{A scalar giving the posterior model probability for the model of interest.} \item{post_mean}{A vector of posterior means for each of the log-linear parameters.} \item{post_var}{A vector of posterior variances for each of the log-linear parameters.} \item{lower}{A vector of lower limits for the 100*\code{prob.level}\% HPDI for each log-linear parameter.} \item{upper}{A vector of upper limits for the 100*\code{prob.level}\% HPDI for each log-linear parameter.} \item{prob.level}{The argument \code{prob.level}.} \item{order}{The ranking of the model of interest in terms of posterior model probabilities.} \item{formula}{The formula of the model of interest.} \item{BETA}{A matrix containing the sampled values of the log-linear parameters, where the number of columns is the number of log-linear parameters in the model of interest.} \item{SIG}{A vector containing the sampled values of sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.} If \code{object} is of class \code{"bict"}, then \code{sub_model} will also return the following component. \item{Y0}{A matrix (with k columns) containing the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts.} } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, } \examples{ set.seed(1) ## Set seed for reproducibility. data(AOH) ## Load the AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Let the maximal model be the saturated model. Starting from the ## posterior mode of the maximal model do 100 iterations under the unit ## information prior. test1sm<-sub_model(object=test1,order=1,n.burnin=10) ## Obtain posterior summary statistics for posterior modal model using a ## burnin of 10. test1sm #Posterior model probability = 0.5 # #Posterior summary statistics of log-linear parameters: # post_mean post_var lower_lim upper_lim #(Intercept) 2.907059 0.002311 2.81725 2.97185 #alc1 -0.023605 0.004009 -0.20058 0.06655 #alc2 -0.073832 0.005949 -0.22995 0.10845 #alc3 0.062491 0.006252 -0.09635 0.18596 #hyp1 -0.529329 0.002452 -0.63301 -0.43178 #obe1 0.005441 0.004742 -0.12638 0.12031 #obe2 -0.002783 0.004098 -0.17082 0.07727 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 11.07 19.76 23.34 24.47 29.04 50.37 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. 
# 30.82 34.78 35.74 36.28 37.45 42.49 # #Bayesian p-value = 0.0444 } conting/man/mod_probs.Rd0000644000175100001440000000656212753071315014774 0ustar hornikusers\name{mod_probs} \alias{mod_probs} \title{ Compute Posterior Model Probabilities } \description{ This function computes the posterior model probabilities using the MCMC output of \code{"bcct"} and \code{"bict"} objects. } \usage{ mod_probs(object, n.burnin = 0, scale = 0.1, best = NULL, thin = 1) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{scale}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the models with the posterior model probability larger than \code{scale} times the probability of the posterior modal model. The default value is 0.1. } \item{best}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the \code{best} models with the highest posterior model probabilities. For example, if \code{best=4}, then details on the four models with the highest posterior model probabilities will be returned. The default value is \code{NULL}. If not \code{NULL}, then this argument takes precedent over \code{scale}. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } } \details{ It will output only the probabilities of the "best" models, as defined by the user specifying either the \code{best} or \code{scale} arguments. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ The function will return an object of class \code{"modprobs"} which is a list containing the following components. \item{table}{An object of class \code{"data.frame"} with number of rows defined by \code{scale} or \code{best} and columns: \code{model_formula}; giving the model (in terms of a printed formula), and \code{prob}; giving the posterior model probability.} \item{totmodsvisit}{A numeric scalar giving the total number of models visited after the burn-in iterations.} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{print.modprobs}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Starting from maximal model of saturated model do 100 iterations of MCMC ## algorithm. mod_probs(object=test1,n.burnin=10,best=6) ## Using a burn-in of 10 iterations find the posterior model probabilities ## of the 6 models with the highest posterior model probability. Will get: #Posterior model probabilities: # prob model_formula #1 0.50000 ~alc + hyp + obe #2 0.32222 ~alc + hyp + obe + hyp:obe #3 0.12222 ~alc + hyp + obe + alc:hyp + hyp:obe #4 0.05556 ~alc + hyp + obe + alc:hyp # #Total number of models visited = 4 ## Note that since the chain only visited 4 models we only get probabilities ## for 4 models not 6. 
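## A further illustration (not run): the scale argument can be used instead
## of best, returning all models whose posterior probability is greater than
## scale times that of the posterior modal model.
\dontrun{mod_probs(object=test1,n.burnin=10,scale=0.2)}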
} conting/man/add_term.Rd0000644000175100001440000001056012753071315014560 0ustar hornikusers\name{add_term} \alias{add_term} \alias{drop_term} \alias{prop_mod} \title{ Determines Model Moves Given Current Model } \description{ These functions are used to determine which models we can propose moves to, given the current model in the MCMC algorithm, and the principle of marginality. } \usage{ add_term(curr.index, data, maximal.mod) drop_term(curr.index, data, maximal.mod) prop_mod(curr.index,data,maximal.mod,null.move.prob=0.5) } \arguments{ \item{curr.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the current model. } \item{data}{ An object of class \code{"data.frame"} containing the variables in the model. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{null.move.prob}{ An optional scalar argument giving the probability of performing a null move, i.e. proposing a move to the current model. The default value is 0.5. } } \details{ In the reversible jump algorithm we propose a move to a model given the current model. The function \code{prop_mod} implements a scheme whereby only local proposals are made, i.e. either a term is added or dropped. These types of move are called birth and death moves, respectively, by Forster et al (2012). When a term is either added or dropped, we preserve the principle of marginality, e.g. we can only propose to add a three-way interaction if all the possible two-way interactions between the three factors are included in the present model. The functions \code{add_term} and \code{drop_term} determine which terms can be added or dropped whilst preserving the principle of marginality. The function \code{prop_mod} will call \code{add_term} and \code{drop_term}, thus determining which terms can be added or dropped. With probability \code{null.move.prob} it will choose to remain in the current model; otherwise it will choose one of the possible terms to add or drop. } \value{ The functions \code{add_term} and \code{drop_term} will output a character vector containing the names of the terms that can be added or dropped, respectively. The function \code{prop_mod} will return a list with the following components. \item{new.index}{A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the proposed model.} \item{type}{A character string which will be one of \code{c("null","drop","add")} depending on the type of move proposed.} \item{total.choices}{If \code{type} is not \code{"null"}, then \code{total.choices} will be a scalar giving the total number of non-null moves available. If \code{type} is equal to \code{"null"}, then \code{total.choices} will be 0.} \item{null.move.prob}{A scalar giving the probability of a null move.} } \references{ Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ These functions will not typically be called by the user. } \examples{ data(AOH) ## Load the AOH data maximal.mod<-glm(formula=y~(alc+hyp+obe)^3,data=AOH,x=TRUE,y=TRUE, contrasts=list(alc="contr.sum",hyp="contr.sum",obe="contr.sum")) ## Set up the maximal model which in this case is the saturated model.
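## formula2index (used next) converts a model formula into a binary vector
## indicating which columns of the maximal model's design matrix are included;
## add_term, drop_term and prop_mod all operate on this index.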
curr.index<-formula2index(big.X=maximal.mod$x,formula=y~alc+hyp+obe+hyp:obe,data=AOH) ## Set up the binary vector for the model containing all main effects and the ## hyp:obe interaction. add_term(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## See what terms we can add - will get: #[1] "alc:hyp" "alc:obe" drop_term(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## See what terms we can drop - will get: #[1] "hyp:obe" set.seed(4) ## Set the seed for reproducibility. prop_mod(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## Propose a model. Will be a drop move, proposing the independence model by ## dropping the hyp:obe interaction. The total.choices object is 3, i.e. one ## drop move and two add moves. Specifically: #$new.index # [1] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 # #$type #[1] "drop" # #$total.choices #[1] 3 # #$null.move.prob #[1] 0.5 } conting/man/inter_probs.Rd0000644000175100001440000000541012753071315015325 0ustar hornikusers\name{inter_probs} \alias{inter_probs} \title{ Calculate Posterior Probability of Each Term } \description{ This function computes the posterior probability of each term using the MCMC output of \code{"bcct"} and \code{"bict"} objects. } \usage{ inter_probs(object, cutoff = 0.75, n.burnin = 0, thin = 1) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{cutoff}{ An optional argument giving the cutoff posterior probability for displaying posterior summary statistics of the log-linear parameters. Only those log-linear parameters with a posterior probability greater than \code{cutoff} will be returned as part of the output. The default value is 0.75. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } } \details{ This function provides a scaled back version of what \code{\link{inter_stats}} provides. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ This function returns an object of class \code{"interprob"} which is a list with the following components. \item{term}{A vector of term labels.} \item{prob}{A vector of posterior probabilities.} \item{thin}{The value of the argument \code{thin}.} The function will only return elements in the above list if \code{prob} > \code{cutoff}. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{print.interprob}}, \code{\link{inter_stats}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Starting from maximal model of saturated model do 100 iterations of MCMC ## algorithm. inter_probs(test1,n.burnin=10,cutoff=0) ## Calculate posterior probabilities having used a burn-in phase of ## 10 iterations and a cutoff of 0 (i.e. display all terms with ## non-zero posterior probability). 
Will get the following: #Posterior probabilities of log-linear parameters: # post_prob #(Intercept) 1.0000 #alc 1.0000 #hyp 1.0000 #obe 1.0000 #alc:hyp 0.1778 #alc:obe 0.0000 #hyp:obe 0.4444 #alc:hyp:obe 0.0000 ## Note that the MCMC chain (after burn-in) does not visit any models ## with the alc:obe or alc:hyp:obe interactions. }conting/man/formula2index.Rd0000644000175100001440000000456512753071315015570 0ustar hornikusers\name{formula2index} \alias{formula2index} \alias{index2formula} \title{ Convert Between Formula and Index } \description{ These functions will convert a formula object to a binary index and vice versa. } \usage{ formula2index(big.X, formula, data) index2formula(index, maximal.mod) } \arguments{ \item{big.X}{ The design matrix under the maximal model. } \item{formula}{ An object of class \code{"formula"}: a symbolic description of the model to convert to a binary index. } \item{data}{ An object of class \code{"data.frame"} containing the variables in the model. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model to be converted to a formula. } } \value{ The function \code{formula2index} will produce a binary vector of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model represented by the argument \code{formula}. The function \code{index2formula} will produce an object of class \code{"formula"}: a symbolic description of the model given by the argument \code{index}. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ data(ScotPWID) ## Load the ScotPWID data maximal.mod<-glm(y~(S1+S2+S3+S4+Region+Gender+Age)^2,family=poisson,contrasts=list( S1="contr.sum",S2="contr.sum",S3="contr.sum",S4="contr.sum", Region="contr.sum",Gender="contr.sum",Age="contr.sum"),data=ScotPWID,x=TRUE) ## Fit the maximal model containing all two-way interactions. big.X<-maximal.mod$x ## Set the design matrix under the maximal model index<-formula2index(big.X=big.X, formula=~S1+S2+S3+S4+Region+Gender+Age+S1:S2+S1:Age+S2:Gender+S3:S4+S4:Age, data=ScotPWID) ## Find the index under the model with the following interactions: ## S1:S2 ## S1:Age ## S2:Gender ## S3:S4 ## S4:Age index ## Print index # [1] 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 index2formula(index=index,maximal.mod=maximal.mod) ## Go back to formula #y ~ S1 + S2 + S3 + S4 + Region + Gender + Age + S1:S2 + S1:Age + # S2:Gender + S3:S4 + S4:Age }conting/man/total_pop.Rd0000644000175100001440000000517512753071315015010 0ustar hornikusers\name{total_pop} \alias{total_pop} \title{ Evaluate Posterior Distribution of Total Population Size } \description{ This function uses the MCMC output of a \code{"bict"} object to derive an MCMC sample from the posterior distribution of the total population size. } \usage{ total_pop(object, n.burnin = 0, thin = 1, prob.level = 0.95) } \arguments{ \item{object}{ An object of class \code{"bict"}. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. 
} \item{prob.level}{ An optional argument giving the target probability content of the highest posterior density intervals for the total population size. The default value is 0.95. } } \details{ The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ This function will return an object of class \code{"totpop"} which is a list with the following components. \item{TOT}{A vector of length (\code{n.sample}-\code{n.burnin}) giving the MCMC sample from the posterior distribution of the total population size.} \item{int}{The 100*\code{prob.level}\% highest posterior density interval (HPDI) for the total population size.} \item{meanTOT}{The posterior mean of the total population size.} \item{prob.level}{The argument \code{prob.level}.} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bict}}, \code{\link{print.totpop}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(spina) ## Load spina data test1<-bict(formula=y~(S1+S2+S3+eth)^2,data=spina,n.sample=100,prior="UIP") ## For the spina dataset. We do 100 iterations under the unit information ## prior. The maximal model is the model with two-way interactions and we ## start from this model at the posterior model tp<-total_pop(test1,n.burnin=10) ## Use a burn-in phase of 10 iterations tp ## Print out results. Will get: #Posterior mean of total population size = 727.0667 #95 % highest posterior density interval for total population size = ( 706 757 ) ## Could do a plot \dontrun{plot(tp)} ## Do a summary of MCMC sample from total population size summary(tp$TOT) ## Will get # Min. 1st Qu. Median Mean 3rd Qu. Max. # 697.0 716.2 727.0 727.1 735.8 763.0 } conting/man/conting-package.Rd0000644000175100001440000001323312753071315016033 0ustar hornikusers\name{conting-package} \alias{conting-package} \alias{conting} \docType{package} \title{ Bayesian Analysis of Complete and Incomplete Contingency Tables } \description{ Performs Bayesian analysis of complete and incomplete contingency tables incorporating model uncertainty using log-linear models. These analyses can be used to identify associations/interactions between categorical factors and to estimate unknown closed populations. } \details{ \tabular{ll}{ Package: \tab conting\cr Type: \tab Package\cr Version: \tab 1.6\cr Date: \tab 2016-08-11\cr License: \tab GPL-2\cr } For the Bayesian analysis of complete contingency tables the key function is \code{\link{bcct}} which uses MCMC methods to generate a sample from the joint posterior distribution of the model parameters and model indicator. Further MCMC iterations can be performed by using \code{\link{bcctu}}. For the Bayesian analysis of incomplete contingency tables the key function is \code{\link{bict}} which uses MCMC methods to generate a sample from the joint posterior distribution of the model parameters, model indicator and the missing, and, possibly, censored cell entries. Further MCMC iterations can be performed by using \code{\link{bictu}}. In both cases see Overstall & King (2014), and the references therein, for details on the statistical and computational methods, as well as detailed examples. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk} Maintainer: Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk} } \references{ Overstall, A.M. & King, R. 
(2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \keyword{ package } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Bayesian analysis of complete contingency table. Let the saturated model ## be the maximal model and do 100 iterations. summary(test1) ## Summarise the result. Will get: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 2.877924 0.002574 2.78778 2.97185 #alc1 1 -0.060274 0.008845 -0.27772 0.06655 #alc2 1 -0.049450 0.006940 -0.20157 0.11786 #alc3 1 0.073111 0.005673 -0.05929 0.20185 #hyp1 1 -0.544988 0.003485 -0.65004 -0.42620 #obe1 1 -0.054672 0.007812 -0.19623 0.12031 #obe2 1 0.007809 0.004127 -0.11024 0.11783 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.45 ~alc + hyp + obe #2 0.30 ~alc + hyp + obe + hyp:obe #3 0.11 ~alc + hyp + obe + alc:hyp + hyp:obe #4 0.06 ~alc + hyp + obe + alc:hyp + alc:obe + hyp:obe #5 0.05 ~alc + hyp + obe + alc:hyp # #Total number of models visited = 7 # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 11.79 20.16 23.98 24.70 28.77 52.40 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.18 24.22 31.51 30.12 35.63 42.49 # #Bayesian p-value = 0.28 set.seed(1) ## Set seed for reproducibility data(spina) ## Load spina data test2<-bict(formula=y~(S1+S2+S3+eth)^2,data=spina,n.sample=100,prior="UIP") ## Bayesian analysis of incomplete contingency table. Let the model with two-way ## interactions be the maximal model and do 100 iterations. summary(test2) ## Summarise the result. Will get: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 1.0427 0.033967 0.6498 1.4213 #S11 1 -0.3159 0.015785 -0.4477 -0.1203 #S21 1 0.8030 0.018797 0.6127 1.1865 #S31 1 0.7951 0.003890 0.6703 0.8818 #eth1 1 2.8502 0.033455 2.4075 3.1764 #eth2 1 0.1435 0.072437 -0.4084 0.5048 #S21:S31 1 -0.4725 0.002416 -0.5555 -0.3928 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.36 ~S1 + S2 + S3 + eth + S2:S3 #2 0.19 ~S1 + S2 + S3 + eth + S2:S3 + S2:eth #3 0.12 ~S1 + S2 + S3 + eth + S1:eth + S2:S3 #4 0.12 ~S1 + S2 + S3 + eth + S1:S2 + S1:S3 + S1:eth + S2:S3 + S2:eth + S3:eth #5 0.10 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 #6 0.06 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 + S2:eth #Total number of models visited = 8 # #Posterior mean of total population size = 726.75 #95 % highest posterior density interval for total population size = ( 706 758 ) # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.329 15.190 20.040 22.550 24.180 105.200 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. 
# 5.329 18.270 22.580 21.290 24.110 37.940 # #Bayesian p-value = 0.45 } conting/man/bict.Rd0000644000175100001440000003212012753071315013716 0ustar hornikusers\name{bict} \alias{bict} \alias{bictu} \title{ Bayesian Analysis of Incomplete Contingency Tables } \description{ These functions implement a Bayesian analysis of incomplete contingency tables. This is accomplished using a data augmentation MCMC algorithm where the null moves are performed using the Metropolis-Hastings algorithm and the between models moves are performed using the reversible jump algorithm. This function can also accommodate cases where one of the sources observes a mixture of individuals from target and non-target populations. This results in some of the cell counts being censored. \code{bict} should be used initially, and \code{bictu} should be used to do additional MCMC iterations, if needed. } \usage{ bict(formula, data, n.sample, prior = "SBH", cens = NULL, start.formula = NULL, start.beta = NULL, start.sig = NULL, start.y0 = NULL, save = 0, name = NULL, null.move.prob=0.5, a = 0.001, b = 0.001, progress = FALSE) bictu(object, n.sample, save = NULL, name = NULL, progress = FALSE) } \arguments{ \item{formula}{ An object of class \code{"formula"}: a symbolic description of the maximal model.} \item{object}{An object of class \code{"bict"} produced as a previous call to \code{bict} or \code{bictu}.} \item{data}{An object of class \code{"data.frame"} (or \code{"table"}) containing the variables in the model. If the model variables are not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{bict} is called.} \item{n.sample}{ A numeric scalar giving the number of MCMC iterations to perform. } \item{prior}{ An optional argument giving the prior to be used in the analysis. It can be one of \code{c("UIP","SBH")}, where \code{"UIP"} = unit information prior; and \code{"SBH"} = Sabanes-Bove & Held prior. The default value is \code{"SBH"}. } \item{cens}{ A numeric vector indicating the row numbers of the data.frame in \code{data} which correspond to the censored cells. This can be found using the function \code{\link{find_cens}}. } \item{start.formula}{ An optional argument giving an object of class \code{"formula"}: a symbolic description of the starting model in the MCMC algorithm. If \code{NULL} (the default) the starting model will be the maximal model. } \item{start.beta}{ An optional argument giving the starting values of the log-linear parameters for the MCMC algorithm. It should be a vector of the same length as the number of log-linear parameters in the starting model implied by the argument \code{start.formula}. If \code{NULL} (the default) the starting value will be the posterior mode under the maximal model. } \item{start.sig}{ An optional argument giving the starting value of sigma^2 (under the Sabanes-Bove & Held prior) for the MCMC algorithm when the argument of prior is \code{"SBH"}. If \code{NULL} (the default) the starting value will be one. } \item{start.y0}{ An optional argument giving the starting values of the missing and censored cell counts. This should have the same length as the number of missing and censored cell counts. } \item{save}{ An optional argument for saving the MCMC output mid-algorithm. For \code{bict}, if positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero (the default), the function will not save the MCMC output to external files.
For \code{bictu}, if non-\code{NULL}, the function will save the MCMC output to external text files every \code{save} iterations. If \code{NULL} (the default), it will inherit the value of \code{save} from the previous call to \code{bict} or \code{bictu}. } \item{name}{ An optional argument giving a prefix to the external files saved if the argument \code{save} is positive. For \code{bict}, a value of \code{NULL} means the external files will not have a prefix. For \code{bictu}, a value of \code{NULL}, means the prefix will be inherited from the previous call to \code{bict} or \code{bictu}. } \item{null.move.prob}{ An optional scalar argument giving the probability of performing a null move in the reversible jump algorithm, i.e. proposing a move to the current model. The default value is 0.5. } \item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. A value of \code{a = -1} gives the Gelman prior (Gelman, 2006), i.e. a uniform prior on the standard deviation. } \item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. A value of \code{b = 0} gives the Gelman prior (Gelman, 2006), i.e. a uniform prior on the standard deviation. } \item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. The default value is \code{FALSE}. } } \details{ For identifiability, the parameters are constrained. The \code{\link{conting-package}} uses sum-to-zero constraints. See Overstall & King (2014), and the references therein, for more details. The Metropolis-Hastings algorithm employed is the iterated weighted least squares method for generalised linear models (GLMs) proposed by Gamerman (1997). The reversible jump algorithm employed is the orthogonal projections method for GLMs proposed by Forster et al (2012). For details on these methods applied to log-linear models through the data-augmentation algorithm see Overstall & King (2014), and the references therein. For details on the censored approach see Overstall et al (2014). For details on the unit information and Sabanes-Bove & Held priors for generalised linear models see Ntzoufras et al (2003) and Sabanes-Bove & Held (2011), respectively. See Overstall & King (2014), and the references therein, for their application to log-linear models and contingency tables. } \value{ The functions will return an object of class \code{"bict"} which is a list with the following components. \item{BETA}{ An \code{n.sample} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned. } \item{MODEL}{ A vector of length \code{n.sample} giving the sampled model indicators in hexadecimal format. } \item{SIG}{ A vector of length \code{n.sample} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one. } \item{Y0}{ An \code{n.sample} by k matrix giving the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts. } \item{missing1}{ A vector of the same length as the number of missing cell counts giving the row numbers of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the missing cell counts. 
} \item{missing2}{ A vector of the same length as the number of censored cell counts giving the row numbers of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the censored cell counts. } \item{missing_details}{ The rows of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the missing cell counts. } \item{censored_details}{ The rows of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the censored cell counts. } \item{rj_acc}{ A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted. } \item{mh_acc}{ A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted. } \item{priornum}{ A numeric scalar indicating which prior was used: 1 = \code{"UIP"}, 2 = \code{"SBH"}. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{IP}{ A p by p matrix giving the inverse of the prior scale matrix for the maximal model. } \item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. } \item{save}{ The argument \code{save}. } \item{name}{ The argument \code{name}. } \item{null.move.prob}{ The argument \code{null.move.prob}.} \item{time}{ The total computer time (in seconds) used for the MCMC computations.} \item{a}{ The argument \code{a}. } \item{b}{ The argument \code{b}. } } \references{ Sabanes-Bove, D. & Held, L. (2011) Hyper-g priors for generalized linear models. \emph{Bayesian Analysis}, \bold{6 (3)}, 387--410. Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. Gamerman, D. (1997) Sampling from the posterior distribution in generalised linear mixed models. \emph{Statistics and Computing}, \bold{7 (1)}, 57--68. Gelman, A. (2006) Prior distributions for variance parameters in hierarchical models(Comment on Article by Browne and Draper). \emph{Bayesian Analysis}, \bold{1 (3)}, 515--534. Nztoufras, I., Dellaportas, P. & Forster, J.J. (2003) Bayesian variable and link determination for generalised linear models. \emph{Journal of Statistical Planning and Inference}, \bold{111 (1)}, 165--180. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} Overstall, A.M., King, R., Bird, S.M., Hutchinson, S.J. & Hay, G. (2014) Incomplete contingency tables with censored cells with application to estimating the number of people who inject drugs in Scotland. \emph{Statistics in Medicine}, \bold{33 (9)}, 1564--1579. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ These functions are wrappers for \code{\link{bict.fit}}. In Version 1.0 of \code{\link{conting-package}}, note that the default value for \code{prior} was \code{"UIP"}. From Version 1.1 onwards, the default value is \code{"SBH"}. } \seealso{ \code{\link{bict.fit}}, \code{\link{spina}}, \code{\link{ScotPWID}}. } \examples{ set.seed(1) ## Set seed for reproducibility. 
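## (Added illustrative sketch, not part of the original example.) The spina
## data contain no censored cells (see "Number of censored cells = 0" in the
## output below), so the cens argument is left at its default of NULL.
## If one source were subject to censoring, the censored cells could be
## identified with find_cens() and passed to bict() via cens; for example,
## for the ScotPWID data, where source S4 is the censored source:
data(ScotPWID)
find_cens(sources=~S1+S2+S3+S4,cens_source=~S4,data=ScotPWID)
## Returns the row numbers of the censored cells (9 25 41 57 73 89 105 121),
## which would then be supplied as the cens argument of bict().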
data(spina) ## Load the spina data test1<-bict(formula=y~(S1 + S2 + S3 + eth)^2,data=spina,n.sample=50, prior="UIP") ## Let the maximal model be the model with two-way interactions. Starting from the ## posterior mode of the model with two-way interactions, do 50 iterations under the ## unit information prior. test1<-bictu(object=test1,n.sample=50) ## Do another 50 iterations test1 #Number of cells in table = 24 # #Maximal model = #y ~ (S1 + S2 + S3 + eth)^2 # #Number of log-linear parameters in maximal model = 15 # #Number of MCMC iterations = 100 # #Computer time for MCMC = 00:00:01 # #Prior distribution for log-linear parameters = UIP # #Number of missing cells = 3 # #Number of censored cells = 0 summary(test1) ## Summarise the result. Will get: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 1.0427 0.033967 0.6498 1.4213 #S11 1 -0.3159 0.015785 -0.4477 -0.1203 #S21 1 0.8030 0.018797 0.6127 1.1865 #S31 1 0.7951 0.003890 0.6703 0.8818 #eth1 1 2.8502 0.033455 2.4075 3.1764 #eth2 1 0.1435 0.072437 -0.4084 0.5048 #S21:S31 1 -0.4725 0.002416 -0.5555 -0.3928 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.36 ~S1 + S2 + S3 + eth + S2:S3 #2 0.19 ~S1 + S2 + S3 + eth + S2:S3 + S2:eth #3 0.12 ~S1 + S2 + S3 + eth + S1:eth + S2:S3 #4 0.12 ~S1 + S2 + S3 + eth + S1:S2 + S1:S3 + S1:eth + S2:S3 + S2:eth + S3:eth #5 0.10 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 #6 0.06 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 + S2:eth # #Total number of models visited = 8 # #Posterior mean of total population size = 726.75 #95 % highest posterior density interval for total population size = ( 706 758 ) # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.329 15.190 20.040 22.550 24.180 105.200 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 5.329 18.270 22.580 21.290 24.110 37.940 # #Bayesian p-value = 0.45 } conting/man/AOH.Rd0000644000175100001440000000321312753071315013405 0ustar hornikusers\name{AOH} \alias{AOH} \docType{data} \title{ Alcohol, Obesity and Hypertension: A Complete 4 * 3 * 2 Table } \description{ 491 subjects are cross-classified according to the three factors: hypertension (\code{hyp}; 2 levels), obesity (\code{obe}; 3 levels) and alcohol (\code{alc}; 4 levels). There are a total of 24 cells in the table. } \usage{data(AOH)} \format{ A \code{"data.frame"} with 24 observations on the following 4 variables. \describe{ \item{\code{y}}{ Counts in each cell of table. } \item{\code{alc}}{ A factor with levels \code{0} \code{1-2} \code{3-5} \code{6+} indicating the classification of alcohol intake of drinks per day. } \item{\code{obe}}{ A factor with levels \code{low} \code{average} \code{high} indicating the classification of obesity. } \item{\code{hyp}}{ A factor with levels \code{yes} \code{no} indicating the classification of hypertension. } } } \details{ These data are from a study in Western Australia. The study copied a larger study from USA. See Knuiman & Speed (1988) for more details. For details on the function \code{\link{bcct}} applied to these data, see Overstall & King (2014). } \source{ Knuiman, M.W. & Speed, T.P. (1988) Incorporating Prior Information into the Analysis of Contingency Tables. \emph{Biometrics}, \bold{44 (4)}, 1061--1071. } \references{ Overstall, A.M. & King, R. 
(2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \examples{ data(AOH) summary(AOH) } \keyword{datasets} conting/man/print.acceptrate.Rd0000644000175100001440000000144512753071315016251 0ustar hornikusers\name{print.acceptrate} \alias{print.acceptrate} \title{ Prints \code{acceptrate} Objects } \description{ This function prints objects of class \code{"acceptrate"}. } \usage{ \method{print}{acceptrate}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{ An object of class \code{"acceptrate"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will simply print out the acceptance rates for the reversible jump and Metropolis-Hastings algorithms. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{accept_rate}}.} \seealso{ \code{\link{accept_rate}} } conting/man/beta_mode.Rd0000644000175100001440000000723512753071315014725 0ustar hornikusers\name{beta_mode} \alias{beta_mode} \title{ Posterior Mode } \description{ This function finds the posterior mode of the log-linear parameters of a log-linear model with a given design matrix and prior distribution. } \usage{ beta_mode(X, prior = "SBH", y, IP , a = 0.001 , b = 0.001) } \arguments{ \item{X}{ The n by p design matrix where n is the number of cells and p is the number of log-linear parameters. } \item{prior}{ The prior distribution. It can be one of \code{c("UIP","SBH")}, where \code{"UIP"} = unit information prior; and \code{"SBH"} = Sabanes-Bove & Held prior. The default value is \code{"SBH"}. } \item{y}{ The n by 1 vector of cell counts. } \item{IP}{ A p by p matrix giving the inverse of the prior scale matrix. } \item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } \item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } } \details{ The posterior mode is found by maximising the log unnormalised posterior pdf given by the sum of the log-likelihood and the log of the prior pdf. This optimisation is achieved using a quasi Newton-Raphson method. For details on the unit information and Sabanes-Bove & Held priors for generalised linear models see Ntzoufras et al (2003) and Sabanes-Bove & Held (2011), respectively. See Overstall & King (2014), and the references therein, for their application to log-linear models and contingency tables. The posterior mode is required for the reversible jump algorithm implemented from Forster et al (2012). } \value{ \code{beta_mode} will return a p by 1 vector containing the posterior mode of the log linear parameters.} \references{ Sabanes-Bove, D. & Held, L. (2011) Hyper-g priors for generalized linear models. \emph{Bayesian Analysis}, \bold{6 (3)}, 387--410. Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. Nztoufras, I., Dellaportas, P. & Forster, J.J. (2003) Bayesian variable and link determination for generalised linear models. \emph{Journal of Statistical Planning and Inference}, \bold{111 (1)}, 165--180. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. 
\emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ data(AOH) ## loads the AOH data X<-model.matrix(~alc+hyp+obe,data=AOH, contrasts=list(alc="contr.sum",hyp="contr.sum",obe="contr.sum")) ## Sets up the design matrix for the independence model IP<-(t(X)\%*\%X)/dim(X)[1] ## Set up inverse of prior scale matrix beta_mode(X=X,prior="UIP",y=AOH$y,IP=IP) ## Finds the posterior mode of the log-linear parameters under the ## independence model with the unit information prior. Will get: #X(Intercept) Xalc1 Xalc2 Xalc3 Xhyp1 Xobe1 # 2.894270420 -0.045859743 -0.071775824 0.089541068 -0.504141954 0.008163604 # Xobe2 #-0.016327209 beta_mode(X=X,prior="SBH",y=AOH$y,IP=IP) ## Finds the posterior mode of the log-linear parameters under the ## independence model with the Sabanes-Bove & Held prior. Will get: #X(Intercept) Xalc1 Xalc2 Xalc3 Xhyp1 Xobe1 # 2.908298763 -0.043704371 -0.068212247 0.085338704 -0.473628107 0.007762839 # Xobe2 #-0.015525678 }conting/man/iwls_mh.Rd0000644000175100001440000000462412753071315014447 0ustar hornikusers\name{iwls_mh} \alias{iwls_mh} \title{ Iterated Weighted Least Square Metropolis Hastings Algorithm } \description{ This function implements one iteration of the Iterated Weight Least Square Metropolis Hastings Algorithm as proposed by Gamerman (1997) for generalised linear models as applied to log-linear models. } \usage{ iwls_mh(curr.y, curr.X, curr.beta, iprior.var) } \arguments{ \item{curr.y}{ A vector of length n giving the cell counts. } \item{curr.X}{ An n by p design matrix for the current model, where p is the number of log-linear parameters. } \item{curr.beta}{ A vector of length p giving the current log-linear parameters. } \item{iprior.var}{ A p by p matrix giving the inverse of the prior variance matrix. } } \details{ For details of the original algorithm see Gamerman (1997). For its application to log-linear models see Overstall & King (2014), and the references therein. } \value{ The function will output a vector of length p giving the new values of the log-linear parameters. } \references{ Gamerman, D. (1997) Sampling from the posterior distribution in generalised linear mixed models. \emph{Statistics and Computing}, \bold{7 (1)}, 57--68. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. 
} \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data maximal.mod<-glm(y~alc+hyp+obe,family=poisson,x=TRUE,contrasts=list(alc="contr.sum", hyp="contr.sum",obe="contr.sum"),data=AOH) ## Fit independence model to get a design matrix IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(AOH$y) IP[,1]<-0 IP[1,]<-0 ## Set up inverse prior variance matrix under the UIP ## Let the current parameters be the MLE under the independence model as.vector(coef(maximal.mod)) #[1] 2.89365105 -0.04594959 -0.07192507 0.08971628 -0.50545335 0.00818037 #[7] -0.01636074 ## Update parameters using MH algorithm iwls_mh(curr.y=AOH$y,curr.X=maximal.mod$x,curr.beta=coef(maximal.mod),iprior.var=IP) ## Will get: #[1] 2.86468919 -0.04218623 -0.16376055 0.21656167 -0.49528676 -0.05026597 #[7] 0.02726671 } conting/man/print.interprob.Rd0000644000175100001440000000142112753071315016134 0ustar hornikusers\name{print.interprob} \alias{print.interprob} \title{ Print \code{interprob} Objects } \description{ This function prints objects of class \code{"interprob"}. } \usage{ \method{print}{interprob}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{ An object of class \code{"interprob"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will print out the posterior probability of each term (subject to the argument \code{cutoff}). } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{inter_probs}}.} \seealso{ \code{\link{inter_probs}}. } conting/man/accept_rate.Rd0000644000175100001440000000331012753071315015246 0ustar hornikusers\name{accept_rate} \alias{accept_rate} \title{ Compute Acceptance Rates for Metropolis-Hastings and Reversible Jump Algorithms } \description{ This function computes the acceptance rates of the Metropolis-Hastings and reversible jump algorithms from the MCMC output of \code{bcct} and \code{bict} objects. } \usage{ accept_rate(object) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } } \details{ Acceptance rates can be used to assess the performance of MCMC methods (in particular the peformance of the reversible jump method, Brooks et al, 2003). } \value{ This function will return an object of class \code{"acceptrate"} which is a list with the following components. \item{rj_ar}{Acceptance rate (as a \%) of the reversible jump algorithm.} \item{mh_ar}{Acceptance rate (as a \%) of the Metropolis-Hastings algorithm.} } \references{ Brooks, S.P., Giudici, P., & Roberts, G.O. (2003) Efficient construction of reversible jump Markov chain Monte Carlo proposal distributions. \emph{Journal of the Royal Statistical Society, Series B}, \bold{65 (1)}, 3--55. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{print.acceptrate}}, \code{\link{bcct}}, \code{\link{bict}}. } \examples{ set.seed(1) ## set a seed for reproducibility data(AOH) test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=500,prior="UIP") ## Create a bcct object for the AOH dataset for a very small number of ## iterations (500). accept_rate(test1) ## Calculate accept rates. 
Will get: #Acceptance rate of reversible jump proposals = 32.5581 % #Acceptance rate of Metropolis-Hastings proposals = 76.8595 % } conting/man/print.modprobs.Rd0000644000175100001440000000144312753071315015761 0ustar hornikusers\name{print.modprobs} \alias{print.modprobs} \title{ Print \code{modprobs} Objects } \description{ This function prints objects of class \code{"modprobs"}. } \usage{ \method{print}{modprobs}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{ An object of class \code{"modprobs"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will print out the posterior model probability of the "best" models as defined by the arguments \code{best} or \code{scale}. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{mod_probs}}.} \seealso{ \code{\link{mod_probs}}. }conting/man/print.submod.Rd0000644000175100001440000000243112753071315015423 0ustar hornikusers\name{print.submod} \alias{print.submod} \title{ Print \code{submod} Objects } \description{ This function prints objects of class \code{"submod"}. } \usage{ \method{print}{submod}(x, ..., digits = max(3, getOption("digits") - 3)) } \arguments{ \item{x}{ An object of class \code{"submod"}. } \item{\dots}{ Arguments to be passed to and from other methods. } \item{digits}{ An optional argument controlling the rounding of output. } } \value{ Firstly, conditional on the model of interest (defined by \code{formula} and \code{order}), this function will print out the posterior means, posterior variances and 100*\code{prob.level}\% highest posterior density intervals (HPDIs) for each of the log-linear parameters. Secondly, conditional on the model of interest, it will print out summaries of the discrepancy statistics and the corresponding Bayesian p-value. Finally, if the class of the object passed to \code{sub_model} is \code{"bict"}, then it will print out the posterior mean and 100*\code{prob.level}\% HPDI for the total population size, conditional on the model of interest. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{sub_model}}.} \seealso{ \code{\link{sub_model}}. }conting/man/print.interstat.Rd0000644000175100001440000000161712753071315016154 0ustar hornikusers\name{print.interstat} \alias{print.interstat} \title{ Print \code{interstat} Objects } \description{ This function prints objects of class \code{"interstat"}. } \usage{ \method{print}{interstat}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{ An object of class \code{"interstat"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will print out the posterior probability, posterior mean, posterior variance and the 100*\code{prob.level}\% highest posterior density intervals (HPDIs) of each log-linear parameter (subject to the argument \code{cutoff}). } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{inter_stats}}.} \seealso{ \code{\link{inter_stats}}. }conting/man/inter_stats.Rd0000644000175100001440000000741212753071315015342 0ustar hornikusers\name{inter_stats} \alias{inter_stats} \title{ Compute Posterior Summary Statistics of the Log-Linear Parameters. 
} \description{ This function computes the posterior summary statistics of the log-linear parameters using the MCMC output of \code{"bcct"} and \code{"bict"} objects. The posterior summary statistics are the posterior probability, posterior mean, posterior variance and the lower and upper limits of the highest posterior density intervals (HPDIs). } \usage{ inter_stats(object, cutoff = 0.75, n.burnin = 0, thin = 1, prob.level = 0.95) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{cutoff}{ An optional argument giving the cutoff posterior probability for displaying posterior summary statistics of the log-linear parameters. Only those log-linear parameters with a posterior probability greater than \code{cutoff} will be returned as part of the output. The default value is 0.75. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{prob.level}{ An optional argument giving the probability content of the HPDIs. The default value is 0.95. } } \details{ This function provides an expanded version of what \code{\link{inter_probs}} provides. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage. } \value{ This function will return an object of class \code{"interstat"} which is a list with the following components: \item{term}{A vector of term labels for each parameter.} \item{prob}{A vector of posterior probabilities for each parameter.} \item{post_mean}{A vector of posterior means for each parameter.} \item{post_var}{A vector of posterior variances for each parameter.} \item{lower}{A vector of lower limits for the 100*\code{prob.level}\% HPDI for each parameter.} \item{upper}{A vector of upper limits for the 100*\code{prob.level}\% HPDI for each parameter.} \item{prob.level}{The argument \code{prob.level}.} The function will only return elements in the above list if \code{prob} > \code{cutoff}. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{print.interstat}}, \code{\link{inter_probs}} } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Starting from the maximal model (the saturated model), do 100 iterations of the ## MCMC algorithm. inter_stats(test1,n.burnin=10,cutoff=0.5) ## Calculate posterior summary statistics having used a burn-in phase of ## 10 iterations and a cutoff of 0.5 (i.e. display all terms with ## posterior probability greater than 0.5).
Will get the following: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 2.88291 0.002565 2.78778 2.97185 #alc1 1 -0.05246 0.008762 -0.27772 0.06655 #alc2 1 -0.05644 0.006407 -0.20596 0.11786 #alc3 1 0.06822 0.005950 -0.09635 0.18596 #hyp1 1 -0.53895 0.003452 -0.63301 -0.39888 #obe1 1 -0.04686 0.007661 -0.20929 0.12031 #obe2 1 0.01395 0.004024 -0.11024 0.11783 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively } conting/man/summary.bcct.Rd0000644000175100001440000001643612753071315015420 0ustar hornikusers\name{summary.bcct} \alias{summary.bcct} \alias{print.sbcct} \alias{summary.bict} \alias{print.sbict} \title{ Summary of \code{bcct} and \code{bict} Objects } \description{ These functions produce summaries of objects of class \code{"bcct"} and \code{"bict"}. They also control how these summaries are printed. } \usage{ \method{summary}{bcct}(object, n.burnin = 0, thin = 1, cutoff = 0.75, statistic = "X2", best = NULL, scale = 0.1, prob.level = 0.95, ...) \method{print}{sbcct}(x, ..., digits = max(3, getOption("digits") - 3)) \method{summary}{bict}(object, n.burnin = 0, thin = 1, cutoff = 0.75, statistic = "X2", best = NULL, scale = 0.1, prob.level = 0.95, ...) \method{print}{sbict}(x, ..., digits = max(3, getOption("digits") - 3)) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{x}{ An object of class \code{"sbcct"} or \code{"sbict"} produced as a result of a call to the functions \code{summary.bcct} or \code{summary.bict}, respectively. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{cutoff}{ An optional argument giving the cutoff posterior probability for displaying posterior summary statistics of the log-linear parameters. Only those log-linear parameters with a posterior probability greater than \code{cutoff} will be returned as part of the output. The default value is 0.75. } \item{statistic}{ An optional argument giving the discrepancy statistic to use for calculating the Bayesian p-value. It can be one of \code{c("X2","FreemanTukey","deviance")} which correspond to the different statistics: \code{"X2"} = Chi-squared statistic, \code{"FreemanTukey"} = Freeman-Tukey statistic, \code{"deviance"} = deviance statistic. See Overstall & King (2014), and references therein, for descriptions of these statistics. } \item{best}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the \code{best} models with the highest posterior model probabilities. The default value is \code{NULL}. If not \code{NULL} than this argument takes precedent over \code{scale}. } \item{scale}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the models with the posterior model probability larger than \code{scale} times the probability of the posterior modal model. The default value is 0.1. } \item{prob.level}{ An optional argument giving the probability content of the highest posterior density intervals (HPDIs). The default value is 0.95. 
} \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \details{The functions \code{summary.bcct} and \code{summary.bict} rely on the functions \code{\link{inter_stats}}, \code{\link{mod_probs}}, \code{\link{bayespval}}, and (in the case of \code{summary.bict}) \code{\link{total_pop}}. For extra information about the output from these functions, see the associated help files. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage. } \value{ The function \code{summary.bcct} will return an object of class \code{"sbcct"} which is a list with the following components. \item{BETA}{An \code{n.sample} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.} \item{MODEL}{A vector of length \code{n.sample} giving the sampled model indicators in hexadecimal format.} \item{SIG}{A vector of length \code{n.sample} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.} \item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} \item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} \item{priornum}{A numeric scalar indicating which prior was used: 1 = \code{"UIP"}, 2 = \code{"SBH"}.} \item{maximal.mod}{An object of class \code{"glm"} giving the fit of the maximal model.} \item{IP}{A p by p matrix giving the inverse of the prior scale matrix for the maximal model.} \item{eta.hat}{A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model.} \item{save}{The argument \code{save}.} \item{name}{The argument \code{name}.} \item{int_stats}{A list which contains the same components as an object of class \code{"interstat"}, i.e. summary statistics for the log-linear parameters, see \code{\link{inter_stats}}.} \item{mod_stats}{A list which contains the same components as an object of class \code{"modprobs"}, i.e. summary statistics for the posterior model probabilities, see \code{\link{mod_probs}}.} \item{pval_stats}{A list which contains the same components as an object of class \code{"pval"}, i.e. the Bayesian p-value and associated discrepancy statistics, see \code{\link{bayespval}}.} The function \code{summary.bict} will return an object of class \code{"sbict"} which is a list with the same components as an object of class \code{"sbcct"} and the following additional components. \item{Y0}{An \code{n.sample} by k matrix giving the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts.} \item{tpop_stats}{A list which contains the same components as an object of class \code{"totpop"}, i.e.
posterior summary statistics for the total population, see \code{\link{total_pop}}.} The functions \code{\link{print.sbcct}} and \code{\link{print.sbict}} will print out the MCMC acceptance rates, posterior summary statistics for the log-linear parameters, the posterior model probabilities, the Bayesian p-value and (in the case of \code{\link{print.sbict}}) posterior summary statistics for the total population size. } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ For examples see the help files for \code{\link{bcct}} and \code{\link{bict}}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{accept_rate}}, \code{\link{bayespval}}, \code{\link{inter_stats}}, \code{\link{mod_probs}}, \code{\link{total_pop}}. } conting/man/print.totpop.Rd0000644000175100001440000000143512753071315015462 0ustar hornikusers\name{print.totpop} \alias{print.totpop} \title{ Print \code{totpop} Objects } \description{ This function prints objects of class \code{"totpop"}. } \usage{ \method{print}{totpop}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{ An object of class \code{"totpop"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ The function will print out the posterior mean and the 100*\code{prob.level}\% highest posterior density interval for the total population size. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{total_pop}}.} \seealso{ \code{\link{total_pop}}. }conting/man/index2model.Rd0000644000175100001440000000430312753071315015211 0ustar hornikusers\name{index2model} \alias{index2model} \alias{model2index} \title{ Convert Between Index and Model Indicator } \description{ These functions convert the binary vector, indicating which terms are in the current model, to the hexadecimal model indicator, and vice versa. } \usage{ index2model(index) model2index(model,dig) } \arguments{ \item{index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model to be converted to a hexadecimal. } \item{dig}{A scalar argument giving the number of columns of the design matrix for the maximal model. } \item{model}{A character string giving a hexadecimal model indicator. } } \value{ \code{index2model} will return a hexadecimal model indicator. \code{model2index} will return a binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model converted from hexadecimal. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ data(ScotPWID) ## Load the ScotPWID data maximal.mod<-glm(y~(S1+S2+S3+S4+Region+Gender+Age)^2,family=poisson,contrasts=list( S1="contr.sum",S2="contr.sum",S3="contr.sum",S4="contr.sum", Region="contr.sum",Gender="contr.sum",Age="contr.sum"),data=ScotPWID,x=TRUE) ## Fit the maximal model containing all two-way interactions. 
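## (Added illustrative check, not part of the original example.) The dig
## argument of model2index() used further below is simply the number of
## columns of the design matrix under the maximal model, which can be
## inspected directly:
ncol(maximal.mod$x)
## This should be 29 (the intercept, 7 main effects and 21 two-way
## interactions, each contributing one parameter under sum-to-zero
## constraints), matching length(index) as used in the call to model2index().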
big.X<-maximal.mod$x ## Set the design matrix under the maximal model index<-formula2index(big.X=big.X, formula=~S1+S2+S3+S4+Region+Gender+Age+S1:S2+S1:Age+S2:Gender+S3:S4+S4:Age, data=ScotPWID) ## Find the index under the model with the following interactions: ## S1:S2 ## S1:Age ## S2:Gender ## S3:S4 ## S4:Age index ## Print the index, will get: # [1] 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 modind<-index2model(index) ## Find the hexadecimal model indicator modind ## Print it, will get: #[1] "1ff08a08" ## Convert back to index model2index(model=modind,dig=length(index)) ## Will get: # [1] 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 } conting/man/bcct.Rd0000644000175100001440000002675712753071315013733 0ustar hornikusers\name{bcct} \alias{bcct} \alias{bcctu} \alias{bcctsubset} \alias{bcctsubsetu} \title{ Bayesian Analysis of Complete Contingency Tables } \description{ These functions implement a Bayesian analysis of complete contingency tables. This is accomplished using an MCMC algorithm where the null moves are performed using a Metropolis-Hastings algorithm and the between models moves are performed using a reversible jump algorithm. \code{bcct} should be used initially, and \code{bcctu} should be used to do additional MCMC iterations, if required. \code{bcctsubset} and \code{bcctsubsetu} operate on a subset of models. } \usage{ bcct(formula, data, n.sample, prior = "SBH", start.formula = NULL, start.beta = NULL, start.sig = NULL, save = 0, name = NULL, null.move.prob=0.5, a = 0.001, b = 0.001, progress = FALSE) bcctu(object, n.sample, save = NULL, name = NULL, progress = FALSE) bcctsubset(subsetformula, data, n.sample, prior = "SBH", start.formula = NULL, start.beta = NULL, start.sig = NULL, save = 0, name = NULL, null.move.prob=0.5, a = 0.001, b = 0.001, progress = FALSE) bcctsubsetu(object, n.sample, save = NULL, name = NULL, progress = FALSE) } \arguments{ \item{formula}{An object of class \code{"formula"}: a symbolic description of the maximal model.} \item{subsetformula}{A list with elements of class \code{"formula"}: symbolic description of the subset of models. The first element in the list should be the maximal model.} \item{object}{An object of class \code{"bcct"} produced as a previous call to \code{bcct} or \code{bcctu}.} \item{data}{An object of class \code{"data.frame"} (or \code{"table"}) containing the variables in the model. If the model variables are not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{bcct} is called.} \item{n.sample}{ A numeric scalar giving the number of (additional, in the case of \code{bcctu}) MCMC iterations to perform. } \item{prior}{An optional argument giving the prior to be used in the analysis. It can be one of \code{c("UIP","SBH")}, where \code{"UIP"} = unit information prior; and \code{"SBH"} = Sabanes-Bove & Held prior. The default value is \code{"SBH"}. } \item{start.formula}{ An optional argument giving an object of class \code{"formula"}: a symbolic description of the starting model in the MCMC algorithm. If \code{NULL} (the default) the starting model will be the maximal model. } \item{start.beta}{ An optional argument giving the starting values of the log-linear parameters for the MCMC algorithm. It should be a vector of the same length as the number of log-linear parameters in the starting model implied by the argument \code{start.formula}. 
If \code{NULL} (the default) the starting value will be the posterior mode under the maximal model. } \item{start.sig}{ An optional argument giving the starting value of sigma^2 (under the Sabanes-Bove & Held prior) for the MCMC algorithm when the argument of prior is \code{"SBH"}. If \code{NULL} (the default) the starting value will be one. } \item{save}{ An optional argument for saving the MCMC output mid-algorithm. For \code{bcct} and \code{bcctsubset}, if positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero (the default), the function will not save the MCMC output to external files. For \code{bcctu} and \code{bcctsubsetu}, if non-\code{NULL}, the function will save the MCMC output to external text files every \code{save} iterations. If \code{NULL} (the default), it will inherit the value of \code{save} from the previous call to \code{bcct} or \code{bcctu}. } \item{name}{ An optional argument giving a prefix to the file name of the external files saved if the argument \code{save} is positive. For \code{bcct}, a value of \code{NULL} means the external files will not have a prefix. For \code{bcctu}, a value of \code{NULL}, means the prefix will be inherited from the previous call to \code{bcct} or \code{bcctu}. } \item{null.move.prob}{ An optional scalar argument giving the probability of performing a null move in the reversible jump algorithm, i.e. proposing a move to the current model. The default value is 0.5. } \item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. A value of \code{a = -1} gives the Gelman prior (Gelman, 2006), i.e. a uniform prior on the standard deviation. } \item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. A value of \code{b = 0} gives the Gelman prior (Gelman, 2006), i.e. a uniform prior on the standard deviation. } \item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. The default value is \code{FALSE}. } } \details{ For identifiability, the parameters are constrained. The \code{\link{conting-package}} uses sum-to-zero constraints. See Overstall & King (2014), and the references therein, for more details. The Metropolis-Hastings algorithm employed is the iterated weighted least squares method for generalised linear models (GLMs) proposed by Gamerman (1997). The reversible jump algorithm employed is that orthogonal projections method for GLMs proposed by Forster et al (2012). For details on these methods applied to log-linear models see Overstall & King (2014), and the references therein. For details on the unit information and Sabanes-Bove & Held priors for generalised linear models see Ntzoufras et al (2003) and Sabanes-Bove & Held (2011), respectively. See Overstall & King (2014), and the references therein, for their application to log-linear models and contingency tables. } \value{ The functions will return an object of class \code{"bcct"} which is a list with the following components: \item{BETA}{An \code{n.sample} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. 
For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.} \item{MODEL}{A vector of length \code{n.sample} giving the sampled model indicators in hexadecimal format.} \item{SIG}{A vector of length \code{n.sample} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.} \item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} \item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} \item{priornum}{A numeric scalar indicating which prior was used: 1 = \code{"UIP"}, 2 = \code{"SBH"}.} \item{maximal.mod}{An object of class \code{"glm"} giving the fit of the maximal model.} \item{IP}{A p by p matrix giving the inverse of the prior scale matrix for the maximal model.} \item{eta.hat}{A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model.} \item{save}{The argument \code{save}.} \item{name}{The argument \code{name}.} \item{null.move.prob}{ The argument \code{null.move.prob}.} \item{time}{ The total computer time (in seconds) used for the MCMC computations.} \item{a}{ The argument \code{a}. } \item{b}{ The argument \code{b}. } \item{subset.index}{ Model indicators (in hexadecimal format) of the subset of models.} } \references{ Sabanes-Bove, D. & Held, L. (2011) Hyper-g priors for generalized linear models. \emph{Bayesian Analysis}, \bold{6 (3)}, 387--410. Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. Gamerman, D. (1997) Sampling from the posterior distribution in generalised linear mixed models. \emph{Statistics and Computing}, \bold{7 (1)}, 57--68. Gelman, A. (2006) Prior distributions for variance parameters in hierarchical models (Comment on Article by Browne and Draper). \emph{Bayesian Analysis}, \bold{1 (3)}, 515--534. Ntzoufras, I., Dellaportas, P. & Forster, J.J. (2003) Bayesian variable and link determination for generalised linear models. \emph{Journal of Statistical Planning and Inference}, \bold{111 (1)}, 165--180. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ These functions are wrappers for \code{\link{bcct.fit}}. In Version 1.0 of \code{\link{conting-package}}, note that the default value for \code{prior} was \code{"UIP"}. From Version 1.1 onwards, the default value is \code{"SBH"}. } \seealso{ \code{\link{bcct.fit}}, \code{\link{AOH}}, \code{\link{heart}}. } \examples{ set.seed(1) ## Set seed for reproducibility. data(AOH) ## Load the AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=50,prior="UIP") ## Let the maximal model be the saturated model. Starting from the ## posterior mode of the maximal model, do 50 iterations under the unit ## information prior.
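## (Added illustrative check, not part of the original example.) At this
## stage the performance of the MCMC algorithm can be monitored with
## accept_rate(); with only 50 iterations the acceptance rates are rough
## indications only and will depend on the random seed.
accept_rate(test1)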
test1<-bcctu(object=test1,n.sample=50) ## Do another 50 iterations test1 ## Printing out a bcct object produces this simple summary #Number of cells in table = 24 # #Maximal model = #y ~ (alc + hyp + obe)^3 # #Number of log-linear parameters in maximal model = 24 # #Number of MCMC iterations = 100 # #Computer time for MCMC = 00:00:01 # #Prior distribution for log-linear parameters = UIP summary(test1) ## Printing out a summary produces a bit more: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 2.877924 0.002574 2.78778 2.97185 #alc1 1 -0.060274 0.008845 -0.27772 0.06655 #alc2 1 -0.049450 0.006940 -0.20157 0.11786 #alc3 1 0.073111 0.005673 -0.05929 0.20185 #hyp1 1 -0.544988 0.003485 -0.65004 -0.42620 #obe1 1 -0.054672 0.007812 -0.19623 0.12031 #obe2 1 0.007809 0.004127 -0.11024 0.11783 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.45 ~alc + hyp + obe #2 0.30 ~alc + hyp + obe + hyp:obe #3 0.11 ~alc + hyp + obe + alc:hyp + hyp:obe #4 0.06 ~alc + hyp + obe + alc:hyp + alc:obe + hyp:obe #5 0.05 ~alc + hyp + obe + alc:hyp # #Total number of models visited = 7 # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 11.79 20.16 23.98 24.70 28.77 52.40 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.18 24.22 31.51 30.12 35.63 42.49 # #Bayesian p-value = 0.28 ## For more examples see Overstall & King (2014). } conting/man/find_cens.Rd0000644000175100001440000000712212753071315014731 0ustar hornikusers\name{find_cens} \alias{find_cens} \title{ Find Censored Cells } \description{ Given all the sources and the censored source of an incomplete contingency table, this function will find the censored cells. } \usage{ find_cens(sources, cens_source, data=NULL, unobs.level = "un", obs.level = "obs") } \arguments{ \item{sources}{ An object of class \code{"formula"}, which details the sources in the incomplete contingency table. } \item{cens_source}{ An object of class \code{"formula"}, which details the source which is subject to censoring in the incomplete contingency table. } \item{data}{ An object of class \code{"data.frame"} (or \code{"table"}) containing the variables in the model. If the model variables are not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{find_cens} is called. } \item{unobs.level}{ The character string used to label the source level corresponding to not observing the individuals in the cell. } \item{obs.level}{ The character string used to label the source level corresponding to observing the individuals in the cell. } } \details{ Sometimes one of the sources (termed the censored source) used to estimate closed populations observes individuals which are not members of the target population. In this case we assume that when this source observes an individual that has been observed by at least one other source, then it is a member of the target population. However those individuals only observed by the censored source contain a mixture of members of the target and non-target populations. This means that the observed cell count acts as an upper bound on the true cell count. For more details on this approach, see Overstall et al (2014) and Overstall & King (2014). This function identifies the cells which are censored (i.e. 
correspond to only being observed by the censored source). } \value{ The function will output a numeric vector containing the cell numbers of the censored cells. These are used by the \code{\link{bict}} and \code{\link{bictu}} functions. } \references{ Overstall, A.M., King, R., Bird, S.M., Hutchinson, S.J. & Hay, G. (2014) Incomplete contingency tables with censored cells with application to estimating the number of people who inject drugs in Scotland. \emph{Statistics in Medicine}, \bold{33 (9)}, 1564--1579. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bict}}. } \examples{ data(ScotPWID) ## Load the ScotPWID data. In this dataset, the S4 source corresponding ## to the HCV database is subject to censoring. We use find_cens to find ## the censored cells. find_cens(sources=~S1+S2+S3+S4,cens_source=~S4,data=ScotPWID) ## It will produce the vector with the following elements: ##[1] 9 25 41 57 73 89 105 121 ## Let's look at these cells ScotPWID[find_cens(sources=~S1+S2+S3+S4,cens_source=~S4,data=ScotPWID),] ## It will produce: # y S1 S2 S3 S4 Region Gender Age #9 122 un un un obs GGC Male Young #25 135 un un un obs GGC Male Old #41 48 un un un obs GGC Female Young #57 38 un un un obs GGC Female Old #73 134 un un un obs Rest Male Young #89 104 un un un obs Rest Male Old #105 78 un un un obs Rest Female Young #121 25 un un un obs Rest Female Old }conting/man/print.pval.Rd0000644000175100001440000000160312753071315015074 0ustar hornikusers\name{print.pval} \alias{print.pval} \title{ Print \code{pval} Objects } \description{ This function prints objects of class \code{"pval"}. } \usage{ \method{print}{pval}(x, digits = max(3, getOption("digits") - 3), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ An object of class \code{"pval"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will print out summary statistics for the predictive (\code{T_pred}) and observed (\code{T_obs}) discrepancy statistics. Additionally it will output the associated Bayesian p-value. } \author{ Antony M. Overstall (\email{Antony.Overstall@glasgow.ac.uk}). } \note{For an example see \code{\link{bayespval}}.} \seealso{ \code{\link{bayespval}} } conting/man/plot.totpop.Rd0000644000175100001440000000120712753071315015301 0ustar hornikusers\name{plot.totpop} \alias{plot.totpop} \title{ Plot \code{totpop} Objects } \description{ This function plots objects of class \code{"totpop"}. } \usage{ \method{plot}{totpop}(x, ...) } \arguments{ \item{x}{ An object of class \code{"totpop"}. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will produce a histogram of the MCMC sample from the posterior distribution of the total population size. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. 
}
conting/man/print.pval.Rd0000644000175100001440000000160312753071315015074 0ustar hornikusers
\name{print.pval}
\alias{print.pval}
\title{
Print \code{pval} Objects
}
\description{
This function prints objects of class \code{"pval"}.
}
\usage{
\method{print}{pval}(x, digits = max(3, getOption("digits") - 3), ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{x}{
An object of class \code{"pval"}.
}
  \item{digits}{
An optional argument controlling the rounding of output.
}
  \item{\dots}{
Arguments to be passed to and from other methods.
}
}
\value{
This function will print out summary statistics for the predictive (\code{T_pred}) and observed
(\code{T_obs}) discrepancy statistics. Additionally it will output the associated Bayesian p-value.
}
\author{
Antony M. Overstall (\email{Antony.Overstall@glasgow.ac.uk}).
}
\note{For an example see \code{\link{bayespval}}.}
\seealso{
\code{\link{bayespval}}
}
conting/man/plot.totpop.Rd0000644000175100001440000000120712753071315015301 0ustar hornikusers
\name{plot.totpop}
\alias{plot.totpop}
\title{
Plot \code{totpop} Objects
}
\description{
This function plots objects of class \code{"totpop"}.
}
\usage{
\method{plot}{totpop}(x, ...)
}
\arguments{
  \item{x}{
An object of class \code{"totpop"}.
}
  \item{\dots}{
Arguments to be passed to and from other methods.
}
}
\value{
This function will produce a histogram of the MCMC sample from the posterior distribution of the
total population size.
}
\author{
Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}.
}
\note{For an example see \code{\link{total_pop}}.}
\seealso{
\code{\link{total_pop}}
}
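\examples{
## A hedged sketch added for illustration, not run: it assumes a previously
## obtained bict fit called ex (a hypothetical object) and that total_pop
## accepts a burn-in argument named n.burnin, as its own help file suggests.
\dontrun{
tp<-total_pop(ex,n.burnin=10)
plot(tp) ## histogram of the posterior sample of the total population size
}
}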
conting/man/bcct.fit.Rd0000644000175100001440000001456512753071315014502 0ustar hornikusers
\name{bcct.fit}
\alias{bcct.fit}
\alias{bcctsubset.fit}
\title{
Bayesian Analysis of Complete Contingency Tables
}
\description{
These functions are the workhorses behind \code{\link{bcct}}, \code{\link{bcctu}},
\code{\link{bcctsubset}}, and \code{\link{bcctsubsetu}}.
}
\usage{
bcct.fit(priornum, maximal.mod, IP, eta.hat, ini.index, ini.beta, ini.sig, iters,
save, name, null.move.prob, a, b, progress)

bcctsubset.fit(priornum, subset.index, maximal.mod, IP, eta.hat, ini.index, ini.beta,
ini.sig, iters, save, name, null.move.prob, a, b, progress)
}
\arguments{
  \item{priornum}{
A numeric scalar indicating which prior is to be used: 1 = \code{"UIP"}, 2 = \code{"SBH"}.
}
  \item{subset.index}{
A matrix where each row gives the index for each model in the subset of models under consideration.
}
  \item{maximal.mod}{
An object of class \code{"glm"} giving the fit of the maximal model.
}
  \item{IP}{
A p by p matrix giving the inverse of the prior scale matrix for the maximal model.
}
  \item{eta.hat}{
A vector of length n (number of cells) giving the posterior mode of the linear predictor under the
maximal model.
}
  \item{ini.index}{
A binary vector, of the same length as the number of log-linear parameters in the maximal model,
indicating which parameters are present in the initial model.
}
  \item{ini.beta}{
A numeric vector giving the starting values of the log-linear parameters for the MCMC algorithm.
}
  \item{ini.sig}{
A numeric scalar giving the starting value of sigma^2 for the MCMC algorithm.
}
  \item{iters}{
The number of iterations of the MCMC algorithm to perform.
}
  \item{save}{
If positive, the function will save the MCMC output to external text files every \code{save}
iterations. If zero, the function will not save the MCMC output to external files.
}
  \item{name}{
A prefix to the external files saved if the argument \code{save} is positive. If \code{NULL}, then
the external files will have no prefix.
}
  \item{null.move.prob}{
A scalar argument giving the probability of performing a null move, i.e. proposing a move to the
current model.
}
  \item{a}{
The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014).
}
  \item{b}{
The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014).
}
  \item{progress}{
Logical argument. If \code{TRUE}, then a progress bar will be displayed.
}
}
\value{
The function will return a list with the following components:

\item{BETA}{An \code{iters} by p matrix containing the sampled values of the log-linear parameters,
where p is the number of log-linear parameters in the maximal model. For elements of this matrix
which correspond to a log-linear parameter which is not present for the current model a zero is
returned.}

\item{MODEL}{A vector of length \code{iters} giving the sampled model indicators in hexadecimal
form.}

\item{SIG}{A vector of length \code{iters} giving the sampled values for sigma^2 under the
Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector
will be one.}

\item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted.
A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}

\item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves
attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}
}
\references{
Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and
incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27.
\url{http://www.jstatsoft.org/v58/i07/}
}
\author{
Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}.
}
\note{
This function will not typically be called by the user.
}
\seealso{
\code{\link{bcct}},
\code{\link{bcctu}}.
}
\examples{
data(AOH)
## Load the AOH data.

maximal.mod<-glm(formula=y~(alc+hyp+obe)^3,data=AOH,x=TRUE,y=TRUE,
contrasts=list(alc="contr.sum",hyp="contr.sum",obe="contr.sum"))
## Set up the maximal model which in this case is the saturated
## model.

curr.index<-formula2index(big.X=maximal.mod$x,formula=y~alc+hyp+obe+hyp:obe,data=AOH)
## Set up the binary vector for the model containing all main effects and the
## hyp:obe interaction.

IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(maximal.mod$y)
IP[,1]<-0
IP[1,]<-0
## Set up the inverse scale matrix for the prior distribution under
## the maximal model.

bmod<-beta_mode(X=maximal.mod$x,prior="UIP",y=maximal.mod$y,IP=IP)
## Find the posterior mode under the maximal model
eta.hat<-as.vector(maximal.mod$x\%*\%bmod)
## Find the posterior mode of the linear predictor
## under the maximal model.

set.seed(1)
## Set seed for reproducibility

test1<-bcct.fit(priornum=1, maximal.mod=maximal.mod, IP=IP, eta.hat=eta.hat,
ini.index=curr.index, ini.beta=bmod[curr.index==1], ini.sig=1, iters=5, save=0,
name=NULL,null.move.prob=0.5, a=0.001, b=0.001, progress=TRUE)
## Run for 5 iterations starting at model defined by curr.index.

test1$MODEL
## Look at sampled model indicators. Should be:
## [1] "fe00c0" "fe0000" "fe0000" "fe0000" "fe0000"

model2index(test1$MODEL,dig=24)
## Convert these to binary indicators of the log-linear parameters.
## Will get:
#       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
#fe00c0    1    1    1    1    1    1    1    0    0     0     0     0     0
#fe0000    1    1    1    1    1    1    1    0    0     0     0     0     0
#fe0000    1    1    1    1    1    1    1    0    0     0     0     0     0
#fe0000    1    1    1    1    1    1    1    0    0     0     0     0     0
#fe0000    1    1    1    1    1    1    1    0    0     0     0     0     0
#       [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
#fe00c0     0     0     0     1     1     0     0     0     0     0     0
#fe0000     0     0     0     0     0     0     0     0     0     0     0
#fe0000     0     0     0     0     0     0     0     0     0     0     0
#fe0000     0     0     0     0     0     0     0     0     0     0     0
#fe0000     0     0     0     0     0     0     0     0     0     0     0
## See how the hyp:obe interactions in columns 17 and 18 get dropped after
## the 1st iteration.
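## A hedged addition, not part of the original example: the remaining
## components documented in the Value section can be inspected directly.
## Under the unit information prior (priornum=1) the SIG component should be
## a vector of ones, while rj_acc and mh_acc record the accepted proposals.
test1$SIG
test1$rj_acc
test1$mh_acc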
}
conting/man/heart.Rd0000644000175100001440000000375012753071315014107 0ustar hornikusers
\name{heart}
\alias{heart}
\docType{data}
\title{
Risk Factors for Coronary Heart Disease: A Complete 2^6 Table
}
\description{
1841 men are cross-classified according to six risk factors for coronary heart disease: smoking
(\code{A}; 2 levels), strenuous mental work (\code{B}; 2 levels), strenuous physical work
(\code{C}; 2 levels), systolic blood pressure (\code{D}; 2 levels), ratio of alpha and beta
lipoproteins (\code{E}; 2 levels) and family anamnesis of coronary heart disease (\code{F}; 2 levels).
}
\usage{data(heart)}
\format{
  A \code{"data.frame"} with 64 observations on the following 7 variables.
  \describe{
    \item{\code{y}}{
Counts in each cell of the table.
}
    \item{\code{A}}{
A factor with levels \code{yes} \code{no} indicating smoking status.
}
    \item{\code{B}}{
A factor with levels \code{yes} \code{no} indicating strenuous mental work.
}
    \item{\code{C}}{
A factor with levels \code{yes} \code{no} indicating strenuous physical work.
}
    \item{\code{D}}{
A factor with levels \code{yes} \code{no} indicating systolic blood pressure.
}
    \item{\code{E}}{
A factor with levels \code{yes} \code{no} indicating high ratio of alpha and beta lipoproteins.
}
    \item{\code{F}}{
A factor with levels \code{yes} \code{no} indicating a family anamnesis of coronary heart disease.
}
  }}
\details{
For more details on this data see Edwards & Havranek (1985).

For details on the function \code{\link{bcct}} applied to this data, see Overstall & King (2014).
}
\source{
Edwards, D. & Havranek, T. (1985) A fast procedure for model search in multidimensional contingency
tables. \emph{Biometrika}, \bold{72 (2)}, 339--351.
}
\references{
Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and
incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27.
\url{http://www.jstatsoft.org/v58/i07/}
}
\examples{
data(heart)
summary(heart)
}
\keyword{datasets}
conting/man/spina.Rd0000644000175100001440000000403112753071315014117 0ustar hornikusers
\name{spina}
\alias{spina}
\docType{data}
\title{
Persons born with Spina Bifida: An Incomplete 2 * 2 * 2 * 3 Table
}
\description{
621 people born with Spina Bifida (a congenital disorder) in the state of New York between 1969 and
1974 are observed by three sources: birth certificates (\code{S1}); death certificates (\code{S2});
and medical rehabilitation lists (\code{S3}). The people are also cross-classified according to
their ethnicity (\code{eth}; 3 levels).
}
\usage{data(spina)}
\format{
  A \code{"data.frame"} with 24 observations on the following 5 variables.
  \describe{
    \item{\code{y}}{
Counts in each cell of the table with NAs for the cells corresponding to not being observed by any
of the sources.
}
    \item{\code{S1}}{
A factor with levels \code{un} \code{obs} indicating whether the birth certificate source observed
the person.
}
    \item{\code{S2}}{
A factor with levels \code{un} \code{obs} indicating whether the death certificate source observed
the person.
}
    \item{\code{S3}}{
A factor with levels \code{un} \code{obs} indicating whether the medical rehabilitation source
observed the person.
}
    \item{\code{eth}}{
A factor with levels \code{afro-american} \code{caucasian} \code{other} indicating the ethnicity of
the person (\code{afro-american} = Afro-American, \code{caucasian} = Caucasian, \code{other} = Other).
}
  }
}
\details{
See Madigan & York (1997), and the references therein, for more details on the study.

For details on the function \code{\link{bict}} applied to this data, see Overstall & King (2014).
}
\source{
Madigan, D. & York, J.C. (1997) Methods for Estimation of the Size of a Closed Population.
\emph{Biometrika}, \bold{84 (1)}, 19--31.
}
\references{
Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and
incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27.
\url{http://www.jstatsoft.org/v58/i07/}
}
\examples{
data(spina)
summary(spina)
}
\keyword{datasets}