conting/0000755000176000001440000000000012502103670011737 5ustar ripleyusers
conting/inst/0000755000176000001440000000000012502056666012730 5ustar ripleyusers
conting/inst/CITATION0000644000176000001440000000144112502056666014065 0ustar ripleyusers
citHeader("To cite conting in publications use:")

citEntry(entry = "Article",
  title = "{conting}: An {R} Package for Bayesian Analysis of Complete and Incomplete Contingency Tables",
  author = personList(as.person("Antony M. Overstall"), as.person("Ruth King")),
  journal = "Journal of Statistical Software",
  year = "2014",
  volume = "58",
  number = "7",
  pages = "1--27",
  url = "http://www.jstatsoft.org/v58/i07/",
  textVersion = paste("Antony M. Overstall, Ruth King (2014).",
    "conting: An R Package for Bayesian Analysis of Complete and Incomplete Contingency Tables.",
    "Journal of Statistical Software, 58(7), 1-27.",
    "URL http://www.jstatsoft.org/v58/i07/.")
)
conting/NAMESPACE0000644000176000001440000000101312502056666013165 0ustar ripleyusers
import(mvtnorm, tseries, coda)
importFrom(gtools, combinations)
importFrom(BMS, bin2hex)
importFrom(BMS, hex2bin)
exportPattern("^[[:alpha:]]+")
S3method(plot, pval)
S3method(plot, totpop)
S3method(print, acceptrate)
S3method(print, bcct)
S3method(print, bict)
S3method(print, interprob)
S3method(print, interstat)
S3method(print, modprobs)
S3method(print, pval)
S3method(print, sbcct)
S3method(print, sbict)
S3method(print, submod)
S3method(print, totpop)
S3method(summary, bcct)
S3method(summary, bict)
conting/data/0000755000176000001440000000000012502056666012664 5ustar ripleyusers
conting/data/AOH.rda0000644000176000001440000000047412502056666013770 0ustar ripleyusers
[binary R data file; contents not reproducible as text]
conting/data/spina.rda0000644000176000001440000000047512502056666014474 0ustar ripleyusers
[binary R data file; contents not reproducible as text]
conting/data/heart.rda0000644000176000001440000000060312502056666014456 0ustar ripleyusers
[binary R data file; contents not reproducible as text]
conting/R/0000755000176000001440000000000012502056666012154 5ustar ripleyusers
conting/R/summary.bcct.R0000644000176000001440000000246412502056666014714 0ustar ripleyusers
summary.bcct <- function(object,n.burnin=0,thin=1,cutoff=0.75,statistic="X2",best=NULL,scale=0.1,prob.level=0.95,...){

if(n.burnin<0){
stop("n.burnin should be positive")}
if(n.burnin>=length(object$MODEL)){
stop("n.burnin should be less than the MCMC sample size (n.sample)")}
if(thin<1){
stop("thin should be greater than or equal to 1")}
if(cutoff<0 | cutoff>1){
stop("cutoff is a probability and should be between 0 and 1")}
if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){
stop("statistic not found")}
if(scale<0 | scale>1){
stop("scale should be between 0 and 1")}
if(!is.null(best)){
if(best<=0){
stop("best should be positive")}}
if(prob.level<0 | prob.level>1){
stop("prob.level is a probability and should be between 0 and 1")}

is1<-inter_stats(object,n.burnin=n.burnin,cutoff=cutoff,thin=thin,prob.level=prob.level)
is2<-mod_probs(object,n.burnin=n.burnin,scale=scale,best=best,thin=thin)
is3<-bayespval(object,n.burnin=n.burnin,thin=thin,statistic=statistic)
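## collect the interaction statistics, model probabilities and Bayesian p-value into the returned "sbcct" object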
est<-list(BETA=object$BETA,MODEL=object$MODEL,SIG=object$SIG,rj_acc=object$rj_acc,mh_acc=object$mh_acc,priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,save=object$save,name=object$name,int_stats=is1,mod_stats=is2,pval_stats=is3) class(est)<-"sbcct" est } conting/R/iwls_mh.R0000644000176000001440000000336412502056666013747 0ustar ripleyusersiwls_mh <- function(curr.y,curr.X,curr.beta,iprior.var){ curr.LP<-as.vector(curr.X%*%matrix(curr.beta,ncol=1)) ## current linear predictor curr.w<-exp(curr.LP) ## current elements of weight matrix icurr.C<-iprior.var+crossprod(x=curr.X*curr.w,y=curr.X) ## inverse proposal variance #curr.C<-solve(icurr.C) curr.C<-chol2inv(chol(icurr.C)) ## proposal variance curr.z<-curr.LP+(curr.y-curr.w)/curr.w ## current value of working vector curr.m<-as.vector(tcrossprod(x=curr.C,y=curr.X*curr.w)%*%matrix(curr.z,ncol=1)) ## proposal mean prop.beta<-as.vector(rmvnorm(n=1,mean=curr.m,sigma=curr.C)) ## proposal prop.LP<-as.vector(curr.X%*%matrix(prop.beta,ncol=1)) ## proposed linear predictor prop.w<-exp(prop.LP) ## proposed elememts of weight matrix iprop.C<-iprior.var+crossprod(x=curr.X*prop.w,y=curr.X) ## inverse current variance #prop.C<-solve(iprop.C) ## current variance prop.C<-chol2inv(chol(iprop.C)) ## current variance prop.z<-prop.LP+(curr.y-prop.w)/prop.w ## proposed working vector prop.m<-as.vector(tcrossprod(x=prop.C,y=curr.X*prop.w)%*%matrix(prop.z,ncol=1))##current mean top<-sum(curr.y*prop.LP)-sum(prop.w)-0.5*as.vector(matrix(prop.beta[-1],nrow=1)%*%iprior.var[-1,-1]%*%matrix(prop.beta[-1],ncol=1))+dmvnorm(x=curr.beta,mean=prop.m,sigma=prop.C,log=TRUE) ## log numerator of acceptance probability bot<-sum(curr.y*curr.LP)-sum(curr.w)-0.5*as.vector(matrix(curr.beta[-1],nrow=1)%*%iprior.var[-1,-1]%*%matrix(curr.beta[-1],ncol=1))+dmvnorm(x=prop.beta,mean=curr.m,sigma=curr.C,log=TRUE) ## log denominatrot of acceptance probability prob<-exp(top-bot) ## acceptance probability if(prob>=runif(1)){ ## accept or reject new.beta<-prop.beta} else{ new.beta<-curr.beta} new.beta} conting/R/print.sbcct.R0000644000176000001440000000031712502056666014531 0ustar ripleyusersprint.sbcct <- function(x,...,digits = max(3, getOption("digits") - 3)){ print(x$int_stats,digits=digits) cat("\n") print(x$mod_stats,digits=digits) cat("\n") print(x$pval_stats,digits=digits) } conting/R/total_pop.R0000644000176000001440000000175012502056666014303 0ustar ripleyuserstotal_pop <- function(object,n.burnin=0,thin=1,prob.level=0.95){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} missing<-c(object$missing1,object$missing2) obs.z<-sum(object$maximal.mod$y[-missing]) if(is.matrix(object$Y0)){ if(n.burnin>0){ YY0<-matrix(object$Y0[-(1:n.burnin),],ncol=dim(object$Y0)[2])} else{ YY0<-object$Y0} TOT<-apply(YY0,1,sum)+obs.z} else{ if(n.burnin>0){ YY0<-object$Y0[-(1:n.burnin)]} else{ YY0<-object$Y0} TOT<-YY0+obs.z} n.sample<-length(TOT) every<-seq(from=thin,to=n.sample,by=thin) TOT<-TOT[every] int<-HPDinterval(mcmc(TOT),prob=prob.level) est<-list(TOT=TOT,int=int,meanTOT=mean(TOT),thin=thin,prob.level=prob.level) class(est)<-"totpop" est} conting/R/bict.R0000644000176000001440000001105312502056666013220 0ustar ripleyusersbict <- 
function(formula,data=NULL,n.sample,prior="SBH",cens=NULL,start.formula=NULL,start.beta=NULL,start.sig=NULL,start.y0=NULL,save=0,name=NULL,null.move.prob=0.5,a=0.001,b=0.001,progress=FALSE){ if(n.sample<=0){ stop("n.sample must be positive")} if(prior!="UIP" & prior!="SBH"){ stop("prior not found")} if(save<0){ stop("save must be non-negative")} if(null.move.prob<0 | null.move.prob>1){ stop("null.move.prob is a probability and should be between 0 and 1")} if(a<0 & a!=(-1)){ stop("a and b must be non-negative")} if(b<0){ stop("a and b must be non-negative")} ptm<-(proc.time())[3] if(!is.null(data)){ if(attributes(data)$class=="table"){ data<-data.frame(data)}} if(save>0){ if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt" name_Y0<-"Y0.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="") name_Y0<-paste(name,"Y0.txt",sep="")} if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_Y0)){stop(paste("A file named ",name_Y0," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} } priortypes<-c("UIP","SBH") priornum<-c(1,2)[prior==priortypes] options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ maximal.mod<-glm(formula=formula,data=data,method="model.frame",na.action=na.pass,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} else{ maximal.mod<-glm(formula=formula,method="model.frame",na.action=na.pass,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) missing1<-(1:length(maximal.mod[,1]))[is.na(maximal.mod[,1])] data<-maximal.mod data[missing1,1]<-0 if(!is.null(cens)){ missing2<-cens} else{ missing2<-c()} missing<-c(missing1,missing2) missing_details<-data[missing1,-1] censored_details<-data[missing2,-1] options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ maximal.mod<-glm(formula=formula,data=data,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} else{ maximal.mod<-glm(formula=formula,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) big.X<-maximal.mod$x y<-maximal.mod$y n<-dim(big.X)[1] IP<-t(big.X)%*%big.X/n IP[,1]<-0 IP[1,0]<-0 bmod<-beta_mode(X=big.X[-missing,],y=y[-missing],prior=prior,IP=IP,a=a,b=b) eta.hat<-as.vector(big.X%*%matrix(bmod,ncol=1)) if(is.null(start.formula)){ start.index<-rep(1,dim(big.X)[2])} else{ start.index<-formula2index(big.X=big.X,formula=start.formula,data=data)} if(is.null(start.beta)){ start.beta<-bmod[start.index==1]} if(is.null(start.sig)){ start.sig<-1} if(is.null(start.y0)){ start.y0<-round(exp(eta.hat[missing]))} start.mod<-index2model(start.index) runit<-bict.fit(priornum=priornum,missing1=missing1,missing2=missing2,maximal.mod=maximal.mod, 
IP=IP,eta.hat=eta.hat,ini.index=start.index,ini.beta=start.beta,ini.sig=start.sig,ini.y0=start.y0, iters=n.sample,save=save,name=name,null.move.prob=null.move.prob,a=a,b=b,progress=progress) BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG Y0<-runit$Y0 rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) Y0<-matrix(read.matrix(file=name_Y0,header=FALSE),nrow=length(SIG)) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} time<-(proc.time())[3]-ptm est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,Y0=Y0,missing1=missing1,missing2=missing2,missing_details=missing_details,censored_details=censored_details,rj_acc=rj_acc,mh_acc=mh_acc,priornum=priornum,maximal.mod=maximal.mod,IP=IP,eta.hat=eta.hat,save=save,name=name,null.move.prob=null.move.prob,time=time,a=a,b=b) class(est)<-"bict" est} conting/R/drop_term.R0000644000176000001440000000304712502056666014276 0ustar ripleyusersdrop_term <- function(curr.index,data,maximal.mod){ big.X<-maximal.mod$x ## maximal design matrix full.terms<-attributes(big.X)$assign ## terms in big.X uni<-unique(full.terms[curr.index==1]) ## terms in current model uni<-uni[uni>0] ## non-interecpt terms in current model term.labels<-attr(summary(maximal.mod)$terms,"term.labels")[uni] ## term labels in current model term.order<-attr(summary(maximal.mod)$terms,"order")[uni] ## order of terms in current model term.factors<-attr(summary(maximal.mod)$terms,"factors")[,uni] ## constituent main effect terms of terms in ## current model K<-length(term.labels[term.order==1]) ## number of main effect terms can_drop<-c() ## vector containing terms we can drop if(max(term.order)>1){ ## if we are not in independence model can_drop<-term.labels[term.order==max(term.order)] ## we can drop all highest order terms candos<-(1:length(term.labels[term.order0 & is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="")} if(object$save==0 & save>0){ if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} write.table(file=name_BETA,x=object$BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=object$MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=object$SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=object$rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=object$mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) } start.sig<-object$SIG[length(object$SIG)] start.index<-model2index(object$MODEL[length(object$SIG)],dig=dim(object$BETA)[2]) 
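## restart the chain from the last stored state (model, SIG and beta) of the existing bcct object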
start.beta<-object$BETA[dim(object$BETA)[1],start.index==1] runit<-bcct.fit(priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat, ini.index=start.index,ini.beta=start.beta,ini.sig=start.sig,iters=n.sample,save=save,name=name,null.move.prob=object$null.move.prob, a=object$a,b=object$b,progress=progress) BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} if(save==0){ rj_acc<-c(object$rj_acc,rj_acc) mh_acc<-c(object$mh_acc,mh_acc) BETA<-rbind(object$BETA,BETA) SIG<-c(object$SIG,SIG) MODEL<-c(object$MODEL,MODEL)} ptm<-(proc.time())[3]-ptm time<-object$time+ptm est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,rj_acc=rj_acc,mh_acc=mh_acc,priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,save=save,name=name,null.move.prob=object$null.move.prob,time=time,a=object$a,b=object$b) class(est)<-"bcct" est} conting/R/beta_mode.R0000644000176000001440000000516712502056666014227 0ustar ripleyusersbeta_mode <- function(X,prior="SBH",y,IP,a=0.001,b=0.001){ Xt<-t(X) ## transpose of X sy<-log(ifelse(y>0,y,1/6)) ## starting values sb<-coef(lm(sy~X-1)) ## "" ###################### Log likelihoods ######################## loglik<-function(beta){ eta<-as.vector(X%*%matrix(beta,ncol=1)) sum(dpois(x=y,lambda=exp(eta),log=TRUE))} ## log-likelihood dloglik<-function(beta){ eta<-as.vector(X%*%matrix(beta,ncol=1)) as.vector(Xt%*%matrix(y-exp(eta),ncol=1))} ## gradient of log-likelihood d2loglik<-function(beta){ eta<-as.vector(X%*%matrix(beta,ncol=1)) w<-exp(eta) -crossprod(x=X*w,y=X)} ## hessian of log-likelihood ################################################################ priortypes<-c("UIP","SBH") priornum<-c(1,2)[prior==priortypes] ## which prior - defines prior part of posterior ################# UIP Priors ################################### if(priornum==1){ iSig<-IP[-1,-1] ## inverse prior variance #Sig<-solve(iSig) ## prior variance Sig<-chol2inv(chol(iSig)) ## prior variance pp<-dim(X)[2]-1 ## number of parameters - 1 prior<-function(beta){ dmvnorm(x=beta[-1],mean=rep(0,pp),sigma=Sig,log=TRUE)} ## prior log pdf dprior<-function(beta){ -c(0,as.vector(iSig%*%matrix(beta[-1],ncol=1)))} ## gradient of prior log pdf d2prior<-function(beta){ -cbind(0,rbind(0,iSig))}} ## hessian of prior log pdf ################# SBH Priors ################################### if(priornum==2){ ## parameters from inverse gamma #a<-0.001 #b<-0.001 iSig<-IP[-1,-1] ## inverse scale matrix for t-distribution pp<-dim(X)[2]-1 prior<-function(beta){ bsb<-as.vector(matrix(beta[-1],nrow=1)%*%iSig%*%matrix(beta[-1],ncol=1)) -0.5*(a+pp)*log(b+bsb)} ## prior log pdf dprior<-function(beta){ sb<-as.vector(iSig%*%matrix(beta[-1],ncol=1)) bsb<-sum(beta[-1]*sb) -c(0,(a+pp)*sb/(b+bsb))} ## gradient of prior log pdf d2prior<-function(beta){ sb<-as.vector(iSig%*%matrix(beta[-1],ncol=1)) bsb<-sum(beta[-1]*sb) ## hessian of prior log pdf -cbind(0,rbind(0,(a+pp)*((iSig/(b+bsb))-2*((matrix(sb,ncol=1)%*%matrix(sb,nrow=1))/((b+bsb)^2)))))}} ################################################################## mlogpost<-function(beta){ -loglik(beta)-prior(beta)} ## minus log posterior mdlogpost<-function(beta){ -dloglik(beta)-dprior(beta)} ## gradient of minus log posterior 
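## the posterior mode is found below by minimising the negative log posterior with nlminb, supplying these analytic derivatives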
md2logpost<-function(beta){ -d2loglik(beta)-d2prior(beta)} ## hessian of minus log posterior opt<-nlminb(start=sb,objective=mlogpost,gradient=mdlogpost,hessian=md2logpost) ## optimisation! mbeta<-opt$par mbeta} conting/R/inter_probs.R0000644000176000001440000000221612502056666014626 0ustar ripleyusersinter_probs <- function(object,cutoff=0.75,n.burnin=0,thin=1){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} term.labels<-c("(Intercept)",attr(summary(object$maximal.mod)$terms,"term.labels")) term.numbers<-attributes(object$maximal.mod$x)$assign small<-function(i){ min((1:length(term.numbers))[term.numbers==i])} ids<-sapply(X=0:max(term.numbers),FUN=small) if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] INDO<-model2index(innerMODEL,dig=dim(innerBETA)[2]) post_prob<-apply(INDO[,ids],2,mean) est<-list(term=term.labels[post_prob>=cutoff],prob=post_prob[post_prob>=cutoff],thin=thin) class(est)<-"interprob" est } conting/R/prop_mod.R0000644000176000001440000000242312502056666014117 0ustar ripleyusersprop_mod <- function(curr.index,data,maximal.mod,null.move.prob=0.5){ stay<-runif(1) if(stay>null.move.prob){ big.X<-maximal.mod$x full.labels<-attr(summary(maximal.mod)$terms,"term.labels") full.order<-attr(summary(maximal.mod)$terms,"order") full.factors<-attr(summary(maximal.mod)$terms,"factors") full.terms<-attributes(big.X)$assign dr<-drop_term(curr.index,data,maximal.mod) ad<-add_term(curr.index,data,maximal.mod) terms<-c(dr,ad) types<-rep(c(1,2),c(length(dr),length(ad))) total.choices<-length(terms) if(length(terms)>0){ choose<-sample(x=1:total.choices,size=1) termo<-terms[choose] typo<-types[choose] new.index<-curr.index if(typo==1){ #drop move new.index[full.terms==(1:length(full.labels))[full.labels==termo]]<-0 result<-list(new.index=new.index,type="drop",total.choices=total.choices,null.move.prob=null.move.prob)} if(typo==2){ #add move new.index[full.terms==(1:length(full.labels))[full.labels==termo]]<-1 result<-list(new.index=new.index,type="add",total.choices=total.choices,null.move.prob=null.move.prob)}} else{ result<-list(new.index=curr.index,type="null",total.choices=0,null.move.prob=null.move.prob)}} else{ result<-list(new.index=curr.index,type="null",total.choices=0,null.move.prob=null.move.prob)} result} conting/R/accept_rate.R0000644000176000001440000000024312502056666014550 0ustar ripleyusersaccept_rate <- function(object){ rj<-object$rj_acc mh<-object$mh_acc est<-list(rj_ar=100*mean(rj),mh_ar=100*mean(mh)) class(est)<-"acceptrate" est} conting/R/print.interstat.R0000644000176000001440000000075312502056666015454 0ustar ripleyusersprint.interstat <- function(x,digits=max(3, getOption("digits") - 3),...){ df<-data.frame(post_prob=x$prob,post_mean=x$post_mean, post_var=x$post_var,lower_lim=x$lower,upper_lim=x$upper) nam<-x$term row.names(df)<-nam lev<-100*x$prob.level cat("Posterior summary statistics of log-linear parameters:\n") print(df,digits=digits) cat("NB: lower_lim and upper_lim refer to the lower and upper values of the\n") cat(lev,"% highest posterior density intervals, respectively\n")} 
conting/R/summary.bict.R0000644000176000001440000000263212502056666014717 0ustar ripleyuserssummary.bict <- function(object,n.burnin=0,thin=1,cutoff=0.75,statistic="X2",best=NULL,scale=0.1,prob.level=0.95,...){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(scale<0 | scale>1){ stop("scale should be between 0 and 1")} if(!is.null(best)){ if(best<=0){ stop("best should be positive")}} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} is1<-inter_stats(object,n.burnin=n.burnin,cutoff=cutoff,thin=thin,prob.level=prob.level) is2<-mod_probs(object,n.burnin=n.burnin,scale=scale,best=best,thin=thin) is3<-total_pop(object,n.burnin=n.burnin,thin=thin,prob.level=prob.level) is4<-bayespval(object,n.burnin=n.burnin,thin=thin,statistic=statistic) est<-list(BETA=object$BETA,MODEL=object$MODEL,SIG=object$SIG,Y0=object$Y0,rj_acc=object$rj_acc,mh_acc=object$mh_acc,priornum=object$priornum,maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,save=object$save,name=object$name,int_stats=is1,mod_stats=is2,tpop_stats=is3,pval_stats=is4) class(est)<-"sbict" est } conting/R/bcct.R0000644000176000001440000000753612502056666013225 0ustar ripleyusersbcct <- function(formula,data=NULL,n.sample,prior="SBH",start.formula=NULL,start.beta=NULL,start.sig=NULL,save=0,name=NULL,null.move.prob=0.5,a=0.001,b=0.001,progress=FALSE){ if(n.sample<=0){ stop("n.sample must be positive")} if(prior!="UIP" & prior!="SBH"){ stop("prior not found")} if(save<0){ stop("save must be non-negative")} if(null.move.prob<0 | null.move.prob>1){ stop("null.move.prob is a probability and should be between 0 and 1")} if(a<0 & a!=(-1)){ stop("a and b must be non-negative")} if(b<0){ stop("a and b must be non-negative")} ptm<-(proc.time())[3] if(!is.null(data)){ if(attributes(data)$class=="table"){ data<-data.frame(data)}} if(save>0){ if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="")} ## names for files if we are saving them if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} } ## error if files already exist priortypes<-c("UIP","SBH") priornum<-c(1,2)[prior==priortypes] ## which prior? 
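## construct the maximal model design matrix and cell counts using sum-to-zero ("contr.sum") contrasts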
options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ maximal.mod<-glm(formula=formula,data=data,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} else{ maximal.mod<-glm(formula=formula,family=poisson,control=list(maxit=1),x=TRUE,y=TRUE)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) ## get maximal design matrix big.X<-maximal.mod$x ## maximal design matrix y<-maximal.mod$y ## responses n<-dim(big.X)[1] ## sample size (number of cells) IP<-t(big.X)%*%big.X/n IP[,1]<-0 IP[1,0]<-0 ## inverse prior scale matrix bmod<-beta_mode(X=big.X,y=y,prior=prior,IP=IP,a=a,b=b) ## find posterior mode eta.hat<-as.vector(big.X%*%matrix(bmod,ncol=1)) ## calculate eta.hat for RJ if(is.null(start.formula)){ start.index<-rep(1,dim(big.X)[2])} else{ start.index<-formula2index(big.X=big.X,formula=start.formula,data=data)} ## set up starting values for various parameters if(is.null(start.beta)){ start.beta<-bmod[start.index==1]} if(is.null(start.sig)){ start.sig<-1} start.mod<-index2model(start.index) runit<-bcct.fit(priornum=priornum,maximal.mod=maximal.mod,IP=IP,eta.hat=eta.hat,ini.index=start.index,ini.beta=start.beta,ini.sig=start.sig,iters=n.sample,save=save,name=name,null.move.prob=null.move.prob,a=a,b=b,progress=progress) ## do some iterations of data augmentation algorithm BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc ## list results if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} time<-(proc.time())[3]-ptm est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,rj_acc=rj_acc,mh_acc=mh_acc,priornum=priornum,maximal.mod=maximal.mod,IP=IP,eta.hat=eta.hat,save=save,name=name,null.move.prob=null.move.prob,time=time,a=a,b=b) class(est)<-"bcct" ## setup class for results est} conting/R/print.bcct.R0000644000176000001440000000143612502056666014351 0ustar ripleyusersprint.bcct <- function(x,...){ hrs<-round(x$time%/%3600,0) mins<-round((x$time%%3600)%/%60,0) secs<-round((x$time%%3600)%%60,0) hrs<-ifelse(hrs<10,paste("0",hrs,sep=""),hrs) mins<-ifelse(mins<10,paste("0",mins,sep=""),mins) secs<-ifelse(secs<10,paste("0",secs,sep=""),secs) priortypes<-c("UIP","SBH") cat("Number of cells in table =",length(x$maximal.mod$y),"\n") cat("\n") cat("Maximal model =\n") print(x$maximal.mod$formula) cat("\n") cat("Number of log-linear parameters in maximal model =",dim(x$maximal.mod$x)[2],"\n") cat("\n") cat("Number of MCMC iterations =",length(x$MODEL),"\n") cat("\n") cat("Computer time for MCMC =",paste(hrs,":",mins,":",secs,sep=""),"\n") cat("\n") cat("Prior distribution for log-linear parameters =",priortypes[x$priornum],"\n") } conting/R/print.interprob.R0000644000176000001440000000035412502056666015440 0ustar ripleyusersprint.interprob <- function(x,digits = max(3, getOption("digits") - 3),...){ nam<-x$term df<-data.frame(post_prob=x$prob) row.names(df)<-nam cat("Posterior probabilities of log-linear parameters:\n") print(df,digits=digits)} conting/R/find_cens.R0000644000176000001440000000154212502056666014231 0ustar ripleyusersfind_cens <- function(sources,cens_source,data=NULL,unobs.level="un",obs.level="obs"){ if(!is.null(data)){ if(attributes(data)$class=="table"){ data<-data.frame(data)}} options(contrasts=c("contr.sum","contr.poly"),warn=-1) if(!is.null(data)){ small.X<-model.frame(sources,data=data) 
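## model frames for the full set of sources and for the censored source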
smaller.X<-model.frame(cens_source,data=data)} else{ small.X<-model.frame(sources) smaller.X<-model.frame(cens_source)} options(contrasts=c("contr.treatment","contr.poly"),warn=0) which<-c() for(i in 1:dim(small.X)[2]){ which[i]<-ifelse(all(small.X[,i]==smaller.X[,1]),1,0)} which_cens<-(1:dim(small.X)[2])[which==1] which_ok<-(1:dim(small.X)[2])[which==0] check.vec<-rep(0,dim(small.X)[2]) check.vec[which_cens]<-obs.level check.vec[which_ok]<-unobs.level res<-c() for(i in 1:dim(small.X)[1]){ if(all(small.X[i,]==check.vec)){res<-c(res,i)}} res} conting/R/print.pval.R0000644000176000001440000000071012502056666014372 0ustar ripleyusersprint.pval <- function(x, digits = max(3, getOption("digits") - 3),...){ statistic2<-c("X2","deviance","Freeman-Tukey")[x$statnum==(1:3)] cat("Under the",statistic2,"statistic \n") cat("\n") cat("Summary statistics for T_pred \n") print(round(summary(x$Tpred),digits=digits)) cat("\n") cat("Summary statistics for T_obs \n") print(round(summary(x$Tobs),digits=digits)) cat("\n") cat("Bayesian p-value = ",round(x$pval,digits=digits),"\n")} conting/R/print.sbict.R0000644000176000001440000000037512502056666014543 0ustar ripleyusersprint.sbict <- function(x,...,digits = max(3, getOption("digits") - 3)){ print(x$int_stats,digits=digits) cat("\n") print(x$mod_stats,digits=digits) cat("\n") print(x$tpop_stats,digits=digits) cat("\n") print(x$pval_stats,digits=digits) } conting/R/add_term.R0000644000176000001440000000246512502056666014065 0ustar ripleyusersadd_term <- function(curr.index,data,maximal.mod){ big.X<-maximal.mod$x full.terms<-attributes(big.X)$assign full.labels<-attr(summary(maximal.mod)$terms,"term.labels") full.order<-attr(summary(maximal.mod)$terms,"order") full.factors<-attr(summary(maximal.mod)$terms,"factors") uni<-unique(full.terms[curr.index==1]) uni<-uni[uni>0] curr.labels<-full.labels[uni] curr.order<-full.order[uni] curr.factors<-full.factors[,uni] K<-length(full.labels[full.order==1]) can_add<-c() if(!all(curr.index==1)){ pot.labels<-c() pot.order<-c() pot<-c() for(ttt in 1:length(full.labels)){ if(!any(full.labels[ttt]==curr.labels)){ pot.labels<-c(pot.labels,full.labels[ttt]) pot.order<-c(pot.order,full.order[ttt]) pot<-c(pot,ttt)}} pot.factors<-matrix(full.factors[-1,pot],nrow=K) can_add<-pot.labels[pot.order==2] candos<-(1:length(pot.labels))[pot.order>2] candos<-candos[pot.order[candos]<=(max(curr.order)+1)] for(ttt in candos){ combos<-combinations(n=pot.order[ttt],r=pot.order[ttt]-1,v=(1:K)[pot.factors[,ttt]==1]) ok<-c() for(q in 1:dim(combos)[1]){ int<-rep(0,K) int[combos[q,]]<-1 run<-as.numeric(apply(matrix(rep(int,dim(curr.factors)[2]),nrow=K,byrow=FALSE)==curr.factors[-1,],2,all)) ok[q]<-ifelse(sum(run)==0,0,1)} if(all(ok==1)){ can_add<-c(can_add,pot.labels[ttt])}}} can_add} conting/R/plot.pval.R0000644000176000001440000000055412502056666014222 0ustar ripleyusersplot.pval <- function(x,...){ plot(x$Tobs,x$Tpred,xlab="T_obs",ylab="T_pred",type="n",...) 
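## add points, distinguishing draws where the predictive discrepancy exceeds the observed discrepancy from those where it does not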
points(x$Tobs[x$Tpred>x$Tobs],x$Tpred[x$Tpred>x$Tobs],col=8,pch=16) points(x$Tobs[x$TpredT_obs","T_pred=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and should be between 0 and 1")} if(order<1){ stop("order should be greater than or equal to 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(n.burnin>0){ if(class(object)=="bict"){ innerY0<-matrix(object$Y0[-(1:n.burnin),],ncol=dim(object$Y0)[2])} else{ innerY0<-NULL} innerSIG<-object$SIG[-(1:n.burnin)] innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ if(class(object)=="bict"){ innerY0<-matrix(object$Y0,ncol=dim(object$Y0)[2])} else{ innerY0<-NULL} innerSIG<-object$SIG innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] innerSIG<-innerSIG[every] if(!is.null(innerY0)){ innerY0<-matrix(innerY0[every,],ncol=dim(object$Y0)[2])} tab<-sort(table(innerMODEL)/length(innerMODEL),decreasing=TRUE) if(is.null(formula)){ border<-order if(order>length(tab)){ stop("Model not visited in (thinned) sample")} interest<-tab[order] bformula<-index2formula(index=model2index(model=names(interest),dig=dim(object$maximal.mod$x)[2]),maximal.mod=object$maximal.mod)} else{ bformula<-formula interest<-tab[names(tab)==index2model(formula2index(big.X=object$maximal.mod$x, formula=formula, data=object$maximal.mod$data))] if(length(interest)==0){ stop("Model not visited in (thinned) sample")} border<-(1:length(tab))[names(tab)==index2model(formula2index(big.X=object$maximal.mod$x, formula=formula, data=object$maximal.mod$data))] } bformula<-paste0("~",as.character(bformula)[3]) int_index<-model2index(model=names(interest),dig=dim(object$maximal.mod$x)[2]) redBETA<-innerBETA[innerMODEL==names(interest),int_index==1] redSIG<-innerSIG[innerMODEL==names(interest)] if(!is.null(innerY0)){ redY0<-matrix(innerY0[innerMODEL==names(interest),],ncol=dim(object$Y0)[2])} else{ redY0<-NULL} if(!is.null(redY0)){ missing<-c(object$missing1,object$missing2) yyy<-object$maximal.mod$y[-missing] xxx<-object$maximal.mod$x[-missing,int_index==1] obs.z<-sum(yyy) redTOT<-apply(redY0,2,sum)+obs.z int<-HPDinterval(mcmc(redTOT),prob=prob.level) meanTOT<-mean(redTOT)} else{ yyy<-object$maximal.mod$y xxx<-object$maximal.mod$x[,int_index==1] redTOT<-NULL int<-NULL meanTOT<-NULL} MU<-exp(redBETA%*%t(xxx)) PRED<-matrix(rpois(n=prod(dim(MU)),lambda=as.vector(MU)),ncol=dim(MU)[2]) Y<-matrix(rep(yyy,dim(MU)[1]),ncol=dim(MU)[2],byrow=TRUE) statnum<-(1:3)[c("X2","deviance","FreemanTukey")==statistic] if(statnum==1){ Tpred<-apply(((PRED-MU)^2)/MU,1,sum) Tobs<-apply(((Y-MU)^2)/MU,1,sum)} if(statnum==2){ Tpred<-apply(-2*dpois(x=PRED,lambda=MU,log=TRUE),1,sum) Tobs<-apply(-2*dpois(x=Y,lambda=MU,log=TRUE),1,sum)} if(statnum==3){ Tpred<-apply((sqrt(PRED)-sqrt(MU))^2,1,sum) Tobs<-apply((sqrt(Y)-sqrt(MU))^2,1,sum)} pval<-mean(as.numeric(Tpred>Tobs)) post_mean<-apply(redBETA,2,mean,na.rm=TRUE) post_var<-apply(redBETA,2,var,na.rm=TRUE) post_int<-c() for(j in 1:length(post_mean)){ into<-HPDinterval(mcmc(redBETA[,j]),prob=prob.level) post_int<-rbind(post_int,into)} dimnames(post_int)<-NULL lower<-post_int[,1] upper<-post_int[,2] est<-list(term=(dimnames(object$maximal.mod$x)[[2]])[int_index==1],post_prob=as.numeric(interest), 
post_mean=post_mean,post_var=post_var, lower=lower,upper=upper,thin=thin,prob.level=prob.level,formula=bformula,order=border, BETA=redBETA,SIG=redSIG,Y0=redY0,TOT=redTOT,meanTOT=meanTOT,int=int, PRED=PRED,Tpred=Tpred,Tobs=Tobs,pval=pval,statnum=statnum,statistic=statistic) class(est)<-"submod" est} conting/R/model2index.R0000644000176000001440000000033012502056666014505 0ustar ripleyusersmodel2index <- function (model,dig) { ## need to tell it number of columns in maximal design matrix res<-t(sapply(X=model,FUN=hex2bin)) ## uses hex2bin function from BMS s<-dim(res)[2] res[,(s-dig+1):s]} conting/R/print.acceptrate.R0000644000176000001440000000040112502056666015540 0ustar ripleyusersprint.acceptrate <- function(x,digits = max(3, getOption("digits") - 3),...){ cat("Acceptance rate of reversible jump proposals = ",round(x$rj_ar,digits),"% \n") cat("Acceptance rate of Metropolis-Hastings proposals = ",round(x$mh_ar,digits),"% \n")} conting/R/bcct.fit.R0000644000176000001440000000776212502056666014007 0ustar ripleyusersbcct.fit <- function(priornum,maximal.mod,IP,eta.hat,ini.index,ini.beta,ini.sig,iters,save,name,null.move.prob,a,b,progress){ if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="")} ## names for files if we are saving them big.X<-maximal.mod$x ## maximal design matrix y<-maximal.mod$y ## responses data<-maximal.mod$data ## data curr.index<-ini.index ## current index (binary vector) curr.X<-big.X[,curr.index==1] ## current design matrix curr.ivar<-IP[curr.index==1,curr.index==1] ## current prior scale matrix MODEL<-c() ## current model (hexidecimal) BETA<-c() ## matrix for betas curr.beta<-ini.beta ## current betas SIG<-c() ## current prior variance curr.sig<-ini.sig ## ditto rj_acc<-c() ## setting up vector for RJ accept/rejects mh_acc<-c() ## setting up vector for MH accept/rejects counter<-0 ## iteration counter if(progress){ pb<-txtProgressBar(min = 0, max = iters, style = 3)} ## set up progress bar while(counter0){ if(counter%%save==0){ write.table(file=name_BETA,x=BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) ## save progress rj_acc<-c() mh_acc<-c() BETA<-c() MODEL<-c() SIG<-c()}}} if(progress){ close(pb)} ## close progress bar list(BETA=BETA,SIG=SIG,MODEL=MODEL,rj_acc=rj_acc,mh_acc=mh_acc)} conting/R/formula2index.R0000644000176000001440000000055212502056666015060 0ustar ripleyusersformula2index <- function(big.X,formula,data){ dX<-model.matrix(formula,data=data,contrasts =attributes(big.X)$contrasts)## design matrix given by formula indo<-c() for(i in 1:dim(big.X)[2]){ indo[i]<-ifelse(any((dimnames(big.X)[[2]])[i]==(dimnames(dX)[[2]])),1,0)} ## which columns of maximal design matrix ## are in this new matrix indo} conting/R/RJ_update.R0000644000176000001440000000633412502056666014162 0ustar ripleyusersRJ_update <- function(prop.index,curr.index,curr.beta,eta.hat,curr.y,big.X,proposal.probs,i.prop.prior.var,i.curr.prior.var){ icurrR<-i.curr.prior.var[-1,-1] ## 
current inverse prior variance ipropR<-i.prop.prior.var[-1,-1] ## proposed inverse prior variance currR<-chol2inv(chol(icurrR)) ## current prior variance propR<-chol2inv(chol(ipropR)) ## proposed prior variance RHO_TOP<-proposal.probs[1] ## proposal prob (proposed to current) RHO_BOT<-proposal.probs[2] ## proposal prob (current to proposed) curr.X<-big.X[,curr.index==1] ## current design matrix prop.X<-big.X[,prop.index==1] ## proposed design matrix S<-matrix(big.X[,prop.index!=curr.index],nrow=length(curr.y)) ## difference in current and proposed design matrices w<-exp(eta.hat) ## weight matrix under maximal model curr.LP<-as.vector(curr.X%*%matrix(curr.beta,ncol=1)) ## current linear predictor if(sum(prop.index)sum(curr.index)){ ### Birth move cXW<-t(curr.X)%*%diag(w) cXWX<-cXW%*%curr.X icXWX<-chol2inv(chol(cXWX)) icXWX.XW<-icXWX%*%cXW SWIP<-t(S)%*%diag(w)-t(S)%*%t(cXW)%*%icXWX.XW SIG1<-chol2inv(chol(SWIP%*%S)) ## proposal variance MU1<-as.vector(SIG1%*%SWIP%*%matrix(eta.hat,ncol=1)) ## proposal mean u1<-as.vector(rmvnorm(n=1,mean=MU1,sigma=SIG1)) ## innovation variables prop.beta<-rep(0,dim(prop.X)[2]) prop.beta[curr.index[prop.index==1]==1]<-curr.beta-as.vector(icXWX.XW%*%S%*%matrix(u1,ncol=1)) prop.beta[curr.index[prop.index==1]==0]<-u1 ## proposal for beta prop.LP<-as.vector(prop.X%*%matrix(prop.beta,ncol=1)) ## proposed linear predictor top<-sum(curr.y*prop.LP)-sum(exp(prop.LP))+dmvnorm(x=prop.beta[-1],mean=rep(0,length(prop.beta)-1),sigma=propR,log=TRUE) bot<-sum(curr.y*curr.LP)-sum(exp(curr.LP))+dmvnorm(x=curr.beta[-1],mean=rep(0,length(curr.beta)-1),sigma=currR,log=TRUE) jac<--dmvnorm(x=u1,mean=MU1,sigma=SIG1,log=TRUE) ## log numerator, denominator and jacobian prob<-(RHO_TOP/RHO_BOT)*exp(top-bot+jac)} ## acceptance probability if(prob>=runif(1)){ new.beta<-prop.beta new.index<-prop.index} else{ new.beta<-curr.beta new.index<-curr.index} ## accept or reject list(new.beta=new.beta,new.index=new.index)} conting/R/index2formula.R0000644000176000001440000000101512502056666015053 0ustar ripleyusersindex2formula <- function(index,maximal.mod){ terms<-attributes(maximal.mod$x)$assign+1 ## terms in maximal model indo.terms<-unique(terms[index==1]) ## terms in formula model modA<-maximal.mod term.labels<-attr(summary(modA)$terms,"term.labels")[indo.terms[-1]-1] ## term labels in formula model form<-paste0("y~",term.labels[1]) ## cobble together the formula if(length(term.labels)>1){ for(i in 2:length(term.labels)){ form<-paste0(form,"+",term.labels[i])}} form<-as.formula(form) form} conting/R/bict.fit.R0000644000176000001440000000764612502056666014016 0ustar ripleyusersbict.fit <- function(priornum,missing1,missing2,maximal.mod,IP,eta.hat,ini.index,ini.beta,ini.sig,ini.y0,iters,save,name,null.move.prob,a,b,progress){ missing<-c(missing1,missing2) if(is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt" name_Y0<-"Y0.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="") name_Y0<-paste(name,"Y0.txt",sep="")} big.X<-maximal.mod$x y<-maximal.mod$y data<-maximal.mod$data curr.index<-ini.index curr.X<-big.X[,curr.index==1] curr.ivar<-IP[curr.index==1,curr.index==1] #MODEL<-as.character(index2model(curr.index)) MODEL<-c() #BETA<-matrix(0,nrow=1,ncol=dim(big.X)[2]) #BETA[,curr.index==1]<-ini.beta BETA<-c() curr.beta<-ini.beta #SIG<-ini.sig SIG<-c() 
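## current values and empty storage for SIG, the imputed cell counts (Y0) and the acceptance indicators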
curr.sig<-ini.sig #Y0<-matrix(ini.y0,nrow=1) Y0<-c() curr.y0<-ini.y0 rj_acc<-c() mh_acc<-c() counter<-0 if(progress){ pb<-txtProgressBar(min = 0, max = iters, style = 3)} ## set up progress bar while(counter0){ for(i in 1:length(missing2)){ ppp<-dpois(x=1:y[missing2[i]],lambda=mutar[missing2[i]],log=TRUE) ppp<-ppp-max(ppp) curr.y0[length(missing1)+i]<-sample(x=1:y[missing2[i]],size=1,prob=exp(ppp))}} curry<-rep(0,dim(big.X)[2]) curry[new.index==1]<-new.beta curr.index<-new.index curr.beta<-new.beta curr.X<-big.X[,curr.index==1] curr.ivar<-IP[curr.index==1,curr.index==1] BETA<-rbind(BETA,curry) SIG<-c(SIG,curr.sig) MODEL<-c(MODEL,index2model(new.index)) Y0<-rbind(Y0,curr.y0) counter<-counter+1 if(progress){ setTxtProgressBar(pb, counter)} ## update progress bar if(save>0){ if(counter%%save==0){ write.table(file=name_BETA,x=BETA,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MODEL,x=MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_Y0,x=Y0,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) rj_acc<-c() mh_acc<-c() BETA<-c() MODEL<-c() SIG<-c() Y0<-c()}}} if(progress){ close(pb)} list(BETA=BETA,SIG=SIG,MODEL=MODEL,Y0=Y0,rj_acc=rj_acc,mh_acc=mh_acc)} conting/R/bayespval.R0000644000176000001440000000300512502056666014263 0ustar ripleyusersbayespval <- function(object,n.burnin=0,thin=1,statistic="X2"){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(statistic!="X2" & statistic!="FreemanTukey" & statistic!="deviance"){ stop("statistic not found")} if(is.null(object$missing1)){ yyy<-object$maximal.mod$y xxx<-object$maximal.mod$x} else{ missing<-c(object$missing1,object$missing2) yyy<-object$maximal.mod$y[-missing] xxx<-object$maximal.mod$x[-missing,]} if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),]} else{ innerBETA<-object$BETA} n.sample<-dim(innerBETA)[1] every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] MU<-exp(innerBETA%*%t(xxx)) PRED<-matrix(rpois(n=prod(dim(MU)),lambda=as.vector(MU)),ncol=dim(MU)[2]) Y<-matrix(rep(yyy,dim(MU)[1]),ncol=dim(MU)[2],byrow=TRUE) statnum<-(1:3)[c("X2","deviance","FreemanTukey")==statistic] if(statnum==1){ Tpred<-apply(((PRED-MU)^2)/MU,1,sum) Tobs<-apply(((Y-MU)^2)/MU,1,sum)} if(statnum==2){ Tpred<-apply(-2*dpois(x=PRED,lambda=MU,log=TRUE),1,sum) Tobs<-apply(-2*dpois(x=Y,lambda=MU,log=TRUE),1,sum)} if(statnum==3){ Tpred<-apply((sqrt(PRED)-sqrt(MU))^2,1,sum) Tobs<-apply((sqrt(Y)-sqrt(MU))^2,1,sum)} pval<-mean(as.numeric(Tpred>Tobs)) est<-list(PRED=PRED,Tpred=Tpred,Tobs=Tobs,pval=pval,statnum=statnum,statistic=statistic,thin=thin) class(est)<-"pval" est} conting/R/inter_stats.R0000644000176000001440000000313412502056666014637 0ustar ripleyusersinter_stats <- function(object,cutoff=0.75,n.burnin=0,thin=1,prob.level=0.95){ if(cutoff<0 | cutoff>1){ stop("cutoff is a probability and should be between 0 and 1")} if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(prob.level<0 | prob.level>1){ stop("prob.level is a probability and 
should be between 0 and 1")} if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] INDO<-model2index(innerMODEL,dig=dim(innerBETA)[2]) innerBETA2<-innerBETA innerBETA2[INDO==0]<-NA post_mean<-apply(innerBETA2,2,mean,na.rm=TRUE) post_var<-apply(innerBETA2,2,var,na.rm=TRUE) post_prob<-apply(INDO,2,mean) post_int<-c() for(j in 1:length(post_mean)){ if(length(innerBETA[INDO[,j]==1,j])>1){ into<-HPDinterval(mcmc(innerBETA[INDO[,j]==1,j]),prob=prob.level)} else{ into<-c(NA,NA)} post_int<-rbind(post_int,into)} dimnames(post_int)<-NULL lower<-post_int[,1] upper<-post_int[,2] est<-list(term=(dimnames(object$maximal.mod$x)[[2]])[post_prob>=cutoff],prob=post_prob[post_prob>=cutoff], post_mean=post_mean[post_prob>=cutoff],post_var=post_var[post_prob>=cutoff], lower=lower[post_prob>=cutoff],upper=upper[post_prob>=cutoff],thin=thin,prob.level=prob.level) class(est)<-"interstat" est} conting/R/mod_probs.R0000644000176000001440000000311012502056666014256 0ustar ripleyusersmod_probs <- function(object,n.burnin=0,scale=0.1,best=NULL,thin=1){ if(n.burnin<0){ stop("n.burnin should be positive")} if(n.burnin>=length(object$MODEL)){ stop("n.burnin should be less than the MCMC sample size (n.sample)")} if(thin<1){ stop("thin should be greater than or equal to 1")} if(scale<0 | scale>1){ stop("scale should be between 0 and 1")} if(!is.null(best)){ if(best<=0){ stop("best should be positive")}} if(n.burnin>0){ innerBETA<-object$BETA[-(1:n.burnin),] innerMODEL<-object$MODEL[-(1:n.burnin)]} else{ innerBETA<-object$BETA innerMODEL<-object$MODEL} n.sample<-length(innerMODEL) every<-seq(from=thin,to=n.sample,by=thin) innerBETA<-innerBETA[every,] innerMODEL<-innerMODEL[every] tab<-sort(table(innerMODEL)/length(innerMODEL),decreasing=TRUE) tab.names<-dimnames(tab)[[1]] if(is.null(best)){ ref.tab<-tab[tab>tab[1]*scale]} else{ if(best<=length(tab)){ ref.tab<-tab[1:best]} else{ ref.tab<-tab}} if(length(ref.tab)>1){ ref.tab.names<-dimnames(ref.tab)[[1]] dimnames(ref.tab)<-NULL} else{ ref.tab.names<-names(ref.tab) names(ref.tab)<-NULL} if(length(ref.tab)>1){ forms<-c() for(j in 1:length(ref.tab)){ forms[j]<-paste0("~",as.character(index2formula(index=model2index(ref.tab.names,dig=dim(innerBETA)[2])[j,],maximal.mod=object$maximal.mod))[3])} } else{ forms<-paste0("~",as.character(index2formula(index=model2index(ref.tab.names,dig=dim(innerBETA)[2]),maximal.mod=object$maximal.mod))[3])} est<-list(table=data.frame(prob=ref.tab,model_formula=forms),totmodsvisit=length(tab),thin=thin) class(est)<-"modprobs" est} conting/R/index2model.R0000644000176000001440000000006312502056666014510 0ustar ripleyusersindex2model <- function(index){ bin2hex(index)} conting/R/print.submod.R0000644000176000001440000000225312502056666014725 0ustar ripleyusersprint.submod <- function(x,...,digits = max(3, getOption("digits") - 3)){ blob<-round(x$post_prob,digits=digits) cat("Posterior model probability = ",blob,"\n") cat("\n") df<-data.frame(post_mean=x$post_mean, post_var=x$post_var,lower_lim=x$lower,upper_lim=x$upper) nam<-x$term row.names(df)<-nam lev<-100*x$prob.level cat("Posterior summary statistics of log-linear parameters:\n") print(df,digits=digits) cat("NB: lower_lim and upper_lim refer to the lower and upper values of the\n") cat(lev,"% highest posterior density intervals, respectively\n") cat("\n") 
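## for incomplete tables (class "bict"), also report the posterior summary of the total population size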
if(!is.null(x$meanTOT)){ cat("Posterior mean of total population size =",round(x$meanTOT,digits),"\n") cat(lev,"% highest posterior density interval for total population size = (",round(x$int,digits),") \n") cat("\n")} statistic2<-c("X2","deviance","Freeman-Tukey")[x$statnum==(1:3)] cat("Under the",statistic2,"statistic \n") cat("\n") cat("Summary statistics for T_pred \n") print(round(summary(x$Tpred),digits=digits)) cat("\n") cat("Summary statistics for T_obs \n") print(round(summary(x$Tobs),digits=digits)) cat("\n") cat("Bayesian p-value = ",round(x$pval,digits=digits),"\n") } conting/R/print.totpop.R0000644000176000001440000000044112502056666014756 0ustar ripleyusersprint.totpop <- function(x,digits = max(3, getOption("digits") - 3),...){ lev<-100*x$prob.level cat("Posterior mean of total population size =",round(x$meanTOT,digits),"\n") cat(lev,"% highest posterior density interval for total population size = (",round(x$int,digits),") \n")} conting/R/print.bict.R0000644000176000001440000000165112502056666014356 0ustar ripleyusersprint.bict <- function(x,...){ hrs<-round(x$time%/%3600,0) mins<-round((x$time%%3600)%/%60,0) secs<-round((x$time%%3600)%%60,0) hrs<-ifelse(hrs<10,paste("0",hrs,sep=""),hrs) mins<-ifelse(mins<10,paste("0",mins,sep=""),mins) secs<-ifelse(secs<10,paste("0",secs,sep=""),secs) priortypes<-c("UIP","SBH") cat("Number of cells in table =",length(x$maximal.mod$y),"\n") cat("\n") cat("Maximal model =\n") print(x$maximal.mod$formula) cat("\n") cat("Number of log-linear parameters in maximal model =",dim(x$maximal.mod$x)[2],"\n") cat("\n") cat("Number of MCMC iterations =",length(x$MODEL),"\n") cat("\n") cat("Computer time for MCMC =",paste(hrs,":",mins,":",secs,sep=""),"\n") cat("\n") cat("Prior distribution for log-linear parameters =",priortypes[x$priornum],"\n") cat("\n") cat("Number of missing cells =",length(x$missing1),"\n") cat("\n") cat("Number of censored cells =",length(x$missing2),"\n") } conting/R/bictu.R0000644000176000001440000000706212502056666013412 0ustar ripleyusersbictu <- function(object,n.sample,save=NULL,name=NULL,progress=FALSE){ if(n.sample<=0){ stop("n.sample must be positive")} ptm<-(proc.time())[3] if(is.null(save)){save<-object$save} if(is.null(name)){name<-object$name} if(save>0 & is.null(name)){ name_RJACC<-"RJACC.txt" name_MHACC<-"MHACC.txt" name_BETA<-"BETA.txt" name_MODEL<-"MODEL.txt" name_SIG<-"SIG.txt" name_Y0<-"Y0.txt"} else{ name_RJACC<-paste(name,"RJACC.txt",sep="") name_MHACC<-paste(name,"MHACC.txt",sep="") name_BETA<-paste(name,"BETA.txt",sep="") name_MODEL<-paste(name,"MODEL.txt",sep="") name_SIG<-paste(name,"SIG.txt",sep="") name_Y0<-paste(name,"Y0.txt",sep="")} if(object$save==0 & save>0){ if(file.exists(name_BETA)){stop(paste("A file named ",name_BETA," already exists in the working directory",sep=""))} if(file.exists(name_MODEL)){stop(paste("A file named ",name_MODEL," already exists in the working directory",sep=""))} if(file.exists(name_SIG)){stop(paste("A file named ",name_SIG," already exists in the working directory",sep=""))} if(file.exists(name_Y0)){stop(paste("A file named ",name_Y0," already exists in the working directory",sep=""))} if(file.exists(name_RJACC)){stop(paste("A file named ",name_RJACC," already exists in the working directory",sep=""))} if(file.exists(name_MHACC)){stop(paste("A file named ",name_MHACC," already exists in the working directory",sep=""))} write.table(file=name_BETA,x=object$BETA,row.names=FALSE,col.names=FALSE,append=TRUE) 
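## likewise append the stored model indicators, SIG, Y0 and acceptance indicators to their files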
write.table(file=name_MODEL,x=object$MODEL,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_SIG,x=object$SIG,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_Y0,x=object$Y0,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_RJACC,x=object$rj_acc,row.names=FALSE,col.names=FALSE,append=TRUE) write.table(file=name_MHACC,x=object$mh_acc,row.names=FALSE,col.names=FALSE,append=TRUE) } start.sig<-object$SIG[length(object$SIG)] start.index<-model2index(object$MODEL[length(object$SIG)],dig=dim(object$BETA)[2]) start.beta<-object$BETA[dim(object$BETA)[1],start.index==1] start.y0<-object$Y0[dim(object$Y0)[1],] runit<-bict.fit(priornum=object$priornum,missing1=object$missing1,missing2=object$missing2, maximal.mod=object$maximal.mod,IP=object$IP,eta.hat=object$eta.hat,ini.index=start.index,ini.beta=start.beta, ini.sig=start.sig,ini.y0=start.y0,iters=n.sample,save=save,name=name,null.move.prob=object$null.move.prob, a=object$a,b=object$b,progress=progress) BETA<-runit$BETA MODEL<-runit$MODEL SIG<-runit$SIG Y0<-runit$Y0 rj_acc<-runit$rj_acc mh_acc<-runit$mh_acc if(save>0){ rj_acc<-read.matrix(file=name_RJACC,header=FALSE) mh_acc<-read.matrix(file=name_MHACC,header=FALSE) BETA<-read.matrix(file=name_BETA,header=FALSE) SIG<-read.matrix(file=name_SIG,header=FALSE) Y0<-read.matrix(file=name_Y0,header=FALSE) MODEL<-as.character(read.table(file=name_MODEL,header=FALSE)[,1])} if(save==0){ rj_acc<-c(object$rj_acc,rj_acc) mh_acc<-c(object$mh_acc,mh_acc) BETA<-rbind(object$BETA,BETA) SIG<-c(object$SIG,SIG) Y0<-rbind(object$Y0,Y0) MODEL<-c(object$MODEL,MODEL)} ptm<-(proc.time())[3]-ptm time<-ptm+object$time est<-list(BETA=BETA,MODEL=MODEL,SIG=SIG,Y0=Y0,rj_acc=rj_acc,mh_acc=mh_acc,priornum=object$priornum, missing1=object$missing1,missing2=object$missing2,maximal.mod=object$maximal.mod,IP=object$IP, eta.hat=object$eta.hat,save=save,name=name,missing_details=object$missing_details, censored_details=object$censored_details,null.move.prob=object$null.move.prob,time=time, a=object$a,b=object$b) class(est)<-"bict" est} conting/MD50000644000176000001440000000764512502103670012263 0ustar ripleyusers103000b733f039c5dc499d9d26828f9c *DESCRIPTION 5898d6f35b23d055ac015ae0d6bcb1b2 *NAMESPACE 9071b89c46380a7af6be8fae2db00948 *R/RJ_update.R def339b59e64204e9db01335b77ac9e8 *R/accept_rate.R e3f610f6d31b1bc65164baf540514209 *R/add_term.R 35054448b4cbbd8ea26cedbf4ee4b38a *R/bayespval.R 991f15c2bebba5697cee1e292b76435e *R/bcct.R 003b9a0852fe28d4bcff06f954ccad6e *R/bcct.fit.R 821da2bca4bacb7633100c9f307c7453 *R/bcctu.R da1a1615c25899a3326d2a6c6e57978b *R/beta_mode.R 6171aa1fdcb2bd7811f3f56782621048 *R/bict.R 0da21f4dc21842b2e7ee4c77a16fd177 *R/bict.fit.R 7afe0a984173c8683b3229fbdc96a77b *R/bictu.R 7c0a2962d2ece37792f4c91367b97fe0 *R/drop_term.R 76330349796348b50441c8a6757c1a3f *R/find_cens.R df5e5c6c5e43ea8bc5c122a2ca8c6d17 *R/formula2index.R 17deb7819acee872650113f9a1d84563 *R/index2formula.R 70f4801283044832a985c39184248a25 *R/index2model.R 8a89e3326f747b115da28df58cf27c2b *R/inter_probs.R 6179a33eaa7f6df906b190f02db9ef23 *R/inter_stats.R 9bad7f600ef384dee022e4ec8e04259b *R/iwls_mh.R 11a12c2fc7251aa85f5e8478b9ceb5d4 *R/mod_probs.R c8e6c05006cd4872c7af5388cb03a1d3 *R/model2index.R b22dba5ee5e25ecd7543978698a3b977 *R/plot.pval.R d69ac5be534d94f4a2b57518ac44ca90 *R/plot.totpop.R 4816bff07a4143b889fe0deb2d844da0 *R/print.acceptrate.R 1f2dcbc0a39d4699be9ea1b6a31f3402 *R/print.bcct.R 32734498e41893130546bd75305f9b43 *R/print.bict.R 7c773ccdb2c5a8350759aa3b13a103c0 *R/print.interprob.R 
3f962d6409feb992f032ba747988c50b *R/print.interstat.R 71dc48b2148fe63a0dd3803393181218 *R/print.modprobs.R 2259d5ca419a97cd8fb49cd0be2ae4ae *R/print.pval.R 77d19a2780e62b727c1ac9d2ece42e56 *R/print.sbcct.R 15742acbbd80e2758f6c6daad9b949a7 *R/print.sbict.R ebd49330a88bba6d4cc5cbfe048379ea *R/print.submod.R 85c166c04eb6ad8775bcadf3f43a6f0b *R/print.totpop.R 5cbce87ccc24bc4e57ef26010f1fce6a *R/prop_mod.R 09878e4eddf17312e38a2af136082668 *R/sub_model.R a87c0874595f4b0e9c82c46433a2d942 *R/summary.bcct.R 25d8d4dc32f86840812605ab3a2068c5 *R/summary.bict.R 84ed36115620defbe585207a53a9c951 *R/total_pop.R 5d5bbc72bf14772825619cf8d3afc1a7 *data/AOH.rda 8de3b728a3dacb3d90f7da68e09090e2 *data/ScotPWID.rda f32945364a520d8435801b7ce94499bd *data/heart.rda d9cfabe86771a3a2bf7dcc58d2eabf75 *data/spina.rda 551b319d5c059d2714af405e6e005df9 *inst/CITATION da54c697af3c2d465145cf4d78f89732 *man/AOH.Rd a7ef0b144b430f86d7c57de815e75698 *man/RJ_update.Rd f69405e0f5c86bc4982591e783e46a7c *man/ScotPWID.Rd 1044fd6d23c1183a784e00d558a1aaf4 *man/accept_rate.Rd 47b7907303f60bfaa9518b157ec5ad0a *man/add_term.Rd c8963bf5b265a82694f7cdcb58a89f73 *man/bayespval.Rd 7e41612b324eef0f24819a764448922a *man/bcct.Rd ad4407056e82e020f0f404382f97c9da *man/bcct.fit.Rd 2066618b2c7e1a2de0768407d486f2fa *man/beta_mode.Rd 9670f314af585ad547c00c072772a831 *man/bict.Rd ba14f4d22009d53888b7b26a043c7a9a *man/bict.fit.Rd 2ad151600ec77bd296c2cf3f01c972a8 *man/conting-package.Rd e7a8084279cee2c8cadc2a660d05c5ea *man/find_cens.Rd 6335a3dcae17b4f64942f74ec5214e77 *man/formula2index.Rd 8ac844d7f724081448928f8630adace5 *man/heart.Rd a23d5956100a6fb27f9858fe238fa115 *man/index2model.Rd f50b7c3621a429ba4c57951d900c2c89 *man/inter_probs.Rd a6edb1960da54e36b76e061c8e32ac15 *man/inter_stats.Rd 29382133157eb0c74cf8e4d42667fbc3 *man/iwls_mh.Rd da98e83d91295fabb16142c7af497e32 *man/mod_probs.Rd 9cf2c35c9a01151bcf0c5455090f4b55 *man/plot.pval.Rd 957d31b3019560d6dfb9710152fcda30 *man/plot.totpop.Rd 305758a3df578087fa9fe9f547f6e9b5 *man/print.acceptrate.Rd e73060dd2ce4016d1472b2c89bd170f6 *man/print.bcct.Rd 09ff6774c99c00de7ae5ce369f13cd64 *man/print.interprob.Rd 9710c237d28e26ed3464726782d5f0d2 *man/print.interstat.Rd e71f894293e3c49bed041549f4178332 *man/print.modprobs.Rd bc85fbe299f31c976f746de6496afc58 *man/print.pval.Rd 3e931d97e008f06feba602feb3a820ba *man/print.submod.Rd a765000f5df8362c890475710e153a7e *man/print.totpop.Rd 1cdd84e1c6050c775559904e230162a9 *man/spina.Rd c2e53f8a245cb4fdd98a52af7e6c75aa *man/sub_model.Rd 378736f8ee3fe210906db640b320f7be *man/summary.bcct.Rd a0156c27e007fb88b9329901c690c476 *man/total_pop.Rd conting/DESCRIPTION0000644000176000001440000000073712502103670013454 0ustar ripleyusersPackage: conting Type: Package Title: Bayesian Analysis of Contingency Tables Version: 1.5 Date: 2015-03-17 Author: Antony M. Overstall Maintainer: Antony M. Overstall Description: Bayesian analysis of complete and incomplete contingency tables. 
Depends: R (>= 2.15.0) Imports: mvtnorm, BMS, gtools, tseries, coda License: GPL-2 Packaged: 2015-03-17 17:00:38 UTC; ao35j NeedsCompilation: no Repository: CRAN Date/Publication: 2015-03-17 20:59:52 conting/man/0000755000176000001440000000000012502056666012526 5ustar ripleyusersconting/man/RJ_update.Rd0000644000176000001440000001157612502056666014704 0ustar ripleyusers\name{RJ_update} \alias{RJ_update} \title{ Reversible Jump Algorithm } \description{ This function implements one iteration of the orthogonal projection reversible jump algorithm for generalised linear models proposed by Forster et al (2012) applied to log-linear models. } \usage{ RJ_update(prop.index, curr.index, curr.beta, eta.hat, curr.y, big.X, proposal.probs, i.prop.prior.var, i.curr.prior.var) } \arguments{ \item{prop.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the proposed model. } \item{curr.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the current model. } \item{curr.beta}{ A vector of length \code{sum(curr.index)} giving the log-linear parameters under the current model. } \item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. } \item{curr.y}{ A vector of length n giving the cell counts. } \item{big.X}{ The design matrix under the maximal model. } \item{proposal.probs}{ A numeric vector of length 2. The first element gives the probability of proposing a move from the proposed model to the current model. The second element gives the probability of proposing a move from the current model to the proposed model. } \item{i.prop.prior.var}{ A matrix giving the inverse of the prior variance matrix for the log-linear parameters under the proposed model. } \item{i.curr.prior.var}{ A matrix giving the inverse of the prior variance matrix for the log-linear parameters under the current model. } } \details{ For the original algorithm see Forster et al (2012). For details on its application to log-linear models see Overstall & King (2014), and the references therein. } \value{ The function will return a list with the following components: \item{new.beta}{A vector giving the new log-linear parameters.} \item{new.index}{A binary vector indicating which log-linear parameters are present in the new model.} } \references{ Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ set.seed(4) ## Set seed for reproducibility data(AOH) ## Load data maximal.mod<-glm(y~(alc+hyp+obe)^3,family=poisson,x=TRUE,contrasts=list(alc="contr.sum", hyp="contr.sum",obe="contr.sum"),data=AOH) ## Fit maximal model to get a design matrix IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(AOH$y) IP[,1]<-0 IP[1,]<-0 ## Calculate inverse prior scale matrix under maximal model. Under the UIP this ## is the inverse prior variance matrix. 
Under the SBH prior, we need to divide ## this matrix by the current value of SIG. bmod<-beta_mode(X=maximal.mod$x,y=AOH$y,IP=IP) ## Find posterior mode under maximal model with UIP eta.hat<-as.vector(maximal.mod$x\%*\%bmod) ## Find posterior mode of linear predictor. curr.index<-formula2index(big.X=maximal.mod$x,formula=y~alc+hyp+obe+alc:hyp,data=AOH) ## Calculate index for current model including alc:hyp interaction curr.index ## Print out current index #[1] 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 pm<-prop_mod(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## Propose a model p2<-(1-pm$null.move.prob)/pm$total.choices p2 ## Calculate probability of proposing proposed model from current model #[1] 0.1666667 prop.index<-pm$new.index prop.index ## Assign and print out proposal index # [1] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 dm<-prop_mod(curr.index=prop.index,data=AOH,maximal.mod=maximal.mod,null.move.prob=0) p1<-(1-pm$null.move.prob)/dm$total.choices p1 ## Calculate probability of proposing current model from proposed model #[1] 0.1666667 RJ_update(prop.index=prop.index,curr.index=curr.index, curr.beta=coef(maximal.mod)[curr.index==1],eta.hat=eta.hat,curr.y=AOH$y,big.X=maximal.mod$x, proposal.probs=c(p1,p2), i.prop.prior.var=IP[prop.index==1,prop.index==1], i.curr.prior.var=IP[curr.index==1,curr.index==1]) ## Do one iteration of reversible jump algorithm. Will get: #$new.beta #(Intercept) alc1 alc2 alc3 hyp1 obe1 # 2.87128918 -0.07098006 -0.07221330 0.08748803 -0.51899802 -0.07855115 # obe2 #-0.02474727 # #$new.index # [1] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } conting/man/print.bcct.Rd0000644000176000001440000000214512502056666015065 0ustar ripleyusers\name{print.bcct} \alias{print.bcct} \alias{print.bict} \title{ Print \code{bcct} and \code{bict} Objects } \description{ This function prints objects of class \code{"bcct"} and \code{"bict"}. } \usage{ \method{print}{bcct}(x, ...) \method{print}{bict}(x, ...) } \arguments{ \item{x}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ These functions print out very simple details on the \code{bcct} or \code{bict} objects. They display the number of cells in the table, the maximal model considered, the number of log-linear parameters in the maximal model, the number of MCMC iterations, the computer time required for the MCMC (in hours, minutes and seconds) and the prior used. In the case of objects of class \code{"bict"}, it also prints out the number of missing and censored cells. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For examples see \code{\link{bcct}} and \code{\link{bict}}.} \seealso{ \code{\link{bcct}}, \code{\link{bict}}. } conting/man/ScotPWID.Rd0000644000176000001440000000621112502056666014411 0ustar ripleyusers\name{ScotPWID} \alias{ScotPWID} \docType{data} \title{ People Who Inject Drugs in Scotland 2006: An Incomplete 2^7 Table } \description{ 5670 people who inject drugs (PWID) in Scotland in 2006 are osberved by four sources: social enquiry reports (\code{S1}), hospital records (\code{S2}), Scottish drug misuse database (\code{S3}) and Hepatitis C virus (HCV) diagnosis database (\code{S4}). The PWID are further cross-classified according to three additional factors: region (\code{Region}; 2 levels), gender (\code{Gender}; 2 levels) and age (\code{Age}; 2 levels). } \usage{data(ScotPWID)} \format{ A \code{"data.frame"} with 128 observations on the following 8 variables. 
\describe{ \item{\code{y}}{ Counts in each cell of the table with NAs for the cells corresponding to not being observed by any of the sources. } \item{\code{S1}}{ A factor with levels \code{un} \code{obs} indicating whether source S1 observed the PWID. } \item{\code{S2}}{ A factor with levels \code{un} \code{obs} indicating whether source S2 observed the PWID. } \item{\code{S3}}{ A factor with levels \code{un} \code{obs} indicating whether source S3 observed the PWID. } \item{\code{S4}}{ A factor with levels \code{un} \code{obs} indicating whether source S4 observed the PWID. } \item{\code{Region}}{ A factor with levels \code{GGC} \code{Rest} indicating the region (\code{GGC} = Greater Glasgow & Clyde, \code{Rest} = Rest of Scotland). } \item{\code{Gender}}{ A factor with levels \code{Male} \code{Female} indicating gender. } \item{\code{Age}}{ A factor with levels \code{Young} \code{Old} indicating age (\code{Young} = <35 years, \code{Old}=35+ years). } } } \details{ Note that the PWID observed by source \code{S4}, the HCV database, are not necessarily current PWID. They are people who have a history of drug use. Therefore the count in the cell corresponding to only being observed by the HCV database is an overcount. Overstall et al (2014) use a modelling approach whereby the count in the cell corresponding to only being observed by the HCV database is missing and the observed value acts as an upper bound. For more details on the dataset see King et al (2013). For details on the function \code{\link{bict}} applied to this data, see Overstall & King (2014). } \source{ King, R., Bird, S. M., Overstall, A. M., Hay, G. & Hutchinson, S. J. (2013) Injecting drug users in Scotland, 2006: Listing, number, demography, and opiate-related death-rates. \emph{Addiction Research and Theory}, \bold{21 (3)}, 235-246. } \references{ Overstall, A.M., King, R., Bird, S.M., Hutchinson, S.J. & Hay, G. (2014) Incomplete contingency tables with censored cells with application to estimating the number of people who inject drugs in Scotland. \emph{Statistics in Medicine}, \bold{33 (9)}, 1564--1579. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \examples{ data(ScotPWID) summary(ScotPWID) } \keyword{datasets} conting/man/bayespval.Rd0000644000176000001440000000761712502056666015016 0ustar ripleyusers\name{bayespval} \alias{bayespval} \title{ Compute Bayesian p-value } \description{ This function will compute the Bayesian (or posterior predictive) p-value. This can be used as a diagnostic tool to check model adequacy. Additionally this function outputs predictions from the model which can also be used in other assessments of model adequacy. } \usage{ bayespval(object, n.burnin = 0, thin = 1, statistic = "X2") } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"} object. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{statistic}{ An optional argument giving the discrepancy statistic to use for calculating the Bayesian p-value. 
It can be one of \code{c("X2","FreemanTukey","deviance")} which correspond to the different statistics: \code{"X2"} = Chi-squared statistic, \code{"FreemanTukey"} = Freeman-Tukey statistic, \code{"deviance"} = deviance statistic. See Overstall & King (2014), and references therein, for descriptions of these statistics. } } \details{See Gelman et al (2004, Chapter 6) for more details on Bayesian p-values and see Overstall & King (2014), and references therein, for details of their application to contingency tables. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage. } \value{ The function will produce an object of class \code{"pval"} which is a list with the following components. \item{PRED}{An (\code{n.sample}-\code{n.burnin}) by n* matrix (where n* is the number of observed cell counts) containing the predictions of the observed cell counts.} \item{Tpred}{A vector of length (\code{n.sample}-\code{n.burnin}) containing the discrepancies between the predicted cell counts and their means.} \item{Tobs}{A vector of length (\code{n.sample}-\code{n.burnin}) containing the discrepancies between the observed cell counts and their means.} \item{pval}{A scalar giving the Bayesian p-value, i.e. the proportion of \code{Tpred}>\code{Tobs}.} \item{statnum}{A numeric scalar identifying which statistic is used.} \item{statistic}{A character string identifying which statistic is used.} \item{thin}{The value of the argument \code{thin}.} } \references{ Gelman, A., Carlin, J.B., Stern, H.S. & Rubin, D.B. (2004) \emph{Bayesian Data Analysis}, 2nd edition, Chapman & Hall. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bict}}, \code{\link{bcct}}, \code{\link{print.pval}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(spina) ## Load spina data test1<-bict(formula=y~(S1+S2+S3+eth)^2,data=spina,n.sample=50,prior="UIP") ## Do 50 iterations starting at maximal model containing all two-way interactions. test1p<-bayespval(object=test1,statistic="FreemanTukey",n.burnin=5) ## Use the Freeman-Tukey statistic and a burn-in phase of 5 iterations. test1p ## Will get following output #Under the Freeman-Tukey statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 2.812 4.695 5.190 5.777 6.405 14.490 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.566 4.861 5.197 5.430 6.108 6.460 # #Bayesian p-value = 0.4667 ## Can do a plot \dontrun{plot(test1p)} } conting/man/plot.pval.Rd0000644000176000001440000000135412502056666014737 0ustar ripleyusers\name{plot.pval} \alias{plot.pval} \title{ Plot \code{pval} Objects } \description{ This function plots objects of class \code{"pval"}. } \usage{ \method{plot}{pval}(x, ...) } \arguments{ \item{x}{ An object of class \code{"pval"}. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will produce a plot of \code{T_obs} against \code{T_pred} (see \code{\link{bayespval}}) along with a line through the origin with slope one. The proportion of points above the line (grey points) gives the Bayesian p-value. } \author{ Antony M.
Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{For an example see \code{\link{bayespval}}.} \seealso{ \code{\link{bayespval}} }conting/man/bict.fit.Rd0000644000176000001440000001612012502056666014517 0ustar ripleyusers\name{bict.fit} \alias{bict.fit} \title{ Bayesian Analysis of Incomplete Contingency Tables } \description{ This function is the workhorse behind \code{\link{bict}} and \code{\link{bictu}}. } \usage{ bict.fit(priornum, missing1, missing2, maximal.mod, IP, eta.hat, ini.index, ini.beta, ini.sig, ini.y0, iters, save, name, null.move.prob, a, b, progress) } \arguments{ \item{priornum}{ A numeric scalar indicating which prior is to be used: 1 = \code{"UIP"}, 2 = \code{"SBH"}. } \item{missing1}{ A vector of the same length as the number of missing cell counts giving the row numbers of the data.frame in \code{data} which correspond to the missing cell counts. } \item{missing2}{ A vector of the same length as the number of censored cell counts giving the row numbers of the data.frame in \code{data} which correspond to the censored cell counts. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{IP}{ A p by p matrix giving the inverse of the prior scale matrix for the maximal model. } \item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. } \item{ini.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the initial model. } \item{ini.beta}{ A numeric vector giving the starting values of the log-linear parameters for the MCMC algorithm. } \item{ini.sig}{ A numeric scalar giving the starting value of sigma^2 for the MCMC algorithm. } \item{ini.y0}{ A numeric vector giving the starting values of the missing and censored cell entries for the MCMC algorithm. } \item{iters}{ The number of iterations of the MCMC algorithm to perform. } \item{save}{ If positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero, the function will not save the MCMC output to external files. } \item{name}{ A prefix to the external files saved if the argument \code{save} is positive. If \code{NULL}, then the external files will have no prefix. } \item{null.move.prob}{ A scalar argument giving the probability of performing a null move, i.e. proposing a move to the current model. } \item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } \item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } \item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. } } \value{ The function will return a list with the following components. \item{BETA}{An \code{iters} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.} \item{MODEL}{A vector of length \code{iters} giving the sampled model indicators in hexadecimal format.} \item{SIG}{A vector of length \code{iters} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior.
If the unit information prior is used then the components of this vector will be one.} \item{Y0}{An \code{iters} by k matrix giving the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts.} \item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} \item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \seealso{ \code{\link{bict}}, \code{\link{bictu}}. } \examples{ data(spina) ## Load spina data. spina$z<-spina$y spina$z[is.na(spina$y)]<-0 ## Define a new variable in spina data.frame which is equal to y except where ## y is NA, in which case z=0. This is just so we can fit maximal model to the ## complete contingency table. maximal.mod<-glm(formula=z~(S1+S2+S3+eth)^2,data=spina,x=TRUE,y=TRUE, contrasts=list(S1="contr.sum",S2="contr.sum",S3="contr.sum", eth="contr.sum")) ## Fit maximal model to complete contingency table. curr.index<-formula2index(big.X=maximal.mod$x,formula=z~S1+S2+S3+eth,data=spina) ## Set up binary vector for independence model. IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(maximal.mod$y) IP[,1]<-0 IP[1,]<-0 ## Set up the inverse scale matrix for the prior distribution under ## the maximal model. bmod<-beta_mode(X=maximal.mod$x[!is.na(spina$y),],prior="UIP", y=maximal.mod$y[!is.na(spina$y)],IP=IP) ## Find the posterior mode under the maximal model fitted to observed cell counts. eta.hat<-as.vector(maximal.mod$x\%*\%bmod) ## Find the posterior mode of the linear predictor ## under the maximal model. set.seed(1) ## Set seed for reproducibility test1<-bict.fit(priornum=1, missing1=(1:length(maximal.mod$y))[is.na(spina$y)], missing2=NULL,maximal.mod=maximal.mod, IP=IP, eta.hat=eta.hat, ini.index=curr.index, ini.beta=bmod[curr.index==1], ini.sig=1, ini.y0=c(500,200,20),iters=10, save=0, name=NULL, null.move.prob=0.5, a=0.001, b=0.001, progress = FALSE) ## Run for 10 iterations starting at model defined by curr.index. test1$MODEL ## Look at sampled model indicators. Should be: # [1] "7e00" "7e00" "7e00" "7e00" "7e00" "7e00" "7e00" "7e00" "7f00" "7f00" model2index(test1$MODEL,dig=15) ## Convert these to binary indicators of the log-linear parameters. 
## Will get: # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7e00 1 1 1 1 1 1 0 0 0 0 0 #7f00 1 1 1 1 1 1 1 0 0 0 0 #7f00 1 1 1 1 1 1 1 0 0 0 0 # [,12] [,13] [,14] [,15] #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7e00 0 0 0 0 #7f00 0 0 0 0 #7f00 0 0 0 0 }conting/man/sub_model.Rd0000644000176000001440000001350312502056666014770 0ustar ripleyusers\name{sub_model} \alias{sub_model} \title{ Compute Posterior Summary Statistics for (Sub-) Models } \description{ This function computes posterior summary statistics for (sub-) models using the MCMC output of \code{"bcct"} and \code{"bict"} objects. } \usage{ sub_model(object, formula = NULL, order = 1, n.burnin = 0, thin = 1, prob.level = 0.95, statistic = "X2") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{formula}{ An optional argument of class \code{"formula"}: a symbolic description of the model of interest. The default value is \code{NULL}. If not \code{NULL} then this argument takes precedent over \code{order}. } \item{order}{ A scalar argument identifying the model for which to compute summary statistics. The function will compute statistics for the model with the \code{order}-th largest posterior model probability. The default value is 1, meaning that, by default, the function will compute summary statistics for the posterior modal model. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{prob.level}{ An optional argument giving the probability content of the highest posterior density intervals (HPDIs). The default value is 0.95. } \item{statistic}{ An optional argument giving the discrepancy statistic to use for calculating the Bayesian p-value. It can be one of \code{c("X2","FreemanTukey","deviance")} which correspond to the different statistics: \code{"X2"} = Chi-squared statistic, \code{"FreemanTukey"} = Freeman-Tukey statistic, \code{"deviance"} = deviance statistic. See Overstall & King (2014), and references therein, for descriptions of these statistics. } } \details{ If the MCMC algorithm does not visit the model of interest in the thinned MCMC sample, after burn-in, then an error message will be returned. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ This function will return an object of class \code{"submod"} which is a list with the following components. Note that, unless otherwise stated, all components are conditional on the model of interest. 
\item{term}{A vector of term labels for each log-linear parameter.} \item{post_prob}{A scalar giving the posterior model probability for the model of interest.} \item{post_mean}{A vector of posterior means for each of the log-linear parameters.} \item{post_var}{A vector of posterior variances for each of the log-linear parameters.} \item{lower}{A vector of lower limits for the 100*\code{prob.level}\% HPDI for each log-linear parameter.} \item{upper}{A vector of upper limits for the 100*\code{prob.level}\% HPDI for each log-linear parameter.} \item{prob.level}{The argument \code{prob.level}.} \item{order}{The ranking of the model of interest in terms of posterior model probabilities.} \item{formula}{The formula of the model of interest.} \item{BETA}{A matrix containing the sampled values of the log-linear parameters, where the number of columns is the number of log-linear parameters in the model of interest.} \item{SIG}{A vector containing the sampled values of sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.} If \code{object} is of class \code{"bict"}, then \code{sub_model} will also return the following component. \item{Y0}{A matrix (with k columns) containing the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts.} } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, } \examples{ set.seed(1) ## Set seed for reproducibility. data(AOH) ## Load the AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Let the maximal model be the saturated model. Starting from the ## posterior mode of the maximal model do 100 iterations under the unit ## information prior. test1sm<-sub_model(object=test1,order=1,n.burnin=10) ## Obtain posterior summary statistics for posterior modal model using a ## burnin of 10. test1sm #Posterior model probability = 0.5 # #Posterior summary statistics of log-linear parameters: # post_mean post_var lower_lim upper_lim #(Intercept) 2.907059 0.002311 2.81725 2.97185 #alc1 -0.023605 0.004009 -0.20058 0.06655 #alc2 -0.073832 0.005949 -0.22995 0.10845 #alc3 0.062491 0.006252 -0.09635 0.18596 #hyp1 -0.529329 0.002452 -0.63301 -0.43178 #obe1 0.005441 0.004742 -0.12638 0.12031 #obe2 -0.002783 0.004098 -0.17082 0.07727 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 11.07 19.76 23.34 24.47 29.04 50.37 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 30.82 34.78 35.74 36.28 37.45 42.49 # #Bayesian p-value = 0.0444 } conting/man/mod_probs.Rd0000644000176000001440000000656212502056666015012 0ustar ripleyusers\name{mod_probs} \alias{mod_probs} \title{ Compute Posterior Model Probabilities } \description{ This function computes the posterior model probabilities using the MCMC output of \code{"bcct"} and \code{"bict"} objects. } \usage{ mod_probs(object, n.burnin = 0, scale = 0.1, best = NULL, thin = 1) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. 
} \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{scale}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the models with the posterior model probability larger than \code{scale} times the probability of the posterior modal model. The default value is 0.1. } \item{best}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the \code{best} models with the highest posterior model probabilities. For example, if \code{best=4}, then details on the four models with the highest posterior model probabilities will be returned. The default value is \code{NULL}. If not \code{NULL}, then this argument takes precedence over \code{scale}. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } } \details{ It will output only the probabilities of the "best" models, as defined by the user specifying either the \code{best} or \code{scale} arguments. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage. } \value{ The function will return an object of class \code{"modprobs"} which is a list containing the following components. \item{table}{An object of class \code{"data.frame"} with number of rows defined by \code{scale} or \code{best} and columns: \code{model_formula}; giving the model (in terms of a printed formula), and \code{prob}; giving the posterior model probability.} \item{totmodsvisit}{A numeric scalar giving the total number of models visited after the burn-in iterations.} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{print.modprobs}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Starting from maximal model of saturated model do 100 iterations of MCMC ## algorithm. mod_probs(object=test1,n.burnin=10,best=6) ## Using a burn-in of 10 iterations find the posterior model probabilities ## of the 6 models with the highest posterior model probability. Will get: #Posterior model probabilities: # prob model_formula #1 0.50000 ~alc + hyp + obe #2 0.32222 ~alc + hyp + obe + hyp:obe #3 0.12222 ~alc + hyp + obe + alc:hyp + hyp:obe #4 0.05556 ~alc + hyp + obe + alc:hyp # #Total number of models visited = 4 ## Note that since the chain only visited 4 models we only get probabilities ## for 4 models not 6. } conting/man/add_term.Rd0000644000176000001440000001056012502056666014576 0ustar ripleyusers\name{add_term} \alias{add_term} \alias{drop_term} \alias{prop_mod} \title{ Determines Model Moves Given Current Model } \description{ These functions are used to determine which models we can propose moves to, given the current model in the MCMC algorithm, and the principle of marginality.
} \usage{ add_term(curr.index, data, maximal.mod) drop_term(curr.index, data, maximal.mod) prop_mod(curr.index,data,maximal.mod,null.move.prob=0.5) } \arguments{ \item{curr.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the current model. } \item{data}{ An object of class \code{"data.frame"} containing the variables in the model. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{null.move.prob}{ An optional scalar argument giving the probability of performing a null move, i.e. proposing a move to the current model. The default value is 0.5. } } \details{ In the reversible jump algorithm we propose a move to a model given the current model. The function \code{prop_mod} implements a scheme whereby only local proposals are made, i.e. either a term is added or dropped. These types of move are called birth and death moves, respectively, by Forster et al (2012). When a term is either added or dropped, we preserve the principle of marginality, e.g. we can only propose to add a three-way interaction if all the possible two-way interactions between the three factors are included in the present model. The functions \code{add_term} and \code{drop_term} determine which terms can be added or dropped whilst preserving the principle of marginality. The function \code{prop_mod} will call \code{add_term} and \code{drop_term} thus determining which terms can be added or dropped. With probability \code{null.move.prob} it will choose to remain in the current model; otherwise it will choose one of the possible terms to add or drop. } \value{ The functions \code{add_term} and \code{drop_term} will output a character vector containing the names of terms that can be added and dropped, respectively. The function \code{prop_mod} will return a list with the following components. \item{new.index}{A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the proposed model.} \item{type}{A character string which will be one of \code{c("null","drop","add")} depending on the type of move proposed.} \item{total.choices}{If \code{type} is not \code{"null"}, then \code{total.choices} will be a scalar giving the total number of non-null moves available. If \code{type} is equal to \code{"null"}, then \code{total.choices} will be 0.} \item{null.move.prob}{A scalar giving the probability of a null move.} } \references{ Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ data(AOH) ## Load the AOH data maximal.mod<-glm(formula=y~(alc+hyp+obe)^3,data=AOH,x=TRUE,y=TRUE, contrasts=list(alc="contr.sum",hyp="contr.sum",obe="contr.sum")) ## Set up the maximal model which in this case is the saturated model. curr.index<-formula2index(big.X=maximal.mod$x,formula=y~alc+hyp+obe+hyp:obe,data=AOH) ## Set up the binary vector for the model containing all main effects and the ## hyp:obe interaction.
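## (Hedged aside, not part of the original example:) the index can be sanity-checked
## by converting it back to a formula with index2formula(); it should reproduce the
## model y ~ alc + hyp + obe + hyp:obe set up above, before the add_term call below.
index2formula(index=curr.index,maximal.mod=maximal.mod)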
add_term(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## See what terms we can add - will get: #[1] "alc:hyp" "alc:obe" drop_term(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## See what terms we can drop - will get: #[1] "hyp:obe" set.seed(4) ## Set the seed for reproducibility. prop_mod(curr.index=curr.index,data=AOH,maximal.mod=maximal.mod) ## Propose a model. Will be a drop move, proposing the independence model by ## dropping the hyp:obe interaction. The total.choices object is 3, i.e. one ## drop move and two add moves. Specifically: #$new.index # [1] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 # #$type #[1] "drop" # #$total.choices #[1] 3 # #$null.move.prob #[1] 0.5 } conting/man/inter_probs.Rd0000644000176000001440000000541012502056666015343 0ustar ripleyusers\name{inter_probs} \alias{inter_probs} \title{ Calculate Posterior Probability of Each Term } \description{ This function computes the posterior probability of each term using the MCMC output of \code{"bcct"} and \code{"bict"} objects. } \usage{ inter_probs(object, cutoff = 0.75, n.burnin = 0, thin = 1) } \arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } \item{cutoff}{ An optional argument giving the cutoff posterior probability for displaying posterior summary statistics of the log-linear parameters. Only those log-linear parameters with a posterior probability greater than \code{cutoff} will be returned as part of the output. The default value is 0.75. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } } \details{ This function provides a scaled back version of what \code{\link{inter_stats}} provides. The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ This function returns an object of class \code{"interprob"} which is a list with the following components. \item{term}{A vector of term labels.} \item{prob}{A vector of posterior probabilities.} \item{thin}{The value of the argument \code{thin}.} The function will only return elements in the above list if \code{prob} > \code{cutoff}. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{print.interprob}}, \code{\link{inter_stats}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Starting from maximal model of saturated model do 100 iterations of MCMC ## algorithm. inter_probs(test1,n.burnin=10,cutoff=0) ## Calculate posterior probabilities having used a burn-in phase of ## 10 iterations and a cutoff of 0 (i.e. display all terms with ## non-zero posterior probability). Will get the following: #Posterior probabilities of log-linear parameters: # post_prob #(Intercept) 1.0000 #alc 1.0000 #hyp 1.0000 #obe 1.0000 #alc:hyp 0.1778 #alc:obe 0.0000 #hyp:obe 0.4444 #alc:hyp:obe 0.0000 ## Note that the MCMC chain (after burn-in) does not visit any models ## with the alc:obe or alc:hyp:obe interactions. 
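## (Hedged aside, not part of the original example:) the Details section recommends
## thinning for long runs; the same call with the documented thin argument, e.g.
## thin=2, bases the probabilities on every second retained iteration after burn-in
## (the exact values depend on the sampled chain).
inter_probs(test1,n.burnin=10,thin=2,cutoff=0)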
}conting/man/formula2index.Rd0000644000176000001440000000456512502056666015606 0ustar ripleyusers\name{formula2index} \alias{formula2index} \alias{index2formula} \title{ Convert Between Formula and Index } \description{ These functions will convert a formula object to a binary index and vice versa. } \usage{ formula2index(big.X, formula, data) index2formula(index, maximal.mod) } \arguments{ \item{big.X}{ The design matrix under the maximal model. } \item{formula}{ An object of class \code{"formula"}: a symbolic description of the model to convert to a binary index. } \item{data}{ An object of class \code{"data.frame"} containing the variables in the model. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model to be converted to a formula. } } \value{ The function \code{formula2index} will produce a binary vector of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model represented by the argument \code{formula}. The function \code{index2formula} will produce an object of class \code{"formula"}: a symbolic description of the model given by the argument \code{index}. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ data(ScotPWID) ## Load the ScotPWID data maximal.mod<-glm(y~(S1+S2+S3+S4+Region+Gender+Age)^2,family=poisson,contrasts=list( S1="contr.sum",S2="contr.sum",S3="contr.sum",S4="contr.sum", Region="contr.sum",Gender="contr.sum",Age="contr.sum"),data=ScotPWID,x=TRUE) ## Fit the maximal model containing all two-way interactions. big.X<-maximal.mod$x ## Set the design matrix under the maximal model index<-formula2index(big.X=big.X, formula=~S1+S2+S3+S4+Region+Gender+Age+S1:S2+S1:Age+S2:Gender+S3:S4+S4:Age, data=ScotPWID) ## Find the index under the model with the following interactions: ## S1:S2 ## S1:Age ## S2:Gender ## S3:S4 ## S4:Age index ## Print index # [1] 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 index2formula(index=index,maximal.mod=maximal.mod) ## Go back to formula #y ~ S1 + S2 + S3 + S4 + Region + Gender + Age + S1:S2 + S1:Age + # S2:Gender + S3:S4 + S4:Age }conting/man/total_pop.Rd0000644000176000001440000000517512502056666015026 0ustar ripleyusers\name{total_pop} \alias{total_pop} \title{ Evaluate Posterior Distribution of Total Population Size } \description{ This function uses the MCMC output of a \code{"bict"} object to derive an MCMC sample from the posterior distribution of the total population size. } \usage{ total_pop(object, n.burnin = 0, thin = 1, prob.level = 0.95) } \arguments{ \item{object}{ An object of class \code{"bict"}. } \item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. } \item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. } \item{prob.level}{ An optional argument giving the target probability content of the highest posterior density intervals for the total population size. The default value is 0.95. 
} } \details{ The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with comuter memory storage. } \value{ This function will return an object of class \code{"totpop"} which is a list with the following components. \item{TOT}{A vector of length (\code{n.sample}-\code{n.burnin}) giving the MCMC sample from the posterior distribution of the total population size.} \item{int}{The 100*\code{prob.level}\% highest posterior density interval (HPDI) for the total population size.} \item{meanTOT}{The posterior mean of the total population size.} \item{prob.level}{The argument \code{prob.level}.} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bict}}, \code{\link{print.totpop}}. } \examples{ set.seed(1) ## Set seed for reproducibility data(spina) ## Load spina data test1<-bict(formula=y~(S1+S2+S3+eth)^2,data=spina,n.sample=100,prior="UIP") ## For the spina dataset. We do 100 iterations under the unit information ## prior. The maximal model is the model with two-way interactions and we ## start from this model at the posterior model tp<-total_pop(test1,n.burnin=10) ## Use a burn-in phase of 10 iterations tp ## Print out results. Will get: #Posterior mean of total population size = 727.0667 #95 % highest posterior density interval for total population size = ( 706 757 ) ## Could do a plot \dontrun{plot(tp)} ## Do a summary of MCMC sample from total population size summary(tp$TOT) ## Will get # Min. 1st Qu. Median Mean 3rd Qu. Max. # 697.0 716.2 727.0 727.1 735.8 763.0 } conting/man/conting-package.Rd0000644000176000001440000001321512502056666016051 0ustar ripleyusers\name{conting-package} \alias{conting-package} \alias{conting} \docType{package} \title{ Bayesian Analysis of Complete and Incomplete Contingency Tables } \description{ Performs Bayesian analysis of complete and incomplete contingency tables incorporating model uncertainty using log-linear models. These analyses can be used to identify associations/interactions between categorical factors and to estimate unknown closed populations. } \details{ \tabular{ll}{ Package: \tab conting\cr Type: \tab Package\cr Version: \tab 1.5\cr Date: \tab 2014-03-17\cr License: \tab GPL-2\cr } For the Bayesian analysis of complete contingency tables the key function is \code{\link{bcct}} which uses MCMC methods to generate a sample from the joint posterior distribution of the model parameters and model indicator. Further MCMC iterations can be performed by using \code{\link{bcctu}}. For the Bayesian analysis of incomplete contingency tables the key function is \code{\link{bict}} which uses MCMC methods to generate a sample from the joint posterior distribution of the model parameters, model indicator and the missing, and, possibly, censored cell entries. Further MCMC iterations can be performed by using \code{\link{bictu}}. In both cases see Overstall & King (2014), and the references therein, for details on the statistical and computational methods, as well as detailed examples. } \author{ Antony M. Overstall \email{antony@mcs.st-and.ac.uk} Maintainer: Antony M. Overstall \email{antony@mcs.st-and.ac.uk} } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. 
\url{http://www.jstatsoft.org/v58/i07/} } \keyword{ package } \examples{ set.seed(1) ## Set seed for reproducibility data(AOH) ## Load AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP") ## Bayesian analysis of complete contingency table. Let the saturated model ## be the maximal model and do 100 iterations. summary(test1) ## Summarise the result. Will get: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 2.877924 0.002574 2.78778 2.97185 #alc1 1 -0.060274 0.008845 -0.27772 0.06655 #alc2 1 -0.049450 0.006940 -0.20157 0.11786 #alc3 1 0.073111 0.005673 -0.05929 0.20185 #hyp1 1 -0.544988 0.003485 -0.65004 -0.42620 #obe1 1 -0.054672 0.007812 -0.19623 0.12031 #obe2 1 0.007809 0.004127 -0.11024 0.11783 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.45 ~alc + hyp + obe #2 0.30 ~alc + hyp + obe + hyp:obe #3 0.11 ~alc + hyp + obe + alc:hyp + hyp:obe #4 0.06 ~alc + hyp + obe + alc:hyp + alc:obe + hyp:obe #5 0.05 ~alc + hyp + obe + alc:hyp # #Total number of models visited = 7 # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 11.79 20.16 23.98 24.70 28.77 52.40 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.18 24.22 31.51 30.12 35.63 42.49 # #Bayesian p-value = 0.28 set.seed(1) ## Set seed for reproducibility data(spina) ## Load spina data test2<-bict(formula=y~(S1+S2+S3+eth)^2,data=spina,n.sample=100,prior="UIP") ## Bayesian analysis of incomplete contingency table. Let the model with two-way ## interactions be the maximal model and do 100 iterations. summary(test2) ## Summarise the result. Will get: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 1.0427 0.033967 0.6498 1.4213 #S11 1 -0.3159 0.015785 -0.4477 -0.1203 #S21 1 0.8030 0.018797 0.6127 1.1865 #S31 1 0.7951 0.003890 0.6703 0.8818 #eth1 1 2.8502 0.033455 2.4075 3.1764 #eth2 1 0.1435 0.072437 -0.4084 0.5048 #S21:S31 1 -0.4725 0.002416 -0.5555 -0.3928 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.36 ~S1 + S2 + S3 + eth + S2:S3 #2 0.19 ~S1 + S2 + S3 + eth + S2:S3 + S2:eth #3 0.12 ~S1 + S2 + S3 + eth + S1:eth + S2:S3 #4 0.12 ~S1 + S2 + S3 + eth + S1:S2 + S1:S3 + S1:eth + S2:S3 + S2:eth + S3:eth #5 0.10 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 #6 0.06 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 + S2:eth #Total number of models visited = 8 # #Posterior mean of total population size = 726.75 #95 % highest posterior density interval for total population size = ( 706 758 ) # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.329 15.190 20.040 22.550 24.180 105.200 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 5.329 18.270 22.580 21.290 24.110 37.940 # #Bayesian p-value = 0.45 } conting/man/bict.Rd0000644000176000001440000003127012502056666013741 0ustar ripleyusers\name{bict} \alias{bict} \alias{bictu} \title{ Bayesian Analysis of Incomplete Contingency Tables } \description{ These functions implement a Bayesian analysis of incomplete contingency tables. 
This is accomplished using a data augmentation MCMC algorithm where the null moves are performed using the Metropolis-Hastings algorithm and the between-model moves are performed using the reversible jump algorithm. This function can also accommodate cases where one of the sources observes a mixture of individuals from target and non-target populations. This results in some of the cell counts being censored. \code{bict} should be used initially, and \code{bictu} should be used to do additional MCMC iterations, if needed. } \usage{ bict(formula, data, n.sample, prior = "SBH", cens = NULL, start.formula = NULL, start.beta = NULL, start.sig = NULL, start.y0 = NULL, save = 0, name = NULL, null.move.prob=0.5, a = 0.001, b = 0.001, progress = FALSE) bictu(object, n.sample, save = NULL, name = NULL, progress = FALSE) } \arguments{ \item{formula}{ An object of class \code{"formula"}: a symbolic description of the maximal model.} \item{object}{An object of class \code{"bict"} produced by a previous call to \code{bict} or \code{bictu}.} \item{data}{An object of class \code{"data.frame"} (or \code{"table"}) containing the variables in the model. If the model variables are not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{bict} is called.} \item{n.sample}{ A numeric scalar giving the number of MCMC iterations to perform. } \item{prior}{ An optional argument giving the prior to be used in the analysis. It can be one of \code{c("UIP","SBH")}, where \code{"UIP"} = unit information prior; and \code{"SBH"} = Sabanes-Bove & Held prior. The default value is \code{"SBH"}. } \item{cens}{ A numeric vector indicating the row numbers of the data.frame in \code{data} which correspond to the censored cells. This can be found using the function \code{\link{find_cens}}. } \item{start.formula}{ An optional argument giving an object of class \code{"formula"}: a symbolic description of the starting model in the MCMC algorithm. If \code{NULL} (the default) the starting model will be the maximal model. } \item{start.beta}{ An optional argument giving the starting values of the log-linear parameters for the MCMC algorithm. It should be a vector of the same length as the number of log-linear parameters in the starting model implied by the argument \code{start.formula}. If \code{NULL} (the default) the starting value will be the posterior mode under the maximal model. } \item{start.sig}{ An optional argument giving the starting value of sigma^2 (under the Sabanes-Bove & Held prior) for the MCMC algorithm when the argument \code{prior} is \code{"SBH"}. If \code{NULL} (the default) the starting value will be one. } \item{start.y0}{ An optional argument giving the starting values of the missing and censored cell counts. This should have the same length as the number of missing and censored cell counts. } \item{save}{ An optional argument for saving the MCMC output mid-algorithm. For \code{bict}, if positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero (the default), the function will not save the MCMC output to external files. For \code{bictu}, if non-\code{NULL}, the function will save the MCMC output to external text files every \code{save} iterations. If \code{NULL} (the default), it will inherit the value of \code{save} from the previous call to \code{bict} or \code{bictu}. } \item{name}{ An optional argument giving a prefix to the external files saved if the argument \code{save} is positive.
For \code{bict}, a value of \code{NULL} means the external files will not have a prefix. For \code{bictu}, a value of \code{NULL}, means the prefix will be inherited from the previous call to \code{bict} or \code{bictu}. } \item{null.move.prob}{ An optional scalar argument giving the probability of performing a null move in the reversible jump algorithm, i.e. proposing a move to the current model. The default value is 0.5. } \item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. } \item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. } \item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. The default value is \code{FALSE}. } } \details{ For identifiability, the parameters are constrained. The \code{\link{conting-package}} uses sum-to-zero constraints. See Overstall & King (2014), and the references therein, for more details. The Metropolis-Hastings algorithm employed is the iterated weighted least squares method for generalised linear models (GLMs) proposed by Gamerman (1997). The reversible jump algorithm employed is the orthogonal projections method for GLMs proposed by Forster et al (2012). For details on these methods applied to log-linear models through the data-augmentation algorithm see Overstall & King (2014), and the references therein. For details on the censored approach see Overstall et al (2014). For details on the unit information and Sabanes-Bove & Held priors for generalised linear models see Ntzoufras et al (2003) and Sabanes-Bove & Held (2011), respectively. See Overstall & King (2014), and the references therein, for their application to log-linear models and contingency tables. } \value{ The functions will return an object of class \code{"bict"} which is a list with the following components. \item{BETA}{ An \code{n.sample} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned. } \item{MODEL}{ A vector of length \code{n.sample} giving the sampled model indicators in hexadecimal format. } \item{SIG}{ A vector of length \code{n.sample} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one. } \item{Y0}{ An \code{n.sample} by k matrix giving the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts. } \item{missing1}{ A vector of the same length as the number of missing cell counts giving the row numbers of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the missing cell counts. } \item{missing2}{ A vector of the same length as the number of censored cell counts giving the row numbers of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the censored cell counts. } \item{missing_details}{ The rows of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the missing cell counts. } \item{censored_details}{ The rows of the \code{data.frame} in \code{data} (or the elements of the variables) which correspond to the censored cell counts. 
} \item{rj_acc}{ A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted. } \item{mh_acc}{ A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted. } \item{priornum}{ A numeric scalar indicating which prior was used: 1 = \code{"UIP"}, 2 = \code{"SBH"}. } \item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. } \item{IP}{ A p by p matrix giving the inverse of the prior scale matrix for the maximal model. } \item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. } \item{save}{ The argument \code{save}. } \item{name}{ The argument \code{name}. } \item{null.move.prob}{ The argument \code{null.move.prob}.} \item{time}{ The total computer time (in seconds) used for the MCMC computations.} \item{a}{ The argument \code{a}. } \item{b}{ The argument \code{b}. } } \references{ Sabanes-Bove, D. & Held, L. (2011) Hyper-g priors for generalized linear models. \emph{Bayesian Analysis}, \bold{6 (3)}, 387--410. Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120. Gamerman, D. (1997) Sampling from the posterior distribution in generalised linear mixed models. \emph{Statistics and Computing}, \bold{7 (1)}, 57--68. Nztoufras, I., Dellaportas, P. & Forster, J.J. (2003) Bayesian variable and link determination for generalised linear models. \emph{Journal of Statistical Planning and Inference}, \bold{111 (1)}, 165--180. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} Overstall, A.M., King, R., Bird, S.M., Hutchinson, S.J. & Hay, G. (2014) Incomplete contingency tables with censored cells with application to estimating the number of people who inject drugs in Scotland. \emph{Statistics in Medicine}, \bold{33 (9)}, 1564--1579. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ These functions are wrappers for \code{\link{bict.fit}}. In Version 1.0 of \code{\link{conting-package}}, note that the default value for \code{prior} was \code{"UIP"}. From Version 1.1 onwards, the default value is \code{"SBH"}. } \seealso{ \code{\link{bict.fit}}, \code{\link{spina}}, \code{\link{ScotPWID}}. } \examples{ set.seed(1) ## Set seed for reproducibility. data(spina) ## Load the spina data test1<-bict(formula=y~(S1 + S2 + S3 + eth)^2,data=spina,n.sample=50, prior="UIP") ## Let the maximal model be the model with two-way interactions. Starting from the ## posterior mode of the model with two-way interactions, do 50 iterations under the ## unit information prior. test1<-bictu(object=test1,n.sample=50) ## Do another 50 iterations test1 #Number of cells in table = 24 # #Maximal model = #y ~ (S1 + S2 + S3 + eth)^2 # #Number of log-linear parameters in maximal model = 15 # #Number of MCMC iterations = 100 # #Computer time for MCMC = 00:00:01 # #Prior distribution for log-linear parameters = UIP # #Number of missing cells = 3 # #Number of censored cells = 0 summary(test1) ## Summarise the result. 
Will get: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 1.0427 0.033967 0.6498 1.4213 #S11 1 -0.3159 0.015785 -0.4477 -0.1203 #S21 1 0.8030 0.018797 0.6127 1.1865 #S31 1 0.7951 0.003890 0.6703 0.8818 #eth1 1 2.8502 0.033455 2.4075 3.1764 #eth2 1 0.1435 0.072437 -0.4084 0.5048 #S21:S31 1 -0.4725 0.002416 -0.5555 -0.3928 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.36 ~S1 + S2 + S3 + eth + S2:S3 #2 0.19 ~S1 + S2 + S3 + eth + S2:S3 + S2:eth #3 0.12 ~S1 + S2 + S3 + eth + S1:eth + S2:S3 #4 0.12 ~S1 + S2 + S3 + eth + S1:S2 + S1:S3 + S1:eth + S2:S3 + S2:eth + S3:eth #5 0.10 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 #6 0.06 ~S1 + S2 + S3 + eth + S1:S3 + S1:eth + S2:S3 + S2:eth # #Total number of models visited = 8 # #Posterior mean of total population size = 726.75 #95 % highest posterior density interval for total population size = ( 706 758 ) # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.329 15.190 20.040 22.550 24.180 105.200 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 5.329 18.270 22.580 21.290 24.110 37.940 # #Bayesian p-value = 0.45 } conting/man/AOH.Rd0000644000176000001440000000321312502056666013423 0ustar ripleyusers\name{AOH} \alias{AOH} \docType{data} \title{ Alcohol, Obesity and Hypertension: A Complete 4 * 3 * 2 Table } \description{ 491 subjects are cross-classified according to the three factors: hypertension (\code{hyp}; 2 levels), obesity (\code{obe}; 3 levels) and alcohol (\code{alc}; 4 levels). There are a total of 24 cells in the table. } \usage{data(AOH)} \format{ A \code{"data.frame"} with 24 observations on the following 4 variables. \describe{ \item{\code{y}}{ Counts in each cell of table. } \item{\code{alc}}{ A factor with levels \code{0} \code{1-2} \code{3-5} \code{6+} indicating the classification of alcohol intake of drinks per day. } \item{\code{obe}}{ A factor with levels \code{low} \code{average} \code{high} indicating the classification of obesity. } \item{\code{hyp}}{ A factor with levels \code{yes} \code{no} indicating the classification of hypertension. } } } \details{ These data are from a study in Western Australia. The study copied a larger study from USA. See Knuiman & Speed (1988) for more details. For details on the function \code{\link{bcct}} applied to these data, see Overstall & King (2014). } \source{ Knuiman, M.W. & Speed, T.P. (1988) Incorporating Prior Information into the Analysis of Contingency Tables. \emph{Biometrics}, \bold{44 (4)}, 1061--1071. } \references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \examples{ data(AOH) summary(AOH) } \keyword{datasets} conting/man/print.acceptrate.Rd0000644000176000001440000000144512502056666016267 0ustar ripleyusers\name{print.acceptrate} \alias{print.acceptrate} \title{ Prints \code{acceptrate} Objects } \description{ This function prints objects of class \code{"acceptrate"}. } \usage{ \method{print}{acceptrate}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{ An object of class \code{"acceptrate"}. } \item{digits}{ An optional argument controlling the rounding of output. 
} \item{\dots}{ Arguments to be passed to and from other methods. } }
\value{ This function will simply print out the acceptance rates for the reversible jump and Metropolis-Hastings algorithms. }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{accept_rate}}.}
\seealso{ \code{\link{accept_rate}} }
conting/man/beta_mode.Rd0000644000176000001440000000723512502056666014743 0ustar ripleyusers
\name{beta_mode} \alias{beta_mode}
\title{ Posterior Mode }
\description{ This function finds the posterior mode of the log-linear parameters of a log-linear model with a given design matrix and prior distribution. }
\usage{ beta_mode(X, prior = "SBH", y, IP, a = 0.001, b = 0.001) }
\arguments{ \item{X}{ The n by p design matrix where n is the number of cells and p is the number of log-linear parameters. }
\item{prior}{ The prior distribution. It can be one of \code{c("UIP","SBH")}, where \code{"UIP"} = unit information prior; and \code{"SBH"} = Sabanes-Bove & Held prior. The default value is \code{"SBH"}. }
\item{y}{ The n by 1 vector of cell counts. }
\item{IP}{ A p by p matrix giving the inverse of the prior scale matrix. }
\item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). }
\item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). } }
\details{ The posterior mode is found by maximising the log unnormalised posterior pdf given by the sum of the log-likelihood and the log of the prior pdf. This optimisation is achieved using a quasi-Newton-Raphson method.
For details on the unit information and Sabanes-Bove & Held priors for generalised linear models see Ntzoufras et al (2003) and Sabanes-Bove & Held (2011), respectively. See Overstall & King (2014), and the references therein, for their application to log-linear models and contingency tables.
The posterior mode is required for the reversible jump algorithm implemented from Forster et al (2012). }
\value{ \code{beta_mode} will return a p by 1 vector containing the posterior mode of the log-linear parameters.}
\references{ Sabanes-Bove, D. & Held, L. (2011) Hyper-g priors for generalized linear models. \emph{Bayesian Analysis}, \bold{6 (3)}, 387--410.
Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120.
Ntzoufras, I., Dellaportas, P. & Forster, J.J. (2003) Bayesian variable and link determination for generalised linear models. \emph{Journal of Statistical Planning and Inference}, \bold{111 (1)}, 165--180.
Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{ This function will not typically be called by the user. }
\examples{ data(AOH) ## loads the AOH data
X<-model.matrix(~alc+hyp+obe,data=AOH,
contrasts=list(alc="contr.sum",hyp="contr.sum",obe="contr.sum"))
## Sets up the design matrix for the independence model
IP<-(t(X)\%*\%X)/dim(X)[1]
## Set up inverse of prior scale matrix
beta_mode(X=X,prior="UIP",y=AOH$y,IP=IP)
## Finds the posterior mode of the log-linear parameters under the
## independence model with the unit information prior.
Will get:
#X(Intercept) Xalc1 Xalc2 Xalc3 Xhyp1 Xobe1
# 2.894270420 -0.045859743 -0.071775824 0.089541068 -0.504141954 0.008163604
# Xobe2
#-0.016327209
beta_mode(X=X,prior="SBH",y=AOH$y,IP=IP)
## Finds the posterior mode of the log-linear parameters under the
## independence model with the Sabanes-Bove & Held prior. Will get:
#X(Intercept) Xalc1 Xalc2 Xalc3 Xhyp1 Xobe1
# 2.908298763 -0.043704371 -0.068212247 0.085338704 -0.473628107 0.007762839
# Xobe2
#-0.015525678
}
conting/man/iwls_mh.Rd0000644000176000001440000000462412502056666014465 0ustar ripleyusers
\name{iwls_mh} \alias{iwls_mh}
\title{ Iterated Weighted Least Squares Metropolis-Hastings Algorithm }
\description{ This function implements one iteration of the Iterated Weighted Least Squares Metropolis-Hastings Algorithm as proposed by Gamerman (1997) for generalised linear models, as applied to log-linear models. }
\usage{ iwls_mh(curr.y, curr.X, curr.beta, iprior.var) }
\arguments{ \item{curr.y}{ A vector of length n giving the cell counts. }
\item{curr.X}{ An n by p design matrix for the current model, where p is the number of log-linear parameters. }
\item{curr.beta}{ A vector of length p giving the current log-linear parameters. }
\item{iprior.var}{ A p by p matrix giving the inverse of the prior variance matrix. } }
\details{ For details of the original algorithm see Gamerman (1997). For its application to log-linear models see Overstall & King (2014), and the references therein. }
\value{ The function will output a vector of length p giving the new values of the log-linear parameters. }
\references{ Gamerman, D. (1997) Sampling from the posterior distribution in generalised linear mixed models. \emph{Statistics and Computing}, \bold{7 (1)}, 57--68.
Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{ This function will not typically be called by the user. }
\examples{ set.seed(1) ## Set seed for reproducibility
data(AOH) ## Load AOH data
maximal.mod<-glm(y~alc+hyp+obe,family=poisson,x=TRUE,contrasts=list(alc="contr.sum",
hyp="contr.sum",obe="contr.sum"),data=AOH)
## Fit independence model to get a design matrix
IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(AOH$y)
IP[,1]<-0
IP[1,]<-0
## Set up inverse prior variance matrix under the UIP
## Let the current parameters be the MLE under the independence model
as.vector(coef(maximal.mod))
#[1] 2.89365105 -0.04594959 -0.07192507 0.08971628 -0.50545335 0.00818037
#[7] -0.01636074
## Update parameters using MH algorithm
iwls_mh(curr.y=AOH$y,curr.X=maximal.mod$x,curr.beta=coef(maximal.mod),iprior.var=IP)
## Will get:
#[1] 2.86468919 -0.04218623 -0.16376055 0.21656167 -0.49528676 -0.05026597
#[7] 0.02726671
}
conting/man/print.interprob.Rd0000644000176000001440000000142112502056666016152 0ustar ripleyusers
\name{print.interprob} \alias{print.interprob}
\title{ Print \code{interprob} Objects }
\description{ This function prints objects of class \code{"interprob"}. }
\usage{ \method{print}{interprob}(x, digits = max(3, getOption("digits") - 3), ...) }
\arguments{ \item{x}{ An object of class \code{"interprob"}. }
\item{digits}{ An optional argument controlling the rounding of output. }
\item{\dots}{ Arguments to be passed to and from other methods.
} } \value{ This function will print out the posterior probability of each term (subject to the argument \code{cutoff}). }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{inter_probs}}.}
\seealso{ \code{\link{inter_probs}}. }
conting/man/accept_rate.Rd0000644000176000001440000000331012502056666015264 0ustar ripleyusers
\name{accept_rate} \alias{accept_rate}
\title{ Compute Acceptance Rates for Metropolis-Hastings and Reversible Jump Algorithms }
\description{ This function computes the acceptance rates of the Metropolis-Hastings and reversible jump algorithms from the MCMC output of \code{bcct} and \code{bict} objects. }
\usage{ accept_rate(object) }
\arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. } }
\details{ Acceptance rates can be used to assess the performance of MCMC methods (in particular the performance of the reversible jump method, Brooks et al, 2003). }
\value{ This function will return an object of class \code{"acceptrate"} which is a list with the following components.
\item{rj_ar}{Acceptance rate (as a \%) of the reversible jump algorithm.}
\item{mh_ar}{Acceptance rate (as a \%) of the Metropolis-Hastings algorithm.} }
\references{ Brooks, S.P., Giudici, P., & Roberts, G.O. (2003) Efficient construction of reversible jump Markov chain Monte Carlo proposal distributions. \emph{Journal of the Royal Statistical Society, Series B}, \bold{65 (1)}, 3--55. }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\seealso{ \code{\link{print.acceptrate}}, \code{\link{bcct}}, \code{\link{bict}}. }
\examples{ set.seed(1) ## set a seed for reproducibility
data(AOH)
test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=500,prior="UIP")
## Create a bcct object for the AOH dataset for a very small number of
## iterations (500).
accept_rate(test1) ## Calculate acceptance rates. Will get:
#Acceptance rate of reversible jump proposals = 32.5581 %
#Acceptance rate of Metropolis-Hastings proposals = 76.8595 %
}
conting/man/print.modprobs.Rd0000644000176000001440000000144312502056666015777 0ustar ripleyusers
\name{print.modprobs} \alias{print.modprobs}
\title{ Print \code{modprobs} Objects }
\description{ This function prints objects of class \code{"modprobs"}. }
\usage{ \method{print}{modprobs}(x, digits = max(3, getOption("digits") - 3), ...) }
\arguments{ \item{x}{ An object of class \code{"modprobs"}. }
\item{digits}{ An optional argument controlling the rounding of output. }
\item{\dots}{ Arguments to be passed to and from other methods. } }
\value{ This function will print out the posterior model probability of the "best" models as defined by the arguments \code{best} or \code{scale}. }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{mod_probs}}.}
\seealso{ \code{\link{mod_probs}}. }
conting/man/print.submod.Rd0000644000176000001440000000243112502056666015441 0ustar ripleyusers
\name{print.submod} \alias{print.submod}
\title{ Print \code{submod} Objects }
\description{ This function prints objects of class \code{"submod"}. }
\usage{ \method{print}{submod}(x, ..., digits = max(3, getOption("digits") - 3)) }
\arguments{ \item{x}{ An object of class \code{"submod"}. }
\item{\dots}{ Arguments to be passed to and from other methods. }
\item{digits}{ An optional argument controlling the rounding of output.
} } \value{ Firstly, conditional on the model of interest (defined by \code{formula} and \code{order}), this function will print out the posterior means, posterior variances and 100*\code{prob.level}\% highest posterior density intervals (HPDIs) for each of the log-linear parameters. Secondly, conditional on the model of interest, it will print out summaries of the discrepancy statistics and the corresponding Bayesian p-value. Finally, if the class of the object passed to \code{sub_model} is \code{"bict"}, then it will print out the posterior mean and 100*\code{prob.level}\% HPDI for the total population size, conditional on the model of interest. }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{sub_model}}.}
\seealso{ \code{\link{sub_model}}. }
conting/man/print.interstat.Rd0000644000176000001440000000161712502056666016172 0ustar ripleyusers
\name{print.interstat} \alias{print.interstat}
\title{ Print \code{interstat} Objects }
\description{ This function prints objects of class \code{"interstat"}. }
\usage{ \method{print}{interstat}(x, digits = max(3, getOption("digits") - 3), ...) }
\arguments{ \item{x}{ An object of class \code{"interstat"}. }
\item{digits}{ An optional argument controlling the rounding of output. }
\item{\dots}{ Arguments to be passed to and from other methods. } }
\value{ This function will print out the posterior probability, posterior mean, posterior variance and the 100*\code{prob.level}\% highest posterior density intervals (HPDIs) of each log-linear parameter (subject to the argument \code{cutoff}). }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{inter_stats}}.}
\seealso{ \code{\link{inter_stats}}. }
conting/man/inter_stats.Rd0000644000176000001440000000741212502056666015360 0ustar ripleyusers
\name{inter_stats} \alias{inter_stats}
\title{ Compute Posterior Summary Statistics of the Log-Linear Parameters. }
\description{ This function computes the posterior summary statistics of the log-linear parameters using the MCMC output of \code{"bcct"} and \code{"bict"} objects. The posterior summary statistics are posterior probability, posterior mean, posterior variance and the lower and upper limits of highest posterior density intervals (HPDIs). }
\usage{ inter_stats(object, cutoff = 0.75, n.burnin = 0, thin = 1, prob.level = 0.95) }
\arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. }
\item{cutoff}{ An optional argument giving the cutoff posterior probability for displaying posterior summary statistics of the log-linear parameters. Only those log-linear parameters with a posterior probability greater than \code{cutoff} will be returned as part of the output. The default value is 0.75. }
\item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. }
\item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. }
\item{prob.level}{ An optional argument giving the probability content of the HPDIs. The default value is 0.95. } }
\details{ This function provides an expanded version of what \code{\link{inter_probs}} provides.
The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage.
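For example (an illustrative call only, using the argument names described above), \code{inter_stats(object, n.burnin = 1000, thin = 5)} would discard the first 1000 iterations as burn-in and base the posterior summaries on every 5th value of the remaining sample, so that only a fifth of the retained iterations needs to be processed.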
} \value{ This function will return an object of class \code{"interstat"} which is a list with the following components:
\item{term}{A vector of term labels for each parameter.}
\item{prob}{A vector of posterior probabilities for each parameter.}
\item{post_mean}{A vector of posterior means for each parameter.}
\item{post_var}{A vector of posterior variances for each parameter.}
\item{lower}{A vector of lower limits for the 100*\code{prob.level}\% HPDI for each parameter.}
\item{upper}{A vector of upper limits for the 100*\code{prob.level}\% HPDI for each parameter.}
\item{prob.level}{The argument \code{prob.level}.}
The function will only return elements in the above list if \code{prob} > \code{cutoff}. }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{print.interstat}}, \code{\link{inter_probs}} }
\examples{ set.seed(1) ## Set seed for reproducibility
data(AOH) ## Load AOH data
test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=100,prior="UIP")
## Starting from the maximal model (the saturated model) do 100 iterations of the
## MCMC algorithm.
inter_stats(test1,n.burnin=10,cutoff=0.5)
## Calculate posterior summary statistics having used a burn-in phase of
## 10 iterations and a cutoff of 0.5 (i.e. only display terms with a
## posterior probability greater than 0.5). Will get the following:
#Posterior summary statistics of log-linear parameters:
# post_prob post_mean post_var lower_lim upper_lim
#(Intercept) 1 2.88291 0.002565 2.78778 2.97185
#alc1 1 -0.05246 0.008762 -0.27772 0.06655
#alc2 1 -0.05644 0.006407 -0.20596 0.11786
#alc3 1 0.06822 0.005950 -0.09635 0.18596
#hyp1 1 -0.53895 0.003452 -0.63301 -0.39888
#obe1 1 -0.04686 0.007661 -0.20929 0.12031
#obe2 1 0.01395 0.004024 -0.11024 0.11783
#NB: lower_lim and upper_lim refer to the lower and upper values of the
#95 % highest posterior density intervals, respectively
}
conting/man/summary.bcct.Rd0000644000176000001440000001643612502056666015432 0ustar ripleyusers
\name{summary.bcct} \alias{summary.bcct} \alias{print.sbcct} \alias{summary.bict} \alias{print.sbict}
\title{ Summary of \code{bcct} and \code{bict} Objects }
\description{ These functions produce summaries of objects of class \code{"bcct"} and \code{"bict"}. They also control how these summaries are printed. }
\usage{ \method{summary}{bcct}(object, n.burnin = 0, thin = 1, cutoff = 0.75, statistic = "X2", best = NULL, scale = 0.1, prob.level = 0.95, ...)
\method{print}{sbcct}(x, ..., digits = max(3, getOption("digits") - 3))
\method{summary}{bict}(object, n.burnin = 0, thin = 1, cutoff = 0.75, statistic = "X2", best = NULL, scale = 0.1, prob.level = 0.95, ...)
\method{print}{sbict}(x, ..., digits = max(3, getOption("digits") - 3)) }
\arguments{ \item{object}{ An object of class \code{"bcct"} or \code{"bict"}. }
\item{x}{ An object of class \code{"sbcct"} or \code{"sbict"} produced as a result of a call to the functions \code{summary.bcct} or \code{summary.bict}, respectively. }
\item{n.burnin}{ An optional argument giving the number of iterations to use as burn-in. The default value is 0. }
\item{thin}{ An optional argument giving the amount of thinning to use, i.e. the computations are based on every \code{thin}-th value in the MCMC sample. The default value is 1, i.e. no thinning. }
\item{cutoff}{ An optional argument giving the cutoff posterior probability for displaying posterior summary statistics of the log-linear parameters.
Only those log-linear parameters with a posterior probability greater than \code{cutoff} will be returned as part of the output. The default value is 0.75. }
\item{statistic}{ An optional argument giving the discrepancy statistic to use for calculating the Bayesian p-value. It can be one of \code{c("X2","FreemanTukey","deviance")} which correspond to the different statistics: \code{"X2"} = Chi-squared statistic, \code{"FreemanTukey"} = Freeman-Tukey statistic, \code{"deviance"} = deviance statistic. See Overstall & King (2014), and references therein, for descriptions of these statistics. }
\item{best}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the \code{best} models with the highest posterior model probabilities. The default value is \code{NULL}. If not \code{NULL} then this argument takes precedence over \code{scale}. }
\item{scale}{ An optional argument for controlling how the posterior model probabilities are returned as output. The function will return details on the models with the posterior model probability larger than \code{scale} times the probability of the posterior modal model. The default value is 0.1. }
\item{prob.level}{ An optional argument giving the probability content of the highest posterior density intervals (HPDIs). The default value is 0.95. }
\item{digits}{ An optional argument controlling the rounding of output. }
\item{\dots}{ Arguments to be passed to and from other methods. } }
\details{The functions \code{summary.bcct} and \code{summary.bict} rely on the functions \code{\link{inter_stats}}, \code{\link{mod_probs}}, \code{\link{bayespval}}, and (in the case of \code{summary.bict}) \code{\link{total_pop}}. For extra information about the output from these functions, see the associated help files.
The use of thinning is recommended when the number of MCMC iterations and/or the number of log-linear parameters in the maximal model are/is large, which may cause problems with computer memory storage. }
\value{ The function \code{summary.bcct} will return an object of class \code{"sbcct"} which is a list with the following components.
\item{BETA}{An \code{n.sample} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.}
\item{MODEL}{A vector of length \code{n.sample} giving the sampled model indicators in hexadecimal format.}
\item{SIG}{A vector of length \code{n.sample} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.}
\item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}
\item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted.
A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}
\item{priornum}{A numeric scalar indicating which prior was used: 1 = \code{"UIP"}, 2 = \code{"SBH"}.}
\item{maximal.mod}{An object of class \code{"glm"} giving the fit of the maximal model.}
\item{IP}{A p by p matrix giving the inverse of the prior scale matrix for the maximal model.}
\item{eta.hat}{A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model.}
\item{save}{The argument \code{save}.}
\item{name}{The argument \code{name}.}
\item{int_stats}{A list which contains the same components as an object of class \code{"interstat"}, i.e. summary statistics for the log-linear parameters, see \code{\link{inter_stats}}.}
\item{mod_stats}{A list which contains the same components as an object of class \code{"modprobs"}, i.e. summary statistics for the posterior model probabilities, see \code{\link{mod_probs}}.}
\item{pval_stats}{A list which contains the same components as an object of class \code{"pval"}, i.e. summary statistics for the Bayesian p-value and the associated discrepancy statistics, see \code{\link{bayespval}}.}
The function \code{summary.bict} will return an object of class \code{"sbict"} which is a list with the same components as an object of class \code{"sbcct"} and the following additional components.
\item{Y0}{An \code{n.sample} by k matrix giving the sampled values of the missing and censored cell counts, where k is the total number of missing and censored cell counts.}
\item{tpop_stats}{A list which contains the same components as an object of class \code{"totpop"}, i.e. posterior summary statistics for the total population, see \code{\link{total_pop}}.}
The functions \code{\link{print.sbcct}} and \code{\link{print.sbict}} will print out the MCMC acceptance rates, posterior summary statistics for the log-linear parameters, the posterior model probabilities, the Bayesian p-value and (in the case of \code{\link{print.sbict}}) posterior summary statistics for the total population size. }
\references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{ For examples see the help files for \code{\link{bcct}} and \code{\link{bict}}. }
\seealso{ \code{\link{bcct}}, \code{\link{bict}}, \code{\link{accept_rate}}, \code{\link{bayespval}}, \code{\link{inter_stats}}, \code{\link{mod_probs}}, \code{\link{total_pop}}. }
conting/man/print.totpop.Rd0000644000176000001440000000143512502056666015500 0ustar ripleyusers
\name{print.totpop} \alias{print.totpop}
\title{ Print \code{totpop} Objects }
\description{ This function prints objects of class \code{"totpop"}. }
\usage{ \method{print}{totpop}(x, digits = max(3, getOption("digits") - 3), ...) }
\arguments{ \item{x}{ An object of class \code{"totpop"}. }
\item{digits}{ An optional argument controlling the rounding of output. }
\item{\dots}{ Arguments to be passed to and from other methods. } }
\value{ The function will print out the posterior mean and the 100*\code{prob.level}\% highest posterior density interval for the total population size. }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{total_pop}}.}
\seealso{ \code{\link{total_pop}}.
}conting/man/index2model.Rd0000644000176000001440000000430312502056666015227 0ustar ripleyusers\name{index2model} \alias{index2model} \alias{model2index} \title{ Convert Between Index and Model Indicator } \description{ These functions convert the binary vector, indicating which terms are in the current model, to the hexadecimal model indicator, and vice versa. } \usage{ index2model(index) model2index(model,dig) } \arguments{ \item{index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model to be converted to a hexadecimal. } \item{dig}{A scalar argument giving the number of columns of the design matrix for the maximal model. } \item{model}{A character string giving a hexadecimal model indicator. } } \value{ \code{index2model} will return a hexadecimal model indicator. \code{model2index} will return a binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the model converted from hexadecimal. } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \examples{ data(ScotPWID) ## Load the ScotPWID data maximal.mod<-glm(y~(S1+S2+S3+S4+Region+Gender+Age)^2,family=poisson,contrasts=list( S1="contr.sum",S2="contr.sum",S3="contr.sum",S4="contr.sum", Region="contr.sum",Gender="contr.sum",Age="contr.sum"),data=ScotPWID,x=TRUE) ## Fit the maximal model containing all two-way interactions. big.X<-maximal.mod$x ## Set the design matrix under the maximal model index<-formula2index(big.X=big.X, formula=~S1+S2+S3+S4+Region+Gender+Age+S1:S2+S1:Age+S2:Gender+S3:S4+S4:Age, data=ScotPWID) ## Find the index under the model with the following interactions: ## S1:S2 ## S1:Age ## S2:Gender ## S3:S4 ## S4:Age index ## Print the index, will get: # [1] 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 modind<-index2model(index) ## Find the hexadecimal model indicator modind ## Print it, will get: #[1] "1ff08a08" ## Convert back to index model2index(model=modind,dig=length(index)) ## Will get: # [1] 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 } conting/man/bcct.Rd0000644000176000001440000002460512502056666013737 0ustar ripleyusers\name{bcct} \alias{bcct} \alias{bcctu} \title{ Bayesian Analysis of Complete Contingency Tables } \description{ These functions implement a Bayesian analysis of complete contingency tables. This is accomplished using an MCMC algorithm where the null moves are performed using a Metropolis-Hastings algorithm and the between models moves are performed using a reversible jump algorithm. \code{bcct} should be used initially, and \code{bcctu} should be used to do additional MCMC iterations, if required. } \usage{ bcct(formula, data, n.sample, prior = "SBH", start.formula = NULL, start.beta = NULL, start.sig = NULL, save = 0, name = NULL, null.move.prob=0.5, a = 0.001, b = 0.001, progress = FALSE) bcctu(object, n.sample, save = NULL, name = NULL, progress = FALSE) } \arguments{ \item{formula}{An object of class \code{"formula"}: a symbolic description of the maximal model.} \item{object}{An object of class \code{"bcct"} produced as a previous call to \code{bcct} or \code{bcctu}.} \item{data}{An object of class \code{"data.frame"} (or \code{"table"}) containing the variables in the model. 
If the model variables are not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{bcct} is called.}
\item{n.sample}{ A numeric scalar giving the number of (additional, in the case of \code{bcctu}) MCMC iterations to perform. }
\item{prior}{An optional argument giving the prior to be used in the analysis. It can be one of \code{c("UIP","SBH")}, where \code{"UIP"} = unit information prior; and \code{"SBH"} = Sabanes-Bove & Held prior. The default value is \code{"SBH"}. }
\item{start.formula}{ An optional argument giving an object of class \code{"formula"}: a symbolic description of the starting model in the MCMC algorithm. If \code{NULL} (the default) the starting model will be the maximal model. }
\item{start.beta}{ An optional argument giving the starting values of the log-linear parameters for the MCMC algorithm. It should be a vector of the same length as the number of log-linear parameters in the starting model implied by the argument \code{start.formula}. If \code{NULL} (the default) the starting value will be the posterior mode under the maximal model. }
\item{start.sig}{ An optional argument giving the starting value of sigma^2 (under the Sabanes-Bove & Held prior) for the MCMC algorithm when the argument \code{prior} is \code{"SBH"}. If \code{NULL} (the default) the starting value will be one. }
\item{save}{ An optional argument for saving the MCMC output mid-algorithm. For \code{bcct}, if positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero (the default), the function will not save the MCMC output to external files. For \code{bcctu}, if non-\code{NULL}, the function will save the MCMC output to external text files every \code{save} iterations. If \code{NULL} (the default), it will inherit the value of \code{save} from the previous call to \code{bcct} or \code{bcctu}. }
\item{name}{ An optional argument giving a prefix to the file name of the external files saved if the argument \code{save} is positive. For \code{bcct}, a value of \code{NULL} means the external files will not have a prefix. For \code{bcctu}, a value of \code{NULL} means the prefix will be inherited from the previous call to \code{bcct} or \code{bcctu}. }
\item{null.move.prob}{ An optional scalar argument giving the probability of performing a null move in the reversible jump algorithm, i.e. proposing a move to the current model. The default value is 0.5. }
\item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. }
\item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). The default value is 0.001. }
\item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. The default value is \code{FALSE}. } }
\details{ For identifiability, the parameters are constrained. The \code{\link{conting-package}} uses sum-to-zero constraints. See Overstall & King (2014), and the references therein, for more details.
The Metropolis-Hastings algorithm employed is the iterated weighted least squares method for generalised linear models (GLMs) proposed by Gamerman (1997). The reversible jump algorithm employed is the orthogonal projections method for GLMs proposed by Forster et al (2012). For details on these methods applied to log-linear models see Overstall & King (2014), and the references therein.
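As an informal illustration of this parameterisation (a sketch only, mirroring the set-up used in the \code{\link{bcct.fit}} and \code{\link{iwls_mh}} examples; \code{bcct} itself only requires \code{formula} and \code{data}), a design matrix obeying the sum-to-zero constraints can be constructed in R using \code{contr.sum} contrasts:
\preformatted{
data(AOH)
maximal.mod <- glm(y ~ (alc + hyp + obe)^3, family = poisson, x = TRUE,
  contrasts = list(alc = "contr.sum", hyp = "contr.sum", obe = "contr.sum"),
  data = AOH)
## Under this coding the log-linear parameter for the last level of each
## factor is minus the sum of the parameters for the other levels.
}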
For details on the unit information and Sabanes-Bove & Held priors for generalised linear models see Ntzoufras et al (2003) and Sabanes-Bove & Held (2011), respectively. See Overstall & King (2014), and the references therein, for their application to log-linear models and contingency tables. }
\value{ The functions will return an object of class \code{"bcct"} which is a list with the following components:
\item{BETA}{An \code{n.sample} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.}
\item{MODEL}{A vector of length \code{n.sample} giving the sampled model indicators in hexadecimal format.}
\item{SIG}{A vector of length \code{n.sample} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.}
\item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}
\item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}
\item{priornum}{A numeric scalar indicating which prior was used: 1 = \code{"UIP"}, 2 = \code{"SBH"}.}
\item{maximal.mod}{An object of class \code{"glm"} giving the fit of the maximal model.}
\item{IP}{A p by p matrix giving the inverse of the prior scale matrix for the maximal model.}
\item{eta.hat}{A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model.}
\item{save}{The argument \code{save}.}
\item{name}{The argument \code{name}.}
\item{null.move.prob}{ The argument \code{null.move.prob}.}
\item{time}{ The total computer time (in seconds) used for the MCMC computations.}
\item{a}{ The argument \code{a}. }
\item{b}{ The argument \code{b}. } }
\references{ Sabanes-Bove, D. & Held, L. (2011) Hyper-g priors for generalized linear models. \emph{Bayesian Analysis}, \bold{6 (3)}, 387--410.
Forster, J.J., Gill, R.C. & Overstall, A.M. (2012) Reversible jump methods for generalised linear models and generalised linear mixed models. \emph{Statistics and Computing}, \bold{22 (1)}, 107--120.
Gamerman, D. (1997) Sampling from the posterior distribution in generalised linear mixed models. \emph{Statistics and Computing}, \bold{7 (1)}, 57--68.
Ntzoufras, I., Dellaportas, P. & Forster, J.J. (2003) Bayesian variable and link determination for generalised linear models. \emph{Journal of Statistical Planning and Inference}, \bold{111 (1)}, 165--180.
Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} }
\author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{ These functions are wrappers for \code{\link{bcct.fit}}. In Version 1.0 of \code{\link{conting-package}}, note that the default value for \code{prior} was \code{"UIP"}. From Version 1.1 onwards, the default value is \code{"SBH"}. }
\seealso{ \code{\link{bcct.fit}}, \code{\link{AOH}}, \code{\link{heart}}. }
\examples{ set.seed(1) ## Set seed for reproducibility.
data(AOH) ## Load the AOH data test1<-bcct(formula=y~(alc+hyp+obe)^3,data=AOH,n.sample=50,prior="UIP") ## Let the maximal model be the saturated model. Starting from the ## posterior mode of the maximal model do 50 iterations under the unit ## information prior. test1<-bcctu(object=test1,n.sample=50) ## Do another 50 iterations test1 ## Printing out a bcct object produces this simple summary #Number of cells in table = 24 # #Maximal model = #y ~ (alc + hyp + obe)^3 # #Number of log-linear parameters in maximal model = 24 # #Number of MCMC iterations = 100 # #Computer time for MCMC = 00:00:01 # #Prior distribution for log-linear parameters = UIP summary(test1) ## Printing out a summary produces a bit more: #Posterior summary statistics of log-linear parameters: # post_prob post_mean post_var lower_lim upper_lim #(Intercept) 1 2.877924 0.002574 2.78778 2.97185 #alc1 1 -0.060274 0.008845 -0.27772 0.06655 #alc2 1 -0.049450 0.006940 -0.20157 0.11786 #alc3 1 0.073111 0.005673 -0.05929 0.20185 #hyp1 1 -0.544988 0.003485 -0.65004 -0.42620 #obe1 1 -0.054672 0.007812 -0.19623 0.12031 #obe2 1 0.007809 0.004127 -0.11024 0.11783 #NB: lower_lim and upper_lim refer to the lower and upper values of the #95 % highest posterior density intervals, respectively # #Posterior model probabilities: # prob model_formula #1 0.45 ~alc + hyp + obe #2 0.30 ~alc + hyp + obe + hyp:obe #3 0.11 ~alc + hyp + obe + alc:hyp + hyp:obe #4 0.06 ~alc + hyp + obe + alc:hyp + alc:obe + hyp:obe #5 0.05 ~alc + hyp + obe + alc:hyp # #Total number of models visited = 7 # #Under the X2 statistic # #Summary statistics for T_pred # Min. 1st Qu. Median Mean 3rd Qu. Max. # 11.79 20.16 23.98 24.70 28.77 52.40 # #Summary statistics for T_obs # Min. 1st Qu. Median Mean 3rd Qu. Max. # 8.18 24.22 31.51 30.12 35.63 42.49 # #Bayesian p-value = 0.28 ## For more examples see Overstall & King (2014). } conting/man/find_cens.Rd0000644000176000001440000000712212502056666014747 0ustar ripleyusers\name{find_cens} \alias{find_cens} \title{ Find Censored Cells } \description{ Given all the sources and the censored source of an incomplete contingency table, this function will find the censored cells. } \usage{ find_cens(sources, cens_source, data=NULL, unobs.level = "un", obs.level = "obs") } \arguments{ \item{sources}{ An object of class \code{"formula"}, which details the sources in the incomplete contingency table. } \item{cens_source}{ An object of class \code{"formula"}, which details the source which is subject to censoring in the incomplete contingency table. } \item{data}{ An object of class \code{"data.frame"} (or \code{"table"}) containing the variables in the model. If the model variables are not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{find_cens} is called. } \item{unobs.level}{ The character string used to label the source level corresponding to not observing the individuals in the cell. } \item{obs.level}{ The character string used to label the source level corresponding to observing the individuals in the cell. } } \details{ Sometimes one of the sources (termed the censored source) used to estimate closed populations observes individuals which are not members of the target population. In this case we assume that when this source observes an individual that has been observed by at least one other source, then it is a member of the target population. 
However those individuals only observed by the censored source contain a mixture of members of the target and non-target populations. This means that the observed cell count acts as an upper bound on the true cell count. For more details on this approach, see Overstall et al (2014) and Overstall & King (2014). This function identifies the cells which are censored (i.e. correspond to only being observed by the censored source). } \value{ The function will output a numeric vector containing the cell numbers of the censored cells. These are used by the \code{\link{bict}} and \code{\link{bictu}} functions. } \references{ Overstall, A.M., King, R., Bird, S.M., Hutchinson, S.J. & Hay, G. (2014) Incomplete contingency tables with censored cells with application to estimating the number of people who inject drugs in Scotland. \emph{Statistics in Medicine}, \bold{33 (9)}, 1564--1579. Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \seealso{ \code{\link{bict}}. } \examples{ data(ScotPWID) ## Load the ScotPWID data. In this dataset, the S4 source corresponding ## to the HCV database is subject to censoring. We use find_cens to find ## the censored cells. find_cens(sources=~S1+S2+S3+S4,cens_source=~S4,data=ScotPWID) ## It will produce the vector with the following elements: ##[1] 9 25 41 57 73 89 105 121 ## Let's look at these cells ScotPWID[find_cens(sources=~S1+S2+S3+S4,cens_source=~S4,data=ScotPWID),] ## It will produce: # y S1 S2 S3 S4 Region Gender Age #9 122 un un un obs GGC Male Young #25 135 un un un obs GGC Male Old #41 48 un un un obs GGC Female Young #57 38 un un un obs GGC Female Old #73 134 un un un obs Rest Male Young #89 104 un un un obs Rest Male Old #105 78 un un un obs Rest Female Young #121 25 un un un obs Rest Female Old }conting/man/print.pval.Rd0000644000176000001440000000160312502056666015112 0ustar ripleyusers\name{print.pval} \alias{print.pval} \title{ Print \code{pval} Objects } \description{ This function prints objects of class \code{"pval"}. } \usage{ \method{print}{pval}(x, digits = max(3, getOption("digits") - 3), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ An object of class \code{"pval"}. } \item{digits}{ An optional argument controlling the rounding of output. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will print out summary statistics for the predictive (\code{T_pred}) and observed (\code{T_obs}) discrepancy statistics. Additionally it will output the associated Bayesian p-value. } \author{ Antony M. Overstall (\email{Antony.Overstall@glasgow.ac.uk}). } \note{For an example see \code{\link{bayespval}}.} \seealso{ \code{\link{bayespval}} } conting/man/plot.totpop.Rd0000644000176000001440000000120712502056666015317 0ustar ripleyusers\name{plot.totpop} \alias{plot.totpop} \title{ Plot \code{totpop} Objects } \description{ This function plots objects of class \code{"totpop"}. } \usage{ \method{plot}{totpop}(x, ...) } \arguments{ \item{x}{ An object of class \code{"totpop"}. } \item{\dots}{ Arguments to be passed to and from other methods. } } \value{ This function will produce a histogram of the MCMC sample from the posterior distribution of the total population size. } \author{ Antony M. 
Overstall \email{Antony.Overstall@glasgow.ac.uk}. }
\note{For an example see \code{\link{total_pop}}.}
\seealso{ \code{\link{total_pop}} }
conting/man/bcct.fit.Rd0000644000176000001440000001401312502056666014510 0ustar ripleyusers
\name{bcct.fit} \alias{bcct.fit}
\title{ Bayesian Analysis of Complete Contingency Tables }
\description{ This function is the workhorse behind \code{\link{bcct}} and \code{\link{bcctu}}. }
\usage{ bcct.fit(priornum, maximal.mod, IP, eta.hat, ini.index, ini.beta, ini.sig, iters, save, name, null.move.prob, a, b, progress) }
\arguments{ \item{priornum}{ A numeric scalar indicating which prior is to be used: 1 = \code{"UIP"}, 2 = \code{"SBH"}. }
\item{maximal.mod}{ An object of class \code{"glm"} giving the fit of the maximal model. }
\item{IP}{ A p by p matrix giving the inverse of the prior scale matrix for the maximal model. }
\item{eta.hat}{ A vector of length n (number of cells) giving the posterior mode of the linear predictor under the maximal model. }
\item{ini.index}{ A binary vector, of the same length as the number of log-linear parameters in the maximal model, indicating which parameters are present in the initial model. }
\item{ini.beta}{ A numeric vector giving the starting values of the log-linear parameters for the MCMC algorithm. }
\item{ini.sig}{ A numeric scalar giving the starting value of sigma^2 for the MCMC algorithm. }
\item{iters}{ The number of iterations of the MCMC algorithm to perform. }
\item{save}{ If positive, the function will save the MCMC output to external text files every \code{save} iterations. If zero, the function will not save the MCMC output to external files. }
\item{name}{ A prefix to the external files saved if the argument \code{save} is positive. If \code{NULL}, then the external files will have no prefix. }
\item{null.move.prob}{ A scalar argument giving the probability of performing a null move, i.e. proposing a move to the current model. }
\item{a}{ The shape hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). }
\item{b}{ The scale hyperparameter of the Sabanes-Bove & Held prior, see Overstall & King (2014). }
\item{progress}{ Logical argument. If \code{TRUE}, then a progress bar will be displayed. } }
\value{ The function will return a list with the following components:
\item{BETA}{An \code{iters} by p matrix containing the sampled values of the log-linear parameters, where p is the number of log-linear parameters in the maximal model. For elements of this matrix which correspond to a log-linear parameter which is not present for the current model a zero is returned.}
\item{MODEL}{A vector of length \code{iters} giving the sampled model indicators in hexadecimal form.}
\item{SIG}{A vector of length \code{iters} giving the sampled values for sigma^2 under the Sabanes-Bove & Held prior. If the unit information prior is used then the components of this vector will be one.}
\item{rj_acc}{A binary vector of the same length as the number of reversible jump moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.}
\item{mh_acc}{A binary vector of the same length as the number of Metropolis-Hastings moves attempted. A 0 indicates that the proposal was rejected, and a 1 that the proposal was accepted.} }
\references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27.
\url{http://www.jstatsoft.org/v58/i07/} } \author{ Antony M. Overstall \email{Antony.Overstall@glasgow.ac.uk}. } \note{ This function will not typically be called by the user. } \seealso{ \code{\link{bcct}}, \code{\link{bcctu}}. } \examples{ data(AOH) ## Load the AOH data. maximal.mod<-glm(formula=y~(alc+hyp+obe)^3,data=AOH,x=TRUE,y=TRUE, contrasts=list(alc="contr.sum",hyp="contr.sum",obe="contr.sum")) ## Set up the maximal model which in this case is the saturated ## model. curr.index<-formula2index(big.X=maximal.mod$x,formula=y~alc+hyp+obe+hyp:obe,data=AOH) ## Set up the binary vector for the model containing all main effects and the ## hyp:obe interaction. IP<-t(maximal.mod$x)\%*\%maximal.mod$x/length(maximal.mod$y) IP[,1]<-0 IP[1,]<-0 ## Set up the inverse scale matrix for the prior distribution under ## the maximal model. bmod<-beta_mode(X=maximal.mod$x,prior="UIP",y=maximal.mod$y,IP=IP) ## Find the posterior mode under the maximal model eta.hat<-as.vector(maximal.mod$x\%*\%bmod) ## Find the posterior mode of the linear predictor ## under the maximal model. set.seed(1) ## Set seed for reproducibility test1<-bcct.fit(priornum=1, maximal.mod=maximal.mod, IP=IP, eta.hat=eta.hat, ini.index=curr.index, ini.beta=bmod[curr.index==1], ini.sig=1, iters=5, save=0, name=NULL,null.move.prob=0.5, a=0.001, b=0.001, progress=TRUE) ## Run for 5 iterations starting at model defined by curr.index. test1$MODEL ## Look at sampled model indicators. Should be: ## [1] "fe00c0" "fe0000" "fe0000" "fe0000" "fe0000" model2index(test1$MODEL,dig=24) ## Convert these to binary indicators of the log-linear parameters. ## Will get: # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] #fe00c0 1 1 1 1 1 1 1 0 0 0 0 0 0 #fe0000 1 1 1 1 1 1 1 0 0 0 0 0 0 #fe0000 1 1 1 1 1 1 1 0 0 0 0 0 0 #fe0000 1 1 1 1 1 1 1 0 0 0 0 0 0 #fe0000 1 1 1 1 1 1 1 0 0 0 0 0 0 # [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] #fe00c0 0 0 0 1 1 0 0 0 0 0 0 #fe0000 0 0 0 0 0 0 0 0 0 0 0 #fe0000 0 0 0 0 0 0 0 0 0 0 0 #fe0000 0 0 0 0 0 0 0 0 0 0 0 #fe0000 0 0 0 0 0 0 0 0 0 0 0 ## See how the hyp:obe interactions in columns 17 and 18 gets dropped after ## the 1st iteration. } conting/man/heart.Rd0000644000176000001440000000375012502056666014125 0ustar ripleyusers\name{heart} \alias{heart} \docType{data} \title{ Risk Factors for Coronary Heart Disease: A Complete 2^6 Table } \description{ 1841 men are cross-classified according to six risk factors for coronary heart disease: smoking (\code{A}; 2 levels), strenuous mental work (\code{B}; 2 levels), strenuous physical work (\code{C}; 2 levels), systolic blood pressure (\code{D}; 2 levels), ratio of alpha and beta lipoproteins (\code{E}; 2 levels) and family anamnesis of coronary heart disease (\code{F}; 2 levels). } \usage{data(heart)} \format{ A \code{"data.frame"} with 64 observations on the following 7 variables. \describe{ \item{\code{y}}{ Counts in each cell of table. } \item{\code{A}}{ A factor with levels \code{yes} \code{no} indicating smoking status. } \item{\code{B}}{ A factor with levels \code{yes} \code{no} indicating strenuous mental work. } \item{\code{C}}{ A factor with levels \code{yes} \code{no} indicating strenuous physical work. } \item{\code{D}}{ A factor with levels \code{yes} \code{no} indicating systolic blood pressure. } \item{\code{E}}{ A factor with levels \code{yes} \code{no} indicating high ratio of alpha and beta lipoproteins. 
} \item{\code{F}}{ A factor with levels \code{yes} \code{no} indicating a family anamnesis of coronary heart disease. } }}
\details{ For more details on these data see Edwards & Havranek (1985). For details on the function \code{\link{bcct}} applied to these data, see Overstall & King (2014). }
\source{ Edwards, D. & Havranek, T. (1985) A fast procedure for model search in multidimensional contingency tables. \emph{Biometrika}, \bold{72 (2)}, 339--351. }
\references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} }
\examples{ data(heart) summary(heart) }
\keyword{datasets}
conting/man/spina.Rd0000644000176000001440000000403112502056666014125 0ustar ripleyusers
\name{spina} \alias{spina} \docType{data}
\title{ Persons born with Spina Bifida: An Incomplete 2 * 2 * 2 * 3 Table }
\description{ 621 people born with Spina Bifida (a congenital disorder) in the state of New York between 1969 and 1974 are observed by three sources: birth certificates (\code{S1}); death certificates (\code{S2}); and medical rehabilitation lists (\code{S3}). The people are also cross-classified according to their ethnicity (\code{eth}; 3 levels). }
\usage{data(spina)}
\format{ A \code{"data.frame"} with 24 observations on the following 5 variables. \describe{
\item{\code{y}}{ Counts in each cell of the table with NAs for the cells corresponding to not being observed by any of the sources. }
\item{\code{S1}}{ A factor with levels \code{un} \code{obs} indicating whether the birth certificate source observed the person. }
\item{\code{S2}}{ A factor with levels \code{un} \code{obs} indicating whether the death certificate source observed the person. }
\item{\code{S3}}{ A factor with levels \code{un} \code{obs} indicating whether the medical rehabilitation source observed the person. }
\item{\code{eth}}{ A factor with levels \code{afro-american} \code{caucasian} \code{other} indicating the ethnicity of the person (\code{afro-american} = Afro-American, \code{caucasian} = Caucasian, \code{other} = Other). } } }
\details{ See Madigan & York (1997), and the references therein, for more details on the study. For details on the function \code{\link{bict}} applied to these data, see Overstall & King (2014). }
\source{ Madigan, D. & York, J.C. (1997) Methods for Estimation of the Size of a Closed Population. \emph{Biometrika}, \bold{84 (1)}, 19--31. }
\references{ Overstall, A.M. & King, R. (2014) conting: An R package for Bayesian analysis of complete and incomplete contingency tables. \emph{Journal of Statistical Software}, \bold{58 (7)}, 1--27. \url{http://www.jstatsoft.org/v58/i07/} }
\examples{ data(spina) summary(spina) }
\keyword{datasets}